a
    PSic?                  	   @   sn  d dl mZ d dlmZmZmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZ g dZG dd dejZG dd dejZG dd dejZ G dd dej!Z"G dd dej!Z#G dd dejZ$G dd dejZ%G dd dej!Z&ee
e"e#f  eee
ee ef   ee' edej!f ee e(e	e&ddd Z)d!ed"d#d$Z*G d%d& d&eZ+G d'd( d(eZ,G d)d* d*eZ-ed+e+j.fd,dd-d.ee+ e(e	e&d/d0d1Z/ed+e,j.fd,dd-d.ee, e(e	e&d/d2d3Z0ed+e-j.fd,dd-d.ee- e(e	e&d/d4d5Z1dd6lm2Z2 e2e+j.j3e,j.j3e-j.j3d7Z4dS )8    )partial)TupleOptionalCallableListSequenceTypeAnyUnionN)Tensor   )VideoClassification)_log_api_usage_once   )WeightsEnumWeights)_KINETICS400_CATEGORIES)handle_legacy_interface_ovewrite_named_param)VideoResNetR3D_18_WeightsMC3_18_WeightsR2Plus1D_18_Weightsr3d_18mc3_18r2plus1d_18c                       sP   e Zd Zd	eeee eedd fddZeeeeeef dddZ  Z	S )
Conv3DSimpleN   	in_planes
out_planes	midplanesstridepaddingreturnc                    s   t  j||d||dd d S )N)r   r   r   Fin_channelsout_channelskernel_sizer"   r#   biassuper__init__selfr   r    r!   r"   r#   	__class__ [/var/www/html/django/DPS/env/lib/python3.9/site-packages/torchvision/models/video/resnet.pyr,      s    zConv3DSimple.__init__r"   r$   c                 C   s
   | | | fS Nr1   r"   r1   r1   r2   get_downsample_stride'   s    z"Conv3DSimple.get_downsample_stride)Nr   r   
__name__
__module____qualname__intr   r,   staticmethodr   r6   __classcell__r1   r1   r/   r2   r      s    r   c                       sL   e Zd Zd	eeeeedd fddZeeeeeef dddZ  ZS )
Conv2Plus1Dr   Nr   c                    s`   t  tj||dd||fd||fddt|tjddtj||d|ddf|ddfdd d S )	Nr   r   r   r   r   Fr(   r"   r#   r)   Tinplacer   r   r   r+   r,   nnConv3dBatchNorm3dReLUr-   r/   r1   r2   r,   -   s    
zConv2Plus1D.__init__r3   c                 C   s
   | | | fS r4   r1   r5   r1   r1   r2   r6   >   s    z!Conv2Plus1D.get_downsample_stride)r   r   )	r8   r9   r:   r;   r,   r<   r   r6   r=   r1   r1   r/   r2   r>   ,   s   r>   c                       sP   e Zd Zd	eeee eedd fddZeeeeeef dddZ  Z	S )
Conv3DNoTemporalNr   r   c                    s(   t  j||dd||fd||fdd d S )Nr?   r   r   Fr%   r*   r-   r/   r1   r2   r,   D   s    zConv3DNoTemporal.__init__r3   c                 C   s
   d| | fS Nr   r1   r5   r1   r1   r2   r6   Q   s    z&Conv3DNoTemporal.get_downsample_stride)Nr   r   r7   r1   r1   r/   r2   rI   C   s    rI   c                       sR   e Zd ZdZd
eeedejf eeej dd fddZ	e
e
ddd	Z  ZS )
BasicBlockr   N.inplanesplanesconv_builderr"   
downsampler$   c                    s   || d d d |d d d|   }t    t|||||t|tjdd| _t||||t|| _tjdd| _|| _	|| _
d S )Nr   TrA   )r+   r,   rE   
SequentialrG   rH   conv1conv2relurP   r"   r.   rM   rN   rO   r"   rP   r!   r/   r1   r2   r,   Z   s    (
zBasicBlock.__init__xr$   c                 C   sB   |}|  |}| |}| jd ur,| |}||7 }| |}|S r4   )rR   rS   rP   rT   r.   rW   residualoutr1   r1   r2   forwardm   s    




zBasicBlock.forward)r   Nr8   r9   r:   	expansionr;   r   rE   Moduler   r,   r   r[   r=   r1   r1   r/   r2   rK   V   s     rK   c                       sR   e Zd ZdZdeeedejf eeej dd fddZ	e
e
dd	d
Z  ZS )
Bottleneck   r   N.rL   c                    s   t    || d d d |d d d|   }ttj||dddt|tjdd| _t|||||t|tjdd| _ttj||| j	 dddt|| j	 | _
tjdd| _|| _|| _d S )Nr   r   F)r(   r)   TrA   )r+   r,   rE   rQ   rF   rG   rH   rR   rS   r]   conv3rT   rP   r"   rU   r/   r1   r2   r,   ~   s    	
("zBottleneck.__init__rV   c                 C   sL   |}|  |}| |}| |}| jd ur6| |}||7 }| |}|S r4   )rR   rS   ra   rP   rT   rX   r1   r1   r2   r[      s    





zBottleneck.forward)r   Nr\   r1   r1   r/   r2   r_   {   s     r_   c                       s&   e Zd ZdZdd fddZ  ZS )	BasicStemz$The default conv-batchnorm-relu stemNr$   c              
      s4   t  tjdddddddtdtjdd	 d S )
Nr   @   )r      re   r   r   r   r?   Fr@   TrA   rD   r.   r/   r1   r2   r,      s
    
zBasicStem.__init__r8   r9   r:   __doc__r,   r=   r1   r1   r/   r2   rb      s   rb   c                       s&   e Zd ZdZdd fddZ  ZS )R2Plus1dStemzRR(2+1)D stem is different than the default one as it uses separated 3D convolutionNrc   c                    sZ   t  tjdddddddtdtjdd	tjdd
dddddtd
tjdd	 d S )Nr   -   )r   re   re   rf   )r   r   r   Fr@   TrA   rd   rC   r   r   r   )r   r   r   rD   rg   r/   r1   r2   r,      s    

zR2Plus1dStem.__init__rh   r1   r1   r/   r2   rj      s   rj   c                	       s   e Zd Zdeeeef  eeeee	e
f   ee edejf eedd fddZeedd	d
Zdeeeef  eeee	e
f  eeeejdddZ  ZS )r     F.N)blockconv_makerslayersstemnum_classeszero_init_residualr$   c                    s  t    t|  d| _| | _| j||d d|d dd| _| j||d d|d dd| _| j||d d|d dd| _| j||d d	|d dd| _	t
d
| _t
d	|j || _|  D ]}t|t
jrt
jj|jddd |jdurbt
j|jd qt|t
jr4t
j|jd t
j|jd qt|t
jrt
j|jdd t
j|jd q|r|  D ]$}t|trrt
j|jjd qrdS )a^  Generic resnet video generator.

        Args:
            block (Type[Union[BasicBlock, Bottleneck]]): resnet building block
            conv_makers (List[Type[Union[Conv3DSimple, Conv3DNoTemporal, Conv2Plus1D]]]): generator
                function for each layer
            layers (List[int]): number of blocks per layer
            stem (Callable[..., nn.Module]): module specifying the ResNet stem.
            num_classes (int, optional): Dimension of the final FC layer. Defaults to 400.
            zero_init_residual (bool, optional): Zero init bottleneck residual BN. Defaults to False.
        rd   r   r   r5      r      r   i   rl   fan_outrT   )modenonlinearityNg{Gz?)r+   r,   r   rM   rq   _make_layerlayer1layer2layer3layer4rE   AdaptiveAvgPool3davgpoolLinearr]   fcmodules
isinstancerF   initkaiming_normal_weightr)   	constant_rG   normal_r_   bn3)r.   rn   ro   rp   rq   rr   rs   mr/   r1   r2   r,      s2    
zVideoResNet.__init__rV   c                 C   sT   |  |}| |}| |}| |}| |}| |}|d}| |}|S rJ   )rq   rz   r{   r|   r}   r   flattenr   )r.   rW   r1   r1   r2   r[      s    







zVideoResNet.forwardr   )rn   rO   rN   blocksr"   r$   c           
   	   C   s   d }|dks| j ||j krV||}ttj| j ||j d|ddt||j }g }||| j |||| ||j | _ td|D ]}	||| j || qtj| S )Nr   F)r(   r"   r)   )	rM   r]   r6   rE   rQ   rF   rG   appendrange)
r.   rn   rO   rN   r   r"   rP   Z	ds_striderp   ir1   r1   r2   ry   	  s    
zVideoResNet._make_layer)rm   F)r   )r8   r9   r:   r   r
   rK   r_   r   r   rI   r>   r   r;   r   rE   r^   boolr,   r   r[   rQ   ry   r=   r1   r1   r/   r2   r      s*     4 r   .)rn   ro   rp   rq   weightsprogresskwargsr$   c                 K   sR   |d urt |dt|jd  t| |||fi |}|d urN||j|d |S )Nrr   
categories)r   )r   lenmetar   load_state_dictget_state_dict)rn   ro   rp   rq   r   r   r   modelr1   r1   r2   _video_resnet#  s    	r   )r   r   zKhttps://github.com/pytorch/vision/tree/main/references/video_classificationzSThese weights reproduce closely the accuracy of the paper for 16-frame clip inputs.)min_sizer   recipe_docsc                	   @   s@   e Zd Zedeedddi eddddd	id
dZeZdS )r   z7https://download.pytorch.org/models/r3d_18-b3b3357e.pthp   r   rt      	crop_sizeresize_sizeiP5Kinetics-400g     `J@gR@zacc@1zacc@5
num_params_metricsurl
transformsr   N	r8   r9   r:   r   r   r   _COMMON_METAKINETICS400_V1DEFAULTr1   r1   r1   r2   r   ?  s   r   c                	   @   s@   e Zd Zedeedddi eddddd	id
dZeZdS )r   z7https://download.pytorch.org/models/mc3_18-a90a0ba3.pthr   r   r   iPu r   g33333J@g(\S@r   r   r   Nr   r1   r1   r1   r2   r   Q  s   r   c                	   @   s@   e Zd Zedeedddi eddddd	id
dZeZdS )r   z<https://download.pytorch.org/models/r2plus1d_18-91a641e6.pthr   r   r   ir   g     L@gp=
׳S@r   r   r   Nr   r1   r1   r1   r2   r   c  s   r   
pretrained)r   T)r   r   )r   r   r   r$   c                 K   s.   t | } tttgd g dt| |fi |S )a  Construct 18 layer Resnet3D model.

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R3D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R3D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R3D_18_Weights
        :members:
    r`   r   r   r   r   )r   verifyr   rK   r   rb   r   r   r   r1   r1   r2   r   u  s    
r   c                 K   s4   t | } tttgtgd  g dt| |fi |S )a  Construct 18 layer Mixed Convolution network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.MC3_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.MC3_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.MC3_18_Weights
        :members:
    r   r   )r   r   r   rK   r   rI   rb   r   r1   r1   r2   r     s    
r   c                 K   s.   t | } tttgd g dt| |fi |S )a  Construct 18 layer deep R(2+1)D network as in

    .. betastatus:: video module

    Reference: `A Closer Look at Spatiotemporal Convolutions for Action Recognition <https://arxiv.org/abs/1711.11248>`__.

    Args:
        weights (:class:`~torchvision.models.video.R2Plus1D_18_Weights`, optional): The
            pretrained weights to use. See
            :class:`~torchvision.models.video.R2Plus1D_18_Weights`
            below for more details, and possible values. By default, no
            pre-trained weights are used.
        progress (bool): If True, displays a progress bar of the download to stderr. Default is True.
        **kwargs: parameters passed to the ``torchvision.models.video.resnet.VideoResNet`` base class.
            Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/video/resnet.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.video.R2Plus1D_18_Weights
        :members:
    r`   r   )r   r   r   rK   r>   rj   r   r1   r1   r2   r     s    
r   )
_ModelURLs)r   r   r   )5	functoolsr   typingr   r   r   r   r   r   r	   r
   torch.nnrE   torchr   Ztransforms._presetsr   utilsr   _apir   r   _metar   _utilsr   r   __all__rF   r   rQ   r>   rI   r^   rK   r_   rb   rj   r   r;   r   r   r   r   r   r   r   r   r   r   r   r   
model_urlsr1   r1   r1   r2   <module>   s^   (%1_"#"#"$