"""Transformer modules."""

import math

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.init import constant_, xavier_uniform_

from .conv import Conv
from .utils import _get_clones, inverse_sigmoid, multi_scale_deformable_attn_pytorch

__all__ = (
    "TransformerEncoderLayer",
    "TransformerLayer",
    "TransformerBlock",
    "MLPBlock",
    "LayerNorm2d",
    "AIFI",
    "DeformableTransformerDecoder",
    "DeformableTransformerDecoderLayer",
    "MSDeformAttn",
    "MLP",
)


class TransformerEncoderLayer(nn.Module):
    """Defines a single layer of the transformer encoder."""

    def __init__(self, c1, cm=2048, num_heads=8, dropout=0.0, act=nn.GELU(), normalize_before=False):
        """Initialize the TransformerEncoderLayer with specified parameters."""
        super().__init__()
        from ...utils.torch_utils import TORCH_1_9

        if not TORCH_1_9:
            raise ModuleNotFoundError(
                "TransformerEncoderLayer() requires torch>=1.9 to use nn.MultiheadAttention(batch_first=True)."
            )
        self.ma = nn.MultiheadAttention(c1, num_heads, dropout=dropout, batch_first=True)
        # Implementation of the feedforward model
        self.fc1 = nn.Linear(c1, cm)
        self.fc2 = nn.Linear(cm, c1)

        self.norm1 = nn.LayerNorm(c1)
        self.norm2 = nn.LayerNorm(c1)
        self.dropout = nn.Dropout(dropout)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

        self.act = act
        self.normalize_before = normalize_before

    @staticmethod
    def with_pos_embed(tensor, pos=None):
        """Add position embeddings to the tensor if provided."""
        return tensor if pos is None else tensor + pos

    def forward_post(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
        """Performs forward pass with post-normalization."""
        q = k = self.with_pos_embed(src, pos)
        src2 = self.ma(q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.fc2(self.dropout(self.act(self.fc1(src))))
        src = src + self.dropout2(src2)
        return self.norm2(src)

    def forward_pre(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
        """Performs forward pass with pre-normalization."""
        src2 = self.norm1(src)
        q = k = self.with_pos_embed(src2, pos)
        src2 = self.ma(q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask)[0]
        src = src + self.dropout1(src2)
        src2 = self.norm2(src)
        src2 = self.fc2(self.dropout(self.act(self.fc1(src2))))
        return src + self.dropout2(src2)

    def forward(self, src, src_mask=None, src_key_padding_mask=None, pos=None):
        """Forward propagates the input through the encoder module."""
        if self.normalize_before:
            return self.forward_pre(src, src_mask, src_key_padding_mask, pos)
        return self.forward_post(src, src_mask, src_key_padding_mask, pos)
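

# Usage sketch (an illustration added here, not part of the original module): the layer
# operates on [batch, seq_len, c1] tensors because nn.MultiheadAttention is constructed
# with batch_first=True. With the default normalize_before=False it applies the classic
# post-norm residual scheme of forward_post().
#
#   layer = TransformerEncoderLayer(c1=256, cm=1024, num_heads=8)
#   out = layer(torch.rand(2, 100, 256))  # -> shape (2, 100, 256)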


class AIFI(TransformerEncoderLayer):
    """Defines the AIFI transformer layer."""

    def __init__(self, c1, cm=2048, num_heads=8, dropout=0, act=nn.GELU(), normalize_before=False):
        """Initialize the AIFI instance with specified parameters."""
        super().__init__(c1, cm, num_heads, dropout, act, normalize_before)

    def forward(self, x):
        """Forward pass for the AIFI transformer layer."""
        c, h, w = x.shape[1:]
        pos_embed = self.build_2d_sincos_position_embedding(w, h, c)
        # Flatten [B, C, H, W] to [B, HxW, C]
        x = super().forward(x.flatten(2).permute(0, 2, 1), pos=pos_embed.to(device=x.device, dtype=x.dtype))
        return x.permute(0, 2, 1).view([-1, c, h, w]).contiguous()

    @staticmethod
    def build_2d_sincos_position_embedding(w, h, embed_dim=256, temperature=10000.0):
        """Builds 2D sine-cosine position embedding."""
        assert embed_dim % 4 == 0, "Embed dimension must be divisible by 4 for 2D sin-cos position embedding"
        grid_w = torch.arange(w, dtype=torch.float32)
        grid_h = torch.arange(h, dtype=torch.float32)
        grid_w, grid_h = torch.meshgrid(grid_w, grid_h, indexing="ij")
        pos_dim = embed_dim // 4
        omega = torch.arange(pos_dim, dtype=torch.float32) / pos_dim
        omega = 1.0 / (temperature**omega)

        out_w = grid_w.flatten()[..., None] @ omega[None]
        out_h = grid_h.flatten()[..., None] @ omega[None]

        return torch.cat([torch.sin(out_w), torch.cos(out_w), torch.sin(out_h), torch.cos(out_h)], 1)[None]
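

# Shape note (illustrative, not part of the original source): the position embedding
# returned above is [1, w*h, embed_dim], matching the flattened [B, H*W, C] layout that
# AIFI.forward() feeds to the parent encoder layer, e.g.
#
#   pe = AIFI.build_2d_sincos_position_embedding(20, 20, 256)  # -> shape (1, 400, 256)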


class TransformerLayer(nn.Module):
    """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""

    def __init__(self, c, num_heads):
        """Initializes a self-attention mechanism using linear transformations and multi-head attention."""
        super().__init__()
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        """Apply a transformer block to the input x and return the output."""
        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
        return self.fc2(self.fc1(x)) + x


class TransformerBlock(nn.Module):
    """Vision Transformer https://arxiv.org/abs/2010.11929."""

    def __init__(self, c1, c2, num_heads, num_layers):
        """Initialize a Transformer module with position embedding and specified number of heads and layers."""
        super().__init__()
        self.conv = None
        if c1 != c2:
            self.conv = Conv(c1, c2)
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
        self.c2 = c2

    def forward(self, x):
        """Forward propagates the input through the bottleneck module."""
        if self.conv is not None:
            x = self.conv(x)
        b, _, w, h = x.shape
        p = x.flatten(2).permute(2, 0, 1)
        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)


class MLPBlock(nn.Module):
    """Implements a single block of a multi-layer perceptron."""

    def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
        """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function."""
        super().__init__()
        self.lin1 = nn.Linear(embedding_dim, mlp_dim)
        self.lin2 = nn.Linear(mlp_dim, embedding_dim)
        self.act = act()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forward pass for the MLPBlock."""
        return self.lin2(self.act(self.lin1(x)))


class MLP(nn.Module):
    """Implements a simple multi-layer perceptron (also called FFN)."""

    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, act=nn.ReLU, sigmoid=False):
        """Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
        super().__init__()
        self.num_layers = num_layers
        h = [hidden_dim] * (num_layers - 1)
        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
        self.sigmoid = sigmoid
        self.act = act()

    def forward(self, x):
        """Forward pass for the entire MLP."""
        for i, layer in enumerate(self.layers):
            x = getattr(self, "act", nn.ReLU())(layer(x)) if i < self.num_layers - 1 else layer(x)
        return x.sigmoid() if getattr(self, "sigmoid", False) else x


class LayerNorm2d(nn.Module):
    """
    2D Layer Normalization module inspired by Detectron2 and ConvNeXt implementations.

    Original implementations in
    https://github.com/facebookresearch/detectron2/blob/main/detectron2/layers/batch_norm.py
    and
    https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py.
    """

    def __init__(self, num_channels, eps=1e-6):
        """Initialize LayerNorm2d with the given parameters."""
        super().__init__()
        self.weight = nn.Parameter(torch.ones(num_channels))
        self.bias = nn.Parameter(torch.zeros(num_channels))
        self.eps = eps

    def forward(self, x):
        """Perform forward pass for 2D layer normalization."""
        u = x.mean(1, keepdim=True)
        s = (x - u).pow(2).mean(1, keepdim=True)
        x = (x - u) / torch.sqrt(s + self.eps)
        return self.weight[:, None, None] * x + self.bias[:, None, None]
d„Z‡  ZS )r   z×
    Multiscale Deformable Attention Module based on Deformable-DETR and PaddleDetection implementations.

    https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/ops/modules/ms_deform_attn.py
    rW   rY   r   c                    s¼   t ƒ  ¡  || dkr*td|› d|› ƒ‚|| }|| |ksFJ dƒ‚d| _|| _|| _|| _|| _t 	||| | d ¡| _
t 	||| | ¡| _t 	||¡| _t 	||¡| _|  ¡  dS )z2Initialize MSDeformAttn with the given parameters.r   z.d_model must be divisible by n_heads, but got z and z(`d_model` must be divisible by `n_heads`é@   rH   N)r   r   Ú
ValueErrorZim2col_stepÚd_modelÚn_levelsÚn_headsÚn_pointsr   r   Úsampling_offsetsÚattention_weightsÚ
value_projÚoutput_projÚ_reset_parameters)r*   r   r   r‘   r’   Z_d_per_headr.   r0   r1   r   á   s    
zMSDeformAttn.__init__c                 C   sV  t | jjjdƒ tj| jtjddtj	 | j  }t 
| ¡ | ¡ gd¡}|| ¡ jdddd   | jddd	¡ d| j| jd¡}t| jƒD ],}|d
d
…d
d
…|d
d
…f  |d 9  < qŒt ¡ $ t | d¡¡| j_W d
  ƒ n1 sì0    Y  t | jjjdƒ t | jjjdƒ t| jjjƒ t | jjjdƒ t| jjjƒ t | jjjdƒ d
S )zReset module parameters.r   rZ   g       @rK   Tr‡   r   r   rH   N)r   r“   r…   Údatar\   r]   r‘   r^   ÚmathÚpiÚstackra   r`   ÚabsÚmaxrQ   Úrepeatr   r’   rq   Zno_gradr   r„   rd   r”   r   r•   r–   )r*   ZthetasZ	grid_initr   r0   r0   r1   r—   ø   s&    "
ÿþÿ*
2zMSDeformAttn._reset_parametersNc              	   C   sÜ  |j dd… \}}|j d }tdd„ |D ƒƒ|ks6J ‚|  |¡}|dur\| |d tdƒ¡}| ||| j| j| j ¡}|  |¡ ||| j| j	| j
d¡}	|  |¡ ||| j| j	| j
 ¡}
t |
d¡ ||| j| j	| j
¡}
|j d }|dkrNtj||j|jd	 d¡}|	|ddddd…ddd…f  }|dd…dd…ddd…ddd…f | }nv|d
kr´|	| j
 |dd…dd…ddd…ddd…f  d }|dd…dd…ddd…ddd…f | }ntd|› dƒ‚t||||
ƒ}|  |¡S )a  
        Perform forward pass for multiscale deformable attention.

        https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py

        Args:
            query (torch.Tensor): [bs, query_length, C]
            refer_bbox (torch.Tensor): [bs, query_length, n_levels, 2], range in [0, 1], top-left (0,0),
                bottom-right (1, 1), including padding area
            value (torch.Tensor): [bs, value_length, C]
            value_shapes (List): [n_levels, 2], [(H_0, W_0), (H_1, W_1), ..., (H_{L-1}, W_{L-1})]
            value_mask (Tensor): [bs, value_length], True for non-padding elements, False for padding elements

        Returns:
            output (Tensor): [bs, Length_{query}, C]
        NrH   r   c                 s   s   | ]}|d  |d  V  qdS )r   r   Nr0   )ri   rŒ   r0   r0   r1   rm      rn   z'MSDeformAttn.forward.<locals>.<genexpr>r[   r   rK   )rJ   rI   rY   g      à?z5Last dim of reference_points must be 2 or 4, but got Ú.)rL   Úsumr•   Zmasked_fillÚfloatrQ   r‘   r   r“   r   r’   r”   ÚFZsoftmaxr\   Z	as_tensorrJ   rI   ÚfliprŽ   r   r–   )r*   ÚqueryÚ
refer_bboxr6   Zvalue_shapesZ
value_maskÚbsÚlen_qZlen_vr“   r”   Z
num_pointsZoffset_normalizerÚaddZsampling_locationsÚoutputr0   r0   r1   r@     s*    

   

 *
2*zMSDeformAttn.forward)rW   rY   r   rY   )N)rA   rB   rC   rD   r   r—   r@   rG   r0   r0   r.   r1   r   Ú   s   r   c                       sR   e Zd ZdZdddde ¡ ddf‡ fdd„	Zed	d


class DeformableTransformerDecoderLayer(nn.Module):
    """
    Deformable Transformer Decoder Layer inspired by PaddleDetection and Deformable-DETR implementations.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
    https://github.com/fundamentalvision/Deformable-DETR/blob/main/models/deformable_transformer.py
    """

    def __init__(self, d_model=256, n_heads=8, d_ffn=1024, dropout=0.0, act=nn.ReLU(), n_levels=4, n_points=4):
        """Initialize the DeformableTransformerDecoderLayer with the given parameters."""
        super().__init__()

        # Self attention
        self.self_attn = nn.MultiheadAttention(d_model, n_heads, dropout=dropout)
        self.dropout1 = nn.Dropout(dropout)
        self.norm1 = nn.LayerNorm(d_model)

        # Cross attention
        self.cross_attn = MSDeformAttn(d_model, n_levels, n_heads, n_points)
        self.dropout2 = nn.Dropout(dropout)
        self.norm2 = nn.LayerNorm(d_model)

        # FFN
        self.linear1 = nn.Linear(d_model, d_ffn)
        self.act = act
        self.dropout3 = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_ffn, d_model)
        self.dropout4 = nn.Dropout(dropout)
        self.norm3 = nn.LayerNorm(d_model)

    @staticmethod
    def with_pos_embed(tensor, pos):
        """Add positional embeddings to the input tensor, if provided."""
        return tensor if pos is None else tensor + pos

    def forward_ffn(self, tgt):
        """Perform forward pass through the Feed-Forward Network part of the layer."""
        tgt2 = self.linear2(self.dropout3(self.act(self.linear1(tgt))))
        tgt = tgt + self.dropout4(tgt2)
        return self.norm3(tgt)

    def forward(self, embed, refer_bbox, feats, shapes, padding_mask=None, attn_mask=None, query_pos=None):
        """Perform the forward pass through the entire decoder layer."""
        # Self attention
        q = k = self.with_pos_embed(embed, query_pos)
        tgt = self.self_attn(
            q.transpose(0, 1), k.transpose(0, 1), embed.transpose(0, 1), attn_mask=attn_mask
        )[0].transpose(0, 1)
        embed = embed + self.dropout1(tgt)
        embed = self.norm1(embed)

        # Cross attention
        tgt = self.cross_attn(
            self.with_pos_embed(embed, query_pos), refer_bbox.unsqueeze(2), feats, shapes, padding_mask
        )
        embed = embed + self.dropout2(tgt)
        embed = self.norm2(embed)

        # FFN
        return self.forward_ffn(embed)
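

# Note (illustrative, not part of the original source): this layer follows MSDeformAttn's
# conventions. `embed` is [bs, num_queries, d_model]; `refer_bbox` is [bs, num_queries, 4]
# and is unsqueezed to a per-level axis before cross attention; `feats` and `shapes` are
# the flattened multi-scale features and their (H, W) sizes.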


class DeformableTransformerDecoder(nn.Module):
    """
    Implementation of Deformable Transformer Decoder based on PaddleDetection.

    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/transformers/deformable_transformer.py
    """

    def __init__(self, hidden_dim, decoder_layer, num_layers, eval_idx=-1):
        """Initialize the DeformableTransformerDecoder with the given parameters."""
        super().__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.num_layers = num_layers
        self.hidden_dim = hidden_dim
        self.eval_idx = eval_idx if eval_idx >= 0 else num_layers + eval_idx

    def forward(
        self,
        embed,  # decoder embeddings
        refer_bbox,  # anchor
        feats,  # image features
        shapes,  # feature shapes
        bbox_head,
        score_head,
        pos_mlp,
        attn_mask=None,
        padding_mask=None,
    ):
        """Perform the forward pass through the entire decoder."""
        output = embed
        dec_bboxes = []
        dec_cls = []
        last_refined_bbox = None
        refer_bbox = refer_bbox.sigmoid()
        for i, layer in enumerate(self.layers):
            output = layer(output, refer_bbox, feats, shapes, padding_mask, attn_mask, pos_mlp(refer_bbox))

            bbox = bbox_head[i](output)
            refined_bbox = torch.sigmoid(bbox + inverse_sigmoid(refer_bbox))

            if self.training:
                dec_cls.append(score_head[i](output))
                if i == 0:
                    dec_bboxes.append(refined_bbox)
                else:
                    dec_bboxes.append(torch.sigmoid(bbox + inverse_sigmoid(last_refined_bbox)))
            elif i == self.eval_idx:
                dec_cls.append(score_head[i](output))
                dec_bboxes.append(refined_bbox)
                break

            last_refined_bbox = refined_bbox
            refer_bbox = refined_bbox.detach() if self.training else refined_bbox

        return torch.stack(dec_bboxes), torch.stack(dec_cls)
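

if __name__ == "__main__":
    # Minimal smoke test, added purely as an illustration of the expected tensor shapes
    # (not part of the original module). Because this file uses relative imports, run it
    # as a module, e.g. `python -m ultralytics.nn.modules.transformer`.
    x = torch.rand(1, 256, 20, 20)  # [B, C, H, W] with C divisible by 4
    m = AIFI(256, 1024, num_heads=8)
    assert m(x).shape == x.shape  # AIFI preserves the input shape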