import warnings
from collections import OrderedDict
from typing import Any, Dict, List, Optional, Tuple

import torch
import torch.nn.functional as F
from torch import nn, Tensor

from ...ops import boxes as box_ops
from ...transforms._presets import ObjectDetection
from ...utils import _log_api_usage_once
from .._api import Weights, WeightsEnum
from .._meta import _COCO_CATEGORIES
from .._utils import _ovewrite_value_param, handle_legacy_interface
from ..vgg import VGG, VGG16_Weights, vgg16
from . import _utils as det_utils
from .anchor_utils import DefaultBoxGenerator
from .backbone_utils import _validate_trainable_layers
from .transform import GeneralizedRCNNTransform

__all__ = [
    "SSD300_VGG16_Weights",
    "ssd300_vgg16",
]


class SSD300_VGG16_Weights(WeightsEnum):
    COCO_V1 = Weights(
        url="https://download.pytorch.org/models/ssd300_vgg16_coco-b556d3b4.pth",
        transforms=ObjectDetection,
        meta={
            "num_params": 35641826,
            "categories": _COCO_CATEGORIES,
            "min_size": (1, 1),
            "recipe": "https://github.com/pytorch/vision/tree/main/references/detection#ssd300-vgg16",
            "_metrics": {
                "COCO-val2017": {
                    "box_map": 25.1,
                }
            },
            "_docs": "These weights were produced by following a similar training recipe as on the paper.",
        },
    )
    DEFAULT = COCO_V1


def _xavier_init(conv: nn.Module):
    for layer in conv.modules():
        if isinstance(layer, nn.Conv2d):
            torch.nn.init.xavier_uniform_(layer.weight)
            if layer.bias is not None:
                torch.nn.init.constant_(layer.bias, 0.0)


class SSDHead(nn.Module):
    def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int):
        super().__init__()
        self.classification_head = SSDClassificationHead(in_channels, num_anchors, num_classes)
        self.regression_head = SSDRegressionHead(in_channels, num_anchors)

    def forward(self, x: List[Tensor]) -> Dict[str, Tensor]:
        return {
            "bbox_regression": self.regression_head(x),
            "cls_logits": self.classification_head(x),
        }


class SSDScoringHead(nn.Module):
    def __init__(self, module_list: nn.ModuleList, num_columns: int):
        super().__init__()
        self.module_list = module_list
        self.num_columns = num_columns

    def _get_result_from_module_list(self, x: Tensor, idx: int) -> Tensor:
        """
        This is equivalent to self.module_list[idx](x),
        but torchscript doesn't support this yet
        """
        num_blocks = len(self.module_list)
        if idx < 0:
            idx += num_blocks
        out = x
        for i, module in enumerate(self.module_list):
            if i == idx:
                out = module(x)
        return out

    def forward(self, x: List[Tensor]) -> Tensor:
        all_results = []

        for i, features in enumerate(x):
            results = self._get_result_from_module_list(features, i)

            # Permute output from (N, A * K, H, W) to (N, HWA, K).
            N, _, H, W = results.shape
            results = results.view(N, -1, self.num_columns, H, W)
            results = results.permute(0, 3, 4, 1, 2)
            results = results.reshape(N, -1, self.num_columns)  # Size=(N, HWA, K)

            all_results.append(results)

        return torch.cat(all_results, dim=1)


class SSDClassificationHead(SSDScoringHead):
    def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int):
        cls_logits = nn.ModuleList()
        for channels, anchors in zip(in_channels, num_anchors):
            cls_logits.append(nn.Conv2d(channels, num_classes * anchors, kernel_size=3, padding=1))
        _xavier_init(cls_logits)
        super().__init__(cls_logits, num_classes)


class SSDRegressionHead(SSDScoringHead):
    def __init__(self, in_channels: List[int], num_anchors: List[int]):
        bbox_reg = nn.ModuleList()
        for channels, anchors in zip(in_channels, num_anchors):
            bbox_reg.append(nn.Conv2d(channels, 4 * anchors, kernel_size=3, padding=1))
        _xavier_init(bbox_reg)
        super().__init__(bbox_reg, 4)


class SSD(nn.Module):
    """
    Implements SSD architecture from `"SSD: Single Shot MultiBox Detector" <https://arxiv.org/abs/1512.02325>`_.

    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
    image, and should be in 0-1 range. Different images can have different sizes but they will be resized
    to a fixed size before being passed to the backbone.

    The behavior of the model changes depending on whether it is in training or evaluation mode.

    During training, the model expects both the input tensors and targets (a list of dictionaries),
    containing:
        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the class label for each ground-truth box

    The model returns a Dict[str, Tensor] during training, containing the classification and regression
    losses.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a List[Dict[str, Tensor]], one for each input image. The fields of the Dict are as
    follows, where ``N`` is the number of detections:

        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the predicted labels for each detection
        - scores (Tensor[N]): the scores for each detection
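
    Example (an illustrative sketch only; it uses the ``ssd300_vgg16`` builder
    defined below to supply a backbone and anchor generator):

        >>> model = torchvision.models.detection.ssd300_vgg16(
        ...     weights=None, weights_backbone=None, num_classes=3
        ... )
        >>> model.train()
        >>> images = [torch.rand(3, 300, 300)]
        >>> targets = [{"boxes": torch.tensor([[10.0, 20.0, 100.0, 200.0]]),
        ...             "labels": torch.tensor([1])}]
        >>> losses = model(images, targets)  # keys: 'bbox_regression', 'classification'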

    Args:
        backbone (nn.Module): the network used to compute the features for the model.
            It should contain an out_channels attribute with the list of the output channels of
            each feature map. The backbone should return a single Tensor or an OrderedDict[Tensor].
        anchor_generator (DefaultBoxGenerator): module that generates the default boxes for a
            set of feature maps.
        size (Tuple[int, int]): the width and height to which images will be rescaled before feeding them
            to the backbone.
        num_classes (int): number of output classes of the model (including the background).
        image_mean (Tuple[float, float, float]): mean values used for input normalization.
            They are generally the mean values of the dataset on which the backbone has been trained.
        image_std (Tuple[float, float, float]): std values used for input normalization.
            They are generally the std values of the dataset on which the backbone has been trained.
        head (nn.Module, optional): Module run on top of the backbone features. Defaults to a module containing
            a classification and regression module.
        score_thresh (float): Score threshold used for postprocessing the detections.
        nms_thresh (float): NMS threshold used for postprocessing the detections.
        detections_per_img (int): Number of best detections to keep after NMS.
        iou_thresh (float): minimum IoU between the anchor and the GT box so that they can be
            considered as positive during training.
        topk_candidates (int): Number of best detections to keep before NMS.
        positive_fraction (float): a number between 0 and 1 which indicates the proportion of positive
            proposals used during the training of the classification head. It is used to estimate the negative to
            positive ratio.
    """

    __annotations__ = {
        "box_coder": det_utils.BoxCoder,
        "proposal_matcher": det_utils.Matcher,
    }

    def __init__(
        self,
        backbone: nn.Module,
        anchor_generator: DefaultBoxGenerator,
        size: Tuple[int, int],
        num_classes: int,
        image_mean: Optional[List[float]] = None,
        image_std: Optional[List[float]] = None,
        head: Optional[nn.Module] = None,
        score_thresh: float = 0.01,
        nms_thresh: float = 0.45,
        detections_per_img: int = 200,
        iou_thresh: float = 0.5,
        topk_candidates: int = 400,
        positive_fraction: float = 0.25,
        **kwargs: Any,
    ):
        super().__init__()
        _log_api_usage_once(self)

        self.backbone = backbone
        self.anchor_generator = anchor_generator
        self.box_coder = det_utils.BoxCoder(weights=(10.0, 10.0, 5.0, 5.0))

        if head is None:
            if hasattr(backbone, "out_channels"):
                out_channels = backbone.out_channels
            else:
                out_channels = det_utils.retrieve_out_channels(backbone, size)

            if len(out_channels) != len(anchor_generator.aspect_ratios):
                raise ValueError(
                    f"The length of the output channels from the backbone ({len(out_channels)}) do not match "
                    f"the length of the anchor generator aspect ratios ({len(anchor_generator.aspect_ratios)})"
                )

            num_anchors = self.anchor_generator.num_anchors_per_location()
            head = SSDHead(out_channels, num_anchors, num_classes)
        self.head = head

        self.proposal_matcher = det_utils.SSDMatcher(iou_thresh)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(
            min(size), max(size), image_mean, image_std, size_divisible=1, fixed_size=size, **kwargs
        )

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img
        self.topk_candidates = topk_candidates
        self.neg_to_pos_ratio = (1.0 - positive_fraction) / positive_fraction

        # used only on torchscript mode
        self._has_warned = False
zSSD.__init__)losses
detectionsrL   c                 C   s   | j r
|S |S r@   )training)rG   r   r   r-   r-   r.   eager_outputs   s    zSSD.eager_outputs)targetshead_outputsru   matched_idxsrL   c                 C   s  |d }|d }d}g }g }	t |||||D ]\}
}}}}t|dkd }|| }|| 7 }|
d | }||d d f }||d d f }| j||}|tjjj	||dd tj
|df|
d j|
d jd}|
d | ||< |	| q,t|}t|	}	|d	}tj|d	||	d	d
d|	 }|	dk}| j|jddd }| }td ||< |jddd\}}|dd |k }td|}| | ||  ||   | dS )NrM   rN   r   r   sum)	reductionlabelsdtypedevicer^   noner   T)keepdiminf)
descending)rM   classification)rs   r3   wherenumelry   encode_singlerf   r   
functionalsmooth_l1_losszerosr   r   r   stackFcross_entropyrc   r   r   clonefloatsortr   )rG   r   r   ru   r   rM   rN   num_foregroundZ	bbox_lossZcls_targetstargets_per_imagebbox_regression_per_imagecls_logits_per_imageanchors_per_imagematched_idxs_per_imageforeground_idxs_per_imageZ!foreground_matched_idxs_per_imagematched_gt_boxes_per_imagetarget_regressiongt_classes_targetr?   Zcls_lossZforeground_idxsZnum_negativeZnegative_lossvaluesrV   Zbackground_idxsrj   r-   r-   r.   compute_loss   s^    





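
    # Note on the hard-negative sampling above (illustrative arithmetic based
    # on the defaults in ``__init__``): positive_fraction=0.25 yields
    # neg_to_pos_ratio = (1 - 0.25) / 0.25 = 3, so at most three background
    # anchors contribute to the classification loss for every matched anchor.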
zSSD.compute_loss)imagesr   rL   c              	   C   s  | j r|d u rtdd nf|D ]`}|d }t|tjrhtt|jdkoV|jd dkd|j d q tdd	t| d q g }|D ]L}|jd
d  }tt|dkd|jd
d    ||d |d f q| 	||\}}|d ur|t
|D ]\}}|d }|d d dd f |d d d df k}	|	 rt|	jddd d }
||
  }tdd| d| d q| |j}t|tjrtd|fg}t| }| |}| ||}i }g }| j rtg }|d u rtdd nt||D ]d\}}|d  dkr>|tj|dfdtj|jd qt|d |}|| | q| ||||}n"| |||j }| j	!||j |}tj"# r| j$st%&d d| _$||fS | '||S )NFz0targets should not be none when in training moder   r   r^   r_   z:Expected target boxes to be a tensor of shape [N, 4], got .z0Expected target boxes to be of type Tensor, got zJexpecting the last two dimensions of the Tensor to be H and W instead got r   r   r`   zLAll bounding boxes should have positive height and width. Found invalid box z for target at index 0r   z<SSD always returns a (Losses, Detections) tuple in scriptingT)(r   r3   _assertr1   r	   rW   rb   typerf   r   rX   anyr   tolistr   tensorsr   listr   r   r   rs   r   fullr   int64r   box_opsbox_iourz   r   postprocess_detectionsimage_sizespostprocessjitis_scriptingr   warningswarnr   )rG   r   r   targetr   original_image_sizesimgval
target_idxdegenerate_boxesbb_idxdegen_bbrh   r   ru   r   r   r   r   r   match_quality_matrixr-   r-   r.   rO   D  s    

(


zSSD.forward)r   image_anchorsimage_shapesrL   c              
   C   sl  |d }t j|d dd}|d}|j}g }t||||D ](\}	}
}}| j|	|}	t|	|}	g }g }g }t	d|D ]}|
d d |f }|| j
k}|| }|	| }t|| jd}||\}}|| }|| || |tj||tj|d qztj|dd}tj|dd}tj|dd}t|||| j}|d | j }||| || || d q<|S )	NrM   rN   r^   r`   r   r   )
fill_valuer   r   )r   scoresr   )r   softmaxr   r   rs   ry   decode_singler   clip_boxes_to_imageranger   r   	_topk_minr   topkrf   r3   	full_liker   rg   batched_nmsr   r   )rG   r   r   r   rM   pred_scoresr?   r   r   r   r   ru   image_shapeimage_boxesimage_scoresimage_labelslabelscore	keep_idxsboxnum_topkidxskeepr-   r-   r.   r     sD    



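
    # Summary of the flow in postprocess_detections (numbers are the class
    # defaults): decode boxes, clip them to the image, then per class keep
    # scores above score_thresh=0.01, pass at most topk_candidates=400 boxes
    # to the class-aware batched NMS with nms_thresh=0.45, and finally keep at
    # most detections_per_img=200 detections per image.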
zSSD.postprocess_detections)	NNNr{   r|   r}   r~   r   r   )N)r(   r)   r*   __doc__r   r   Matcher__annotations__r   Moduler   r   rP   r   r   r   r   rB   r3   r   unusedr   rQ   r	   r   r   rO   r   rR   r-   r-   rH   r.   rx   |   s^   7         


<


H Yrx   c                       s>   e Zd Zejed fddZeee	ef dddZ
  ZS )SSDFeatureExtractorVGG)r   highresc                    s  t    dd t|D \}}}}}d|| _ttdd | _tj	|d |  | _
tt	tjdddd	tjdd
tjddddddtjdd
t	tjdddd	tjdd
tjddddddtjdd
t	tjdddd	tjdd
tjdddd	tjdd
t	tjdddd	tjdd
tjdddd	tjdd
g}|r|t	tjdddd	tjdd
tjdddd	tjdd
 t| t	tjdddddtjddddddtjdd
tjddddtjdd
}t| |dtj	g ||d |R   || _d S )Nc                 s   s"   | ]\}}t |tjr|V  qd S r@   r1   r   	MaxPool2d).0r[   r9   r-   r-   r.   	<genexpr>      z2SSDFeatureExtractorVGG.__init__.<locals>.<genexpr>Ti      i      r   )rp   )inplacer
   r   )rp   rq   stride   r_   F)rp   r  rq   	ceil_mode   )r=   r   rp   rq   dilation)r=   r   rp   r   r^   )rA   rB   rX   r  r   	Parameterr3   onesscale_weight
Sequentialrh   rn   r2   ReLUrf   r:   r
  insertextra)rG   r   r  rk   Zmaxpool3_posZmaxpool4_posr  fcrH   r-   r.   rB     sv    














zSSDFeatureExtractorVGG.__init__rJ   c                 C   s`   |  |}| jddddt| }|g}| jD ]}||}|| q2tdd t|D S )Nr   r^   c                 S   s   g | ]\}}t ||fqS r-   )rQ   )r  r[   vr-   r-   r.   
<listcomp>$  r  z2SSDFeatureExtractorVGG.forward.<locals>.<listcomp>)	rh   r  rc   r   	normalizer  rf   r   rX   )rG   rK   rescaledoutputblockr-   r-   r.   rO     s    

zSSDFeatureExtractorVGG.forward)r(   r)   r*   r   r  boolrB   r	   r   rQ   rO   rR   r-   r-   rH   r.   r    s   Hr  )r   r  trainable_layersc                 C   s   | j } dgdd t| D d d  }t|}td|  koD|kn  d| d|  |dkrjt| n
|||  }| d | D ]}| D ]}|d qqt| |S )Nr   c                 S   s    g | ]\}}t |tjr|qS r-   r	  )r  r[   br-   r-   r.   r  *  r  z"_vgg_extractor.<locals>.<listcomp>r^   z,trainable_layers should be in the range [0, z]. Instead got F)rh   rX   rW   r3   r   
parametersrequires_grad_r  )r   r  r%  stage_indices
num_stagesfreeze_beforer&  	parameterr-   r-   r.   _vgg_extractor'  s     r-  
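

# A minimal usage sketch of the extractor above (illustrative only; assumes no
# pretrained weights are needed, and the channel counts shown are the ones the
# SSD300 head expects from VGG16):
#
#   backbone = _vgg_extractor(vgg16(weights=None), highres=False, trainable_layers=5)
#   feature_maps = backbone(torch.rand(1, 3, 300, 300))
#   [f.shape[1] for f in feature_maps.values()]  # -> [512, 1024, 512, 256, 256, 256]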


@handle_legacy_interface(
    weights=("pretrained", SSD300_VGG16_Weights.COCO_V1),
    weights_backbone=("pretrained_backbone", VGG16_Weights.IMAGENET1K_FEATURES),
)
def ssd300_vgg16(
    *,
    weights: Optional[SSD300_VGG16_Weights] = None,
    progress: bool = True,
    num_classes: Optional[int] = None,
    weights_backbone: Optional[VGG16_Weights] = VGG16_Weights.IMAGENET1K_FEATURES,
    trainable_backbone_layers: Optional[int] = None,
    **kwargs: Any,
) -> SSD:
    """The SSD300 model is based on the `SSD: Single Shot MultiBox Detector
    <https://arxiv.org/abs/1512.02325>`_ paper.

    .. betastatus:: detection module

    The input to the model is expected to be a list of tensors, each of shape [C, H, W], one for each
    image, and should be in 0-1 range. Different images can have different sizes but they will be resized
    to a fixed size before being passed to the backbone.

    The behavior of the model changes depending on whether it is in training or evaluation mode.

    During training, the model expects both the input tensors and targets (a list of dictionaries),
    containing:

        - boxes (``FloatTensor[N, 4]``): the ground-truth boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the class label for each ground-truth box

    The model returns a Dict[str, Tensor] during training, containing the classification and regression
    losses.

    During inference, the model requires only the input tensors, and returns the post-processed
    predictions as a List[Dict[str, Tensor]], one for each input image. The fields of the Dict are as
    follows, where ``N`` is the number of detections:

        - boxes (``FloatTensor[N, 4]``): the predicted boxes in ``[x1, y1, x2, y2]`` format, with
          ``0 <= x1 < x2 <= W`` and ``0 <= y1 < y2 <= H``.
        - labels (Int64Tensor[N]): the predicted labels for each detection
        - scores (Tensor[N]): the scores for each detection

    Example:

        >>> model = torchvision.models.detection.ssd300_vgg16(weights=SSD300_VGG16_Weights.DEFAULT)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 300), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
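        >>> # Each element of ``predictions`` is a dict with "boxes", "labels"
        >>> # and "scores" keys, as described above.
        >>> boxes = predictions[0]["boxes"]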

    Args:
        weights (:class:`~torchvision.models.detection.SSD300_VGG16_Weights`, optional): The pretrained
                weights to use. See
                :class:`~torchvision.models.detection.SSD300_VGG16_Weights`
                below for more details, and possible values. By default, no
                pre-trained weights are used.
        progress (bool, optional): If True, displays a progress bar of the download to stderr.
            Default is True.
        num_classes (int, optional): number of output classes of the model (including the background)
        weights_backbone (:class:`~torchvision.models.VGG16_Weights`, optional): The pretrained weights for the
            backbone
        trainable_backbone_layers (int, optional): number of trainable (not frozen) layers starting from final block.
            Valid values are between 0 and 5, with 5 meaning all backbone layers are trainable. If ``None`` is
            passed (the default) this value is set to 4.
        **kwargs: parameters passed to the ``torchvision.models.detection.SSD``
            base class. Please refer to the `source code
            <https://github.com/pytorch/vision/blob/main/torchvision/models/detection/ssd.py>`_
            for more details about this class.

    .. autoclass:: torchvision.models.detection.SSD300_VGG16_Weights
        :members:
    """
    weights = SSD300_VGG16_Weights.verify(weights)
    weights_backbone = VGG16_Weights.verify(weights_backbone)

    if "size" in kwargs:
        warnings.warn("The size of the model is already fixed; ignoring the parameter.")

    if weights is not None:
        weights_backbone = None
        num_classes = _ovewrite_value_param(num_classes, len(weights.meta["categories"]))
    elif num_classes is None:
        num_classes = 91

    trainable_backbone_layers = _validate_trainable_layers(
        weights is not None or weights_backbone is not None, trainable_backbone_layers, 5, 4
    )

    # Use custom backbones more appropriate for SSD
    backbone = vgg16(weights=weights_backbone, progress=progress)
    backbone = _vgg_extractor(backbone, False, trainable_backbone_layers)
    anchor_generator = DefaultBoxGenerator(
        [[2], [2, 3], [2, 3], [2, 3], [2], [2]],
        scales=[0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05],
        steps=[8, 16, 32, 64, 100, 300],
    )

    defaults = {
        # Rescale the input in a way compatible to the backbone
        "image_mean": [0.48235, 0.45882, 0.40784],
        "image_std": [1.0 / 255.0, 1.0 / 255.0, 1.0 / 255.0],  # undo the 0-1 scaling of toTensor
    }
    kwargs: Any = {**defaults, **kwargs}
    model = SSD(backbone, anchor_generator, (300, 300), num_classes, **kwargs)

    if weights is not None:
        model.load_state_dict(weights.get_state_dict(progress=progress))

    return model


# The dictionaries below are internal implementation details kept for backwards compatibility
from .._utils import _ModelURLs

model_urls = _ModelURLs(
    {
        "ssd300_vgg16_coco": SSD300_VGG16_Weights.COCO_V1.url,
    }
)

backbone_urls = _ModelURLs(
    {
        "vgg16_features": VGG16_Weights.IMAGENET1K_FEATURES.url,
    }
)