a
    ==ic†  ã                   @   sV   d dl mZ d dlmZ d dlmZ d dlmZ d dlZe 	e
¡ZG dd„ deƒZdS )é    )Úcore)Ú
caffe2_pb2)Úget_param_device)ÚNetModifierNc                   @   s@   e Zd ZdZdZdZdZeegZeegZdd
d„Z	ddd„Z
d	S )ÚGradientClippingZl1_normÚl2_normZby_normZby_valueçš™™™™™¹?Fé   éÿÿÿÿNc
           
      C   s~   || j v sJ d |¡ƒ‚|dur8|| jv s8J d |¡ƒ‚|| _|| _t|ƒ| _|| _|| _t|ƒ| _	t|ƒ| _
|| _|	| _dS )a  
        Clips gradient to avoid gradient magnitude explosion or vanishing gradient.

        Args:
        grad_clip_method: ways to clip the gradients
        clip_norm_type: type of norm used in the necessary computation
        clip_threshold: threshold used to determine whether to clip
        use_parameter_norm: a boolean to indicate whether to incorporate
            the norm of the parameter
        compute_norm_ratio: a boolean to compute the ratio between gradient norm
            and parameter norm explicitly for debugging purpose
        clip_max: when clipping by_value, any value that is greater than
            clip_max will be clipped to clip_max
        clip_min: when clipping by_value, any value that is smaller than
            clip_min will be clipped to clip_min
        blobs_to_include: names of blobs whose gradient is to be clipped. If it is set
            to none, all param 's gradient in grad_map will be clipped.
        blobs_to_exclude: names of blobs whose gradient is not to be clipped.
        z6This method of clipping, {}, has not been implemented.N)ÚGRAD_CLIP_METHODSÚformatÚCLIP_GRADIENT_NORM_TYPESÚgrad_clip_methodÚclip_norm_typeÚfloatÚclip_thresholdÚuse_parameter_normÚcompute_norm_ratioÚclip_maxÚclip_minÚblobs_to_includeÚblobs_to_exclude)
Úselfr   r   r   r   r   r   r   r   r   © r   úy/home/droni/.local/share/virtualenvs/DPS-5Je3_V2c/lib/python3.9/site-packages/caffe2/python/modeling/gradient_clipping.pyÚ__init__   s(    ÿÿÿÿ


zGradientClipping.__init__c              	   C   s2  |d usJ ‚t  tj¡}i }| jd u r,|}n@| jD ]8}t  |¡}	| |	¡s^td |	| 	¡ ¡ƒ‚||	 ||	< q2| j
d urŽ| j
D ]}| |d ¡ q|| ¡ D ]”\}	}
t|
t jƒr®q–t|	|t|	ƒ ||d}t  |¡H | j| jkrä| j| jkrôd}n| j| jkrd}|j|
g|jt|
ƒd |¡ d|d}|dkrH|j|gdd	}|
|g}| jrÎ|j|	g|jt|	ƒd |¡ d|d}|dkrš|j|gdd	}| |¡ | jrÎ| ||g|jt|	ƒd
 dg¡ |j||
g| jd n(| j| j kr|j!|
g|
g| j"| j#d W d   ƒ q–1 s"0    Y  q–d S )Nz#param {0} is not defined in net {1})Zparam_to_deviceZdefault_deviceé   r	   z	_l{}_norm)Úprefix)Úpg      à?)ÚexponentZ_norm_ratio)Ú	threshold)ÚmaxÚmin)$r   ZDeviceOptionr   ÚCPUr   ZBlobReferenceZBlobIsDefinedÚ	Exceptionr   ÚNamer   ÚpopÚitemsÚ
isinstanceZGradientSlicer   ÚstrZDeviceScoper   ÚBY_NORMr   ÚL2_NORMÚL1_NORMZLpNormZNextScopedBlobÚPowr   Úappendr   ÚDivZClipTensorByScalingr   ÚBY_VALUEZClipr   r   )r   ÚnetZinit_netZgrad_mapZblob_to_deviceZmodify_output_recordr#   Zfinal_param_mapZblobÚparamZgradZdevicer   Z	grad_normZ	op_inputsZ
param_normr   r   r   Ú
modify_netE   sˆ    



ÿ


üý
ÿü


ÿþýüzGradientClipping.modify_net)r   r   FFr	   r
   NN)NNNF)Ú__name__Ú
__module__Ú__qualname__r,   r+   r*   r0   r   r   r   r3   r   r   r   r   r      s       ý
*  ÿr   )Zcaffe2.pythonr   Zcaffe2.protor   Zcaffe2.python.optimizerr   Z#caffe2.python.modeling.net_modifierr   ÚloggingÚ	getLoggerr4   Úloggerr   r   r   r   r   Ú<module>   s   
