"""DTensor specific Keras optimizers."""

import tensorflow.compat.v2 as tf

from keras.dtensor import dtensor_api as dtensor
from keras.optimizers.optimizer_experimental import adadelta
from keras.optimizers.optimizer_experimental import adagrad
from keras.optimizers.optimizer_experimental import adam
from keras.optimizers.optimizer_experimental import adamw
from keras.optimizers.optimizer_experimental import optimizer as optimizer_lib
from keras.optimizers.optimizer_experimental import rmsprop
from keras.optimizers.optimizer_experimental import sgd
from keras.optimizers.schedules import learning_rate_schedule
from tensorflow.python.util.tf_export import keras_export
from tensorflow.tools.docs import doc_controls


class Optimizer(optimizer_lib._BaseOptimizer):
    """DTensor specific optimizers.

    The major change in this class is that all of the variable init logic is
    mesh/layout aware.

    """

    def __init__(self, name, mesh=None):
        """Create a new Optimizer.

        Args:
          name: String. The name of the optimizer, which will appear in all the
            state variables created by this optimizer.
          mesh: dtensor.Mesh. The optional Mesh which will be used to create
            the states. Note that usually the state variable will use the layout
            from the corresponding model variables. This mesh is only used
            for global variables like the global step, learning rate, etc.
        """
        self._mesh = mesh
        super().__init__(name=name)

    def _create_iteration_variable(self):
        init_val = tf.constant(0, dtype=tf.int64)
        if self._mesh:
            init_val = dtensor.copy_to_mesh(
                init_val, dtensor.Layout.replicated(self._mesh, rank=0)
            )
        with tf.init_scope():
            # Lift the variable creation out of any tf.function scope so that
            # the iteration counter is created eagerly.
            self._iterations = dtensor.DVariable(init_val, name="iteration")

    def add_variable_from_reference(
        self, model_variable, variable_name, initial_value=None
    ):
        """Create an optimizer variable from model variable.

        Create an optimizer variable based on the information of the model
        variable. For example, for SGD momentum, a corresponding momentum
        variable is created for each model variable, with the same shape and
        dtype.

        Args:
          model_variable: The corresponding model variable to the optimizer
            variable to be created.
          variable_name: The name prefix of the optimizer variable to be
            created. The created variable's name will follow the pattern
            `{variable_name}/{model_variable.name}`, e.g., `momentum/dense_1`.
          initial_value: The initial value of the optimizer variable. If None,
            the value will default to 0.

        Returns:
          An optimizer variable.
        """
        if initial_value is None:
            # tf.zeros_like propagates the layout information from the model
            # variable, if any.
            initial_value = tf.zeros_like(model_variable)
        elif isinstance(initial_value, tf.Tensor):
            initial_value = dtensor.copy_to_mesh(
                initial_value,
                dtensor.Layout.replicated(
                    self._mesh, rank=initial_value.shape.rank
                ),
            )
        variable = dtensor.DVariable(
            initial_value=initial_value,
            name=f"{variable_name}/{model_variable._shared_name}",
            dtype=model_variable.dtype,
            trainable=False,
        )
        self._variables.append(variable)
        return variable

    @doc_controls.do_not_generate_docs
    def aggregate_gradients(self, grads_and_vars):
        # DTensor handles cross-replica reduction via the mesh/layout, so the
        # manual aggregation path of the base optimizer is disabled.
        raise NotImplementedError(
            "Dtensor doesn't need to manually aggregate gradients"
        )

    def _var_key(self, variable):
        """Get a unique identifier of the given variable."""
        return optimizer_lib._BaseOptimizer._var_key(self, variable)

    def apply_gradients(self, grads_and_vars):
        """Apply gradients to variables.

        Args:
          grads_and_vars: List of (gradient, variable) pairs.

        Returns:
          None

        Raises:
          TypeError: If `grads_and_vars` is malformed.
        """
        optimizer_lib._BaseOptimizer.apply_gradients(self, grads_and_vars)

    def _internal_apply_gradients(self, grads_and_vars):
        """Helper function of apply gradients.

        This is required for separating out distributed training logic.

        Args:
          grads_and_vars: List of (gradient, variable) pairs.
        """
        optimizer_lib._BaseOptimizer._internal_apply_gradients(
            self, grads_and_vars
        )

    def _overwrite_model_variables_with_average_value_helper(self, var_list):
        """Helper function to _overwrite_model_variables_with_average_value."""
        optimizer_lib._BaseOptimizer._overwrite_model_variables_with_average_value_helper(  # noqa: E501
            self, var_list
        )

    def _build_learning_rate(self, learning_rate):
        if isinstance(
            learning_rate, learning_rate_schedule.LearningRateSchedule
        ):
            # Create a DVariable to hold the current (scheduled) learning
            # rate; learning_rate(self.iterations) picks up its layout from
            # self.iterations.
            self._current_learning_rate = dtensor.DVariable(
                learning_rate(self.iterations),
                name="learning_rate",
                dtype=tf.float32,
            )
            return learning_rate
        init_val = tf.constant(learning_rate, dtype=tf.float32)
        if self._mesh:
            init_val = dtensor.copy_to_mesh(
                init_val, dtensor.Layout.replicated(self._mesh, rank=0)
            )
        return dtensor.DVariable(init_val, name="learning_rate")


@keras_export("keras.dtensor.experimental.optimizers.Adadelta", v1=[])
class Adadelta(Optimizer, adadelta.Adadelta):
    def __init__(self, learning_rate=0.001, rho=0.95, epsilon=1e-7,
                 gradients_clip_option=None, ema_option=None,
                 name="Adadelta", mesh=None):
        # Bypass adadelta.Adadelta.__init__ and call Optimizer.__init__
        # directly so that all state is created mesh/layout aware.
        Optimizer.__init__(self, name=name, mesh=mesh)
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.rho = rho
        self.epsilon = epsilon


@keras_export("keras.dtensor.experimental.optimizers.Adagrad", v1=[])
class Adagrad(Optimizer, adagrad.Adagrad):
    def __init__(self, learning_rate=0.001, initial_accumulator_value=0.1,
                 epsilon=1e-7, gradients_clip_option=None, ema_option=None,
                 name="Adagrad", mesh=None):
        Optimizer.__init__(self, name=name, mesh=mesh)
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.initial_accumulator_value = initial_accumulator_value
        self.epsilon = epsilon


@keras_export("keras.dtensor.experimental.optimizers.Adam", v1=[])
class Adam(Optimizer, adam.Adam):
    def __init__(self, learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                 epsilon=1e-7, amsgrad=False, gradients_clip_option=None,
                 ema_option=None, name="Adam", mesh=None):
        Optimizer.__init__(self, name=name, mesh=mesh)
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.amsgrad = amsgrad


@keras_export("keras.dtensor.experimental.optimizers.AdamW", v1=[])
class AdamW(Optimizer, adamw.AdamW):
    def __init__(self, learning_rate=0.001, weight_decay=0.004, beta_1=0.9,
                 beta_2=0.999, epsilon=1e-7, amsgrad=False, name="AdamW",
                 mesh=None):
        Optimizer.__init__(self, name=name, mesh=mesh)
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.weight_decay = weight_decay
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.epsilon = epsilon
        self.amsgrad = amsgrad
        if self.weight_decay is None:
            raise ValueError(
                "Missing value of `weight_decay` which is required and must"
                " be a float value."
            )


@keras_export("keras.dtensor.experimental.optimizers.RMSprop", v1=[])
class RMSprop(Optimizer, rmsprop.RMSprop):
    def __init__(self, learning_rate=0.001, rho=0.9, momentum=0.0,
                 epsilon=1e-7, centered=False, gradients_clip_option=None,
                 ema_option=None, jit_compile=False, name="RMSprop",
                 mesh=None):
        Optimizer.__init__(self, name=name, mesh=mesh)
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.rho = rho
        self.momentum = momentum
        self.epsilon = epsilon
        self.centered = centered


@keras_export("keras.dtensor.experimental.optimizers.SGD", v1=[])
class SGD(Optimizer, sgd.SGD):
    def __init__(self, learning_rate=0.01, momentum=0.0, nesterov=False,
                 amsgrad=False, gradients_clip_option=None, ema_option=None,
                 jit_compile=False, name="SGD", mesh=None):
        Optimizer.__init__(self, name=name, mesh=mesh)
        self._learning_rate = self._build_learning_rate(learning_rate)
        self.momentum = momentum
        self.nesterov = nesterov
        if isinstance(momentum, (int, float)) and (
            momentum < 0 or momentum > 1
        ):
            raise ValueError("`momentum` must be between [0, 1].")


Adadelta.__doc__ = Optimizer.__doc__ + adadelta.Adadelta.__doc__
Adagrad.__doc__ = Optimizer.__doc__ + adagrad.Adagrad.__doc__
Adam.__doc__ = Optimizer.__doc__ + adam.Adam.__doc__
AdamW.__doc__ = Optimizer.__doc__ + adamw.AdamW.__doc__
RMSprop.__doc__ = Optimizer.__doc__ + rmsprop.RMSprop.__doc__
SGD.__doc__ = Optimizer.__doc__ + sgd.SGD.__doc__
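# ---------------------------------------------------------------------------
# Illustrative usage sketch (not part of the original module). It shows how
# one of these DTensor optimizers might be constructed against a mesh; the
# mesh dimensions and device names below are assumptions made for the
# example. Per-weight optimizer slots reuse the layout of the corresponding
# model variables, while `mesh` only hosts global state such as the
# iteration counter and the learning rate.
#
#   import tensorflow.experimental.dtensor as tf_dtensor
#
#   mesh = tf_dtensor.create_mesh([("batch", 2)], devices=["CPU:0", "CPU:1"])
#   opt = Adam(learning_rate=1e-3, mesh=mesh)
#   # opt.iterations and the learning rate are replicated DVariables on
#   # `mesh`; Adam's moment slots are created later from the model weights
#   # via `add_variable_from_reference`.
# ---------------------------------------------------------------------------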