import torch
from torch import Tensor

from .optimizer import Optimizer
from typing import List, Optional


class Adadelta(Optimizer):
    r"""Implements Adadelta algorithm.

    .. math::
       \begin{aligned}
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{input}      : \gamma \text{ (lr)}, \: \theta_0 \text{ (params)},
                \: f(\theta) \text{ (objective)}, \: \rho \text{ (decay)},
                \: \lambda \text{ (weight decay)}                                                \\
            &\textbf{initialize} :  v_0  \leftarrow 0 \: \text{ (square avg)},
                \: u_0 \leftarrow 0 \: \text{ (accumulate variables)}                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                                 \\
            &\textbf{for} \: t=1 \: \textbf{to} \: \ldots \: \textbf{do}                         \\
            &\hspace{5mm}g_t           \leftarrow   \nabla_{\theta} f_t (\theta_{t-1})           \\
            &\hspace{5mm}if \: \lambda \neq 0                                                    \\
            &\hspace{10mm} g_t \leftarrow g_t + \lambda  \theta_{t-1}                            \\
            &\hspace{5mm} v_t      \leftarrow v_{t-1} \rho + g^2_t (1 - \rho)                    \\
            &\hspace{5mm}\Delta x_t    \leftarrow   \frac{\sqrt{u_{t-1} +
                \epsilon }}{ \sqrt{v_t + \epsilon}  }g_t \hspace{21mm}                           \\
            &\hspace{5mm} u_t  \leftarrow   u_{t-1}  \rho +
                 \Delta x^2_t  (1 - \rho)                                                        \\
            &\hspace{5mm}\theta_t      \leftarrow   \theta_{t-1} - \gamma  \Delta x_t            \\
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
            &\bf{return} \:  \theta_t                                                     \\[-1.ex]
            &\rule{110mm}{0.4pt}                                                          \\[-1.ex]
       \end{aligned}

    For further details regarding the algorithm we refer to `ADADELTA: An Adaptive Learning Rate Method`_.

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        rho (float, optional): coefficient used for computing a running average
            of squared gradients (default: 0.9)
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-6)
        lr (float, optional): coefficient that scales delta before it is applied
            to the parameters (default: 1.0)
        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
        foreach (bool, optional): whether the foreach (multi-tensor) implementation
            of the optimizer is used (default: None)
        maximize (bool, optional): maximize the params based on the objective, instead of
            minimizing (default: False)

    .. _ADADELTA\: An Adaptive Learning Rate Method:
        https://arxiv.org/abs/1212.5701
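
    Example (a minimal usage sketch, not from the original file; ``model``,
    ``loss_fn``, ``input`` and ``target`` are assumed to be defined elsewhere):

        >>> optimizer = torch.optim.Adadelta(model.parameters(), lr=1.0, rho=0.9)
        >>> optimizer.zero_grad()
        >>> loss_fn(model(input), target).backward()
        >>> optimizer.step()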
          ??ư>r   NF)maximize)foreachr   c          	         s   d|kst d|d|  kr*dks:n t d|d|ksPt d|d|ksft d|t||||||d}tt| || d S )Ng        zInvalid learning rate: {}r   zInvalid rho value: {}zInvalid epsilon value: {}zInvalid weight_decay value: {})lrrhoepsweight_decayr   r   )
ValueErrorformatdictsuperr   __init__)	selfparamsr   r   r   r   r   r   defaults	__class__ P/var/www/html/django/DPS/env/lib/python3.9/site-packages/torch/optim/adadelta.pyr   7   s    
zAdadelta.__init__c                    s4   t  | | jD ]}|dd  |dd qd S )Nr   r   F)r   __setstate__param_groups
setdefault)r   stategroupr   r   r   r   F   s    
zAdadelta.__setstate__c                 C   s`  d}|dur:t   | }W d   n1 s00    Y  | jD ]}g }g }g }g }|d |d |d |d |d |d f\}}	}
}}}|d D ]}|jdu rq|| |jjrtd	||j | j| }t|d
krd
|d< t j	|t j
d|d< t j	|t j
d|d< ||d  ||d  |d  d7  < qt||||||	|
|||d
 q@|S )zPerforms a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad = []
            grads = []
            square_avgs = []
            acc_deltas = []
            lr, rho, eps, weight_decay, foreach, maximize = (group['lr'],
                                                             group['rho'],
                                                             group['eps'],
                                                             group['weight_decay'],
                                                             group['foreach'],
                                                             group['maximize'])

            for p in group['params']:
                if p.grad is None:
                    continue
                params_with_grad.append(p)
                if p.grad.is_sparse:
                    raise RuntimeError('Adadelta does not support sparse gradients')
                grads.append(p.grad)

                state = self.state[p]

                # Lazy state initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['square_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                    state['acc_delta'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                square_avgs.append(state['square_avg'])
                acc_deltas.append(state['acc_delta'])

                state['step'] += 1

            adadelta(params_with_grad,
                     grads,
                     square_avgs,
                     acc_deltas,
                     lr=lr,
                     rho=rho,
                     eps=eps,
                     weight_decay=weight_decay,
                     foreach=foreach,
                     maximize=maximize)

        return loss


def adadelta(params: List[Tensor],
             grads: List[Tensor],
             square_avgs: List[Tensor],
             acc_deltas: List[Tensor],
             # kw-only args with defaults are not supported by torchscript-compiled
             # functions, so `foreach` stays positional with a default here
             foreach: Optional[bool] = None,
             *,
             lr: float,
             rho: float,
             eps: float,
             weight_decay: float,
             maximize: bool):
    r"""Functional API that performs Adadelta algorithm computation.

    See :class:`~torch.optim.Adadelta` for details.
    """

    if foreach is None:
        # default to the single-tensor path when the caller does not choose
        foreach = False

    if foreach and torch.jit.is_scripting():
        raise RuntimeError('torch.jit.script not supported with foreach optimizers')

    if foreach and not torch.jit.is_scripting():
        func = _multi_tensor_adadelta
    else:
        func = _single_tensor_adadelta

    func(params,
         grads,
         square_avgs,
         acc_deltas,
         lr=lr,
         rho=rho,
         eps=eps,
         weight_decay=weight_decay,
         maximize=maximize)
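
# A minimal sketch of driving the functional API above directly (illustration
# only, not part of the original module); the state tensors that the Adadelta
# class normally owns are created by hand here:
#
#     param = torch.zeros(3)
#     grad = torch.ones(3)
#     square_avg = torch.zeros(3)
#     acc_delta = torch.zeros(3)
#     adadelta([param], [grad], [square_avg], [acc_delta],
#              foreach=False, lr=1.0, rho=0.9, eps=1e-6,
#              weight_decay=0.0, maximize=False)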
}}|s"|
n|
 }
|dkr>|
j|	|d}
t|	rft|}t|}t|
}
||j|
|
d| d || }|| ||
}||j||d| d t|	rt	|}|	j
|| d qd S Nr   )alphar   )value)zipaddr&   
is_complexview_as_realmul_addcmul_sqrt_div_view_as_complexadd_)r   r2   r3   r4   r   r   r   r   r   paramr(   r$   r%   stddeltar   r   r   r@      s    







def _multi_tensor_adadelta(params: List[Tensor],
                           grads: List[Tensor],
                           square_avgs: List[Tensor],
                           acc_deltas: List[Tensor],
                           *,
                           lr: float,
                           rho: float,
                           eps: float,
                           weight_decay: float,
                           maximize: bool):

    if len(params) == 0:
        return

    if maximize:
        grads = torch._foreach_neg(grads)

    if weight_decay != 0:
        torch._foreach_add_(grads, params, alpha=weight_decay)

    # v_t = rho * v_{t-1} + (1 - rho) * g_t^2
    torch._foreach_mul_(square_avgs, rho)
    torch._foreach_addcmul_(square_avgs, grads, grads, value=1 - rho)

    # delta_t = sqrt(u_{t-1} + eps) / sqrt(v_t + eps) * g_t
    std = torch._foreach_add(square_avgs, eps)
    torch._foreach_sqrt_(std)

    deltas = torch._foreach_add(acc_deltas, eps)
    torch._foreach_sqrt_(deltas)
    torch._foreach_div_(deltas, std)
    torch._foreach_mul_(deltas, grads)

    # theta_t = theta_{t-1} - lr * delta_t
    torch._foreach_add_(params, deltas, alpha=-lr)

    # u_t = rho * u_{t-1} + (1 - rho) * delta_t^2
    torch._foreach_mul_(acc_deltas, rho)
    torch._foreach_addcmul_(acc_deltas, deltas, deltas, value=1 - rho)
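
# Sanity-check sketch (illustration only, not part of the original file): for
# the same inputs, the single-tensor and multi-tensor paths should apply the
# same update.
#
#     p1 = torch.randn(4); p2 = p1.clone()
#     g = torch.randn(4)
#     _single_tensor_adadelta([p1], [g.clone()], [torch.zeros(4)], [torch.zeros(4)],
#                             lr=1.0, rho=0.9, eps=1e-6, weight_decay=0.0, maximize=False)
#     _multi_tensor_adadelta([p2], [g.clone()], [torch.zeros(4)], [torch.zeros(4)],
#                            lr=1.0, rho=0.9, eps=1e-6, weight_decay=0.0, maximize=False)
#     assert torch.allclose(p1, p2)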