import torch
from . import _functional as F
from .optimizer import Optimizer


class SparseAdam(Optimizer):
    r"""Implements lazy version of Adam algorithm suitable for sparse tensors.

    In this variant, only moments that show up in the gradient get updated, and
    only those portions of the gradient get applied to the parameters.

    Args:
        params (iterable): iterable of parameters to optimize or dicts defining
            parameter groups
        lr (float, optional): learning rate (default: 1e-3)
        betas (Tuple[float, float], optional): coefficients used for computing
            running averages of gradient and its square (default: (0.9, 0.999))
        eps (float, optional): term added to the denominator to improve
            numerical stability (default: 1e-8)

    .. _Adam\: A Method for Stochastic Optimization:
        https://arxiv.org/abs/1412.6980
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8):
        if not 0.0 < lr:
            raise ValueError("Invalid learning rate: {}".format(lr))
        if not 0.0 < eps:
            raise ValueError("Invalid epsilon value: {}".format(eps))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError("Invalid beta parameter at index 0: {}".format(betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError("Invalid beta parameter at index 1: {}".format(betas[1]))

        params = list(params)

        # SparseAdam expects dense parameter tensors (only the gradients are
        # sparse), so reject sparse parameters up front.
        sparse_params = []
        for index, param in enumerate(params):
            if isinstance(param, dict):
                for d_index, d_param in enumerate(param.get("params", [])):
                    if d_param.is_sparse:
                        sparse_params.append([index, d_index])
            elif param.is_sparse:
                sparse_params.append(index)
        if sparse_params:
            raise ValueError(
                f"Sparse params at indices {sparse_params}: "
                f"SparseAdam requires dense parameter tensors"
            )

        defaults = dict(lr=lr, betas=betas, eps=eps)
        super(SparseAdam, self).__init__(params, defaults)

    @torch.no_grad()
    def step(self, closure=None):
        """Performs a single optimization step.

        Args:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            params_with_grad = []
            grads = []
            exp_avgs = []
            exp_avg_sqs = []
            state_steps = []
            eps = group['eps']
            lr = group['lr']
            beta1, beta2 = group['betas']

            for p in group['params']:
                if p.grad is not None:
                    params_with_grad.append(p)
                    if not p.grad.is_sparse:
                        raise RuntimeError('SparseAdam does not support dense gradients, '
                                           'please consider Adam instead')
                    grads.append(p.grad)

                    state = self.state[p]

                    # Lazy state initialization
                    if len(state) == 0:
                        state['step'] = 0
                        # Exponential moving averages of gradient and squared gradient
                        state['exp_avg'] = torch.zeros_like(p, memory_format=torch.preserve_format)
                        state['exp_avg_sq'] = torch.zeros_like(p, memory_format=torch.preserve_format)

                    exp_avgs.append(state['exp_avg'])
                    exp_avg_sqs.append(state['exp_avg_sq'])

                    # Update and record the per-parameter step count
                    state['step'] += 1
                    state_steps.append(state['step'])

            F.sparse_adam(params_with_grad,
                          grads,
                          exp_avgs,
                          exp_avg_sqs,
                          state_steps,
                          beta1=beta1,
                          beta2=beta2,
                          lr=group['lr'],
                          eps=group['eps'])

        return loss
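

# Illustrative usage sketch (not part of the upstream torch.optim source): it
# shows the intended pairing of SparseAdam with a parameter that produces
# sparse gradients, here an nn.Embedding constructed with sparse=True. The
# layer sizes, batch size, and loss below are arbitrary assumptions made for
# the example.
if __name__ == "__main__":
    import torch.nn as nn

    # Embedding with sparse=True yields sparse COO gradients, which is what
    # SparseAdam requires (dense gradients raise a RuntimeError in step()).
    embedding = nn.Embedding(num_embeddings=1000, embedding_dim=16, sparse=True)
    optimizer = SparseAdam(embedding.parameters(), lr=1e-3)

    indices = torch.randint(0, 1000, (32,))
    loss = embedding(indices).sum()
    loss.backward()       # embedding.weight.grad is a sparse tensor
    optimizer.step()      # lazily updates only the rows indexed in this batch
    optimizer.zero_grad()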