"""Ftrl-proximal optimizer implementation."""

import tensorflow.compat.v2 as tf

from keras.optimizers.optimizer_v2 import optimizer_v2
from tensorflow.python.util.tf_export import keras_export


@keras_export("keras.optimizers.Ftrl")
class Ftrl(optimizer_v2.OptimizerV2):
    r"""Optimizer that implements the FTRL algorithm.

    "Follow The Regularized Leader" (FTRL) is an optimization algorithm
    developed at Google for click-through rate prediction in the early 2010s. It
    is most suitable for shallow models with large and sparse feature spaces.
    The algorithm is described by
    [McMahan et al., 2013](https://research.google.com/pubs/archive/41159.pdf).
    The Keras version has support for both online L2 regularization (the L2
    regularization described in the paper above) and shrinkage-type L2
    regularization (which is the addition of an L2 penalty to the loss
    function).

    Initialization:

    ```python
    n = 0
    sigma = 0
    z = 0
    ```

    Update rule for one variable `w`:

    ```python
    prev_n = n
    n = n + g ** 2
    sigma = (sqrt(n) - sqrt(prev_n)) / lr
    z = z + g - sigma * w
    if abs(z) < lambda_1:
      w = 0
    else:
      w = (sgn(z) * lambda_1 - z) / ((beta + sqrt(n)) / alpha + lambda_2)
    ```

    Notation:

    - `lr` is the learning rate
    - `g` is the gradient for the variable
    - `lambda_1` is the L1 regularization strength
    - `lambda_2` is the L2 regularization strength
    - `alpha` and `beta` are the learning-rate parameters from McMahan et
      al.; here `alpha` plays the role of the learning rate `lr`, and `beta`
      is the `beta` argument below
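
    Written as a runnable sketch (pure Python, one scalar weight; `alpha` is
    taken to be the learning rate `lr`, as in the update rule above):

    ```python
    import math

    def ftrl_step(w, n, z, g, lr, lambda_1, lambda_2, beta):
        # One FTRL-proximal update for a single scalar weight,
        # following the pseudocode above.
        prev_n = n
        n = n + g ** 2
        sigma = (math.sqrt(n) - math.sqrt(prev_n)) / lr
        z = z + g - sigma * w
        if abs(z) < lambda_1:
            w = 0.0  # L1 thresholding yields exact zeros.
        else:
            w = (math.copysign(lambda_1, z) - z) / (
                (beta + math.sqrt(n)) / lr + lambda_2
            )
        return w, n, z
    ```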

    See the `l2_shrinkage_regularization_strength` argument below for more
    details. When shrinkage is enabled, the gradient is replaced with a
    shrinkage-adjusted gradient.
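
    Concretely, a sketch of the adjustment (here `l2_shrinkage` stands for
    the `l2_shrinkage_regularization_strength` argument and `w` for the
    weight; this mirrors the behavior of the underlying FTRL-V2 kernels):

    ```python
    # The accumulator n is still updated with the raw gradient g, while
    # the update of z uses a shrinkage-adjusted gradient:
    g_with_shrinkage = g + 2 * l2_shrinkage * w
    ```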

    Args:
      learning_rate: A `Tensor`, floating point value, or a schedule that is a
        `tf.keras.optimizers.schedules.LearningRateSchedule`. The learning rate.
      learning_rate_power: A float value, must be less than or equal to zero.
        Controls how the learning rate decreases during training. Use zero for
        a fixed learning rate.
      initial_accumulator_value: The starting value for accumulators.
        Only zero or positive values are allowed.
      l1_regularization_strength: A float value, must be greater than or
        equal to zero. Defaults to 0.0.
      l2_regularization_strength: A float value, must be greater than or
        equal to zero. Defaults to 0.0.
      name: Optional name prefix for the operations created when applying
        gradients.  Defaults to `"Ftrl"`.
      l2_shrinkage_regularization_strength: A float value, must be greater than
        or equal to zero. This differs from L2 above in that the L2 above is a
        stabilization penalty, whereas this L2 shrinkage is a magnitude penalty.
        When the input is sparse, shrinkage will only happen on the active
        weights.
      beta: A float value, representing the beta value from the paper.
        Defaults to 0.0.
      **kwargs: keyword arguments. Allowed arguments are `clipvalue`,
        `clipnorm`, `global_clipnorm`.
        If `clipvalue` (float) is set, the gradient of each weight
        is clipped to be no higher than this value.
        If `clipnorm` (float) is set, the gradient of each weight
        is individually clipped so that its norm is no higher than this value.
        If `global_clipnorm` (float) is set, the gradient of all weights is
        clipped so that their global norm is no higher than this value.
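
    Usage (a minimal sketch, assuming TF 2.x eager execution):

    ```python
    import tensorflow as tf

    opt = tf.keras.optimizers.Ftrl(learning_rate=0.1)
    var = tf.Variable(2.0)
    loss = lambda: (var ** 2) / 2.0  # Simple quadratic loss.
    for _ in range(10):
        opt.minimize(loss, [var])
    # `var` is driven toward 0; with l1_regularization_strength > 0,
    # FTRL can set weights exactly to zero.
    ```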

    Reference:
      - [McMahan et al., 2013](
        https://research.google.com/pubs/archive/41159.pdf)
    """

    def __init__(
        self,
        learning_rate=0.001,
        learning_rate_power=-0.5,
        initial_accumulator_value=0.1,
        l1_regularization_strength=0.0,
        l2_regularization_strength=0.0,
        name="Ftrl",
        l2_shrinkage_regularization_strength=0.0,
        beta=0.0,
        **kwargs,
    ):
        super().__init__(name, **kwargs)

        if initial_accumulator_value < 0.0:
            raise ValueError(
                "`initial_accumulator_value` needs to be positive or zero. "
                "Received: initial_accumulator_value="
                f"{initial_accumulator_value}."
            )
        if learning_rate_power > 0.0:
            raise ValueError(
                "`learning_rate_power` needs to be negative or zero. "
                f"Received: learning_rate_power={learning_rate_power}."
            )
        if l1_regularization_strength < 0.0:
            raise ValueError(
                "`l1_regularization_strength` needs to be positive or zero. "
                "Received: l1_regularization_strength="
                f"{l1_regularization_strength}."
            )
        if l2_regularization_strength < 0.0:
            raise ValueError(
                "`l2_regularization_strength` needs to be positive or zero. "
                "Received: l2_regularization_strength="
                f"{l2_regularization_strength}."
            )
        if l2_shrinkage_regularization_strength < 0.0:
            raise ValueError(
                "`l2_shrinkage_regularization_strength` needs to be positive "
                "or zero. Received: l2_shrinkage_regularization_strength="
                f"{l2_shrinkage_regularization_strength}."
            )

        self._set_hyper("learning_rate", learning_rate)
        self._set_hyper("decay", self._initial_decay)
        self._set_hyper("learning_rate_power", learning_rate_power)
        self._set_hyper(
            "l1_regularization_strength", l1_regularization_strength
        )
        self._set_hyper(
            "l2_regularization_strength", l2_regularization_strength
        )
        self._set_hyper("beta", beta)
        self._initial_accumulator_value = initial_accumulator_value
        self._l2_shrinkage_regularization_strength = (
            l2_shrinkage_regularization_strength
        )

    def _create_slots(self, var_list):
        # Create the "accumulator" and "linear" slots for each variable.
        for var in var_list:
            dtype = var.dtype.base_dtype
            init = tf.compat.v1.constant_initializer(
                self._initial_accumulator_value, dtype=dtype
            )
            self.add_slot(var, "accumulator", init)
            self.add_slot(var, "linear")
    def _prepare_local(self, var_device, var_dtype, apply_state):
        super()._prepare_local(var_device, var_dtype, apply_state)
        apply_state[(var_device, var_dtype)].update(
            dict(
                learning_rate_power=tf.identity(
                    self._get_hyper("learning_rate_power", var_dtype)
                ),
                l1_regularization_strength=tf.identity(
                    self._get_hyper("l1_regularization_strength", var_dtype)
                ),
                l2_regularization_strength=tf.identity(
                    self._get_hyper("l2_regularization_strength", var_dtype)
                ),
                beta=tf.identity(self._get_hyper("beta", var_dtype)),
                l2_shrinkage_regularization_strength=tf.cast(
                    self._l2_shrinkage_regularization_strength, var_dtype
                ),
            )
        )
      C   s   |j |jj }}|pi ||fp,| ||}|d |d d|d    }| |d}| |d}	| jdkrtjj	|j
|j
|	j
||d |d ||d	 | jd
	S tjj|j
|j
|	j
||d |d ||d |d	 | jd
S d S )Nr   r          @lr_tr!   r"   r   r   r   )	r*   accumr"   gradlrl1l2lr_poweruse_lockingr   )
r*   r8   r"   r9   r:   r;   r<   l2_shrinkager=   r>   )devicer    r#   get_fallback_apply_stateget_slotr   r$   raw_opsResourceApplyFtrlhandle_use_lockingResourceApplyFtrlV2)
r   r9   r*   r5   r3   r4   coefficients#adjusted_l2_regularization_strengthr8   r"   r   r   r   _resource_apply_dense   sN    

zFtrl._resource_apply_densec                 C   s   |j |jj }}|pi ||fp,| ||}|d |d d|d    }| |d}	| |d}
| jdkrtjj	|j
|	j
|
j
|||d |d ||d	 | jd

S tjj|j
|	j
|
j
|||d |d ||d |d	 | jdS d S )Nr   r   r6   r7   r!   r"   r   r   r   )
r*   r8   r"   r9   indicesr:   r;   r<   r=   r>   r   )r*   r8   r"   r9   rL   r:   r;   r<   r?   r=   r>   )r@   r    r#   rA   rB   rC   r   r$   rD   ResourceSparseApplyFtrlrF   rG   ResourceSparseApplyFtrlV2)r   r9   r*   rL   r5   r3   r4   rI   rJ   r8   r"   r   r   r   _resource_apply_sparse   sR    

zFtrl._resource_apply_sparsec                    sN   t   }|| d| j| j| d| d| d| d| jd |S )Nr
   r   r   r   r   )r
   r   r   r   r   r   r   r   )r   
get_configr.   _serialize_hyperparameterr   r   r   )r   configr   r   r   rP     s*    
zFtrl.get_config)r   r   r   r   r   r   r   r   )N)N)__name__