a
    Sicc{                 	   @   s  d Z ddlZddlZddlm  mZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ edG dd dZG dd deZedG dd deZedG dd deZedG dd deZedG dd deZ ed G d!d" d"eZ!ed#G d$d% d%eZ"ed&G d'd( d(eZ#ed)G d*d+ d+eZ$ed,G d-d. d.eZ%ed/G d0d1 d1eZ&ed2G d3d4 d4eZ'ed5G d6d7 d7eZ(ed8G d9d: d:eZ)ed;G d<d= d=eZ*ed>G d?d@ d@eZ+edAdBdCdDdEdFej,jj-dGdH Z.ddJdKZ/e0e.ej1dLdM Z2edNdOdPdQdRdSej,jj-dTdU Z3e0e3ej1dVdW Z4edXdYdZd[d\d]ej,jj-d^d_ Z5e0e5ej1d`da Z6edbdcdddedfdgej,jj-dhdi Z7e0e7ej1djdk Z8dldm Z9edndoej,jj-dpdq Z:edrdsej,jj-dtdu Z;edvej,jj-dwdx Z<edyg dzej,jj-dd|d}Z=ed~dddej,jj-dd Z>eddej,jj-dddZ?e0e?ej1dddZ@eddej,jj-dddZAe0eAej1dddZBeddej,jj-dddZCe0eCej1dddZDeddej,jj-dddZEe0eEej1dddZFeddddddddej,jj-dd ZGeddej,jj-dd ZHedg ddzej,jj-dddZIedG dd deZJeC ZKZLe. ZMZNe3 ZOZPe5 ZQZRe7 ZSZTeG ZU ZVZWe>ZXe=ZYdd ZZeddd Z[eddddZ\eddd Z]ejj^j_j`deAdiZadS )zBuilt-in loss functions.    N)backend)
saving_lib)generic_utils)losses_utils)tf_utils)deserialize_keras_objectserialize_keras_object)ragged_map_ops)ragged_util)dispatch)keras_export)doc_controlszkeras.losses.Lossc                   @   sd   e Zd ZdZejjdfddZdd ZdddZ	e
d	d
 Zdd Zejejdd Zdd ZdS )Lossar  Loss base class.

    To be implemented by subclasses:
    * `call()`: Contains the logic for loss calculation using `y_true`,
      `y_pred`.

    Example subclass implementation:

    ```python
    class MeanSquaredError(Loss):

      def call(self, y_true, y_pred):
        return tf.reduce_mean(tf.math.square(y_pred - y_true), axis=-1)
    ```

    When used with `tf.distribute.Strategy`, outside of built-in training loops
    such as `tf.keras` `compile` and `fit`, please use 'SUM' or 'NONE' reduction
    types, and reduce losses explicitly in your training loop. Using 'AUTO' or
    'SUM_OVER_BATCH_SIZE' will raise an error.

    Please see this custom training [tutorial](
      https://www.tensorflow.org/tutorials/distribute/custom_training) for more
    details on this.

    You can implement 'SUM_OVER_BATCH_SIZE' using global batch size like:

    ```python
    with strategy.scope():
      loss_obj = tf.keras.losses.CategoricalCrossentropy(
          reduction=tf.keras.losses.Reduction.NONE)
      ....
      loss = (tf.reduce_sum(loss_obj(labels, predictions)) *
              (1. / global_batch_size))
    ```
    Nc                 C   s*   t j| || _|| _d| _|   dS )a  Initializes `Loss` class.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE`
            will raise an error. Please see this custom training [tutorial](
              https://www.tensorflow.org/tutorials/distribute/custom_training)
              for more details.
          name: Optional name for the instance.
        FN)r   ReductionV2validate	reductionname_allow_sum_over_batch_size_set_name_scopeselfr   r    r   H/var/www/html/django/DPS/env/lib/python3.9/site-packages/keras/losses.py__init__N   s
    zLoss.__init__c                 C   s@   | j du r| jjd| _n | j dkr.d| _n| j d| _dS )z"Creates a valid `name_scope` name.N_z<lambda>lambda)r   	__class____name__strip_name_scoper   r   r   r   r   f   s
    

zLoss._set_name_scopec           	   	   C   s   t |||}t| j | t r2| j}ntjj	
| jtjj	 }|||}t|}|  }t||||}tj|||dW  d   W  d   S 1 s0    Y  W d   n1 s0    Y  dS )a|  Invokes the `Loss` instance.

        Args:
          y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
            sparse loss functions such as sparse categorical crossentropy where
            shape = `[batch_size, d0, .. dN-1]`
          y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
          sample_weight: Optional `sample_weight` acts as a coefficient for the
            loss. If a scalar is provided, then the loss is simply scaled by the
            given value. If `sample_weight` is a tensor of size `[batch_size]`,
            then the total loss for each sample of the batch is rescaled by the
            corresponding element in the `sample_weight` vector. If the shape of
            `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be
            broadcasted to this shape), then each loss element of `y_pred` is
            scaled by the corresponding value of `sample_weight`. (Note
            on`dN-1`: all loss functions reduce by 1 dimension, usually
            axis=-1.)

        Returns:
          Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
            shape `[batch_size, d0, .. dN-1]`; otherwise, it is scalar. (Note
            `dN-1` because all loss functions reduce by 1 dimension, usually
            axis=-1.)

        Raises:
          ValueError: If the shape of `sample_weight` is invalid.
        )r   N)r   "graph_context_for_symbolic_tensorsr   
name_scoper    tfexecuting_eagerlycall__internal__	autograph
tf_convertcontrol_status_ctxr   get_mask_get_reductionZapply_valid_maskcompute_weighted_loss)	r   y_truey_predsample_weight	graph_ctxcall_fnlossesmaskr   r   r   r   __call__p   s$    

zLoss.__call__c                 C   s   | f i |S )zInstantiates a `Loss` from its config (output of `get_config()`).

        Args:
            config: Output of `get_config()`.

        Returns:
            A `Loss` instance.
        r   )clsconfigr   r   r   from_config   s    
zLoss.from_configc                 C   s   | j | jdS )z4Returns the config dictionary for a `Loss` instance.r   r   r9   r!   r   r   r   
get_config   s    zLoss.get_configc                 C   s   t ddS )a  Invokes the `Loss` instance.

        Args:
          y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`, except
            sparse loss functions such as sparse categorical crossentropy where
            shape = `[batch_size, d0, .. dN-1]`
          y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`

        Returns:
          Loss values with the shape `[batch_size, d0, .. dN-1]`.
        z"Must be implemented in subclasses.N)NotImplementedError)r   r.   r/   r   r   r   r&      s    z	Loss.callc                 C   sP   | j s4tj r4| jtjjks,| jtjjkr4t	d| jtjjkrJtjjS | jS )z?Handles `AUTO` reduction cases and returns the reduction value.aQ  Please use `tf.keras.losses.Reduction.SUM` or `tf.keras.losses.Reduction.NONE` for loss reduction when losses are used with `tf.distribute.Strategy` outside of the built-in training loops. You can implement `tf.keras.losses.Reduction.SUM_OVER_BATCH_SIZE` using global batch size like:
```
with strategy.scope():
    loss_obj = tf.keras.losses.CategoricalCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
....
    loss = tf.reduce_sum(loss_obj(labels, predictions)) * (1. / global_batch_size)
```
Please see https://www.tensorflow.org/tutorials/distribute/custom_training for more details.)
r   r$   
distributehas_strategyr   r   r   AUTOSUM_OVER_BATCH_SIZE
ValueErrorr!   r   r   r   r,      s     	zLoss._get_reduction)N)r   
__module____qualname____doc__r   r   r>   r   r   r5   classmethodr8   r:   abcabstractmethodr   for_subclass_implementersr&   r,   r   r   r   r   r   (   s   $

2
r   c                       sJ   e Zd ZdZejjdf fdd	Zdd Z fddZ	e
d	d
 Z  ZS )LossFunctionWrapperz*Wraps a loss function in the `Loss` class.Nc                    s    t  j||d || _|| _dS )a  Initializes `LossFunctionWrapper` class.

        Args:
          fn: The loss function to wrap, with signature `fn(y_true, y_pred,
            **kwargs)`.
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance.
          **kwargs: The keyword arguments that are passed on to `fn`.
        r9   N)superr   fn
_fn_kwargs)r   rJ   r   r   kwargsr   r   r   r      s    zLossFunctionWrapper.__init__c                 C   sR   t |r$t |r$t||\}}t jj| jt jj }|||fi | j	S )zInvokes the `LossFunctionWrapper` instance.

        Args:
          y_true: Ground truth values.
          y_pred: The predicted values.

        Returns:
          Loss values per sample.
        )
r$   	is_tensorr   squeeze_or_expand_dimensionsr'   r(   r)   rJ   r*   rK   )r   r.   r/   ag_fnr   r   r   r&      s    
zLossFunctionWrapper.callc                    sp   i }| j  D ]$\}}t|r*t|n|||< qtjrJt	| j
|d< t  }tt| t|  S )NrJ   )rK   itemsr   is_tensor_or_variabler   evalr   _ENABLEDr   get_registered_namerJ   rI   r:   dictlist)r   r7   kvbase_configrM   r   r   r:     s    
zLossFunctionWrapper.get_configc                 C   s8   t jr*|dd}|r*| tu r*t||d< | f i |S )zInstantiates a `Loss` from its config (output of `get_config()`).

        Args:
            config: Output of `get_config()`.

        Returns:
            A `keras.losses.Loss` instance.
        rJ   N)r   rT   poprH   get)r6   r7   fn_namer   r   r   r8     s
    
zLossFunctionWrapper.from_config)r   rA   rB   rC   r   r   r>   r   r&   r:   rD   r8   __classcell__r   r   rM   r   rH      s   rH   zkeras.losses.MeanSquaredErrorc                       s*   e Zd ZdZejjdf fdd	Z  ZS )MeanSquaredErrora  Computes the mean of squares of errors between labels and predictions.

    `loss = square(y_true - y_pred)`

    Standalone usage:

    >>> y_true = [[0., 1.], [0., 0.]]
    >>> y_pred = [[1., 1.], [1., 0.]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> mse = tf.keras.losses.MeanSquaredError()
    >>> mse(y_true, y_pred).numpy()
    0.5

    >>> # Calling with 'sample_weight'.
    >>> mse(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
    0.25

    >>> # Using 'sum' reduction type.
    >>> mse = tf.keras.losses.MeanSquaredError(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> mse(y_true, y_pred).numpy()
    1.0

    >>> # Using 'none' reduction type.
    >>> mse = tf.keras.losses.MeanSquaredError(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> mse(y_true, y_pred).numpy()
    array([0.5, 0.5], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd', loss=tf.keras.losses.MeanSquaredError())
    ```
    mean_squared_errorc                    s   t  jt||d dS )a-  Initializes `MeanSquaredError` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to
            'mean_squared_error'.
        r   r   N)rI   r   r`   r   rM   r   r   r   V  s    zMeanSquaredError.__init__	r   rA   rB   rC   r   r   r>   r   r^   r   r   rM   r   r_   0  s   %r_   zkeras.losses.MeanAbsoluteErrorc                       s*   e Zd ZdZejjdf fdd	Z  ZS )MeanAbsoluteErrora  Computes the mean of absolute difference between labels and predictions.

    `loss = abs(y_true - y_pred)`

    Standalone usage:

    >>> y_true = [[0., 1.], [0., 0.]]
    >>> y_pred = [[1., 1.], [1., 0.]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> mae = tf.keras.losses.MeanAbsoluteError()
    >>> mae(y_true, y_pred).numpy()
    0.5

    >>> # Calling with 'sample_weight'.
    >>> mae(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
    0.25

    >>> # Using 'sum' reduction type.
    >>> mae = tf.keras.losses.MeanAbsoluteError(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> mae(y_true, y_pred).numpy()
    1.0

    >>> # Using 'none' reduction type.
    >>> mae = tf.keras.losses.MeanAbsoluteError(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> mae(y_true, y_pred).numpy()
    array([0.5, 0.5], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd', loss=tf.keras.losses.MeanAbsoluteError())
    ```
    mean_absolute_errorc                    s   t  jt||d dS )a/  Initializes `MeanAbsoluteError` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to
            'mean_absolute_error'.
        ra   N)rI   r   rd   r   rM   r   r   r     s    zMeanAbsoluteError.__init__rb   r   r   rM   r   rc   l  s   &rc   z(keras.losses.MeanAbsolutePercentageErrorc                       s*   e Zd ZdZejjdf fdd	Z  ZS )MeanAbsolutePercentageErrora  Computes the mean absolute percentage error between `y_true` and `y_pred`.

    Formula:

    `loss = 100 * abs((y_true - y_pred) / y_true)`

    Note that to avoid dividing by zero, a small epsilon value
    is added to the denominator.

    Standalone usage:

    >>> y_true = [[2., 1.], [2., 3.]]
    >>> y_pred = [[1., 1.], [1., 0.]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> mape = tf.keras.losses.MeanAbsolutePercentageError()
    >>> mape(y_true, y_pred).numpy()
    50.

    >>> # Calling with 'sample_weight'.
    >>> mape(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
    20.

    >>> # Using 'sum' reduction type.
    >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> mape(y_true, y_pred).numpy()
    100.

    >>> # Using 'none' reduction type.
    >>> mape = tf.keras.losses.MeanAbsolutePercentageError(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> mape(y_true, y_pred).numpy()
    array([25., 75.], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd',
                  loss=tf.keras.losses.MeanAbsolutePercentageError())
    ```
    mean_absolute_percentage_errorc                    s   t  jt||d dS )aD  Initializes `MeanAbsolutePercentageError` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to
            'mean_absolute_percentage_error'.
        ra   N)rI   r   rf   r   rM   r   r   r     s    z$MeanAbsolutePercentageError.__init__rb   r   r   rM   r   re     s   ,re   z(keras.losses.MeanSquaredLogarithmicErrorc                       s*   e Zd ZdZejjdf fdd	Z  ZS )MeanSquaredLogarithmicErrora\  Computes the mean squared logarithmic error between `y_true` and `y_pred`.

    `loss = square(log(y_true + 1.) - log(y_pred + 1.))`

    Standalone usage:

    >>> y_true = [[0., 1.], [0., 0.]]
    >>> y_pred = [[1., 1.], [1., 0.]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> msle = tf.keras.losses.MeanSquaredLogarithmicError()
    >>> msle(y_true, y_pred).numpy()
    0.240

    >>> # Calling with 'sample_weight'.
    >>> msle(y_true, y_pred, sample_weight=[0.7, 0.3]).numpy()
    0.120

    >>> # Using 'sum' reduction type.
    >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> msle(y_true, y_pred).numpy()
    0.480

    >>> # Using 'none' reduction type.
    >>> msle = tf.keras.losses.MeanSquaredLogarithmicError(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> msle(y_true, y_pred).numpy()
    array([0.240, 0.240], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd',
                  loss=tf.keras.losses.MeanSquaredLogarithmicError())
    ```
    mean_squared_logarithmic_errorc                    s   t  jt||d dS )aD  Initializes `MeanSquaredLogarithmicError` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to
            'mean_squared_logarithmic_error'.
        ra   N)rI   r   rh   r   rM   r   r   r     s    z$MeanSquaredLogarithmicError.__init__rb   r   r   rM   r   rg     s   'rg   zkeras.losses.BinaryCrossentropyc                       s0   e Zd ZdZdddejjdf fdd	Z  ZS )BinaryCrossentropya	  Computes the cross-entropy loss between true labels and predicted labels.

    Use this cross-entropy loss for binary (0 or 1) classification applications.
    The loss function requires the following inputs:

    - `y_true` (true label): This is either 0 or 1.
    - `y_pred` (predicted value): This is the model's prediction, i.e, a single
      floating-point value which either represents a
      [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]
      when `from_logits=True`) or a probability (i.e, value in [0., 1.] when
      `from_logits=False`).

    **Recommended Usage:** (set `from_logits=True`)

    With `tf.keras` API:

    ```python
    model.compile(
      loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
      ....
    )
    ```

    As a standalone function:

    >>> # Example 1: (batch_size = 1, number of samples = 4)
    >>> y_true = [0, 1, 0, 0]
    >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
    >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    >>> bce(y_true, y_pred).numpy()
    0.865

    >>> # Example 2: (batch_size = 2, number of samples = 4)
    >>> y_true = [[0, 1], [0, 0]]
    >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
    >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
    >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
    >>> bce(y_true, y_pred).numpy()
    0.865
    >>> # Using 'sample_weight' attribute
    >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
    0.243
    >>> # Using 'sum' reduction` type.
    >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> bce(y_true, y_pred).numpy()
    1.730
    >>> # Using 'none' reduction type.
    >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> bce(y_true, y_pred).numpy()
    array([0.235, 1.496], dtype=float32)

    **Default Usage:** (set `from_logits=False`)

    >>> # Make the following updates to the above "Recommended Usage" section
    >>> # 1. Set `from_logits=False`
    >>> tf.keras.losses.BinaryCrossentropy() # OR ...('from_logits=False')
    >>> # 2. Update `y_pred` to use probabilities instead of logits
    >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
    F        binary_crossentropyc                    s"   t  jt|||||d || _dS )a  Initializes `BinaryCrossentropy` instance.

        Args:
          from_logits: Whether to interpret `y_pred` as a tensor of
            [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
            assume that `y_pred` contains probabilities (i.e., values in [0,
            1]).
          label_smoothing: Float in [0, 1]. When 0, no smoothing occurs. When >
            0, we compute the loss between the predicted labels and a smoothed
            version of the true labels, where the smoothing squeezes the labels
            towards 0.5.  Larger values of `label_smoothing` correspond to
            heavier smoothing.
          axis: The axis along which to compute crossentropy (the features
            axis).  Defaults to -1.
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Name for the op. Defaults to 'binary_crossentropy'.
        r   r   from_logitslabel_smoothingaxisN)rI   r   rl   rn   r   rn   ro   rp   r   r   rM   r   r   r   q  s    "zBinaryCrossentropy.__init__rb   r   r   rM   r   ri   1  s   @ri   z$keras.losses.BinaryFocalCrossentropyc                       sB   e Zd ZdZddddddejjdf fdd		Z fd
dZ  Z	S )BinaryFocalCrossentropya  Computes the focal cross-entropy loss between true labels and predictions.

    Binary cross-entropy loss is often used for binary (0 or 1) classification
    tasks. The loss function requires the following inputs:

    - `y_true` (true label): This is either 0 or 1.
    - `y_pred` (predicted value): This is the model's prediction, i.e, a single
      floating-point value which either represents a
      [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf]
      when `from_logits=True`) or a probability (i.e, value in `[0., 1.]` when
      `from_logits=False`).

    According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
    helps to apply a "focal factor" to down-weight easy examples and focus more
    on hard examples. By default, the focal tensor is computed as follows:

    `focal_factor = (1 - output) ** gamma` for class 1
    `focal_factor = output ** gamma` for class 0
    where `gamma` is a focusing parameter. When `gamma=0`, this function is
    equivalent to the binary crossentropy loss.

    With the `compile()` API:

    ```python
    model.compile(
      loss=tf.keras.losses.BinaryFocalCrossentropy(gamma=2.0, from_logits=True),
      ....
    )
    ```

    As a standalone function:

    >>> # Example 1: (batch_size = 1, number of samples = 4)
    >>> y_true = [0, 1, 0, 0]
    >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=2,
    ...                                                from_logits=True)
    >>> loss(y_true, y_pred).numpy()
    0.691

    >>> # Apply class weight
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
    ...     apply_class_balancing=True, gamma=2, from_logits=True)
    >>> loss(y_true, y_pred).numpy()
    0.51

    >>> # Example 2: (batch_size = 2, number of samples = 4)
    >>> y_true = [[0, 1], [0, 0]]
    >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
    >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3,
    ...                                                from_logits=True)
    >>> loss(y_true, y_pred).numpy()
    0.647

    >>> # Apply class weight
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
    ...     apply_class_balancing=True, gamma=3, from_logits=True)
    >>> loss(y_true, y_pred).numpy()
    0.482

    >>> # Using 'sample_weight' attribute with focal effect
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=3,
    ...                                                from_logits=True)
    >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
    0.133

    >>> # Apply class weight
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
    ...     apply_class_balancing=True, gamma=3, from_logits=True)
    >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
    0.097

    >>> # Using 'sum' reduction` type.
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(gamma=4,
    ...                                                from_logits=True,
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> loss(y_true, y_pred).numpy()
    1.222

    >>> # Apply class weight
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
    ...     apply_class_balancing=True, gamma=4, from_logits=True,
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> loss(y_true, y_pred).numpy()
    0.914

    >>> # Using 'none' reduction type.
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
    ...     gamma=5, from_logits=True,
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> loss(y_true, y_pred).numpy()
    array([0.0017 1.1561], dtype=float32)

    >>> # Apply class weight
    >>> loss = tf.keras.losses.BinaryFocalCrossentropy(
    ...     apply_class_balancing=True, gamma=5, from_logits=True,
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> loss(y_true, y_pred).numpy()
    array([0.0004 0.8670], dtype=float32)


    Args:
      apply_class_balancing: A bool, whether to apply weight balancing on the
        binary classes 0 and 1.
      alpha: A weight balancing factor for class 1, default is `0.25` as
        mentioned in reference [Lin et al., 2018](
        https://arxiv.org/pdf/1708.02002.pdf).  The weight for class 0 is
        `1.0 - alpha`.
      gamma: A focusing parameter used to compute the focal factor, default is
        `2.0` as mentioned in the reference
        [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf).
      from_logits: Whether to interpret `y_pred` as a tensor of
        [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we
        assume that `y_pred` are probabilities (i.e., values in `[0, 1]`).
      label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs. When >
        `0`, we compute the loss between the predicted labels and a smoothed
        version of the true labels, where the smoothing squeezes the labels
        towards `0.5`. Larger values of `label_smoothing` correspond to heavier
        smoothing.
      axis: The axis along which to compute crossentropy (the features axis).
        Defaults to `-1`.
      reduction: Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
        this defaults to `SUM_OVER_BATCH_SIZE`. When used with
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras`, `compile()` and `fit()`, using `SUM_OVER_BATCH_SIZE` or
        `AUTO` will raise an error. Please see this custom training [tutorial](
        https://www.tensorflow.org/tutorials/distribute/custom_training) for
        more details.
      name: Name for the op. Defaults to 'binary_focal_crossentropy'.
    F      ?       @rj   rk   binary_focal_crossentropyc	           	         s:   t  jt||||||||d	 || _|| _|| _|| _dS )z/Initializes `BinaryFocalCrossentropy` instance.)apply_class_balancingalphagammar   r   rn   ro   rp   N)rI   r   ru   rn   rv   rw   rx   )	r   rv   rw   rx   rn   ro   rp   r   r   rM   r   r   r   &  s    z BinaryFocalCrossentropy.__init__c                    s8   | j | j| jd}t  }tt| t|  S )N)rv   rw   rx   )rv   rw   rx   rI   r:   rV   rW   rQ   )r   r7   rZ   rM   r   r   r:   B  s    
z"BinaryFocalCrossentropy.get_config)
r   rA   rB   rC   r   r   r>   r   r:   r^   r   r   rM   r   rr     s    	rr   z$keras.losses.CategoricalCrossentropyc                       s0   e Zd ZdZdddejjdf fdd	Z  ZS )CategoricalCrossentropya  Computes the crossentropy loss between the labels and predictions.

    Use this crossentropy loss function when there are two or more label
    classes. We expect labels to be provided in a `one_hot` representation. If
    you want to provide labels as integers, please use
    `SparseCategoricalCrossentropy` loss.  There should be `# classes` floating
    point values per feature.

    In the snippet below, there is `# classes` floating pointing values per
    example. The shape of both `y_pred` and `y_true` are
    `[batch_size, num_classes]`.

    Standalone usage:

    >>> y_true = [[0, 1, 0], [0, 0, 1]]
    >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> cce = tf.keras.losses.CategoricalCrossentropy()
    >>> cce(y_true, y_pred).numpy()
    1.177

    >>> # Calling with 'sample_weight'.
    >>> cce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
    0.814

    >>> # Using 'sum' reduction type.
    >>> cce = tf.keras.losses.CategoricalCrossentropy(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> cce(y_true, y_pred).numpy()
    2.354

    >>> # Using 'none' reduction type.
    >>> cce = tf.keras.losses.CategoricalCrossentropy(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> cce(y_true, y_pred).numpy()
    array([0.0513, 2.303], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd',
                  loss=tf.keras.losses.CategoricalCrossentropy())
    ```
    Frj   rk   categorical_crossentropyc                    s   t  jt|||||d dS )af  Initializes `CategoricalCrossentropy` instance.

        Args:
          from_logits: Whether `y_pred` is expected to be a logits tensor. By
            default, we assume that `y_pred` encodes a probability distribution.
          label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
            meaning the confidence on label values are relaxed. For example, if
            `0.1`, use `0.1 / num_classes` for non-target labels and
            `0.9 + 0.1 / num_classes` for target labels.
          axis: The axis along which to compute crossentropy (the features
            axis). Defaults to -1.
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance.
            Defaults to 'categorical_crossentropy'.
        rm   N)rI   r   rz   rq   rM   r   r   r   {  s     z CategoricalCrossentropy.__init__rb   r   r   rM   r   ry   L  s   /ry   z*keras.losses.SparseCategoricalCrossentropyc                       s.   e Zd ZdZddejjdf fdd	Z  ZS )SparseCategoricalCrossentropya  Computes the crossentropy loss between the labels and predictions.

    Use this crossentropy loss function when there are two or more label
    classes.  We expect labels to be provided as integers. If you want to
    provide labels using `one-hot` representation, please use
    `CategoricalCrossentropy` loss.  There should be `# classes` floating point
    values per feature for `y_pred` and a single floating point value per
    feature for `y_true`.

    In the snippet below, there is a single floating point value per example for
    `y_true` and `# classes` floating pointing values per example for `y_pred`.
    The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
    `[batch_size, num_classes]`.

    Standalone usage:

    >>> y_true = [1, 2]
    >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> scce = tf.keras.losses.SparseCategoricalCrossentropy()
    >>> scce(y_true, y_pred).numpy()
    1.177

    >>> # Calling with 'sample_weight'.
    >>> scce(y_true, y_pred, sample_weight=tf.constant([0.3, 0.7])).numpy()
    0.814

    >>> # Using 'sum' reduction type.
    >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> scce(y_true, y_pred).numpy()
    2.354

    >>> # Using 'none' reduction type.
    >>> scce = tf.keras.losses.SparseCategoricalCrossentropy(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> scce(y_true, y_pred).numpy()
    array([0.0513, 2.303], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy())
    ```
    FNsparse_categorical_crossentropyc                    s   t  jt||||d dS )a1  Initializes `SparseCategoricalCrossentropy` instance.

        Args:
          from_logits: Whether `y_pred` is expected to be a logits tensor. By
            default, we assume that `y_pred` encodes a probability distribution.
          ignore_class: Optional integer. The ID of a class to be ignored during
            loss computation. This is useful, for example, in segmentation
            problems featuring a "void" class (commonly -1 or 255) in
            segmentation maps.
            By default (`ignore_class=None`), all classes are considered.
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to
            'sparse_categorical_crossentropy'.
        )r   r   rn   ignore_classN)rI   r   r|   )r   rn   r}   r   r   rM   r   r   r     s    z&SparseCategoricalCrossentropy.__init__rb   r   r   rM   r   r{     s   1r{   zkeras.losses.Hingec                       s*   e Zd ZdZejjdf fdd	Z  ZS )Hingea1  Computes the hinge loss between `y_true` and `y_pred`.

    `loss = maximum(1 - y_true * y_pred, 0)`

    `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
    provided we will convert them to -1 or 1.

    Standalone usage:

    >>> y_true = [[0., 1.], [0., 0.]]
    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> h = tf.keras.losses.Hinge()
    >>> h(y_true, y_pred).numpy()
    1.3

    >>> # Calling with 'sample_weight'.
    >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
    0.55

    >>> # Using 'sum' reduction type.
    >>> h = tf.keras.losses.Hinge(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> h(y_true, y_pred).numpy()
    2.6

    >>> # Using 'none' reduction type.
    >>> h = tf.keras.losses.Hinge(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> h(y_true, y_pred).numpy()
    array([1.1, 1.5], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd', loss=tf.keras.losses.Hinge())
    ```
    hingec                    s   t  jt||d dS )a	  Initializes `Hinge` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to 'hinge'.
        ra   N)rI   r   r   r   rM   r   r   r   &  s    zHinge.__init__rb   r   r   rM   r   r~     s   'r~   zkeras.losses.SquaredHingec                       s*   e Zd ZdZejjdf fdd	Z  ZS )SquaredHingeaa  Computes the squared hinge loss between `y_true` and `y_pred`.

    `loss = square(maximum(1 - y_true * y_pred, 0))`

    `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are
    provided we will convert them to -1 or 1.

    Standalone usage:

    >>> y_true = [[0., 1.], [0., 0.]]
    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> h = tf.keras.losses.SquaredHinge()
    >>> h(y_true, y_pred).numpy()
    1.86

    >>> # Calling with 'sample_weight'.
    >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
    0.73

    >>> # Using 'sum' reduction type.
    >>> h = tf.keras.losses.SquaredHinge(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> h(y_true, y_pred).numpy()
    3.72

    >>> # Using 'none' reduction type.
    >>> h = tf.keras.losses.SquaredHinge(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> h(y_true, y_pred).numpy()
    array([1.46, 2.26], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd', loss=tf.keras.losses.SquaredHinge())
    ```
    squared_hingec                    s   t  jt||d dS )a  Initializes `SquaredHinge` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to 'squared_hinge'.
        ra   N)rI   r   r   r   rM   r   r   r   b  s    zSquaredHinge.__init__rb   r   r   rM   r   r   9  s   (r   zkeras.losses.CategoricalHingec                       s*   e Zd ZdZejjdf fdd	Z  ZS )CategoricalHingea'  Computes the categorical hinge loss between `y_true` and `y_pred`.

    `loss = maximum(neg - pos + 1, 0)`
    where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)`

    Standalone usage:

    >>> y_true = [[0, 1], [0, 0]]
    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> h = tf.keras.losses.CategoricalHinge()
    >>> h(y_true, y_pred).numpy()
    1.4

    >>> # Calling with 'sample_weight'.
    >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
    0.6

    >>> # Using 'sum' reduction type.
    >>> h = tf.keras.losses.CategoricalHinge(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> h(y_true, y_pred).numpy()
    2.8

    >>> # Using 'none' reduction type.
    >>> h = tf.keras.losses.CategoricalHinge(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> h(y_true, y_pred).numpy()
    array([1.2, 1.6], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd', loss=tf.keras.losses.CategoricalHinge())
    ```
    categorical_hingec                    s   t  jt||d dS )a   Initializes `CategoricalHinge` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to 'categorical_hinge'.
        ra   N)rI   r   r   r   rM   r   r   r     s    zCategoricalHinge.__init__rb   r   r   rM   r   r   w  s   &r   zkeras.losses.Poissonc                       s*   e Zd ZdZejjdf fdd	Z  ZS )Poissona  Computes the Poisson loss between `y_true` and `y_pred`.

    `loss = y_pred - y_true * log(y_pred)`

    Standalone usage:

    >>> y_true = [[0., 1.], [0., 0.]]
    >>> y_pred = [[1., 1.], [0., 0.]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> p = tf.keras.losses.Poisson()
    >>> p(y_true, y_pred).numpy()
    0.5

    >>> # Calling with 'sample_weight'.
    >>> p(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
    0.4

    >>> # Using 'sum' reduction type.
    >>> p = tf.keras.losses.Poisson(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> p(y_true, y_pred).numpy()
    0.999

    >>> # Using 'none' reduction type.
    >>> p = tf.keras.losses.Poisson(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> p(y_true, y_pred).numpy()
    array([0.999, 0.], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd', loss=tf.keras.losses.Poisson())
    ```
    poissonc                    s   t  jt||d dS )a  Initializes `Poisson` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to 'poisson'.
        ra   N)rI   r   r   r   rM   r   r   r     s    zPoisson.__init__rb   r   r   rM   r   r     s   $r   zkeras.losses.LogCoshc                       s*   e Zd ZdZejjdf fdd	Z  ZS )LogCosha  Computes the logarithm of the hyperbolic cosine of the prediction error.

    `logcosh = log((exp(x) + exp(-x))/2)`,
    where x is the error `y_pred - y_true`.

    Standalone usage:

    >>> y_true = [[0., 1.], [0., 0.]]
    >>> y_pred = [[1., 1.], [0., 0.]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> l = tf.keras.losses.LogCosh()
    >>> l(y_true, y_pred).numpy()
    0.108

    >>> # Calling with 'sample_weight'.
    >>> l(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
    0.087

    >>> # Using 'sum' reduction type.
    >>> l = tf.keras.losses.LogCosh(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> l(y_true, y_pred).numpy()
    0.217

    >>> # Using 'none' reduction type.
    >>> l = tf.keras.losses.LogCosh(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> l(y_true, y_pred).numpy()
    array([0.217, 0.], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd', loss=tf.keras.losses.LogCosh())
    ```
    log_coshc                    s   t  jt||d dS )a  Initializes `LogCosh` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to 'log_cosh'.
        ra   N)rI   r   r   r   rM   r   r   r     s    zLogCosh.__init__rb   r   r   rM   r   r     s   &r   zkeras.losses.KLDivergencec                       s*   e Zd ZdZejjdf fdd	Z  ZS )KLDivergencea@  Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

    `loss = y_true * log(y_true / y_pred)`

    See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

    Standalone usage:

    >>> y_true = [[0, 1], [0, 0]]
    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> kl = tf.keras.losses.KLDivergence()
    >>> kl(y_true, y_pred).numpy()
    0.458

    >>> # Calling with 'sample_weight'.
    >>> kl(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
    0.366

    >>> # Using 'sum' reduction type.
    >>> kl = tf.keras.losses.KLDivergence(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> kl(y_true, y_pred).numpy()
    0.916

    >>> # Using 'none' reduction type.
    >>> kl = tf.keras.losses.KLDivergence(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> kl(y_true, y_pred).numpy()
    array([0.916, -3.08e-06], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd', loss=tf.keras.losses.KLDivergence())
    ```
    kl_divergencec                    s   t  jt||d dS )a  Initializes `KLDivergence` instance.

        Args:
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to 'kl_divergence'.
        ra   N)rI   r   r   r   rM   r   r   r   P  s    zKLDivergence.__init__rb   r   r   rM   r   r   (  s   'r   zkeras.losses.Huberc                       s,   e Zd ZdZdejjdf fdd	Z  ZS )Huberau  Computes the Huber loss between `y_true` and `y_pred`.

    For each value x in `error = y_true - y_pred`:

    ```
    loss = 0.5 * x^2                  if |x| <= d
    loss = 0.5 * d^2 + d * (|x| - d)  if |x| > d
    ```
    where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

    Standalone usage:

    >>> y_true = [[0, 1], [0, 0]]
    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> h = tf.keras.losses.Huber()
    >>> h(y_true, y_pred).numpy()
    0.155

    >>> # Calling with 'sample_weight'.
    >>> h(y_true, y_pred, sample_weight=[1, 0]).numpy()
    0.09

    >>> # Using 'sum' reduction type.
    >>> h = tf.keras.losses.Huber(
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> h(y_true, y_pred).numpy()
    0.31

    >>> # Using 'none' reduction type.
    >>> h = tf.keras.losses.Huber(
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> h(y_true, y_pred).numpy()
    array([0.18, 0.13], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd', loss=tf.keras.losses.Huber())
    ```
          ?
huber_lossc                    s   t  jt|||d dS )a  Initializes `Huber` instance.

        Args:
          delta: A float, the point where the Huber loss function changes from a
            quadratic to linear.
          reduction: Type of `tf.keras.losses.Reduction` to apply to
            loss. Default value is `AUTO`. `AUTO` indicates that the reduction
            option will be determined by the usage context. For almost all cases
            this defaults to `SUM_OVER_BATCH_SIZE`. When used with
            `tf.distribute.Strategy`, outside of built-in training loops such as
            `tf.keras` `compile` and `fit`, using `AUTO` or
            `SUM_OVER_BATCH_SIZE` will raise an error. Please see this custom
            training [tutorial](
            https://www.tensorflow.org/tutorials/distribute/custom_training) for
            more details.
          name: Optional name for the instance. Defaults to 'huber_loss'.
        )r   r   deltaN)rI   r   huber)r   r   r   r   rM   r   r   r     s    zHuber.__init__rb   r   r   rM   r   r   e  s
   ,r   z keras.metrics.mean_squared_errorzkeras.metrics.msezkeras.metrics.MSEzkeras.losses.mean_squared_errorzkeras.losses.msezkeras.losses.MSEc                 C   s0   t |}t | |j} tjt j|| ddS )a#  Computes the mean squared error between labels and predictions.

    After computing the squared distance between the inputs, the mean value over
    the last dimension is returned.

    `loss = mean(square(y_true - y_pred), axis=-1)`

    Standalone usage:

    >>> y_true = np.random.randint(0, 2, size=(2, 3))
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.mean_squared_error(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> assert np.array_equal(
    ...     loss.numpy(), np.mean(np.square(y_true - y_pred), axis=-1))

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

    Returns:
      Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
    rk   rp   )r$   convert_to_tensorcastdtyper   meanmathsquared_differencer.   r/   r   r   r   r`     s    !
r`   Fc                    s8  dd dd fdd  fdd}t |tjsH|| S |j d	d
 }t|dkrxtj||jd}ntj	g |jd}dd ||fD }|rdd |D }|d |d	 d	 kr|d	 dd
 |d	< t
j|t|d	kd}	t|}
t|
$ tj|	||f|dW  d   S 1 s*0    Y  dS )a  Apply a loss function on a per batch basis.

    Args:
      loss_fn: The loss function
      y_true: truth values (RaggedTensor)
      y_pred: predicted values (RaggedTensor)
      y_pred_extra_dim: whether y_pred has an additional dimension compared to
        y_true

    Returns:
      Loss-function result. A dense tensor if the output has a single dimension
      (per-batch loss value); a ragged tensor otherwise.
    c                 S   s   t dd |  D S )zReturns true if this RaggedTensor has the same row_lenghts across

           all ragged dimensions and thus can be converted to a dense tensor
           without loss of information.

        Args:
          rt: RaggedTensor.
        c                 S   s4   g | ],}t t jt |t t d gqS )rj   )r$   equalr   reduce_variancer   r   floatxconstant).0row_lensr   r   r   
<listcomp>  s   
zH_ragged_tensor_apply_loss.<locals>.rt_is_equiv_dense.<locals>.<listcomp>)r$   
reduce_allnested_row_lengths)rtr   r   r   rt_is_equiv_dense  s
    	z4_ragged_tensor_apply_loss.<locals>.rt_is_equiv_densec                 S   s   t dd | D S )Nc                 s   s&   | ]}t |tjr| n|V  qd S N)
isinstancer$   RaggedTensor	to_tensorr   r   r   r   r   	<genexpr>  s   zG_ragged_tensor_apply_loss.<locals>._convert_to_dense.<locals>.<genexpr>)tuple)inputsr   r   r   _convert_to_dense  s    z4_ragged_tensor_apply_loss.<locals>._convert_to_densec                    sB    |  }|r&t |tjs&tj|}n|s>t |tjr>| }|S )zAdapt the result to ragged or dense tensor according to the expected

        output type. This is done so that all the return values of the map
        operation have the same type.
        )r   r$   r   from_tensorr   )r   ragged_outputr)loss_fnr   r   
_call_loss  s    z-_ragged_tensor_apply_loss.<locals>._call_lossc                    sH    \}}t |tjr@t| fdd fddS   S )Nc                      s    S r   r   r   )r   r   r   r   r   r   <lambda>      z=_ragged_tensor_apply_loss.<locals>._wrapper.<locals>.<lambda>c                      s
    S r   r   r   )r   r   r   r   r   r     r   )r   r$   r   cond)r   r   r   r/   r   r   r   r   )r   r   r   _wrapper  s    z+_ragged_tensor_apply_loss.<locals>._wrapper   rk   r   )shaper   c                 S   s   g | ]
}|j qS r   )nested_row_splitsr   r   r   r   r     r   z-_ragged_tensor_apply_loss.<locals>.<listcomp>c                 S   s   g | ]}t |qS r   )len)r   slistr   r   r   r     r   N)r   )elemsr   )r   r$   r   r   r   as_listr   RaggedTensorSpecr   
TensorSpec	functoolspartialr   assert_splits_matchcontrol_dependenciesr
   map_fn)r   r.   r/   y_pred_extra_dimr   lshapespecnested_splits_listrdimsr   assertion_listr   r   r   _ragged_tensor_apply_loss  s&    
r   c                 C   s   t t| |S )a  Implements support for handling RaggedTensors.

    Args:
      y_true: RaggedTensor truth values. shape = `[batch_size, d0, .. dN]`.
      y_pred: RaggedTensor predicted values. shape = `[batch_size, d0, .. dN]`.

    Returns:
      Mean squared error values. shape = `[batch_size, d0, .. dN-1]`.
      When the number of dimensions of the batch feature vector [d0, .. dN] is
      greater than one the return value is a RaggedTensor. Otherwise a Dense
      tensor with dimensions [batch_size] is returned.
    )r   r`   r   r   r   r   _ragged_tensor_mse*  s    r   z!keras.metrics.mean_absolute_errorzkeras.metrics.maezkeras.metrics.MAEz keras.losses.mean_absolute_errorzkeras.losses.maezkeras.losses.MAEc                 C   s0   t |}t | |j} tjt ||  ddS )a  Computes the mean absolute error between labels and predictions.

    `loss = mean(abs(y_true - y_pred), axis=-1)`

    Standalone usage:

    >>> y_true = np.random.randint(0, 2, size=(2, 3))
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.mean_absolute_error(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> assert np.array_equal(
    ...     loss.numpy(), np.mean(np.abs(y_true - y_pred), axis=-1))

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

    Returns:
      Mean absolute error values. shape = `[batch_size, d0, .. dN-1]`.
    rk   r   )r$   r   r   r   r   r   absr   r   r   r   rd   ;  s    
rd   c                 C   s   t t| |S )z-RaggedTensor adapter for mean_absolute_error.)r   rd   r   r   r   r   _ragged_tensor_mae^  s    r   z,keras.metrics.mean_absolute_percentage_errorzkeras.metrics.mapezkeras.metrics.MAPEz+keras.losses.mean_absolute_percentage_errorzkeras.losses.mapezkeras.losses.MAPEc                 C   sN   t |}t | |j} t | | tt | t  }dtj|dd S )a>  Computes the mean absolute percentage error between `y_true` and `y_pred`.

    `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)`

    Standalone usage:

    >>> y_true = np.random.random(size=(2, 3))
    >>> y_true = np.maximum(y_true, 1e-7)  # Prevent division by zero
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.mean_absolute_percentage_error(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> assert np.array_equal(
    ...     loss.numpy(),
    ...     100. * np.mean(np.abs((y_true - y_pred) / y_true), axis=-1))

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

    Returns:
      Mean absolute percentage error values. shape = `[batch_size, d0, ..
      dN-1]`.
    g      Y@rk   r   )	r$   r   r   r   r   r   maximumepsilonr   )r.   r/   diffr   r   r   rf   d  s    !
rf   c                 C   s   t t| |S )zSupport RaggedTensors.)r   rf   r   r   r   r   _ragged_tensor_mape  s    r   z,keras.metrics.mean_squared_logarithmic_errorzkeras.metrics.mslezkeras.metrics.MSLEz+keras.losses.mean_squared_logarithmic_errorzkeras.losses.mslezkeras.losses.MSLEc                 C   sh   t |}t | |j} t jt|t d }t jt| t d }tj	t j
||ddS )ap  Computes the mean squared logarithmic error between `y_true` and `y_pred`.

    `loss = mean(square(log(y_true + 1) - log(y_pred + 1)), axis=-1)`

    Standalone usage:

    >>> y_true = np.random.randint(0, 2, size=(2, 3))
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.mean_squared_logarithmic_error(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> y_true = np.maximum(y_true, 1e-7)
    >>> y_pred = np.maximum(y_pred, 1e-7)
    >>> assert np.allclose(
    ...     loss.numpy(),
    ...     np.mean(
    ...         np.square(np.log(y_true + 1.) - np.log(y_pred + 1.)), axis=-1))

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

    Returns:
      Mean squared logarithmic error values. shape = `[batch_size, d0, ..
      dN-1]`.
    r   rk   r   )r$   r   r   r   r   logr   r   r   r   r   )r.   r/   	first_log
second_logr   r   r   rh     s    #
rh   c                 C   s   t t| |S )z.Implements support for handling RaggedTensors.)r   rh   r   r   r   r   _ragged_tensor_msle  s    r   c                    sT   t  d}t  d}t t ||} fdd}t jj|| fdd}|S )z!Converts binary labels into -1/1.r   r   c                      s   d  d S )Nrt   r   r   r   r.   r   r   _convert_binary_labels  s    z5_maybe_convert_labels.<locals>._convert_binary_labelsc                      s    S r   r   r   r   r   r   r     r   z'_maybe_convert_labels.<locals>.<lambda>)r$   r   r   
logical_orr'   
smart_cond)r.   	are_zerosare_ones	is_binaryr   updated_y_truer   r   r   _maybe_convert_labels  s    r   zkeras.metrics.squared_hingezkeras.losses.squared_hingec                 C   sD   t |}t | |j} t| } tjt t d| |  dddS )ac  Computes the squared hinge loss between `y_true` and `y_pred`.

    `loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1)`

    Standalone usage:

    >>> y_true = np.random.choice([-1, 1], size=(2, 3))
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.squared_hinge(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> assert np.array_equal(
    ...     loss.numpy(),
    ...     np.mean(np.square(np.maximum(1. - y_true * y_pred, 0.)), axis=-1))

    Args:
      y_true: The ground truth values. `y_true` values are expected to be -1 or
        1. If binary (0 or 1) labels are provided we will convert them to -1 or
        1. shape = `[batch_size, d0, .. dN]`.
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

    Returns:
       Squared hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
    r   rj   rk   r   )	r$   r   r   r   r   r   r   squarer   r   r   r   r   r     s    
r   zkeras.metrics.hingezkeras.losses.hingec                 C   s>   t |}t | |j} t| } tjt d| |  dddS )a9  Computes the hinge loss between `y_true` and `y_pred`.

    `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)`

    Standalone usage:

    >>> y_true = np.random.choice([-1, 1], size=(2, 3))
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.hinge(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> assert np.array_equal(
    ...     loss.numpy(),
    ...     np.mean(np.maximum(1. - y_true * y_pred, 0.), axis=-1))

    Args:
      y_true: The ground truth values. `y_true` values are expected to be -1 or
        1. If binary (0 or 1) labels are provided they will be converted to -1
        or 1. shape = `[batch_size, d0, .. dN]`.
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

    Returns:
      Hinge loss values. shape = `[batch_size, d0, .. dN-1]`.
    r   rj   rk   r   )r$   r   r   r   r   r   r   r   r   r   r   r   r     s    
r   zkeras.losses.categorical_hingec                 C   sb   t |}t | |j} t j| | dd}t jd|  | dd}t d|j}t || d |S )a|  Computes the categorical hinge loss between `y_true` and `y_pred`.

    `loss = maximum(neg - pos + 1, 0)`
    where `neg=maximum((1-y_true)*y_pred) and pos=sum(y_true*y_pred)`

    Standalone usage:

    >>> y_true = np.random.randint(0, 3, size=(2,))
    >>> y_true = tf.keras.utils.to_categorical(y_true, num_classes=3)
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.categorical_hinge(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> pos = np.sum(y_true * y_pred, axis=-1)
    >>> neg = np.amax((1. - y_true) * y_pred, axis=-1)
    >>> assert np.array_equal(loss.numpy(), np.maximum(0., neg - pos + 1.))

    Args:
      y_true: The ground truth values. `y_true` values are expected to be
      either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor).
      y_pred: The predicted values.

    Returns:
      Categorical hinge loss values.
    rk   r   r   rj   )r$   r   r   r   
reduce_sum
reduce_maxr   )r.   r/   posnegzeror   r   r   r     s    
r   zkeras.losses.huber)v1r   c              
   C   s   t j|t d}t j| t d} t j|t d}t || }t |}t jd|jd}tjt 	||k|t 
| || |t 
|  ddS )a  Computes Huber loss value.

    For each value x in `error = y_true - y_pred`:

    ```
    loss = 0.5 * x^2                  if |x| <= d
    loss = d * |x| - 0.5 * d^2        if |x| > d
    ```
    where d is `delta`. See: https://en.wikipedia.org/wiki/Huber_loss

    Args:
      y_true: tensor of true targets.
      y_pred: tensor of predicted targets.
      delta: A float, the point where the Huber loss function changes from a
        quadratic to linear.

    Returns:
      Tensor with one scalar loss entry per sample.
    r         ?rk   r   )r$   r   r   r   subtractr   r   r   r   wherer   )r.   r/   r   error	abs_errorhalfr   r   r   r   >  s    
r   zkeras.losses.log_coshzkeras.losses.logcoshzkeras.metrics.log_coshzkeras.metrics.logcoshc                 C   s6   t |}t | |j} dd }tj|||  ddS )a  Logarithm of the hyperbolic cosine of the prediction error.

    `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and
    to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly
    like the mean squared error, but will not be so strongly affected by the
    occasional wildly incorrect prediction.

    Standalone usage:

    >>> y_true = np.random.random(size=(2, 3))
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.logcosh(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> x = y_pred - y_true
    >>> assert np.allclose(
    ...     loss.numpy(),
    ...     np.mean(x + np.log(np.exp(-2. * x) + 1.) - tf.math.log(2.),
    ...             axis=-1),
    ...     atol=1e-5)

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

    Returns:
      Logcosh error values. shape = `[batch_size, d0, .. dN-1]`.
    c                 S   s*   | t jd|   t t jd| j S )Ng       rt   )r$   r   softplusr   r   r   )xr   r   r   _logcosh  s    (zlog_cosh.<locals>._logcoshrk   r   )r$   r   r   r   r   r   )r.   r/   r   r   r   r   r   d  s    #
r   z&keras.metrics.categorical_crossentropyz%keras.losses.categorical_crossentropyrj   rk   c                    s   t |tr"td| dt| ttjtj jd  fdd}tjj		 |fddt
j||dS )	a  Computes the categorical crossentropy loss.

    Standalone usage:

    >>> y_true = [[0, 1, 0], [0, 0, 1]]
    >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
    >>> loss = tf.keras.losses.categorical_crossentropy(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> loss.numpy()
    array([0.0513, 2.303], dtype=float32)

    Args:
      y_true: Tensor of one-hot true targets.
      y_pred: Tensor of predicted targets.
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
        example, if `0.1`, use `0.1 / num_classes` for non-target labels
        and `0.9 + 0.1 / num_classes` for target labels.
      axis: Defaults to -1. The dimension along which the entropy is
        computed.

    Returns:
      Categorical crossentropy loss value.
    z-`axis` must be of type `int`. Received: axis=z	 of type r   c                     s,   t t d j} d    |   S )Nrk   r   )r$   r   r   r   )num_classesro   r/   r.   r   r   _smooth_labels  s    
z0categorical_crossentropy.<locals>._smooth_labelsc                      s    S r   r   r   r   r   r   r     r   z*categorical_crossentropy.<locals>.<lambda>)rn   rp   )r   boolr@   typer$   r   r   r   r'   r   r   rz   r.   r/   rn   ro   rp   r   r   r   r   rz     s$    !

rz   c                 C   s   t jt|||d}t|| |S )a  Implements support for handling RaggedTensors.

    Args:
      y_true: Tensor of one-hot true targets.
      y_pred: Tensor of predicted targets.
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
        example, if `0.1`, use `0.1 / num_classes` for non-target labels
        and `0.9 + 0.1 / num_classes` for target labels.
      axis: The axis along which to compute crossentropy (the features axis).
          Defaults to -1.

    Returns:
      Categorical crossentropy loss value.

    Expected shape: (batch, sequence_len, n_classes) with sequence_len
    being variable per batch.
    Return shape: (batch, sequence_len).

    When used by CategoricalCrossentropy() with the default reduction
    (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
    number of elements independent of the batch. E.g. if the RaggedTensor
    has 2 batches with [2, 1] values respectively the resulting loss is
    the sum of the individual loss values divided by 3.
    rn   ro   rp   )r   r   rz   r   r.   r/   rn   ro   rp   rJ   r   r   r   '_ragged_tensor_categorical_crossentropy  s    r   z-keras.metrics.sparse_categorical_crossentropyz,keras.losses.sparse_categorical_crossentropyc                 C   s   t j| ||||dS )a  Computes the sparse categorical crossentropy loss.

    Standalone usage:

    >>> y_true = [1, 2]
    >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]]
    >>> loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> loss.numpy()
    array([0.0513, 2.303], dtype=float32)

    >>> y_true = [[[ 0,  2],
    ...            [-1, -1]],
    ...           [[ 0,  2],
    ...            [-1, -1]]]
    >>> y_pred = [[[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]],
    ...             [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]],
    ...           [[[1.0, 0.0, 0.0], [0.0, 0.5, 0.5]],
    ...            [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]]]
    >>> loss = tf.keras.losses.sparse_categorical_crossentropy(
    ...   y_true, y_pred, ignore_class=-1)
    >>> loss.numpy()
    array([[[2.3841855e-07, 2.3841855e-07],
            [0.0000000e+00, 0.0000000e+00]],
           [[2.3841855e-07, 6.9314730e-01],
            [0.0000000e+00, 0.0000000e+00]]], dtype=float32)

    Args:
      y_true: Ground truth values.
      y_pred: The predicted values.
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      axis: Defaults to -1. The dimension along which the entropy is
        computed.
      ignore_class: Optional integer. The ID of a class to be ignored during
        loss computation. This is useful, for example, in segmentation
        problems featuring a "void" class (commonly -1 or 255) in segmentation
        maps. By default (`ignore_class=None`), all classes are considered.

    Returns:
      Sparse categorical crossentropy loss value.
    rn   r}   rp   )r   r|   )r.   r/   rn   rp   r}   r   r   r   r|     s    2r|   c                 C   s"   t jt|||d}t|| |ddS )a%  Implements support for handling RaggedTensors.

    Expected y_pred shape: (batch, sequence_len, n_classes) with sequence_len
    being variable per batch.
    Return shape: (batch, sequence_len).

    When used by SparseCategoricalCrossentropy() with the default reduction
    (SUM_OVER_BATCH_SIZE), the reduction averages the loss over the
    number of elements independent of the batch. E.g. if the RaggedTensor
    has 2 batches with [2, 1] values respectively, the resulting loss is
    the sum of the individual loss values divided by 3.
    r   T)r   )r   r   r|   r   )r.   r/   rn   rp   r}   rJ   r   r   r   ._ragged_tensor_sparse_categorical_crossentropy-  s    r   z!keras.metrics.binary_crossentropyz keras.losses.binary_crossentropyc                    sj   t |}t |jt j |jd  fdd}t jj |fddtjtj||d|dS )a  Computes the binary crossentropy loss.

    Standalone usage:

    >>> y_true = [[0, 1], [0, 0]]
    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
    >>> loss = tf.keras.losses.binary_crossentropy(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> loss.numpy()
    array([0.916 , 0.714], dtype=float32)

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
        squeezing them towards 0.5 That is, using `1. - 0.5 * label_smoothing`
        for the target class and `0.5 * label_smoothing` for the non-target
        class.
      axis: The axis along which the mean is computed. Defaults to -1.

    Returns:
      Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
    r   c                      s   d   d   S Nr   r   r   r   ro   r.   r   r   r   j  s    z+binary_crossentropy.<locals>._smooth_labelsc                      s    S r   r   r   r   r   r   r   n  r   z%binary_crossentropy.<locals>.<lambda>)rn   r   )	r$   r   r   r   r'   r   r   r   rl   r   r   r   r   rl   F  s     
rl   c                 C   s   t jt|||d}t|| |S )a  Implements support for handling RaggedTensors.

    Args:
      y_true: Tensor of one-hot true targets.
      y_pred: Tensor of predicted targets.
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
        example, if `0.1`, use `0.1 / num_classes` for non-target labels
        and `0.9 + 0.1 / num_classes` for target labels.
      axis: Axis along which to compute crossentropy.

    Returns:
      Binary crossentropy loss value.

    Expected shape: (batch, sequence_len) with sequence_len being variable
    per batch.
    Return shape: (batch,); returns the per batch mean of the loss values.

    When used by BinaryCrossentropy() with the default reduction
    (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over
    the number of batches.
    r   )r   r   rl   r   r   r   r   r   "_ragged_tensor_binary_crossentropyw  s    r   z'keras.metrics.binary_focal_crossentropyz&keras.losses.binary_focal_crossentropyrs   rt   c           	   	      sp   t |}t |jt j |jd  fdd}t jj |fddtjtj|||||d|dS )a  Computes the binary focal crossentropy loss.

    According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it
    helps to apply a focal factor to down-weight easy examples and focus more on
    hard examples. By default, the focal tensor is computed as follows:

    `focal_factor = (1 - output)**gamma` for class 1
    `focal_factor = output**gamma` for class 0
    where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal
    effect on the binary crossentropy loss.

    If `apply_class_balancing == True`, this function also takes into account a
    weight balancing factor for the binary classes 0 and 1 as follows:

    `weight = alpha` for class 1 (`target == 1`)
    `weight = 1 - alpha` for class 0
    where `alpha` is a float in the range of `[0, 1]`.

    Standalone usage:

    >>> y_true = [[0, 1], [0, 0]]
    >>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
    >>> loss = tf.keras.losses.binary_focal_crossentropy(y_true, y_pred,
    ...                                                  gamma=2)
    >>> assert loss.shape == (2,)
    >>> loss.numpy()
    array([0.330, 0.206], dtype=float32)

    Args:
      y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`.
      y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`.
      apply_class_balancing: A bool, whether to apply weight balancing on the
        binary classes 0 and 1.
      alpha: A weight balancing factor for class 1, default is `0.25` as
        mentioned in the reference. The weight for class 0 is `1.0 - alpha`.
      gamma: A focusing parameter, default is `2.0` as mentioned in the
        reference.
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      label_smoothing: Float in `[0, 1]`. If higher than 0 then smooth the
        labels by squeezing them towards `0.5`, i.e., using `1. - 0.5 *
        label_smoothing` for the target class and `0.5 * label_smoothing` for
        the non-target class.
      axis: The axis along which the mean is computed. Defaults to `-1`.

    Returns:
      Binary focal crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
    r   c                      s   d   d   S r   r   r   r   r   r   r     s    z1binary_focal_crossentropy.<locals>._smooth_labelsc                      s    S r   r   r   r   r   r   r     r   z+binary_focal_crossentropy.<locals>.<lambda>)targetoutputrv   rw   rx   rn   r   )	r$   r   r   r   r'   r   r   r   ru   )	r.   r/   rv   rw   rx   rn   ro   rp   r   r   r   r   ru     s$    ?
ru   c           	   	   C   s$   t jt||||||d}t|| |S )ap  Implements support for handling RaggedTensors.

    Expected shape: `(batch, sequence_len)` with sequence_len being variable per
    batch.
    Return shape: `(batch,)`; returns the per batch mean of the loss values.

    When used by BinaryFocalCrossentropy() with the default reduction
    (SUM_OVER_BATCH_SIZE), the reduction averages the per batch losses over
    the number of batches.

    Args:
      y_true: Tensor of one-hot true targets.
      y_pred: Tensor of predicted targets.
      apply_class_balancing: A bool, whether to apply weight balancing on the
        binary classes 0 and 1.
      alpha: A weight balancing factor for class 1, default is `0.25` as
        mentioned in the reference [Lin et al., 2018](
        https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is
        `1.0 - alpha`.
      gamma: A focusing parameter, default is `2.0` as mentioned in the
        reference.
      from_logits: Whether `y_pred` is expected to be a logits tensor. By
        default, we assume that `y_pred` encodes a probability distribution.
      label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels. For
        example, if `0.1`, use `0.1 / num_classes` for non-target labels
        and `0.9 + 0.1 / num_classes` for target labels.
      axis: Axis along which to compute crossentropy.

    Returns:
      Binary focal crossentropy loss value.
    )rv   rw   rx   rn   ro   rp   )r   r   ru   r   )	r.   r/   rv   rw   rx   rn   ro   rp   rJ   r   r   r   (_ragged_tensor_binary_focal_crossentropy  s    *	r   zkeras.metrics.kl_divergencez)keras.metrics.kullback_leibler_divergencezkeras.metrics.kldzkeras.metrics.KLDzkeras.losses.kl_divergencez(keras.losses.kullback_leibler_divergencezkeras.losses.kldzkeras.losses.KLDc                 C   sZ   t |}t | |j} t| t d} t|t d}t j| t j	| |  ddS )at  Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

    `loss = y_true * log(y_true / y_pred)`

    See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

    Standalone usage:

    >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float64)
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.kullback_leibler_divergence(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> y_true = tf.keras.backend.clip(y_true, 1e-7, 1)
    >>> y_pred = tf.keras.backend.clip(y_pred, 1e-7, 1)
    >>> assert np.array_equal(
    ...     loss.numpy(), np.sum(y_true * np.log(y_true / y_pred), axis=-1))

    Args:
      y_true: Tensor of true targets.
      y_pred: Tensor of predicted targets.

    Returns:
      A `Tensor` with loss.

    Raises:
      TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
    r   rk   r   )
r$   r   r   r   r   clipr   r   r   r   r   r   r   r   r   (	  s
    '
r   zkeras.metrics.poissonzkeras.losses.poissonc                 C   s>   t |}t | |j} tj|| t j|t    ddS )aX  Computes the Poisson loss between y_true and y_pred.

    The Poisson loss is the mean of the elements of the `Tensor`
    `y_pred - y_true * log(y_pred)`.

    Standalone usage:

    >>> y_true = np.random.randint(0, 2, size=(2, 3))
    >>> y_pred = np.random.random(size=(2, 3))
    >>> loss = tf.keras.losses.poisson(y_true, y_pred)
    >>> assert loss.shape == (2,)
    >>> y_pred = y_pred + 1e-7
    >>> assert np.allclose(
    ...     loss.numpy(), np.mean(y_pred - y_true * np.log(y_pred), axis=-1),
    ...     atol=1e-5)

    Args:
      y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`.
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.

    Returns:
       Poisson loss value. shape = `[batch_size, d0, .. dN-1]`.

    Raises:
      InvalidArgumentError: If `y_true` and `y_pred` have incompatible shapes.
    rk   r   )	r$   r   r   r   r   r   r   r   r   r   r   r   r   r   V	  s
    
r   keras.losses.cosine_similarity)zkeras.metrics.cosine_proximityzkeras.metrics.cosinezkeras.losses.cosine_proximityzkeras.losses.cosiner  c                 C   s4   t jj| |d} t jj||d}t j| | |d S )aI  Computes the cosine similarity between labels and predictions.

    Note that it is a number between -1 and 1. When it is a negative number
    between -1 and 0, 0 indicates orthogonality and values closer to -1
    indicate greater similarity. The values closer to 1 indicate greater
    dissimilarity. This makes it usable as a loss function in a setting
    where you try to maximize the proximity between predictions and
    targets. If either `y_true` or `y_pred` is a zero vector, cosine
    similarity will be 0 regardless of the proximity between predictions
    and targets.

    `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`

    Standalone usage:

    >>> y_true = [[0., 1.], [1., 1.], [1., 1.]]
    >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]]
    >>> loss = tf.keras.losses.cosine_similarity(y_true, y_pred, axis=1)
    >>> loss.numpy()
    array([-0., -0.999, 0.999], dtype=float32)

    Args:
      y_true: Tensor of true targets.
      y_pred: Tensor of predicted targets.
      axis: Axis along which to determine similarity.

    Returns:
      Cosine similarity tensor.
    r   )r$   linalgl2_normalizer   )r.   r/   rp   r   r   r   cosine_similarityz	  s    )r  zkeras.losses.CosineSimilarityc                       s,   e Zd ZdZdejjdf fdd	Z  ZS )CosineSimilaritya
  Computes the cosine similarity between labels and predictions.

    Note that it is a number between -1 and 1. When it is a negative number
    between -1 and 0, 0 indicates orthogonality and values closer to -1
    indicate greater similarity. The values closer to 1 indicate greater
    dissimilarity. This makes it usable as a loss function in a setting
    where you try to maximize the proximity between predictions and targets.
    If either `y_true` or `y_pred` is a zero vector, cosine similarity will be 0
    regardless of the proximity between predictions and targets.

    `loss = -sum(l2_norm(y_true) * l2_norm(y_pred))`

    Standalone usage:

    >>> y_true = [[0., 1.], [1., 1.]]
    >>> y_pred = [[1., 0.], [1., 1.]]
    >>> # Using 'auto'/'sum_over_batch_size' reduction type.
    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1)
    >>> # l2_norm(y_true) = [[0., 1.], [1./1.414, 1./1.414]]
    >>> # l2_norm(y_pred) = [[1., 0.], [1./1.414, 1./1.414]]
    >>> # l2_norm(y_true) . l2_norm(y_pred) = [[0., 0.], [0.5, 0.5]]
    >>> # loss = mean(sum(l2_norm(y_true) . l2_norm(y_pred), axis=1))
    >>> #       = -((0. + 0.) +  (0.5 + 0.5)) / 2
    >>> cosine_loss(y_true, y_pred).numpy()
    -0.5

    >>> # Calling with 'sample_weight'.
    >>> cosine_loss(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
    -0.0999

    >>> # Using 'sum' reduction type.
    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
    ...     reduction=tf.keras.losses.Reduction.SUM)
    >>> cosine_loss(y_true, y_pred).numpy()
    -0.999

    >>> # Using 'none' reduction type.
    >>> cosine_loss = tf.keras.losses.CosineSimilarity(axis=1,
    ...     reduction=tf.keras.losses.Reduction.NONE)
    >>> cosine_loss(y_true, y_pred).numpy()
    array([-0., -0.999], dtype=float32)

    Usage with the `compile()` API:

    ```python
    model.compile(optimizer='sgd',
                  loss=tf.keras.losses.CosineSimilarity(axis=1))
    ```

    Args:
      axis: The axis along which the cosine similarity is computed
        (the features axis). Defaults to -1.
      reduction: Type of `tf.keras.losses.Reduction` to apply to loss.
        Default value is `AUTO`. `AUTO` indicates that the reduction option will
        be determined by the usage context. For almost all cases this defaults
        to `SUM_OVER_BATCH_SIZE`. When used with `tf.distribute.Strategy`,
        outside of built-in training loops such as `tf.keras` `compile` and
        `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE` will raise an error. Please
        see this custom training [tutorial](
        https://www.tensorflow.org/tutorials/distribute/custom_training) for
        more details.
      name: Optional name for the instance.
    rk   r  c                    s   t  jt|||d d S )N)r   r   rp   )rI   r   r  )r   rp   r   r   rM   r   r   r   	  s    zCosineSimilarity.__init__rb   r   r   rM   r   r  	  s
   Br  c                 C   s>   t | tp8t | tr| jtkp8t| dr2| jdkp8| dk}|S )Nr   rz   )r   ry   rH   rJ   rz   hasattrr   )lossresultr   r   r   is_categorical_crossentropy
  s    


	r	  zkeras.losses.serializec                 C   s   t | S )zSerializes loss function or `Loss` instance.

    Args:
      loss: A Keras `Loss` instance or a loss function.

    Returns:
      Loss configuration dictionary.
    r   )r  r   r   r   	serialize
  s    
r
  zkeras.losses.deserializec                 C   s   t | t |ddS )aV  Deserializes a serialized loss class/function instance.

    Args:
        name: Loss configuration.
        custom_objects: Optional dictionary mapping names (strings) to custom
          objects (classes and functions) to be considered during
          deserialization.

    Returns:
        A Keras `Loss` instance or a loss function.
    zloss function)module_objectscustom_objectsprintable_module_name)r   globals)r   r  r   r   r   deserialize
  s    r  zkeras.losses.getc                 C   sV   | du rdS t | tr&t| } t| S t | tr8t| S t| rD| S td|  dS )a  Retrieves a Keras loss as a `function`/`Loss` class instance.

    The `identifier` may be the string name of a loss function or `Loss` class.

    >>> loss = tf.keras.losses.get("categorical_crossentropy")
    >>> type(loss)
    <class 'function'>
    >>> loss = tf.keras.losses.get("CategoricalCrossentropy")
    >>> type(loss)
    <class '...keras.losses.CategoricalCrossentropy'>

    You can also specify `config` of the loss to this function by passing dict
    containing `class_name` and `config` as an identifier. Also note that the
    `class_name` must map to a `Loss` class

    >>> identifier = {"class_name": "CategoricalCrossentropy",
    ...               "config": {"from_logits": True}}
    >>> loss = tf.keras.losses.get(identifier)
    >>> type(loss)
    <class '...keras.losses.CategoricalCrossentropy'>

    Args:
      identifier: A loss identifier. One of None or string name of a loss
        function/class or loss configuration dictionary or a loss function or a
        loss class instance.

    Returns:
      A Keras loss as a `function`/ `Loss` class instance.

    Raises:
      ValueError: If `identifier` cannot be interpreted.
    Nz.Could not interpret loss function identifier: )r   strr  rV   callabler@   )
identifierr   r   r   r\   3
  s    "

r\   int32)F)r   )Frj   rk   )Frj   rk   )Frk   N)Frk   N)Frj   rk   )Frj   rk   )Frs   rt   Frj   rk   )Frs   rt   Frj   rk   )rk   )N)brC   rE   r   tensorflow.compat.v2compatv2r$   kerasr   Zkeras.saving.experimentalr   keras.utilsr   r   r   keras.utils.generic_utilsr   r	   tensorflow.python.ops.raggedr
   r   tensorflow.python.utilr    tensorflow.python.util.tf_exportr   tensorflow.tools.docsr   r   rH   r_   rc   re   rg   ri   rr   ry   r{   r~   r   r   r   r   r   r   r'   add_dispatch_supportr`   r   dispatch_for_typesr   r   rd   r   rf   r   rh   r   r   r   r   r   r   r   rz   r   r|   r   rl   r   ru   r   r   r   r  r  bceBCEmseMSEmaeMAEmapeMAPEmsleMSLEkldKLDkullback_leibler_divergencelogcoshr   r	  r
  r  r\   r   r3   sparse_softmax_cross_entropyLABEL_DTYPES_FOR_LOSSESr   r   r   r   <module>   s   :N;=E@l .XW;=;8;<E
Y

 
#
 !
$' 4 & 6  - #      R      5
#"
#N

0