a
    SicK                     @   s   d Z ddlmZ ddlZddlZzddlmZ dZ	W n" e
yV   ddlmZ dZ	Y n0 ddlmZ dd	lmZ G d
d dZG dd dZG dd dZG dd dZdd Zdd ZdS )aV  Statistical transformations for visualization.

This module is currently private, but is being written to eventually form part
of the public API.

The classes should behave roughly in the style of scikit-learn.

- All data-independent parameters should be passed to the class constructor.
- Each class should implement a default transformation that is exposed through
  __call__. These are currently written for vector arguments, but I think
  consuming a whole `plot_data` DataFrame and return it with transformed
  variables would make more sense.
- Some class have data-dependent preprocessing that should be cached and used
  multiple times (think defining histogram bins off all data and then counting
  observations within each bin multiple times per data subsets). These currently
  have unique names, but it would be good to have a common name. Not quite
  `fit`, but something similar.
- Alternatively, the transform interface could take some information about grouping
  variables and do a groupby internally.
- Some classes should define alternate transforms that might make the most sense
  with a different function. For example, KDE usually evaluates the distribution
  on a regular grid, but it would be useful for it to transform at the actual
  datapoints. Then again, this could be controlled by a parameter at  the time of
  class instantiation.

    )NumberN)gaussian_kdeF   T)	bootstrap)_check_argumentc                   @   sr   e Zd ZdZddddddddd	Zd
d Zdd Zdd ZdddZdddZ	dddZ
dddZdddZdS ) KDEz2Univariate and bivariate kernel density estimator.Nr         F)	bw_method	bw_adjustgridsizecutclip
cumulativec                C   sJ   |du rd}|| _ || _|| _|| _|| _|| _|r@tr@tdd| _dS )a'  Initialize the estimator with its parameters.

        Parameters
        ----------
        bw_method : string, scalar, or callable, optional
            Method for determining the smoothing bandwidth to use; passed to
            :class:`scipy.stats.gaussian_kde`.
        bw_adjust : number, optional
            Factor that multiplicatively scales the value chosen using
            ``bw_method``. Increasing will make the curve smoother. See Notes.
        gridsize : int, optional
            Number of points on each dimension of the evaluation grid.
        cut : number, optional
            Factor, multiplied by the smoothing bandwidth, that determines how
            far the evaluation grid extends past the extreme datapoints. When
            set to 0, truncate the curve at the data limits.
        clip : pair of numbers or None, or a pair of such pairs
            Do not evaluate the density outside of these limits.
        cumulative : bool, optional
            If True, estimate a cumulative distribution function. Requires scipy.

        NNNz(Cumulative KDE evaluation requires scipy)	r
   r   r   r   r   r   	_no_scipyRuntimeErrorsupport)selfr
   r   r   r   r   r    r   O/var/www/html/django/DPS/env/lib/python3.9/site-packages/seaborn/_statistics.py__init__+   s    zKDE.__init__c           
      C   sr   |d du rt j n|d }|d du r0t j
 n|d }t| ||  |}t| ||  |}	t ||	|S )z<Create the grid of evaluation points depending for vector x.r   Nr   )npinfmaxminlinspace)
r   xbwr   r   r   Zclip_loZclip_hiZgridminZgridmaxr   r   r   _define_support_gridY   s
    zKDE._define_support_gridc                 C   s8   |  ||}t|j }| ||| j| j| j}|S )z&Create a 1D grid of evaluation points.)	_fitr   sqrt
covariancesqueezer   r   r   r   )r   r   weightskder   gridr   r   r   _define_support_univariatea   s    zKDE._define_support_univariatec           	      C   s   | j }|d du s t|d r(||f}| ||g|}tt|j }| ||d | j	|d | j
}| ||d | j	|d | j
}||fS )z&Create a 2D grid of evaluation points.r   Nr   )r   r   isscalarr    r!   diagr"   r#   r   r   r   )	r   x1x2r$   r   r%   r   grid1grid2r   r   r   _define_support_bivariatej   s    zKDE._define_support_bivariateTc                 C   s2   |du r|  ||}n| |||}|r.|| _|S )z0Create the evaluation grid for a given data set.N)r'   r.   r   )r   r*   r+   r$   cacher   r   r   r   define_support|   s    zKDE.define_supportc                 C   s@   d| j i}|dur||d< t|fi |}||j| j  |S )zAFit the scipy kde while adding bw_adjust logic and version check.r
   Nr$   )r
   r   Zset_bandwidthfactorr   )r   Zfit_datar$   Zfit_kwsr%   r   r   r   r       s    
zKDE._fitc                    sb   | j }|du r| j|dd}| || | jrR|d t fdd|D }n |}||fS )z1Fit and evaluate a univariate on univariate data.NFr/   r   c                    s   g | ]}  |qS r   )Zintegrate_box_1d).0Zs_ir%   Zs_0r   r   
<listcomp>   s   z(KDE._eval_univariate.<locals>.<listcomp>)r   r0   r    r   r   array)r   r   r$   r   densityr   r4   r   _eval_univariate   s    
zKDE._eval_univariatec                 C   s   | j }|du r| j||dd}| ||g|}| jr|\}}t|j|jf}| | f}	t|D ]2\}
}t|D ] \}}|	|	||f||
|f< qvqfn*tj
| \}}|| | g|j}||fS )z0Fit and evaluate a univariate on bivariate data.NFr2   )r   r0   r    r   r   zerossizer   	enumerateZintegrate_boxmeshgridravelreshapeshape)r   r*   r+   r$   r   r%   r,   r-   r7   p0ixijZxjZxx1Zxx2r   r   r   _eval_bivariate   s    zKDE._eval_bivariatec                 C   s&   |du r|  ||S | |||S dS )z1Fit and evaluate on univariate or bivariate data.Nr8   rD   r   r*   r+   r$   r   r   r   __call__   s    zKDE.__call__)NNT)N)N)N)NN)__name__
__module____qualname____doc__r   r   r'   r.   r0   r    r8   rD   rG   r   r   r   r   r   )   s    .	



r   c                   @   sF   e Zd ZdZdddZdd	 ZdddZdd Zdd ZdddZ	dS )	Histogramz-Univariate and bivariate histogram estimator.countautoNFc                 C   sB   g d}t d|| || _|| _|| _|| _|| _|| _d| _dS )au  Initialize the estimator with its parameters.

        Parameters
        ----------
        stat : str
            Aggregate statistic to compute in each bin.

            - `count`: show the number of observations in each bin
            - `frequency`: show the number of observations divided by the bin width
            - `probability` or `proportion`: normalize such that bar heights sum to 1
            - `percent`: normalize such that bar heights sum to 100
            - `density`: normalize such that the total area of the histogram equals 1

        bins : str, number, vector, or a pair of such values
            Generic bin parameter that can be the name of a reference rule,
            the number of bins, or the breaks of the bins.
            Passed to :func:`numpy.histogram_bin_edges`.
        binwidth : number or pair of numbers
            Width of each bin, overrides ``bins`` but can be used with
            ``binrange``.
        binrange : pair of numbers or a pair of pairs
            Lowest and highest value for bin edges; can be used either
            with ``bins`` or ``binwidth``. Defaults to data extremes.
        discrete : bool or pair of bools
            If True, set ``binwidth`` and ``binrange`` such that bin
            edges cover integer values in the dataset.
        cumulative : bool
            If True, return the cumulative statistic.

        )rM   	frequencyr7   probability
proportionpercentstatN)r   rS   binsbinwidthbinrangediscreter   bin_kws)r   rS   rT   rU   rV   rW   r   Zstat_choicesr   r   r   r      s    'zHistogram.__init__c                 C   s   |du r|  |  }}n|\}}|r>t|d |d }	n\|dur|}
t|||
 |
}	|	 |k stt|	dk rt|	|	 |
 }	nt||||}	|	S )z6Inner function that takes bin parameters as arguments.Ng      ?g      ?   )r   r   r   arangelenappendhistogram_bin_edges)r   r   r$   rT   rU   rV   rW   startstop	bin_edgesstepr   r   r   _define_bin_edges   s    zHistogram._define_bin_edgesTc                 C   sf  |du rj|  ||| j| j| j| j}t| jttfr^t|d }|	 |
 f}t||d}n
t|d}ng }t||gD ]\}	}
| j}|rt|ttfrn,t||	 tr||	 }nt|dkr||	 }| j}|du rnt|ts||	 }| j}|du rnt|d ts||	 }| j}t|ts,||	 }||  |
||||| qztt|d}|rb|| _|S )z=Given data, return numpy.histogram parameters to define bins.Nr   )rT   range)rT   rY   r   )rb   rT   rU   rV   rW   
isinstancestrr   r[   r   r   dictr;   boolr\   tuplerX   )r   r*   r+   r$   r/   r`   n_bins	bin_rangerX   rA   r   rT   rU   rV   rW   r   r   r   define_bin_params  sL    

zHistogram.define_bin_paramsc           	      C   s  | j }|du r| j||dd}| jdk}tj||fi |||d^}}tt|d t|d }| jdks~| jd	kr|t|	  }n:| jd
kr|t|	  d }n| jdkr|t| }| j
r| jdv r|| jddjdd}n|jddjdd}||fS )z.Inner function for histogram of two variables.NFr2   r7   r$   r7   r   r   rP   rQ   rR   d   rO   r7   rO   )axis)rX   rk   rS   r   histogram2douterdiffastypefloatsumr   cumsum)	r   r*   r+   r$   rX   r7   histr`   arear   r   r   rD   O  s4    



zHistogram._eval_bivariatec                 C   s   | j }|du r| j||dd}| jdk}tj|fi |||d\}}| jdks\| jdkrp|t|  }n@| jdkr|t|  d	 }n| jd
kr|tt| }| j	r| jdv r|t| 
 }n|
 }||fS )z-Inner function for histogram of one variable.NF)r$   r/   r7   rl   rP   rQ   rR   rm   rO   rn   )rX   rk   rS   r   	histogramrs   rt   ru   rr   r   rv   )r   r   r$   rX   r7   rw   r`   r   r   r   r8   o  s,    



zHistogram._eval_univariatec                 C   s&   |du r|  ||S | |||S dS )z3Count the occurrences in each bin, maybe normalize.NrE   rF   r   r   r   rG     s    zHistogram.__call__)rM   rN   NNFF)NNT)NN)
rH   rI   rJ   rK   r   rb   rk   rD   r8   rG   r   r   r   r   rL      s         
5
< rL   c                   @   s4   e Zd ZdZdddZdd Zdd	 ZdddZd
S )ECDFz7Univariate empirical cumulative distribution estimator.rQ   Fc                 C   s    t dddg| || _|| _dS )a  Initialize the class with its parameters

        Parameters
        ----------
        stat : {{"proportion", "count"}}
            Distribution statistic to compute.
        complementary : bool
            If True, use the complementary CDF (1 - CDF)

        rS   rM   rQ   N)r   rS   complementary)r   rS   r{   r   r   r   r     s    zECDF.__init__c                 C   s   t ddS )z)Inner function for ECDF of two variables.z!Bivariate ECDF is not implementedN)NotImplementedErrorrF   r   r   r   rD     s    zECDF._eval_bivariatec                 C   sp   |  }|| }|| }| }| jdkr6||  }tjtj |f }tjd|f }| jrh| | }||fS )z(Inner function for ECDF of one variable.rQ   r   )argsortrv   rS   r   r   r_r   r{   )r   r   r$   sorteryr   r   r   r8     s    
zECDF._eval_univariateNc                 C   sN   t |}|du rt |}n
t |}|du r<| ||S | |||S dS )zGReturn proportion or count of observations below each sorted datapoint.N)r   asarray	ones_liker8   rD   rF   r   r   r   rG     s    

zECDF.__call__)rQ   F)NN)rH   rI   rJ   rK   r   rD   r8   rG   r   r   r   r   rz     s
   
rz   c                   @   s   e Zd ZdddZdd ZdS )EstimateAggregatorNc                 K   s(   || _ t|\}}|| _|| _|| _dS )a`  
        Data aggregator that produces an estimate and error bar interval.

        Parameters
        ----------
        estimator : callable or string
            Function (or method name) that maps a vector to a scalar.
        errorbar : string, (string, number) tuple, or callable
            Name of errorbar method (either "ci", "pi", "se", or "sd"), or a tuple
            with a method name and a level parameter, or a function that maps from a
            vector to a (min, max) interval.
        boot_kws
            Additional keywords are passed to bootstrap when error_method is "ci".

        N)	estimator_validate_errorbar_argerror_methoderror_levelboot_kws)r   r   errorbarr   methodlevelr   r   r   r     s
    zEstimateAggregator.__init__c           
      C   sJ  || }t | jr| |}n|| j}| jdu r@tj }}nt|dkrXtj }}nt | jrr| |\}}n| jdkr| | j }|| ||  }}n| jdkr|	 | j }|| ||  }}n^| jdkrt
|| j\}}nB| jdkr(|dd}t|f|| jd| j}	t
|	| j\}}t||| d	|| d
|iS )zGAggregate over `var` column of `data` with estimate and error interval.Nr   sdsepiciunits)r   funcr   r   )callabler   aggr   r   nanr[   stdr   sem_percentile_intervalgetr   r   pdSeries)
r   datavarvalsestimateZerr_minZerr_maxZhalf_intervalr   Zbootsr   r   r   rG     s.    





zEstimateAggregator.__call__)N)rH   rI   rJ   r   rG   r   r   r   r   r     s   
r   c                 C   s$   d| d }|d| f}t | |S )z8Return a percentile interval from data of a given width.rm   rY   )r   nanpercentile)r   widthedgepercentilesr   r   r   r     s    r   c              
   C   s   ddddd}d}| du rdS t | r.| dfS t| trJ| }||d}nBz| \}}W n4 ttfy } z|||W Y d}~n
d}~0 0 tdt|| |durt|t	st|||fS )zCCheck type and value of errorbar argument and assign default level._   r   )r   r   r   r   z@`errorbar` must be a callable, string, or (string, number) tupleNr   r   )
r   rd   re   r   
ValueError	TypeError	__class__r   listr   )argDEFAULT_LEVELSusager   r   errr   r   r   r     s*    
"r   )rK   numbersr   numpyr   pandasr   scipy.statsr   r   ImportErrorZexternal.kde
algorithmsr   utilsr   r   rL   rz   r   r   r   r   r   r   r   <module>   s&   
  K5A