a
    BCCf?                     @   s   d Z g dZddlZddlmZmZ ddlmZ ddlmZ ddl	m
Z ddlmZmZmZmZ eg d	dd
fddZd#ddZeg d	dfddZd$ddZg d	dfddZg d	ddfddZd%ddZd&ddZd'dd Zd(d!d"ZdS ))zB
Additional statistics functions with support for masked arrays.

)
compare_medians_mshdquantileshdmedianhdquantiles_sdidealfourthsmedian_cihsmjcimquantiles_cimjrshtrimmed_mean_ci    N)float64ndarray)MaskedArray   )_mstats_basic)normbetatbinom)g      ?      ?g      ?Fc                 C   s   dd }t j| dtd} tt|}|du s:| jdkrH|| ||}n*| jdkr`td| j t ||| ||}t j	|dd	S )
a$  
    Computes quantile estimates with the Harrell-Davis method.

    The quantile estimates are calculated as a weighted linear combination
    of order statistics.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of probabilities at which to compute the quantiles.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdquantiles : MaskedArray
        A (p,) array of quantiles (if `var` is False), or a (2,p) array of
        quantiles and variances (if `var` is True), where ``p`` is the
        number of quantiles.

    See Also
    --------
    hdquantiles_sd

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.stats.mstats import hdquantiles
    >>>
    >>> # Sample data
    >>> data = np.array([1.2, 2.5, 3.7, 4.0, 5.1, 6.3, 7.0, 8.2, 9.4])
    >>>
    >>> # Probabilities at which to compute quantiles
    >>> probabilities = [0.25, 0.5, 0.75]
    >>>
    >>> # Compute Harrell-Davis quantile estimates
    >>> quantile_estimates = hdquantiles(data, prob=probabilities)
    >>>
    >>> # Display the quantile estimates
    >>> for i, quantile in enumerate(probabilities):
    ...     print(f"{int(quantile * 100)}th percentile: {quantile_estimates[i]}")
    25th percentile: 3.1505820231763066 # may vary
    50th percentile: 5.194344084883956
    75th percentile: 7.430626414674935

    c                 S   sJ  t t |  t}|j}t dt|ft	}|dk rTt j
|_|rL|S |d S t |d t| }tj}t|D ]t\}}	|||d |	 |d d|	  }
|
dd |
dd  }t ||}||d|f< t ||| d |d|f< qx|d |d|dkf< |d |d|dkf< |rBt j
 |d|dkf< |d|dkf< |S |d S )zGComputes the HD quantiles for a 1D array. Returns nan for invalid data.   r   r   N)npsqueezesort
compressedviewr   sizeemptylenr   nanflatarangefloatr   cdf	enumeratedot)dataprobvarxsortednZhdvbetacdfip_wwZhd_mean r2   V/var/www/html/django/DPS/env/lib/python3.9/site-packages/scipy/stats/_mstats_extras.py_hd_1DP   s,     "zhdquantiles.<locals>._hd_1DFcopydtypeNr   r   DArray 'data' must be at most two dimensional, but got data.ndim = %dr6   )
maarrayr   r   
atleast_1dasarrayndim
ValueErrorapply_along_axisfix_invalid)r'   r(   axisr)   r4   r/   resultr2   r2   r3   r      s    4
r   r   c                 C   s   t | dg||d}| S )a9  
    Returns the Harrell-Davis estimate of the median along the given axis.

    Parameters
    ----------
    data : ndarray
        Data array.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.
    var : bool, optional
        Whether to return the variance of the estimate.

    Returns
    -------
    hdmedian : MaskedArray
        The median values.  If ``var=True``, the variance is returned inside
        the masked array.  E.g. for a 1-D array the shape change from (1,) to
        (2,).

    r   )rB   r)   )r   r   )r'   rB   r)   rC   r2   r2   r3   r   |   s    r   c                 C   sv   dd }t j| dtd} tt|}|du r<|| |}n(| jdkrTtd| j t ||| |}t j	|dd
 S )	a  
    The standard error of the Harrell-Davis quantile estimates by jackknife.

    Parameters
    ----------
    data : array_like
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    hdquantiles_sd : MaskedArray
        Standard error of the Harrell-Davis quantile estimates.

    See Also
    --------
    hdquantiles

    c                 S   s  t |  }t|}t t|t}|dk r6t j|_t |t	|d  }t
j}t|D ]\}}|||| |d|  }	|	dd |	dd  }
t |}t |
|dd  |dd< |dd  t |
ddd |ddd  ddd 7  < t | |d  ||< qZ|S )z%Computes the std error for 1D arrays.r   r   Nr   r   )r   r   r   r   r   r   r    r!   r"   r#   r   r$   r%   Z
zeros_likeZcumsumsqrtr)   )r'   r(   r*   r+   Zhdsdvvr-   r.   r/   r0   r1   Zmx_r2   r2   r3   _hdsd_1D   s    
<z hdquantiles_sd.<locals>._hdsd_1DFr5   Nr   r8   r9   )r:   r;   r   r   r<   r=   r>   r?   r@   rA   Zravel)r'   r(   rB   rF   r/   rC   r2   r2   r3   r      s    
r   皙?rH   TT皙?c           
      C   s|   t j| dd} tj| |||d}||}tj| |||d}||d }td|d  |}	t	||	|  ||	|  fS )a  
    Selected confidence interval of the trimmed mean along the given axis.

    Parameters
    ----------
    data : array_like
        Input data.
    limits : {None, tuple}, optional
        None or a two item tuple.
        Tuple of the percentages to cut on each side of the array, with respect
        to the number of unmasked data, as floats between 0. and 1. If ``n``
        is the number of unmasked data before trimming, then
        (``n * limits[0]``)th smallest data and (``n * limits[1]``)th
        largest data are masked.  The total number of unmasked data after
        trimming is ``n * (1. - sum(limits))``.
        The value of one limit can be set to None to indicate an open interval.

        Defaults to (0.2, 0.2).
    inclusive : (2,) tuple of boolean, optional
        If relative==False, tuple indicating whether values exactly equal to
        the absolute limits are allowed.
        If relative==True, tuple indicating whether the number of data being
        masked on each side should be rounded (True) or truncated (False).

        Defaults to (True, True).
    alpha : float, optional
        Confidence level of the intervals.

        Defaults to 0.05.
    axis : int, optional
        Axis along which to cut. If None, uses a flattened version of `data`.

        Defaults to None.

    Returns
    -------
    trimmed_mean_ci : (2,) ndarray
        The lower and upper confidence intervals of the trimmed data.

    Fr9   )limits	inclusiverB   r          @)
r:   r;   mstatsZtrimrmeanZtrimmed_stdecountr   ppfr   )
r'   rK   rL   alpharB   ZtrimmedZtmeanZtstdedfZtppfr2   r2   r3   r
      s    *
r
   c                 C   sd   dd }t j| dd} | jdkr.td| j tt|}|du rP|| |S t ||| |S dS )a  
    Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    c                 S   s   t |  } | j}t || d t}tj}t 	t
|t}t jd|d td| }|d|  }t|D ]b\}}	|||	d ||	 |||	d ||	  }
t |
| }t |
| d }t ||d  ||< qn|S )Nr   r   )r7   g      ?r   )r   r   r   r   r;   Zastypeintr   r$   r   r   r   r"   r%   r&   rD   )r'   r/   r+   r(   r-   Zmjxyr.   mWZC1ZC2r2   r2   r3   _mjci_1D  s    (zmjci.<locals>._mjci_1DFr9   r   r8   N)r:   r;   r>   r?   r   r<   r=   r@   )r'   r(   rB   rY   r/   r2   r2   r3   r     s    

r   c                 C   sZ   t |d| }td|d  }tj| |dd|d}t| ||d}|||  |||  fS )a  
    Computes the alpha confidence interval for the selected quantiles of the
    data, with Maritz-Jarrett estimators.

    Parameters
    ----------
    data : ndarray
        Data array.
    prob : sequence, optional
        Sequence of quantiles to compute.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles.
        If None, use a flattened array.

    Returns
    -------
    ci_lower : ndarray
        The lower boundaries of the confidence interval.  Of the same length as
        `prob`.
    ci_upper : ndarray
        The upper boundaries of the confidence interval.  Of the same length as
        `prob`.

    r   rM   r   )ZalphapZbetaprB   rB   )minr   rQ   rN   Z
mquantilesr   )r'   r(   rR   rB   zxqZsmjr2   r2   r3   r   5  s
    r   c                 C   sV   dd }t j| dd} |du r*|| |}n(| jdkrBtd| j t ||| |}|S )aA  
    Computes the alpha-level confidence interval for the median of the data.

    Uses the Hettmasperger-Sheather method.

    Parameters
    ----------
    data : array_like
        Input data. Masked values are discarded. The input should be 1D only,
        or `axis` should be set to None.
    alpha : float, optional
        Confidence level of the intervals.
    axis : int or None, optional
        Axis along which to compute the quantiles. If None, use a flattened
        array.

    Returns
    -------
    median_cihs
        Alpha level confidence interval.

    c           	      S   s>  t |  } t| }t|d| }tt|d |d}t|| |dt|d |d }|d| k r|d8 }t|| |dt|d |d }t|| d |dt||d }|d | ||  }|| | t	||d|  |   }|| |  d| | |d    || || d   d| | ||    f}|S )Nr   rM   r   r   )
r   r   r   r   r[   rT   r   Z_ppfr$   r#   )	r'   rR   r+   kZgkZgkkIlambdZlimsr2   r2   r3   _cihs_1Dn  s    $$$$&zmedian_cihs.<locals>._cihs_1DFr9   Nr   r8   )r:   r;   r>   r?   r@   )r'   rR   rB   ra   rC   r2   r2   r3   r   W  s    
r   c                 C   sn   t j| |dt j||d }}tj| |dtj||d }}t|| t |d |d   }dt| S )a"  
    Compares the medians from two independent groups along the given axis.

    The comparison is performed using the McKean-Schrader estimate of the
    standard error of the medians.

    Parameters
    ----------
    group_1 : array_like
        First dataset.  Has to be of size >=7.
    group_2 : array_like
        Second dataset.  Has to be of size >=7.
    axis : int, optional
        Axis along which the medians are estimated. If None, the arrays are
        flattened.  If `axis` is not None, then `group_1` and `group_2`
        should have the same shape.

    Returns
    -------
    compare_medians_ms : {float, ndarray}
        If `axis` is None, then returns a float, otherwise returns a 1-D
        ndarray of floats with a length equal to the length of `group_1`
        along `axis`.

    Examples
    --------

    >>> from scipy import stats
    >>> a = [1, 2, 3, 4, 5, 6, 7]
    >>> b = [8, 9, 10, 11, 12, 13, 14]
    >>> stats.mstats.compare_medians_ms(a, b, axis=None)
    1.0693225866553746e-05

    The function is vectorized to compute along a given axis.

    >>> import numpy as np
    >>> rng = np.random.default_rng()
    >>> x = rng.random(size=(3, 7))
    >>> y = rng.random(size=(3, 8))
    >>> stats.mstats.compare_medians_ms(x, y, axis=1)
    array([0.36908985, 0.36092538, 0.2765313 ])

    References
    ----------
    .. [1] McKean, Joseph W., and Ronald M. Schrader. "A comparison of methods
       for studentizing the sample median." Communications in
       Statistics-Simulation and Computation 13.6 (1984): 751-773.

    rZ   r   r   )	r:   ZmedianrN   Zstde_medianr   absrD   r   r$   )Zgroup_1Zgroup_2rB   Zmed_1Zmed_2Zstd_1Zstd_2rX   r2   r2   r3   r     s    2$r   c                 C   s>   dd }t j| |dt} |du r,|| S t ||| S dS )aC  
    Returns an estimate of the lower and upper quartiles.

    Uses the ideal fourths algorithm.

    Parameters
    ----------
    data : array_like
        Input array.
    axis : int, optional
        Axis along which the quartiles are estimated. If None, the arrays are
        flattened.

    Returns
    -------
    idealfourths : {list of floats, masked array}
        Returns the two internal values that divide `data` into four parts
        using the ideal fourths algorithm either along the flattened array
        (if `axis` is None) or along `axis` of `data`.

    c                 S   s   |   }t|}|dk r$tjtjgS t|d d d\}}t|}d| ||d   |||   }|| }d| ||  |||d    }||gS )N   g      @g?r   )r   r   r   r    divmodrT   )r'   rU   r+   jhZqlor^   Zqupr2   r2   r3   _idf  s      zidealfourths.<locals>._idfrZ   N)r:   r   r   r   r@   )r'   rB   rg   r2   r2   r3   r     s
    r   c                 C   s   t j| dd} |du r| }ntt|}| jdkr>td|  }t| dd}d|d |d	   |d
  }| dddf |dddf | k	d	}| dddf |dddf | k 	d	}|| d| |  S )a  
    Evaluates Rosenblatt's shifted histogram estimators for each data point.

    Rosenblatt's estimator is a centered finite-difference approximation to the
    derivative of the empirical cumulative distribution function.

    Parameters
    ----------
    data : sequence
        Input data, should be 1-D. Masked values are ignored.
    points : sequence or None, optional
        Sequence of points where to evaluate Rosenblatt shifted histogram.
        If None, use the data.

    Fr9   Nr   z#The input array should be 1D only !rZ   g333333?r   r   rH   rM   )
r:   r;   r   r<   r=   r>   AttributeErrorrP   r   sum)r'   Zpointsr+   rrf   ZnhiZnlor2   r2   r3   r	     s    
**r	   )r   F)rG   rI   rJ   N)rJ   N)N)N)N)__doc____all__numpyr   r   r   Znumpy.mar:   r    r   rN   Zscipy.stats.distributionsr   r   r   r   listr   r   r   r
   r   r   r   r   r   r	   r2   r2   r2   r3   <module>   s&   `
?  
3-"
3
9
(