import numpy as np
from collections import namedtuple
from scipy import special
from scipy import stats
from scipy.stats._stats_py import _rankdata
from ._axis_nan_policy import _axis_nan_policy_factory


def _broadcast_concatenate(x, y, axis):
    """Broadcast then concatenate arrays, leaving concatenation axis last"""
    x = np.moveaxis(x, axis, -1)
    y = np.moveaxis(y, axis, -1)
    z = np.broadcast(x[..., 0], y[..., 0])
    x = np.broadcast_to(x, z.shape + (x.shape[-1],))
    y = np.broadcast_to(y, z.shape + (y.shape[-1],))
    z = np.concatenate((x, y), axis=-1)
    return x, y, z


class _MWU:
    """Distribution of MWU statistic under the null hypothesis"""

    def __init__(self):
        """Minimal initializer"""
        self._fmnks = -np.ones((1, 1, 1))
        self._recursive = None

    def pmf(self, k, m, n):
        if (self._recursive is None and m <= 500 and n <= 500
                or self._recursive):
            return self.pmf_recursive(k, m, n)
        else:
            return self.pmf_iterative(k, m, n)

    def pmf_recursive(self, k, m, n):
        """Probability mass function, recursive version"""
        self._resize_fmnks(m, n, np.max(k))
        # could loop over just the unique elements, but probably not worth
        # the time to find them
        for i in np.ravel(k):
            self._f(m, n, i)
        return self._fmnks[m, n, k] / special.binom(m + n, m)

    def pmf_iterative(self, k, m, n):
        """Probability mass function, iterative version"""
        fmnks = {}
        for i in np.ravel(k):
            fmnks = _mwu_f_iterative(m, n, i, fmnks)
        return (np.array([fmnks[(m, n, ki)] for ki in k])
                / special.binom(m + n, m))

    def cdf(self, k, m, n):
        """Cumulative distribution function"""
        pmfs = self.pmf(np.arange(0, np.max(k) + 1), m, n)
        cdfs = np.cumsum(pmfs)
        return cdfs[k]

    def sf(self, k, m, n):
        """Survival function"""
        # Note that both CDF and SF include the PMF at k, so these are not
        # complements of one another; the PMF at k is added back below.
        kc = np.asarray(m*n - k)  # complement of k
        i = k < kc
        if np.any(i):
            # Evaluate the CDF at the smaller of k and its complement,
            # then convert back to an SF.
            kc[i] = k[i]
            cdfs = np.asarray(self.cdf(kc, m, n))
            cdfs[i] = 1. - cdfs[i] + self.pmf(kc[i], m, n)
        else:
            cdfs = np.asarray(self.cdf(kc, m, n))
        return cdfs[()]

    def _resize_fmnks(self, m, n, k):
        """If necessary, expand the array that remembers PMF values"""
        shape_old = np.array(self._fmnks.shape)
        shape_new = np.array((m + 1, n + 1, k + 1))
        if np.any(shape_new > shape_old):
            shape = np.maximum(shape_old, shape_new)
            fmnks = -np.ones(shape)             # create the new array
            m0, n0, k0 = shape_old
            fmnks[:m0, :n0, :k0] = self._fmnks  # copy remembered values
            self._fmnks = fmnks

    def _f(self, m, n, k):
        """Recursive implementation of function of [3] Theorem 2.5"""
        # [3] Theorem 2.5 Line 1
        if k < 0 or m < 0 or n < 0 or k > m*n:
            return 0

        # if already calculated, return the value
        if self._fmnks[m, n, k] >= 0:
            return self._fmnks[m, n, k]

        if k == 0 and m >= 0 and n >= 0:  # [3] Theorem 2.5 Line 2
            fmnk = 1
        else:  # [3] Theorem 2.5 Line 3 / Equation 3
            fmnk = self._f(m-1, n, k-n) + self._f(m, n-1, k)

        self._fmnks[m, n, k] = fmnk  # remember result

        return fmnk


# Maintain state for faster repeat calls to mannwhitneyu w/ method='exact'
_mwu_state = _MWU()


def _mwu_f_iterative(m, n, k, fmnks):
    """Iterative implementation of function of [3] Theorem 2.5"""

    def _base_case(m, n, k):
        """Base cases from recursive version"""
        # if already calculated, return the value
        if fmnks.get((m, n, k), -1) >= 0:
            return fmnks[(m, n, k)]
        # [3] Theorem 2.5 Line 1
        elif k < 0 or m < 0 or n < 0 or k > m*n:
            return 0
        # [3] Theorem 2.5 Line 2
        elif k == 0 and m >= 0 and n >= 0:
            return 1
        else:
            return None

    stack = [(m, n, k)]
    fmnk = None

    while stack:
        m, n, k = stack.pop()

        # If we're at a base case, continue to the next item in the stack
        fmnk = _base_case(m, n, k)
        if fmnk is not None:
            fmnks[(m, n, k)] = fmnk
            continue

        # If both terms are base cases, continue to the next item in the stack
        f1 = _base_case(m-1, n, k-n)
        f2 = _base_case(m, n-1, k)
        if f1 is not None and f2 is not None:
            # [3] Theorem 2.5 Line 3 / Equation 3
            fmnk = f1 + f2
            fmnks[(m, n, k)] = fmnk
            continue

        # Otherwise, put this term back on the stack followed by the term(s)
        # that still need to be evaluated
        stack.append((m, n, k))
        if f1 is None:
            stack.append((m-1, n, k-n))
        if f2 is None:
            stack.append((m, n-1, k))

    return fmnks


def _get_mwu_z(U, n1, n2, t, axis=0, continuity=True):
    """Standardized MWU statistic"""
    # Follows mannwhitneyu [2]
    mu = n1 * n2 / 2
    n = n1 + n2

    # Tie correction according to [2], "Normal approximation and tie
    # correction"
    tie_term = (t**3 - t).sum(axis=-1)
    s = np.sqrt(n1*n2/12 * ((n + 1) - tie_term/(n*(n-1))))

    numerator = U - mu

    # Continuity correction. Because the SF is always used to calculate the
    # p-value, we can always _subtract_ 0.5 for the continuity correction.
    if continuity:
        numerator -= 0.5

    # no problem evaluating the norm SF at an infinity
    with np.errstate(divide='ignore', invalid='ignore'):
        z = numerator / s
    return z


def _mwu_input_validation(x, y, use_continuity, alternative, axis, method):
    """ Input validation and standardization for mannwhitneyu """
    # Would use np.asarray_chkfinite, but infs are OK
    x, y = np.atleast_1d(x), np.atleast_1d(y)
    if np.isnan(x).any() or np.isnan(y).any():
        raise ValueError('`x` and `y` must not contain NaNs.')
    if np.size(x) == 0 or np.size(y) == 0:
        raise ValueError('`x` and `y` must be of nonzero size.')

    bools = {True, False}
    if use_continuity not in bools:
        raise ValueError(f'`use_continuity` must be one of {bools}.')

    alternatives = {"two-sided", "less", "greater"}
    alternative = alternative.lower()
    if alternative not in alternatives:
        raise ValueError(f'`alternative` must be one of {alternatives}.')

    axis_int = int(axis)
    if axis != axis_int:
        raise ValueError('`axis` must be an integer.')

    if not isinstance(method, stats.PermutationMethod):
        methods = {"asymptotic", "exact", "auto"}
        method = method.lower()
        if method not in methods:
            raise ValueError(f'`method` must be one of {methods}.')

    return x, y, use_continuity, alternative, axis_int, method


def _mwu_choose_method(n1, n2, ties):
    """Choose method 'asymptotic' or 'exact' depending on input size, ties"""

    # if both inputs are large, asymptotic is OK
    if n1 > 8 and n2 > 8:
        return "asymptotic"

    # if there are any ties, asymptotic is preferred
    if ties:
        return "asymptotic"

    return "exact"


MannwhitneyuResult = namedtuple('MannwhitneyuResult', ('statistic', 'pvalue'))


@_axis_nan_policy_factory(MannwhitneyuResult, n_samples=2)
def mannwhitneyu(x, y, use_continuity=True, alternative="two-sided",
                 axis=0, method="auto"):
    r"""Perform the Mann-Whitney U rank test on two independent samples.

    The Mann-Whitney U test is a nonparametric test of the null hypothesis
    that the distribution underlying sample `x` is the same as the
    distribution underlying sample `y`. It is often used as a test of
    difference in location between distributions.

    Parameters
    ----------
    x, y : array-like
        N-d arrays of samples. The arrays must be broadcastable except along
        the dimension given by `axis`.
    use_continuity : bool, optional
        Whether a continuity correction (1/2) should be applied.
        Default is True when `method` is ``'asymptotic'``; has no effect
        otherwise.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis. Default is 'two-sided'.
        Let *F(u)* and *G(u)* be the cumulative distribution functions of the
        distributions underlying `x` and `y`, respectively. Then the following
        alternative hypotheses are available:

        * 'two-sided': the distributions are not equal, i.e. *F(u) ≠ G(u)* for
          at least one *u*.
        * 'less': the distribution underlying `x` is stochastically less
          than the distribution underlying `y`, i.e. *F(u) > G(u)* for all *u*.
        * 'greater': the distribution underlying `x` is stochastically greater
          than the distribution underlying `y`, i.e. *F(u) < G(u)* for all *u*.

        Note that the mathematical expressions in the alternative hypotheses
        above describe the CDFs of the underlying distributions. The directions
        of the inequalities appear inconsistent with the natural language
        description at first glance, but they are not. For example, suppose
        *X* and *Y* are random variables that follow distributions with CDFs
        *F* and *G*, respectively. If *F(u) > G(u)* for all *u*, samples drawn
        from *X* tend to be less than those drawn from *Y*.

        Under a more restrictive set of assumptions, the alternative hypotheses
        can be expressed in terms of the locations of the distributions;
        see [5] section 5.1.
    axis : int, optional
        Axis along which to perform the test. Default is 0.
    method : {'auto', 'asymptotic', 'exact'} or `PermutationMethod` instance, optional
        Selects the method used to calculate the *p*-value.
        Default is 'auto'. The following options are available.

        * ``'asymptotic'``: compares the standardized test statistic
          against the normal distribution, correcting for ties.
        * ``'exact'``: computes the exact *p*-value by comparing the observed
          :math:`U` statistic against the exact distribution of the :math:`U`
          statistic under the null hypothesis. No correction is made for ties.
        * ``'auto'``: chooses ``'exact'`` when the size of one of the samples
          is less than or equal to 8 and there are no ties;
          chooses ``'asymptotic'`` otherwise.
        * `PermutationMethod` instance. In this case, the p-value
          is computed using `permutation_test` with the provided
          configuration options and other appropriate settings.

    Returns
    -------
    res : MannwhitneyuResult
        An object containing attributes:

        statistic : float
            The Mann-Whitney U statistic corresponding with sample `x`. See
            Notes for the test statistic corresponding with sample `y`.
        pvalue : float
            The associated *p*-value for the chosen `alternative`.

    Notes
    -----
    If ``U1`` is the statistic corresponding with sample `x`, then the
    statistic corresponding with sample `y` is
    ``U2 = x.shape[axis] * y.shape[axis] - U1``.

    `mannwhitneyu` is for independent samples. For related / paired samples,
    consider `scipy.stats.wilcoxon`.

    `method` ``'exact'`` is recommended when there are no ties and when either
    sample size is less than 8 [1]_. The implementation follows the recurrence
    relation originally proposed in [1]_ as it is described in [3]_.
    Note that the exact method is *not* corrected for ties, but
    `mannwhitneyu` will not raise errors or warnings if there are ties in the
    data. If there are ties and either sample is small (fewer than ~10
    observations), consider passing an instance of `PermutationMethod`
    as the `method` to perform a permutation test.
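
    For instance, with small, tied samples, one might pass a
    default-configured `PermutationMethod` (the sample values below are
    arbitrary and for illustration only):

    >>> from scipy import stats
    >>> x, y = [1, 2, 2, 3], [2, 3, 3, 4]
    >>> res = stats.mannwhitneyu(x, y, method=stats.PermutationMethod())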

    The Mann-Whitney U test is a non-parametric version of the t-test for
    independent samples. When the means of samples from the populations
    are normally distributed, consider `scipy.stats.ttest_ind`.

    See Also
    --------
    scipy.stats.wilcoxon, scipy.stats.ranksums, scipy.stats.ttest_ind

    References
    ----------
    .. [1] H.B. Mann and D.R. Whitney, "On a test of whether one of two random
           variables is stochastically larger than the other", The Annals of
           Mathematical Statistics, Vol. 18, pp. 50-60, 1947.
    .. [2] Mann-Whitney U Test, Wikipedia,
           http://en.wikipedia.org/wiki/Mann-Whitney_U_test
    .. [3] A. Di Bucchianico, "Combinatorics, computer algebra, and the
           Wilcoxon-Mann-Whitney test", Journal of Statistical Planning and
           Inference, Vol. 79, pp. 349-364, 1999.
    .. [4] Rosie Shier, "Statistics: 2.3 The Mann-Whitney U Test", Mathematics
           Learning Support Centre, 2004.
    .. [5] Michael P. Fay and Michael A. Proschan. "Wilcoxon-Mann-Whitney
           or t-test? On assumptions for hypothesis tests and multiple \
           interpretations of decision rules." Statistics surveys, Vol. 4, pp.
           1-39, 2010. https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2857732/

    Examples
    --------
    We follow the example from [4]_: nine randomly sampled young adults were
    diagnosed with type II diabetes at the ages below.

    >>> males = [19, 22, 16, 29, 24]
    >>> females = [20, 11, 17, 12]

    We use the Mann-Whitney U test to assess whether there is a statistically
    significant difference in the diagnosis age of males and females.
    The null hypothesis is that the distribution of male diagnosis ages is
    the same as the distribution of female diagnosis ages. We decide
    that a confidence level of 95% is required to reject the null hypothesis
    in favor of the alternative that the distributions are different.
    Since the number of samples is very small and there are no ties in the
    data, we can compare the observed test statistic against the *exact*
    distribution of the test statistic under the null hypothesis.

    >>> from scipy.stats import mannwhitneyu
    >>> U1, p = mannwhitneyu(males, females, method="exact")
    >>> print(U1)
    17.0

    `mannwhitneyu` always reports the statistic associated with the first
    sample, which, in this case, is males. This agrees with :math:`U_M = 17`
    reported in [4]_. The statistic associated with the second sample
    can be calculated:

    >>> nx, ny = len(males), len(females)
    >>> U2 = nx*ny - U1
    >>> print(U2)
    3.0

    This agrees with :math:`U_F = 3` reported in [4]_. The two-sided
    *p*-value can be calculated from either statistic, and the value produced
    by `mannwhitneyu` agrees with :math:`p = 0.11` reported in [4]_.

    >>> print(p)
    0.1111111111111111

    The exact distribution of the test statistic is asymptotically normal, so
    the example continues by comparing the exact *p*-value against the
    *p*-value produced using the normal approximation.

    >>> _, pnorm = mannwhitneyu(males, females, method="asymptotic")
    >>> print(pnorm)
    0.11134688653314041

    Here `mannwhitneyu`'s reported *p*-value appears to conflict with the
    value :math:`p = 0.09` given in [4]_. The reason is that [4]_
    does not apply the continuity correction performed by `mannwhitneyu`;
    `mannwhitneyu` reduces the distance between the test statistic and the
    mean :math:`\mu = n_x n_y / 2` by 0.5 to correct for the fact that the
    discrete statistic is being compared against a continuous distribution.
    Here, the :math:`U` statistic used is less than the mean, so we reduce
    the distance by adding 0.5 in the numerator.

    >>> import numpy as np
    >>> from scipy.stats import norm
    >>> U = min(U1, U2)
    >>> N = nx + ny
    >>> z = (U - nx*ny/2 + 0.5) / np.sqrt(nx*ny * (N + 1)/ 12)
    >>> p = 2 * norm.cdf(z)  # use CDF to get p-value from smaller statistic
    >>> print(p)
    0.11134688653314041

    If desired, we can disable the continuity correction to get a result
    that agrees with that reported in [4]_.

    >>> _, pnorm = mannwhitneyu(males, females, use_continuity=False,
    ...                         method="asymptotic")
    >>> print(pnorm)
    0.0864107329737

    Regardless of whether we perform an exact or asymptotic test, the
    probability of the test statistic being as extreme or more extreme by
    chance exceeds 5%, so we do not consider the results statistically
    significant.

    Suppose that, before seeing the data, we had hypothesized that females
    would tend to be diagnosed at a younger age than males.
    In that case, it would be natural to provide the female ages as the
    first input, and we would have performed a one-sided test using
    ``alternative = 'less'``: females are diagnosed at an age that is
    stochastically less than that of males.

    >>> res = mannwhitneyu(females, males, alternative="less", method="exact")
    >>> print(res)
    MannwhitneyuResult(statistic=3.0, pvalue=0.05555555555555555)

    Again, the probability of getting a sufficiently low value of the
    test statistic by chance under the null hypothesis is greater than 5%,
    so we do not reject the null hypothesis in favor of our alternative.

    If it is reasonable to assume that the means of samples from the
    populations are normally distributed, we could have used a t-test to
    perform the analysis.

    >>> from scipy.stats import ttest_ind
    >>> res = ttest_ind(females, males, alternative="less")
    >>> print(res)
    Ttest_indResult(statistic=-2.239334696520584, pvalue=0.030068441095757924)

    Under this assumption, the *p*-value would be low enough to reject the
    null hypothesis in favor of the alternative.
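
    Finally, note that `x` and `y` may be multidimensional; the test is then
    performed independently along `axis`. As a sketch (the array shapes below
    are arbitrary):

    >>> rng = np.random.default_rng()
    >>> x, y = rng.random((3, 5)), rng.random((3, 7))
    >>> res = mannwhitneyu(x, y, axis=-1)
    >>> res.pvalue.shape
    (3,)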

    r   ZaverageT)Zreturn_ties.Nr	   r   rA   rT   rU   rV   rX   rW   )rM   c                    s   t | | |ddjS )NrW   )r_   r`   r
   ra   )mannwhitneyurh   )r   r   r
   r`   r_   r   r   rh     s    zmannwhitneyu.<locals>.statisticr
   r`   r   )rd   r   r   r   rG   r   r3   rf   r0   
_mwu_stater2   Zastyper]   minr!   rQ   r   ZnormZpermutation_test_asdictri   Zcliprg   )r   r   r_   r`   r
   ra   rb   ZxyrJ   rK   ZranksrL   ZR1ZU1ZU2rI   fpr   rh   resr   rk   r   rj      s@     a
"
rj   )r   T)TrS   r   rV   )numpyr   collectionsr   Zscipyr   r   Zscipy.stats._stats_pyr   Z_axis_nan_policyr   r   r   rl   r,   rQ   rd   rf   rg   rj   r   r   r   r   <module>   s"   f4


  