a
    BCCfe                     @  s  d dl mZ d dlmZmZ d dlmZ d dlZd dlZ	d dl
mZmZmZ d dlmZ d dlmZ d dlmZ erd d	lmZ d dlmZ d
dgZeG dd dZeG dd dZd#ddddddZddddd
Zdd Zdd ZeG dd dZd$ddd dd!d"dZdS )%    )annotations)	dataclassfield)TYPE_CHECKINGN)specialinterpolatestats)CensoredData)ConfidenceInterval)norm)Literalecdflogrankc                   @  s   e Zd ZU dZded< ded< eddZded< eddZded< eddZded	< eddZ	d
ed< dd Z
dd ZdddZdddddZdd Zdd ZdS )EmpiricalDistributionFunctiona  An empirical distribution function produced by `scipy.stats.ecdf`

    Attributes
    ----------
    quantiles : ndarray
        The unique values of the sample from which the
        `EmpiricalDistributionFunction` was estimated.
    probabilities : ndarray
        The point estimates of the cumulative distribution function (CDF) or
        its complement, the survival function (SF), corresponding with
        `quantiles`.
    
np.ndarray	quantilesprobabilitiesF)repr_n_d_sfstr_kindc           
      C  s   || _ || _|| _|| _|dkr$|nd| | _|| _|dkr@dnd}d| }t|dt|gtj	 tj	g}t|dt|g||g}	t
j||	ddd| _d S )Nsf   r   previousT)kindZassume_sorted)r   r   r   r   r   r   npinsertleninfr   Zinterp1d_f)
selfqpndr   Zf0f1xy r*   Q/var/www/html/django/DPS/env/lib/python3.9/site-packages/scipy/stats/_survival.py__init__+   s     
z&EmpiricalDistributionFunction.__init__c                 C  s
   |  |S )zEvaluate the empirical CDF/SF function at the input.

        Parameters
        ----------
        x : ndarray
            Argument to the CDF/SF

        Returns
        -------
        y : ndarray
            The CDF/SF evaluated at the input
        )r!   )r"   r(   r*   r*   r+   evaluate<   s    z&EmpiricalDistributionFunction.evaluateNc           
   
   K  s   zddl }W n2 ty> } zd}t||W Y d}~n
d}~0 0 |du r\ddlm} | }ddi}|| t| jd }| j}	|	d | gt	|	 |	d | g }	|j
|	| |	fi |S )a4  Plot the empirical distribution function

        Available only if ``matplotlib`` is installed.

        Parameters
        ----------
        ax : matplotlib.axes.Axes
            Axes object to draw the plot onto, otherwise uses the current Axes.

        **matplotlib_kwargs : dict, optional
            Keyword arguments passed directly to `matplotlib.axes.Axes.step`.
            Unless overridden, ``where='post'``.

        Returns
        -------
        lines : list of `matplotlib.lines.Line2D`
            Objects representing the plotted data
        r   Nz2matplotlib must be installed to use method `plot`.wherepostg?)
matplotlibModuleNotFoundErrorZmatplotlib.pyplotZpyplotZgcaupdater   Zptpr   liststepr-   )
r"   axZmatplotlib_kwargsr1   excmessageZpltkwargsdeltar#   r*   r*   r+   plotK   s     
$z"EmpiricalDistributionFunction.plotffffff?linear)methodc                C  s  d}| j du rt|| j| jd}dt| d}| |vrHt|d}t|d }|j	svd|  krtd	ks~n t|||  }||\}}d
}t
t|t|B rtj|tdd t|dd	t|dd	 }}t| j|dd| j}t| j|dd| j}t||S )a^  Compute a confidence interval around the CDF/SF point estimate

        Parameters
        ----------
        confidence_level : float, default: 0.95
            Confidence level for the computed confidence interval

        method : str, {"linear", "log-log"}
            Method used to compute the confidence interval. Options are
            "linear" for the conventional Greenwood confidence interval
            (default)  and "log-log" for the "exponential Greenwood",
            log-negative-log-transformed confidence interval.

        Returns
        -------
        ci : ``ConfidenceInterval``
            An object with attributes ``low`` and ``high``, instances of
            `~scipy.stats._result_classes.EmpiricalDistributionFunction` that
            represent the lower and upper bounds (respectively) of the
            confidence interval.

        Notes
        -----
        Confidence intervals are computed according to the Greenwood formula
        (``method='linear'``) or the more recent "exponential Greenwood"
        formula (``method='log-log'``) as described in [1]_. The conventional
        Greenwood formula can result in lower confidence limits less than 0
        and upper confidence limits greater than 1; these are clipped to the
        unit interval. NaNs may be produced by either method; these are
        features of the formulas.

        References
        ----------
        .. [1] Sawyer, Stanley. "The Greenwood and Exponential Greenwood
               Confidence Intervals in Survival Analysis."
               https://www.math.wustl.edu/~sawyer/handouts/greenwood.pdf

        zKConfidence interval bounds do not implement a `confidence_interval` method.N)r=   zlog-logz`method` must be one of .z4`confidence_level` must be a scalar between 0 and 1.r*   r   r   zThe confidence interval is undefined at some observations. This is a feature of the mathematical formula used, not an error in its implementation.   )
stacklevel)r   NotImplementedError
_linear_ci
_loglog_cisetlower
ValueErrorr   asarrayshapeanyisnanwarningswarnRuntimeWarningZclipr   r   r   r
   )r"   confidence_levelr>   r8   methodsZ
method_funlowhighr*   r*   r+   confidence_intervalq   s4    '
z1EmpiricalDistributionFunction.confidence_intervalc                 C  s   | j | j| j  }}}tjddd. |d t||||    }W d    n1 sX0    Y  t|}td|d  }|| }| j	| }	| j	| }
|	|
fS )Nignoredivideinvalidr@         ?)
r   r   r   r   errstatecumsumsqrtr   ndtrir   )r"   rO   r   r&   r%   varsezz_serQ   rR   r*   r*   r+   rC      s    <


z(EmpiricalDistributionFunction._linear_cic                 C  s   | j | j| j  }}}tjddd8 dt|d  t||||    }W d    n1 sb0    Y  t|}t	d|d  }tjdd" tt| }W d    n1 s0    Y  || }	t
t
||	  }
t
t
||	  }| jdkrd| d|
  }
}|
|fS )NrT   rU   r   r@   rX   )rV   cdf)r   r   r   r   rY   logrZ   r[   r   r\   expr   )r"   rO   r   r&   r%   r]   r^   r_   Z
lnl_pointsr`   rQ   rR   r*   r*   r+   rD      s    F
0z(EmpiricalDistributionFunction._loglog_ci)N)r<   )__name__
__module____qualname____doc____annotations__r   r   r   r   r   r,   r-   r;   rS   rC   rD   r*   r*   r*   r+   r      s   

&Hr   c                   @  s*   e Zd ZU dZded< ded< dd ZdS )
ECDFResulta   Result object returned by `scipy.stats.ecdf`

    Attributes
    ----------
    cdf : `~scipy.stats._result_classes.EmpiricalDistributionFunction`
        An object representing the empirical cumulative distribution function.
    sf : `~scipy.stats._result_classes.EmpiricalDistributionFunction`
        An object representing the complement of the empirical cumulative
        distribution function.
    r   ra   r   c                 C  s(   t ||||d| _t ||||d| _d S )Nra   r   )r   ra   r   )r"   r#   ra   r   r%   r&   r*   r*   r+   r,      s    zECDFResult.__init__N)rd   re   rf   rg   rh   r,   r*   r*   r*   r+   ri      s   

ri   sampleznpt.ArrayLike | CensoredDatar   r	   )rj   
param_namereturnc              
   C  s`   t | ts\zt| d} W nB tyZ } z*t|d|}t|||W Y d}~n
d}~0 0 | S )z.Attempt to convert `sample` to `CensoredData`.)
uncensoredrm   N)
isinstancer	   rG   r   replacetype)rj   rk   er8   r*   r*   r+   _iv_CensoredData   s    
$rr   )rj   rl   c                 C  sf   t | } |  dkr"t|  }n&|  | jjkr<t| }nd}t||\}}}}}t|||||S )a  Empirical cumulative distribution function of a sample.

    The empirical cumulative distribution function (ECDF) is a step function
    estimate of the CDF of the distribution underlying a sample. This function
    returns objects representing both the empirical distribution function and
    its complement, the empirical survival function.

    Parameters
    ----------
    sample : 1D array_like or `scipy.stats.CensoredData`
        Besides array_like, instances of `scipy.stats.CensoredData` containing
        uncensored and right-censored observations are supported. Currently,
        other instances of `scipy.stats.CensoredData` will result in a
        ``NotImplementedError``.

    Returns
    -------
    res : `~scipy.stats._result_classes.ECDFResult`
        An object with the following attributes.

        cdf : `~scipy.stats._result_classes.EmpiricalDistributionFunction`
            An object representing the empirical cumulative distribution
            function.
        sf : `~scipy.stats._result_classes.EmpiricalDistributionFunction`
            An object representing the empirical survival function.

        The `cdf` and `sf` attributes themselves have the following attributes.

        quantiles : ndarray
            The unique values in the sample that defines the empirical CDF/SF.
        probabilities : ndarray
            The point estimates of the probabilities corresponding with
            `quantiles`.

        And the following methods:

        evaluate(x) :
            Evaluate the CDF/SF at the argument.

        plot(ax) :
            Plot the CDF/SF on the provided axes.

        confidence_interval(confidence_level=0.95) :
            Compute the confidence interval around the CDF/SF at the values in
            `quantiles`.

    Notes
    -----
    When each observation of the sample is a precise measurement, the ECDF
    steps up by ``1/len(sample)`` at each of the observations [1]_.

    When observations are lower bounds, upper bounds, or both upper and lower
    bounds, the data is said to be "censored", and `sample` may be provided as
    an instance of `scipy.stats.CensoredData`.

    For right-censored data, the ECDF is given by the Kaplan-Meier estimator
    [2]_; other forms of censoring are not supported at this time.

    Confidence intervals are computed according to the Greenwood formula or the
    more recent "Exponential Greenwood" formula as described in [4]_.

    References
    ----------
    .. [1] Conover, William Jay. Practical nonparametric statistics. Vol. 350.
           John Wiley & Sons, 1999.

    .. [2] Kaplan, Edward L., and Paul Meier. "Nonparametric estimation from
           incomplete observations." Journal of the American statistical
           association 53.282 (1958): 457-481.

    .. [3] Goel, Manish Kumar, Pardeep Khanna, and Jugal Kishore.
           "Understanding survival analysis: Kaplan-Meier estimate."
           International journal of Ayurveda research 1.4 (2010): 274.

    .. [4] Sawyer, Stanley. "The Greenwood and Exponential Greenwood Confidence
           Intervals in Survival Analysis."
           https://www.math.wustl.edu/~sawyer/handouts/greenwood.pdf

    Examples
    --------
    **Uncensored Data**

    As in the example from [1]_ page 79, five boys were selected at random from
    those in a single high school. Their one-mile run times were recorded as
    follows.

    >>> sample = [6.23, 5.58, 7.06, 6.42, 5.20]  # one-mile run times (minutes)

    The empirical distribution function, which approximates the distribution
    function of one-mile run times of the population from which the boys were
    sampled, is calculated as follows.

    >>> from scipy import stats
    >>> res = stats.ecdf(sample)
    >>> res.cdf.quantiles
    array([5.2 , 5.58, 6.23, 6.42, 7.06])
    >>> res.cdf.probabilities
    array([0.2, 0.4, 0.6, 0.8, 1. ])

    To plot the result as a step function:

    >>> import matplotlib.pyplot as plt
    >>> ax = plt.subplot()
    >>> res.cdf.plot(ax)
    >>> ax.set_xlabel('One-Mile Run Time (minutes)')
    >>> ax.set_ylabel('Empirical CDF')
    >>> plt.show()

    **Right-censored Data**

    As in the example from [1]_ page 91, the lives of ten car fanbelts were
    tested. Five tests concluded because the fanbelt being tested broke, but
    the remaining tests concluded for other reasons (e.g. the study ran out of
    funding, but the fanbelt was still functional). The mileage driven
    with the fanbelts were recorded as follows.

    >>> broken = [77, 47, 81, 56, 80]  # in thousands of miles driven
    >>> unbroken = [62, 60, 43, 71, 37]

    Precise survival times of the fanbelts that were still functional at the
    end of the tests are unknown, but they are known to exceed the values
    recorded in ``unbroken``. Therefore, these observations are said to be
    "right-censored", and the data is represented using
    `scipy.stats.CensoredData`.

    >>> sample = stats.CensoredData(uncensored=broken, right=unbroken)

    The empirical survival function is calculated as follows.

    >>> res = stats.ecdf(sample)
    >>> res.sf.quantiles
    array([37., 43., 47., 56., 60., 62., 71., 77., 80., 81.])
    >>> res.sf.probabilities
    array([1.   , 1.   , 0.875, 0.75 , 0.75 , 0.75 , 0.75 , 0.5  , 0.25 , 0.   ])

    To plot the result as a step function:

    >>> ax = plt.subplot()
    >>> res.cdf.plot(ax)
    >>> ax.set_xlabel('Fanbelt Survival Time (thousands of miles)')
    >>> ax.set_ylabel('Empirical SF')
    >>> plt.show()

    r   z@Currently, only uncensored and right-censored data is supported.)	rr   Znum_censored_ecdf_uncensoredZ	_uncensor_rightsize_ecdf_right_censoredrB   ri   )rj   resr8   tra   r   r%   r&   r*   r*   r+   r      s     
c                 C  sf   t | } t j| dd\}}t |}| j}|| }d| }t |g||d d  f}|||||fS )NT)Zreturn_countsr   r0   )r   sortuniquerZ   ru   concatenate)rj   r(   countseventsr%   ra   r   at_riskr*   r*   r+   rs     s    

rs   c                 C  s   | j }| j}t||f}tdg|j dg|j  }t|}|| }|| }t|jdd}tj|tj	 tj	ddk}|d d }|dd  }	|| }
|| }t
||	 }tj|dd}t|| | }d| }|
||||fS )Nr   r   r0   )prependappend)r   )_uncensoredrt   r   r{   rH   ru   ZargsortZarangediffr    rZ   Zcumprod)rj   ZtodZtoltimesZdiedir~   jZj_lZj_rrx   r%   cdr&   r   ra   r*   r*   r+   rv     s$    
rv   c                   @  s"   e Zd ZU dZded< ded< dS )LogRankResulta^  Result object returned by `scipy.stats.logrank`.

    Attributes
    ----------
    statistic : float ndarray
        The computed statistic (defined below). Its magnitude is the
        square root of the magnitude returned by most other logrank test
        implementations.
    pvalue : float ndarray
        The computed p-value of the test.
    r   	statisticpvalueN)rd   re   rf   rg   rh   r*   r*   r*   r+   r     s   
r   	two-sidedz'Literal['two-sided', 'less', 'greater'])r(   r)   alternativerl   c                 C  s@  t | dd} t |dd}tt| j|jft| j|jfd}t|}|jj	t
}|jj| }|jj| }|jj| }t| }	t|	jj|}
t|	jjd|
 }|| }|| | ||  }|d |d  }|dk}
t||
 ||
  }| jj}t|||  }|| t| }tj|t|}t|d |d d	S )
a  Compare the survival distributions of two samples via the logrank test.

    Parameters
    ----------
    x, y : array_like or CensoredData
        Samples to compare based on their empirical survival functions.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis.

        The null hypothesis is that the survival distributions of the two
        groups, say *X* and *Y*, are identical.

        The following alternative hypotheses [4]_ are available (default is
        'two-sided'):

        * 'two-sided': the survival distributions of the two groups are not
          identical.
        * 'less': survival of group *X* is favored: the group *X* failure rate
          function is less than the group *Y* failure rate function at some
          times.
        * 'greater': survival of group *Y* is favored: the group *X* failure
          rate function is greater than the group *Y* failure rate function at
          some times.

    Returns
    -------
    res : `~scipy.stats._result_classes.LogRankResult`
        An object containing attributes:

        statistic : float ndarray
            The computed statistic (defined below). Its magnitude is the
            square root of the magnitude returned by most other logrank test
            implementations.
        pvalue : float ndarray
            The computed p-value of the test.

    See Also
    --------
    scipy.stats.ecdf

    Notes
    -----
    The logrank test [1]_ compares the observed number of events to
    the expected number of events under the null hypothesis that the two
    samples were drawn from the same distribution. The statistic is

    .. math::

        Z_i = \frac{\sum_{j=1}^J(O_{i,j}-E_{i,j})}{\sqrt{\sum_{j=1}^J V_{i,j}}}
        \rightarrow \mathcal{N}(0,1)

    where

    .. math::

        E_{i,j} = O_j \frac{N_{i,j}}{N_j},
        \qquad
        V_{i,j} = E_{i,j} \left(\frac{N_j-O_j}{N_j}\right)
        \left(\frac{N_j-N_{i,j}}{N_j-1}\right),

    :math:`i` denotes the group (i.e. it may assume values :math:`x` or
    :math:`y`, or it may be omitted to refer to the combined sample)
    :math:`j` denotes the time (at which an event occurred),
    :math:`N` is the number of subjects at risk just before an event occurred,
    and :math:`O` is the observed number of events at that time.

    The ``statistic`` :math:`Z_x` returned by `logrank` is the (signed) square
    root of the statistic returned by many other implementations. Under the
    null hypothesis, :math:`Z_x**2` is asymptotically distributed according to
    the chi-squared distribution with one degree of freedom. Consequently,
    :math:`Z_x` is asymptotically distributed according to the standard normal
    distribution. The advantage of using :math:`Z_x` is that the sign
    information (i.e. whether the observed number of events tends to be less
    than or greater than the number expected under the null hypothesis) is
    preserved, allowing `scipy.stats.logrank` to offer one-sided alternative
    hypotheses.

    References
    ----------
    .. [1] Mantel N. "Evaluation of survival data and two new rank order
           statistics arising in its consideration."
           Cancer Chemotherapy Reports, 50(3):163-170, PMID: 5910392, 1966
    .. [2] Bland, Altman, "The logrank test", BMJ, 328:1073,
           :doi:`10.1136/bmj.328.7447.1073`, 2004
    .. [3] "Logrank test", Wikipedia,
           https://en.wikipedia.org/wiki/Logrank_test
    .. [4] Brown, Mark. "On the choice of variance for the log rank test."
           Biometrika 71.1 (1984): 65-74.
    .. [5] Klein, John P., and Melvin L. Moeschberger. Survival analysis:
           techniques for censored and truncated data. Vol. 1230. New York:
           Springer, 2003.

    Examples
    --------
    Reference [2]_ compared the survival times of patients with two different
    types of recurrent malignant gliomas. The samples below record the time
    (number of weeks) for which each patient participated in the study. The
    `scipy.stats.CensoredData` class is used because the data is
    right-censored: the uncensored observations correspond with observed deaths
    whereas the censored observations correspond with the patient leaving the
    study for another reason.

    >>> from scipy import stats
    >>> x = stats.CensoredData(
    ...     uncensored=[6, 13, 21, 30, 37, 38, 49, 50,
    ...                 63, 79, 86, 98, 202, 219],
    ...     right=[31, 47, 80, 82, 82, 149]
    ... )
    >>> y = stats.CensoredData(
    ...     uncensored=[10, 10, 12, 13, 14, 15, 16, 17, 18, 20, 24, 24,
    ...                 25, 28,30, 33, 35, 37, 40, 40, 46, 48, 76, 81,
    ...                 82, 91, 112, 181],
    ...     right=[34, 40, 70]
    ... )

    We can calculate and visualize the empirical survival functions
    of both groups as follows.

    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> ax = plt.subplot()
    >>> ecdf_x = stats.ecdf(x)
    >>> ecdf_x.sf.plot(ax, label='Astrocytoma')
    >>> ecdf_y = stats.ecdf(y)
    >>> ecdf_y.sf.plot(ax, label='Glioblastoma')
    >>> ax.set_xlabel('Time to death (weeks)')
    >>> ax.set_ylabel('Empirical SF')
    >>> plt.legend()
    >>> plt.show()

    Visual inspection of the empirical survival functions suggests that the
    survival times tend to be different between the two groups. To formally
    assess whether the difference is significant at the 1% level, we use the
    logrank test.

    >>> res = stats.logrank(x=x, y=y)
    >>> res.statistic
    -2.73799...
    >>> res.pvalue
    0.00618...

    The p-value is less than 1%, so we can consider the data to be evidence
    against the null hypothesis in favor of the alternative that there is a
    difference between the two survival functions.

    r(   )rj   rk   r)   )rm   rightr   r@   r   r*   )r   r   )rr   r	   r   r{   r   rt   r   r   r   Zastypeboolr   r   Zsearchsortedr   sumru   r[   r   Z	_stats_pyZ_get_pvaluer   r   )r(   r)   r   Zxyrw   idxZtimes_xyZ
at_risk_xyZ	deaths_xyZres_xr   Z	at_risk_xZ	at_risk_ynumZdenZsum_varZn_died_xZsum_exp_deaths_xr   r   r*   r*   r+   r     s2     )rj   )r   ) 
__future__r   dataclassesr   r   typingr   rL   numpyr   Zscipyr   r   r   Zscipy.stats._censored_datar	   Zscipy.stats._commonr
   Zscipy.statsr   r   Znumpy.typingZnpt__all__r   ri   rr   r   rs   rv   r   r   r*   r*   r*   r+   <module>   s6    J  "( 