# Tests for fitting specific distributions to censored data.

import numpy as np
from numpy.testing import assert_allclose

from scipy.optimize import fmin
from scipy.stats import (CensoredData, beta, cauchy, chi2, expon, gamma,
                         gumbel_l, gumbel_r, invgauss, invweibull, laplace,
                         logistic, lognorm, nct, ncx2, norm, weibull_max,
                         weibull_min)


# In some tests, we'll use this optimizer for improved accuracy.
def optimizer(func, x0, args=(), disp=0):
    return fmin(func, x0, args=args, disp=disp, xtol=1e-12, ftol=1e-12)


def test_beta():
    """
    Test fitting beta shape parameters to interval-censored data.

    Calculation in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80),
    +                    right=c(0.20, 0.55, 0.90, 0.95))
    > result = fitdistcens(data, 'beta', control=list(reltol=1e-14))

    > result
    Fitting of the distribution ' beta ' on censored data by maximum likelihood
    Parameters:
           estimate
    shape1 1.419941
    shape2 1.027066
    > result$sd
       shape1    shape2
    0.9914177 0.6866565
    """
    data = CensoredData(interval=[[0.10, 0.20],
                                  [0.50, 0.55],
                                  [0.75, 0.90],
                                  [0.80, 0.95]])

    # For this test, fit only the shape parameters; loc and scale are fixed.
    a, b, loc, scale = beta.fit(data, floc=0, fscale=1, optimizer=optimizer)

    assert_allclose(a, 1.419941, rtol=5e-6)
    assert_allclose(b, 1.027066, rtol=5e-6)
    assert loc == 0
    assert scale == 1


def test_cauchy_right_censored():
    """
    Test fitting the Cauchy distribution to right-censored data.

    Calculation in R, with two values not censored [1, 10] and
    one right-censored value [30].

    > library(fitdistrplus)
    > data <- data.frame(left=c(1, 10, 30), right=c(1, 10, NA))
    > result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' cauchy ' on censored data by maximum
    likelihood
    Parameters:
             estimate
    location 7.100001
    scale    7.455866
    """
    data = CensoredData(uncensored=[1, 10], right=[30])
    loc, scale = cauchy.fit(data, optimizer=optimizer)
    assert_allclose(loc, 7.10001, rtol=5e-6)
    assert_allclose(scale, 7.455866, rtol=5e-6)


def test_cauchy_mixed():
    """
    Test fitting the Cauchy distribution to data with mixed censoring.

    Calculation in R, with:
    * two values not censored [1, 10],
    * one left-censored [1],
    * one right-censored [30], and
    * one interval-censored [[4, 8]].

    > library(fitdistrplus)
    > data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA))
    > result = fitdistcens(data, 'cauchy', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' cauchy ' on censored data by maximum
    likelihood
    Parameters:
             estimate
    location 4.605150
    scale    5.900852
    """
    data = CensoredData(uncensored=[1, 10], left=[1], right=[30],
                        interval=[[4, 8]])
    loc, scale = cauchy.fit(data, optimizer=optimizer)
    assert_allclose(loc, 4.605150, rtol=5e-6)
    assert_allclose(scale, 5.900852, rtol=5e-6)


def test_chi2_mixed():
    """
    Test fitting just the shape parameter (df) of chi2 to mixed data.

    Calculation in R, with:
    * two values not censored [1, 10],
    * one left-censored [1],
    * one right-censored [30], and
    * one interval-censored [[4, 8]].

    > library(fitdistrplus)
    > data <- data.frame(left=c(NA, 1, 4, 10, 30), right=c(1, 1, 8, 10, NA))
    > result = fitdistcens(data, 'chisq', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' chisq ' on censored data by maximum
    likelihood
    Parameters:
             estimate
    df 5.060329
    """
    data = CensoredData(uncensored=[1, 10], left=[1], right=[30],
                        interval=[[4, 8]])
    df, loc, scale = chi2.fit(data, floc=0, fscale=1, optimizer=optimizer)
    assert_allclose(df, 5.060329, rtol=5e-6)
    assert loc == 0
    assert scale == 1


def test_expon_right_censored():
    """
    For the exponential distribution with loc=0, the exact solution for
    fitting n uncensored points x[0]...x[n-1] and m right-censored points
    x[n]..x[n+m-1] is

        scale = sum(x)/n

    That is, divide the sum of all the values (not censored and
    right-censored) by the number of uncensored values.  (See, for example,
    https://en.wikipedia.org/wiki/Censoring_(statistics)#Likelihood.)

    The second derivative of the log-likelihood function is

        n/scale**2 - 2*sum(x)/scale**3

    from which the estimate of the standard error can be computed.

    -----

    Calculation in R, for reference only. The R results are not
    used in the test.

    > library(fitdistrplus)
    > dexps <- function(x, scale) {
    +     return(dexp(x, 1/scale))
    + }
    > pexps <- function(q, scale) {
    +     return(pexp(q, 1/scale))
    + }
    > left <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
    +                                     16, 16, 20, 20, 21, 22)
    > right <- c(1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15,
    +                                     NA, NA, NA, NA, NA, NA)
    > result = fitdistcens(data, 'exps', start=list(scale=mean(data$left)),
    +                      control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' exps ' on censored data by maximum likelihood
    Parameters:
          estimate
    scale    19.85
    > result$sd
       scale
    6.277119
    """
    # This data has 10 uncensored values and 6 right-censored values.
    obs = [1, 2.5, 3, 6, 7.5, 10, 12, 12, 14.5, 15, 16, 16, 20, 20, 21, 22]
    cens = [False]*10 + [True]*6
    data = CensoredData.right_censored(obs, cens)

    loc, scale = expon.fit(data, floc=0, optimizer=optimizer)

    assert loc == 0
    # Use the analytical solution to compute the expected value.  This
    # is the sum of the observed values divided by the number of uncensored
    # values.
    n = len(data) - data.num_censored()
    total = data._uncensored.sum() + data._right.sum()
    expected = total / n
    assert_allclose(scale, expected, 1e-8)


def test_gamma_right_censored():
    """
    Fit gamma shape and scale to data with one right-censored value.

    Calculation in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, 25.0),
    +                    right=c(2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0, NA))
    > result = fitdistcens(data, 'gamma', start=list(shape=1, scale=10),
    +                      control=list(reltol=1e-13))
    > result
    Fitting of the distribution ' gamma ' on censored data by maximum
      likelihood
    Parameters:
          estimate
    shape 1.447623
    scale 8.360197
    > result$sd
        shape     scale
    0.7053086 5.1016531
    """
    # The last value is right-censored.
    x = CensoredData.right_censored([2.5, 2.9, 3.8, 9.1, 9.3, 12.0, 23.0,
                                     25.0],
                                    [0]*7 + [1])

    a, loc, scale = gamma.fit(x, floc=0, optimizer=optimizer)

    assert_allclose(a, 1.447623, rtol=5e-6)
    assert loc == 0
    assert_allclose(scale, 8.360197, rtol=5e-6)


def test_gumbel():
    """
    Fit gumbel_l and gumbel_r to censored data.

    This R calculation should match gumbel_r.

    > library(evd)
    > library(fitdistrplus)
    > data = data.frame(left=c(0, 2, 3, 9, 10, 10),
    +                   right=c(1, 2, 3, 9, NA, NA))
    > result = fitdistcens(data, 'gumbel',
    +                      control=list(reltol=1e-14),
    +                      start=list(loc=4, scale=5))
    > result
    Fitting of the distribution ' gumbel ' on censored data by maximum
    likelihood
    Parameters:
          estimate
    loc   4.487853
    scale 4.843640
    """
    # First value is interval-censored. Last two are right-censored.
    uncensored = np.array([2, 3, 9])
    right = np.array([10, 10])
    interval = np.array([[0, 1]])
    data = CensoredData(uncensored, right=right, interval=interval)
    loc, scale = gumbel_r.fit(data, optimizer=optimizer)
    assert_allclose(loc, 4.487853, rtol=5e-6)
    assert_allclose(scale, 4.843640, rtol=5e-6)

    # Negate the data and reverse the intervals, and test with gumbel_l.
    data2 = CensoredData(-uncensored, left=-right,
                         interval=-interval[:, ::-1])
    # Fitting gumbel_l to data2 should give the same result as above, but
    # with loc negated.
    loc2, scale2 = gumbel_l.fit(data2, optimizer=optimizer)
    assert_allclose(loc2, -4.487853, rtol=5e-6)
    assert_allclose(scale2, 4.843640, rtol=5e-6)


def test_invgauss():
    """
    Fit just the shape parameter of invgauss to data with one value
    left-censored and one value right-censored.

    Calculation in R; using a fixed dispersion parameter amounts to fixing
    the scale to be 1.

    > library(statmod)
    > library(fitdistrplus)
    > left <- c(NA, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386,
    +           0.4822340, 0.3478597, 3, 0.7191797, 1.5810902, 0.4442299)
    > right <- c(0.15, 0.4813096, 0.5571880, 0.5132463, 0.3801414, 0.5904386,
    +            0.4822340, 0.3478597, NA, 0.7191797, 1.5810902, 0.4442299)
    > data <- data.frame(left=left, right=right)
    > result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12),
    +                      fix.arg=list(dispersion=1), start=list(mean=3))
    > result
    Fitting of the distribution ' invgauss ' on censored data by maximum
      likelihood
    Parameters:
         estimate
    mean 0.853469
    Fixed parameters:
               value
    dispersion     1
    > result$sd
        mean
    0.247636

    Here's the R calculation with the dispersion as a free parameter to
    be fit.

    > result = fitdistcens(data, 'invgauss', control=list(reltol=1e-12),
    +                      start=list(mean=3, dispersion=1))
    > result
    Fitting of the distribution ' invgauss ' on censored data by maximum
    likelihood
    Parameters:
                estimate
    mean       0.8699819
    dispersion 1.2261362

    The parametrization of the inverse Gaussian distribution in the
    `statmod` package is not the same as in SciPy (see
        https://arxiv.org/abs/1603.06687
    for details).  The translation from R to SciPy is

        scale = 1/dispersion
        mu    = mean * dispersion

    > 1/result$estimate['dispersion']  # 1/dispersion
    dispersion
     0.8155701
    > result$estimate['mean'] * result$estimate['dispersion']
        mean
    1.066716

    Those last two values are the SciPy scale and shape parameters.
    """
    # One point is left-censored, and one is right-censored.
    x = [0.4813096, 0.5571880, 0.5132463, 0.3801414,
         0.5904386, 0.4822340, 0.3478597, 0.7191797,
         1.5810902, 0.4442299]
    data = CensoredData(uncensored=x, left=[0.15], right=[3])

    # Fit only the shape parameter.
    mu, loc, scale = invgauss.fit(data, floc=0, fscale=1, optimizer=optimizer)

    assert_allclose(mu, 0.853469, rtol=5e-5)
    assert loc == 0
    assert scale == 1

    # Fit the shape and scale.
    mu, loc, scale = invgauss.fit(data, floc=0, optimizer=optimizer)

    assert_allclose(mu, 1.066716, rtol=5e-5)
    assert loc == 0
    assert_allclose(scale, 0.8155701, rtol=5e-5)


def test_invweibull():
    """
    Fit invweibull to censored data.

    Here is the calculation in R.  The 'frechet' distribution from the evd
    package matches SciPy's invweibull distribution.  The `loc` parameter
    is fixed at 0.

    > library(evd)
    > library(fitdistrplus)
    > data = data.frame(left=c(0, 2, 3, 9, 10, 10),
    +                   right=c(1, 2, 3, 9, NA, NA))
    > result = fitdistcens(data, 'frechet',
    +                      control=list(reltol=1e-14),
    +                      start=list(loc=4, scale=5))
    > result
    Fitting of the distribution ' frechet ' on censored data by maximum
    likelihood
    Parameters:
           estimate
    scale 2.7902200
    shape 0.6379845
    Fixed parameters:
        value
    loc     0
    """
    # In the R data, the first value is interval-censored, and the last
    # two are right-censored.  The rest are not censored.
    data = CensoredData(uncensored=[2, 3, 9], right=[10, 10],
                        interval=[[0, 1]])
    c, loc, scale = invweibull.fit(data, floc=0, optimizer=optimizer)
    assert_allclose(c, 0.6379845, rtol=5e-6)
    assert loc == 0
    assert_allclose(scale, 2.7902200, rtol=5e-6)


def test_laplace():
    """
    Fir the Laplace distribution to left- and right-censored data.

    Calculation in R:

    > library(fitdistrplus)
    > dlaplace <- function(x, location=0, scale=1) {
    +     return(0.5*exp(-abs((x - location)/scale))/scale)
    + }
    > plaplace <- function(q, location=0, scale=1) {
    +     z <- (q - location)/scale
    +     s <- sign(z)
    +     f <- -s*0.5*exp(-abs(z)) + (s+1)/2
    +     return(f)
    + }
    > left <- c(NA, -41.564, 50.0, 15.7384, 50.0, 10.0452, -2.0684,
    +           -19.5399, 50.0,   9.0005, 27.1227, 4.3113, -3.7372,
    +           25.3111, 14.7987,  34.0887,  50.0, 42.8496, 18.5862,
    +           32.8921, 9.0448, -27.4591, NA, 19.5083, -9.7199)
    > right <- c(-50.0, -41.564,  NA, 15.7384, NA, 10.0452, -2.0684,
    +            -19.5399, NA, 9.0005, 27.1227, 4.3113, -3.7372,
    +            25.3111, 14.7987, 34.0887, NA,  42.8496, 18.5862,
    +            32.8921, 9.0448, -27.4591, -50.0, 19.5083, -9.7199)
    > data <- data.frame(left=left, right=right)
    > result <- fitdistcens(data, 'laplace', start=list(location=10, scale=10),
    +                       control=list(reltol=1e-13))
    > result
    Fitting of the distribution ' laplace ' on censored data by maximum
      likelihood
    Parameters:
             estimate
    location 14.79870
    scale    30.93601
    > result$sd
         location     scale
    0.1758864 7.0972125
    """
    # The value -50 is left-censored, and the value 50 is right-censored.
    obs = np.array([-50.0, -41.564, 50.0, 15.7384, 50.0, 10.0452, -2.0684,
                    -19.5399, 50.0, 9.0005, 27.1227, 4.3113, -3.7372,
                    25.3111, 14.7987, 34.0887, 50.0, 42.8496, 18.5862,
                    32.8921, 9.0448, -27.4591, -50.0, 19.5083, -9.7199])
    x = obs[(obs != -50.0) & (obs != 50)]
    left = obs[obs == -50.0]
    right = obs[obs == 50.0]
    data = CensoredData(uncensored=x, left=left, right=right)
    loc, scale = laplace.fit(data, loc=10, scale=10, optimizer=optimizer)
    assert_allclose(loc, 14.79870, rtol=5e-6)
    assert_allclose(scale, 30.93601, rtol=5e-6)


def test_logistic():
    """
    Fit the logistic distribution to left-censored data.

    Calculation in R:
    > library(fitdistrplus)
    > left = c(13.5401, 37.4235, 11.906 , 13.998 ,  NA    ,  0.4023,  NA    ,
    +          10.9044, 21.0629,  9.6985,  NA    , 12.9016, 39.164 , 34.6396,
    +          NA    , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306,  8.4949,
    +          3.4041,  NA    ,  7.2828, 37.1265,  6.5969, 17.6868, 17.4977,
    +          16.3391, 36.0541)
    > right = c(13.5401, 37.4235, 11.906 , 13.998 ,  0.    ,  0.4023,  0.    ,
    +           10.9044, 21.0629,  9.6985,  0.    , 12.9016, 39.164 , 34.6396,
    +           0.    , 20.3665, 16.5889, 18.0952, 45.3818, 35.3306,  8.4949,
    +           3.4041,  0.    ,  7.2828, 37.1265,  6.5969, 17.6868, 17.4977,
    +           16.3391, 36.0541)
    > data = data.frame(left=left, right=right)
    > result = fitdistcens(data, 'logis', control=list(reltol=1e-14))
    > result
    Fitting of the distribution ' logis ' on censored data by maximum
      likelihood
    Parameters:
              estimate
    location 14.633459
    scale     9.232736
    > result$sd
    location    scale
    2.931505 1.546879
    """
    # Values that are zero are left-censored; the true values are less than 0.
    x = np.array([13.5401, 37.4235, 11.906, 13.998, 0.0, 0.4023, 0.0, 10.9044,
                  21.0629, 9.6985, 0.0, 12.9016, 39.164, 34.6396, 0.0, 20.3665,
                  16.5889, 18.0952, 45.3818, 35.3306, 8.4949, 3.4041, 0.0,
                  7.2828, 37.1265, 6.5969, 17.6868, 17.4977, 16.3391,
                  36.0541])
    data = CensoredData.left_censored(x, censored=(x == 0))
    loc, scale = logistic.fit(data, optimizer=optimizer)
    assert_allclose(loc, 14.633459, rtol=5e-7)
    assert_allclose(scale, 9.232736, rtol=5e-6)


def test_lognorm():
    """
    Ref: https://math.montana.edu/jobo/st528/documents/relc.pdf

    The data is the locomotive control time to failure example that starts
    on page 8.  That's the 8th page in the PDF; the page number shown in
    the text is 270).
    The document includes SAS output for the data.
    """
    # These are the uncensored measurements.  There are also 59 right-censored
    # measurements where the lower bound is 135.
    miles_to_fail = [22.5, 37.5, 46.0, 48.5, 51.5, 53.0, 54.5, 57.5, 66.5,
                     68.0, 69.5, 76.5, 77.0, 78.5, 80.0, 81.5, 82.0, 83.0,
                     84.0, 91.5, 93.5, 102.5, 107.0, 108.5, 112.5, 113.5,
                     116.0, 117.0, 118.5, 119.0, 120.0, 122.5, 123.0, 127.5,
                     131.0, 132.5, 134.0]

    data = CensoredData.right_censored(miles_to_fail + [135]*59,
                                       [0]*len(miles_to_fail) + [1]*59)
    sigma, loc, scale = lognorm.fit(data, floc=0)

    assert loc == 0
    # Convert the lognorm parameters to the mu and sigma of the underlying
    # normal distribution.
    mu = np.log(scale)
    # The expected results are from the 17th page of the PDF document
    # (labeled page 279), in the SAS output on the right side of the page.
    assert_allclose(mu, 5.1169, rtol=5e-4)
    assert_allclose(sigma, 0.7055, rtol=5e-3)


def test_nct():
    """
    Test fitting the noncentral t distribution to censored data.

    Calculation in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(1, 2, 3, 5, 8, 10, 25, 25),
    +                    right=c(1, 2, 3, 5, 8, 10, NA, NA))
    > result = fitdistcens(data, 't', control=list(reltol=1e-14),
    +                      start=list(df=1, ncp=2))
    > result
    Fitting of the distribution ' t ' on censored data by maximum likelihood
    Parameters:
         estimate
    df  0.5432336
    ncp 2.8893565

    """
    data = CensoredData.right_censored([1, 2, 3, 5, 8, 10, 25, 25],
                                       [0, 0, 0, 0, 0, 0, 1, 1])
    # Fit just the shape parameter df and nc; loc and scale are fixed.
    with np.errstate(over='ignore'):  # remove context when gh-14901 is closed
        df, nc, loc, scale = nct.fit(data, floc=0, fscale=1,
                                     optimizer=optimizer)
    assert_allclose(df, 0.5432336, rtol=5e-6)
    assert_allclose(nc, 2.8893565, rtol=5e-6)
    assert loc == 0
    assert scale == 1


def test_ncx2():
    """
    Test fitting the shape parameters (df, ncp) of ncx2 to mixed data.

    Calculation in R, with
    * 5 not censored values [2.7, 0.2, 6.5, 0.4, 0.1],
    * 1 interval-censored value [[0.6, 1.0]], and
    * 2 right-censored values [8, 8].

    > library(fitdistrplus)
    > data <- data.frame(left=c(2.7, 0.2, 6.5, 0.4, 0.1, 0.6, 8, 8),
    +                    right=c(2.7, 0.2, 6.5, 0.4, 0.1, 1.0, NA, NA))
    > result = fitdistcens(data, 'chisq', control=list(reltol=1e-14),
    +                      start=list(df=1, ncp=2))
    > result
    Fitting of the distribution ' chisq ' on censored data by maximum
    likelihood
    Parameters:
        estimate
    df  1.052871
    ncp 2.362934
    """
    data = CensoredData(uncensored=[2.7, 0.2, 6.5, 0.4, 0.1], right=[8, 8],
                        interval=[[0.6, 1.0]])
    with np.errstate(over='ignore'):  # remove context when gh-14901 is closed
        df, ncp, loc, scale = ncx2.fit(data, floc=0, fscale=1,
                                       optimizer=optimizer)
    assert_allclose(df, 1.052871, rtol=5e-6)
    assert_allclose(ncp, 2.362934, rtol=5e-6)
    assert loc == 0
    assert scale == 1


def test_norm():
    """
    Test fitting the normal distribution to interval-censored data.

    Calculation in R:

    > library(fitdistrplus)
    > data <- data.frame(left=c(0.10, 0.50, 0.75, 0.80),
    +                    right=c(0.20, 0.55, 0.90, 0.95))
    > result = fitdistcens(data, 'norm', control=list(reltol=1e-14))

    > result
    Fitting of the distribution ' norm ' on censored data by maximum likelihood
    Parameters:
          estimate
    mean 0.5919990
    sd   0.2868042
    > result$sd
         mean        sd
    0.1444432 0.1029451
    """
    data = CensoredData(interval=[[0.10, 0.20],
                                  [0.50, 0.55],
                                  [0.75, 0.90],
                                  [0.80, 0.95]])

    loc, scale = norm.fit(data, optimizer=optimizer)

    assert_allclose(loc, 0.5919990, rtol=5e-6)
    assert_allclose(scale, 0.2868042, rtol=5e-6)


def test_weibull_censored1():
    # Ref: http://www.ams.sunysb.edu/~zhu/ams588/Lecture_3_likelihood.pdf

    # Survival times; '*' indicates right-censored.
    s = "3,5,6*,8,10*,11*,15,20*,22,23,27*,29,32,35,40,26,28,33*,21,24*"

    times, cens = zip(*[(float(t[0]), len(t) == 2)
                        for t in [w.split('*') for w in s.split(',')]])
    data = CensoredData.right_censored(times, cens)

    c, loc, scale = weibull_min.fit(data, floc=0)

    # Expected values are from the reference.
    assert_allclose(c, 2.149, rtol=1e-3)
    assert loc == 0
    assert_allclose(scale, 28.99, rtol=1e-3)

    # Flip the sign of the data, and make the censored values
    # left-censored. We should get the same parameters when we fit
    # weibull_max to the flipped data.
    data2 = CensoredData.left_censored(-np.array(times), cens)

    c2, loc2, scale2 = weibull_max.fit(data2, floc=0)

    assert_allclose(c2, 2.149, rtol=1e-3)
    assert loc2 == 0
    assert_allclose(scale2, 28.99, rtol=1e-3)


def test_weibull_min_sas1():
    # Data and SAS results from
    #   https://support.sas.com/documentation/cdl/en/qcug/63922/HTML/default/
    #         viewer.htm#qcug_reliability_sect004.htm

    text = """
           450 0    460 1   1150 0   1150 0   1560 1
          1600 0   1660 1   1850 1   1850 1   1850 1
          1850 1   1850 1   2030 1   2030 1   2030 1
          2070 0   2070 0   2080 0   2200 1   3000 1
          3000 1   3000 1   3000 1   3100 0   3200 1
          3450 0   3750 1   3750 1   4150 1   4150 1
          4150 1   4150 1   4300 1   4300 1   4300 1
          4300 1   4600 0   4850 1   4850 1   4850 1
          4850 1   5000 1   5000 1   5000 1   6100 1
          6100 0   6100 1   6100 1   6300 1   6450 1
          6450 1   6700 1   7450 1   7800 1   7800 1
          8100 1   8100 1   8200 1   8500 1   8500 1
          8500 1   8750 1   8750 0   8750 1   9400 1
          9900 1  10100 1  10100 1  10100 1  11500 1
    """

    life, cens = np.array([int(w) for w in text.split()]).reshape(-1, 2).T
    life = life/1000.0

    data = CensoredData.right_censored(life, cens)

    c, loc, scale = weibull_min.fit(data, floc=0, optimizer=optimizer)
    assert_allclose(c, 1.0584, rtol=1e-4)
    assert_allclose(scale, 26.2968, rtol=1e-5)
    assert loc == 0


def test_weibull_min_sas2():
    # http://support.sas.com/documentation/cdl/en/ormpug/67517/HTML/default/
    #      viewer.htm#ormpug_nlpsolver_examples06.htm

    # The last two values are right-censored.
    days = np.array([143, 164, 188, 188, 190, 192, 206, 209, 213, 216, 220,
                     227, 230, 234, 246, 265, 304, 216, 244])

    data = CensoredData.right_censored(days, [0]*(len(days) - 2) + [1]*2)

    c, loc, scale = weibull_min.fit(data, 1, loc=100, scale=100,
                                    optimizer=optimizer)

    assert_allclose(c, 2.7112, rtol=5e-4)
    assert_allclose(loc, 122.03, rtol=5e-4)
    assert_allclose(scale, 108.37, rtol=5e-4)
