@@ -0,0 +1,451 @@
import warnings
from collections.abc import Sequence
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Literal

import numpy as np

from scipy import stats
from scipy.optimize import minimize_scalar
from scipy.stats._common import ConfidenceInterval
from scipy.stats._qmc import check_random_state
from scipy.stats._stats_py import _var
from scipy._lib._array_api import xp_capabilities
from scipy._lib._util import _transition_to_rng, DecimalNumber, SeedType


if TYPE_CHECKING:
    import numpy.typing as npt


__all__ = [
    'dunnett'
]


@dataclass
class DunnettResult:
    """Result object returned by `scipy.stats.dunnett`.

    Attributes
    ----------
    statistic : float ndarray
        The computed statistic of the test for each comparison. The element
        at index ``i`` is the statistic for the comparison between
        group ``i`` and the control.
    pvalue : float ndarray
        The computed p-value of the test for each comparison. The element
        at index ``i`` is the p-value for the comparison between
        group ``i`` and the control.
    """
    statistic: np.ndarray
    pvalue: np.ndarray
    _alternative: Literal['two-sided', 'less', 'greater'] = field(repr=False)
    _rho: np.ndarray = field(repr=False)
    _df: int = field(repr=False)
    _std: float = field(repr=False)
    _mean_samples: np.ndarray = field(repr=False)
    _mean_control: np.ndarray = field(repr=False)
    _n_samples: np.ndarray = field(repr=False)
    _n_control: int = field(repr=False)
    _rng: SeedType = field(repr=False)
    _ci: ConfidenceInterval | None = field(default=None, repr=False)
    _ci_cl: DecimalNumber | None = field(default=None, repr=False)

    def __str__(self):
        # Note: `__str__` prints the confidence intervals from the most
        # recent call to `confidence_interval`. If it has not been called,
        # it will be called with the default CL of .95.
        if self._ci is None:
            self.confidence_interval(confidence_level=.95)
        s = (
            "Dunnett's test"
            f" ({self._ci_cl*100:.1f}% Confidence Interval)\n"
            "Comparison Statistic p-value Lower CI Upper CI\n"
        )
        for i in range(self.pvalue.size):
            s += (f" (Sample {i} - Control) {self.statistic[i]:>10.3f}"
                  f"{self.pvalue[i]:>10.3f}"
                  f"{self._ci.low[i]:>10.3f}"
                  f"{self._ci.high[i]:>10.3f}\n")

        return s

    def _allowance(
        self, confidence_level: DecimalNumber = 0.95, tol: DecimalNumber = 1e-3
    ) -> float:
        """Allowance.

        It is the quantity to add/subtract from the observed difference
        between the means of observed groups and the mean of the control
        group. The result gives confidence limits.

        Parameters
        ----------
        confidence_level : float, optional
            Confidence level for the computed confidence interval.
            Default is .95.
        tol : float, optional
            A tolerance for numerical optimization: the allowance will produce
            a confidence level within ``10*tol*(1 - confidence_level)`` of the
            specified level, or a warning will be emitted. Tight tolerances
            may be impractical due to noisy evaluation of the objective.
            Default is 1e-3.

        Returns
        -------
        allowance : float
            Allowance around the mean.
        """
        alpha = 1 - confidence_level

        def pvalue_from_stat(statistic):
            statistic = np.array(statistic)
            sf = _pvalue_dunnett(
                rho=self._rho, df=self._df,
                statistic=statistic, alternative=self._alternative,
                rng=self._rng
            )
            return abs(sf - alpha)/alpha

        # Evaluation of `pvalue_from_stat` is noisy due to the use of RQMC to
        # evaluate `multivariate_t.cdf`. `minimize_scalar` is not designed
        # to tolerate a noisy objective function and may fail to find the
        # minimum accurately. We mitigate this possibility with the validation
        # step below, but implementation of a noise-tolerant root finder or
        # minimizer would be a welcome enhancement. See gh-18150.
        res = minimize_scalar(pvalue_from_stat, method='brent', tol=tol)
        critical_value = res.x

        # validation
        # tol*10 because tol=1e-3 means we tolerate a 1% change at most
        if res.success is False or res.fun >= tol*10:
            warnings.warn(
                "Computation of the confidence interval did not converge to "
                "the desired level. The significance level corresponding to "
                f"the returned interval is approximately {alpha*(1+res.fun)}.",
                stacklevel=3
            )

        # From [1] p. 1101 between (1) and (3)
        allowance = critical_value*self._std*np.sqrt(
            1/self._n_samples + 1/self._n_control
        )
        return abs(allowance)

    def confidence_interval(
        self, confidence_level: DecimalNumber = 0.95
    ) -> ConfidenceInterval:
        """Compute the confidence interval for the specified confidence level.

        Parameters
        ----------
        confidence_level : float, optional
            Confidence level for the computed confidence interval.
            Default is .95.

        Returns
        -------
        ci : ``ConfidenceInterval`` object
            The object has attributes ``low`` and ``high`` that hold the
            lower and upper bounds of the confidence intervals for each
            comparison. The high and low values are accessible for each
            comparison at index ``i`` for each group ``i``.

        """
        # check to see if the supplied confidence level matches that of the
        # previously computed CI.
        if (self._ci is not None) and (confidence_level == self._ci_cl):
            return self._ci

        if not (0 < confidence_level < 1):
            raise ValueError("Confidence level must be between 0 and 1.")

        allowance = self._allowance(confidence_level=confidence_level)
        diff_means = self._mean_samples - self._mean_control

        low = diff_means-allowance
        high = diff_means+allowance

        if self._alternative == 'greater':
            high = [np.inf] * len(diff_means)
        elif self._alternative == 'less':
            low = [-np.inf] * len(diff_means)

        self._ci_cl = confidence_level
        self._ci = ConfidenceInterval(
            low=low,
            high=high
        )
        return self._ci


@xp_capabilities(np_only=True)
@_transition_to_rng('random_state', replace_doc=False)
def dunnett(
    *samples: "npt.ArrayLike",  # noqa: D417
    control: "npt.ArrayLike",
    alternative: Literal['two-sided', 'less', 'greater'] = "two-sided",
    rng: SeedType = None
) -> DunnettResult:
    """Dunnett's test: multiple comparisons of means against a control group.

    This is an implementation of Dunnett's original, single-step test as
    described in [1]_.

    Parameters
    ----------
    sample1, sample2, ... : 1D array_like
        The sample measurements for each experimental group.
    control : 1D array_like
        The sample measurements for the control group.
    alternative : {'two-sided', 'less', 'greater'}, optional
        Defines the alternative hypothesis.

        The null hypothesis is that the means of the distributions underlying
        the samples and control are equal. The following alternative
        hypotheses are available (default is 'two-sided'):

        * 'two-sided': the means of the distributions underlying the samples
          and control are unequal.
        * 'less': the means of the distributions underlying the samples
          are less than the mean of the distribution underlying the control.
        * 'greater': the means of the distributions underlying the
          samples are greater than the mean of the distribution underlying
          the control.
    rng : `numpy.random.Generator`, optional
        Pseudorandom number generator state. When `rng` is None, a new
        `numpy.random.Generator` is created using entropy from the
        operating system. Types other than `numpy.random.Generator` are
        passed to `numpy.random.default_rng` to instantiate a ``Generator``.

        .. versionchanged:: 1.15.0

            As part of the `SPEC-007 <https://scientific-python.org/specs/spec-0007/>`_
            transition from use of `numpy.random.RandomState` to
            `numpy.random.Generator`, this keyword was changed from `random_state` to
            `rng`. For an interim period, both keywords will continue to work, although
            only one may be specified at a time. After the interim period, function
            calls using the `random_state` keyword will emit warnings. Following a
            deprecation period, the `random_state` keyword will be removed.

    Returns
    -------
    res : `~scipy.stats._result_classes.DunnettResult`
        An object containing attributes:

        statistic : float ndarray
            The computed statistic of the test for each comparison. The element
            at index ``i`` is the statistic for the comparison between
            group ``i`` and the control.
        pvalue : float ndarray
            The computed p-value of the test for each comparison. The element
            at index ``i`` is the p-value for the comparison between
            group ``i`` and the control.

        And the following method:

        confidence_interval(confidence_level=0.95) :
            Compute the difference in means of the groups
            with the control +- the allowance.

    See Also
    --------
    tukey_hsd : performs pairwise comparison of means.
    :ref:`hypothesis_dunnett` : Extended example

    Notes
    -----
    Like the independent-sample t-test, Dunnett's test [1]_ is used to make
    inferences about the means of distributions from which samples were drawn.
    However, when multiple t-tests are performed at a fixed significance level,
    the "family-wise error rate" - the probability of incorrectly rejecting the
    null hypothesis in at least one test - will exceed the significance level.
    Dunnett's test is designed to perform multiple comparisons while
    controlling the family-wise error rate.

    Dunnett's test compares the means of multiple experimental groups
    against a single control group. Tukey's Honestly Significant Difference Test
    is another multiple-comparison test that controls the family-wise error
    rate, but `tukey_hsd` performs *all* pairwise comparisons between groups.
    When pairwise comparisons between experimental groups are not needed,
    Dunnett's test is preferable due to its higher power.

    The use of this test relies on several assumptions.

    1. The observations are independent within and among groups.
    2. The observations within each group are normally distributed.
    3. The distributions from which the samples are drawn have the same finite
       variance.

    References
    ----------
    .. [1] Dunnett, Charles W. (1955) "A Multiple Comparison Procedure for
           Comparing Several Treatments with a Control." Journal of the American
           Statistical Association, 50:272, 1096-1121,
           :doi:`10.1080/01621459.1955.10501294`
    .. [2] Thomson, M. L., & Short, M. D. (1969). Mucociliary function in
           health, chronic obstructive airway disease, and asbestosis. Journal
           of applied physiology, 26(5), 535-539.
           :doi:`10.1152/jappl.1969.26.5.535`

    Examples
    --------
    We'll use data from [2]_, Table 1. The null hypothesis is that the means of
    the distributions underlying the samples and control are equal.

    First, we test that the means of the distributions underlying the samples
    and control are unequal (``alternative='two-sided'``, the default).

    >>> import numpy as np
    >>> from scipy.stats import dunnett
    >>> samples = [[3.8, 2.7, 4.0, 2.4], [2.8, 3.4, 3.7, 2.2, 2.0]]
    >>> control = [2.9, 3.0, 2.5, 2.6, 3.2]
    >>> res = dunnett(*samples, control=control)
    >>> res.statistic
    array([ 0.90874545, -0.05007117])
    >>> res.pvalue
    array([0.58325114, 0.99819341])

    Now, we test that the means of the distributions underlying the samples are
    greater than the mean of the distribution underlying the control.

    >>> res = dunnett(*samples, control=control, alternative='greater')
    >>> res.statistic
    array([ 0.90874545, -0.05007117])
    >>> res.pvalue
    array([0.30230596, 0.69115597])

    For a more detailed example, see :ref:`hypothesis_dunnett`.
    """
    samples_, control_, rng = _iv_dunnett(
        samples=samples, control=control,
        alternative=alternative, rng=rng
    )

    rho, df, n_group, n_samples, n_control = _params_dunnett(
        samples=samples_, control=control_
    )

    statistic, std, mean_control, mean_samples = _statistic_dunnett(
        samples_, control_, df, n_samples, n_control
    )

    pvalue = _pvalue_dunnett(
        rho=rho, df=df, statistic=statistic, alternative=alternative, rng=rng
    )

    return DunnettResult(
        statistic=statistic, pvalue=pvalue,
        _alternative=alternative,
        _rho=rho, _df=df, _std=std,
        _mean_samples=mean_samples,
        _mean_control=mean_control,
        _n_samples=n_samples,
        _n_control=n_control,
        _rng=rng
    )


def _iv_dunnett(
    samples: Sequence["npt.ArrayLike"],
    control: "npt.ArrayLike",
    alternative: Literal['two-sided', 'less', 'greater'],
    rng: SeedType
) -> tuple[list[np.ndarray], np.ndarray, SeedType]:
    """Input validation for Dunnett's test."""
    rng = check_random_state(rng)

    if alternative not in {'two-sided', 'less', 'greater'}:
        raise ValueError(
            "alternative must be 'less', 'greater' or 'two-sided'"
        )

    ndim_msg = "Control and samples groups must be 1D arrays"
    n_obs_msg = "Control and samples groups must have at least 1 observation"

    control = np.asarray(control)
    samples_ = [np.asarray(sample) for sample in samples]

    # samples checks
    samples_control: list[np.ndarray] = samples_ + [control]
    for sample in samples_control:
        if sample.ndim > 1:
            raise ValueError(ndim_msg)

        if sample.size < 1:
            raise ValueError(n_obs_msg)

    return samples_, control, rng


def _params_dunnett(
    samples: list[np.ndarray], control: np.ndarray
) -> tuple[np.ndarray, int, int, np.ndarray, int]:
    """Specific parameters for Dunnett's test.

    The degrees of freedom are the number of observations minus the number
    of groups, including the control.
    """
    n_samples = np.array([sample.size for sample in samples])

    # From [1] p. 1100 d.f. = (sum N)-(p+1)
    n_sample = n_samples.sum()
    n_control = control.size
    n = n_sample + n_control
    n_groups = len(samples)
    df = n - n_groups - 1
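    # e.g. three groups of sizes 4, 5 and 5 plus a control of size 5 give
    # N = 19 observations, p = 3 groups and df = 19 - (3 + 1) = 15.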

    # From [1] p. 1103 rho_ij = 1/sqrt((N0/Ni+1)(N0/Nj+1))
    rho = n_control/n_samples + 1
    rho = 1/np.sqrt(rho[:, None] * rho[None, :])
    np.fill_diagonal(rho, 1)
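    # For a balanced design (every N_i equal to N_0) each factor N_0/N_i + 1
    # equals 2, so every off-diagonal correlation is 1/2.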

    return rho, df, n_groups, n_samples, n_control


def _statistic_dunnett(
    samples: list[np.ndarray], control: np.ndarray, df: int,
    n_samples: np.ndarray, n_control: int
) -> tuple[np.ndarray, float, np.ndarray, np.ndarray]:
    """Statistic of Dunnett's test.

    Computation based on the original single-step test from [1].
    """
    mean_control = np.mean(control)
    mean_samples = np.array([np.mean(sample) for sample in samples])
    all_samples = [control] + samples
    all_means = np.concatenate([[mean_control], mean_samples])

    # Variance estimate s^2 from [1] Eq. 1
    s2 = np.sum([_var(sample, mean=mean)*sample.size
                 for sample, mean in zip(all_samples, all_means)]) / df
    std = np.sqrt(s2)
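    # `_var(sample, mean=mean) * sample.size` is the within-group sum of
    # squared deviations about the group mean, so `s2` pools the within-group
    # variability over `df` degrees of freedom. The statistic returned below
    # is then (mean_i - mean_control) / (std * sqrt(1/N_i + 1/N_0)).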

    # z score inferred from [1] unlabeled equation after Eq. 1
    z = (mean_samples - mean_control) / np.sqrt(1/n_samples + 1/n_control)

    return z / std, std, mean_control, mean_samples


def _pvalue_dunnett(
    rho: np.ndarray, df: int, statistic: np.ndarray,
    alternative: Literal['two-sided', 'less', 'greater'],
    rng: SeedType = None
) -> np.ndarray:
    """pvalue from the multivariate t-distribution.

    Critical values come from the multivariate student-t distribution.
    """
    statistic = statistic.reshape(-1, 1)

    mvt = stats.multivariate_t(shape=rho, df=df, seed=rng)
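    # For each observed statistic t_i, the p-value is one minus the
    # probability that all correlated t-variates fall in the acceptance
    # region:
    #   two-sided: 1 - P(-|t_i| <= T_j <= |t_i| for all j)
    #   'greater': 1 - P(T_j <= t_i for all j)
    #   'less':    1 - P(T_j >= t_i for all j)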
    if alternative == "two-sided":
        statistic = abs(statistic)
        pvalue = 1 - mvt.cdf(statistic, lower_limit=-statistic)
    elif alternative == "greater":
        pvalue = 1 - mvt.cdf(statistic, lower_limit=-np.inf)
    else:
        pvalue = 1 - mvt.cdf(np.inf, lower_limit=statistic)

    return np.atleast_1d(pvalue)