This commit is contained in:
2026-04-10 15:06:59 +02:00
parent 3031b7153b
commit e5a4711004
7806 changed files with 1918528 additions and 335 deletions

View File

@@ -0,0 +1,45 @@
"""Ensemble-based methods for classification, regression and anomaly detection."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from sklearn.ensemble._bagging import BaggingClassifier, BaggingRegressor
from sklearn.ensemble._base import BaseEnsemble
from sklearn.ensemble._forest import (
ExtraTreesClassifier,
ExtraTreesRegressor,
RandomForestClassifier,
RandomForestRegressor,
RandomTreesEmbedding,
)
from sklearn.ensemble._gb import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.ensemble._hist_gradient_boosting.gradient_boosting import (
HistGradientBoostingClassifier,
HistGradientBoostingRegressor,
)
from sklearn.ensemble._iforest import IsolationForest
from sklearn.ensemble._stacking import StackingClassifier, StackingRegressor
from sklearn.ensemble._voting import VotingClassifier, VotingRegressor
from sklearn.ensemble._weight_boosting import AdaBoostClassifier, AdaBoostRegressor
# Public names re-exported by ``from sklearn.ensemble import *``.
# ``sorted`` keeps the listing alphabetical regardless of edit order.
__all__ = sorted([
    "AdaBoostClassifier",
    "AdaBoostRegressor",
    "BaggingClassifier",
    "BaggingRegressor",
    "BaseEnsemble",
    "ExtraTreesClassifier",
    "ExtraTreesRegressor",
    "GradientBoostingClassifier",
    "GradientBoostingRegressor",
    "HistGradientBoostingClassifier",
    "HistGradientBoostingRegressor",
    "IsolationForest",
    "RandomForestClassifier",
    "RandomForestRegressor",
    "RandomTreesEmbedding",
    "StackingClassifier",
    "StackingRegressor",
    "VotingClassifier",
    "VotingRegressor",
])

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,313 @@
"""Base class for ensemble-based estimators."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from abc import ABCMeta, abstractmethod
import numpy as np
from joblib import effective_n_jobs
from sklearn.base import (
BaseEstimator,
MetaEstimatorMixin,
clone,
is_classifier,
is_regressor,
)
from sklearn.utils import Bunch, check_random_state
from sklearn.utils._tags import get_tags
from sklearn.utils._user_interface import _print_elapsed_time
from sklearn.utils.metadata_routing import _routing_enabled
from sklearn.utils.metaestimators import _BaseComposition
def _fit_single_estimator(
    estimator, X, y, fit_params, message_clsname=None, message=None
):
    """Fit one sub-estimator, optionally printing the elapsed time.

    Private helper used by ensemble estimators to fit a single estimator
    within a (possibly parallel) job.
    """
    # TODO(SLEP6): remove if-condition for unrouted sample_weight when metadata
    # routing can't be disabled.
    if _routing_enabled() or "sample_weight" not in fit_params:
        # Routed (or weight-free) path: forward all fit params untouched.
        with _print_elapsed_time(message_clsname, message):
            estimator.fit(X, y, **fit_params)
        return estimator

    # Legacy path: pass sample_weight explicitly, translating the resulting
    # TypeError into a clearer message when the estimator rejects weights.
    try:
        with _print_elapsed_time(message_clsname, message):
            estimator.fit(X, y, sample_weight=fit_params["sample_weight"])
    except TypeError as exc:
        if "unexpected keyword argument 'sample_weight'" in str(exc):
            raise TypeError(
                "Underlying estimator {} does not support sample weights.".format(
                    estimator.__class__.__name__
                )
            ) from exc
        raise
    return estimator
def _set_random_states(estimator, random_state=None):
    """Set fixed random_state parameters for an estimator.

    Every parameter named ``random_state`` (or ending in
    ``__random_state`` for nested estimators) is assigned an integer
    drawn from ``random_state``.

    Parameters
    ----------
    estimator : estimator supporting get/set_params
        Estimator with potential randomness managed by random_state
        parameters.

    random_state : int, RandomState instance or None, default=None
        Pseudo-random number generator to control the generation of the random
        integers. Pass an int for reproducible output across multiple function
        calls.
        See :term:`Glossary <random_state>`.

    Notes
    -----
    This does not necessarily set *all* ``random_state`` attributes that
    control an estimator's randomness, only those accessible through
    ``estimator.get_params()``.  ``random_state``s not controlled include
    those belonging to:

        * cross-validation splitters
        * ``scipy.stats`` rvs
    """
    rng = check_random_state(random_state)
    max_seed = np.iinfo(np.int32).max
    # Sorted iteration keeps the sequence of randint draws deterministic
    # for a given parameter set.
    seeds = {
        name: rng.randint(max_seed)
        for name in sorted(estimator.get_params(deep=True))
        if name == "random_state" or name.endswith("__random_state")
    }
    if seeds:
        estimator.set_params(**seeds)
class BaseEnsemble(MetaEstimatorMixin, BaseEstimator, metaclass=ABCMeta):
    """Base class for all ensemble classes.

    Warning: This class should not be used directly. Use derived classes
    instead.

    Parameters
    ----------
    estimator : object
        The base estimator from which the ensemble is built.

    n_estimators : int, default=10
        The number of estimators in the ensemble.

    estimator_params : list of str, default=tuple()
        The list of attributes to use as parameters when instantiating a
        new base estimator. If none are given, default parameters are used.

    Attributes
    ----------
    estimator_ : estimator
        The base estimator from which the ensemble is grown.

    estimators_ : list of estimators
        The collection of fitted base estimators.
    """

    @abstractmethod
    def __init__(
        self,
        estimator=None,
        *,
        n_estimators=10,
        estimator_params=tuple(),
    ):
        # Store constructor arguments verbatim.  Sub-estimators are NOT
        # instantiated here: their parameters may still change afterwards,
        # e.g. during grid-search with the nested object syntax.  Derived
        # classes are responsible for filling ``self.estimators_`` in fit.
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.estimator_params = estimator_params

    def _validate_estimator(self, default=None):
        """Resolve the base estimator and store it as ``estimator_``.

        Falls back to ``default`` when ``self.estimator`` is None.
        """
        self.estimator_ = self.estimator if self.estimator is not None else default

    def _make_estimator(self, append=True, random_state=None):
        """Make and configure a copy of the `estimator_` attribute.

        Warning: This method should be used to properly instantiate new
        sub-estimators.
        """
        new_estimator = clone(self.estimator_)
        # Propagate the ensemble-level attributes listed in estimator_params
        # onto the freshly cloned sub-estimator.
        params = {name: getattr(self, name) for name in self.estimator_params}
        new_estimator.set_params(**params)

        if random_state is not None:
            _set_random_states(new_estimator, random_state)

        if append:
            self.estimators_.append(new_estimator)

        return new_estimator

    def __len__(self):
        """Return the number of estimators in the ensemble."""
        return len(self.estimators_)

    def __getitem__(self, index):
        """Return the index'th estimator in the ensemble."""
        return self.estimators_[index]

    def __iter__(self):
        """Return iterator over estimators in the ensemble."""
        return iter(self.estimators_)
def _partition_estimators(n_estimators, n_jobs):
    """Private function used to partition estimators between jobs.

    Returns
    -------
    n_jobs : int
        Effective number of jobs, capped at ``n_estimators``.
    counts : list of int
        Number of estimators assigned to each job.
    starts : list of int
        Boundaries such that job ``j`` handles estimators
        ``starts[j]:starts[j + 1]``.
    """
    # Never spawn more jobs than there are estimators to fit.
    n_jobs = min(effective_n_jobs(n_jobs), n_estimators)

    # Spread estimators as evenly as possible: the first
    # ``n_estimators % n_jobs`` jobs each take one extra estimator.
    counts = np.full(n_jobs, n_estimators // n_jobs, dtype=int)
    counts[: n_estimators % n_jobs] += 1

    boundaries = [0] + np.cumsum(counts).tolist()
    return n_jobs, counts.tolist(), boundaries
class _BaseHeterogeneousEnsemble(
    MetaEstimatorMixin, _BaseComposition, metaclass=ABCMeta
):
    """Base class for heterogeneous ensemble of learners.

    Parameters
    ----------
    estimators : list of (str, estimator) tuples
        The ensemble of estimators to use in the ensemble. Each element of the
        list is defined as a tuple of string (i.e. name of the estimator) and
        an estimator instance. An estimator can be set to `'drop'` using
        `set_params`.

    Attributes
    ----------
    estimators_ : list of estimators
        The elements of the estimators parameter, having been fitted on the
        training data. If an estimator has been set to `'drop'`, it will not
        appear in `estimators_`.
    """

    @property
    def named_estimators(self):
        """Dictionary to access any fitted sub-estimators by name.

        Returns
        -------
        :class:`~sklearn.utils.Bunch`
        """
        return Bunch(**dict(self.estimators))

    @abstractmethod
    def __init__(self, estimators):
        self.estimators = estimators

    def _validate_estimators(self):
        # The list must be non-empty and each entry must be a
        # (name, estimator) pair whose name is a string.
        well_formed = all(
            isinstance(item, (tuple, list)) and isinstance(item[0], str)
            for item in self.estimators
        )
        if len(self.estimators) == 0 or not well_formed:
            raise ValueError(
                "Invalid 'estimators' attribute, 'estimators' should be a "
                "non-empty list of (string, estimator) tuples."
            )

        names, estimators = zip(*self.estimators)
        # defined by MetaEstimatorMixin
        self._validate_names(names)

        if all(est == "drop" for est in estimators):
            raise ValueError(
                "All estimators are dropped. At least one is required "
                "to be an estimator."
            )

        # Every remaining estimator must match the ensemble's own task type
        # (classifier vs regressor).
        is_estimator_type = is_classifier if is_classifier(self) else is_regressor
        for est in estimators:
            if est == "drop":
                continue
            if not is_estimator_type(est):
                raise ValueError(
                    "The estimator {} should be a {}.".format(
                        est.__class__.__name__, is_estimator_type.__name__[3:]
                    )
                )

        return names, estimators

    def set_params(self, **params):
        """
        Set the parameters of an estimator from the ensemble.

        Valid parameter keys can be listed with `get_params()`. Note that you
        can directly set the parameters of the estimators contained in
        `estimators`.

        Parameters
        ----------
        **params : keyword arguments
            Specific parameters using e.g.
            `set_params(parameter_name=new_value)`. In addition, to setting the
            parameters of the estimator, the individual estimator of the
            estimators can also be set, or can be removed by setting them to
            'drop'.

        Returns
        -------
        self : object
            Estimator instance.
        """
        super()._set_params("estimators", **params)
        return self

    def get_params(self, deep=True):
        """
        Get the parameters of an estimator from the ensemble.

        Returns the parameters given in the constructor as well as the
        estimators contained within the `estimators` parameter.

        Parameters
        ----------
        deep : bool, default=True
            Setting it to True gets the various estimators and the parameters
            of the estimators as well.

        Returns
        -------
        params : dict
            Parameter and estimator names mapped to their values or parameter
            names mapped to their values.
        """
        return super()._get_params("estimators", deep=deep)

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        try:
            # Dropped entries impose no constraint; aggregate the tags of
            # the remaining sub-estimators.
            active = [est[1] for est in self.estimators if est[1] != "drop"]
            tags.input_tags.allow_nan = all(
                get_tags(est).input_tags.allow_nan for est in active
            )
            tags.input_tags.sparse = all(
                get_tags(est).input_tags.sparse for est in active
            )
        except Exception:
            # If `estimators` does not comply with our API (list of tuples) then it will
            # fail. In this case, we assume that `allow_nan` and `sparse` are False but
            # the parameter validation will raise an error during `fit`.
            pass  # pragma: no cover
        return tags

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,262 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from libc.stdlib cimport free
from libc.string cimport memset
import numpy as np
from scipy.sparse import issparse
from sklearn.utils._typedefs cimport float32_t, float64_t, intp_t, int32_t, uint8_t
# Note: _tree uses cimport numpy, cnp.import_array, so we need to include
# numpy headers in the build configuration of this extension
from sklearn.tree._tree cimport Node
from sklearn.tree._tree cimport Tree
from sklearn.tree._utils cimport safe_realloc
# no namespace lookup for numpy dtype and array creation
from numpy import zeros as np_zeros
# constant to mark tree leafs
cdef intp_t TREE_LEAF = -1
cdef void _predict_regression_tree_inplace_fast_dense(
    const float32_t[:, ::1] X,
    Node* root_node,
    double *value,
    double scale,
    Py_ssize_t k,
    float64_t[:, :] out
) noexcept nogil:
    """Predicts output for regression tree and stores it in ``out[i, k]``.

    This function operates directly on the data arrays of the tree
    data structures. This is 5x faster than the variant above because
    it allows us to avoid buffer validation.

    The function assumes that the ndarray that wraps ``X`` is
    c-continuous.

    Parameters
    ----------
    X : float32_t 2d memory view
        The memory view on the data ndarray of the input ``X``.
        Assumes that the array is c-continuous.
    root_node : tree Node pointer
        Pointer to the main node array of the :class:``sklearn.tree.Tree``.
    value : np.float64_t pointer
        The pointer to the data array of the ``value`` array attribute
        of the :class:``sklearn.tree.Tree``.
    scale : double
        A constant to scale the predictions.
    k : int
        The index of the tree output to be predicted. Must satisfy
        0 <= ``k`` < ``K``.
    out : memory view on array of type np.float64_t
        The data array where the predictions are stored.
        ``out`` is assumed to be a two-dimensional array of
        shape ``(n_samples, K)``.
    """
    cdef intp_t n_samples = X.shape[0]
    cdef Py_ssize_t i
    cdef Node *node
    for i in range(n_samples):
        # Walk from the root down to a leaf, choosing the left or right
        # child based on the split threshold of each internal node.
        node = root_node
        # While node not a leaf
        while node.left_child != TREE_LEAF:
            if X[i, node.feature] <= node.threshold:
                node = root_node + node.left_child
            else:
                node = root_node + node.right_child
        # ``node - root_node`` is the leaf's index into the flat value
        # array; accumulate (not overwrite) the scaled leaf value.
        out[i, k] += scale * value[node - root_node]
def _predict_regression_tree_stages_sparse(
    object[:, :] estimators,
    object X,
    double scale,
    float64_t[:, :] out
):
    """Predicts output for regression tree inplace and adds scaled value to ``out[i, k]``.

    The function assumes that the ndarray that wraps ``X`` is csr_matrix.
    """
    cdef const float32_t[::1] X_data = X.data
    cdef const int32_t[::1] X_indices = X.indices
    cdef const int32_t[::1] X_indptr = X.indptr

    cdef intp_t n_samples = X.shape[0]
    cdef intp_t n_features = X.shape[1]
    cdef intp_t n_stages = estimators.shape[0]
    cdef intp_t n_outputs = estimators.shape[1]

    # Indices and temporary variables
    cdef intp_t sample_i
    cdef intp_t feature_i
    cdef intp_t stage_i
    cdef intp_t output_i
    cdef Node *root_node = NULL
    cdef Node *node = NULL
    cdef double *value = NULL

    cdef Tree tree
    # Cache the raw node/value pointers of every tree up front so that the
    # hot per-sample loops below avoid Python attribute access.
    cdef Node** nodes = NULL
    cdef double** values = NULL
    safe_realloc(&nodes, n_stages * n_outputs)
    safe_realloc(&values, n_stages * n_outputs)
    for stage_i in range(n_stages):
        for output_i in range(n_outputs):
            tree = estimators[stage_i, output_i].tree_
            nodes[stage_i * n_outputs + output_i] = tree.nodes
            values[stage_i * n_outputs + output_i] = tree.value

    # Initialize auxiliary data-structure
    cdef float32_t feature_value = 0.
    cdef float32_t* X_sample = NULL

    # feature_to_sample as a data structure records the last seen sample
    # for each feature; functionally, it is an efficient way to identify
    # which features are nonzero in the present sample.
    cdef intp_t* feature_to_sample = NULL

    safe_realloc(&X_sample, n_features)
    safe_realloc(&feature_to_sample, n_features)
    # memset with -1 fills every byte with 0xFF, so each intp_t entry
    # becomes -1, which never matches a valid sample index.
    memset(feature_to_sample, -1, n_features * sizeof(intp_t))

    # Cycle through all samples
    for sample_i in range(n_samples):
        # Scatter this CSR row into X_sample; feature_to_sample marks which
        # entries of X_sample belong to the current row.
        for feature_i in range(X_indptr[sample_i], X_indptr[sample_i + 1]):
            feature_to_sample[X_indices[feature_i]] = sample_i
            X_sample[X_indices[feature_i]] = X_data[feature_i]

        # Cycle through all stages
        for stage_i in range(n_stages):
            # Cycle through all trees
            for output_i in range(n_outputs):
                root_node = nodes[stage_i * n_outputs + output_i]
                value = values[stage_i * n_outputs + output_i]
                node = root_node

                # While node not a leaf
                while node.left_child != TREE_LEAF:
                    # ... and node.right_child != TREE_LEAF:
                    if feature_to_sample[node.feature] == sample_i:
                        feature_value = X_sample[node.feature]
                    else:
                        # Feature absent from this CSR row => implicit zero.
                        feature_value = 0.

                    if feature_value <= node.threshold:
                        node = root_node + node.left_child
                    else:
                        node = root_node + node.right_child
                # Accumulate the scaled leaf value for this (sample, output).
                out[sample_i, output_i] += scale * value[node - root_node]

    # Free auxiliary arrays
    free(X_sample)
    free(feature_to_sample)
    free(nodes)
    free(values)
def predict_stages(
    object[:, :] estimators,
    object X,
    double scale,
    float64_t[:, :] out
):
    """Add predictions of ``estimators`` to ``out``.

    Each estimator is scaled by ``scale`` before its prediction
    is added to ``out``.

    ``estimators`` has shape (n_stages, K); dispatches to the sparse or
    dense fast path depending on the type of ``X``.
    """
    cdef Py_ssize_t i
    cdef Py_ssize_t k
    cdef Py_ssize_t n_estimators = estimators.shape[0]
    cdef Py_ssize_t K = estimators.shape[1]
    cdef Tree tree

    if issparse(X):
        if X.format != 'csr':
            raise ValueError("When X is a sparse matrix, a CSR format is"
                             " expected, got {!r}".format(type(X)))
        _predict_regression_tree_stages_sparse(
            estimators=estimators, X=X, scale=scale, out=out
        )
    else:
        # The dense fast path requires a C-contiguous float array.
        if not isinstance(X, np.ndarray) or np.isfortran(X):
            raise ValueError(f"X should be C-ordered np.ndarray, got {type(X)}")

        for i in range(n_estimators):
            for k in range(K):
                tree = estimators[i, k].tree_

                # avoid buffer validation by casting to ndarray
                # and get data pointer
                # need brackets because of casting operator priority
                _predict_regression_tree_inplace_fast_dense(
                    X=X,
                    root_node=tree.nodes,
                    value=tree.value,
                    scale=scale,
                    k=k,
                    out=out
                )
                # out[:, k] += scale * tree.predict(X).ravel()
def predict_stage(
    object[:, :] estimators,
    int stage,
    object X,
    double scale,
    float64_t[:, :] out
):
    """Add predictions of ``estimators[stage]`` to ``out``.

    Each estimator in the stage is scaled by ``scale`` before
    its prediction is added to ``out``.
    """
    # Slice keeps the 2d (1, K) shape expected by predict_stages.
    single_stage = estimators[stage:stage + 1]
    return predict_stages(estimators=single_stage, X=X, scale=scale, out=out)
def _random_sample_mask(
    intp_t n_total_samples,
    intp_t n_total_in_bag,
    random_state
):
    """Create a random sample mask where ``n_total_in_bag`` elements are set.

    Parameters
    ----------
    n_total_samples : int
        The length of the resulting mask.

    n_total_in_bag : int
        The number of elements in the sample mask which are set to 1.

    random_state : RandomState
        A numpy ``RandomState`` object.

    Returns
    -------
    sample_mask : np.ndarray, shape=[n_total_samples]
        An ndarray where ``n_total_in_bag`` elements are set to ``True``
        the others are ``False``.
    """
    cdef float64_t[::1] rand = random_state.uniform(size=n_total_samples)
    cdef uint8_t[::1] sample_mask = np_zeros((n_total_samples,), dtype=bool)

    cdef intp_t n_bagged = 0
    cdef intp_t i = 0

    # Single-pass selection sampling: at position i the element is kept with
    # probability (remaining draws) / (remaining candidates), rewritten here
    # to avoid a division.  NOTE(review): this looks like Knuth's
    # Algorithm S (selection sampling), which draws exactly
    # ``n_total_in_bag`` items -- confirm against the reference.
    for i in range(n_total_samples):
        if rand[i] * (n_total_samples - i) < (n_total_in_bag - n_bagged):
            sample_mask[i] = 1
            n_bagged += 1

    # ``.base`` recovers the ndarray object backing the typed memoryview.
    return sample_mask.base

View File

@@ -0,0 +1,8 @@
"""This module implements histogram-based gradient boosting estimators.
The implementation is a port from pygbm which is itself strongly inspired
from LightGBM.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

View File

@@ -0,0 +1,85 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from cython.parallel import prange
from libc.math cimport isnan
from sklearn.ensemble._hist_gradient_boosting.common cimport X_DTYPE_C, X_BINNED_DTYPE_C
from sklearn.utils._typedefs cimport uint8_t
def _map_to_bins(const X_DTYPE_C [:, :] data,
                 list binning_thresholds,
                 const uint8_t[::1] is_categorical,
                 const uint8_t missing_values_bin_idx,
                 int n_threads,
                 X_BINNED_DTYPE_C [::1, :] binned):
    """Bin continuous and categorical values to discrete integer-coded levels.

    A given value x is mapped into bin value i iff
    thresholds[i - 1] < x <= thresholds[i]

    Parameters
    ----------
    data : ndarray, shape (n_samples, n_features)
        The data to bin.
    binning_thresholds : list of arrays
        For each feature, stores the increasing numeric values that are
        used to separate the bins.
    is_categorical : ndarray of uint8_t of shape (n_features,)
        Indicates categorical features.
    missing_values_bin_idx : uint8_t
        The bin index assigned to missing values (NaN, and negative values
        for categorical features).
    n_threads : int
        Number of OpenMP threads to use.
    binned : ndarray, shape (n_samples, n_features)
        Output array, must be fortran aligned.
    """
    cdef:
        int feature_idx

    # Process one column at a time; the parallelism over samples happens
    # inside _map_col_to_bins.
    for feature_idx in range(data.shape[1]):
        _map_col_to_bins(
            data[:, feature_idx],
            binning_thresholds[feature_idx],
            is_categorical[feature_idx],
            missing_values_bin_idx,
            n_threads,
            binned[:, feature_idx]
        )
cdef void _map_col_to_bins(
    const X_DTYPE_C [:] data,
    const X_DTYPE_C [:] binning_thresholds,
    const uint8_t is_categorical,
    const uint8_t missing_values_bin_idx,
    int n_threads,
    X_BINNED_DTYPE_C [:] binned
):
    """Binary search to find the bin index for each value in the data.

    Writes the bin index of every entry of ``data`` into ``binned``;
    missing values get ``missing_values_bin_idx``.
    """
    cdef:
        int i
        int left
        int right
        int middle

    # Samples are independent, so they are binned in parallel.
    for i in prange(data.shape[0], schedule='static', nogil=True,
                    num_threads=n_threads):
        if (
            isnan(data[i]) or
            # To follow LightGBM's conventions, negative values for
            # categorical features are considered as missing values.
            (is_categorical and data[i] < 0)
        ):
            binned[i] = missing_values_bin_idx
        else:
            # for known values, use binary search
            left, right = 0, binning_thresholds.shape[0]
            while left < right:
                # equal to (right + left - 1) // 2 but avoids overflow
                middle = left + (right - left - 1) // 2
                if data[i] <= binning_thresholds[middle]:
                    right = middle
                else:
                    left = middle + 1
            # ``left`` is the count of thresholds strictly below data[i],
            # i.e. the bin index.
            binned[i] = left

View File

@@ -0,0 +1,20 @@
from sklearn.ensemble._hist_gradient_boosting.common cimport X_BINNED_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport BITSET_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport BITSET_INNER_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport X_DTYPE_C
from sklearn.utils._typedefs cimport uint8_t
# C-level bitset API; implementations live in the matching .pyx file.

# Zero out every word of a fixed-size bitset.
cdef void init_bitset(BITSET_DTYPE_C bitset) noexcept nogil

# Set the bit corresponding to ``val``.
cdef void set_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) noexcept nogil

# Return 1 if ``val``'s bit is set, 0 otherwise.
cdef uint8_t in_bitset(BITSET_DTYPE_C bitset, X_BINNED_DTYPE_C val) noexcept nogil

# Membership test on a 1d memoryview-backed bitset (cpdef: also callable
# from Python).
cpdef uint8_t in_bitset_memoryview(const BITSET_INNER_DTYPE_C[:] bitset,
                                   X_BINNED_DTYPE_C val) noexcept nogil

# Membership test against one row of a 2d bitset array.
cdef uint8_t in_bitset_2d_memoryview(
    const BITSET_INNER_DTYPE_C[:, :] bitset,
    X_BINNED_DTYPE_C val,
    unsigned int row) noexcept nogil

View File

@@ -0,0 +1,65 @@
from sklearn.ensemble._hist_gradient_boosting.common cimport BITSET_INNER_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport BITSET_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport X_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport X_BINNED_DTYPE_C
from sklearn.utils._typedefs cimport uint8_t
# A bitset is a data structure used to represent sets of integers in [0, n]. We
# use them to represent sets of features indices (e.g. features that go to the
# left child, or features that are categorical). For familiarity with bitsets
# and bitwise operations:
# https://en.wikipedia.org/wiki/Bit_array
# https://en.wikipedia.org/wiki/Bitwise_operation
cdef inline void init_bitset(BITSET_DTYPE_C bitset) noexcept nogil:  # OUT
    # Clear all 8 words, i.e. empty the set.
    cdef:
        unsigned int i

    for i in range(8):
        bitset[i] = 0
cdef inline void set_bitset(BITSET_DTYPE_C bitset,  # OUT
                            X_BINNED_DTYPE_C val) noexcept nogil:
    # Word ``val // 32`` holds the bit at offset ``val % 32``.
    bitset[val // 32] |= (1 << (val % 32))
cdef inline uint8_t in_bitset(BITSET_DTYPE_C bitset,
                              X_BINNED_DTYPE_C val) noexcept nogil:
    # Extract the single bit for ``val``: 1 if present, 0 otherwise.
    return (bitset[val // 32] >> (val % 32)) & 1
cpdef inline uint8_t in_bitset_memoryview(const BITSET_INNER_DTYPE_C[:] bitset,
                                          X_BINNED_DTYPE_C val) noexcept nogil:
    # Same membership test as ``in_bitset`` but on a memoryview-backed
    # bitset; cpdef so it is also callable from Python.
    return (bitset[val // 32] >> (val % 32)) & 1
cdef inline uint8_t in_bitset_2d_memoryview(const BITSET_INNER_DTYPE_C[:, :] bitset,
                                            X_BINNED_DTYPE_C val,
                                            unsigned int row) noexcept nogil:

    # Same as above but works on 2d memory views to avoid the creation of 1d
    # memory views. See https://github.com/scikit-learn/scikit-learn/issues/17299
    return (bitset[row, val // 32] >> (val % 32)) & 1
cpdef inline void set_bitset_memoryview(BITSET_INNER_DTYPE_C[:] bitset,  # OUT
                                        X_BINNED_DTYPE_C val):
    # Memoryview counterpart of ``set_bitset``; callable from Python.
    bitset[val // 32] |= (1 << (val % 32))
def set_raw_bitset_from_binned_bitset(BITSET_INNER_DTYPE_C[:] raw_bitset,  # OUT
                                      BITSET_INNER_DTYPE_C[:] binned_bitset,
                                      X_DTYPE_C[:] categories):
    """Set the raw_bitset from the values of the binned bitset

    categories is a mapping from binned category value to raw category value.
    """
    cdef:
        int binned_cat_value
        X_DTYPE_C raw_cat_value

    # For every binned category present in binned_bitset, set the bit of its
    # raw (original) category value in raw_bitset.
    for binned_cat_value, raw_cat_value in enumerate(categories):
        if in_bitset_memoryview(binned_bitset, binned_cat_value):
            set_bitset_memoryview(raw_bitset, <X_BINNED_DTYPE_C>raw_cat_value)

View File

@@ -0,0 +1,59 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from cython.parallel import prange
import numpy as np
from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE
from sklearn.ensemble._hist_gradient_boosting.common cimport Y_DTYPE_C
def _update_raw_predictions(
    Y_DTYPE_C [::1] raw_predictions,  # OUT
    grower,
    n_threads,
):
    """Update raw_predictions with the predictions of the newest tree.

    This is equivalent to (and much faster than):
        raw_predictions += last_estimator.predict(X_train)

    It's only possible for data X_train that is used to train the trees (it
    isn't usable for e.g. X_val).
    """
    cdef:
        unsigned int [::1] starts  # start of each leaf in partition
        unsigned int [::1] stops  # end of each leaf in partition
        Y_DTYPE_C [::1] values  # value of each leaf
        const unsigned int [::1] partition = grower.splitter.partition
        list leaves

    leaves = grower.finalized_leaves
    # Flatten the per-leaf metadata into plain arrays so the nogil helper
    # below can run without touching Python objects.
    starts = np.array([leaf.partition_start for leaf in leaves],
                      dtype=np.uint32)
    stops = np.array([leaf.partition_stop for leaf in leaves],
                     dtype=np.uint32)
    values = np.array([leaf.value for leaf in leaves], dtype=Y_DTYPE)

    _update_raw_predictions_helper(raw_predictions, starts, stops, partition,
                                   values, n_threads)
cdef inline void _update_raw_predictions_helper(
    Y_DTYPE_C [::1] raw_predictions,  # OUT
    const unsigned int [::1] starts,
    const unsigned int [::1] stops,
    const unsigned int [::1] partition,
    const Y_DTYPE_C [::1] values,
    int n_threads,
):
    # Add each leaf's value to the raw prediction of every training sample
    # that ended up in that leaf.  ``partition[starts[j]:stops[j]]`` holds
    # the sample indices of leaf ``j``; leaves are disjoint, so the prange
    # over leaves is race-free.

    cdef:
        unsigned int position
        int leaf_idx
        int n_leaves = starts.shape[0]

    for leaf_idx in prange(n_leaves, schedule='static', nogil=True,
                           num_threads=n_threads):
        for position in range(starts[leaf_idx], stops[leaf_idx]):
            raw_predictions[partition[position]] += values[leaf_idx]

View File

@@ -0,0 +1,256 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from cython.parallel import prange
from libc.math cimport isnan
import numpy as np
from sklearn.utils._typedefs cimport intp_t, uint8_t
from sklearn.ensemble._hist_gradient_boosting.common cimport X_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport Y_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common import Y_DTYPE
from sklearn.ensemble._hist_gradient_boosting.common cimport X_BINNED_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport BITSET_INNER_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport node_struct
from sklearn.ensemble._hist_gradient_boosting._bitset cimport in_bitset_2d_memoryview
def _predict_from_raw_data(  # raw data = non-binned data
        const node_struct [:] nodes,
        const X_DTYPE_C [:, :] numeric_data,
        const BITSET_INNER_DTYPE_C [:, ::1] raw_left_cat_bitsets,
        const BITSET_INNER_DTYPE_C [:, ::1] known_cat_bitsets,
        const unsigned int [::1] f_idx_map,
        int n_threads,
        Y_DTYPE_C [:] out):
    """Fill ``out[i]`` with the tree prediction for each row of raw data.

    Rows are independent, so they are predicted in parallel.
    """

    cdef:
        int i

    for i in prange(numeric_data.shape[0], schedule='static', nogil=True,
                    num_threads=n_threads):
        out[i] = _predict_one_from_raw_data(
            nodes, numeric_data, raw_left_cat_bitsets,
            known_cat_bitsets,
            f_idx_map, i)
cdef inline Y_DTYPE_C _predict_one_from_raw_data(
        const node_struct [:] nodes,
        const X_DTYPE_C [:, :] numeric_data,
        const BITSET_INNER_DTYPE_C [:, ::1] raw_left_cat_bitsets,
        const BITSET_INNER_DTYPE_C [:, ::1] known_cat_bitsets,
        const unsigned int [::1] f_idx_map,
        const int row) noexcept nogil:
    # Traverse the tree from the root for a single sample and return the
    # value of the leaf reached.

    # Need to pass the whole array and the row index, else prange won't work.
    # See issue Cython #2798

    cdef:
        node_struct node = nodes[0]
        unsigned int node_idx = 0
        X_DTYPE_C data_val

    while True:
        if node.is_leaf:
            return node.value

        data_val = numeric_data[row, node.feature_idx]

        if isnan(data_val):
            # Missing value: follow the direction learned during training.
            if node.missing_go_to_left:
                node_idx = node.left
            else:
                node_idx = node.right
        elif node.is_categorical:
            if data_val < 0:
                # data_val is not in the accepted range, so it is treated as missing value
                node_idx = node.left if node.missing_go_to_left else node.right
            elif in_bitset_2d_memoryview(
                    raw_left_cat_bitsets,
                    <X_BINNED_DTYPE_C>data_val,
                    node.bitset_idx):
                # Category belongs to the set that goes left at this split.
                node_idx = node.left
            elif in_bitset_2d_memoryview(
                    known_cat_bitsets,
                    <X_BINNED_DTYPE_C>data_val,
                    f_idx_map[node.feature_idx]):
                # Known category not in the left set: go right.
                node_idx = node.right
            else:
                # Treat unknown categories as missing.
                node_idx = node.left if node.missing_go_to_left else node.right
        else:
            # Numerical split.
            if data_val <= node.num_threshold:
                node_idx = node.left
            else:
                node_idx = node.right

        node = nodes[node_idx]
def _predict_from_binned_data(
        node_struct [:] nodes,
        const X_BINNED_DTYPE_C [:, :] binned_data,
        BITSET_INNER_DTYPE_C [:, :] binned_left_cat_bitsets,
        const uint8_t missing_values_bin_idx,
        int n_threads,
        Y_DTYPE_C [:] out):
    """Fill ``out[i]`` with the tree prediction for each row of binned data.

    Rows are independent, so they are predicted in parallel.
    """

    cdef:
        int i

    for i in prange(binned_data.shape[0], schedule='static', nogil=True,
                    num_threads=n_threads):
        out[i] = _predict_one_from_binned_data(nodes,
                                               binned_data,
                                               binned_left_cat_bitsets, i,
                                               missing_values_bin_idx)
cdef inline Y_DTYPE_C _predict_one_from_binned_data(
        node_struct [:] nodes,
        const X_BINNED_DTYPE_C [:, :] binned_data,
        const BITSET_INNER_DTYPE_C [:, :] binned_left_cat_bitsets,
        const int row,
        const uint8_t missing_values_bin_idx) noexcept nogil:
    # Traverse the tree from the root for a single binned sample and return
    # the value of the leaf reached.

    # Need to pass the whole array and the row index, else prange won't work.
    # See issue Cython #2798

    cdef:
        node_struct node = nodes[0]
        unsigned int node_idx = 0
        X_BINNED_DTYPE_C data_val

    while True:
        if node.is_leaf:
            return node.value

        data_val = binned_data[row, node.feature_idx]

        if data_val == missing_values_bin_idx:
            # Missing value: follow the direction learned during training.
            if node.missing_go_to_left:
                node_idx = node.left
            else:
                node_idx = node.right
        elif node.is_categorical:
            # Categorical split: go left iff the bin is in the left set.
            if in_bitset_2d_memoryview(
                    binned_left_cat_bitsets,
                    data_val,
                    node.bitset_idx):
                node_idx = node.left
            else:
                node_idx = node.right
        else:
            # Numerical split on the bin index.
            if data_val <= node.bin_threshold:
                node_idx = node.left
            else:
                node_idx = node.right

        node = nodes[node_idx]
def _compute_partial_dependence(
    node_struct [:] nodes,
    const X_DTYPE_C [:, ::1] X,
    const intp_t [:] target_features,
    Y_DTYPE_C [:] out
):
    """Partial dependence of the response on the ``target_features`` set.

    For each sample in ``X`` a tree traversal is performed.
    Each traversal starts from the root with weight 1.0.

    At each non-leaf node that splits on a target feature, either
    the left child or the right child is visited based on the feature
    value of the current sample, and the weight is not modified.
    At each non-leaf node that splits on a complementary feature,
    both children are visited and the weight is multiplied by the fraction
    of training samples which went to each child.

    At each leaf, the value of the node is multiplied by the current
    weight (weights sum to 1 for all visited terminal nodes).

    Parameters
    ----------
    nodes : view on array of PREDICTOR_RECORD_DTYPE, shape (n_nodes)
        The array representing the predictor tree.
    X : view on 2d ndarray, shape (n_samples, n_target_features)
        The grid points on which the partial dependence should be
        evaluated.
    target_features : view on 1d ndarray of intp_t, shape (n_target_features)
        The set of target features for which the partial dependence
        should be evaluated.
    out : view on 1d ndarray, shape (n_samples)
        The value of the partial dependence function on each grid
        point.
    """
    cdef:
        unsigned int current_node_idx
        # A node can appear on the stack at most once, so n_nodes is a safe
        # upper bound for both stacks.
        unsigned int [:] node_idx_stack = np.zeros(shape=nodes.shape[0],
                                                   dtype=np.uint32)
        Y_DTYPE_C [::1] weight_stack = np.zeros(shape=nodes.shape[0],
                                                dtype=Y_DTYPE)
        node_struct * current_node  # pointer to avoid copying attributes

        unsigned int sample_idx
        intp_t feature_idx
        unsigned stack_size
        Y_DTYPE_C left_sample_frac
        Y_DTYPE_C current_weight
        Y_DTYPE_C total_weight  # used for sanity check only
        bint is_target_feature

    for sample_idx in range(X.shape[0]):
        # init stacks for current sample
        stack_size = 1
        node_idx_stack[0] = 0  # root node
        weight_stack[0] = 1  # all the samples are in the root node
        total_weight = 0

        while stack_size > 0:
            # pop the stack
            stack_size -= 1
            current_node_idx = node_idx_stack[stack_size]
            current_node = &nodes[current_node_idx]

            if current_node.is_leaf:
                out[sample_idx] += (weight_stack[stack_size] *
                                    current_node.value)
                total_weight += weight_stack[stack_size]
            else:
                # determine if the split feature is a target feature
                is_target_feature = False
                for feature_idx in range(target_features.shape[0]):
                    if target_features[feature_idx] == current_node.feature_idx:
                        is_target_feature = True
                        break

                if is_target_feature:
                    # In this case, we push left or right child on stack.
                    # ``feature_idx`` is the position of the matched target
                    # feature, which is also its column index in the grid X.
                    if X[sample_idx, feature_idx] <= current_node.num_threshold:
                        node_idx_stack[stack_size] = current_node.left
                    else:
                        node_idx_stack[stack_size] = current_node.right
                    stack_size += 1
                else:
                    # In this case, we push both children onto the stack,
                    # and give a weight proportional to the number of
                    # samples going through each branch.

                    # push left child
                    node_idx_stack[stack_size] = current_node.left
                    left_sample_frac = (
                        <Y_DTYPE_C> nodes[current_node.left].count /
                        current_node.count)
                    current_weight = weight_stack[stack_size]
                    weight_stack[stack_size] = current_weight * left_sample_frac
                    stack_size += 1

                    # push right child
                    node_idx_stack[stack_size] = current_node.right
                    weight_stack[stack_size] = (
                        current_weight * (1 - left_sample_frac))
                    stack_size += 1

    # Sanity check. Should never happen.
    if not (0.999 < total_weight < 1.001):
        raise ValueError("Total weight should be 1.0 but was %.9f" %total_weight)

View File

@@ -0,0 +1,338 @@
"""
This module contains the BinMapper class.
BinMapper is used for mapping a real-valued dataset into integer-valued bins.
Bin thresholds are computed with the quantiles so that each bin contains
approximately the same number of samples.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.ensemble._hist_gradient_boosting._binning import _map_to_bins
from sklearn.ensemble._hist_gradient_boosting._bitset import set_bitset_memoryview
from sklearn.ensemble._hist_gradient_boosting.common import (
ALMOST_INF,
X_BINNED_DTYPE,
X_BITSET_INNER_DTYPE,
X_DTYPE,
)
from sklearn.utils import check_array, check_random_state
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
from sklearn.utils.parallel import Parallel, delayed
from sklearn.utils.validation import check_is_fitted
def _find_binning_thresholds(col_data, max_bins):
    """Compute bin thresholds (quantile-based midpoints) for one continuous feature.

    Missing values are ignored when computing the thresholds.

    Parameters
    ----------
    col_data : array-like, shape (n_samples,)
        The continuous feature to bin.
    max_bins : int
        The maximum number of bins to use for non-missing values. If the
        feature has fewer unique values than ``max_bins``, the midpoints of
        consecutive unique values are used instead of the quantiles.

    Returns
    -------
    binning_thresholds : ndarray of shape (min(max_bins, n_unique_values) - 1,)
        The increasing numeric values that can be used to separate the bins.
        A given value x will be mapped into bin value i iff
        binning_thresholds[i - 1] < x <= binning_thresholds[i].
    """
    # Drop NaNs first: thresholds are computed on observed values only.
    nan_mask = np.isnan(col_data)
    if nan_mask.any():
        col_data = col_data[~nan_mask]
    # Both np.unique and np.percentile sort internally, so sort once up
    # front; sorting also returns a contiguous array.
    col_data = np.sort(col_data)
    distinct_values = np.unique(col_data).astype(X_DTYPE)
    if len(distinct_values) <= max_bins:
        # Few enough unique values: thresholds are the midpoints between
        # consecutive distinct values.
        midpoints = (distinct_values[:-1] + distinct_values[1:]) * 0.5
    else:
        # Evenly spaced interior quantiles. Approximating midpoint
        # percentiles from np.unique(col_data, return_counts) would be more
        # work for limited benefit, since we operate on a fixed-size
        # subsample of the full data anyway.
        quantile_levels = np.linspace(0, 100, num=max_bins + 1)[1:-1]
        midpoints = np.percentile(
            col_data, quantile_levels, method="midpoint"
        ).astype(X_DTYPE)
        assert midpoints.shape[0] == max_bins - 1
    # +inf thresholds are only allowed in a "split on nan" situation, so cap
    # any overly large threshold.
    np.clip(midpoints, a_min=None, a_max=ALMOST_INF, out=midpoints)
    return midpoints
class _BinMapper(TransformerMixin, BaseEstimator):
    """Transformer that maps a dataset into integer-valued bins.

    For continuous features, the bins are created in a feature-wise fashion,
    using quantiles so that each bins contains approximately the same number
    of samples. For large datasets, quantiles are computed on a subset of the
    data to speed-up the binning, but the quantiles should remain stable.

    For categorical features, the raw categorical values are expected to be
    in [0, 254] (this is not validated here though) and each category
    corresponds to a bin. All categorical values must be known at
    initialization: transform() doesn't know how to bin unknown categorical
    values. Note that transform() is only used on non-training data in the
    case of early stopping.

    Features with a small number of values may be binned into less than
    ``n_bins`` bins. The last bin (at index ``n_bins - 1``) is always reserved
    for missing values.

    Parameters
    ----------
    n_bins : int, default=256
        The maximum number of bins to use (including the bin for missing
        values). Should be in [3, 256]. Non-missing values are binned on
        ``max_bins = n_bins - 1`` bins. The last bin is always reserved for
        missing values. If for a given feature the number of unique values is
        less than ``max_bins``, then those unique values will be used to
        compute the bin thresholds, instead of the quantiles. For categorical
        features indicated by ``is_categorical``, the docstring for
        ``is_categorical`` details on this procedure.
    subsample : int or None, default=2e5
        If ``n_samples > subsample``, then ``subsample`` samples will be
        randomly chosen to compute the quantiles. If ``None``, the whole data
        is used.
    is_categorical : ndarray of bool of shape (n_features,), default=None
        Indicates categorical features. By default, all features are
        considered continuous.
    known_categories : list of {ndarray, None} of shape (n_features,), \
            default=None
        For each categorical feature, the array indicates the set of unique
        categorical values. These should be the possible values over all the
        data, not just the training data. For continuous features, the
        corresponding entry should be None.
    random_state : int, RandomState instance or None, default=None
        Pseudo-random number generator to control the random sub-sampling.
        Pass an int for reproducible output across multiple
        function calls.
        See :term:`Glossary <random_state>`.
    n_threads : int, default=None
        Number of OpenMP threads to use. `_openmp_effective_n_threads` is called
        to determine the effective number of threads use, which takes cgroups CPU
        quotes into account. See the docstring of `_openmp_effective_n_threads`
        for details.

    Attributes
    ----------
    bin_thresholds_ : list of ndarray
        For each feature, each array indicates how to map a feature into a
        binned feature. The semantic and size depends on the nature of the
        feature:
        - for real-valued features, the array corresponds to the real-valued
          bin thresholds (the upper bound of each bin). There are ``max_bins
          - 1`` thresholds, where ``max_bins = n_bins - 1`` is the number of
          bins used for non-missing values.
        - for categorical features, the array is a map from a binned category
          value to the raw category value. The size of the array is equal to
          ``min(max_bins, category_cardinality)`` where we ignore missing
          values in the cardinality.
    n_bins_non_missing_ : ndarray, dtype=np.uint32
        For each feature, gives the number of bins actually used for
        non-missing values. For features with a lot of unique values, this is
        equal to ``n_bins - 1``.
    is_categorical_ : ndarray of shape (n_features,), dtype=np.uint8
        Indicator for categorical features.
    missing_values_bin_idx_ : np.uint8
        The index of the bin where missing values are mapped. This is a
        constant across all features. This corresponds to the last bin, and
        it is always equal to ``n_bins - 1``. Note that if ``n_bins_non_missing_``
        is less than ``n_bins - 1`` for a given feature, then there are
        empty (and unused) bins.
    """

    def __init__(
        self,
        n_bins=256,
        subsample=int(2e5),
        is_categorical=None,
        known_categories=None,
        random_state=None,
        n_threads=None,
    ):
        self.n_bins = n_bins
        self.subsample = subsample
        self.is_categorical = is_categorical
        self.known_categories = known_categories
        self.random_state = random_state
        self.n_threads = n_threads

    def fit(self, X, y=None):
        """Fit data X by computing the binning thresholds.

        The last bin is reserved for missing values, whether missing values
        are present in the data or not.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data to bin.
        y : None
            Ignored.

        Returns
        -------
        self : object
        """
        if not (3 <= self.n_bins <= 256):
            # min is 3: at least 2 distinct bins and a missing values bin
            raise ValueError(
                "n_bins={} should be no smaller than 3 and no larger than 256.".format(
                    self.n_bins
                )
            )
        X = check_array(X, dtype=[X_DTYPE], ensure_all_finite=False)
        # One bin (the last) is reserved for missing values.
        max_bins = self.n_bins - 1
        rng = check_random_state(self.random_state)
        if self.subsample is not None and X.shape[0] > self.subsample:
            # Quantiles are computed on a random subsample for speed; they
            # should remain stable for large n_samples.
            subset = rng.choice(X.shape[0], self.subsample, replace=False)
            X = X.take(subset, axis=0)
        if self.is_categorical is None:
            self.is_categorical_ = np.zeros(X.shape[1], dtype=np.uint8)
        else:
            self.is_categorical_ = np.asarray(self.is_categorical, dtype=np.uint8)
        n_features = X.shape[1]
        known_categories = self.known_categories
        if known_categories is None:
            known_categories = [None] * n_features
        # validate is_categorical and known_categories parameters
        for f_idx in range(n_features):
            is_categorical = self.is_categorical_[f_idx]
            known_cats = known_categories[f_idx]
            if is_categorical and known_cats is None:
                raise ValueError(
                    f"Known categories for feature {f_idx} must be provided."
                )
            if not is_categorical and known_cats is not None:
                raise ValueError(
                    f"Feature {f_idx} isn't marked as a categorical feature, "
                    "but categories were passed."
                )
        self.missing_values_bin_idx_ = self.n_bins - 1
        self.bin_thresholds_ = [None] * n_features
        n_bins_non_missing = [None] * n_features
        # Thresholds for continuous features are computed in parallel; the
        # result list preserves the order of the non-categorical features,
        # which is relied upon by the positional consumption below.
        non_cat_thresholds = Parallel(n_jobs=self.n_threads, backend="threading")(
            delayed(_find_binning_thresholds)(X[:, f_idx], max_bins)
            for f_idx in range(n_features)
            if not self.is_categorical_[f_idx]
        )
        non_cat_idx = 0
        for f_idx in range(n_features):
            if self.is_categorical_[f_idx]:
                # Since categories are assumed to be encoded in
                # [0, n_cats] and since n_cats <= max_bins,
                # the thresholds *are* the unique categorical values. This will
                # lead to the correct mapping in transform()
                thresholds = known_categories[f_idx]
                n_bins_non_missing[f_idx] = thresholds.shape[0]
                self.bin_thresholds_[f_idx] = thresholds
            else:
                self.bin_thresholds_[f_idx] = non_cat_thresholds[non_cat_idx]
                # k thresholds separate k + 1 bins.
                n_bins_non_missing[f_idx] = self.bin_thresholds_[f_idx].shape[0] + 1
                non_cat_idx += 1
        self.n_bins_non_missing_ = np.array(n_bins_non_missing, dtype=np.uint32)
        return self

    def transform(self, X):
        """Bin data X.

        Missing values will be mapped to the last bin.

        For categorical features, the mapping will be incorrect for unknown
        categories. Since the BinMapper is given known_categories of the
        entire training data (i.e. before the call to train_test_split() in
        case of early-stopping), this never happens.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            The data to bin.

        Returns
        -------
        X_binned : array-like of shape (n_samples, n_features)
            The binned data (fortran-aligned).
        """
        X = check_array(X, dtype=[X_DTYPE], ensure_all_finite=False)
        check_is_fitted(self)
        if X.shape[1] != self.n_bins_non_missing_.shape[0]:
            raise ValueError(
                "This estimator was fitted with {} features but {} got passed "
                "to transform()".format(self.n_bins_non_missing_.shape[0], X.shape[1])
            )
        n_threads = _openmp_effective_n_threads(self.n_threads)
        # Fortran order so that each feature column is contiguous for the
        # per-feature binning loop.
        binned = np.zeros_like(X, dtype=X_BINNED_DTYPE, order="F")
        _map_to_bins(
            X,
            self.bin_thresholds_,
            self.is_categorical_,
            self.missing_values_bin_idx_,
            n_threads,
            binned,
        )
        return binned

    def make_known_categories_bitsets(self):
        """Create bitsets of known categories.

        Returns
        -------
        - known_cat_bitsets : ndarray of shape (n_categorical_features, 8)
            Array of bitsets of known categories, for each categorical feature.
        - f_idx_map : ndarray of shape (n_features,)
            Map from original feature index to the corresponding index in the
            known_cat_bitsets array.
        """
        categorical_features_indices = np.flatnonzero(self.is_categorical_)
        n_features = self.is_categorical_.size
        n_categorical_features = categorical_features_indices.size
        f_idx_map = np.zeros(n_features, dtype=np.uint32)
        f_idx_map[categorical_features_indices] = np.arange(
            n_categorical_features, dtype=np.uint32
        )
        # For categorical features, bin_thresholds_ stores the raw category
        # values (see fit), so it doubles as the known-categories list here.
        known_categories = self.bin_thresholds_
        known_cat_bitsets = np.zeros(
            (n_categorical_features, 8), dtype=X_BITSET_INNER_DTYPE
        )
        # TODO: complexity is O(n_categorical_features * 255). Maybe this is
        # worth cythonizing
        for mapped_f_idx, f_idx in enumerate(categorical_features_indices):
            for raw_cat_val in known_categories[f_idx]:
                set_bitset_memoryview(known_cat_bitsets[mapped_f_idx], raw_cat_val)
        return known_cat_bitsets, f_idx_map

View File

@@ -0,0 +1,43 @@
from sklearn.utils._typedefs cimport float32_t, float64_t, intp_t, uint8_t, uint32_t

# Central C-level dtype aliases shared by the hist_gradient_boosting Cython
# modules; they mirror the numpy dtypes declared in common.py.
ctypedef float64_t X_DTYPE_C
ctypedef uint8_t X_BINNED_DTYPE_C
ctypedef float64_t Y_DTYPE_C
ctypedef float32_t G_H_DTYPE_C
ctypedef uint32_t BITSET_INNER_DTYPE_C
# A bitset is 8 x 32 bits = 256 bits, one per possible bin value.
ctypedef BITSET_INNER_DTYPE_C[8] BITSET_DTYPE_C

cdef packed struct hist_struct:
    # Same as histogram dtype but we need a struct to declare views. It needs
    # to be packed since by default numpy dtypes aren't aligned
    Y_DTYPE_C sum_gradients
    Y_DTYPE_C sum_hessians
    unsigned int count

cdef packed struct node_struct:
    # Equivalent struct to PREDICTOR_RECORD_DTYPE to use in memory views. It
    # needs to be packed since by default numpy dtypes aren't aligned
    Y_DTYPE_C value
    unsigned int count
    intp_t feature_idx
    X_DTYPE_C num_threshold
    uint8_t missing_go_to_left
    unsigned int left
    unsigned int right
    Y_DTYPE_C gain
    unsigned int depth
    uint8_t is_leaf
    X_BINNED_DTYPE_C bin_threshold
    uint8_t is_categorical
    # The index of the corresponding bitsets in the Predictor's bitset arrays.
    # Only used if is_categorical is True
    unsigned int bitset_idx

cpdef enum MonotonicConstraint:
    # Per-feature monotonicity constraint on the predictions.
    NO_CST = 0
    POS = 1
    NEG = -1

View File

@@ -0,0 +1,44 @@
import numpy as np

# Y_DYTPE is the dtype to which the targets y are converted to. This is also
# dtype for leaf values, gains, and sums of gradients / hessians. The gradients
# and hessians arrays are stored as floats to avoid using too much memory.
Y_DTYPE = np.float64
X_DTYPE = np.float64
X_BINNED_DTYPE = np.uint8  # hence max_bins == 256
# dtype for gradients and hessians arrays
G_H_DTYPE = np.float32
X_BITSET_INNER_DTYPE = np.uint32

# Note that we use Y_DTYPE=float64 to avoid issues with floating point precision when
# summing gradients and hessians (both float32). Those are difficult to protect via
# tools like (Kahan-) Neumaier summation as in CPython, see
# https://github.com/python/cpython/issues/100425, or pairwise summation as numpy, see
# https://github.com/numpy/numpy/pull/3685, due to the way histograms are summed
# (number of additions per bin is not known in advance). See also comment in
# _subtract_histograms.
HISTOGRAM_DTYPE = np.dtype([
    ('sum_gradients', Y_DTYPE),  # sum of sample gradients in bin
    ('sum_hessians', Y_DTYPE),  # sum of sample hessians in bin
    ('count', np.uint32),  # number of samples in bin
])

# One record per predictor-tree node; must stay in sync with node_struct in
# common.pxd (both are packed/unaligned).
PREDICTOR_RECORD_DTYPE = np.dtype([
    ('value', Y_DTYPE),
    ('count', np.uint32),
    ('feature_idx', np.intp),
    ('num_threshold', X_DTYPE),
    ('missing_go_to_left', np.uint8),
    ('left', np.uint32),
    ('right', np.uint32),
    ('gain', Y_DTYPE),
    ('depth', np.uint32),
    ('is_leaf', np.uint8),
    ('bin_threshold', X_BINNED_DTYPE),
    ('is_categorical', np.uint8),
    # The index of the corresponding bitsets in the Predictor's bitset arrays.
    # Only used if is_categorical is True
    ('bitset_idx', np.uint32)
])

ALMOST_INF = 1e300  # see LightGBM AvoidInf()

View File

@@ -0,0 +1,822 @@
"""
This module contains the TreeGrower class.
TreeGrower builds a regression tree fitting a Newton-Raphson step, based on
the gradients and hessians of the training data.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numbers
from heapq import heappop, heappush
from timeit import default_timer as time
import numpy as np
from sklearn.ensemble._hist_gradient_boosting._bitset import (
set_raw_bitset_from_binned_bitset,
)
from sklearn.ensemble._hist_gradient_boosting.common import (
PREDICTOR_RECORD_DTYPE,
X_BITSET_INNER_DTYPE,
MonotonicConstraint,
)
from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder
from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor
from sklearn.ensemble._hist_gradient_boosting.splitting import Splitter
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
class TreeNode:
    """A training-time tree node used by TreeGrower.

    Prediction is handled by TreePredictor; this class only carries the
    bookkeeping needed while growing the tree.

    Parameters
    ----------
    depth : int
        The depth of the node, i.e. its distance from the root.
    sample_indices : ndarray of shape (n_samples_at_node,), dtype=np.uint32
        The indices of the samples at the node.
    partition_start : int
        start position of the node's sample_indices in splitter.partition.
    partition_stop : int
        stop position of the node's sample_indices in splitter.partition.
    sum_gradients : float
        The sum of the gradients of the samples at the node.
    sum_hessians : float
        The sum of the hessians of the samples at the node.

    Attributes
    ----------
    split_info : SplitInfo or None
        The result of the split evaluation, None until computed.
    is_leaf : bool
        True if node is a leaf.
    left_child, right_child : TreeNode or None
        Children of the node; None for leaves.
    value : float or None
        The leaf value, as computed in finalize_leaf(); None for non-leaf
        nodes.
    allowed_features : None or ndarray, dtype=int
        Indices of features allowed to split for children.
    interaction_cst_indices : None or list of ints
        Indices of the interaction sets applying to splits of child nodes.
        The fewer sets, the stronger the constraint.
    children_lower_bound, children_upper_bound : float
        Value bounds for the children, used for monotonic constraints.
    """

    def __init__(
        self,
        *,
        depth,
        sample_indices,
        partition_start,
        partition_stop,
        sum_gradients,
        sum_hessians,
        value=None,
    ):
        # Statistics of the samples sitting at this node.
        self.sample_indices = sample_indices
        self.n_samples = sample_indices.shape[0]
        self.sum_gradients = sum_gradients
        self.sum_hessians = sum_hessians
        self.depth = depth
        self.value = value
        # Split-related state, filled in later by the grower.
        self.is_leaf = False
        self.split_info = None
        self.left_child = None
        self.right_child = None
        self.histograms = None
        # Constraint-related state (interaction + monotonic constraints).
        self.allowed_features = None
        self.interaction_cst_indices = None
        self.set_children_bounds(float("-inf"), float("+inf"))
        # start and stop indices of the node in the splitter.partition array:
        # self.sample_indices = view(self.splitter.partition[start:stop]).
        # See the comments about splitter.partition / splitter.split_indices
        # for the rationale. They are only needed in _update_raw_prediction,
        # where we iterate over the leaves (the sample_indices views all have
        # different sizes, so they cannot be stored more directly).
        self.partition_start = partition_start
        self.partition_stop = partition_stop

    def set_children_bounds(self, lower, upper):
        """Set children values bounds to respect monotonic constraints."""
        # These bound the *children* values, not this node's value; the
        # splitter consults them when evaluating potential left/right
        # children.
        self.children_lower_bound = lower
        self.children_upper_bound = upper

    def __lt__(self, other_node):
        """Heap ordering: higher gain means higher priority.

        heapq pops the smallest item and only requires '<', so a node
        compares as "less" when its gain is strictly larger.

        Parameters
        ----------
        other_node : TreeNode
            The node to compare with.
        """
        my_gain = self.split_info.gain
        their_gain = other_node.split_info.gain
        return my_gain > their_gain
class TreeGrower:
"""Tree grower class used to build a tree.
The tree is fitted to predict the values of a Newton-Raphson step. The
splits are considered in a best-first fashion, and the quality of a
split is defined in splitting._split_gain.
Parameters
----------
X_binned : ndarray of shape (n_samples, n_features), dtype=np.uint8
The binned input samples. Must be Fortran-aligned.
gradients : ndarray of shape (n_samples,)
The gradients of each training sample. Those are the gradients of the
loss w.r.t the predictions, evaluated at iteration ``i - 1``.
hessians : ndarray of shape (n_samples,)
The hessians of each training sample. Those are the hessians of the
loss w.r.t the predictions, evaluated at iteration ``i - 1``.
max_leaf_nodes : int, default=None
The maximum number of leaves for each tree. If None, there is no
maximum limit.
max_depth : int, default=None
The maximum depth of each tree. The depth of a tree is the number of
edges to go from the root to the deepest leaf.
Depth isn't constrained by default.
min_samples_leaf : int, default=20
The minimum number of samples per leaf.
min_gain_to_split : float, default=0.
The minimum gain needed to split a node. Splits with lower gain will
be ignored.
min_hessian_to_split : float, default=1e-3
The minimum sum of hessians needed in each node. Splits that result in
at least one child having a sum of hessians less than
``min_hessian_to_split`` are discarded.
n_bins : int, default=256
The total number of bins, including the bin for missing values. Used
to define the shape of the histograms.
n_bins_non_missing : ndarray, dtype=np.uint32, default=None
For each feature, gives the number of bins actually used for
non-missing values. For features with a lot of unique values, this
is equal to ``n_bins - 1``. If it's an int, all features are
considered to have the same number of bins. If None, all features
are considered to have ``n_bins - 1`` bins.
has_missing_values : bool or ndarray, dtype=bool, default=False
Whether each feature contains missing values (in the training data).
If it's a bool, the same value is used for all features.
is_categorical : ndarray of bool of shape (n_features,), default=None
Indicates categorical features.
monotonic_cst : array-like of int of shape (n_features,), dtype=int, default=None
Indicates the monotonic constraint to enforce on each feature.
- 1: monotonic increase
- 0: no constraint
- -1: monotonic decrease
Read more in the :ref:`User Guide <monotonic_cst_gbdt>`.
interaction_cst : list of sets of integers, default=None
List of interaction constraints.
l2_regularization : float, default=0.
The L2 regularization parameter penalizing leaves with small hessians.
Use ``0`` for no regularization (default).
feature_fraction_per_split : float, default=1
Proportion of randomly chosen features in each and every node split.
This is a form of regularization, smaller values make the trees weaker
learners and might prevent overfitting.
rng : Generator
Numpy random Generator used for feature subsampling.
shrinkage : float, default=1.
The shrinkage parameter to apply to the leaves values, also known as
learning rate.
n_threads : int, default=None
Number of OpenMP threads to use. `_openmp_effective_n_threads` is called
to determine the effective number of threads use, which takes cgroups CPU
quotes into account. See the docstring of `_openmp_effective_n_threads`
for details.
Attributes
----------
histogram_builder : HistogramBuilder
splitter : Splitter
root : TreeNode
finalized_leaves : list of TreeNode
splittable_nodes : list of TreeNode
missing_values_bin_idx : int
Equals n_bins - 1
n_categorical_splits : int
n_features : int
n_nodes : int
total_find_split_time : float
Time spent finding the best splits
total_compute_hist_time : float
Time spent computing histograms
total_apply_split_time : float
Time spent splitting nodes
with_monotonic_cst : bool
Whether there are monotonic constraints that apply. False iff monotonic_cst is
None.
"""
def __init__(
    self,
    X_binned,
    gradients,
    hessians,
    max_leaf_nodes=None,
    max_depth=None,
    min_samples_leaf=20,
    min_gain_to_split=0.0,
    min_hessian_to_split=1e-3,
    n_bins=256,
    n_bins_non_missing=None,
    has_missing_values=False,
    is_categorical=None,
    monotonic_cst=None,
    interaction_cst=None,
    l2_regularization=0.0,
    feature_fraction_per_split=1.0,
    # NOTE(review): this default Generator is created once at import time and
    # is shared by every call that omits `rng` -- confirm this is intended.
    rng=np.random.default_rng(),
    shrinkage=1.0,
    n_threads=None,
):
    self._validate_parameters(
        X_binned,
        min_gain_to_split,
        min_hessian_to_split,
    )
    n_threads = _openmp_effective_n_threads(n_threads)
    # Normalize n_bins_non_missing to a per-feature uint32 array.
    if n_bins_non_missing is None:
        n_bins_non_missing = n_bins - 1
    if isinstance(n_bins_non_missing, numbers.Integral):
        n_bins_non_missing = np.array(
            [n_bins_non_missing] * X_binned.shape[1], dtype=np.uint32
        )
    else:
        n_bins_non_missing = np.asarray(n_bins_non_missing, dtype=np.uint32)
    # Normalize has_missing_values to a per-feature uint8 array.
    if isinstance(has_missing_values, bool):
        has_missing_values = [has_missing_values] * X_binned.shape[1]
    has_missing_values = np.asarray(has_missing_values, dtype=np.uint8)
    # `monotonic_cst` validation is done in _validate_monotonic_cst
    # at the estimator level and therefore the following should not be
    # needed when using the public API.
    if monotonic_cst is None:
        monotonic_cst = np.full(
            shape=X_binned.shape[1],
            fill_value=MonotonicConstraint.NO_CST,
            dtype=np.int8,
        )
    else:
        monotonic_cst = np.asarray(monotonic_cst, dtype=np.int8)
    self.with_monotonic_cst = np.any(monotonic_cst != MonotonicConstraint.NO_CST)
    if is_categorical is None:
        is_categorical = np.zeros(shape=X_binned.shape[1], dtype=np.uint8)
    else:
        is_categorical = np.asarray(is_categorical, dtype=np.uint8)
    # Monotonic constraints are undefined for unordered (categorical) values.
    if np.any(
        np.logical_and(
            is_categorical == 1, monotonic_cst != MonotonicConstraint.NO_CST
        )
    ):
        raise ValueError("Categorical features cannot have monotonic constraints.")
    # A single-element hessians array encodes "constant hessian for all
    # samples" (e.g. least-squares loss).
    hessians_are_constant = hessians.shape[0] == 1
    self.histogram_builder = HistogramBuilder(
        X_binned, n_bins, gradients, hessians, hessians_are_constant, n_threads
    )
    # The last bin is reserved for missing values, for every feature.
    missing_values_bin_idx = n_bins - 1
    self.splitter = Splitter(
        X_binned=X_binned,
        n_bins_non_missing=n_bins_non_missing,
        missing_values_bin_idx=missing_values_bin_idx,
        has_missing_values=has_missing_values,
        is_categorical=is_categorical,
        monotonic_cst=monotonic_cst,
        l2_regularization=l2_regularization,
        min_hessian_to_split=min_hessian_to_split,
        min_samples_leaf=min_samples_leaf,
        min_gain_to_split=min_gain_to_split,
        hessians_are_constant=hessians_are_constant,
        feature_fraction_per_split=feature_fraction_per_split,
        rng=rng,
        n_threads=n_threads,
    )
    self.X_binned = X_binned
    self.max_leaf_nodes = max_leaf_nodes
    self.max_depth = max_depth
    self.min_samples_leaf = min_samples_leaf
    self.min_gain_to_split = min_gain_to_split
    self.n_bins_non_missing = n_bins_non_missing
    self.missing_values_bin_idx = missing_values_bin_idx
    self.has_missing_values = has_missing_values
    self.is_categorical = is_categorical
    self.monotonic_cst = monotonic_cst
    self.interaction_cst = interaction_cst
    self.l2_regularization = l2_regularization
    self.shrinkage = shrinkage
    self.n_features = X_binned.shape[1]
    self.n_threads = n_threads
    self.splittable_nodes = []
    self.finalized_leaves = []
    self.total_find_split_time = 0.0  # time spent finding the best splits
    self.total_compute_hist_time = 0.0  # time spent computing histograms
    self.total_apply_split_time = 0.0  # time spent splitting nodes
    self.n_categorical_splits = 0
    # Creates the root node (and may already finalize it as a leaf).
    self._initialize_root()
    self.n_nodes = 1
def _validate_parameters(
    self,
    X_binned,
    min_gain_to_split,
    min_hessian_to_split,
):
    """Validate parameters passed to __init__.

    Also validate parameters passed to splitter.
    """
    # The binned matrix must be uint8 (max 256 bins) and Fortran-ordered so
    # that per-feature columns are contiguous.
    if X_binned.dtype != np.uint8:
        raise NotImplementedError("X_binned must be of type uint8.")
    if not X_binned.flags.f_contiguous:
        raise ValueError(
            "X_binned should be passed as Fortran contiguous "
            "array for maximum efficiency."
        )
    # Both thresholds must be non-negative; the error messages are
    # identical in shape, so check them in one loop.
    for param_name, param_value in (
        ("min_gain_to_split", min_gain_to_split),
        ("min_hessian_to_split", min_hessian_to_split),
    ):
        if param_value < 0:
            raise ValueError(
                "{}={} must be positive.".format(param_name, param_value)
            )
def grow(self):
    """Grow the tree, from root to leaves."""
    # split_next() pops the best node from the heap and may push its
    # children back, so we simply loop until the heap is exhausted.
    while len(self.splittable_nodes) > 0:
        self.split_next()
    # Apply the learning rate only once the tree is fully grown.
    self._apply_shrinkage()
def _apply_shrinkage(self):
    """Multiply leaves values by shrinkage parameter.

    This must be done at the very end of the growing process: if it were
    done during growth, e.g. in finalize_leaf(), a leaf could be shrunk
    while its (non-leaf) sibling is not, corrupting the 'middle' value
    needed to enforce the monotonic constraints.
    """
    shrinkage = self.shrinkage
    for leaf in self.finalized_leaves:
        leaf.value = leaf.value * shrinkage
def _initialize_root(self):
    """Initialize root node and finalize it if needed."""
    tic = time()
    if self.interaction_cst is not None:
        # The root may only split on features that appear in at least one
        # interaction set.
        allowed_features = set().union(*self.interaction_cst)
        allowed_features = np.fromiter(
            allowed_features, dtype=np.uint32, count=len(allowed_features)
        )
        arbitrary_feature = allowed_features[0]
    else:
        allowed_features = None
        arbitrary_feature = 0
    # TreeNode init needs the total sum of gradients and hessians. Therefore, we
    # first compute the histograms and then compute the total grad/hess on an
    # arbitrary feature histogram. This way we replace a loop over n_samples by a
    # loop over n_bins.
    histograms = self.histogram_builder.compute_histograms_brute(
        self.splitter.partition,  # =self.root.sample_indices
        allowed_features,
    )
    self.total_compute_hist_time += time() - tic
    tic = time()
    n_samples = self.X_binned.shape[0]
    depth = 0
    # Any feature histogram sums to the full grad/hess totals, since every
    # sample lands in exactly one bin of each feature.
    histogram_array = np.asarray(histograms[arbitrary_feature])
    sum_gradients = histogram_array["sum_gradients"].sum()
    if self.histogram_builder.hessians_are_constant:
        sum_hessians = self.histogram_builder.hessians[0] * n_samples
    else:
        sum_hessians = histogram_array["sum_hessians"].sum()
    self.root = TreeNode(
        depth=depth,
        sample_indices=self.splitter.partition,
        partition_start=0,
        partition_stop=n_samples,
        sum_gradients=sum_gradients,
        sum_hessians=sum_hessians,
        value=0,
    )
    if self.root.n_samples < 2 * self.min_samples_leaf:
        # Do not even bother computing any splitting statistics.
        self._finalize_leaf(self.root)
        return
    if sum_hessians < self.splitter.min_hessian_to_split:
        # No split could satisfy the hessian constraint anyway.
        self._finalize_leaf(self.root)
        return
    if self.interaction_cst is not None:
        # At the root, every interaction set still applies.
        self.root.interaction_cst_indices = range(len(self.interaction_cst))
        self.root.allowed_features = allowed_features
    self.root.histograms = histograms
    self._compute_best_split_and_push(self.root)
    self.total_find_split_time += time() - tic
def _compute_best_split_and_push(self, node):
    """Compute the best possible split (SplitInfo) of a given node.

    Also push it in the heap of splittable nodes if gain isn't zero.
    The gain of a node is 0 if either all the leaves are pure
    (best gain = 0), or if no split would satisfy the constraints,
    (min_hessians_to_split, min_gain_to_split, min_samples_leaf)
    """
    split_info = self.splitter.find_node_split(
        n_samples=node.n_samples,
        histograms=node.histograms,
        sum_gradients=node.sum_gradients,
        sum_hessians=node.sum_hessians,
        value=node.value,
        lower_bound=node.children_lower_bound,
        upper_bound=node.children_upper_bound,
        allowed_features=node.allowed_features,
    )
    node.split_info = split_info
    if split_info.gain > 0:
        heappush(self.splittable_nodes, node)
    else:
        # No valid split: the node becomes a leaf.
        self._finalize_leaf(node)
def split_next(self):
    """Split the node with highest potential gain.

    Pops the best splittable node off the heap, partitions its samples
    into two children, sets up the children's histograms and split
    candidates (unless they are finalized as leaves), and returns the
    two children.

    Returns
    -------
    left : TreeNode
        The resulting left child.
    right : TreeNode
        The resulting right child.
    """
    # Consider the node with the highest loss reduction (a.k.a. gain)
    node = heappop(self.splittable_nodes)

    tic = time()
    (
        sample_indices_left,
        sample_indices_right,
        right_child_pos,
    ) = self.splitter.split_indices(node.split_info, node.sample_indices)
    self.total_apply_split_time += time() - tic

    depth = node.depth + 1
    # Leaf count after this split: the current leaves, the remaining
    # splittable nodes, plus the two children created below.
    n_leaf_nodes = len(self.finalized_leaves) + len(self.splittable_nodes)
    n_leaf_nodes += 2

    # The children share the parent's partition: the left child owns
    # [partition_start, partition_start + right_child_pos), the right
    # child owns the remainder up to the parent's partition_stop.
    left_child_node = TreeNode(
        depth=depth,
        sample_indices=sample_indices_left,
        partition_start=node.partition_start,
        partition_stop=node.partition_start + right_child_pos,
        sum_gradients=node.split_info.sum_gradient_left,
        sum_hessians=node.split_info.sum_hessian_left,
        value=node.split_info.value_left,
    )
    right_child_node = TreeNode(
        depth=depth,
        sample_indices=sample_indices_right,
        partition_start=left_child_node.partition_stop,
        partition_stop=node.partition_stop,
        sum_gradients=node.split_info.sum_gradient_right,
        sum_hessians=node.split_info.sum_hessian_right,
        value=node.split_info.value_right,
    )

    node.right_child = right_child_node
    node.left_child = left_child_node

    # set interaction constraints (the indices of the constraints sets)
    if self.interaction_cst is not None:
        # Calculate allowed_features and interaction_cst_indices only once. Child
        # nodes inherit them before they get split.
        (
            left_child_node.allowed_features,
            left_child_node.interaction_cst_indices,
        ) = self._compute_interactions(node)
        right_child_node.interaction_cst_indices = (
            left_child_node.interaction_cst_indices
        )
        right_child_node.allowed_features = left_child_node.allowed_features

    if not self.has_missing_values[node.split_info.feature_idx]:
        # If no missing values are encountered at fit time, then samples
        # with missing values during predict() will go to whichever child
        # has the most samples.
        node.split_info.missing_go_to_left = (
            left_child_node.n_samples > right_child_node.n_samples
        )

    self.n_nodes += 2
    self.n_categorical_splits += node.split_info.is_categorical

    # Stopping criteria: max_leaf_nodes finalizes everything still
    # splittable, max_depth only finalizes the two new children.
    if self.max_leaf_nodes is not None and n_leaf_nodes == self.max_leaf_nodes:
        self._finalize_leaf(left_child_node)
        self._finalize_leaf(right_child_node)
        self._finalize_splittable_nodes()
        return left_child_node, right_child_node

    if self.max_depth is not None and depth == self.max_depth:
        self._finalize_leaf(left_child_node)
        self._finalize_leaf(right_child_node)
        return left_child_node, right_child_node

    # A child with fewer than 2 * min_samples_leaf samples cannot produce
    # two valid leaves, so it cannot be split further.
    if left_child_node.n_samples < self.min_samples_leaf * 2:
        self._finalize_leaf(left_child_node)
    if right_child_node.n_samples < self.min_samples_leaf * 2:
        self._finalize_leaf(right_child_node)

    if self.with_monotonic_cst:
        # Set value bounds for respecting monotonic constraints
        # See test_nodes_values() for details
        if (
            self.monotonic_cst[node.split_info.feature_idx]
            == MonotonicConstraint.NO_CST
        ):
            lower_left = lower_right = node.children_lower_bound
            upper_left = upper_right = node.children_upper_bound
        else:
            mid = (left_child_node.value + right_child_node.value) / 2
            if (
                self.monotonic_cst[node.split_info.feature_idx]
                == MonotonicConstraint.POS
            ):
                lower_left, upper_left = node.children_lower_bound, mid
                lower_right, upper_right = mid, node.children_upper_bound
            else:  # NEG
                lower_left, upper_left = mid, node.children_upper_bound
                lower_right, upper_right = node.children_lower_bound, mid
        left_child_node.set_children_bounds(lower_left, upper_left)
        right_child_node.set_children_bounds(lower_right, upper_right)

    # Compute histograms of children, and compute their best possible split
    # (if needed)
    should_split_left = not left_child_node.is_leaf
    should_split_right = not right_child_node.is_leaf
    if should_split_left or should_split_right:
        # We will compute the histograms of both nodes even if one of them
        # is a leaf, since computing the second histogram is very cheap
        # (using histogram subtraction).
        n_samples_left = left_child_node.sample_indices.shape[0]
        n_samples_right = right_child_node.sample_indices.shape[0]
        if n_samples_left < n_samples_right:
            smallest_child = left_child_node
            largest_child = right_child_node
        else:
            smallest_child = right_child_node
            largest_child = left_child_node

        # We use the brute O(n_samples) method on the child that has the
        # smallest number of samples, and the subtraction trick O(n_bins)
        # on the other one.
        # Note that both left and right child have the same allowed_features.
        tic = time()
        smallest_child.histograms = self.histogram_builder.compute_histograms_brute(
            smallest_child.sample_indices, smallest_child.allowed_features
        )
        largest_child.histograms = (
            self.histogram_builder.compute_histograms_subtraction(
                node.histograms,
                smallest_child.histograms,
                smallest_child.allowed_features,
            )
        )
        # node.histograms is reused in largest_child.histograms. To break cyclic
        # memory references and help garbage collection, we set it to None.
        node.histograms = None
        self.total_compute_hist_time += time() - tic

        tic = time()
        if should_split_left:
            self._compute_best_split_and_push(left_child_node)
        if should_split_right:
            self._compute_best_split_and_push(right_child_node)
        self.total_find_split_time += time() - tic

        # Release memory used by histograms as they are no longer needed
        # for leaf nodes since they won't be split.
        for child in (left_child_node, right_child_node):
            if child.is_leaf:
                del child.histograms

    # Release memory used by histograms as they are no longer needed for
    # internal nodes once children histograms have been computed.
    del node.histograms

    return left_child_node, right_child_node
def _compute_interactions(self, node):
    r"""Compute features allowed by interactions to be inherited by child nodes.

    Example: Assume constraints [{0, 1}, {1, 2}].
       1      <- Both constraint groups could be applied from now on
      / \
     1   2    <- Left split still fulfills both constraint groups.
    / \ / \      Right split at feature 2 has only group {1, 2} from now on.

    LightGBM uses the same logic for overlapping groups. See
    https://github.com/microsoft/LightGBM/issues/4481 for details.

    Parameters:
    ----------
    node : TreeNode
        A node that might have children. Based on its feature_idx, the interaction
        constraints for possible child nodes are computed.

    Returns
    -------
    allowed_features : ndarray, dtype=uint32
        Indices of features allowed to split for children.

    interaction_cst_indices : list of ints
        Indices of the interaction sets that have to be applied on splits of
        child nodes. The fewer sets the stronger the constraint as fewer sets
        contain fewer features.
    """
    # Note:
    # - Case of no interactions is already captured before function call.
    # - This is for nodes that are already split and have a
    #   node.split_info.feature_idx.
    split_feature = node.split_info.feature_idx
    kept_indices = []
    allowed = set()
    # Keep only the constraint groups that contain the feature we just
    # split on; the children may use any feature in the kept groups.
    for cst_idx in node.interaction_cst_indices:
        feature_group = self.interaction_cst[cst_idx]
        if split_feature not in feature_group:
            continue
        kept_indices.append(cst_idx)
        allowed |= feature_group
    return (
        np.fromiter(allowed, dtype=np.uint32, count=len(allowed)),
        kept_indices,
    )
def _finalize_leaf(self, node):
    """Mark ``node`` as a finished leaf and record it.

    A finalized node will never be split again.
    """
    self.finalized_leaves.append(node)
    node.is_leaf = True
def _finalize_splittable_nodes(self):
    """Transform all splittable nodes into leaves.

    Used when some constraint is met e.g. maximum number of leaves or
    maximum depth.
    """
    # Drain the heap, turning every pending node into a leaf.
    while self.splittable_nodes:
        self._finalize_leaf(self.splittable_nodes.pop())
def make_predictor(self, binning_thresholds):
    """Make a TreePredictor object out of the current tree.

    Parameters
    ----------
    binning_thresholds : array-like of floats
        Corresponds to the bin_thresholds_ attribute of the BinMapper.
        For each feature, this stores:

        - the bin frontiers for continuous features
        - the unique raw category values for categorical features

    Returns
    -------
    A TreePredictor object.
    """
    nodes = np.zeros(self.n_nodes, dtype=PREDICTOR_RECORD_DTYPE)
    # One 8 x uint32 bitset (256 bits) per categorical split, in both
    # binned and raw-category space.
    bitsets_shape = (self.n_categorical_splits, 8)
    binned_left_cat_bitsets = np.zeros(bitsets_shape, dtype=X_BITSET_INNER_DTYPE)
    raw_left_cat_bitsets = np.zeros(bitsets_shape, dtype=X_BITSET_INNER_DTYPE)
    # Recursively flatten the grown tree into the predictor arrays.
    _fill_predictor_arrays(
        nodes,
        binned_left_cat_bitsets,
        raw_left_cat_bitsets,
        self.root,
        binning_thresholds,
        self.n_bins_non_missing,
    )
    return TreePredictor(nodes, binned_left_cat_bitsets, raw_left_cat_bitsets)
def _fill_predictor_arrays(
    predictor_nodes,
    binned_left_cat_bitsets,
    raw_left_cat_bitsets,
    grower_node,
    binning_thresholds,
    n_bins_non_missing,
    next_free_node_idx=0,
    next_free_bitset_idx=0,
):
    """Helper used in make_predictor to set the TreePredictor fields.

    Recursively writes the grower tree rooted at ``grower_node`` into
    ``predictor_nodes`` in pre-order (node, left subtree, right subtree),
    filling the categorical-split bitset arrays along the way.

    Returns the pair ``(next_free_node_idx, next_free_bitset_idx)`` after
    consuming this subtree, so that sibling calls can continue from there.
    """
    node = predictor_nodes[next_free_node_idx]
    node["count"] = grower_node.n_samples
    node["depth"] = grower_node.depth
    # A node finalized as a leaf before any split was computed has no
    # split_info; use -1 as a "no gain" marker.
    if grower_node.split_info is not None:
        node["gain"] = grower_node.split_info.gain
    else:
        node["gain"] = -1

    node["value"] = grower_node.value

    if grower_node.is_leaf:
        # Leaf node
        node["is_leaf"] = True
        return next_free_node_idx + 1, next_free_bitset_idx

    split_info = grower_node.split_info
    feature_idx, bin_idx = split_info.feature_idx, split_info.bin_idx
    node["feature_idx"] = feature_idx
    node["bin_threshold"] = bin_idx
    node["missing_go_to_left"] = split_info.missing_go_to_left
    node["is_categorical"] = split_info.is_categorical

    if split_info.bin_idx == n_bins_non_missing[feature_idx] - 1:
        # Split is on the last non-missing bin: it's a "split on nans".
        # All nans go to the right, the rest go to the left.
        # Note: for categorical splits, bin_idx is 0 and we rely on the bitset
        node["num_threshold"] = np.inf
    elif split_info.is_categorical:
        categories = binning_thresholds[feature_idx]
        node["bitset_idx"] = next_free_bitset_idx
        binned_left_cat_bitsets[next_free_bitset_idx] = split_info.left_cat_bitset
        # Translate the binned-category bitset into raw-category space so
        # predict() can work on unbinned data.
        set_raw_bitset_from_binned_bitset(
            raw_left_cat_bitsets[next_free_bitset_idx],
            split_info.left_cat_bitset,
            categories,
        )
        next_free_bitset_idx += 1
    else:
        node["num_threshold"] = binning_thresholds[feature_idx][bin_idx]

    next_free_node_idx += 1

    # Pre-order layout: the left child immediately follows this node.
    node["left"] = next_free_node_idx
    next_free_node_idx, next_free_bitset_idx = _fill_predictor_arrays(
        predictor_nodes,
        binned_left_cat_bitsets,
        raw_left_cat_bitsets,
        grower_node.left_child,
        binning_thresholds=binning_thresholds,
        n_bins_non_missing=n_bins_non_missing,
        next_free_node_idx=next_free_node_idx,
        next_free_bitset_idx=next_free_bitset_idx,
    )

    # The right child starts right after the whole left subtree.
    node["right"] = next_free_node_idx
    return _fill_predictor_arrays(
        predictor_nodes,
        binned_left_cat_bitsets,
        raw_left_cat_bitsets,
        grower_node.right_child,
        binning_thresholds=binning_thresholds,
        n_bins_non_missing=n_bins_non_missing,
        next_free_node_idx=next_free_node_idx,
        next_free_bitset_idx=next_free_bitset_idx,
    )

View File

@@ -0,0 +1,520 @@
"""This module contains routines for building histograms."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
cimport cython
from cython.parallel import prange
from libc.string cimport memset
import numpy as np
from sklearn.ensemble._hist_gradient_boosting.common import HISTOGRAM_DTYPE
from sklearn.ensemble._hist_gradient_boosting.common cimport hist_struct
from sklearn.ensemble._hist_gradient_boosting.common cimport X_BINNED_DTYPE_C
from sklearn.ensemble._hist_gradient_boosting.common cimport G_H_DTYPE_C
from sklearn.utils._typedefs cimport uint8_t
# Notes:
# - IN views are read-only, OUT views are write-only
# - In a lot of functions here, we pass feature_idx and the whole 2d
# histograms arrays instead of just histograms[feature_idx]. This is because
# Cython generated C code will have strange Python interactions (likely
# related to the GIL release and the custom histogram dtype) when using 1d
# histogram arrays that come from 2d arrays.
# - The for loops are un-wrapped, for example:
#
# for i in range(n):
# array[i] = i
#
# will become
#
# for i in range(n // 4):
# array[i] = i
# array[i + 1] = i + 1
# array[i + 2] = i + 2
# array[i + 3] = i + 3
#
# This is to hint gcc that it can auto-vectorize these 4 operations and
# perform them all at once.
@cython.final
cdef class HistogramBuilder:
    """A Histogram builder... used to build histograms.

    A histogram is an array with n_bins entries of type HISTOGRAM_DTYPE. Each
    feature has its own histogram. A histogram contains the sum of gradients
    and hessians of all the samples belonging to each bin.

    There are different ways to build a histogram:
    - by subtraction: hist(child) = hist(parent) - hist(sibling)
    - from scratch. In this case we have routines that update the hessians
      or not (not useful when hessians are constant for some losses e.g.
      least squares). Also, there's a special case for the root which
      contains all the samples, leading to some possible optimizations.
      Overall all the implementations look the same, and are optimized for
      cache hit.

    Parameters
    ----------
    X_binned : ndarray of int, shape (n_samples, n_features)
        The binned input samples. Must be Fortran-aligned.
    n_bins : int
        The total number of bins, including the bin for missing values. Used
        to define the shape of the histograms.
    gradients : ndarray, shape (n_samples,)
        The gradients of each training sample. Those are the gradients of the
        loss w.r.t the predictions, evaluated at iteration i - 1.
    hessians : ndarray, shape (n_samples,)
        The hessians of each training sample. Those are the hessians of the
        loss w.r.t the predictions, evaluated at iteration i - 1.
    hessians_are_constant : bool
        Whether hessians are constant.
    """
    cdef public:
        const X_BINNED_DTYPE_C [::1, :] X_binned
        unsigned int n_features
        unsigned int n_bins
        G_H_DTYPE_C [::1] gradients
        G_H_DTYPE_C [::1] hessians
        # Scratch buffers holding gradients/hessians reordered by the node's
        # sample_indices, so histogram builds scan them sequentially.
        G_H_DTYPE_C [::1] ordered_gradients
        G_H_DTYPE_C [::1] ordered_hessians
        uint8_t hessians_are_constant
        int n_threads

    def __init__(self, const X_BINNED_DTYPE_C [::1, :] X_binned,
                 unsigned int n_bins, G_H_DTYPE_C [::1] gradients,
                 G_H_DTYPE_C [::1] hessians,
                 uint8_t hessians_are_constant,
                 int n_threads):

        self.X_binned = X_binned
        self.n_features = X_binned.shape[1]
        # Note: all histograms will have <n_bins> bins, but some of the
        # bins may be unused if a feature has a small number of unique values.
        self.n_bins = n_bins
        self.gradients = gradients
        self.hessians = hessians
        # for root node, gradients and hessians are already ordered
        self.ordered_gradients = gradients.copy()
        self.ordered_hessians = hessians.copy()
        self.hessians_are_constant = hessians_are_constant
        self.n_threads = n_threads

    def compute_histograms_brute(
        HistogramBuilder self,
        const unsigned int [::1] sample_indices,       # IN
        const unsigned int [:] allowed_features=None,  # IN
    ):
        """Compute the histograms of the node by scanning through all the data.

        For a given feature, the complexity is O(n_samples)

        Parameters
        ----------
        sample_indices : array of int, shape (n_samples_at_node,)
            The indices of the samples at the node to split.

        allowed_features : None or ndarray, dtype=np.uint32
            Indices of the features that are allowed by interaction constraints to be
            split.

        Returns
        -------
        histograms : ndarray of HISTOGRAM_DTYPE, shape (n_features, n_bins)
            The computed histograms of the current node.
        """
        cdef:
            int n_samples
            int feature_idx
            int f_idx
            int i
            # need local views to avoid python interactions
            uint8_t hessians_are_constant = self.hessians_are_constant
            int n_allowed_features = self.n_features
            G_H_DTYPE_C [::1] ordered_gradients = self.ordered_gradients
            G_H_DTYPE_C [::1] gradients = self.gradients
            G_H_DTYPE_C [::1] ordered_hessians = self.ordered_hessians
            G_H_DTYPE_C [::1] hessians = self.hessians
            # Histograms will be initialized to zero later within a prange
            hist_struct [:, ::1] histograms = np.empty(
                shape=(self.n_features, self.n_bins),
                dtype=HISTOGRAM_DTYPE
            )
            bint has_interaction_cst = allowed_features is not None
            int n_threads = self.n_threads

        if has_interaction_cst:
            n_allowed_features = allowed_features.shape[0]

        with nogil:
            n_samples = sample_indices.shape[0]

            # Populate ordered_gradients and ordered_hessians. (Already done
            # for root) Ordering the gradients and hessians helps to improve
            # cache hit.
            if sample_indices.shape[0] != gradients.shape[0]:
                if hessians_are_constant:
                    for i in prange(n_samples, schedule='static',
                                    num_threads=n_threads):
                        ordered_gradients[i] = gradients[sample_indices[i]]
                else:
                    for i in prange(n_samples, schedule='static',
                                    num_threads=n_threads):
                        ordered_gradients[i] = gradients[sample_indices[i]]
                        ordered_hessians[i] = hessians[sample_indices[i]]

            # Compute histogram of each feature
            for f_idx in prange(
                n_allowed_features, schedule='static', num_threads=n_threads
            ):
                if has_interaction_cst:
                    feature_idx = allowed_features[f_idx]
                else:
                    feature_idx = f_idx

                self._compute_histogram_brute_single_feature(
                    feature_idx, sample_indices, histograms
                )

        return histograms

    cdef void _compute_histogram_brute_single_feature(
            HistogramBuilder self,
            const int feature_idx,
            const unsigned int [::1] sample_indices,  # IN
            hist_struct [:, ::1] histograms) noexcept nogil:  # OUT
        """Compute the histogram for a given feature."""
        cdef:
            unsigned int n_samples = sample_indices.shape[0]
            const X_BINNED_DTYPE_C [::1] X_binned = \
                self.X_binned[:, feature_idx]
            # Root node detected by the node holding every sample; this
            # selects the faster root routines below.
            unsigned int root_node = X_binned.shape[0] == n_samples
            G_H_DTYPE_C [::1] ordered_gradients = \
                self.ordered_gradients[:n_samples]
            G_H_DTYPE_C [::1] ordered_hessians = \
                self.ordered_hessians[:n_samples]
            uint8_t hessians_are_constant = \
                self.hessians_are_constant

        # Set histograms to zero.
        memset(&histograms[feature_idx, 0], 0, self.n_bins * sizeof(hist_struct))

        if root_node:
            if hessians_are_constant:
                _build_histogram_root_no_hessian(feature_idx, X_binned,
                                                 ordered_gradients,
                                                 histograms)
            else:
                _build_histogram_root(feature_idx, X_binned,
                                      ordered_gradients, ordered_hessians,
                                      histograms)
        else:
            if hessians_are_constant:
                _build_histogram_no_hessian(feature_idx,
                                            sample_indices, X_binned,
                                            ordered_gradients, histograms)
            else:
                _build_histogram(feature_idx, sample_indices,
                                 X_binned, ordered_gradients,
                                 ordered_hessians, histograms)

    def compute_histograms_subtraction(
        HistogramBuilder self,
        hist_struct [:, ::1] parent_histograms,        # IN and OUT
        hist_struct [:, ::1] sibling_histograms,       # IN
        const unsigned int [:] allowed_features=None,  # IN
    ):
        """Compute the histograms of the node using the subtraction trick.

        hist(parent) = hist(left_child) + hist(right_child)

        For a given feature, the complexity is O(n_bins). This is much more
        efficient than compute_histograms_brute, but it's only possible for one
        of the siblings.

        Parameters
        ----------
        parent_histograms : ndarray of HISTOGRAM_DTYPE, \
                shape (n_features, n_bins)
            The histograms of the parent.
        sibling_histograms : ndarray of HISTOGRAM_DTYPE, \
                shape (n_features, n_bins)
            The histograms of the sibling.
        allowed_features : None or ndarray, dtype=np.uint32
            Indices of the features that are allowed by interaction constraints to be
            split.

        Returns
        -------
        histograms : ndarray of HISTOGRAM_DTYPE, shape(n_features, n_bins)
            The computed histograms of the current node.
            We repurpose parent_histograms for this and don't need to allocate new
            memory.
        """
        cdef:
            int feature_idx
            int f_idx
            int n_allowed_features = self.n_features
            bint has_interaction_cst = allowed_features is not None
            int n_threads = self.n_threads

        if has_interaction_cst:
            n_allowed_features = allowed_features.shape[0]

        # Compute histogram of each feature
        for f_idx in prange(n_allowed_features, schedule='static', nogil=True,
                            num_threads=n_threads):
            if has_interaction_cst:
                feature_idx = allowed_features[f_idx]
            else:
                feature_idx = f_idx

            # In-place: parent_histograms becomes the current node's
            # histograms (parent minus sibling).
            _subtract_histograms(
                feature_idx,
                self.n_bins,
                parent_histograms,
                sibling_histograms,
            )
        return parent_histograms
cpdef void _build_histogram_naive(
        const int feature_idx,
        unsigned int [:] sample_indices,  # IN
        X_BINNED_DTYPE_C [:] binned_feature,  # IN
        G_H_DTYPE_C [:] ordered_gradients,  # IN
        G_H_DTYPE_C [:] ordered_hessians,  # IN
        hist_struct [:, :] out) noexcept nogil:  # OUT
    """Build histogram in a naive way, without optimizing for cache hit.

    Used in tests to compare with the optimized version.

    For each sample at the node, accumulate its gradient, hessian and a
    unit count into the bin it falls into for this feature.
    """
    cdef:
        unsigned int i
        unsigned int n_samples = sample_indices.shape[0]
        unsigned int sample_idx
        unsigned int bin_idx

    for i in range(n_samples):
        sample_idx = sample_indices[i]
        bin_idx = binned_feature[sample_idx]
        out[feature_idx, bin_idx].sum_gradients += ordered_gradients[i]
        out[feature_idx, bin_idx].sum_hessians += ordered_hessians[i]
        out[feature_idx, bin_idx].count += 1
cpdef void _subtract_histograms(
        const int feature_idx,
        unsigned int n_bins,
        hist_struct [:, ::1] hist_a,  # IN and OUT
        hist_struct [:, ::1] hist_b,  # IN
) noexcept nogil:  # OUT
    """compute hist_a = hist_a - hist_b, in place, for one feature."""
    # Note that subtraction of large sums of floating point numbers, as we have here,
    # can exhibit catastrophic cancellation. This is in particular true for gradients
    # as they can be positive and negative, while hessians are non-negative.
    # Remember that gradients and hessians are originally computed in
    # G_H_DTYPE_C = float32 precision. Therefore, if sum_gradients and sum_hessians are
    # float64, we don't lose precision. But if we also used float32 for summation, we
    # would need to take care of floating point errors.
    #
    # Note that we could protect for negative hessians by setting:
    #     sum_hessians = max(0, sum_hessians)
    # But as we use float64 for summing float32, that's very unlikely to be needed.
    cdef:
        unsigned int i = 0
    for i in range(n_bins):
        hist_a[feature_idx, i].sum_gradients -= hist_b[feature_idx, i].sum_gradients
        hist_a[feature_idx, i].sum_hessians -= hist_b[feature_idx, i].sum_hessians
        hist_a[feature_idx, i].count -= hist_b[feature_idx, i].count
cpdef void _build_histogram(
        const int feature_idx,
        const unsigned int [::1] sample_indices,  # IN
        const X_BINNED_DTYPE_C [::1] binned_feature,  # IN
        const G_H_DTYPE_C [::1] ordered_gradients,  # IN
        const G_H_DTYPE_C [::1] ordered_hessians,  # IN
        hist_struct [:, ::1] out) noexcept nogil:  # OUT
    """Return histogram for a given feature.

    The main loop is manually unrolled by 4 (see the module-level notes) to
    hint the C compiler towards auto-vectorization; the trailing samples
    are handled in the scalar loop at the end.
    """
    cdef:
        unsigned int i = 0
        unsigned int n_node_samples = sample_indices.shape[0]
        unsigned int unrolled_upper = (n_node_samples // 4) * 4
        unsigned int bin_0
        unsigned int bin_1
        unsigned int bin_2
        unsigned int bin_3
        unsigned int bin_idx

    for i in range(0, unrolled_upper, 4):
        bin_0 = binned_feature[sample_indices[i]]
        bin_1 = binned_feature[sample_indices[i + 1]]
        bin_2 = binned_feature[sample_indices[i + 2]]
        bin_3 = binned_feature[sample_indices[i + 3]]

        out[feature_idx, bin_0].sum_gradients += ordered_gradients[i]
        out[feature_idx, bin_1].sum_gradients += ordered_gradients[i + 1]
        out[feature_idx, bin_2].sum_gradients += ordered_gradients[i + 2]
        out[feature_idx, bin_3].sum_gradients += ordered_gradients[i + 3]

        out[feature_idx, bin_0].sum_hessians += ordered_hessians[i]
        out[feature_idx, bin_1].sum_hessians += ordered_hessians[i + 1]
        out[feature_idx, bin_2].sum_hessians += ordered_hessians[i + 2]
        out[feature_idx, bin_3].sum_hessians += ordered_hessians[i + 3]

        out[feature_idx, bin_0].count += 1
        out[feature_idx, bin_1].count += 1
        out[feature_idx, bin_2].count += 1
        out[feature_idx, bin_3].count += 1

    for i in range(unrolled_upper, n_node_samples):
        bin_idx = binned_feature[sample_indices[i]]
        out[feature_idx, bin_idx].sum_gradients += ordered_gradients[i]
        out[feature_idx, bin_idx].sum_hessians += ordered_hessians[i]
        out[feature_idx, bin_idx].count += 1
cpdef void _build_histogram_no_hessian(
        const int feature_idx,
        const unsigned int [::1] sample_indices,  # IN
        const X_BINNED_DTYPE_C [::1] binned_feature,  # IN
        const G_H_DTYPE_C [::1] ordered_gradients,  # IN
        hist_struct [:, ::1] out) noexcept nogil:  # OUT
    """Return histogram for a given feature, not updating hessians.

    Used when the hessians of the loss are constant (typically LS loss).
    Same 4-way unrolled structure as _build_histogram, minus the hessian
    accumulation.
    """
    cdef:
        unsigned int i = 0
        unsigned int n_node_samples = sample_indices.shape[0]
        unsigned int unrolled_upper = (n_node_samples // 4) * 4
        unsigned int bin_0
        unsigned int bin_1
        unsigned int bin_2
        unsigned int bin_3
        unsigned int bin_idx

    for i in range(0, unrolled_upper, 4):
        bin_0 = binned_feature[sample_indices[i]]
        bin_1 = binned_feature[sample_indices[i + 1]]
        bin_2 = binned_feature[sample_indices[i + 2]]
        bin_3 = binned_feature[sample_indices[i + 3]]

        out[feature_idx, bin_0].sum_gradients += ordered_gradients[i]
        out[feature_idx, bin_1].sum_gradients += ordered_gradients[i + 1]
        out[feature_idx, bin_2].sum_gradients += ordered_gradients[i + 2]
        out[feature_idx, bin_3].sum_gradients += ordered_gradients[i + 3]

        out[feature_idx, bin_0].count += 1
        out[feature_idx, bin_1].count += 1
        out[feature_idx, bin_2].count += 1
        out[feature_idx, bin_3].count += 1

    for i in range(unrolled_upper, n_node_samples):
        bin_idx = binned_feature[sample_indices[i]]
        out[feature_idx, bin_idx].sum_gradients += ordered_gradients[i]
        out[feature_idx, bin_idx].count += 1
cpdef void _build_histogram_root(
        const int feature_idx,
        const X_BINNED_DTYPE_C [::1] binned_feature,  # IN
        const G_H_DTYPE_C [::1] all_gradients,  # IN
        const G_H_DTYPE_C [::1] all_hessians,  # IN
        hist_struct [:, ::1] out) noexcept nogil:  # OUT
    """Compute histogram of the root node.

    Unlike other nodes, the root node has to find the split among *all* the
    samples from the training set. binned_feature and all_gradients /
    all_hessians already have a consistent ordering.

    This avoids the sample_indices indirection of _build_histogram: the
    arrays are scanned directly, with the same 4-way unrolling.
    """
    cdef:
        unsigned int i = 0
        unsigned int n_samples = binned_feature.shape[0]
        unsigned int unrolled_upper = (n_samples // 4) * 4
        unsigned int bin_0
        unsigned int bin_1
        unsigned int bin_2
        unsigned int bin_3
        unsigned int bin_idx

    for i in range(0, unrolled_upper, 4):
        bin_0 = binned_feature[i]
        bin_1 = binned_feature[i + 1]
        bin_2 = binned_feature[i + 2]
        bin_3 = binned_feature[i + 3]

        out[feature_idx, bin_0].sum_gradients += all_gradients[i]
        out[feature_idx, bin_1].sum_gradients += all_gradients[i + 1]
        out[feature_idx, bin_2].sum_gradients += all_gradients[i + 2]
        out[feature_idx, bin_3].sum_gradients += all_gradients[i + 3]

        out[feature_idx, bin_0].sum_hessians += all_hessians[i]
        out[feature_idx, bin_1].sum_hessians += all_hessians[i + 1]
        out[feature_idx, bin_2].sum_hessians += all_hessians[i + 2]
        out[feature_idx, bin_3].sum_hessians += all_hessians[i + 3]

        out[feature_idx, bin_0].count += 1
        out[feature_idx, bin_1].count += 1
        out[feature_idx, bin_2].count += 1
        out[feature_idx, bin_3].count += 1

    for i in range(unrolled_upper, n_samples):
        bin_idx = binned_feature[i]
        out[feature_idx, bin_idx].sum_gradients += all_gradients[i]
        out[feature_idx, bin_idx].sum_hessians += all_hessians[i]
        out[feature_idx, bin_idx].count += 1
cpdef void _build_histogram_root_no_hessian(
        const int feature_idx,
        const X_BINNED_DTYPE_C [::1] binned_feature,  # IN
        const G_H_DTYPE_C [::1] all_gradients,  # IN
        hist_struct [:, ::1] out) noexcept nogil:  # OUT
    """Compute histogram of the root node, not updating hessians.

    Used when the hessians of the loss are constant (typically LS loss).
    Same direct-scan, 4-way unrolled structure as _build_histogram_root,
    minus the hessian accumulation.
    """
    cdef:
        unsigned int i = 0
        unsigned int n_samples = binned_feature.shape[0]
        unsigned int unrolled_upper = (n_samples // 4) * 4
        unsigned int bin_0
        unsigned int bin_1
        unsigned int bin_2
        unsigned int bin_3
        unsigned int bin_idx

    for i in range(0, unrolled_upper, 4):
        bin_0 = binned_feature[i]
        bin_1 = binned_feature[i + 1]
        bin_2 = binned_feature[i + 2]
        bin_3 = binned_feature[i + 3]

        out[feature_idx, bin_0].sum_gradients += all_gradients[i]
        out[feature_idx, bin_1].sum_gradients += all_gradients[i + 1]
        out[feature_idx, bin_2].sum_gradients += all_gradients[i + 2]
        out[feature_idx, bin_3].sum_gradients += all_gradients[i + 3]

        out[feature_idx, bin_0].count += 1
        out[feature_idx, bin_1].count += 1
        out[feature_idx, bin_2].count += 1
        out[feature_idx, bin_3].count += 1

    for i in range(unrolled_upper, n_samples):
        bin_idx = binned_feature[i]
        out[feature_idx, bin_idx].sum_gradients += all_gradients[i]
        out[feature_idx, bin_idx].count += 1

View File

@@ -0,0 +1,20 @@
# Cython extension modules of the hist_gradient_boosting subpackage.
# Each entry maps an extension name to its generated Cython sources and
# extra dependencies. The parallelized modules depend on OpenMP; _bitset
# and common declare no extra dependencies.
hist_gradient_boosting_extension_metadata = {
  '_gradient_boosting': {'sources': [cython_gen.process('_gradient_boosting.pyx')],
                         'dependencies': [openmp_dep]},
  'histogram': {'sources': [cython_gen.process('histogram.pyx')], 'dependencies': [openmp_dep]},
  'splitting': {'sources': [cython_gen.process('splitting.pyx')], 'dependencies': [openmp_dep]},
  '_binning': {'sources': [cython_gen.process('_binning.pyx')], 'dependencies': [openmp_dep]},
  '_predictor': {'sources': [cython_gen.process('_predictor.pyx')], 'dependencies': [openmp_dep]},
  '_bitset': {'sources': [cython_gen.process('_bitset.pyx')]},
  'common': {'sources': [cython_gen.process('common.pyx')]},
}

# Build and install one extension module per metadata entry.
foreach ext_name, ext_dict : hist_gradient_boosting_extension_metadata
  py.extension_module(
    ext_name,
    ext_dict.get('sources'),
    dependencies: ext_dict.get('dependencies', []),
    subdir: 'sklearn/ensemble/_hist_gradient_boosting',
    install: true
  )
endforeach

View File

@@ -0,0 +1,149 @@
"""
This module contains the TreePredictor class which is used for prediction.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
from sklearn.ensemble._hist_gradient_boosting._predictor import (
_compute_partial_dependence,
_predict_from_binned_data,
_predict_from_raw_data,
)
from sklearn.ensemble._hist_gradient_boosting.common import (
PREDICTOR_RECORD_DTYPE,
Y_DTYPE,
)
class TreePredictor:
"""Tree class used for predictions.
Parameters
----------
nodes : ndarray of PREDICTOR_RECORD_DTYPE
The nodes of the tree.
binned_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), dtype=uint32
Array of bitsets for binned categories used in predict_binned when a
split is categorical.
raw_left_cat_bitsets : ndarray of shape (n_categorical_splits, 8), dtype=uint32
Array of bitsets for raw categories used in predict when a split is
categorical.
"""
def __init__(self, nodes, binned_left_cat_bitsets, raw_left_cat_bitsets):
self.nodes = nodes
self.binned_left_cat_bitsets = binned_left_cat_bitsets
self.raw_left_cat_bitsets = raw_left_cat_bitsets
def get_n_leaf_nodes(self):
"""Return number of leaves."""
return int(self.nodes["is_leaf"].sum())
def get_max_depth(self):
"""Return maximum depth among all leaves."""
return int(self.nodes["depth"].max())
def predict(self, X, known_cat_bitsets, f_idx_map, n_threads):
"""Predict raw values for non-binned data.
Parameters
----------
X : ndarray, shape (n_samples, n_features)
The input samples.
known_cat_bitsets : ndarray of shape (n_categorical_features, 8)
Array of bitsets of known categories, for each categorical feature.
f_idx_map : ndarray of shape (n_features,)
Map from original feature index to the corresponding index in the
known_cat_bitsets array.
n_threads : int
Number of OpenMP threads to use.
Returns
-------
y : ndarray, shape (n_samples,)
The raw predicted values.
"""
out = np.empty(X.shape[0], dtype=Y_DTYPE)
_predict_from_raw_data(
self.nodes,
X,
self.raw_left_cat_bitsets,
known_cat_bitsets,
f_idx_map,
n_threads,
out,
)
return out
def predict_binned(self, X, missing_values_bin_idx, n_threads):
"""Predict raw values for binned data.
Parameters
----------
X : ndarray, shape (n_samples, n_features)
The input samples.
missing_values_bin_idx : uint8
Index of the bin that is used for missing values. This is the
index of the last bin and is always equal to max_bins (as passed
to the GBDT classes), or equivalently to n_bins - 1.
n_threads : int
Number of OpenMP threads to use.
Returns
-------
y : ndarray, shape (n_samples,)
The raw predicted values.
"""
out = np.empty(X.shape[0], dtype=Y_DTYPE)
_predict_from_binned_data(
self.nodes,
X,
self.binned_left_cat_bitsets,
missing_values_bin_idx,
n_threads,
out,
)
return out
def compute_partial_dependence(self, grid, target_features, out):
"""Fast partial dependence computation.
Parameters
----------
grid : ndarray, shape (n_samples, n_target_features)
The grid points on which the partial dependence should be
evaluated.
target_features : ndarray, shape (n_target_features)
The set of target features for which the partial dependence
should be evaluated.
out : ndarray, shape (n_samples)
The value of the partial dependence function on each grid
point.
"""
_compute_partial_dependence(self.nodes, grid, target_features, out)
def __setstate__(self, state):
    """Restore a pickled predictor, normalizing the ``nodes`` dtype."""
    try:
        super().__setstate__(state)
    except AttributeError:
        # No cooperative __setstate__ in the MRO: restore the instance
        # dict directly (default pickling behavior).
        self.__dict__.update(state)

    # The dtype of feature_idx is np.intp which is platform dependent. Here, we
    # make sure that saving and loading on different bitness systems works without
    # errors. For instance, on a 64 bit Python runtime, np.intp = np.int64,
    # while on 32 bit np.intp = np.int32.
    #
    # TODO: consider always using platform agnostic dtypes for fitted
    # estimator attributes. For this particular estimator, this would
    # mean replacing the intp field of PREDICTOR_RECORD_DTYPE by an int32
    # field. Ideally this should be done consistently throughout
    # scikit-learn along with a common test.
    if self.nodes.dtype != PREDICTOR_RECORD_DTYPE:
        self.nodes = self.nodes.astype(PREDICTOR_RECORD_DTYPE, casting="same_kind")

View File

@@ -0,0 +1,489 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal
from sklearn.ensemble._hist_gradient_boosting.binning import (
_BinMapper,
_find_binning_thresholds,
_map_to_bins,
)
from sklearn.ensemble._hist_gradient_boosting.common import (
ALMOST_INF,
X_BINNED_DTYPE,
X_DTYPE,
)
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
n_threads = _openmp_effective_n_threads()

# Shared fixture: 1e6 samples, two Gaussian features with very different
# locations and scales (std 1 vs 0.01), cast to the binning input dtype.
DATA = (
    np.random.RandomState(42)
    .normal(loc=[0, 10], scale=[1, 0.01], size=(int(1e6), 2))
    .astype(X_DTYPE)
)
def test_find_binning_thresholds_regular_data():
    # Evenly spaced data: thresholds land exactly on the obvious grid points.
    values = np.linspace(0, 10, 1001)
    assert_allclose(
        _find_binning_thresholds(values, max_bins=10), [1, 2, 3, 4, 5, 6, 7, 8, 9]
    )
    assert_allclose(_find_binning_thresholds(values, max_bins=5), [2, 4, 6, 8])
def test_find_binning_thresholds_small_regular_data():
    # 11 distinct points: once max_bins exceeds the number of distinct values,
    # thresholds saturate at the 10 midpoints.
    values = np.linspace(0, 10, 11)
    expected_by_max_bins = {
        5: [2, 4, 6, 8],
        10: [1, 2, 3, 4, 5, 6, 7, 8, 9],
        11: np.arange(10) + 0.5,
        255: np.arange(10) + 0.5,
    }
    for max_bins, expected in expected_by_max_bins.items():
        assert_allclose(_find_binning_thresholds(values, max_bins=max_bins), expected)
def test_find_binning_thresholds_random_data():
    # Thresholds have max_bins - 1 entries and track the quantiles of the
    # (roughly normal) feature distributions.
    per_feature = [
        _find_binning_thresholds(DATA[:, feature], max_bins=255) for feature in range(2)
    ]
    for thresholds in per_feature:
        assert thresholds.shape == (254,)  # 255 - 1
        assert thresholds.dtype == DATA.dtype
    assert_allclose(per_feature[0][[64, 128, 192]], [-0.7, 0.0, 0.7], atol=1e-1)
    assert_allclose(per_feature[1][[64, 128, 192]], [9.99, 10.00, 10.01], atol=1e-2)
def test_find_binning_thresholds_low_n_bins():
    # Shape and dtype checks for a smaller bin budget.
    for feature in range(2):
        thresholds = _find_binning_thresholds(DATA[:, feature], max_bins=128)
        assert thresholds.shape == (127,)  # 128 - 1
        assert thresholds.dtype == DATA.dtype
@pytest.mark.parametrize("n_bins", (2, 257))
def test_invalid_n_bins(n_bins):
    # n_bins must lie in [3, 256]: one bin is reserved for missing values and
    # bin indices must fit in a uint8.
    expected_msg = f"n_bins={n_bins} should be no smaller than 3 and no larger than 256"
    with pytest.raises(ValueError, match=expected_msg):
        _BinMapper(n_bins=n_bins).fit(DATA)
def test_bin_mapper_n_features_transform():
    # transform() rejects inputs whose feature count differs from fit time.
    mapper = _BinMapper(n_bins=42, random_state=42).fit(DATA)
    with pytest.raises(
        ValueError, match="This estimator was fitted with 2 features but 4 got passed"
    ):
        mapper.transform(np.repeat(DATA, 2, axis=1))
@pytest.mark.parametrize("max_bins", [16, 128, 255])
def test_map_to_bins(max_bins):
    # _map_to_bins writes uint8 bin indices into an F-contiguous output, and
    # the per-feature extremes land in the first and last non-missing bins.
    bin_thresholds = [
        _find_binning_thresholds(DATA[:, f], max_bins=max_bins) for f in range(2)
    ]
    binned = np.zeros_like(DATA, dtype=X_BINNED_DTYPE, order="F")
    is_categorical = np.zeros(2, dtype=np.uint8)
    _map_to_bins(DATA, bin_thresholds, is_categorical, max_bins, n_threads, binned)

    assert binned.shape == DATA.shape
    assert binned.dtype == np.uint8
    assert binned.flags.f_contiguous

    for feature_idx, row in enumerate(DATA.argmin(axis=0)):
        assert binned[row, feature_idx] == 0
    for feature_idx, row in enumerate(DATA.argmax(axis=0)):
        assert binned[row, feature_idx] == max_bins - 1
@pytest.mark.parametrize("max_bins", [5, 10, 42])
def test_bin_mapper_random_data(max_bins):
    # Quantile binning on a large random sample should yield approximately
    # equal-sized bins spanning the full [0, max_bins - 1] index range.
    n_samples, n_features = DATA.shape
    expected_count_per_bin = n_samples // max_bins
    tol = int(0.05 * expected_count_per_bin)  # allow 5% imbalance per bin

    # max_bins is the number of bins for non-missing values
    n_bins = max_bins + 1
    mapper = _BinMapper(n_bins=n_bins, random_state=42).fit(DATA)
    binned = mapper.transform(DATA)

    assert binned.shape == (n_samples, n_features)
    assert binned.dtype == np.uint8
    assert_array_equal(binned.min(axis=0), np.array([0, 0]))
    assert_array_equal(binned.max(axis=0), np.array([max_bins - 1, max_bins - 1]))
    assert len(mapper.bin_thresholds_) == n_features
    for bin_thresholds_feature in mapper.bin_thresholds_:
        assert bin_thresholds_feature.shape == (max_bins - 1,)
        assert bin_thresholds_feature.dtype == DATA.dtype
    assert np.all(mapper.n_bins_non_missing_ == max_bins)

    # Check that the binned data is approximately balanced across bins.
    for feature_idx in range(n_features):
        for bin_idx in range(max_bins):
            count = (binned[:, feature_idx] == bin_idx).sum()
            assert abs(count - expected_count_per_bin) < tol
@pytest.mark.parametrize("n_samples, max_bins", [(5, 5), (5, 10), (5, 11), (42, 255)])
def test_bin_mapper_small_random_data(n_samples, max_bins):
    # When every value is distinct and fits into the available bins, binning
    # preserves the value ordering exactly.
    data = np.random.RandomState(42).normal(size=n_samples).reshape(-1, 1)
    assert len(np.unique(data)) == n_samples

    # max_bins counts only non-missing bins, hence the +1.
    mapper = _BinMapper(n_bins=max_bins + 1, random_state=42)
    binned = mapper.fit_transform(data)

    assert binned.shape == data.shape
    assert binned.dtype == np.uint8
    order = np.argsort(data.ravel())
    assert_array_equal(binned.ravel()[order], np.arange(n_samples))
@pytest.mark.parametrize(
    "max_bins, n_distinct, multiplier",
    [
        (5, 5, 1),
        (5, 5, 3),
        (255, 12, 42),
    ],
)
def test_bin_mapper_identity_repeated_values(max_bins, n_distinct, multiplier):
    # Integer data 0..n_distinct-1 (each value possibly repeated) is its own
    # binning whenever there are at least as many bins as distinct values.
    data = np.array(list(range(n_distinct)) * multiplier).reshape(-1, 1)
    # max_bins counts only non-missing bins, hence the +1.
    binned = _BinMapper(n_bins=max_bins + 1).fit_transform(data)
    assert_array_equal(data, binned)
@pytest.mark.parametrize("n_distinct", [2, 7, 42])
def test_bin_mapper_repeated_values_invariance(n_distinct):
    # Thresholds should depend only on the set of distinct values present,
    # not on how often each value is repeated.
    rng = np.random.RandomState(42)
    distinct_values = rng.normal(size=n_distinct)
    assert len(np.unique(distinct_values)) == n_distinct

    repeated_indices = rng.randint(low=0, high=n_distinct, size=1000)
    data = distinct_values[repeated_indices]
    rng.shuffle(data)
    assert_array_equal(np.unique(data), np.sort(distinct_values))

    data = data.reshape(-1, 1)

    mapper_1 = _BinMapper(n_bins=n_distinct + 1)
    binned_1 = mapper_1.fit_transform(data)
    assert_array_equal(np.unique(binned_1[:, 0]), np.arange(n_distinct))

    # Adding more bins to the mapper yields the same results (same thresholds)
    mapper_2 = _BinMapper(n_bins=min(256, n_distinct * 3) + 1)
    binned_2 = mapper_2.fit_transform(data)

    assert_allclose(mapper_1.bin_thresholds_[0], mapper_2.bin_thresholds_[0])
    assert_array_equal(binned_1, binned_2)
@pytest.mark.parametrize(
    "max_bins, scale, offset",
    [
        (3, 2, -1),
        (42, 1, 0),
        (255, 0.3, 42),
    ],
)
def test_bin_mapper_identity_small(max_bins, scale, offset):
    # An affine transform of 0..max_bins-1 bins back to 0..max_bins-1.
    indices = np.arange(max_bins).reshape(-1, 1)
    data = indices * scale + offset
    # max_bins counts only non-missing bins, hence the +1.
    binned = _BinMapper(n_bins=max_bins + 1).fit_transform(data)
    assert_array_equal(binned, indices)
@pytest.mark.parametrize(
    "max_bins_small, max_bins_large",
    [
        (2, 2),
        (3, 3),
        (4, 4),
        (42, 42),
        (255, 255),
        (5, 17),
        (42, 255),
    ],
)
def test_bin_mapper_idempotence(max_bins_small, max_bins_large):
    """Re-binning already-binned data with at least as many bins is a no-op.

    Bug fix: ``mapper_large`` was previously built with ``max_bins_small + 1``
    as well, so the (5, 17) and (42, 255) parametrizations never actually
    exercised a larger bin budget.
    """
    assert max_bins_large >= max_bins_small
    data = np.random.RandomState(42).normal(size=30000).reshape(-1, 1)
    mapper_small = _BinMapper(n_bins=max_bins_small + 1)
    mapper_large = _BinMapper(n_bins=max_bins_large + 1)
    binned_small = mapper_small.fit_transform(data)
    # binned_small has at most max_bins_small distinct integer values, which
    # all fit in the larger mapper, so it must map to itself.
    binned_large = mapper_large.fit_transform(binned_small)
    assert_array_equal(binned_small, binned_large)
@pytest.mark.parametrize("n_bins", [10, 100, 256])
@pytest.mark.parametrize("diff", [-5, 0, 5])
def test_n_bins_non_missing(n_bins, diff):
    # n_bins_non_missing_ equals the number of unique values when it fits in
    # the non-missing bins, and is capped at n_bins - 1 otherwise.
    n_unique_values = n_bins + diff
    X = np.array(list(range(n_unique_values)) * 2).reshape(-1, 1)
    mapper = _BinMapper(n_bins=n_bins).fit(X)
    assert np.all(mapper.n_bins_non_missing_ == min(n_bins - 1, n_unique_values))
def test_subsample():
    # Subsampling changes which samples the thresholds are computed from, so
    # the resulting thresholds must differ from the full-data ones.
    full = _BinMapper(subsample=None, random_state=0).fit(DATA)
    sub = _BinMapper(subsample=256, random_state=0).fit(DATA)
    for feature in range(DATA.shape[1]):
        assert not np.allclose(
            full.bin_thresholds_[feature],
            sub.bin_thresholds_[feature],
            rtol=1e-4,
        )
@pytest.mark.parametrize(
    "n_bins, n_bins_non_missing, X_trans_expected",
    [
        (
            256,
            [4, 2, 2],
            [
                [0, 0, 0],  # 255 <=> missing value
                [255, 255, 0],
                [1, 0, 0],
                [255, 1, 1],
                [2, 1, 1],
                [3, 0, 0],
            ],
        ),
        (
            3,
            [2, 2, 2],
            [
                [0, 0, 0],  # 2 <=> missing value
                [2, 2, 0],
                [0, 0, 0],
                [2, 1, 1],
                [1, 1, 1],
                [1, 0, 0],
            ],
        ),
    ],
)
def test_missing_values_support(n_bins, n_bins_non_missing, X_trans_expected):
    # check for missing values: make sure nans are mapped to the last bin
    # and that the _BinMapper attributes are correct

    X = [
        [1, 1, 0],
        [np.nan, np.nan, 0],
        [2, 1, 0],
        [np.nan, 2, 1],
        [3, 2, 1],
        [4, 1, 0],
    ]

    X = np.array(X)

    mapper = _BinMapper(n_bins=n_bins)
    mapper.fit(X)

    assert_array_equal(mapper.n_bins_non_missing_, n_bins_non_missing)

    # There is always one threshold fewer than non-missing bins per feature.
    for feature_idx in range(X.shape[1]):
        assert (
            len(mapper.bin_thresholds_[feature_idx])
            == n_bins_non_missing[feature_idx] - 1
        )

    # The last bin index is reserved for missing values.
    assert mapper.missing_values_bin_idx_ == n_bins - 1

    X_trans = mapper.transform(X)
    assert_array_equal(X_trans, X_trans_expected)
def test_infinite_values():
    # Infinite values get their own extreme bins; the +inf threshold is capped
    # at ALMOST_INF.
    bin_mapper = _BinMapper()
    X = np.array([-np.inf, 0, 1, np.inf]).reshape(-1, 1)
    bin_mapper.fit(X)

    assert_allclose(bin_mapper.bin_thresholds_[0], [-np.inf, 0.5, ALMOST_INF])
    assert bin_mapper.n_bins_non_missing_ == [4]

    expected = np.array([0, 1, 2, 3]).reshape(-1, 1)
    assert_array_equal(bin_mapper.transform(X), expected)
@pytest.mark.parametrize("n_bins", [15, 256])
def test_categorical_feature(n_bins):
    # Basic test for categorical features
    # we make sure that categories are mapped into [0, n_categories - 1] and
    # that nans are mapped to the last bin
    X = np.array(
        [[4] * 500 + [1] * 3 + [10] * 4 + [0] * 4 + [13] + [7] * 5 + [np.nan] * 2],
        dtype=X_DTYPE,
    ).T
    known_categories = [np.unique(X[~np.isnan(X)])]

    bin_mapper = _BinMapper(
        n_bins=n_bins,
        is_categorical=np.array([True]),
        known_categories=known_categories,
    ).fit(X)
    assert bin_mapper.n_bins_non_missing_ == [6]
    # For categorical features the "thresholds" are the sorted categories.
    assert_array_equal(bin_mapper.bin_thresholds_[0], [0, 1, 4, 7, 10, 13])

    X = np.array([[0, 1, 4, np.nan, 7, 10, 13]], dtype=X_DTYPE).T
    expected_trans = np.array([[0, 1, 2, n_bins - 1, 3, 4, 5]]).T
    assert_array_equal(bin_mapper.transform(X), expected_trans)

    # Negative categories are mapped to the missing values' bin
    # (i.e. the bin of index `missing_values_bin_idx_ == n_bins - 1).
    # Unknown positive categories does not happen in practice and tested
    # for illustration purpose.
    X = np.array([[-4, -1, 100]], dtype=X_DTYPE).T
    expected_trans = np.array([[n_bins - 1, n_bins - 1, 6]]).T
    assert_array_equal(bin_mapper.transform(X), expected_trans)
def test_categorical_feature_negative_missing():
    """Make sure bin mapper treats negative categories as missing values."""
    X = np.array(
        [[4] * 500 + [1] * 3 + [5] * 10 + [-1] * 3 + [np.nan] * 4], dtype=X_DTYPE
    ).T
    bin_mapper = _BinMapper(
        n_bins=4,
        is_categorical=np.array([True]),
        known_categories=[np.array([1, 4, 5], dtype=X_DTYPE)],
    ).fit(X)

    # Only the 3 known non-negative categories get their own bins.
    assert bin_mapper.n_bins_non_missing_ == [3]

    X = np.array([[-1, 1, 3, 5, np.nan]], dtype=X_DTYPE).T

    # Negative values for categorical features are considered as missing values.
    # They are mapped to the bin of index `bin_mapper.missing_values_bin_idx_`,
    # which is 3 here.
    assert bin_mapper.missing_values_bin_idx_ == 3
    expected_trans = np.array([[3, 0, 1, 2, 3]]).T
    assert_array_equal(bin_mapper.transform(X), expected_trans)
@pytest.mark.parametrize("n_bins", (128, 256))
def test_categorical_with_numerical_features(n_bins):
    # basic check for binmapper with mixed data
    X1 = np.arange(10, 20).reshape(-1, 1)  # numerical
    X2 = np.arange(10, 15).reshape(-1, 1)  # categorical
    X2 = np.r_[X2, X2]
    X = np.c_[X1, X2]
    # known_categories must be None for the numerical feature.
    known_categories = [None, np.unique(X2).astype(X_DTYPE)]

    bin_mapper = _BinMapper(
        n_bins=n_bins,
        is_categorical=np.array([False, True]),
        known_categories=known_categories,
    ).fit(X)

    assert_array_equal(bin_mapper.n_bins_non_missing_, [10, 5])

    bin_thresholds = bin_mapper.bin_thresholds_
    assert len(bin_thresholds) == 2
    # For the categorical feature the "thresholds" are the sorted categories.
    assert_array_equal(bin_thresholds[1], np.arange(10, 15))

    expected_X_trans = [
        [0, 0],
        [1, 1],
        [2, 2],
        [3, 3],
        [4, 4],
        [5, 0],
        [6, 1],
        [7, 2],
        [8, 3],
        [9, 4],
    ]
    assert_array_equal(bin_mapper.transform(X), expected_X_trans)
def test_make_known_categories_bitsets():
    # Check the output of make_known_categories_bitsets
    X = np.array(
        [[14, 2, 30], [30, 4, 70], [40, 10, 180], [40, 240, 180]], dtype=X_DTYPE
    )

    bin_mapper = _BinMapper(
        n_bins=256,
        is_categorical=np.array([False, True, True]),
        known_categories=[None, X[:, 1], X[:, 2]],
    )
    bin_mapper.fit(X)

    known_cat_bitsets, f_idx_map = bin_mapper.make_known_categories_bitsets()

    # Note that for non-categorical features, values are left to 0
    expected_f_idx_map = np.array([0, 0, 1], dtype=np.uint8)
    assert_allclose(expected_f_idx_map, f_idx_map)

    # Each bitset row is 8 uint32 words; category c is stored at word c // 32,
    # bit c % 32.
    expected_cat_bitset = np.zeros((2, 8), dtype=np.uint32)

    # first categorical feature: [2, 4, 10, 240]
    f_idx = 1
    mapped_f_idx = f_idx_map[f_idx]
    expected_cat_bitset[mapped_f_idx, 0] = 2**2 + 2**4 + 2**10
    # 240 = 7 * 32 + 16, therefore bit 16 of word 7 is set.
    expected_cat_bitset[mapped_f_idx, 7] = 2**16

    # second categorical feature [30, 70, 180]
    f_idx = 2
    mapped_f_idx = f_idx_map[f_idx]
    expected_cat_bitset[mapped_f_idx, 0] = 2**30
    expected_cat_bitset[mapped_f_idx, 2] = 2**6  # 70 = 2 * 32 + 6
    expected_cat_bitset[mapped_f_idx, 5] = 2**20  # 180 = 5 * 32 + 20

    assert_allclose(expected_cat_bitset, known_cat_bitsets)
@pytest.mark.parametrize(
    "is_categorical, known_categories, match",
    [
        (np.array([True]), [None], "Known categories for feature 0 must be provided"),
        (
            np.array([False]),
            np.array([1, 2, 3]),
            "isn't marked as a categorical feature, but categories were passed",
        ),
    ],
)
def test_categorical_parameters(is_categorical, known_categories, match):
    # is_categorical and known_categories must be mutually consistent; fit()
    # validates this and raises otherwise.
    X = np.array([[1, 2, 3]], dtype=X_DTYPE)
    mapper = _BinMapper(
        is_categorical=is_categorical, known_categories=known_categories
    )
    with pytest.raises(ValueError, match=match):
        mapper.fit(X)

View File

@@ -0,0 +1,64 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose
from sklearn.ensemble._hist_gradient_boosting._bitset import (
in_bitset_memoryview,
set_bitset_memoryview,
set_raw_bitset_from_binned_bitset,
)
from sklearn.ensemble._hist_gradient_boosting.common import X_DTYPE
@pytest.mark.parametrize(
    "values_to_insert, expected_bitset",
    [
        ([0, 4, 33], np.array([2**0 + 2**4, 2**1, 0], dtype=np.uint32)),
        (
            [31, 32, 33, 79],
            np.array([2**31, 2**0 + 2**1, 2**15], dtype=np.uint32),
        ),
    ],
)
def test_set_get_bitset(values_to_insert, expected_bitset):
    # A bitset is an array of 32-bit words: value v lives at word v // 32,
    # bit v % 32. Setting then probing every representable value must agree
    # with the inserted set.
    n_32bits_ints = 3
    bitset = np.zeros(n_32bits_ints, dtype=np.uint32)

    for value in values_to_insert:
        set_bitset_memoryview(bitset, value)
    assert_allclose(expected_bitset, bitset)

    inserted = set(values_to_insert)
    for value in range(32 * n_32bits_ints):
        assert bool(in_bitset_memoryview(bitset, value)) == (value in inserted)
@pytest.mark.parametrize(
    "raw_categories, binned_cat_to_insert, expected_raw_bitset",
    [
        (
            [3, 4, 5, 10, 31, 32, 43],
            [0, 2, 4, 5, 6],
            [2**3 + 2**5 + 2**31, 2**0 + 2**11],
        ),
        ([3, 33, 50, 52], [1, 3], [0, 2**1 + 2**20]),
    ],
)
def test_raw_bitset_from_binned_bitset(
    raw_categories, binned_cat_to_insert, expected_raw_bitset
):
    # A binned bitset stores *bin indices*; the raw bitset stores the original
    # category values. set_raw_bitset_from_binned_bitset translates the former
    # into the latter, using the sorted raw categories as the bin -> value map.
    binned_bitset = np.zeros(2, dtype=np.uint32)
    raw_bitset = np.zeros(2, dtype=np.uint32)
    raw_categories = np.asarray(raw_categories, dtype=X_DTYPE)

    for val in binned_cat_to_insert:
        set_bitset_memoryview(binned_bitset, val)

    set_raw_bitset_from_binned_bitset(raw_bitset, binned_bitset, raw_categories)

    assert_allclose(expected_raw_bitset, raw_bitset)

    # Membership in the raw bitset matches membership in the binned bitset.
    for binned_cat_val, raw_cat_val in enumerate(raw_categories):
        if binned_cat_val in binned_cat_to_insert:
            assert in_bitset_memoryview(raw_bitset, raw_cat_val)
        else:
            assert not in_bitset_memoryview(raw_bitset, raw_cat_val)

View File

@@ -0,0 +1,279 @@
import numpy as np
import pytest
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import (
HistGradientBoostingClassifier,
HistGradientBoostingRegressor,
)
from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
from sklearn.ensemble._hist_gradient_boosting.utils import get_equivalent_estimator
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
@pytest.mark.parametrize("seed", range(5))
@pytest.mark.parametrize(
    "loss",
    [
        "squared_error",
        "poisson",
        pytest.param(
            "gamma",
            marks=pytest.mark.skip("LightGBM with gamma loss has larger deviation."),
        ),
    ],
)
@pytest.mark.parametrize("min_samples_leaf", (1, 20))
@pytest.mark.parametrize(
    "n_samples, max_leaf_nodes",
    [
        (255, 4096),
        (1000, 8),
    ],
)
def test_same_predictions_regression(
    seed, loss, min_samples_leaf, n_samples, max_leaf_nodes
):
    # Make sure sklearn has the same predictions as lightgbm for easy targets.
    #
    # In particular when the size of the trees are bound and the number of
    # samples is large enough, the structure of the prediction trees found by
    # LightGBM and sklearn should be exactly identical.
    #
    # Notes:
    # - Several candidate splits may have equal gains when the number of
    #   samples in a node is low (and because of float errors). Therefore the
    #   predictions on the test set might differ if the structure of the tree
    #   is not exactly the same. To avoid this issue we only compare the
    #   predictions on the test set when the number of samples is large enough
    #   and max_leaf_nodes is low enough.
    # - To ignore discrepancies caused by small differences in the binning
    #   strategy, data is pre-binned if n_samples > 255.
    # - We don't check the absolute_error loss here. This is because
    #   LightGBM's computation of the median (used for the initial value of
    #   raw_prediction) is a bit off (they'll e.g. return midpoints when there
    #   is no need to.). Since these tests only run 1 iteration, the
    #   discrepancy between the initial values leads to biggish differences in
    #   the predictions. These differences are much smaller with more
    #   iterations.
    pytest.importorskip("lightgbm")

    rng = np.random.RandomState(seed=seed)
    max_iter = 1
    max_bins = 255

    X, y = make_regression(
        n_samples=n_samples, n_features=5, n_informative=5, random_state=0
    )

    if loss in ("gamma", "poisson"):
        # make the target positive
        y = np.abs(y) + np.mean(np.abs(y))

    if n_samples > 255:
        # bin data and convert it to float32 so that the estimator doesn't
        # treat it as pre-binned
        X = _BinMapper(n_bins=max_bins + 1).fit_transform(X).astype(np.float32)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

    est_sklearn = HistGradientBoostingRegressor(
        loss=loss,
        max_iter=max_iter,
        max_bins=max_bins,
        learning_rate=1,
        early_stopping=False,
        min_samples_leaf=min_samples_leaf,
        max_leaf_nodes=max_leaf_nodes,
    )
    est_lightgbm = get_equivalent_estimator(est_sklearn, lib="lightgbm")
    est_lightgbm.set_params(min_sum_hessian_in_leaf=0)

    est_lightgbm.fit(X_train, y_train)
    est_sklearn.fit(X_train, y_train)

    # We need X to be treated as numerical data, not pre-binned data.
    X_train, X_test = X_train.astype(np.float32), X_test.astype(np.float32)

    pred_lightgbm = est_lightgbm.predict(X_train)
    pred_sklearn = est_sklearn.predict(X_train)
    if loss in ("gamma", "poisson"):
        # More than 65% of the predictions must be close up to the 2nd decimal.
        # TODO: We are not entirely satisfied with this lax comparison, but the root
        # cause is not clear, maybe algorithmic differences. One such example is the
        # poisson_max_delta_step parameter of LightGBM which does not exist in HGBT.
        assert (
            np.mean(np.isclose(pred_lightgbm, pred_sklearn, rtol=1e-2, atol=1e-2))
            > 0.65
        )
    else:
        # Less than 1% of the predictions may deviate more than 1e-3 in relative terms.
        assert np.mean(np.isclose(pred_lightgbm, pred_sklearn, rtol=1e-3)) > 1 - 0.01

    if max_leaf_nodes < 10 and n_samples >= 1000 and loss in ("squared_error",):
        pred_lightgbm = est_lightgbm.predict(X_test)
        pred_sklearn = est_sklearn.predict(X_test)
        # Less than 1% of the predictions may deviate more than 1e-4 in relative terms.
        assert np.mean(np.isclose(pred_lightgbm, pred_sklearn, rtol=1e-4)) > 1 - 0.01
@pytest.mark.parametrize("seed", range(5))
@pytest.mark.parametrize("min_samples_leaf", (1, 20))
@pytest.mark.parametrize(
    "n_samples, max_leaf_nodes",
    [
        (255, 4096),
        (1000, 8),
    ],
)
def test_same_predictions_classification(
    seed, min_samples_leaf, n_samples, max_leaf_nodes
):
    # Same as test_same_predictions_regression but for classification
    pytest.importorskip("lightgbm")

    rng = np.random.RandomState(seed=seed)
    max_iter = 1
    n_classes = 2
    max_bins = 255

    X, y = make_classification(
        n_samples=n_samples,
        n_classes=n_classes,
        n_features=5,
        n_informative=5,
        n_redundant=0,
        random_state=0,
    )

    if n_samples > 255:
        # bin data and convert it to float32 so that the estimator doesn't
        # treat it as pre-binned
        X = _BinMapper(n_bins=max_bins + 1).fit_transform(X).astype(np.float32)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

    est_sklearn = HistGradientBoostingClassifier(
        loss="log_loss",
        max_iter=max_iter,
        max_bins=max_bins,
        learning_rate=1,
        early_stopping=False,
        min_samples_leaf=min_samples_leaf,
        max_leaf_nodes=max_leaf_nodes,
    )
    est_lightgbm = get_equivalent_estimator(
        est_sklearn, lib="lightgbm", n_classes=n_classes
    )

    est_lightgbm.fit(X_train, y_train)
    est_sklearn.fit(X_train, y_train)

    # We need X to be treated as numerical data, not pre-binned data.
    X_train, X_test = X_train.astype(np.float32), X_test.astype(np.float32)

    pred_lightgbm = est_lightgbm.predict(X_train)
    pred_sklearn = est_sklearn.predict(X_train)
    # At least 89% of the hard predictions must agree on the training set.
    assert np.mean(pred_sklearn == pred_lightgbm) > 0.89

    acc_lightgbm = accuracy_score(y_train, pred_lightgbm)
    acc_sklearn = accuracy_score(y_train, pred_sklearn)
    np.testing.assert_almost_equal(acc_lightgbm, acc_sklearn)

    if max_leaf_nodes < 10 and n_samples >= 1000:
        pred_lightgbm = est_lightgbm.predict(X_test)
        pred_sklearn = est_sklearn.predict(X_test)
        assert np.mean(pred_sklearn == pred_lightgbm) > 0.89

        acc_lightgbm = accuracy_score(y_test, pred_lightgbm)
        acc_sklearn = accuracy_score(y_test, pred_sklearn)
        np.testing.assert_almost_equal(acc_lightgbm, acc_sklearn, decimal=2)
@pytest.mark.parametrize("seed", range(5))
@pytest.mark.parametrize("min_samples_leaf", (1, 20))
@pytest.mark.parametrize(
    "n_samples, max_leaf_nodes",
    [
        (255, 4096),
        (10000, 8),
    ],
)
def test_same_predictions_multiclass_classification(
    seed, min_samples_leaf, n_samples, max_leaf_nodes
):
    """Same as test_same_predictions_regression but for multiclass
    classification.

    Bug fix: the held-out-data branch previously compared ``predict_proba``
    on ``X_train`` instead of ``X_test`` (copy-paste error), so the test-set
    probability comparison was never actually performed.
    """
    pytest.importorskip("lightgbm")

    rng = np.random.RandomState(seed=seed)
    n_classes = 3
    max_iter = 1
    max_bins = 255
    lr = 1

    X, y = make_classification(
        n_samples=n_samples,
        n_classes=n_classes,
        n_features=5,
        n_informative=5,
        n_redundant=0,
        n_clusters_per_class=1,
        random_state=0,
    )

    if n_samples > 255:
        # bin data and convert it to float32 so that the estimator doesn't
        # treat it as pre-binned
        X = _BinMapper(n_bins=max_bins + 1).fit_transform(X).astype(np.float32)

    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=rng)

    est_sklearn = HistGradientBoostingClassifier(
        loss="log_loss",
        max_iter=max_iter,
        max_bins=max_bins,
        learning_rate=lr,
        early_stopping=False,
        min_samples_leaf=min_samples_leaf,
        max_leaf_nodes=max_leaf_nodes,
    )
    est_lightgbm = get_equivalent_estimator(
        est_sklearn, lib="lightgbm", n_classes=n_classes
    )

    est_lightgbm.fit(X_train, y_train)
    est_sklearn.fit(X_train, y_train)

    # We need X to be treated as numerical data, not pre-binned data.
    X_train, X_test = X_train.astype(np.float32), X_test.astype(np.float32)

    pred_lightgbm = est_lightgbm.predict(X_train)
    pred_sklearn = est_sklearn.predict(X_train)
    assert np.mean(pred_sklearn == pred_lightgbm) > 0.89

    proba_lightgbm = est_lightgbm.predict_proba(X_train)
    proba_sklearn = est_sklearn.predict_proba(X_train)
    # assert more than 75% of the predicted probabilities are the same up to
    # the second decimal
    assert np.mean(np.abs(proba_lightgbm - proba_sklearn) < 1e-2) > 0.75

    acc_lightgbm = accuracy_score(y_train, pred_lightgbm)
    acc_sklearn = accuracy_score(y_train, pred_sklearn)

    np.testing.assert_allclose(acc_lightgbm, acc_sklearn, rtol=0, atol=5e-2)

    if max_leaf_nodes < 10 and n_samples >= 1000:
        pred_lightgbm = est_lightgbm.predict(X_test)
        pred_sklearn = est_sklearn.predict(X_test)
        assert np.mean(pred_sklearn == pred_lightgbm) > 0.89

        # Bug fix: compare probabilities on the held-out set, not X_train.
        proba_lightgbm = est_lightgbm.predict_proba(X_test)
        proba_sklearn = est_sklearn.predict_proba(X_test)
        # assert more than 75% of the predicted probabilities are the same up
        # to the second decimal
        assert np.mean(np.abs(proba_lightgbm - proba_sklearn) < 1e-2) > 0.75

        acc_lightgbm = accuracy_score(y_test, pred_lightgbm)
        acc_sklearn = accuracy_score(y_test, pred_sklearn)
        np.testing.assert_almost_equal(acc_lightgbm, acc_sklearn, decimal=2)

View File

@@ -0,0 +1,650 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal
from pytest import approx
from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
from sklearn.ensemble._hist_gradient_boosting.common import (
G_H_DTYPE,
X_BINNED_DTYPE,
X_BITSET_INNER_DTYPE,
X_DTYPE,
Y_DTYPE,
)
from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
n_threads = _openmp_effective_n_threads()
def _make_training_data(n_bins=256, constant_hessian=True):
    # Build an already-binned 2-feature dataset whose target follows a small,
    # asymmetric decision tree so grower tests can check exact recovery.
    rng = np.random.RandomState(42)
    n_samples = 10000

    # Generate some test data directly binned so as to test the grower code
    # independently of the binning logic.
    X_binned = rng.randint(0, n_bins - 1, size=(n_samples, 2), dtype=X_BINNED_DTYPE)
    X_binned = np.asfortranarray(X_binned)

    def true_decision_function(input_features):
        """Ground truth decision function

        This is a very simple yet asymmetric decision tree. Therefore the
        grower code should have no trouble recovering the decision function
        from 10000 training samples.
        """
        if input_features[0] <= n_bins // 2:
            return -1
        else:
            return -1 if input_features[1] <= n_bins // 3 else 1

    target = np.array([true_decision_function(x) for x in X_binned], dtype=Y_DTYPE)

    # Assume a square loss applied to an initial model that always predicts 0
    # (hardcoded for this test):
    all_gradients = target.astype(G_H_DTYPE)
    shape_hessians = 1 if constant_hessian else all_gradients.shape
    all_hessians = np.ones(shape=shape_hessians, dtype=G_H_DTYPE)

    return X_binned, all_gradients, all_hessians
def _check_children_consistency(parent, left, right):
    """Assert that a parent's samples are partitioned exactly between its two
    children: nothing lost, nothing duplicated, nothing sent to both sides."""
    assert parent.left_child is left
    assert parent.right_child is right

    left_set = set(left.sample_indices)
    right_set = set(right.sample_indices)
    parent_set = set(parent.sample_indices)

    # each sample from the parent ends up in exactly one child
    assert len(left.sample_indices) + len(right.sample_indices) == len(
        parent.sample_indices
    )
    assert left_set | right_set == parent_set
    assert not (left_set & right_set)
@pytest.mark.parametrize(
    "n_bins, constant_hessian, stopping_param, shrinkage",
    [
        (11, True, "min_gain_to_split", 0.5),
        (11, False, "min_gain_to_split", 1.0),
        (11, True, "max_leaf_nodes", 1.0),
        (11, False, "max_leaf_nodes", 0.1),
        (42, True, "max_leaf_nodes", 0.01),
        (42, False, "max_leaf_nodes", 1.0),
        (256, True, "min_gain_to_split", 1.0),
        (256, True, "max_leaf_nodes", 0.1),
    ],
)
def test_grow_tree(n_bins, constant_hessian, stopping_param, shrinkage):
    # Grow the tree split by split and check, at each step, that the grower
    # recovers the known ground-truth structure of _make_training_data.
    X_binned, all_gradients, all_hessians = _make_training_data(
        n_bins=n_bins, constant_hessian=constant_hessian
    )
    n_samples = X_binned.shape[0]

    if stopping_param == "max_leaf_nodes":
        stopping_param = {"max_leaf_nodes": 3}
    else:
        stopping_param = {"min_gain_to_split": 0.01}

    grower = TreeGrower(
        X_binned,
        all_gradients,
        all_hessians,
        n_bins=n_bins,
        shrinkage=shrinkage,
        min_samples_leaf=1,
        **stopping_param,
    )

    # The root node is not yet split, but the best possible split has
    # already been evaluated:
    assert grower.root.left_child is None
    assert grower.root.right_child is None

    root_split = grower.root.split_info
    assert root_split.feature_idx == 0
    assert root_split.bin_idx == n_bins // 2
    assert len(grower.splittable_nodes) == 1

    # Calling split next applies the next split and computes the best split
    # for each of the two newly introduced children nodes.
    left_node, right_node = grower.split_next()

    # All training samples have been split in the two nodes, approximately
    # 50%/50%
    _check_children_consistency(grower.root, left_node, right_node)
    assert len(left_node.sample_indices) > 0.4 * n_samples
    assert len(left_node.sample_indices) < 0.6 * n_samples

    if grower.min_gain_to_split > 0:
        # The left node is too pure: there is no gain to split it further.
        assert left_node.split_info.gain < grower.min_gain_to_split
        assert left_node in grower.finalized_leaves

    # The right node can still be split further, this time on feature #1
    split_info = right_node.split_info
    assert split_info.gain > 1.0
    assert split_info.feature_idx == 1
    assert split_info.bin_idx == n_bins // 3
    assert right_node.left_child is None
    assert right_node.right_child is None

    # The right split has not been applied yet. Let's do it now:
    assert len(grower.splittable_nodes) == 1
    right_left_node, right_right_node = grower.split_next()
    _check_children_consistency(right_node, right_left_node, right_right_node)
    assert len(right_left_node.sample_indices) > 0.1 * n_samples
    assert len(right_left_node.sample_indices) < 0.2 * n_samples

    assert len(right_right_node.sample_indices) > 0.2 * n_samples
    assert len(right_right_node.sample_indices) < 0.4 * n_samples

    # All the leafs are pure, it is not possible to split any further:
    assert not grower.splittable_nodes

    grower._apply_shrinkage()

    # Check the values of the leaves:
    assert grower.root.left_child.value == approx(shrinkage)
    assert grower.root.right_child.left_child.value == approx(shrinkage)
    assert grower.root.right_child.right_child.value == approx(-shrinkage, rel=1e-3)
def test_predictor_from_grower():
    # Build a tree on the toy 3-leaf dataset to extract the predictor.
    n_bins = 256
    X_binned, all_gradients, all_hessians = _make_training_data(n_bins=n_bins)
    grower = TreeGrower(
        X_binned,
        all_gradients,
        all_hessians,
        n_bins=n_bins,
        shrinkage=1.0,
        max_leaf_nodes=3,
        min_samples_leaf=5,
    )
    grower.grow()
    assert grower.n_nodes == 5  # (2 decision nodes + 3 leaves)

    # Check that the node structure can be converted into a predictor
    # object to perform predictions at scale
    # We pass undefined binning_thresholds because we won't use predict anyway
    predictor = grower.make_predictor(
        binning_thresholds=np.zeros((X_binned.shape[1], n_bins))
    )
    assert predictor.nodes.shape[0] == 5
    assert predictor.nodes["is_leaf"].sum() == 3

    # Probe some predictions for each leaf of the tree
    # each group of 3 samples corresponds to a condition in _make_training_data
    input_data = np.array(
        [
            [0, 0],
            [42, 99],
            [128, 254],
            [129, 0],
            [129, 85],
            [254, 85],
            [129, 86],
            [129, 254],
            [242, 100],
        ],
        dtype=np.uint8,
    )
    missing_values_bin_idx = n_bins - 1
    predictions = predictor.predict_binned(
        input_data, missing_values_bin_idx, n_threads
    )
    expected_targets = [1, 1, 1, 1, 1, 1, -1, -1, -1]
    assert np.allclose(predictions, expected_targets)

    # Check that training set can be recovered exactly:
    predictions = predictor.predict_binned(X_binned, missing_values_bin_idx, n_threads)
    assert np.allclose(predictions, -all_gradients)
@pytest.mark.parametrize(
    "n_samples, min_samples_leaf, n_bins, constant_hessian, noise",
    [
        (11, 10, 7, True, 0),
        (13, 10, 42, False, 0),
        (56, 10, 255, True, 0.1),
        (101, 3, 7, True, 0),
        (200, 42, 42, False, 0),
        (300, 55, 255, True, 0.1),
        (300, 301, 255, True, 0.1),
    ],
)
def test_min_samples_leaf(n_samples, min_samples_leaf, n_bins, constant_hessian, noise):
    """Check that every leaf of a grown tree holds at least min_samples_leaf
    samples; if min_samples_leaf exceeds n_samples the tree must reduce to a
    single root node containing all samples.
    """
    rng = np.random.RandomState(seed=0)
    # data = linear target, 3 features, 1 irrelevant.
    X = rng.normal(size=(n_samples, 3))
    y = X[:, 0] - X[:, 1]
    if noise:
        y_scale = y.std()
        y += rng.normal(scale=noise, size=n_samples) * y_scale
    mapper = _BinMapper(n_bins=n_bins)
    X = mapper.fit_transform(X)
    all_gradients = y.astype(G_H_DTYPE)
    # A hessian array of shape 1 encodes the "constant hessian" case.
    shape_hessian = 1 if constant_hessian else all_gradients.shape
    all_hessians = np.ones(shape=shape_hessian, dtype=G_H_DTYPE)
    grower = TreeGrower(
        X,
        all_gradients,
        all_hessians,
        n_bins=n_bins,
        shrinkage=1.0,
        min_samples_leaf=min_samples_leaf,
        max_leaf_nodes=n_samples,
    )
    grower.grow()
    predictor = grower.make_predictor(binning_thresholds=mapper.bin_thresholds_)
    if n_samples >= min_samples_leaf:
        for node in predictor.nodes:
            if node["is_leaf"]:
                assert node["count"] >= min_samples_leaf
    else:
        # min_samples_leaf cannot be satisfied: the root is never split.
        assert predictor.nodes.shape[0] == 1
        assert predictor.nodes[0]["is_leaf"]
        assert predictor.nodes[0]["count"] == n_samples
@pytest.mark.parametrize("n_samples, min_samples_leaf", [(99, 50), (100, 50)])
def test_min_samples_leaf_root(n_samples, min_samples_leaf):
    """Check that the root is only split when both children can satisfy
    min_samples_leaf, i.e. when n_samples >= 2 * min_samples_leaf.
    """
    # Make sure root node isn't split if n_samples is not at least twice
    # min_samples_leaf
    rng = np.random.RandomState(seed=0)
    n_bins = 256
    # data = linear target, 3 features, 1 irrelevant.
    X = rng.normal(size=(n_samples, 3))
    y = X[:, 0] - X[:, 1]
    mapper = _BinMapper(n_bins=n_bins)
    X = mapper.fit_transform(X)
    all_gradients = y.astype(G_H_DTYPE)
    all_hessians = np.ones(shape=1, dtype=G_H_DTYPE)
    grower = TreeGrower(
        X,
        all_gradients,
        all_hessians,
        n_bins=n_bins,
        shrinkage=1.0,
        min_samples_leaf=min_samples_leaf,
        max_leaf_nodes=n_samples,
    )
    grower.grow()
    if n_samples >= min_samples_leaf * 2:
        assert len(grower.finalized_leaves) >= 2
    else:
        assert len(grower.finalized_leaves) == 1
def assert_is_stump(grower):
    """Check that the grown tree is a stump: a root whose two children are
    both leaves. Used to verify that max_depth=1 produces no deeper splits.
    """
    root = grower.root
    for child in (root.left_child, root.right_child):
        assert child.left_child is None
        assert child.right_child is None
@pytest.mark.parametrize("max_depth", [1, 2, 3])
def test_max_depth(max_depth):
    """Check that the deepest finalized leaf sits exactly at max_depth."""
    # Make sure max_depth parameter works as expected
    rng = np.random.RandomState(seed=0)
    n_bins = 256
    n_samples = 1000
    # data = linear target, 3 features, 1 irrelevant.
    X = rng.normal(size=(n_samples, 3))
    y = X[:, 0] - X[:, 1]
    mapper = _BinMapper(n_bins=n_bins)
    X = mapper.fit_transform(X)
    all_gradients = y.astype(G_H_DTYPE)
    all_hessians = np.ones(shape=1, dtype=G_H_DTYPE)
    grower = TreeGrower(X, all_gradients, all_hessians, max_depth=max_depth)
    grower.grow()
    depth = max(leaf.depth for leaf in grower.finalized_leaves)
    assert depth == max_depth
    if max_depth == 1:
        # depth 1 means a stump: root plus two leaves
        assert_is_stump(grower)
def test_input_validation():
    """Check that TreeGrower rejects non-uint8 and non-Fortran X_binned."""
    X_binned, all_gradients, all_hessians = _make_training_data()
    X_binned_float = X_binned.astype(np.float32)
    with pytest.raises(NotImplementedError, match="X_binned must be of type uint8"):
        TreeGrower(X_binned_float, all_gradients, all_hessians)
    X_binned_C_array = np.ascontiguousarray(X_binned)
    with pytest.raises(
        ValueError, match="X_binned should be passed as Fortran contiguous array"
    ):
        TreeGrower(X_binned_C_array, all_gradients, all_hessians)
def test_init_parameters_validation():
    """Check that negative split hyperparameters raise informative errors."""
    X_binned, all_gradients, all_hessians = _make_training_data()
    with pytest.raises(ValueError, match="min_gain_to_split=-1 must be positive"):
        TreeGrower(X_binned, all_gradients, all_hessians, min_gain_to_split=-1)
    with pytest.raises(ValueError, match="min_hessian_to_split=-1 must be positive"):
        TreeGrower(X_binned, all_gradients, all_hessians, min_hessian_to_split=-1)
def test_missing_value_predict_only():
    """Check missing-value support at predict time when no missing values were
    seen in training: nans must follow the child with the most samples.
    """
    # Make sure that missing values are supported at predict time even if they
    # were not encountered in the training data: the missing values are
    # assigned to whichever child has the most samples.
    rng = np.random.RandomState(0)
    n_samples = 100
    X_binned = rng.randint(0, 256, size=(n_samples, 1), dtype=np.uint8)
    X_binned = np.asfortranarray(X_binned)
    gradients = rng.normal(size=n_samples).astype(G_H_DTYPE)
    hessians = np.ones(shape=1, dtype=G_H_DTYPE)
    grower = TreeGrower(
        X_binned, gradients, hessians, min_samples_leaf=5, has_missing_values=False
    )
    grower.grow()
    # We pass undefined binning_thresholds because we won't use predict anyway
    predictor = grower.make_predictor(
        binning_thresholds=np.zeros((X_binned.shape[1], X_binned.max() + 1))
    )
    # go from root to a leaf, always following node with the most samples.
    # That's the path nans are supposed to take
    node = predictor.nodes[0]
    while not node["is_leaf"]:
        left = predictor.nodes[node["left"]]
        right = predictor.nodes[node["right"]]
        node = left if left["count"] > right["count"] else right
    prediction_main_path = node["value"]
    # now build X_test with only nans, and make sure all predictions are equal
    # to prediction_main_path
    all_nans = np.full(shape=(n_samples, 1), fill_value=np.nan)
    # empty bitsets: there are no categorical features in this test
    known_cat_bitsets = np.zeros((0, 8), dtype=X_BITSET_INNER_DTYPE)
    f_idx_map = np.zeros(0, dtype=np.uint32)
    y_pred = predictor.predict(all_nans, known_cat_bitsets, f_idx_map, n_threads)
    assert np.all(y_pred == prediction_main_path)
def test_split_on_nan_with_infinite_values():
    """Check "split on nan" behavior in the presence of +inf samples.

    A split on nan uses +inf as its numerical threshold; the +inf sample must
    still be mapped to the left child, in both predict and predict_binned.
    """
    # Make sure the split on nan situations are respected even when there are
    # samples with +inf values (we set the threshold to +inf when we have a
    # split on nan so this test makes sure this does not introduce edge-case
    # bugs). We need to use the private API so that we can also test
    # predict_binned().
    X = np.array([0, 1, np.inf, np.nan, np.nan]).reshape(-1, 1)
    # the gradient values will force a split on nan situation
    gradients = np.array([0, 0, 0, 100, 100], dtype=G_H_DTYPE)
    hessians = np.ones(shape=1, dtype=G_H_DTYPE)
    bin_mapper = _BinMapper()
    X_binned = bin_mapper.fit_transform(X)
    n_bins_non_missing = 3
    has_missing_values = True
    grower = TreeGrower(
        X_binned,
        gradients,
        hessians,
        n_bins_non_missing=n_bins_non_missing,
        has_missing_values=has_missing_values,
        min_samples_leaf=1,
        n_threads=n_threads,
    )
    grower.grow()
    predictor = grower.make_predictor(binning_thresholds=bin_mapper.bin_thresholds_)
    # sanity check: this was a split on nan
    assert predictor.nodes[0]["num_threshold"] == np.inf
    assert predictor.nodes[0]["bin_threshold"] == n_bins_non_missing - 1
    known_cat_bitsets, f_idx_map = bin_mapper.make_known_categories_bitsets()
    # Make sure in particular that the +inf sample is mapped to the left child
    # Note that lightgbm "fails" here and will assign the inf sample to the
    # right child, even though it's a "split on nan" situation.
    predictions = predictor.predict(X, known_cat_bitsets, f_idx_map, n_threads)
    predictions_binned = predictor.predict_binned(
        X_binned,
        missing_values_bin_idx=bin_mapper.missing_values_bin_idx_,
        n_threads=n_threads,
    )
    np.testing.assert_allclose(predictions, -gradients)
    np.testing.assert_allclose(predictions_binned, -gradients)
def test_grow_tree_categories():
    """Check the predictor tree produced by a single categorical split,
    including left-child bitsets and missing-value routing.
    """
    # Check that the grower produces the right predictor tree when a split is
    # categorical
    X_binned = np.array([[0, 1] * 11 + [1]], dtype=X_BINNED_DTYPE).T
    X_binned = np.asfortranarray(X_binned)
    all_gradients = np.array([10, 1] * 11 + [1], dtype=G_H_DTYPE)
    all_hessians = np.ones(1, dtype=G_H_DTYPE)
    is_categorical = np.ones(1, dtype=np.uint8)
    grower = TreeGrower(
        X_binned,
        all_gradients,
        all_hessians,
        n_bins=4,
        shrinkage=1.0,
        min_samples_leaf=1,
        is_categorical=is_categorical,
        n_threads=n_threads,
    )
    grower.grow()
    assert grower.n_nodes == 3
    # raw category values: bin 0 maps to category 4, bin 1 to category 9
    categories = [np.array([4, 9], dtype=X_DTYPE)]
    predictor = grower.make_predictor(binning_thresholds=categories)
    root = predictor.nodes[0]
    assert root["count"] == 23
    assert root["depth"] == 0
    assert root["is_categorical"]
    left, right = predictor.nodes[root["left"]], predictor.nodes[root["right"]]
    # arbitrary validation, but this means ones go to the left.
    assert left["count"] >= right["count"]
    # check binned category value (1)
    expected_binned_cat_bitset = [2**1] + [0] * 7
    binned_cat_bitset = predictor.binned_left_cat_bitsets
    assert_array_equal(binned_cat_bitset[0], expected_binned_cat_bitset)
    # check raw category value (9)
    expected_raw_cat_bitsets = [2**9] + [0] * 7
    raw_cat_bitsets = predictor.raw_left_cat_bitsets
    assert_array_equal(raw_cat_bitsets[0], expected_raw_cat_bitsets)
    # Note that since there was no missing values during training, the missing
    # values aren't part of the bitsets. However, we expect the missing values
    # to go to the biggest child (i.e. the left one).
    # The left child has a value of -1 = negative gradient.
    assert root["missing_go_to_left"]
    # make sure binned missing values are mapped to the left child during
    # prediction
    prediction_binned = predictor.predict_binned(
        np.asarray([[6]]).astype(X_BINNED_DTYPE),
        missing_values_bin_idx=6,
        n_threads=n_threads,
    )
    assert_allclose(prediction_binned, [-1]) # negative gradient
    # make sure raw missing values are mapped to the left child during
    # prediction
    known_cat_bitsets = np.zeros((1, 8), dtype=np.uint32) # ignored anyway
    f_idx_map = np.array([0], dtype=np.uint32)
    prediction = predictor.predict(
        np.array([[np.nan]]), known_cat_bitsets, f_idx_map, n_threads
    )
    assert_allclose(prediction, [-1])
@pytest.mark.parametrize("min_samples_leaf", (1, 20))
@pytest.mark.parametrize("n_unique_categories", (2, 10, 100))
@pytest.mark.parametrize("target", ("binary", "random", "equal"))
def test_ohe_equivalence(min_samples_leaf, n_unique_categories, target):
    """Check that native categorical splits predict the same values as a fit
    on one-hot encoded data, while never requiring a deeper tree.
    """
    # Make sure that native categorical splits are equivalent to using a OHE,
    # when given enough depth
    rng = np.random.RandomState(0)
    n_samples = 10_000
    X_binned = rng.randint(0, n_unique_categories, size=(n_samples, 1), dtype=np.uint8)
    X_ohe = OneHotEncoder(sparse_output=False).fit_transform(X_binned)
    X_ohe = np.asfortranarray(X_ohe).astype(np.uint8)
    if target == "equal":
        gradients = X_binned.reshape(-1)
    elif target == "binary":
        gradients = (X_binned % 2).reshape(-1)
    else:
        gradients = rng.randn(n_samples)
    gradients = gradients.astype(G_H_DTYPE)
    hessians = np.ones(shape=1, dtype=G_H_DTYPE)
    # unlimited depth/leaves: only min_samples_leaf constrains both growers
    grower_params = {
        "min_samples_leaf": min_samples_leaf,
        "max_depth": None,
        "max_leaf_nodes": None,
    }
    grower = TreeGrower(
        X_binned, gradients, hessians, is_categorical=[True], **grower_params
    )
    grower.grow()
    # we pass undefined bin_thresholds because we won't use predict()
    predictor = grower.make_predictor(
        binning_thresholds=np.zeros((1, n_unique_categories))
    )
    preds = predictor.predict_binned(
        X_binned, missing_values_bin_idx=255, n_threads=n_threads
    )
    grower_ohe = TreeGrower(X_ohe, gradients, hessians, **grower_params)
    grower_ohe.grow()
    predictor_ohe = grower_ohe.make_predictor(
        binning_thresholds=np.zeros((X_ohe.shape[1], n_unique_categories))
    )
    preds_ohe = predictor_ohe.predict_binned(
        X_ohe, missing_values_bin_idx=255, n_threads=n_threads
    )
    assert predictor.get_max_depth() <= predictor_ohe.get_max_depth()
    if target == "binary" and n_unique_categories > 2:
        # OHE needs more splits to achieve the same predictions
        assert predictor.get_max_depth() < predictor_ohe.get_max_depth()
    np.testing.assert_allclose(preds, preds_ohe)
def test_grower_interaction_constraints():
    """Check that grower respects interaction constraints."""
    n_features = 6
    interaction_cst = [{0, 1}, {1, 2}, {3, 4, 5}]
    n_samples = 10
    n_bins = 6
    # record which feature each seeded run splits the root on
    root_feature_splits = []
    def get_all_children(node):
        """Return all descendants of *node*, excluding *node* itself."""
        res = []
        if node.is_leaf:
            return res
        for n in [node.left_child, node.right_child]:
            res.append(n)
            res.extend(get_all_children(n))
        return res
    for seed in range(20):
        rng = np.random.RandomState(seed)
        X_binned = rng.randint(
            0, n_bins - 1, size=(n_samples, n_features), dtype=X_BINNED_DTYPE
        )
        X_binned = np.asfortranarray(X_binned)
        gradients = rng.normal(size=n_samples).astype(G_H_DTYPE)
        hessians = np.ones(shape=1, dtype=G_H_DTYPE)
        grower = TreeGrower(
            X_binned,
            gradients,
            hessians,
            n_bins=n_bins,
            min_samples_leaf=1,
            interaction_cst=interaction_cst,
            n_threads=n_threads,
        )
        grower.grow()
        root_feature_idx = grower.root.split_info.feature_idx
        root_feature_splits.append(root_feature_idx)
        # union of the interaction_cst sets that contain each feature index
        feature_idx_to_constraint_set = {
            0: {0, 1},
            1: {0, 1, 2},
            2: {1, 2},
            3: {3, 4, 5},
            4: {3, 4, 5},
            5: {3, 4, 5},
        }
        root_constraint_set = feature_idx_to_constraint_set[root_feature_idx]
        for node in (grower.root.left_child, grower.root.right_child):
            # Root's children's allowed_features must be the root's constraints set.
            assert_array_equal(node.allowed_features, list(root_constraint_set))
        for node in get_all_children(grower.root):
            if node.is_leaf:
                continue
            # Ensure that each node uses a subset of features of its parent node.
            parent_interaction_cst_indices = set(node.interaction_cst_indices)
            right_interactions_cst_indices = set(
                node.right_child.interaction_cst_indices
            )
            left_interactions_cst_indices = set(node.left_child.interaction_cst_indices)
            assert right_interactions_cst_indices.issubset(
                parent_interaction_cst_indices
            )
            assert left_interactions_cst_indices.issubset(
                parent_interaction_cst_indices
            )
            # The features used for split must have been present in the root's
            # constraint set.
            assert node.split_info.feature_idx in root_constraint_set
    # Make sure that every feature is used at least once as split for the root node.
    assert (
        len(set(root_feature_splits))
        == len(set().union(*interaction_cst))
        == n_features
    )

View File

@@ -0,0 +1,239 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal
from sklearn.ensemble._hist_gradient_boosting.common import (
G_H_DTYPE,
HISTOGRAM_DTYPE,
X_BINNED_DTYPE,
)
from sklearn.ensemble._hist_gradient_boosting.histogram import (
_build_histogram,
_build_histogram_naive,
_build_histogram_no_hessian,
_build_histogram_root,
_build_histogram_root_no_hessian,
_subtract_histograms,
)
@pytest.mark.parametrize("build_func", [_build_histogram_naive, _build_histogram])
def test_build_histogram(build_func):
    """Check per-bin counts and gradient/hessian sums, for sample_indices
    sizes below and above the loop-unrolling threshold.
    """
    binned_feature = np.array([0, 2, 0, 1, 2, 0, 2, 1], dtype=X_BINNED_DTYPE)
    # Small sample_indices (below unrolling threshold)
    ordered_gradients = np.array([0, 1, 3], dtype=G_H_DTYPE)
    ordered_hessians = np.array([1, 1, 2], dtype=G_H_DTYPE)
    sample_indices = np.array([0, 2, 3], dtype=np.uint32)
    hist = np.zeros((1, 3), dtype=HISTOGRAM_DTYPE)
    build_func(
        0, sample_indices, binned_feature, ordered_gradients, ordered_hessians, hist
    )
    hist = hist[0]
    assert_array_equal(hist["count"], [2, 1, 0])
    assert_allclose(hist["sum_gradients"], [1, 3, 0])
    assert_allclose(hist["sum_hessians"], [2, 2, 0])
    # Larger sample_indices (above unrolling threshold)
    sample_indices = np.array([0, 2, 3, 6, 7], dtype=np.uint32)
    ordered_gradients = np.array([0, 1, 3, 0, 1], dtype=G_H_DTYPE)
    ordered_hessians = np.array([1, 1, 2, 1, 0], dtype=G_H_DTYPE)
    hist = np.zeros((1, 3), dtype=HISTOGRAM_DTYPE)
    build_func(
        0, sample_indices, binned_feature, ordered_gradients, ordered_hessians, hist
    )
    hist = hist[0]
    assert_array_equal(hist["count"], [2, 2, 1])
    assert_allclose(hist["sum_gradients"], [1, 4, 0])
    assert_allclose(hist["sum_hessians"], [2, 2, 1])
def test_histogram_sample_order_independence():
    """Check that permuting the samples does not change the histograms."""
    # Make sure the order of the samples has no impact on the histogram
    # computations
    rng = np.random.RandomState(42)
    n_sub_samples = 100
    n_samples = 1000
    n_bins = 256
    binned_feature = rng.randint(0, n_bins - 1, size=n_samples, dtype=X_BINNED_DTYPE)
    sample_indices = rng.choice(
        np.arange(n_samples, dtype=np.uint32), n_sub_samples, replace=False
    )
    ordered_gradients = rng.randn(n_sub_samples).astype(G_H_DTYPE)
    # gradient-only (constant hessian) histogram
    hist_gc = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    _build_histogram_no_hessian(
        0, sample_indices, binned_feature, ordered_gradients, hist_gc
    )
    # gradient + hessian histogram
    ordered_hessians = rng.exponential(size=n_sub_samples).astype(G_H_DTYPE)
    hist_ghc = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    _build_histogram(
        0, sample_indices, binned_feature, ordered_gradients, ordered_hessians, hist_ghc
    )
    # rebuild both histograms with the samples in a permuted order
    permutation = rng.permutation(n_sub_samples)
    hist_gc_perm = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    _build_histogram_no_hessian(
        0,
        sample_indices[permutation],
        binned_feature,
        ordered_gradients[permutation],
        hist_gc_perm,
    )
    hist_ghc_perm = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    _build_histogram(
        0,
        sample_indices[permutation],
        binned_feature,
        ordered_gradients[permutation],
        ordered_hessians[permutation],
        hist_ghc_perm,
    )
    hist_gc = hist_gc[0]
    hist_ghc = hist_ghc[0]
    hist_gc_perm = hist_gc_perm[0]
    hist_ghc_perm = hist_ghc_perm[0]
    assert_allclose(hist_gc["sum_gradients"], hist_gc_perm["sum_gradients"])
    assert_array_equal(hist_gc["count"], hist_gc_perm["count"])
    assert_allclose(hist_ghc["sum_gradients"], hist_ghc_perm["sum_gradients"])
    assert_allclose(hist_ghc["sum_hessians"], hist_ghc_perm["sum_hessians"])
    assert_array_equal(hist_ghc["count"], hist_ghc_perm["count"])
@pytest.mark.parametrize("constant_hessian", [True, False])
def test_unrolled_equivalent_to_naive(constant_hessian):
    """Check that every unrolled histogram builder matches the naive one."""
    # Make sure the different unrolled histogram computations give the same
    # results as the naive one.
    rng = np.random.RandomState(42)
    n_samples = 10
    n_bins = 5
    sample_indices = np.arange(n_samples).astype(np.uint32)
    binned_feature = rng.randint(0, n_bins - 1, size=n_samples, dtype=np.uint8)
    ordered_gradients = rng.randn(n_samples).astype(G_H_DTYPE)
    if constant_hessian:
        ordered_hessians = np.ones(n_samples, dtype=G_H_DTYPE)
    else:
        ordered_hessians = rng.lognormal(size=n_samples).astype(G_H_DTYPE)
    hist_gc_root = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    hist_ghc_root = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    hist_gc = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    hist_ghc = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    hist_naive = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    # root variants take all samples, so no sample_indices argument
    _build_histogram_root_no_hessian(0, binned_feature, ordered_gradients, hist_gc_root)
    _build_histogram_root(
        0, binned_feature, ordered_gradients, ordered_hessians, hist_ghc_root
    )
    _build_histogram_no_hessian(
        0, sample_indices, binned_feature, ordered_gradients, hist_gc
    )
    _build_histogram(
        0, sample_indices, binned_feature, ordered_gradients, ordered_hessians, hist_ghc
    )
    _build_histogram_naive(
        0,
        sample_indices,
        binned_feature,
        ordered_gradients,
        ordered_hessians,
        hist_naive,
    )
    hist_naive = hist_naive[0]
    hist_gc_root = hist_gc_root[0]
    hist_ghc_root = hist_ghc_root[0]
    hist_gc = hist_gc[0]
    hist_ghc = hist_ghc[0]
    for hist in (hist_gc_root, hist_ghc_root, hist_gc, hist_ghc):
        assert_array_equal(hist["count"], hist_naive["count"])
        assert_allclose(hist["sum_gradients"], hist_naive["sum_gradients"])
    for hist in (hist_ghc_root, hist_ghc):
        assert_allclose(hist["sum_hessians"], hist_naive["sum_hessians"])
    # no-hessian variants leave the hessian column untouched (all zeros)
    for hist in (hist_gc_root, hist_gc):
        assert_array_equal(hist["sum_hessians"], np.zeros(n_bins))
@pytest.mark.parametrize("constant_hessian", [True, False])
def test_hist_subtraction(constant_hessian):
    """Check that the subtraction trick (sibling = parent - child) matches
    histograms built from scratch.
    """
    # Make sure the histogram subtraction trick gives the same result as the
    # classical method.
    rng = np.random.RandomState(42)
    n_samples = 10
    n_bins = 5
    sample_indices = np.arange(n_samples).astype(np.uint32)
    binned_feature = rng.randint(0, n_bins - 1, size=n_samples, dtype=np.uint8)
    ordered_gradients = rng.randn(n_samples).astype(G_H_DTYPE)
    if constant_hessian:
        ordered_hessians = np.ones(n_samples, dtype=G_H_DTYPE)
    else:
        ordered_hessians = rng.lognormal(size=n_samples).astype(G_H_DTYPE)
    hist_parent = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    if constant_hessian:
        _build_histogram_no_hessian(
            0, sample_indices, binned_feature, ordered_gradients, hist_parent
        )
    else:
        _build_histogram(
            0,
            sample_indices,
            binned_feature,
            ordered_gradients,
            ordered_hessians,
            hist_parent,
        )
    # random partition of the samples into a left and a right child
    mask = rng.randint(0, 2, n_samples).astype(bool)
    sample_indices_left = sample_indices[mask]
    ordered_gradients_left = ordered_gradients[mask]
    ordered_hessians_left = ordered_hessians[mask]
    hist_left = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    if constant_hessian:
        _build_histogram_no_hessian(
            0, sample_indices_left, binned_feature, ordered_gradients_left, hist_left
        )
    else:
        _build_histogram(
            0,
            sample_indices_left,
            binned_feature,
            ordered_gradients_left,
            ordered_hessians_left,
            hist_left,
        )
    sample_indices_right = sample_indices[~mask]
    ordered_gradients_right = ordered_gradients[~mask]
    ordered_hessians_right = ordered_hessians[~mask]
    hist_right = np.zeros((1, n_bins), dtype=HISTOGRAM_DTYPE)
    if constant_hessian:
        _build_histogram_no_hessian(
            0, sample_indices_right, binned_feature, ordered_gradients_right, hist_right
        )
    else:
        _build_histogram(
            0,
            sample_indices_right,
            binned_feature,
            ordered_gradients_right,
            ordered_hessians_right,
            hist_right,
        )
    # subtract each child's histogram from a copy of the parent's and check
    # it reproduces the sibling's histogram
    hist_left_sub = np.copy(hist_parent)
    hist_right_sub = np.copy(hist_parent)
    _subtract_histograms(0, n_bins, hist_left_sub, hist_right)
    _subtract_histograms(0, n_bins, hist_right_sub, hist_left)
    for key in ("count", "sum_hessians", "sum_gradients"):
        assert_allclose(hist_left[key], hist_left_sub[key], rtol=1e-6)
        assert_allclose(hist_right[key], hist_right_sub[key], rtol=1e-6)

View File

@@ -0,0 +1,446 @@
import re
import numpy as np
import pytest
from sklearn.ensemble import (
HistGradientBoostingClassifier,
HistGradientBoostingRegressor,
)
from sklearn.ensemble._hist_gradient_boosting.common import (
G_H_DTYPE,
X_BINNED_DTYPE,
MonotonicConstraint,
)
from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
from sklearn.ensemble._hist_gradient_boosting.histogram import HistogramBuilder
from sklearn.ensemble._hist_gradient_boosting.splitting import (
Splitter,
compute_node_value,
)
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
from sklearn.utils._testing import _convert_container
n_threads = _openmp_effective_n_threads()
def is_increasing(a):
    """Return True if *a* is non-decreasing (all consecutive diffs >= 0)."""
    deltas = np.diff(a)
    return (deltas >= 0.0).all()
def is_decreasing(a):
    """Return True if *a* is non-increasing (all consecutive diffs <= 0)."""
    deltas = np.diff(a)
    return (deltas <= 0.0).all()
def assert_leaves_values_monotonic(predictor, monotonic_cst):
    """Check that leaf values, read from left to right, are consistent with
    the monotonic constraint: all increasing for POS, all decreasing for NEG,
    and neither globally sorted direction for NO_CST.
    """
    nodes = predictor.nodes
    # Collect leaf values from left to right with an explicit DFS stack:
    # pushing the right child first ensures the left subtree is visited first.
    leaf_values = []
    stack = [0]  # node index 0 is the root
    while stack:
        node = nodes[stack.pop()]
        if node["is_leaf"]:
            leaf_values.append(node["value"])
        else:
            stack.append(node["right"])
            stack.append(node["left"])
    if monotonic_cst == MonotonicConstraint.NO_CST:
        # unconstrained: values must not be globally sorted either way
        assert not (is_increasing(leaf_values) or is_decreasing(leaf_values))
    elif monotonic_cst == MonotonicConstraint.POS:
        # all increasing
        assert is_increasing(leaf_values)
    else:  # NEG
        # all decreasing
        assert is_decreasing(leaf_values)
def assert_children_values_monotonic(predictor, monotonic_cst):
    """Check that sibling values respect the monotonic constraint: the left
    child must be lower (resp. greater) than the right child when the
    constraint is POS (resp. NEG).

    Note that this property alone isn't enough to ensure full monotonicity,
    since we also need to guarantee that all the descendants of the left
    child won't be greater (resp. lower) than the right child, or its
    descendants. That's why we need to bound the predicted values (this is
    tested in assert_children_values_bounded).
    """
    nodes = predictor.nodes
    n_left_lower = 0
    n_left_greater = 0
    for node in nodes:
        if node["is_leaf"]:
            continue
        left_value = nodes[node["left"]]["value"]
        right_value = nodes[node["right"]]["value"]
        if left_value < right_value:
            n_left_lower += 1
        elif left_value > right_value:
            n_left_greater += 1
    if monotonic_cst == MonotonicConstraint.NO_CST:
        # an unconstrained tree should exhibit both orderings somewhere
        assert n_left_lower and n_left_greater
    elif monotonic_cst == MonotonicConstraint.POS:
        assert n_left_lower and not n_left_greater
    else:  # NEG
        assert not n_left_lower and n_left_greater
def assert_children_values_bounded(grower, monotonic_cst):
    """Check that children values are bounded by the midpoint between their
    parent and the parent's right sibling (grower-side check).
    """
    # Make sure that the values of the children of a node are bounded by the
    # middle value between that node and its sibling (if there is a monotonic
    # constraint).
    # As a bonus, we also check that the siblings values are properly ordered
    # which is slightly redundant with assert_children_values_monotonic (but
    # this check is done on the grower nodes whereas
    # assert_children_values_monotonic is done on the predictor nodes)
    if monotonic_cst == MonotonicConstraint.NO_CST:
        return
    def recursively_check_children_node_values(node, right_sibling=None):
        """Check *node*'s children against the midpoint bound, then recurse."""
        if node.is_leaf:
            return
        if right_sibling is not None:
            # children values must not cross the midpoint between the node
            # and its right sibling
            middle = (node.value + right_sibling.value) / 2
            if monotonic_cst == MonotonicConstraint.POS:
                assert node.left_child.value <= node.right_child.value <= middle
                if not right_sibling.is_leaf:
                    assert (
                        middle
                        <= right_sibling.left_child.value
                        <= right_sibling.right_child.value
                    )
            else:  # NEG
                assert node.left_child.value >= node.right_child.value >= middle
                if not right_sibling.is_leaf:
                    assert (
                        middle
                        >= right_sibling.left_child.value
                        >= right_sibling.right_child.value
                    )
        recursively_check_children_node_values(
            node.left_child, right_sibling=node.right_child
        )
        recursively_check_children_node_values(node.right_child)
    recursively_check_children_node_values(grower.root)
@pytest.mark.parametrize("seed", range(3))
@pytest.mark.parametrize(
    "monotonic_cst",
    (
        MonotonicConstraint.NO_CST,
        MonotonicConstraint.POS,
        MonotonicConstraint.NEG,
    ),
)
def test_nodes_values(monotonic_cst, seed):
    """Grow a single-feature tree and check that node values respect the
    requested monotonic constraint.
    """
    # Build a single tree with only one feature, and make sure the nodes
    # values respect the monotonic constraints.
    # Considering the following tree with a monotonic POS constraint, we
    # should have:
    #
    #            root
    #           /    \
    #          5      10    # middle = 7.5
    #         / \    /  \
    #        a   b  c    d
    #
    # a <= b and c <= d (assert_children_values_monotonic)
    # a, b <= middle <= c, d (assert_children_values_bounded)
    # a <= b <= c <= d (assert_leaves_values_monotonic)
    #
    # The last one is a consequence of the others, but can't hurt to check
    rng = np.random.RandomState(seed)
    n_samples = 1000
    n_features = 1
    X_binned = rng.randint(0, 255, size=(n_samples, n_features), dtype=np.uint8)
    X_binned = np.asfortranarray(X_binned)
    gradients = rng.normal(size=n_samples).astype(G_H_DTYPE)
    hessians = np.ones(shape=1, dtype=G_H_DTYPE)
    grower = TreeGrower(
        X_binned, gradients, hessians, monotonic_cst=[monotonic_cst], shrinkage=0.1
    )
    grower.grow()
    # grow() will shrink the leaves values at the very end. For our comparison
    # tests, we need to revert the shrinkage of the leaves, else we would
    # compare the value of a leaf (shrunk) with a node (not shrunk) and the
    # test would not be correct.
    for leave in grower.finalized_leaves:
        leave.value /= grower.shrinkage
    # We pass undefined binning_thresholds because we won't use predict anyway
    predictor = grower.make_predictor(
        binning_thresholds=np.zeros((X_binned.shape[1], X_binned.max() + 1))
    )
    # The consistency of the bounds can only be checked on the tree grower
    # as the node bounds are not copied into the predictor tree. The
    # consistency checks on the values of node children and leaves can be
    # done either on the grower tree or on the predictor tree. We only
    # do those checks on the predictor tree as the latter is derived from
    # the former.
    assert_children_values_monotonic(predictor, monotonic_cst)
    assert_children_values_bounded(grower, monotonic_cst)
    assert_leaves_values_monotonic(predictor, monotonic_cst)
@pytest.mark.parametrize("use_feature_names", (True, False))
def test_predictions(global_random_seed, use_feature_names):
    """Fit with a POS constraint on f_0 and a NEG constraint on f_1 and check
    that predictions respect those constraints, with both positional and
    feature-name constraint specifications.
    """
    # Train a model with a POS constraint on the first non-categorical feature
    # and a NEG constraint on the second non-categorical feature, and make sure
    # the constraints are respected by checking the predictions.
    # test adapted from lightgbm's test_monotone_constraint(), itself inspired
    # by https://xgboost.readthedocs.io/en/latest/tutorials/monotonic.html
    rng = np.random.RandomState(global_random_seed)
    n_samples = 1000
    f_0 = rng.rand(n_samples) # positive correlation with y
    f_1 = rng.rand(n_samples) # negative correlation with y
    # extra categorical features, no correlation with y,
    # to check the correctness of monotonicity constraint remapping, see issue #28898
    f_a = rng.randint(low=0, high=9, size=n_samples)
    f_b = rng.randint(low=0, high=9, size=n_samples)
    f_c = rng.randint(low=0, high=9, size=n_samples)
    X = np.c_[f_a, f_0, f_b, f_1, f_c]
    columns_name = ["f_a", "f_0", "f_b", "f_1", "f_c"]
    constructor_name = "dataframe" if use_feature_names else "array"
    X = _convert_container(X, constructor_name, columns_name=columns_name)
    noise = rng.normal(loc=0.0, scale=0.01, size=n_samples)
    y = 5 * f_0 + np.sin(10 * np.pi * f_0) - 5 * f_1 - np.cos(10 * np.pi * f_1) + noise
    if use_feature_names:
        monotonic_cst = {"f_0": +1, "f_1": -1}
        categorical_features = ["f_a", "f_b", "f_c"]
    else:
        monotonic_cst = [0, +1, 0, -1, 0]
        categorical_features = [0, 2, 4]
    gbdt = HistGradientBoostingRegressor(
        monotonic_cst=monotonic_cst, categorical_features=categorical_features
    )
    gbdt.fit(X, y)
    linspace = np.linspace(0, 1, 100)
    sin = np.sin(linspace)
    constant = np.full_like(linspace, fill_value=0.5)
    # We now assert the predictions properly respect the constraints, on each
    # feature. When testing for a feature we need to set the other one to a
    # constant, because the monotonic constraints are only a "all else being
    # equal" type of constraints:
    # a constraint on the first feature only means that
    # x0 < x0' => f(x0, x1) < f(x0', x1)
    # while x1 stays constant.
    # The constraint does not guarantee that
    # x0 < x0' => f(x0, x1) < f(x0', x1')
    # First non-categorical feature (POS)
    # assert pred is all increasing when f_0 is all increasing
    X = np.c_[constant, linspace, constant, constant, constant]
    X = _convert_container(X, constructor_name, columns_name=columns_name)
    pred = gbdt.predict(X)
    assert is_increasing(pred)
    # assert pred actually follows the variations of f_0
    X = np.c_[constant, sin, constant, constant, constant]
    X = _convert_container(X, constructor_name, columns_name=columns_name)
    pred = gbdt.predict(X)
    assert np.all((np.diff(pred) >= 0) == (np.diff(sin) >= 0))
    # Second non-categorical feature (NEG)
    # assert pred is all decreasing when f_1 is all increasing
    X = np.c_[constant, constant, constant, linspace, constant]
    X = _convert_container(X, constructor_name, columns_name=columns_name)
    pred = gbdt.predict(X)
    assert is_decreasing(pred)
    # assert pred actually follows the inverse variations of f_1
    X = np.c_[constant, constant, constant, sin, constant]
    X = _convert_container(X, constructor_name, columns_name=columns_name)
    pred = gbdt.predict(X)
    assert ((np.diff(pred) <= 0) == (np.diff(sin) >= 0)).all()
def test_input_error():
    """Check the errors raised for invalid monotonic_cst arrays and for the
    unsupported multiclass classification case.
    """
    X = [[1, 2], [2, 3], [3, 4]]
    y = [0, 1, 2]
    # wrong shape: 3 constraints for 2 features
    gbdt = HistGradientBoostingRegressor(monotonic_cst=[1, 0, -1])
    with pytest.raises(
        ValueError, match=re.escape("monotonic_cst has shape (3,) but the input data")
    ):
        gbdt.fit(X, y)
    # values outside {-1, 0, 1} are rejected
    for monotonic_cst in ([1, 3], [1, -3], [0.3, -0.7]):
        gbdt = HistGradientBoostingRegressor(monotonic_cst=monotonic_cst)
        expected_msg = re.escape(
            "must be an array-like of -1, 0 or 1. Observed values:"
        )
        with pytest.raises(ValueError, match=expected_msg):
            gbdt.fit(X, y)
    # y has 3 classes: monotonic constraints are not supported for multiclass
    gbdt = HistGradientBoostingClassifier(monotonic_cst=[0, 1])
    with pytest.raises(
        ValueError,
        match="monotonic constraints are not supported for multiclass classification",
    ):
        gbdt.fit(X, y)
def test_input_error_related_to_feature_names():
    """Check the errors raised when a dict-typed monotonic_cst disagrees with
    the fitted feature names or contains invalid constraint values.
    """
    pd = pytest.importorskip("pandas")
    X = pd.DataFrame({"a": [0, 1, 2], "b": [0, 1, 2]})
    y = np.array([0, 1, 0])
    # a couple of unexpected feature names
    monotonic_cst = {"d": 1, "a": 1, "c": -1}
    gbdt = HistGradientBoostingRegressor(monotonic_cst=monotonic_cst)
    expected_msg = re.escape(
        "monotonic_cst contains 2 unexpected feature names: ['c', 'd']."
    )
    with pytest.raises(ValueError, match=expected_msg):
        gbdt.fit(X, y)
    # many unexpected feature names: the message is truncated with '...'
    monotonic_cst = {k: 1 for k in "abcdefghijklmnopqrstuvwxyz"}
    gbdt = HistGradientBoostingRegressor(monotonic_cst=monotonic_cst)
    expected_msg = re.escape(
        "monotonic_cst contains 24 unexpected feature names: "
        "['c', 'd', 'e', 'f', 'g', '...']."
    )
    with pytest.raises(ValueError, match=expected_msg):
        gbdt.fit(X, y)
    # dict monotonic_cst requires fitting on data with feature names
    monotonic_cst = {"a": 1}
    gbdt = HistGradientBoostingRegressor(monotonic_cst=monotonic_cst)
    expected_msg = re.escape(
        "HistGradientBoostingRegressor was not fitted on data with feature "
        "names. Pass monotonic_cst as an integer array instead."
    )
    with pytest.raises(ValueError, match=expected_msg):
        gbdt.fit(X.values, y)
    # constraint values must be -1, 0 or 1
    monotonic_cst = {"b": -1, "a": "+"}
    gbdt = HistGradientBoostingRegressor(monotonic_cst=monotonic_cst)
    expected_msg = re.escape("monotonic_cst['a'] must be either -1, 0 or 1. Got '+'.")
    with pytest.raises(ValueError, match=expected_msg):
        gbdt.fit(X, y)
def test_bounded_value_min_gain_to_split():
    """Check the interaction between node-value bounding and min_gain_to_split."""
    # The purpose of this test is to show that when computing the gain at a
    # given split, the value of the current node should be properly bounded to
    # respect the monotonic constraints, because it strongly interacts with
    # min_gain_to_split. We build a simple example where gradients are [1, 1,
    # 100, 1, 1] (hessians are all ones). The best split happens on the 3rd
    # bin, and depending on whether the value of the node is bounded or not,
    # the min_gain_to_split constraint is or isn't satisfied.
    l2_regularization = 0
    min_hessian_to_split = 0
    min_samples_leaf = 1
    n_bins = n_samples = 5
    # One sample per bin: X_binned is simply [0, 1, 2, 3, 4] as a column.
    X_binned = np.arange(n_samples).reshape(-1, 1).astype(X_BINNED_DTYPE)
    sample_indices = np.arange(n_samples, dtype=np.uint32)
    all_hessians = np.ones(n_samples, dtype=G_H_DTYPE)
    all_gradients = np.array([1, 1, 100, 1, 1], dtype=G_H_DTYPE)
    sum_gradients = all_gradients.sum()
    sum_hessians = all_hessians.sum()
    hessians_are_constant = False
    builder = HistogramBuilder(
        X_binned, n_bins, all_gradients, all_hessians, hessians_are_constant, n_threads
    )
    n_bins_non_missing = np.array([n_bins - 1] * X_binned.shape[1], dtype=np.uint32)
    has_missing_values = np.array([False] * X_binned.shape[1], dtype=np.uint8)
    # No monotonic constraint on the (single) feature; the bounding is applied
    # directly through the lower/upper bound arguments below.
    monotonic_cst = np.array(
        [MonotonicConstraint.NO_CST] * X_binned.shape[1], dtype=np.int8
    )
    is_categorical = np.zeros_like(monotonic_cst, dtype=np.uint8)
    missing_values_bin_idx = n_bins - 1
    children_lower_bound, children_upper_bound = -np.inf, np.inf
    min_gain_to_split = 2000
    splitter = Splitter(
        X_binned,
        n_bins_non_missing,
        missing_values_bin_idx,
        has_missing_values,
        is_categorical,
        monotonic_cst,
        l2_regularization,
        min_hessian_to_split,
        min_samples_leaf,
        min_gain_to_split,
        hessians_are_constant,
    )
    histograms = builder.compute_histograms_brute(sample_indices)
    # Since the gradient array is [1, 1, 100, 1, 1]
    # the max possible gain happens on the 3rd bin (or equivalently in the 2nd)
    # and is equal to about 1307, which less than min_gain_to_split = 2000, so
    # the node is considered unsplittable (gain = -1)
    current_lower_bound, current_upper_bound = -np.inf, np.inf
    value = compute_node_value(
        sum_gradients,
        sum_hessians,
        current_lower_bound,
        current_upper_bound,
        l2_regularization,
    )
    # the unbounded value is equal to -sum_gradients / sum_hessians
    assert value == pytest.approx(-104 / 5)
    split_info = splitter.find_node_split(
        n_samples,
        histograms,
        sum_gradients,
        sum_hessians,
        value,
        lower_bound=children_lower_bound,
        upper_bound=children_upper_bound,
    )
    assert split_info.gain == -1  # min_gain_to_split not respected
    # here again the max possible gain is on the 3rd bin but we now cap the
    # value of the node into [-10, inf].
    # This means the gain is now about 2430 which is more than the
    # min_gain_to_split constraint.
    current_lower_bound, current_upper_bound = -10, np.inf
    value = compute_node_value(
        sum_gradients,
        sum_hessians,
        current_lower_bound,
        current_upper_bound,
        l2_regularization,
    )
    assert value == -10
    split_info = splitter.find_node_split(
        n_samples,
        histograms,
        sum_gradients,
        sum_hessians,
        value,
        lower_bound=children_lower_bound,
        upper_bound=children_upper_bound,
    )
    assert split_info.gain > min_gain_to_split

View File

@@ -0,0 +1,187 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose
from sklearn.datasets import make_regression
from sklearn.ensemble._hist_gradient_boosting._bitset import (
set_bitset_memoryview,
set_raw_bitset_from_binned_bitset,
)
from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
from sklearn.ensemble._hist_gradient_boosting.common import (
ALMOST_INF,
G_H_DTYPE,
PREDICTOR_RECORD_DTYPE,
X_BINNED_DTYPE,
X_BITSET_INNER_DTYPE,
X_DTYPE,
)
from sklearn.ensemble._hist_gradient_boosting.grower import TreeGrower
from sklearn.ensemble._hist_gradient_boosting.predictor import TreePredictor
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
# Number of OpenMP threads available; forwarded to every predictor call below.
n_threads = _openmp_effective_n_threads()
@pytest.mark.parametrize("n_bins", [200, 256])
def test_regression_dataset(n_bins):
    """Grow a single tree on a synthetic regression task and check its R^2."""
    X, y = make_regression(
        n_samples=500, n_features=10, n_informative=5, random_state=42
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
    bin_mapper = _BinMapper(n_bins=n_bins, random_state=42)
    X_train_binned = bin_mapper.fit_transform(X_train)
    # Gradients and hessians of the least squares loss at the zero prediction.
    gradients = -y_train.astype(G_H_DTYPE)
    hessians = np.ones(1, dtype=G_H_DTYPE)
    grower = TreeGrower(
        X_train_binned,
        gradients,
        hessians,
        min_samples_leaf=10,
        max_leaf_nodes=30,
        n_bins=n_bins,
        n_bins_non_missing=bin_mapper.n_bins_non_missing_,
    )
    grower.grow()
    predictor = grower.make_predictor(binning_thresholds=bin_mapper.bin_thresholds_)
    # No categorical features: empty bitsets and empty feature index map.
    no_cat_bitsets = np.zeros((0, 8), dtype=X_BITSET_INNER_DTYPE)
    empty_f_idx_map = np.zeros(0, dtype=np.uint32)
    pred_train = predictor.predict(X_train, no_cat_bitsets, empty_f_idx_map, n_threads)
    assert r2_score(y_train, pred_train) > 0.82
    pred_test = predictor.predict(X_test, no_cat_bitsets, empty_f_idx_map, n_threads)
    assert r2_score(y_test, pred_test) > 0.67
@pytest.mark.parametrize(
    "num_threshold, expected_predictions",
    [
        (-np.inf, [0, 1, 1, 1]),
        (10, [0, 0, 1, 1]),
        (20, [0, 0, 0, 1]),
        (ALMOST_INF, [0, 0, 0, 1]),
        (np.inf, [0, 0, 0, 0]),
    ],
)
def test_infinite_values_and_thresholds(num_threshold, expected_predictions):
    """Check that infinite values and infinite thresholds are handled properly.

    In particular, if a value is +inf and the threshold is ALMOST_INF the
    sample should go to the right child. If the threshold is inf (split on
    nan), the +inf sample will go to the left child.
    """
    X = np.array([-np.inf, 10, 20, np.inf]).reshape(-1, 1)
    # Hand-build a stump: node 0 is the split, nodes 1 and 2 are the leaves.
    nodes = np.zeros(3, dtype=PREDICTOR_RECORD_DTYPE)
    nodes[0]["left"], nodes[0]["right"] = 1, 2
    nodes[0]["feature_idx"] = 0
    nodes[0]["num_threshold"] = num_threshold
    for leaf_idx, leaf_value in ((1, 0), (2, 1)):
        nodes[leaf_idx]["is_leaf"] = True
        nodes[leaf_idx]["value"] = leaf_value
    # No categorical features involved, hence all-empty bitsets.
    binned_bitsets = np.zeros((0, 8), dtype=X_BITSET_INNER_DTYPE)
    raw_bitsets = np.zeros((0, 8), dtype=X_BITSET_INNER_DTYPE)
    known_bitsets = np.zeros((0, 8), dtype=X_BITSET_INNER_DTYPE)
    f_idx_map = np.zeros(0, dtype=np.uint32)
    predictor = TreePredictor(nodes, binned_bitsets, raw_bitsets)
    predictions = predictor.predict(X, known_bitsets, f_idx_map, n_threads)
    assert np.all(predictions == expected_predictions)
@pytest.mark.parametrize(
    "bins_go_left, expected_predictions",
    [
        ([0, 3, 4, 6], [1, 0, 0, 1, 1, 0]),
        ([0, 1, 2, 6], [1, 1, 1, 0, 0, 0]),
        ([3, 5, 6], [0, 0, 0, 1, 0, 1]),
    ],
)
def test_categorical_predictor(bins_go_left, expected_predictions):
    """Check predictor outputs with categorical features.

    A single-split tree is built by hand: the bins listed in ``bins_go_left``
    are sent to the left child (value 1), all others to the right (value 0).
    """
    # Test predictor outputs are correct with categorical features
    X_binned = np.array([[0, 1, 2, 3, 4, 5]], dtype=X_BINNED_DTYPE).T
    # Raw category values corresponding to bins 0..5.
    categories = np.array([2, 5, 6, 8, 10, 15], dtype=X_DTYPE)
    bins_go_left = np.array(bins_go_left, dtype=X_BINNED_DTYPE)
    # We just construct a simple tree with 1 root and 2 children
    # parent node
    nodes = np.zeros(3, dtype=PREDICTOR_RECORD_DTYPE)
    nodes[0]["left"] = 1
    nodes[0]["right"] = 2
    nodes[0]["feature_idx"] = 0
    nodes[0]["is_categorical"] = True
    nodes[0]["missing_go_to_left"] = True
    # left child
    nodes[1]["is_leaf"] = True
    nodes[1]["value"] = 1
    # right child
    nodes[2]["is_leaf"] = True
    nodes[2]["value"] = 0
    # Encode the left-going bins in a binned bitset, then translate it to a
    # bitset over the raw category values.
    binned_cat_bitsets = np.zeros((1, 8), dtype=X_BITSET_INNER_DTYPE)
    raw_categorical_bitsets = np.zeros((1, 8), dtype=X_BITSET_INNER_DTYPE)
    for go_left in bins_go_left:
        set_bitset_memoryview(binned_cat_bitsets[0], go_left)
    set_raw_bitset_from_binned_bitset(
        raw_categorical_bitsets[0], binned_cat_bitsets[0], categories
    )
    predictor = TreePredictor(nodes, binned_cat_bitsets, raw_categorical_bitsets)
    # Check binned data gives correct predictions
    prediction_binned = predictor.predict_binned(
        X_binned, missing_values_bin_idx=6, n_threads=n_threads
    )
    assert_allclose(prediction_binned, expected_predictions)
    # manually construct bitset of the categories known at fit time
    known_cat_bitsets = np.zeros((1, 8), dtype=np.uint32)
    known_cat_bitsets[0, 0] = np.sum(2**categories, dtype=np.uint32)
    f_idx_map = np.array([0], dtype=np.uint32)
    # Check with un-binned data
    predictions = predictor.predict(
        categories.reshape(-1, 1), known_cat_bitsets, f_idx_map, n_threads
    )
    assert_allclose(predictions, expected_predictions)
    # Check missing goes left (missing_go_to_left is set on the root and
    # missing_values_bin_idx=6 marks bin 6 as the missing-values bin)
    X_binned_missing = np.array([[6]], dtype=X_BINNED_DTYPE).T
    predictions = predictor.predict_binned(
        X_binned_missing, missing_values_bin_idx=6, n_threads=n_threads
    )
    assert_allclose(predictions, [1])
    # missing and unknown go left
    predictions = predictor.predict(
        np.array([[np.nan, 17]], dtype=X_DTYPE).T,
        known_cat_bitsets,
        f_idx_map,
        n_threads,
    )
    assert_allclose(predictions, [1, 1])

View File

@@ -0,0 +1,231 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose, assert_array_equal
from sklearn.base import clone
from sklearn.datasets import make_classification, make_regression
from sklearn.ensemble import (
HistGradientBoostingClassifier,
HistGradientBoostingRegressor,
)
from sklearn.metrics import check_scoring
# Shared toy datasets reused by every warm-start test in this module.
X_classification, y_classification = make_classification(random_state=0)
X_regression, y_regression = make_regression(random_state=0)
def _assert_predictor_equal(gb_1, gb_2, X):
    """Assert that two HistGBM instances are identical."""
    # The per-iteration predictors must have identical node arrays.
    for iteration_1, iteration_2 in zip(gb_1._predictors, gb_2._predictors):
        for tree_1, tree_2 in zip(iteration_1, iteration_2):
            assert_array_equal(tree_1.nodes, tree_2.nodes)
    # And both ensembles must produce the same predictions on X.
    assert_allclose(gb_1.predict(X), gb_2.predict(X))
@pytest.mark.parametrize(
    "GradientBoosting, X, y",
    [
        (HistGradientBoostingClassifier, X_classification, y_classification),
        (HistGradientBoostingRegressor, X_regression, y_regression),
    ],
)
def test_max_iter_with_warm_start_validation(GradientBoosting, X, y):
    """A warm-started refit must not shrink ``max_iter`` below ``n_iter_``."""
    model = GradientBoosting(max_iter=10, early_stopping=False, warm_start=True)
    model.fit(X, y)
    # Requesting fewer total iterations than already fitted is invalid.
    model.set_params(max_iter=5)
    err_msg = (
        "max_iter=5 must be larger than or equal to n_iter_=10 when warm_start==True"
    )
    with pytest.raises(ValueError, match=err_msg):
        model.fit(X, y)
@pytest.mark.parametrize(
    "GradientBoosting, X, y",
    [
        (HistGradientBoostingClassifier, X_classification, y_classification),
        (HistGradientBoostingRegressor, X_regression, y_regression),
    ],
)
def test_warm_start_yields_identical_results(GradientBoosting, X, y):
    """50 + 25 warm-started iterations must equal 75 iterations in one go."""
    seed = 42
    warm = GradientBoosting(
        n_iter_no_change=100, max_iter=50, random_state=seed, warm_start=True
    )
    warm.fit(X, y).set_params(max_iter=75).fit(X, y)
    cold = GradientBoosting(
        n_iter_no_change=100, max_iter=75, random_state=seed, warm_start=False
    )
    cold.fit(X, y)
    # Same trees and same predictions in both cases.
    _assert_predictor_equal(warm, cold, X)
@pytest.mark.parametrize(
    "GradientBoosting, X, y",
    [
        (HistGradientBoostingClassifier, X_classification, y_classification),
        (HistGradientBoostingRegressor, X_regression, y_regression),
    ],
)
def test_warm_start_max_depth(GradientBoosting, X, y):
    """Trees of different depths can coexist in a warm-started ensemble."""
    model = GradientBoosting(
        max_iter=20,
        min_samples_leaf=1,
        warm_start=True,
        max_depth=2,
        early_stopping=False,
    )
    model.fit(X, y)
    model.set_params(max_iter=30, max_depth=3, n_iter_no_change=110)
    model.fit(X, y)
    # The first 20 trees were grown with max_depth=2 ...
    for predictors in model._predictors[:20]:
        assert predictors[0].get_max_depth() == 2
    # ... and the 10 trees added by the second fit with max_depth=3.
    for predictors in model._predictors[20:]:
        assert predictors[0].get_max_depth() == 3
@pytest.mark.parametrize(
    "GradientBoosting, X, y",
    [
        (HistGradientBoostingClassifier, X_classification, y_classification),
        (HistGradientBoostingRegressor, X_regression, y_regression),
    ],
)
@pytest.mark.parametrize("scoring", (None, "loss"))
def test_warm_start_early_stopping(GradientBoosting, X, y, scoring):
    """A warm-started refit with early stopping should add few iterations."""
    n_iter_no_change = 5
    model = GradientBoosting(
        n_iter_no_change=n_iter_no_change,
        max_iter=10000,
        early_stopping=True,
        random_state=42,
        warm_start=True,
        tol=1e-3,
        scoring=scoring,
    )
    n_iter_first_fit = model.fit(X, y).n_iter_
    n_iter_second_fit = model.fit(X, y).n_iter_
    # The second fit must add at least one but fewer than n_iter_no_change
    # iterations before stopping again.
    assert 0 < n_iter_second_fit - n_iter_first_fit < n_iter_no_change
@pytest.mark.parametrize(
    "GradientBoosting, X, y",
    [
        (HistGradientBoostingClassifier, X_classification, y_classification),
        (HistGradientBoostingRegressor, X_regression, y_regression),
    ],
)
def test_warm_start_equal_n_estimators(GradientBoosting, X, y):
    """Refitting with warm start and an unchanged ``max_iter`` is a no-op."""
    reference = GradientBoosting(max_depth=2, early_stopping=False)
    reference.fit(X, y)
    warm = clone(reference)
    warm.set_params(max_iter=reference.max_iter, warm_start=True, n_iter_no_change=5)
    warm.fit(X, y)
    # Check that both predictors are equal
    _assert_predictor_equal(reference, warm, X)
@pytest.mark.parametrize(
    "GradientBoosting, X, y",
    [
        (HistGradientBoostingClassifier, X_classification, y_classification),
        (HistGradientBoostingRegressor, X_regression, y_regression),
    ],
)
def test_warm_start_clear(GradientBoosting, X, y):
    """Disabling warm start must clear any previously fitted state."""
    reference = GradientBoosting(n_iter_no_change=5, random_state=42)
    reference.fit(X, y)
    model = GradientBoosting(n_iter_no_change=5, random_state=42, warm_start=True)
    model.fit(X, y)  # inits state
    model.set_params(warm_start=False)
    model.fit(X, y)  # clears old state and equals reference
    # Same training / validation score histories ...
    assert_allclose(reference.train_score_, model.train_score_)
    assert_allclose(reference.validation_score_, model.validation_score_)
    # ... and same trees / predictions.
    _assert_predictor_equal(reference, model, X)
@pytest.mark.parametrize(
    "GradientBoosting, X, y",
    [
        (HistGradientBoostingClassifier, X_classification, y_classification),
        (HistGradientBoostingRegressor, X_regression, y_regression),
    ],
)
@pytest.mark.parametrize("rng_type", ("none", "int", "instance"))
def test_random_seeds_warm_start(GradientBoosting, X, y, rng_type):
    """Check the seeds used for the train/val split and small-trainset
    subsampling in a warm start context."""

    def _get_rng(rng_type):
        # Helper to avoid consuming rngs
        if rng_type == "none":
            return None
        elif rng_type == "int":
            return 42
        else:
            return np.random.RandomState(0)

    random_state = _get_rng(rng_type)
    gb_1 = GradientBoosting(early_stopping=True, max_iter=2, random_state=random_state)
    gb_1.set_params(scoring=check_scoring(gb_1))
    gb_1.fit(X, y)
    random_seed_1_1 = gb_1._random_seed
    gb_1.fit(X, y)
    random_seed_1_2 = gb_1._random_seed  # clear the old state, different seed
    random_state = _get_rng(rng_type)
    gb_2 = GradientBoosting(
        early_stopping=True, max_iter=2, random_state=random_state, warm_start=True
    )
    gb_2.set_params(scoring=check_scoring(gb_2))
    gb_2.fit(X, y)  # inits state
    random_seed_2_1 = gb_2._random_seed
    gb_2.fit(X, y)  # clears old state and equals est
    random_seed_2_2 = gb_2._random_seed
    # Without warm starting, the seeds should be
    # * all different if random state is None
    # * all equal if random state is an integer
    # * different when refitting and equal with a new estimator (because
    #   the random state is mutated)
    if rng_type == "none":
        # Use a set: the previous chained comparison `a != b != c` only
        # checked consecutive pairs and did not guarantee a != c.
        assert len({random_seed_1_1, random_seed_1_2, random_seed_2_1}) == 3
    elif rng_type == "int":
        assert random_seed_1_1 == random_seed_1_2 == random_seed_2_1
    else:
        assert random_seed_1_1 == random_seed_2_1 != random_seed_1_2
    # With warm starting, the seeds must be equal
    assert random_seed_2_1 == random_seed_2_2

View File

@@ -0,0 +1,149 @@
"""This module contains utility routines."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from sklearn.base import is_classifier
from sklearn.ensemble._hist_gradient_boosting.binning import _BinMapper
def get_equivalent_estimator(estimator, lib="lightgbm", n_classes=None):
    """Return an unfitted estimator from another lib with matching hyperparams.

    This utility function takes care of renaming the sklearn parameters into
    their LightGBM, XGBoost or CatBoost equivalent parameters.

    Parameters
    ----------
    estimator : estimator instance
        A scikit-learn histogram gradient boosting estimator; its
        ``get_params()`` output is translated (``loss``, ``max_iter``,
        ``max_leaf_nodes``, ...).
    lib : {"lightgbm", "xgboost", "catboost"}, default="lightgbm"
        The target library.
    n_classes : int, default=None
        Number of classes, used to select the binary vs multiclass objective
        for classification losses.

    # unmapped XGB parameters:
    # - min_samples_leaf
    # - min_data_in_bin
    # - min_split_gain (there is min_split_loss though?)

    # unmapped Catboost parameters:
    # max_leaves
    # min_*
    """
    if lib not in ("lightgbm", "xgboost", "catboost"):
        raise ValueError(
            "accepted libs are lightgbm, xgboost, and catboost. got {}".format(lib)
        )
    sklearn_params = estimator.get_params()
    if sklearn_params["loss"] == "auto":
        raise ValueError(
            "auto loss is not accepted. We need to know if "
            "the problem is binary or multiclass classification."
        )
    if sklearn_params["early_stopping"]:
        raise NotImplementedError("Early stopping should be deactivated.")
    # LightGBM
    lightgbm_loss_mapping = {
        "squared_error": "regression_l2",
        "absolute_error": "regression_l1",
        "log_loss": "binary" if n_classes == 2 else "multiclass",
        "gamma": "gamma",
        "poisson": "poisson",
    }
    lightgbm_params = {
        "objective": lightgbm_loss_mapping[sklearn_params["loss"]],
        "learning_rate": sklearn_params["learning_rate"],
        "n_estimators": sklearn_params["max_iter"],
        "num_leaves": sklearn_params["max_leaf_nodes"],
        "max_depth": sklearn_params["max_depth"],
        "min_data_in_leaf": sklearn_params["min_samples_leaf"],
        "reg_lambda": sklearn_params["l2_regularization"],
        "max_bin": sklearn_params["max_bins"],
        "min_data_in_bin": 1,
        "min_sum_hessian_in_leaf": 1e-3,
        "min_split_gain": 0,
        "verbosity": 10 if sklearn_params["verbose"] else -10,
        "boost_from_average": True,
        "enable_bundle": False,  # also makes feature order consistent
        "subsample_for_bin": _BinMapper().subsample,
        "poisson_max_delta_step": 1e-12,
        "feature_fraction_bynode": sklearn_params["max_features"],
    }
    if sklearn_params["loss"] == "log_loss" and n_classes > 2:
        # LightGBM multiplies hessians by 2 in multiclass loss.
        lightgbm_params["min_sum_hessian_in_leaf"] *= 2
        # LightGBM 3.0 introduced a different scaling of the hessian for the multiclass
        # case.
        # It is equivalent of scaling the learning rate.
        # See https://github.com/microsoft/LightGBM/pull/3256.
        if n_classes is not None:
            lightgbm_params["learning_rate"] *= n_classes / (n_classes - 1)
    # XGB
    xgboost_loss_mapping = {
        "squared_error": "reg:linear",
        "absolute_error": "LEAST_ABSOLUTE_DEV_NOT_SUPPORTED",
        "log_loss": "reg:logistic" if n_classes == 2 else "multi:softmax",
        "gamma": "reg:gamma",
        "poisson": "count:poisson",
    }
    xgboost_params = {
        "tree_method": "hist",
        "grow_policy": "lossguide",  # so that we can set max_leaves
        "objective": xgboost_loss_mapping[sklearn_params["loss"]],
        "learning_rate": sklearn_params["learning_rate"],
        "n_estimators": sklearn_params["max_iter"],
        "max_leaves": sklearn_params["max_leaf_nodes"],
        "max_depth": sklearn_params["max_depth"] or 0,
        "lambda": sklearn_params["l2_regularization"],
        "max_bin": sklearn_params["max_bins"],
        "min_child_weight": 1e-3,
        "verbosity": 2 if sklearn_params["verbose"] else 0,
        "silent": sklearn_params["verbose"] == 0,
        "n_jobs": -1,
        "colsample_bynode": sklearn_params["max_features"],
    }
    # Catboost
    catboost_loss_mapping = {
        "squared_error": "RMSE",
        # catboost does not support MAE when leaf_estimation_method is Newton
        "absolute_error": "LEAST_ASBOLUTE_DEV_NOT_SUPPORTED",
        "log_loss": "Logloss" if n_classes == 2 else "MultiClass",
        "gamma": None,
        "poisson": "Poisson",
    }
    catboost_params = {
        "loss_function": catboost_loss_mapping[sklearn_params["loss"]],
        "learning_rate": sklearn_params["learning_rate"],
        "iterations": sklearn_params["max_iter"],
        "depth": sklearn_params["max_depth"],
        "reg_lambda": sklearn_params["l2_regularization"],
        "max_bin": sklearn_params["max_bins"],
        "feature_border_type": "Median",
        "leaf_estimation_method": "Newton",
        "verbose": bool(sklearn_params["verbose"]),
    }
    # The third-party libraries are imported lazily so that they are only
    # required when actually requested.
    if lib == "lightgbm":
        from lightgbm import LGBMClassifier, LGBMRegressor
        if is_classifier(estimator):
            return LGBMClassifier(**lightgbm_params)
        else:
            return LGBMRegressor(**lightgbm_params)
    elif lib == "xgboost":
        from xgboost import XGBClassifier, XGBRegressor
        if is_classifier(estimator):
            return XGBClassifier(**xgboost_params)
        else:
            return XGBRegressor(**xgboost_params)
    else:
        from catboost import CatBoostClassifier, CatBoostRegressor
        if is_classifier(estimator):
            return CatBoostClassifier(**catboost_params)
        else:
            return CatBoostRegressor(**catboost_params)

View File

@@ -0,0 +1,681 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numbers
import threading
from numbers import Integral, Real
from warnings import warn
import numpy as np
from scipy.sparse import issparse
from sklearn.base import OutlierMixin, _fit_context
from sklearn.ensemble._bagging import BaseBagging
from sklearn.tree import ExtraTreeRegressor
from sklearn.tree._tree import DTYPE as tree_dtype
from sklearn.utils import check_array, check_random_state, gen_batches
from sklearn.utils._chunking import get_chunk_n_rows
from sklearn.utils._param_validation import Interval, RealNotInt, StrOptions
from sklearn.utils.parallel import Parallel, delayed
from sklearn.utils.validation import (
_check_sample_weight,
_num_samples,
check_is_fitted,
validate_data,
)
__all__ = ["IsolationForest"]
def _parallel_compute_tree_depths(
    tree,
    X,
    features,
    tree_decision_path_lengths,
    tree_avg_path_lengths,
    depths,
    lock,
):
    """Accumulate the path-length contribution of one isolation tree.

    The depth contribution of ``tree`` for every sample in ``X`` is added in
    place to the shared ``depths`` array under ``lock``.
    """
    # Restrict X to the features this tree was trained on, if any.
    X_subset = X if features is None else X[:, features]
    leaves_index = tree.apply(X_subset, check_input=False)
    tree_depths = (
        tree_decision_path_lengths[leaves_index]
        + tree_avg_path_lengths[leaves_index]
        - 1.0
    )
    with lock:
        depths += tree_depths
class IsolationForest(OutlierMixin, BaseBagging):
"""
Isolation Forest Algorithm.
Return the anomaly score of each sample using the IsolationForest algorithm
The IsolationForest 'isolates' observations by randomly selecting a feature
and then randomly selecting a split value between the maximum and minimum
values of the selected feature.
Since recursive partitioning can be represented by a tree structure, the
number of splittings required to isolate a sample is equivalent to the path
length from the root node to the terminating node.
This path length, averaged over a forest of such random trees, is a
measure of normality and our decision function.
Random partitioning produces noticeably shorter paths for anomalies.
Hence, when a forest of random trees collectively produce shorter path
lengths for particular samples, they are highly likely to be anomalies.
Read more in the :ref:`User Guide <isolation_forest>`.
.. versionadded:: 0.18
Parameters
----------
n_estimators : int, default=100
The number of base estimators in the ensemble.
max_samples : "auto", int or float, default="auto"
The number of samples to draw from X to train each base estimator.
- If int, then draw `max_samples` samples.
- If float, then draw `max_samples * X.shape[0]` samples.
- If "auto", then `max_samples=min(256, n_samples)`.
If max_samples is larger than the number of samples provided,
all samples will be used for all trees (no sampling).
contamination : 'auto' or float, default='auto'
The amount of contamination of the data set, i.e. the proportion
of outliers in the data set. Used when fitting to define the threshold
on the scores of the samples.
- If 'auto', the threshold is determined as in the
original paper.
- If float, the contamination should be in the range (0, 0.5].
.. versionchanged:: 0.22
The default value of ``contamination`` changed from 0.1
to ``'auto'``.
max_features : int or float, default=1.0
The number of features to draw from X to train each base estimator.
- If int, then draw `max_features` features.
- If float, then draw `max(1, int(max_features * n_features_in_))` features.
Note: using a float number less than 1.0 or integer less than number of
features will enable feature subsampling and leads to a longer runtime.
bootstrap : bool, default=False
If True, individual trees are fit on random subsets of the training
data sampled with replacement. If False, sampling without replacement
is performed.
n_jobs : int, default=None
The number of jobs to run in parallel for :meth:`fit`. ``None`` means 1
unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using
all processors. See :term:`Glossary <n_jobs>` for more details.
random_state : int, RandomState instance or None, default=None
Controls the pseudo-randomness of the selection of the feature
and split values for each branching step and each tree in the forest.
Pass an int for reproducible results across multiple function calls.
See :term:`Glossary <random_state>`.
verbose : int, default=0
Controls the verbosity of the tree building process.
warm_start : bool, default=False
When set to ``True``, reuse the solution of the previous call to fit
and add more estimators to the ensemble, otherwise, just fit a whole
new forest. See :term:`the Glossary <warm_start>`.
.. versionadded:: 0.21
Attributes
----------
estimator_ : :class:`~sklearn.tree.ExtraTreeRegressor` instance
The child estimator template used to create the collection of
fitted sub-estimators.
.. versionadded:: 1.2
`base_estimator_` was renamed to `estimator_`.
estimators_ : list of ExtraTreeRegressor instances
The collection of fitted sub-estimators.
estimators_features_ : list of ndarray
The subset of drawn features for each base estimator.
estimators_samples_ : list of ndarray
The subset of drawn samples (i.e., the in-bag samples) for each base
estimator.
max_samples_ : int
The actual number of samples.
offset_ : float
Offset used to define the decision function from the raw scores. We
have the relation: ``decision_function = score_samples - offset_``.
``offset_`` is defined as follows. When the contamination parameter is
set to "auto", the offset is equal to -0.5 as the scores of inliers are
close to 0 and the scores of outliers are close to -1. When a
contamination parameter different than "auto" is provided, the offset
is defined in such a way we obtain the expected number of outliers
(samples with decision function < 0) in training.
.. versionadded:: 0.20
n_features_in_ : int
Number of features seen during :term:`fit`.
.. versionadded:: 0.24
feature_names_in_ : ndarray of shape (`n_features_in_`,)
Names of features seen during :term:`fit`. Defined only when `X`
has feature names that are all strings.
.. versionadded:: 1.0
See Also
--------
sklearn.covariance.EllipticEnvelope : An object for detecting outliers in a
Gaussian distributed dataset.
sklearn.svm.OneClassSVM : Unsupervised Outlier Detection.
Estimate the support of a high-dimensional distribution.
The implementation is based on libsvm.
sklearn.neighbors.LocalOutlierFactor : Unsupervised Outlier Detection
using Local Outlier Factor (LOF).
Notes
-----
The implementation is based on an ensemble of ExtraTreeRegressor. The
maximum depth of each tree is set to ``ceil(log_2(n))`` where
:math:`n` is the number of samples used to build the tree
(see [1]_ for more details).
References
----------
.. [1] F. T. Liu, K. M. Ting and Z. -H. Zhou.
:doi:`"Isolation forest." <10.1109/ICDM.2008.17>`
2008 Eighth IEEE International Conference on Data Mining (ICDM),
2008, pp. 413-422.
.. [2] F. T. Liu, K. M. Ting and Z. -H. Zhou.
:doi:`"Isolation-based anomaly detection."
<10.1145/2133360.2133363>` ACM Transactions on
Knowledge Discovery from Data (TKDD) 6.1 (2012): 1-39.
Examples
--------
>>> from sklearn.ensemble import IsolationForest
>>> X = [[-1.1], [0.3], [0.5], [100]]
>>> clf = IsolationForest(random_state=0).fit(X)
>>> clf.predict([[0.1], [0], [90]])
array([ 1, 1, -1])
For an example of using isolation forest for anomaly detection see
:ref:`sphx_glr_auto_examples_ensemble_plot_isolation_forest.py`.
"""
_parameter_constraints: dict = {
"n_estimators": [Interval(Integral, 1, None, closed="left")],
"max_samples": [
StrOptions({"auto"}),
Interval(Integral, 1, None, closed="left"),
Interval(RealNotInt, 0, 1, closed="right"),
],
"contamination": [
StrOptions({"auto"}),
Interval(Real, 0, 0.5, closed="right"),
],
"max_features": [
Integral,
Interval(Real, 0, 1, closed="right"),
],
"bootstrap": ["boolean"],
"n_jobs": [Integral, None],
"random_state": ["random_state"],
"verbose": ["verbose"],
"warm_start": ["boolean"],
}
    def __init__(
        self,
        *,
        n_estimators=100,
        max_samples="auto",
        contamination="auto",
        max_features=1.0,
        bootstrap=False,
        n_jobs=None,
        random_state=None,
        verbose=0,
        warm_start=False,
    ):
        # Delegate the ensemble bookkeeping to BaseBagging. estimator=None:
        # the per-tree template is provided by _get_estimator instead.
        super().__init__(
            estimator=None,
            # here above max_features has no links with self.max_features
            bootstrap=bootstrap,
            bootstrap_features=False,
            n_estimators=n_estimators,
            max_samples=max_samples,
            max_features=max_features,
            warm_start=warm_start,
            n_jobs=n_jobs,
            random_state=random_state,
            verbose=verbose,
        )
        # contamination is not a BaseBagging parameter; stored directly here.
        self.contamination = contamination
def _get_estimator(self):
return ExtraTreeRegressor(
# here max_features has no links with self.max_features
max_features=1,
splitter="random",
random_state=self.random_state,
)
    def _set_oob_score(self, X, y):
        # Explicitly disable the BaseBagging out-of-bag scoring hook: it is
        # not supported for isolation forests.
        raise NotImplementedError("OOB score not supported by iforest")
    def _parallel_args(self):
        """Return the extra joblib arguments used for parallel fitting."""
        # ExtraTreeRegressor releases the GIL, so it's more efficient to use
        # a thread-based backend rather than a process-based backend so as
        # to avoid suffering from communication overhead and extra memory
        # copies. This is only used in the fit method.
        return {"prefer": "threads"}
    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y=None, sample_weight=None):
        """
        Fit estimator.
        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples. Use ``dtype=np.float32`` for maximum
            efficiency. Sparse matrices are also supported, use sparse
            ``csc_matrix`` for maximum efficiency.
        y : Ignored
            Not used, present for API consistency by convention.
        sample_weight : array-like of shape (n_samples,), default=None
            Sample weights. If None, then samples are equally weighted.
        Returns
        -------
        self : object
            Fitted estimator.
        """
        X = validate_data(
            self, X, accept_sparse=["csc"], dtype=tree_dtype, ensure_all_finite=False
        )
        if sample_weight is not None:
            sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
        if issparse(X):
            # Pre-sort indices to avoid that each individual tree of the
            # ensemble sorts the indices.
            X.sort_indices()
        rnd = check_random_state(self.random_state)
        # y is ignored by the algorithm: a random uniform target is generated
        # to feed the underlying BaseBagging/ExtraTreeRegressor machinery.
        y = rnd.uniform(size=X.shape[0])
        # ensure that max_sample is in [1, n_samples]:
        n_samples = X.shape[0]
        if isinstance(self.max_samples, str) and self.max_samples == "auto":
            max_samples = min(256, n_samples)
        elif isinstance(self.max_samples, numbers.Integral):
            if self.max_samples > n_samples:
                warn(
                    "max_samples (%s) is greater than the "
                    "total number of samples (%s). max_samples "
                    "will be set to n_samples for estimation."
                    % (self.max_samples, n_samples)
                )
                max_samples = n_samples
            else:
                max_samples = self.max_samples
        else:  # max_samples is float
            max_samples = int(self.max_samples * X.shape[0])
        self.max_samples_ = max_samples
        # Cap tree depth at ceil(log2(max_samples)) (see the class Notes and
        # reference [1]).
        max_depth = int(np.ceil(np.log2(max(max_samples, 2))))
        super()._fit(
            X,
            y,
            max_samples=max_samples,
            max_depth=max_depth,
            sample_weight=sample_weight,
            check_input=False,
        )
        # Cache per-tree path-length statistics, reused when scoring samples.
        self._average_path_length_per_tree, self._decision_path_lengths = zip(
            *[
                (
                    _average_path_length(tree.tree_.n_node_samples),
                    tree.tree_.compute_node_depths(),
                )
                for tree in self.estimators_
            ]
        )
        if self.contamination == "auto":
            # 0.5 plays a special role as described in the original paper.
            # we take the opposite as we consider the opposite of their score.
            self.offset_ = -0.5
            return self
        # Else, define offset_ wrt contamination parameter
        # To avoid performing input validation a second time we call
        # _score_samples rather than score_samples.
        # _score_samples expects a CSR matrix, so we convert if necessary.
        if issparse(X):
            X = X.tocsr()
        self.offset_ = np.percentile(self._score_samples(X), 100.0 * self.contamination)
        return self
def predict(self, X):
    """Predict whether each sample in X is an outlier (-1) or an inlier (+1).

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Internally converted to ``dtype=np.float32``
        and, for sparse input, to a CSR matrix.

    Returns
    -------
    is_inlier : ndarray of shape (n_samples,)
        +1 for samples considered inliers by the fitted model, -1 for
        outliers.

    Notes
    -----
    Prediction can be parallelized through a joblib context manager; the
    ``n_jobs`` constructor parameter only applies to ``fit``. Prediction
    may actually be faster without parallelization for a small number of
    samples (roughly 1000 or fewer).

    .. code-block:: python

        from joblib import parallel_backend

        # threading is appropriate since predict is not CPU bound
        with parallel_backend("threading", n_jobs=4):
            model.predict(X)
    """
    check_is_fitted(self)
    scores = self.decision_function(X)
    # A negative decision score marks a sample as an outlier.
    labels = np.ones_like(scores, dtype=int)
    labels[scores < 0] = -1
    return labels
def decision_function(self, X):
    """Average anomaly score of X of the base classifiers.

    The anomaly score of an input sample is the mean anomaly score of the
    trees in the forest. The measure of normality of an observation given
    a tree is the depth of the leaf containing it, i.e. the number of
    splittings required to isolate it; when a leaf holds n_left training
    observations, the average path length of an n_left-sample isolation
    tree is added.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples. Internally converted to ``dtype=np.float32``
        and, for sparse input, to a CSR matrix.

    Returns
    -------
    scores : ndarray of shape (n_samples,)
        The anomaly score of the input samples. The lower, the more
        abnormal; negative scores represent outliers, positive scores
        represent inliers.

    Notes
    -----
    This method can be parallelized through a joblib context manager; the
    ``n_jobs`` constructor parameter only applies to ``fit``. Scoring may
    actually be faster without parallelization for a small number of
    samples (roughly 1000 or fewer).

    .. code-block:: python

        from joblib import parallel_backend

        # threading is appropriate since decision_function is not CPU bound
        with parallel_backend("threading", n_jobs=4):
            model.decision_function(X)
    """
    # Shift by self.offset_ so that 0 becomes the inlier/outlier threshold.
    raw_scores = self.score_samples(X)
    return raw_scores - self.offset_
def score_samples(self, X):
    """Opposite of the anomaly score defined in the original paper.

    The anomaly score of an input sample is the mean anomaly score of the
    trees in the forest. The measure of normality of an observation given
    a tree is the depth of the leaf containing it, i.e. the number of
    splittings required to isolate it; when a leaf holds n_left training
    observations, the average path length of an n_left-sample isolation
    tree is added.

    Parameters
    ----------
    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    scores : ndarray of shape (n_samples,)
        The anomaly score of the input samples. The lower, the more
        abnormal.

    Notes
    -----
    This method can be parallelized through a joblib context manager; the
    ``n_jobs`` constructor parameter only applies to ``fit``. Scoring may
    actually be faster without parallelization for a small number of
    samples (roughly 1000 or fewer).

    .. code-block:: python

        from joblib import parallel_backend

        # threading is appropriate since score_samples is not CPU bound
        with parallel_backend("threading", n_jobs=4):
            model.score(X)
    """
    # Validate without resetting n_features_in_ (model is already fitted);
    # NaNs are allowed since isolation trees handle missing values.
    checked_X = validate_data(
        self,
        X,
        accept_sparse="csr",
        dtype=tree_dtype,
        reset=False,
        ensure_all_finite=False,
    )
    return self._score_samples(checked_X)
def _score_samples(self, X):
    """Score samples without re-validating the input.

    Public ``score_samples`` validates X first; this private variant skips
    validation so that feature names attached to X are preserved.
    """
    # Mirrors the structure of ForestClassifier/predict_proba.
    check_is_fitted(self)
    # Negate so that larger values mean "less abnormal" in the public API.
    return -self._compute_chunked_score_samples(X)
def _compute_chunked_score_samples(self, X):
    """Compute anomaly scores over row batches bounded by working memory."""
    n_samples = _num_samples(X)

    # Feature subsampling is only needed when the trees were trained on a
    # strict subset of the columns.
    subsample_features = self._max_features != X.shape[1]

    # Pick as many rows per batch as fit in the working_memory budget
    # (sklearn.get_config()['working_memory']), assuming self._max_features
    # values per row are materialized during scoring.
    #
    # Notes:
    # - at least one row is processed per batch, even if a single row
    #   exceeds the budget;
    # - only temporary memory while computing scores is accounted for --
    #   the returned scores themselves are 1D.
    batch_rows = get_chunk_n_rows(
        row_bytes=16 * self._max_features, max_n_rows=n_samples
    )

    scores = np.zeros(n_samples, order="f")
    for batch in gen_batches(n_samples, batch_rows):
        # Score one slice of test samples at a time.
        scores[batch] = self._compute_score_samples(X[batch], subsample_features)

    return scores
def _compute_score_samples(self, X, subsample_features):
    """
    Compute the score of each samples in X going through the extra trees.

    Parameters
    ----------
    X : array-like or sparse matrix
        Data matrix.

    subsample_features : bool
        Whether features should be subsampled.

    Returns
    -------
    scores : ndarray of shape (n_samples,)
        The score of each sample in X.
    """
    n_samples = X.shape[0]

    # Accumulator for the per-sample depth summed over all trees; filled
    # in-place by the parallel workers below.
    depths = np.zeros(n_samples, order="f")

    # Normalization constant: average path length of an unsuccessful BST
    # search in a tree built on self._max_samples points.
    average_path_length_max_samples = _average_path_length([self._max_samples])

    # Note: we use default n_jobs value, i.e. sequential computation, which
    # we expect to be more performant that parallelizing for small number
    # of samples, e.g. < 1k samples. Default n_jobs value can be overridden
    # by using joblib.parallel_backend context manager around
    # ._compute_score_samples. Using a higher n_jobs may speed up the
    # computation of the scores, e.g. for > 1k samples. See
    # https://github.com/scikit-learn/scikit-learn/pull/28622 for more
    # details.
    lock = threading.Lock()
    # require="sharedmem": every worker adds into the same `depths` array,
    # serialized through `lock`.
    Parallel(
        verbose=self.verbose,
        require="sharedmem",
    )(
        delayed(_parallel_compute_tree_depths)(
            tree,
            X,
            features if subsample_features else None,
            self._decision_path_lengths[tree_idx],
            self._average_path_length_per_tree[tree_idx],
            depths,
            lock,
        )
        for tree_idx, (tree, features) in enumerate(
            zip(self.estimators_, self.estimators_features_)
        )
    )

    denominator = len(self.estimators_) * average_path_length_max_samples
    scores = 2 ** (
        # For a single training sample, denominator and depth are 0.
        # Therefore, we set the score manually to 1.
        -np.divide(
            depths, denominator, out=np.ones_like(depths), where=denominator != 0
        )
    )
    return scores
def __sklearn_tags__(self):
    """Declare estimator capabilities for scikit-learn's tag system."""
    tags = super().__sklearn_tags__()
    # Isolation trees handle missing values natively, so NaN input is allowed.
    tags.input_tags.allow_nan = True
    return tags
def _average_path_length(n_samples_leaf):
    """
    The average path length in an n_samples iTree, which is equal to
    the average path length of an unsuccessful BST search since the
    latter has the same structure as an isolation tree.

    Parameters
    ----------
    n_samples_leaf : array-like of shape (n_samples,)
        The number of training samples in each test sample leaf, for
        each estimators.

    Returns
    -------
    average_path_length : ndarray of shape (n_samples,)
    """
    n_samples_leaf = check_array(n_samples_leaf, ensure_2d=False)
    original_shape = n_samples_leaf.shape

    # Work on a flat 2D view; results are reshaped back at the end.
    counts = n_samples_leaf.reshape((1, -1))
    lengths = np.zeros(counts.shape)

    # Closed-form cases: <=1 sample isolates immediately (length 0),
    # exactly 2 samples need a single split (length 1).
    trivial = counts <= 1
    pair = counts == 2
    general = ~(trivial | pair)

    lengths[trivial] = 0.0
    lengths[pair] = 1.0
    c = counts[general]
    # Harmonic-number approximation: 2*H(n-1) - 2*(n-1)/n,
    # with H(i) ~ ln(i) + Euler's constant.
    lengths[general] = (
        2.0 * (np.log(c - 1.0) + np.euler_gamma) - 2.0 * (c - 1.0) / c
    )

    return lengths.reshape(original_shape)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,734 @@
"""
Soft Voting/Majority Rule classifier and Voting regressor.
This module contains:
- A Soft Voting/Majority Rule classifier for classification estimators.
- A Voting regressor for regression estimators.
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from abc import abstractmethod
from numbers import Integral
import numpy as np
from sklearn.base import (
ClassifierMixin,
RegressorMixin,
TransformerMixin,
_fit_context,
clone,
)
from sklearn.ensemble._base import _BaseHeterogeneousEnsemble, _fit_single_estimator
from sklearn.exceptions import NotFittedError
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import Bunch
from sklearn.utils._param_validation import StrOptions
from sklearn.utils._repr_html.estimator import _VisualBlock
from sklearn.utils.metadata_routing import (
MetadataRouter,
MethodMapping,
_raise_for_params,
_routing_enabled,
process_routing,
)
from sklearn.utils.metaestimators import available_if
from sklearn.utils.multiclass import type_of_target
from sklearn.utils.parallel import Parallel, delayed
from sklearn.utils.validation import (
_check_feature_names_in,
check_is_fitted,
column_or_1d,
)
class _BaseVoting(TransformerMixin, _BaseHeterogeneousEnsemble):
    """Base class for voting.

    Warning: This class should not be used directly. Use derived classes
    instead.
    """

    # Constraints validated by scikit-learn's parameter validation machinery.
    _parameter_constraints: dict = {
        "estimators": [list],
        "weights": ["array-like", None],
        "n_jobs": [None, Integral],
        "verbose": ["verbose"],
    }

    def _log_message(self, name, idx, total):
        # Progress message forwarded to joblib when ``verbose`` is enabled.
        if not self.verbose:
            return None
        return f"({idx} of {total}) Processing {name}"

    @property
    def _weights_not_none(self):
        """Get the weights of not `None` estimators."""
        if self.weights is None:
            return None
        # Drop the weights whose estimator has been set to 'drop'.
        return [w for est, w in zip(self.estimators, self.weights) if est[1] != "drop"]

    def _predict(self, X):
        """Collect results from clf.predict calls."""
        # Shape (n_samples, n_estimators): one column of predictions per
        # fitted estimator.
        return np.asarray([est.predict(X) for est in self.estimators_]).T

    @abstractmethod
    def fit(self, X, y, **fit_params):
        """Get common fit operations."""
        names, clfs = self._validate_estimators()

        if self.weights is not None and len(self.weights) != len(self.estimators):
            raise ValueError(
                "Number of `estimators` and weights must be equal; got"
                f" {len(self.weights)} weights, {len(self.estimators)} estimators"
            )

        if _routing_enabled():
            routed_params = process_routing(self, "fit", **fit_params)
        else:
            # Legacy path: only sample_weight is forwarded, identically to
            # every underlying estimator.
            routed_params = Bunch()
            for name in names:
                routed_params[name] = Bunch(fit={})
                if "sample_weight" in fit_params:
                    routed_params[name].fit["sample_weight"] = fit_params[
                        "sample_weight"
                    ]

        # Fit clones of all non-dropped estimators, possibly in parallel.
        self.estimators_ = Parallel(n_jobs=self.n_jobs)(
            delayed(_fit_single_estimator)(
                clone(clf),
                X,
                y,
                fit_params=routed_params[name]["fit"],
                message_clsname="Voting",
                message=self._log_message(name, idx + 1, len(clfs)),
            )
            for idx, (name, clf) in enumerate(zip(names, clfs))
            if clf != "drop"
        )

        self.named_estimators_ = Bunch()

        # Uses 'drop' as placeholder for dropped estimators
        est_iter = iter(self.estimators_)
        for name, est in self.estimators:
            current_est = est if est == "drop" else next(est_iter)
            self.named_estimators_[name] = current_est

            if hasattr(current_est, "feature_names_in_"):
                self.feature_names_in_ = current_est.feature_names_in_

        return self

    def fit_transform(self, X, y=None, **fit_params):
        """Return class labels or probabilities for each estimator.

        Return predictions for X for each estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix, dataframe} of shape \
                (n_samples, n_features)
            Input samples.

        y : ndarray of shape (n_samples,), default=None
            Target values (None for unsupervised transformations).

        **fit_params : dict
            Additional fit parameters.

        Returns
        -------
        X_new : ndarray array of shape (n_samples, n_features_new)
            Transformed array.
        """
        return super().fit_transform(X, y, **fit_params)

    @property
    def n_features_in_(self):
        """Number of features seen during :term:`fit`."""
        # For consistency with other estimators we raise an AttributeError so
        # that hasattr() fails if the estimator isn't fitted.
        try:
            check_is_fitted(self)
        except NotFittedError as nfe:
            raise AttributeError(
                "{} object has no n_features_in_ attribute.".format(
                    self.__class__.__name__
                )
            ) from nfe

        # Delegate to the first fitted sub-estimator.
        return self.estimators_[0].n_features_in_

    def _sk_visual_block_(self):
        # HTML-repr helper: show the sub-estimators side by side.
        names, estimators = zip(*self.estimators)
        return _VisualBlock("parallel", estimators, names=names)

    def get_metadata_routing(self):
        """Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        .. versionadded:: 1.5

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        """
        router = MetadataRouter(owner=self)

        # `self.estimators` is a list of (name, est) tuples
        for name, estimator in self.estimators:
            router.add(
                **{name: estimator},
                method_mapping=MethodMapping().add(callee="fit", caller="fit"),
            )
        return router
class VotingClassifier(ClassifierMixin, _BaseVoting):
    """Soft Voting/Majority Rule classifier for unfitted estimators.

    Read more in the :ref:`User Guide <voting_classifier>`.

    .. versionadded:: 0.17

    Parameters
    ----------
    estimators : list of (str, estimator) tuples
        Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones
        of those original estimators that will be stored in the class attribute
        ``self.estimators_``. An estimator can be set to ``'drop'`` using
        :meth:`set_params`.

        .. versionchanged:: 0.21
            ``'drop'`` is accepted. Using None was deprecated in 0.22 and
            support was removed in 0.24.

    voting : {'hard', 'soft'}, default='hard'
        If 'hard', uses predicted class labels for majority rule voting.
        Else if 'soft', predicts the class label based on the argmax of
        the sums of the predicted probabilities, which is recommended for
        an ensemble of well-calibrated classifiers.

    weights : array-like of shape (n_classifiers,), default=None
        Sequence of weights (`float` or `int`) to weight the occurrences of
        predicted class labels (`hard` voting) or class probabilities
        before averaging (`soft` voting). Uses uniform weights if `None`.

    n_jobs : int, default=None
        The number of jobs to run in parallel for ``fit``.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

        .. versionadded:: 0.18

    flatten_transform : bool, default=True
        Affects shape of transform output only when voting='soft'
        If voting='soft' and flatten_transform=True, transform method returns
        matrix with shape (n_samples, n_classifiers * n_classes). If
        flatten_transform=False, it returns
        (n_classifiers, n_samples, n_classes).

    verbose : bool, default=False
        If True, the time elapsed while fitting will be printed as it
        is completed.

        .. versionadded:: 0.23

    Attributes
    ----------
    estimators_ : list of classifiers
        The collection of fitted sub-estimators as defined in ``estimators``
        that are not 'drop'.

    named_estimators_ : :class:`~sklearn.utils.Bunch`
        Attribute to access any fitted sub-estimators by name.

        .. versionadded:: 0.20

    le_ : :class:`~sklearn.preprocessing.LabelEncoder`
        Transformer used to encode the labels during fit and decode during
        prediction.

    classes_ : ndarray of shape (n_classes,)
        The classes labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying classifier exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimators expose such an attribute when fit.

        .. versionadded:: 1.0

    See Also
    --------
    VotingRegressor : Prediction voting regressor.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.naive_bayes import GaussianNB
    >>> from sklearn.ensemble import RandomForestClassifier, VotingClassifier
    >>> clf1 = LogisticRegression(random_state=1)
    >>> clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
    >>> clf3 = GaussianNB()
    >>> X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    >>> y = np.array([1, 1, 1, 2, 2, 2])
    >>> eclf1 = VotingClassifier(estimators=[
    ...     ('lr', clf1), ('rf', clf2), ('gnb', clf3)], voting='hard')
    >>> eclf1 = eclf1.fit(X, y)
    >>> print(eclf1.predict(X))
    [1 1 1 2 2 2]
    >>> np.array_equal(eclf1.named_estimators_.lr.predict(X),
    ...                eclf1.named_estimators_['lr'].predict(X))
    True
    >>> eclf2 = VotingClassifier(estimators=[
    ...     ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    ...     voting='soft')
    >>> eclf2 = eclf2.fit(X, y)
    >>> print(eclf2.predict(X))
    [1 1 1 2 2 2]

    To drop an estimator, :meth:`set_params` can be used to remove it. Here we
    dropped one of the estimators, resulting in 2 fitted estimators:

    >>> eclf2 = eclf2.set_params(lr='drop')
    >>> eclf2 = eclf2.fit(X, y)
    >>> len(eclf2.estimators_)
    2

    Setting `flatten_transform=True` with `voting='soft'` flattens output shape of
    `transform`:

    >>> eclf3 = VotingClassifier(estimators=[
    ...    ('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    ...    voting='soft', weights=[2,1,1],
    ...    flatten_transform=True)
    >>> eclf3 = eclf3.fit(X, y)
    >>> print(eclf3.predict(X))
    [1 1 1 2 2 2]
    >>> print(eclf3.transform(X).shape)
    (6, 6)
    """

    # Extend the base constraints with the classifier-specific parameters.
    _parameter_constraints: dict = {
        **_BaseVoting._parameter_constraints,
        "voting": [StrOptions({"hard", "soft"})],
        "flatten_transform": ["boolean"],
    }

    def __init__(
        self,
        estimators,
        *,
        voting="hard",
        weights=None,
        n_jobs=None,
        flatten_transform=True,
        verbose=False,
    ):
        super().__init__(estimators=estimators)
        self.voting = voting
        self.weights = weights
        self.n_jobs = n_jobs
        self.flatten_transform = flatten_transform
        self.verbose = verbose

    @_fit_context(
        # estimators in VotingClassifier.estimators are not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, **fit_params):
        """Fit the estimators.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            Target values.

        **fit_params : dict
            Parameters to pass to the underlying estimators.

            .. versionadded:: 1.5

                Only available if `enable_metadata_routing=True`,
                which can be set by using
                ``sklearn.set_config(enable_metadata_routing=True)``.
                See :ref:`Metadata Routing User Guide <metadata_routing>` for
                more details.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        _raise_for_params(fit_params, self, "fit", allow=["sample_weight"])
        y_type = type_of_target(y, input_name="y")
        if y_type in ("unknown", "continuous"):
            # raise a specific ValueError for non-classification tasks
            raise ValueError(
                f"Unknown label type: {y_type}. Maybe you are trying to fit a "
                "classifier, which expects discrete classes on a "
                "regression target with continuous values."
            )
        elif y_type not in ("binary", "multiclass"):
            # raise a NotImplementedError for backward compatibility for non-supported
            # classification tasks
            raise NotImplementedError(
                f"{self.__class__.__name__} only supports binary or multiclass "
                "classification. Multilabel and multi-output classification are not "
                "supported."
            )

        # Encode labels as integers; predictions are decoded back in predict.
        self.le_ = LabelEncoder().fit(y)
        self.classes_ = self.le_.classes_
        transformed_y = self.le_.transform(y)

        return super().fit(X, transformed_y, **fit_params)

    def predict(self, X):
        """Predict class labels for X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        maj : array-like of shape (n_samples,)
            Predicted class labels.
        """
        check_is_fitted(self)
        if self.voting == "soft":
            # argmax over the weighted average of predicted probabilities.
            maj = np.argmax(self.predict_proba(X), axis=1)

        else:  # 'hard' voting
            predictions = self._predict(X)
            # Per sample: weighted count of votes per class, keep the winner.
            maj = np.apply_along_axis(
                lambda x: np.argmax(np.bincount(x, weights=self._weights_not_none)),
                axis=1,
                arr=predictions,
            )

        # Map encoded labels back to the original class labels.
        maj = self.le_.inverse_transform(maj)

        return maj

    def _collect_probas(self, X):
        """Collect results from clf.predict calls."""
        # Shape (n_classifiers, n_samples, n_classes).
        return np.asarray([clf.predict_proba(X) for clf in self.estimators_])

    def _check_voting(self):
        # Gate for `available_if`: predict_proba only exists for soft voting.
        if self.voting == "hard":
            raise AttributeError(
                f"predict_proba is not available when voting={self.voting!r}"
            )
        return True

    @available_if(_check_voting)
    def predict_proba(self, X):
        """Compute probabilities of possible outcomes for samples in X.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        avg : array-like of shape (n_samples, n_classes)
            Weighted average probability for each class per sample.
        """
        check_is_fitted(self)
        avg = np.average(
            self._collect_probas(X), axis=0, weights=self._weights_not_none
        )
        return avg

    def transform(self, X):
        """Return class labels or probabilities for X for each estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Returns
        -------
        probabilities_or_labels
            If `voting='soft'` and `flatten_transform=True`:
                returns ndarray of shape (n_samples, n_classifiers * n_classes),
                being class probabilities calculated by each classifier.
            If `voting='soft' and `flatten_transform=False`:
                ndarray of shape (n_classifiers, n_samples, n_classes)
            If `voting='hard'`:
                ndarray of shape (n_samples, n_classifiers), being
                class labels predicted by each classifier.
        """
        check_is_fitted(self)

        if self.voting == "soft":
            probas = self._collect_probas(X)
            if not self.flatten_transform:
                return probas
            # Flatten (n_classifiers, n_samples, n_classes) column-wise.
            return np.hstack(probas)

        else:
            return self._predict(X)

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Not used, present here for API consistency by convention.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        check_is_fitted(self, "n_features_in_")
        if self.voting == "soft" and not self.flatten_transform:
            raise ValueError(
                "get_feature_names_out is not supported when `voting='soft'` and "
                "`flatten_transform=False`"
            )

        _check_feature_names_in(self, input_features, generate_names=False)
        class_name = self.__class__.__name__.lower()

        # Only estimators that have not been dropped produce output columns.
        active_names = [name for name, est in self.estimators if est != "drop"]

        if self.voting == "hard":
            return np.asarray(
                [f"{class_name}_{name}" for name in active_names], dtype=object
            )

        # voting == "soft"
        n_classes = len(self.classes_)

        names_out = [
            f"{class_name}_{name}{i}" for name in active_names for i in range(n_classes)
        ]
        return np.asarray(names_out, dtype=object)

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        # The transform output dtype depends on the sub-estimators, so no
        # dtype preservation is guaranteed.
        tags.transformer_tags.preserves_dtype = []
        return tags
class VotingRegressor(RegressorMixin, _BaseVoting):
    """Prediction voting regressor for unfitted estimators.

    A voting regressor is an ensemble meta-estimator that fits several base
    regressors, each on the whole dataset. Then it averages the individual
    predictions to form a final prediction.

    For a detailed example, refer to
    :ref:`sphx_glr_auto_examples_ensemble_plot_voting_regressor.py`.

    Read more in the :ref:`User Guide <voting_regressor>`.

    .. versionadded:: 0.21

    Parameters
    ----------
    estimators : list of (str, estimator) tuples
        Invoking the ``fit`` method on the ``VotingRegressor`` will fit clones
        of those original estimators that will be stored in the class attribute
        ``self.estimators_``. An estimator can be set to ``'drop'`` using
        :meth:`set_params`.

        .. versionchanged:: 0.21
            ``'drop'`` is accepted. Using None was deprecated in 0.22 and
            support was removed in 0.24.

    weights : array-like of shape (n_regressors,), default=None
        Sequence of weights (`float` or `int`) to weight the occurrences of
        predicted values before averaging. Uses uniform weights if `None`.

    n_jobs : int, default=None
        The number of jobs to run in parallel for ``fit``.
        ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
        ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
        for more details.

    verbose : bool, default=False
        If True, the time elapsed while fitting will be printed as it
        is completed.

        .. versionadded:: 0.23

    Attributes
    ----------
    estimators_ : list of regressors
        The collection of fitted sub-estimators as defined in ``estimators``
        that are not 'drop'.

    named_estimators_ : :class:`~sklearn.utils.Bunch`
        Attribute to access any fitted sub-estimators by name.

        .. versionadded:: 0.20

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying regressor exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimators expose such an attribute when fit.

        .. versionadded:: 1.0

    See Also
    --------
    VotingClassifier : Soft Voting/Majority Rule classifier.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.ensemble import RandomForestRegressor
    >>> from sklearn.ensemble import VotingRegressor
    >>> from sklearn.neighbors import KNeighborsRegressor
    >>> r1 = LinearRegression()
    >>> r2 = RandomForestRegressor(n_estimators=10, random_state=1)
    >>> r3 = KNeighborsRegressor()
    >>> X = np.array([[1, 1], [2, 4], [3, 9], [4, 16], [5, 25], [6, 36]])
    >>> y = np.array([2, 6, 12, 20, 30, 42])
    >>> er = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])
    >>> print(er.fit(X, y).predict(X))
    [ 6.8  8.4 12.5 17.8 26   34  ]

    In the following example, we drop the `'lr'` estimator with
    :meth:`~VotingRegressor.set_params` and fit the remaining two estimators:

    >>> er = er.set_params(lr='drop')
    >>> er = er.fit(X, y)
    >>> len(er.estimators_)
    2
    """

    def __init__(self, estimators, *, weights=None, n_jobs=None, verbose=False):
        super().__init__(estimators=estimators)
        self.weights = weights
        self.n_jobs = n_jobs
        self.verbose = verbose

    @_fit_context(
        # estimators in VotingRegressor.estimators are not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, **fit_params):
        """Fit the estimators.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vectors, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            Target values.

        **fit_params : dict
            Parameters to pass to the underlying estimators.

            .. versionadded:: 1.5

                Only available if `enable_metadata_routing=True`,
                which can be set by using
                ``sklearn.set_config(enable_metadata_routing=True)``.
                See :ref:`Metadata Routing User Guide <metadata_routing>` for
                more details.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        _raise_for_params(fit_params, self, "fit", allow=["sample_weight"])
        # Flatten column vectors to 1D targets (with a warning), as other
        # regressors do.
        y = column_or_1d(y, warn=True)
        return super().fit(X, y, **fit_params)

    def predict(self, X):
        """Predict regression target for X.

        The predicted regression target of an input sample is computed as the
        mean predicted regression targets of the estimators in the ensemble.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        y : ndarray of shape (n_samples,)
            The predicted values.
        """
        check_is_fitted(self)
        # Weighted mean across the per-estimator prediction columns.
        return np.average(self._predict(X), axis=1, weights=self._weights_not_none)

    def transform(self, X):
        """Return predictions for X for each estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples.

        Returns
        -------
        predictions : ndarray of shape (n_samples, n_classifiers)
            Values predicted by each regressor.
        """
        check_is_fitted(self)
        return self._predict(X)

    def get_feature_names_out(self, input_features=None):
        """Get output feature names for transformation.

        Parameters
        ----------
        input_features : array-like of str or None, default=None
            Not used, present here for API consistency by convention.

        Returns
        -------
        feature_names_out : ndarray of str objects
            Transformed feature names.
        """
        check_is_fitted(self, "n_features_in_")
        _check_feature_names_in(self, input_features, generate_names=False)
        class_name = self.__class__.__name__.lower()
        # One output column per estimator that has not been dropped.
        return np.asarray(
            [f"{class_name}_{name}" for name, est in self.estimators if est != "drop"],
            dtype=object,
        )

View File

@@ -0,0 +1,9 @@
# Build the Cython extension backing sklearn.ensemble's gradient boosting,
# generating C from the .pyx source and linking the shared tree utilities.
py.extension_module(
  '_gradient_boosting',
  [cython_gen.process('_gradient_boosting.pyx')] + utils_cython_tree,
  dependencies: [np_dep],
  subdir: 'sklearn/ensemble',
  install: true
)

# Descend into the histogram-based gradient boosting sub-package.
subdir('_hist_gradient_boosting')

View File

@@ -0,0 +1,109 @@
"""
Testing for the base module (sklearn.ensemble.base).
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from collections import OrderedDict
import numpy as np
from sklearn.datasets import load_iris
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble._base import _set_random_states
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import Perceptron
from sklearn.pipeline import Pipeline
def test_base():
    # Check BaseEnsemble methods.
    ensemble = BaggingClassifier(
        estimator=Perceptron(random_state=None), n_estimators=3
    )

    iris = load_iris()
    ensemble.fit(iris.data, iris.target)
    ensemble.estimators_ = []  # empty the list and create estimators manually

    # Three appended estimators plus one built with append=False.
    ensemble._make_estimator()
    random_state = np.random.RandomState(3)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(random_state=random_state)
    ensemble._make_estimator(append=False)

    assert 3 == len(ensemble)
    assert 3 == len(ensemble.estimators_)

    assert isinstance(ensemble[0], Perceptron)
    assert ensemble[0].random_state is None
    # _make_estimator draws integer seeds from the provided RandomState, so
    # consecutive estimators must get distinct int seeds.
    assert isinstance(ensemble[1].random_state, int)
    assert isinstance(ensemble[2].random_state, int)
    assert ensemble[1].random_state != ensemble[2].random_state

    # n_estimators given as a NumPy integer must also be accepted.
    np_int_ensemble = BaggingClassifier(
        estimator=Perceptron(), n_estimators=np.int32(3)
    )
    np_int_ensemble.fit(iris.data, iris.target)
def test_set_random_states():
    # Linear Discriminant Analysis doesn't have random state: smoke test
    _set_random_states(LinearDiscriminantAnalysis(), random_state=17)

    clf1 = Perceptron(random_state=None)
    assert clf1.random_state is None
    # check random_state is None still sets
    _set_random_states(clf1, None)
    assert isinstance(clf1.random_state, int)

    # check random_state fixes results in consistent initialisation
    _set_random_states(clf1, 3)
    assert isinstance(clf1.random_state, int)
    clf2 = Perceptron(random_state=None)
    _set_random_states(clf2, 3)
    assert clf1.random_state == clf2.random_state

    # nested random_state: seeds must reach estimators inside meta-estimators
    def make_steps():
        return [
            ("sel", SelectFromModel(Perceptron(random_state=None))),
            ("clf", Perceptron(random_state=None)),
        ]

    est1 = Pipeline(make_steps())
    _set_random_states(est1, 3)
    assert isinstance(est1.steps[0][1].estimator.random_state, int)
    assert isinstance(est1.steps[1][1].random_state, int)
    # distinct parameters should receive distinct seeds
    assert (
        est1.get_params()["sel__estimator__random_state"]
        != est1.get_params()["clf__random_state"]
    )

    # ensure multiple random_state parameters are invariant to get_params()
    # iteration order

    class AlphaParamPipeline(Pipeline):
        def get_params(self, *args, **kwargs):
            params = Pipeline.get_params(self, *args, **kwargs).items()
            return OrderedDict(sorted(params))

    class RevParamPipeline(Pipeline):
        def get_params(self, *args, **kwargs):
            params = Pipeline.get_params(self, *args, **kwargs).items()
            return OrderedDict(sorted(params, reverse=True))

    for cls in [AlphaParamPipeline, RevParamPipeline]:
        est2 = cls(make_steps())
        _set_random_states(est2, 3)
        # seeds must match est1's regardless of parameter iteration order
        assert (
            est1.get_params()["sel__estimator__random_state"]
            == est2.get_params()["sel__estimator__random_state"]
        )
        assert (
            est1.get_params()["clf__random_state"]
            == est2.get_params()["clf__random_state"]
        )

View File

@@ -0,0 +1,263 @@
import numpy as np
import pytest
from sklearn.base import ClassifierMixin, clone, is_classifier
from sklearn.datasets import (
load_diabetes,
load_iris,
make_classification,
make_regression,
)
from sklearn.ensemble import (
RandomForestClassifier,
RandomForestRegressor,
StackingClassifier,
StackingRegressor,
VotingClassifier,
VotingRegressor,
)
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC, SVR, LinearSVC
X, y = load_iris(return_X_y=True)
X_r, y_r = load_diabetes(return_X_y=True)
@pytest.mark.parametrize(
    "X, y, estimator",
    [
        (
            *make_classification(n_samples=10),
            StackingClassifier(
                estimators=[
                    ("lr", LogisticRegression()),
                    ("svm", LinearSVC()),
                    ("rf", RandomForestClassifier(n_estimators=5, max_depth=3)),
                ],
                cv=2,
            ),
        ),
        (
            *make_classification(n_samples=10),
            VotingClassifier(
                estimators=[
                    ("lr", LogisticRegression()),
                    ("svm", LinearSVC()),
                    ("rf", RandomForestClassifier(n_estimators=5, max_depth=3)),
                ]
            ),
        ),
        (
            *make_regression(n_samples=10),
            StackingRegressor(
                estimators=[
                    ("lr", LinearRegression()),
                    ("svm", SVR(kernel="linear")),
                    ("rf", RandomForestRegressor(n_estimators=5, max_depth=3)),
                ],
                cv=2,
            ),
        ),
        (
            *make_regression(n_samples=10),
            VotingRegressor(
                estimators=[
                    ("lr", LinearRegression()),
                    ("svm", SVR(kernel="linear")),
                    ("rf", RandomForestRegressor(n_estimators=5, max_depth=3)),
                ]
            ),
        ),
    ],
    ids=[
        "stacking-classifier",
        "voting-classifier",
        "stacking-regressor",
        "voting-regressor",
    ],
)
def test_ensemble_heterogeneous_estimators_behavior(X, y, estimator):
    """Check that `estimators`, `estimators_`, `named_estimators` and
    `named_estimators_` behave consistently across all heterogeneous
    ensemble classes, including after `set_params()`."""
    estimator = clone(estimator)  # Avoid side effects from shared instances

    # before fit
    assert "svm" in estimator.named_estimators
    assert estimator.named_estimators.svm is estimator.estimators[1][1]
    assert estimator.named_estimators.svm is estimator.named_estimators["svm"]

    # check fitted attributes
    estimator.fit(X, y)
    assert len(estimator.named_estimators) == 3
    assert len(estimator.named_estimators_) == 3
    expected_names = sorted(["lr", "svm", "rf"])
    assert sorted(list(estimator.named_estimators_.keys())) == expected_names

    # check that set_params() does not add a new attribute
    ens_reset = clone(estimator)
    replacement_svm = SVC() if is_classifier(estimator) else SVR()
    ens_reset.set_params(svm=replacement_svm).fit(X, y)
    assert not hasattr(ens_reset, "svm")
    for name in ("lr", "rf"):
        assert (
            ens_reset.named_estimators[name].get_params()
            == estimator.named_estimators[name].get_params()
        )

    # check the behavior when setting and dropping an estimator
    ens_drop = clone(estimator)
    ens_drop.set_params(svm="drop")
    ens_drop.fit(X, y)
    assert len(ens_drop.named_estimators) == 3
    assert ens_drop.named_estimators.svm == "drop"
    assert len(ens_drop.named_estimators_) == 3
    assert sorted(list(ens_drop.named_estimators_.keys())) == expected_names
    svm_type = type(estimator.named_estimators.svm)
    for sub_est in ens_drop.named_estimators_:
        # check that the correspondence is correct
        assert not isinstance(sub_est, svm_type)

    # check that we can set the parameters of the underlying classifier
    estimator.set_params(svm__C=10.0)
    estimator.set_params(rf__max_depth=5)
    params = estimator.get_params()
    assert params["svm__C"] == params["svm"].get_params()["C"]
    assert params["rf__max_depth"] == params["rf"].get_params()["max_depth"]
@pytest.mark.parametrize(
    "Ensemble",
    [VotingClassifier, StackingRegressor, VotingRegressor],
)
def test_ensemble_heterogeneous_estimators_type(Ensemble):
    """Fitting must fail when the underlying estimators are not of the
    expected kind (classifier vs. regressor).

    StackingClassifier can legitimately wrap a regressor, so it is not
    covered here.
    """
    if issubclass(Ensemble, ClassifierMixin):
        X, y = make_classification(n_samples=10)
        wrong_estimators = [("lr", LinearRegression())]
        expected_kind = "classifier"
    else:
        X, y = make_regression(n_samples=10)
        wrong_estimators = [("lr", LogisticRegression())]
        expected_kind = "regressor"
    err_msg = f"should be a {expected_kind}"
    with pytest.raises(ValueError, match=err_msg):
        Ensemble(estimators=wrong_estimators).fit(X, y)
@pytest.mark.parametrize(
    "X, y, Ensemble",
    [
        (*make_classification(n_samples=10), StackingClassifier),
        (*make_classification(n_samples=10), VotingClassifier),
        (*make_regression(n_samples=10), StackingRegressor),
        (*make_regression(n_samples=10), VotingRegressor),
    ],
)
def test_ensemble_heterogeneous_estimators_name_validation(X, y, Ensemble):
    """Invalid estimator names (dunder, duplicated, or conflicting with
    constructor arguments) must raise an explicit error at fit time."""
    base_cls = (
        LogisticRegression
        if issubclass(Ensemble, ClassifierMixin)
        else LinearRegression
    )

    # raise an error when the name contains dunder
    ensemble = Ensemble(estimators=[("lr__", base_cls())])
    err_msg = r"Estimator names must not contain __: got \['lr__'\]"
    with pytest.raises(ValueError, match=err_msg):
        ensemble.fit(X, y)

    # raise an error when the name is not unique
    ensemble = Ensemble(estimators=[("lr", base_cls()), ("lr", base_cls())])
    err_msg = r"Names provided are not unique: \['lr', 'lr'\]"
    with pytest.raises(ValueError, match=err_msg):
        ensemble.fit(X, y)

    # raise an error when the name conflicts with the parameters
    ensemble = Ensemble(estimators=[("estimators", base_cls())])
    err_msg = "Estimator names conflict with constructor arguments"
    with pytest.raises(ValueError, match=err_msg):
        ensemble.fit(X, y)
@pytest.mark.parametrize(
    "X, y, estimator",
    [
        (
            *make_classification(n_samples=10),
            StackingClassifier(estimators=[("lr", LogisticRegression())]),
        ),
        (
            *make_classification(n_samples=10),
            VotingClassifier(estimators=[("lr", LogisticRegression())]),
        ),
        (
            *make_regression(n_samples=10),
            StackingRegressor(estimators=[("lr", LinearRegression())]),
        ),
        (
            *make_regression(n_samples=10),
            VotingRegressor(estimators=[("lr", LinearRegression())]),
        ),
    ],
    ids=[
        "stacking-classifier",
        "voting-classifier",
        "stacking-regressor",
        "voting-regressor",
    ],
)
def test_ensemble_heterogeneous_estimators_all_dropped(X, y, estimator):
    """Dropping every underlying estimator must raise a consistent error."""
    estimator.set_params(lr="drop")
    with pytest.raises(ValueError, match="All estimators are dropped."):
        estimator.fit(X, y)
@pytest.mark.parametrize(
    "Ensemble, Estimator, X, y",
    [
        (StackingClassifier, LogisticRegression, X, y),
        (StackingRegressor, LinearRegression, X_r, y_r),
        (VotingClassifier, LogisticRegression, X, y),
        (VotingRegressor, LinearRegression, X_r, y_r),
    ],
)
# FIXME: we should move this test in `estimator_checks` once we are able
# to construct meta-estimator instances
def test_heterogeneous_ensemble_support_missing_values(Ensemble, Estimator, X, y):
    # check that Voting and Stacking predictor delegate the missing values
    # validation to the underlying estimator.
    X = X.copy()
    nan_mask = np.random.choice([1, 0], X.shape, p=[0.1, 0.9]).astype(bool)
    X[nan_mask] = np.nan
    imputing_pipe = make_pipeline(SimpleImputer(), Estimator())
    ensemble = Ensemble(
        estimators=[("pipe1", imputing_pipe), ("pipe2", imputing_pipe)]
    )
    ensemble.fit(X, y).score(X, y)

View File

@@ -0,0 +1,395 @@
"""
Testing for Isolation Forest algorithm (sklearn.ensemble.iforest).
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
from unittest.mock import Mock, patch
import numpy as np
import pytest
from joblib import parallel_backend
from sklearn.datasets import load_diabetes, load_iris, make_classification
from sklearn.ensemble import IsolationForest
from sklearn.ensemble._iforest import _average_path_length
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import ParameterGrid, train_test_split
from sklearn.utils import check_random_state
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
ignore_warnings,
)
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
# load iris & diabetes dataset
iris = load_iris()
diabetes = load_diabetes()
def test_iforest(global_random_seed):
    """Check Isolation Forest for various parameter settings."""
    X_train = np.array([[0, 1], [1, 2]])
    X_test = np.array([[2, 1], [1, 1]])
    param_grid = ParameterGrid(
        {"n_estimators": [3], "max_samples": [0.5, 1.0, 3], "bootstrap": [True, False]}
    )
    with ignore_warnings():
        for config in param_grid:
            forest = IsolationForest(random_state=global_random_seed, **config)
            forest.fit(X_train).predict(X_test)
@pytest.mark.parametrize("sparse_container", CSC_CONTAINERS + CSR_CONTAINERS)
def test_iforest_sparse(global_random_seed, sparse_container):
    """Check IForest for various parameter settings on sparse input."""
    rng = check_random_state(global_random_seed)
    X_train, X_test = train_test_split(diabetes.data[:50], random_state=rng)
    X_train_sparse = sparse_container(X_train)
    X_test_sparse = sparse_container(X_test)

    for config in ParameterGrid(
        {"max_samples": [0.5, 1.0], "bootstrap": [True, False]}
    ):
        # Trained on sparse format
        sparse_results = (
            IsolationForest(
                n_estimators=10, random_state=global_random_seed, **config
            )
            .fit(X_train_sparse)
            .predict(X_test_sparse)
        )
        # Trained on dense format
        dense_results = (
            IsolationForest(
                n_estimators=10, random_state=global_random_seed, **config
            )
            .fit(X_train)
            .predict(X_test)
        )
        assert_array_equal(sparse_results, dense_results)
def test_iforest_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data

    # The dataset has less than 256 samples, explicitly setting
    # max_samples > n_samples should result in a warning. If not set
    # explicitly there should be no warning
    warn_msg = "max_samples will be set to n_samples for estimation"
    with pytest.warns(UserWarning, match=warn_msg):
        IsolationForest(max_samples=1000).fit(X)
    for max_samples in ("auto", np.int64(2)):
        with warnings.catch_warnings():
            warnings.simplefilter("error", UserWarning)
            IsolationForest(max_samples=max_samples).fit(X)

    # test X_test n_features match X_train one:
    with pytest.raises(ValueError):
        IsolationForest().fit(X).predict(X[:, 1:])
def test_recalculate_max_depth():
    """Check max_depth recalculation when max_samples is reset to n_samples"""
    X = iris.data
    expected_depth = int(np.ceil(np.log2(X.shape[0])))
    forest = IsolationForest().fit(X)
    assert all(tree.max_depth == expected_depth for tree in forest.estimators_)
def test_max_samples_attribute():
    """`max_samples_` is capped at n_samples and scales with a float input."""
    X = iris.data
    n_samples = X.shape[0]

    assert IsolationForest().fit(X).max_samples_ == n_samples

    clipped = IsolationForest(max_samples=500)
    warn_msg = "max_samples will be set to n_samples for estimation"
    with pytest.warns(UserWarning, match=warn_msg):
        clipped.fit(X)
    assert clipped.max_samples_ == n_samples

    fractional = IsolationForest(max_samples=0.4).fit(X)
    assert fractional.max_samples_ == 0.4 * n_samples
def test_iforest_parallel_regression(global_random_seed):
    """Check parallel regression."""
    rng = check_random_state(global_random_seed)
    X_train, X_test = train_test_split(diabetes.data, random_state=rng)

    forest = IsolationForest(n_jobs=3, random_state=global_random_seed).fit(X_train)
    forest.set_params(n_jobs=1)
    pred_one_job = forest.predict(X_test)
    forest.set_params(n_jobs=2)
    pred_two_jobs = forest.predict(X_test)
    assert_array_almost_equal(pred_one_job, pred_two_jobs)

    forest_serial = IsolationForest(n_jobs=1, random_state=global_random_seed).fit(
        X_train
    )
    assert_array_almost_equal(pred_one_job, forest_serial.predict(X_test))
def test_iforest_performance(global_random_seed):
    """Test Isolation Forest performs well"""
    # Generate train/test data
    rng = check_random_state(global_random_seed)
    X_blob = 0.3 * rng.randn(600, 2)
    X_all = rng.permutation(np.vstack((X_blob + 2, X_blob - 2)))
    X_train = X_all[:1000]

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-1, high=1, size=(200, 2))
    X_test = np.vstack((X_all[1000:], X_outliers))
    y_test = np.array([0] * 200 + [1] * 200)

    # fit the model
    clf = IsolationForest(max_samples=100, random_state=rng).fit(X_train)

    # predict scores (the lower, the more normal)
    anomaly_score = -clf.decision_function(X_test)

    # check that there is at most 6 errors (false positive or false negative)
    assert roc_auc_score(y_test, anomaly_score) > 0.98
@pytest.mark.parametrize("contamination", [0.25, "auto"])
def test_iforest_works(contamination, global_random_seed):
    # toy sample (the last two samples are outliers)
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [7, 4], [-5, 9]]

    # Test IsolationForest
    clf = IsolationForest(
        random_state=global_random_seed, contamination=contamination
    )
    clf.fit(X)
    anomaly_score = -clf.decision_function(X)
    labels = clf.predict(X)

    # assert detect outliers:
    assert np.min(anomaly_score[-2:]) > np.max(anomaly_score[:-2])
    assert_array_equal(labels, [1] * 6 + [-1] * 2)
def test_max_samples_consistency():
    # Make sure validated max_samples in iforest and BaseBagging are identical
    forest = IsolationForest().fit(iris.data)
    assert forest.max_samples_ == forest._max_samples
def test_iforest_subsampled_features():
    # It tests non-regression for #5732 which failed at predict.
    rng = check_random_state(0)
    X_train, X_test, y_train, y_test = train_test_split(
        diabetes.data[:50], diabetes.target[:50], random_state=rng
    )
    forest = IsolationForest(max_features=0.8)
    forest.fit(X_train, y_train)
    forest.predict(X_test)
def test_iforest_average_path_length():
    # It tests non-regression for #8549 which used the wrong formula
    # for average path length, strictly for the integer case
    # Updated to check average path length when input is <= 2 (issue #11839)
    expected_5 = 2.0 * (np.log(4.0) + np.euler_gamma) - 2.0 * 4.0 / 5.0
    expected_999 = 2.0 * (np.log(998.0) + np.euler_gamma) - 2.0 * 998.0 / 999.0

    single_value_cases = [
        (0, 0.0),
        (1, 0.0),
        (2, 1.0),
        (5, expected_5),
        (999, expected_999),
    ]
    for n_samples, expected in single_value_cases:
        assert_allclose(_average_path_length([n_samples]), [expected])

    assert_allclose(
        _average_path_length(np.array([1, 2, 5, 999])),
        [0.0, 1.0, expected_5, expected_999],
    )

    # _average_path_length is increasing
    avg_path_length = _average_path_length(np.arange(5))
    assert_array_equal(avg_path_length, np.sort(avg_path_length))
def test_score_samples():
    """`score_samples` equals `decision_function` shifted by `offset_`."""
    X_train = [[1, 1], [1, 2], [2, 1]]
    X_query = [[2.0, 2.0]]
    clf_contaminated = IsolationForest(contamination=0.1).fit(X_train)
    clf_default = IsolationForest().fit(X_train)

    for clf in (clf_contaminated, clf_default):
        assert_array_equal(
            clf.score_samples(X_query),
            clf.decision_function(X_query) + clf.offset_,
        )
    assert_array_equal(
        clf_contaminated.score_samples(X_query), clf_default.score_samples(X_query)
    )
def test_iforest_warm_start():
    """Test iterative addition of iTrees to an iForest"""
    rng = check_random_state(0)
    X = rng.randn(20, 2)

    # fit first 10 trees
    forest = IsolationForest(
        n_estimators=10, max_samples=20, random_state=rng, warm_start=True
    )
    forest.fit(X)
    first_tree = forest.estimators_[0]  # remember the 1st tree

    # fit another 10 trees
    forest.set_params(n_estimators=20)
    forest.fit(X)

    # expecting 20 fitted trees and no overwritten trees
    assert len(forest.estimators_) == 20
    assert forest.estimators_[0] is first_tree
# mock get_chunk_n_rows to actually test more than one chunk (here one
# chunk has 3 rows):
@patch(
    "sklearn.ensemble._iforest.get_chunk_n_rows",
    side_effect=Mock(return_value=3),
)
@pytest.mark.parametrize("contamination, n_predict_calls", [(0.25, 3), ("auto", 2)])
@pytest.mark.thread_unsafe  # monkeypatched code
def test_iforest_chunks_works1(
    mocked_get_chunk, contamination, n_predict_calls, global_random_seed
):
    # re-run the reference scenario and count the chunked scoring passes
    test_iforest_works(contamination, global_random_seed)
    assert mocked_get_chunk.call_count == n_predict_calls
# idem with chunk_size = 10 rows
@patch(
    "sklearn.ensemble._iforest.get_chunk_n_rows",
    side_effect=Mock(return_value=10),
)
@pytest.mark.parametrize("contamination, n_predict_calls", [(0.25, 3), ("auto", 2)])
@pytest.mark.thread_unsafe  # monkeypatched code
def test_iforest_chunks_works2(
    mocked_get_chunk, contamination, n_predict_calls, global_random_seed
):
    # re-run the reference scenario and count the chunked scoring passes
    test_iforest_works(contamination, global_random_seed)
    assert mocked_get_chunk.call_count == n_predict_calls
def test_iforest_with_uniform_data():
    """Test whether iforest predicts inliers when using uniform data"""
    # 2-d array of all 1s
    X_const = np.ones((100, 10))
    model = IsolationForest()
    model.fit(X_const)

    rng = np.random.RandomState(0)

    assert (model.predict(X_const) == 1).all()
    assert (model.predict(rng.randn(100, 10)) == 1).all()
    assert (model.predict(X_const + 1) == 1).all()
    assert (model.predict(X_const - 1) == 1).all()

    # 2-d array where columns contain the same value across rows
    X_repeated = np.repeat(rng.randn(1, 10), 100, 0)
    model = IsolationForest()
    model.fit(X_repeated)
    assert (model.predict(X_repeated) == 1).all()
    assert (model.predict(rng.randn(100, 10)) == 1).all()
    assert (model.predict(np.ones((100, 10))) == 1).all()

    # Single row
    X_single = rng.randn(1, 10)
    model = IsolationForest()
    model.fit(X_single)
    assert (model.predict(X_single) == 1).all()
    assert (model.predict(rng.randn(100, 10)) == 1).all()
    assert (model.predict(np.ones((100, 10))) == 1).all()
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_iforest_with_n_jobs_does_not_segfault(csc_container):
    """Check that Isolation Forest does not segfault with n_jobs=2

    Non-regression test for #23252
    """
    X_dense, _ = make_classification(n_samples=85_000, n_features=100, random_state=0)
    IsolationForest(n_estimators=10, max_samples=256, n_jobs=2).fit(
        csc_container(X_dense)
    )
def test_iforest_preserve_feature_names():
    """Check that feature names are preserved when contamination is not "auto".

    Feature names are required for consistency checks during scoring.

    Non-regression test for Issue #25844
    """
    pd = pytest.importorskip("pandas")
    rng = np.random.RandomState(0)
    X_df = pd.DataFrame(data=rng.randn(4), columns=["a"])
    model = IsolationForest(random_state=0, contamination=0.05)

    # fitting must not emit a feature-name mismatch warning
    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        model.fit(X_df)
@pytest.mark.parametrize("sparse_container", CSC_CONTAINERS + CSR_CONTAINERS)
def test_iforest_sparse_input_float_contamination(sparse_container):
    """Check that `IsolationForest` accepts sparse matrix input and float value for
    contamination.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/27626
    """
    contamination = 0.1
    X_dense, _ = make_classification(n_samples=50, n_features=4, random_state=0)
    X_sparse = sparse_container(X_dense)
    X_sparse.sort_indices()

    forest = IsolationForest(
        n_estimators=5, contamination=contamination, random_state=0
    ).fit(X_sparse)
    scores = forest.decision_function(X_sparse)
    outlier_fraction = (scores < 0).sum() / X_sparse.shape[0]
    assert outlier_fraction == pytest.approx(contamination)
@pytest.mark.parametrize("n_jobs", [1, 2])
@pytest.mark.parametrize("contamination", [0.25, "auto"])
def test_iforest_predict_parallel(global_random_seed, contamination, n_jobs):
    """Check that `IsolationForest.predict` is parallelized."""
    # toy sample (the last two samples are outliers)
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [7, 4], [-5, 9]]

    # Test IsolationForest
    clf = IsolationForest(
        random_state=global_random_seed, contamination=contamination, n_jobs=None
    )
    clf.fit(X)
    anomaly_score = -clf.decision_function(X)
    pred_sequential = clf.predict(X)

    # assert detect outliers:
    assert np.min(anomaly_score[-2:]) > np.max(anomaly_score[:-2])
    assert_array_equal(pred_sequential, [1] * 6 + [-1] * 2)

    clf_parallel = IsolationForest(
        random_state=global_random_seed, contamination=contamination, n_jobs=-1
    )
    clf_parallel.fit(X)
    with parallel_backend("threading", n_jobs=n_jobs):
        pred_parallel = clf_parallel.predict(X)

    # assert the same results as non-parallel
    assert_array_equal(pred_sequential, pred_parallel)

View File

@@ -0,0 +1,795 @@
"""Testing for the VotingClassifier and VotingRegressor"""
import re
import numpy as np
import pytest
from sklearn import config_context, datasets
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_multilabel_classification
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import (
RandomForestClassifier,
RandomForestRegressor,
VotingClassifier,
VotingRegressor,
)
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.multiclass import OneVsRestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tests.metadata_routing_common import (
ConsumingClassifier,
ConsumingRegressor,
_Registry,
check_recorded_metadata,
)
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils._testing import (
assert_almost_equal,
assert_array_almost_equal,
assert_array_equal,
)
# Load datasets
iris = datasets.load_iris()
X, y = iris.data[:, 1:3], iris.target
# Scaled to solve ConvergenceWarning throw by Logistic Regression
X_scaled = StandardScaler().fit_transform(X)
X_r, y_r = datasets.load_diabetes(return_X_y=True)
@pytest.mark.parametrize(
    "params, err_msg",
    [
        (
            {"estimators": []},
            "Invalid 'estimators' attribute, 'estimators' should be a non-empty list",
        ),
        (
            {"estimators": [LogisticRegression()]},
            "Invalid 'estimators' attribute, 'estimators' should be a non-empty list",
        ),
        (
            {"estimators": [(213, LogisticRegression())]},
            "Invalid 'estimators' attribute, 'estimators' should be a non-empty list",
        ),
        (
            {"estimators": [("lr", LogisticRegression())], "weights": [1, 2]},
            "Number of `estimators` and weights must be equal",
        ),
    ],
)
def test_voting_classifier_estimator_init(params, err_msg):
    """Invalid `estimators`/`weights` combinations must raise at fit time."""
    with pytest.raises(ValueError, match=err_msg):
        VotingClassifier(**params).fit(X, y)
def test_predictproba_hardvoting():
    """`predict_proba` must stay unavailable with hard voting."""
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()), ("lr2", LogisticRegression())],
        voting="hard",
    )

    inner_msg = "predict_proba is not available when voting='hard'"
    outer_msg = "'VotingClassifier' has no attribute 'predict_proba'"
    with pytest.raises(AttributeError, match=outer_msg) as exec_info:
        eclf.predict_proba
    cause = exec_info.value.__cause__
    assert isinstance(cause, AttributeError)
    assert inner_msg in str(cause)

    # the attribute is absent both before and after fitting
    assert not hasattr(eclf, "predict_proba")
    eclf.fit(X_scaled, y)
    assert not hasattr(eclf, "predict_proba")
def test_notfitted():
    """Calling prediction methods before `fit` must raise NotFittedError."""
    eclf = VotingClassifier(
        estimators=[("lr1", LogisticRegression()), ("lr2", LogisticRegression())],
        voting="soft",
    )
    ereg = VotingRegressor([("dr", DummyRegressor())])
    msg = (
        "This %s instance is not fitted yet. Call 'fit'"
        " with appropriate arguments before using this estimator."
    )
    clf_msg = msg % "VotingClassifier"
    for method in (eclf.predict, eclf.predict_proba, eclf.transform):
        with pytest.raises(NotFittedError, match=clf_msg):
            method(X)
    reg_msg = msg % "VotingRegressor"
    for method in (ereg.predict, ereg.transform):
        with pytest.raises(NotFittedError, match=reg_msg):
            method(X_r)
def test_majority_label_iris(global_random_seed):
    """Check classification by majority label on dataset iris."""
    estimators = [
        ("lr", LogisticRegression(random_state=global_random_seed)),
        (
            "rf",
            RandomForestClassifier(n_estimators=10, random_state=global_random_seed),
        ),
        ("gnb", GaussianNB()),
    ]
    eclf = VotingClassifier(estimators=estimators, voting="hard")
    assert cross_val_score(eclf, X, y, scoring="accuracy").mean() >= 0.9
def test_tie_situation():
    """Check voting classifier selects smaller class label in tie situation."""
    lr = LogisticRegression(random_state=123)
    rf = RandomForestClassifier(random_state=123)
    eclf = VotingClassifier(estimators=[("lr", lr), ("rf", rf)], voting="hard")
    # the two members disagree on sample 52 ...
    assert lr.fit(X, y).predict(X)[52] == 2
    assert rf.fit(X, y).predict(X)[52] == 1
    # ... and the ensemble resolves the tie towards the smaller label
    assert eclf.fit(X, y).predict(X)[52] == 1
def test_weights_iris(global_random_seed):
    """Check classification by average probabilities on dataset iris."""
    eclf = VotingClassifier(
        estimators=[
            ("lr", LogisticRegression(random_state=global_random_seed)),
            (
                "rf",
                RandomForestClassifier(
                    n_estimators=10, random_state=global_random_seed
                ),
            ),
            ("gnb", GaussianNB()),
        ],
        voting="soft",
        weights=[1, 2, 10],
    )
    assert cross_val_score(eclf, X_scaled, y, scoring="accuracy").mean() >= 0.9
def test_weights_regressor():
    """Check weighted average regression prediction on diabetes dataset."""
    reg_mean = DummyRegressor(strategy="mean")
    reg_median = DummyRegressor(strategy="median")
    reg_quantile = DummyRegressor(strategy="quantile", quantile=0.2)
    named_regs = [
        ("mean", reg_mean),
        ("median", reg_median),
        ("quantile", reg_quantile),
    ]
    weights = [1, 2, 10]

    ereg = VotingRegressor(named_regs, weights=weights)
    X_r_train, X_r_test, y_r_train, y_r_test = train_test_split(
        X_r, y_r, test_size=0.25
    )
    individual_preds = [
        reg.fit(X_r_train, y_r_train).predict(X_r_test) for _, reg in named_regs
    ]
    ereg_pred = ereg.fit(X_r_train, y_r_train).predict(X_r_test)
    expected = np.average(np.asarray(individual_preds), axis=0, weights=weights)
    assert_almost_equal(ereg_pred, expected, decimal=2)

    # None weights must be equivalent to all-equal weights
    ereg_weights_none = VotingRegressor(named_regs, weights=None)
    ereg_weights_equal = VotingRegressor(named_regs, weights=[1, 1, 1])
    ereg_weights_none.fit(X_r_train, y_r_train)
    ereg_weights_equal.fit(X_r_train, y_r_train)
    assert_almost_equal(
        ereg_weights_none.predict(X_r_test),
        ereg_weights_equal.predict(X_r_test),
        decimal=2,
    )
def test_predict_on_toy_problem(global_random_seed):
    """Manually check predicted class labels for toy dataset."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()

    X = np.array(
        [[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2], [2.1, 1.4], [3.1, 2.3]]
    )
    y = np.array([1, 1, 1, 2, 2, 2])
    expected = [1, 1, 1, 2, 2, 2]

    for clf in (clf1, clf2, clf3):
        assert_array_equal(clf.fit(X, y).predict(X), expected)

    for voting in ("hard", "soft"):
        eclf = VotingClassifier(
            estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
            voting=voting,
            weights=[1, 1, 1],
        )
        assert_array_equal(eclf.fit(X, y).predict(X), expected)
def test_predict_proba_on_toy_problem():
    """Calculate predicted probabilities on toy dataset."""
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(random_state=123)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    # reference per-estimator probabilities
    clf1_res = np.array(
        [
            [0.59790391, 0.40209609],
            [0.57622162, 0.42377838],
            [0.50728456, 0.49271544],
            [0.40241774, 0.59758226],
        ]
    )
    clf2_res = np.array([[0.8, 0.2], [0.8, 0.2], [0.2, 0.8], [0.3, 0.7]])
    clf3_res = np.array(
        [[0.9985082, 0.0014918], [0.99845843, 0.00154157], [0.0, 1.0], [0.0, 1.0]]
    )
    # weighted (2, 1, 1) average of the individual probabilities
    expected = (2 * clf1_res + clf2_res + clf3_res) / 4

    eclf = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        weights=[2, 1, 1],
    )
    eclf_res = eclf.fit(X, y).predict_proba(X)
    assert_almost_equal(expected[0][0], eclf_res[0][0], decimal=1)
    assert_almost_equal(expected[1][1], eclf_res[1][1], decimal=1)
    assert_almost_equal(expected[2][1], eclf_res[2][1], decimal=1)
    assert_almost_equal(expected[3][1], eclf_res[3][1], decimal=1)

    inner_msg = "predict_proba is not available when voting='hard'"
    outer_msg = "'VotingClassifier' has no attribute 'predict_proba'"
    with pytest.raises(AttributeError, match=outer_msg) as exec_info:
        eclf = VotingClassifier(
            estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="hard"
        )
        eclf.fit(X, y).predict_proba(X)
    assert isinstance(exec_info.value.__cause__, AttributeError)
    assert inner_msg in str(exec_info.value.__cause__)
def test_multilabel():
    """Check if error is raised for multilabel classification."""
    X, y = make_multilabel_classification(
        n_classes=2, n_labels=1, allow_unlabeled=False, random_state=123
    )
    ovr_clf = OneVsRestClassifier(SVC(kernel="linear"))
    eclf = VotingClassifier(estimators=[("ovr", ovr_clf)], voting="hard")
    try:
        eclf.fit(X, y)
    except NotImplementedError:
        # multilabel input is allowed to be rejected
        return
def test_gridsearch():
    """Check GridSearch support."""
    eclf = VotingClassifier(
        estimators=[
            ("lr", LogisticRegression(random_state=1)),
            ("rf", RandomForestClassifier(random_state=1, n_estimators=3)),
            ("gnb", GaussianNB()),
        ],
        voting="soft",
    )
    param_grid = {
        "lr__C": [1.0, 100.0],
        "voting": ["soft", "hard"],
        "weights": [[0.5, 0.5, 0.5], [1.0, 0.5, 0.5]],
    }
    GridSearchCV(estimator=eclf, param_grid=param_grid, cv=2).fit(X_scaled, y)
def test_parallel_fit(global_random_seed):
    """Check parallel backend of VotingClassifier on toy dataset."""
    estimators = [
        ("lr", LogisticRegression(random_state=global_random_seed)),
        (
            "rf",
            RandomForestClassifier(n_estimators=10, random_state=global_random_seed),
        ),
        ("gnb", GaussianNB()),
    ]
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])

    eclf_serial = VotingClassifier(estimators=estimators, voting="soft", n_jobs=1).fit(
        X, y
    )
    eclf_parallel = VotingClassifier(
        estimators=estimators, voting="soft", n_jobs=2
    ).fit(X, y)
    assert_array_equal(eclf_serial.predict(X), eclf_parallel.predict(X))
    assert_array_almost_equal(
        eclf_serial.predict_proba(X), eclf_parallel.predict_proba(X)
    )
def test_sample_weight(global_random_seed):
    """Tests sample_weight parameter of VotingClassifier"""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = CalibratedClassifierCV(SVC(random_state=global_random_seed), ensemble=False)

    # unit weights must be equivalent to no weights at all
    eclf_unit_weights = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("svc", clf3)], voting="soft"
    ).fit(X_scaled, y, sample_weight=np.ones((len(y),)))
    eclf_no_weights = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("svc", clf3)], voting="soft"
    ).fit(X_scaled, y)
    assert_array_equal(
        eclf_unit_weights.predict(X_scaled), eclf_no_weights.predict(X_scaled)
    )
    assert_array_almost_equal(
        eclf_unit_weights.predict_proba(X_scaled),
        eclf_no_weights.predict_proba(X_scaled),
    )

    # weights must be forwarded to the underlying estimator
    sample_weight = np.random.RandomState(global_random_seed).uniform(size=(len(y),))
    eclf_single = VotingClassifier(estimators=[("lr", clf1)], voting="soft")
    eclf_single.fit(X_scaled, y, sample_weight=sample_weight)
    clf1.fit(X_scaled, y, sample_weight)
    assert_array_equal(eclf_single.predict(X_scaled), clf1.predict(X_scaled))
    assert_array_almost_equal(
        eclf_single.predict_proba(X_scaled), clf1.predict_proba(X_scaled)
    )

    # check that an error is raised and indicative if sample_weight is not
    # supported.
    clf4 = KNeighborsClassifier()
    eclf_unsupported = VotingClassifier(
        estimators=[("lr", clf1), ("svc", clf3), ("knn", clf4)], voting="soft"
    )
    msg = "Underlying estimator KNeighborsClassifier does not support sample weights."
    with pytest.raises(TypeError, match=msg):
        eclf_unsupported.fit(X_scaled, y, sample_weight=sample_weight)

    # check that _fit_single_estimator will raise the right error
    # it should raise the original error if this is not linked to sample_weight
    class ClassifierErrorFit(ClassifierMixin, BaseEstimator):
        def fit(self, X_scaled, y, sample_weight):
            raise TypeError("Error unrelated to sample_weight.")

    with pytest.raises(TypeError, match="Error unrelated to sample_weight"):
        ClassifierErrorFit().fit(X_scaled, y, sample_weight=sample_weight)
def test_sample_weight_kwargs():
    """Check that VotingClassifier passes sample_weight as kwargs"""
    class _SpyClassifier(ClassifierMixin, BaseEstimator):
        """Asserts that ``sample_weight`` arrives via keyword arguments."""
        def fit(self, X, y, *args, **kwargs):
            # The ensemble must forward sample_weight by keyword, never
            # positionally.
            assert "sample_weight" in kwargs
    ensemble = VotingClassifier(estimators=[("mock", _SpyClassifier())], voting="soft")
    # Fitting must complete without raising.
    ensemble.fit(X, y, sample_weight=np.ones(len(y)))
def test_voting_classifier_set_params(global_random_seed):
    # check equivalence in the output when setting underlying estimators
    # via the constructor vs. swapping one in afterwards with set_params.
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(
        n_estimators=10, random_state=global_random_seed, max_depth=None
    )
    clf3 = GaussianNB()
    eclf1 = VotingClassifier(
        [("lr", clf1), ("rf", clf2)], voting="soft", weights=[1, 2]
    ).fit(X_scaled, y)
    # eclf2 starts with GaussianNB under the name "nb"; set_params replaces
    # it with the random forest, so predictions must match eclf1.
    eclf2 = VotingClassifier(
        [("lr", clf1), ("nb", clf3)], voting="soft", weights=[1, 2]
    )
    eclf2.set_params(nb=clf2).fit(X_scaled, y)
    assert_array_equal(eclf1.predict(X_scaled), eclf2.predict(X_scaled))
    assert_array_almost_equal(
        eclf1.predict_proba(X_scaled), eclf2.predict_proba(X_scaled)
    )
    # The `estimators` attribute must reflect the swapped-in estimator.
    assert eclf2.estimators[0][1].get_params() == clf1.get_params()
    assert eclf2.estimators[1][1].get_params() == clf2.get_params()
def test_set_estimator_drop():
    # VotingClassifier set_params should be able to set estimators as drop
    # Test predict
    clf1 = LogisticRegression(random_state=123)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=123)
    clf3 = GaussianNB()
    # eclf1 neutralises the forest with a zero weight; eclf2 drops it
    # entirely via set_params -- hard-voting predictions must then match.
    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("nb", clf3)],
        voting="hard",
        weights=[1, 0, 0.5],
    ).fit(X, y)
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("nb", clf3)],
        voting="hard",
        weights=[1, 1, 0.5],
    )
    eclf2.set_params(rf="drop").fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    # The dropped estimator stays listed in `estimators` but is excluded
    # from the fitted `estimators_`.
    assert dict(eclf2.estimators)["rf"] == "drop"
    assert len(eclf2.estimators_) == 2
    assert all(
        isinstance(est, (LogisticRegression, GaussianNB)) for est in eclf2.estimators_
    )
    assert eclf2.get_params()["rf"] == "drop"
    # The zero-weight/drop equivalence must also hold for soft voting.
    eclf1.set_params(voting="soft").fit(X, y)
    eclf2.set_params(voting="soft").fit(X, y)
    assert_array_equal(eclf1.predict(X), eclf2.predict(X))
    assert_array_almost_equal(eclf1.predict_proba(X), eclf2.predict_proba(X))
    # Dropping every estimator is invalid.
    msg = "All estimators are dropped. At least one is required"
    with pytest.raises(ValueError, match=msg):
        eclf2.set_params(lr="drop", rf="drop", nb="drop").fit(X, y)
    # Test soft voting transform
    X1 = np.array([[1], [2]])
    y1 = np.array([1, 2])
    eclf1 = VotingClassifier(
        estimators=[("rf", clf2), ("nb", clf3)],
        voting="soft",
        weights=[0, 0.5],
        flatten_transform=False,
    ).fit(X1, y1)
    eclf2 = VotingClassifier(
        estimators=[("rf", clf2), ("nb", clf3)],
        voting="soft",
        weights=[1, 0.5],
        flatten_transform=False,
    )
    eclf2.set_params(rf="drop").fit(X1, y1)
    # With flatten_transform=False the output has shape
    # (n_estimators, n_samples, n_classes); dropping "rf" removes its slice.
    assert_array_almost_equal(
        eclf1.transform(X1),
        np.array([[[0.7, 0.3], [0.3, 0.7]], [[1.0, 0.0], [0.0, 1.0]]]),
    )
    assert_array_almost_equal(eclf2.transform(X1), np.array([[[1.0, 0.0], [0.0, 1.0]]]))
    # Hard-voting transform yields one predicted-label column per estimator.
    eclf1.set_params(voting="hard")
    eclf2.set_params(voting="hard")
    assert_array_equal(eclf1.transform(X1), np.array([[0, 0], [1, 1]]))
    assert_array_equal(eclf2.transform(X1), np.array([[0], [1]]))
def test_estimator_weights_format(global_random_seed):
    # Estimator weights given as a Python list and as a NumPy array must
    # produce identical soft-voting probabilities.
    named_estimators = [
        ("lr", LogisticRegression(random_state=global_random_seed)),
        ("rf", RandomForestClassifier(n_estimators=10, random_state=global_random_seed)),
    ]
    eclf_list_weights = VotingClassifier(
        estimators=named_estimators, weights=[1, 2], voting="soft"
    )
    eclf_array_weights = VotingClassifier(
        estimators=named_estimators, weights=np.array((1, 2)), voting="soft"
    )
    eclf_list_weights.fit(X_scaled, y)
    eclf_array_weights.fit(X_scaled, y)
    assert_array_almost_equal(
        eclf_list_weights.predict_proba(X_scaled),
        eclf_array_weights.predict_proba(X_scaled),
    )
def test_transform(global_random_seed):
    """Check transform method of VotingClassifier on toy dataset."""
    clf1 = LogisticRegression(random_state=global_random_seed)
    clf2 = RandomForestClassifier(n_estimators=10, random_state=global_random_seed)
    clf3 = GaussianNB()
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])
    # flatten_transform defaults to True, so eclf1 and eclf2 are equivalent.
    eclf1 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)], voting="soft"
    ).fit(X, y)
    eclf2 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        flatten_transform=True,
    ).fit(X, y)
    eclf3 = VotingClassifier(
        estimators=[("lr", clf1), ("rf", clf2), ("gnb", clf3)],
        voting="soft",
        flatten_transform=False,
    ).fit(X, y)
    # Flattened output: (n_samples, n_estimators * n_classes) = (4, 3 * 2).
    assert_array_equal(eclf1.transform(X).shape, (4, 6))
    assert_array_equal(eclf2.transform(X).shape, (4, 6))
    # Unflattened output: (n_estimators, n_samples, n_classes).
    assert_array_equal(eclf3.transform(X).shape, (3, 4, 2))
    assert_array_almost_equal(eclf1.transform(X), eclf2.transform(X))
    # Reshaping the 3D output must recover the flattened layout exactly.
    assert_array_almost_equal(
        eclf3.transform(X).swapaxes(0, 1).reshape((4, 6)), eclf2.transform(X)
    )
@pytest.mark.parametrize(
    "X, y, voter",
    [
        (
            X,
            y,
            VotingClassifier(
                [
                    ("lr", LogisticRegression()),
                    ("rf", RandomForestClassifier(n_estimators=5)),
                ]
            ),
        ),
        (
            X_r,
            y_r,
            VotingRegressor(
                [
                    ("lr", LinearRegression()),
                    ("rf", RandomForestRegressor(n_estimators=5)),
                ]
            ),
        ),
    ],
)
def test_none_estimator_with_weights(X, y, voter):
    # check that an estimator can be set to 'drop' and passing some weight
    # regression test for
    # https://github.com/scikit-learn/scikit-learn/issues/13777
    voter = clone(voter)
    # Scaled to solve ConvergenceWarning throw by Logistic Regression
    X_scaled = StandardScaler().fit_transform(X)
    voter.fit(X_scaled, y, sample_weight=np.ones(y.shape))
    # Dropping one estimator must not break weighted fitting or prediction.
    voter.set_params(lr="drop")
    voter.fit(X_scaled, y, sample_weight=np.ones(y.shape))
    y_pred = voter.predict(X_scaled)
    assert y_pred.shape == y.shape
@pytest.mark.parametrize(
    "est",
    [
        VotingRegressor(
            estimators=[
                ("lr", LinearRegression()),
                ("tree", DecisionTreeRegressor(random_state=0)),
            ]
        ),
        VotingClassifier(
            estimators=[
                ("lr", LogisticRegression(random_state=0)),
                ("tree", DecisionTreeClassifier(random_state=0)),
            ]
        ),
    ],
    ids=["VotingRegressor", "VotingClassifier"],
)
def test_n_features_in(est):
    """n_features_in_ must appear only after fit and report the data width."""
    voter = clone(est)
    X_toy = [[1, 2], [3, 4], [5, 6]]
    y_toy = [0, 1, 2]
    # The attribute must not exist on an unfitted estimator.
    assert not hasattr(voter, "n_features_in_")
    voter.fit(X_toy, y_toy)
    assert voter.n_features_in_ == 2
@pytest.mark.parametrize(
    "estimator",
    [
        VotingRegressor(
            estimators=[
                ("lr", LinearRegression()),
                ("rf", RandomForestRegressor(random_state=123)),
            ],
            verbose=True,
        ),
        VotingClassifier(
            estimators=[
                ("lr", LogisticRegression(random_state=123)),
                ("rf", RandomForestClassifier(random_state=123)),
            ],
            verbose=True,
        ),
    ],
)
def test_voting_verbose(estimator, capsys):
    # With verbose=True, fitting must print one
    # "[Voting] ... (i of n) Processing <name>, total=..." line per
    # sub-estimator, in declaration order, to stdout.
    X = np.array([[-1.1, -1.5], [-1.2, -1.4], [-3.4, -2.2], [1.1, 1.2]])
    y = np.array([1, 1, 2, 2])
    pattern = (
        r"\[Voting\].*\(1 of 2\) Processing lr, total=.*\n"
        r"\[Voting\].*\(2 of 2\) Processing rf, total=.*\n$"
    )
    clone(estimator).fit(X, y)
    assert re.match(pattern, capsys.readouterr()[0])
def test_get_features_names_out_regressor():
    """Check get_feature_names_out output for regressor."""
    X_toy = [[1, 2], [3, 4], [5, 6]]
    y_toy = [0, 1, 2]
    ensemble = VotingRegressor(
        estimators=[
            ("lr", LinearRegression()),
            ("tree", DecisionTreeRegressor(random_state=0)),
            ("ignore", "drop"),
        ]
    )
    ensemble.fit(X_toy, y_toy)
    # Dropped estimators must not contribute an output feature name.
    assert_array_equal(
        ensemble.get_feature_names_out(),
        ["votingregressor_lr", "votingregressor_tree"],
    )
@pytest.mark.parametrize(
    "kwargs, expected_names",
    [
        (
            # Soft voting + flattening: one column per estimator and class.
            {"voting": "soft", "flatten_transform": True},
            [
                "votingclassifier_lr0",
                "votingclassifier_lr1",
                "votingclassifier_lr2",
                "votingclassifier_tree0",
                "votingclassifier_tree1",
                "votingclassifier_tree2",
            ],
        ),
        # Hard voting: one predicted-label column per estimator.
        ({"voting": "hard"}, ["votingclassifier_lr", "votingclassifier_tree"]),
    ],
)
def test_get_features_names_out_classifier(kwargs, expected_names):
    """Check get_feature_names_out for classifier for different settings."""
    X = [[1, 2], [3, 4], [5, 6], [1, 1.2]]
    y = [0, 1, 2, 0]
    voting = VotingClassifier(
        estimators=[
            ("lr", LogisticRegression(random_state=0)),
            ("tree", DecisionTreeClassifier(random_state=0)),
        ],
        **kwargs,
    )
    voting.fit(X, y)
    X_trans = voting.transform(X)
    names_out = voting.get_feature_names_out()
    # Feature names must line up one-to-one with transform's columns.
    assert X_trans.shape[1] == len(expected_names)
    assert_array_equal(names_out, expected_names)
def test_get_features_names_out_classifier_error():
    """Check that error is raised when voting="soft" and flatten_transform=False."""
    X_toy = [[1, 2], [3, 4], [5, 6]]
    y_toy = [0, 1, 2]
    ensemble = VotingClassifier(
        estimators=[
            ("lr", LogisticRegression(random_state=0)),
            ("tree", DecisionTreeClassifier(random_state=0)),
        ],
        voting="soft",
        flatten_transform=False,
    )
    ensemble.fit(X_toy, y_toy)
    # The 3D transform output cannot be described by flat feature names.
    expected_msg = (
        "get_feature_names_out is not supported when `voting='soft'` and "
        "`flatten_transform=False`"
    )
    with pytest.raises(ValueError, match=expected_msg):
        ensemble.get_feature_names_out()
# Metadata Routing Tests
# ======================
@pytest.mark.parametrize(
    "Estimator, Child",
    [(VotingClassifier, ConsumingClassifier), (VotingRegressor, ConsumingRegressor)],
)
def test_routing_passed_metadata_not_supported(Estimator, Child):
    """Test that the right error message is raised when metadata is passed while
    not supported when `enable_metadata_routing=False`."""
    X = np.array([[0, 1], [2, 2], [4, 6]])
    y = [1, 2, 3]
    # NOTE(review): the estimators argument below is not the usual list of
    # (name, estimator) tuples; presumably the routing error fires before
    # the estimators list is validated -- confirm if this test is edited.
    with pytest.raises(
        ValueError, match="is only supported if enable_metadata_routing=True"
    ):
        Estimator(["clf", Child()]).fit(X, y, sample_weight=[1, 1, 1], metadata="a")
@pytest.mark.parametrize(
    "Estimator, Child",
    [(VotingClassifier, ConsumingClassifier), (VotingRegressor, ConsumingRegressor)],
)
@config_context(enable_metadata_routing=True)
def test_get_metadata_routing_without_fit(Estimator, Child):
    """get_metadata_routing must be callable on an unfitted ensemble."""
    # Must not raise even though fit was never called.
    Estimator([("sub_est", Child())]).get_metadata_routing()
@pytest.mark.parametrize(
    "Estimator, Child",
    [(VotingClassifier, ConsumingClassifier), (VotingRegressor, ConsumingRegressor)],
)
@pytest.mark.parametrize("prop", ["sample_weight", "metadata"])
@config_context(enable_metadata_routing=True)
def test_metadata_routing_for_voting_estimators(Estimator, Child, prop):
    """Test that metadata is routed correctly for Voting*."""
    X = np.array([[0, 1], [2, 2], [4, 6]])
    y = [1, 2, 3]
    sample_weight, metadata = [1, 1, 1], "a"
    # Both children explicitly request the routed property via
    # set_fit_request, so the ensemble must forward it to each of them.
    est = Estimator(
        [
            (
                "sub_est1",
                Child(registry=_Registry()).set_fit_request(**{prop: True}),
            ),
            (
                "sub_est2",
                Child(registry=_Registry()).set_fit_request(**{prop: True}),
            ),
        ]
    )
    est.fit(X, y, **{prop: sample_weight if prop == "sample_weight" else metadata})
    for estimator in est.estimators:
        if prop == "sample_weight":
            kwargs = {prop: sample_weight}
        else:
            kwargs = {prop: metadata}
        # access sub-estimator in (name, est) with estimator[1]
        registry = estimator[1].registry
        assert len(registry)
        # Every recorded sub-estimator fit must have received the metadata.
        for sub_est in registry:
            check_recorded_metadata(obj=sub_est, method="fit", parent="fit", **kwargs)
@pytest.mark.parametrize(
    "Estimator, Child",
    [(VotingClassifier, ConsumingClassifier), (VotingRegressor, ConsumingRegressor)],
)
@config_context(enable_metadata_routing=True)
def test_metadata_routing_error_for_voting_estimators(Estimator, Child):
    """Test that the right error is raised when metadata is not requested."""
    X = np.array([[0, 1], [2, 2], [4, 6]])
    y = [1, 2, 3]
    sample_weight, metadata = [1, 1, 1], "a"
    # The child never calls set_fit_request, so routing must fail loudly
    # rather than silently dropping the metadata.
    est = Estimator([("sub_est", Child())])
    error_message = (
        "[sample_weight, metadata] are passed but are not explicitly set as requested"
        f" or not requested for {Child.__name__}.fit"
    )
    with pytest.raises(ValueError, match=re.escape(error_message)):
        est.fit(X, y, sample_weight=sample_weight, metadata=metadata)
# End of Metadata Routing Tests
# =============================

View File

@@ -0,0 +1,602 @@
"""Testing for the boost module (sklearn.ensemble.boost)."""
import re
import numpy as np
import pytest
from sklearn import datasets
from sklearn.base import BaseEstimator, clone
from sklearn.dummy import DummyClassifier, DummyRegressor
from sklearn.ensemble import AdaBoostClassifier, AdaBoostRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.utils import shuffle
from sklearn.utils._mocking import NoSampleWeightWrapper
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import (
COO_CONTAINERS,
CSC_CONTAINERS,
CSR_CONTAINERS,
DOK_CONTAINERS,
LIL_CONTAINERS,
)
# Common random state
rng = np.random.RandomState(0)
# Toy sample
X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1]]
y_class = ["foo", "foo", "foo", 1, 1, 1] # test string class labels
y_regr = [-1, -1, -1, 1, 1, 1]
T = [[-1, -1], [2, 2], [3, 2]]
y_t_class = ["foo", 1, 1]
y_t_regr = [-1, 1, 1]
# Load the iris dataset and randomly permute it
iris = datasets.load_iris()
# NOTE(review): `perm` appears unused (shuffle below does the permutation),
# but this call advances `rng`'s state and thus the shuffles' outcomes --
# confirm before removing.
perm = rng.permutation(iris.target.size)
iris.data, iris.target = shuffle(iris.data, iris.target, random_state=rng)
# Load the diabetes dataset and randomly permute it
diabetes = datasets.load_diabetes()
diabetes.data, diabetes.target = shuffle(
    diabetes.data, diabetes.target, random_state=rng
)
def test_oneclass_adaboost_proba():
    """predict_proba must stay well defined when training data has one class.

    Non-regression test for
    https://github.com/scikit-learn/scikit-learn/issues/7501
    """
    y_single_class = np.ones(len(X))
    model = AdaBoostClassifier().fit(X, y_single_class)
    # With a single class, every sample gets probability 1 for that class.
    assert_array_almost_equal(model.predict_proba(X), np.ones((len(X), 1)))
def test_classification_toy():
    # Sanity-check classification on the toy sample, including the mix of
    # string and integer class labels.
    model = AdaBoostClassifier(random_state=0)
    model.fit(X, y_class)
    assert_array_equal(model.predict(T), y_t_class)
    assert_array_equal(np.unique(np.asarray(y_t_class)), model.classes_)
    n_test = len(T)
    # Two classes -> (n_samples, 2) probabilities, 1-D decision values.
    assert model.predict_proba(T).shape == (n_test, 2)
    assert model.decision_function(T).shape == (n_test,)
def test_regression_toy():
    # Check regression on a toy dataset.
    clf = AdaBoostRegressor(random_state=0)
    clf.fit(X, y_regr)
    assert_array_equal(clf.predict(T), y_t_regr)
def test_iris():
    # Check consistency on dataset iris.
    expected_classes = np.unique(iris.target)
    model = AdaBoostClassifier()
    model.fit(iris.data, iris.target)
    assert_array_equal(expected_classes, model.classes_)
    n_classes = len(expected_classes)
    assert model.predict_proba(iris.data).shape[1] == n_classes
    assert model.decision_function(iris.data).shape[1] == n_classes
    score = model.score(iris.data, iris.target)
    assert score > 0.9, f"Failed with {score = }"
    # Check we used multiple estimators
    assert len(model.estimators_) > 1
    # Check for distinct random states (see issue #7408)
    distinct_states = {est.random_state for est in model.estimators_}
    assert len(distinct_states) == len(model.estimators_)
@pytest.mark.parametrize("loss", ["linear", "square", "exponential"])
def test_diabetes(loss):
    # Check consistency on dataset diabetes for every supported loss.
    model = AdaBoostRegressor(loss=loss, random_state=0)
    model.fit(diabetes.data, diabetes.target)
    assert model.score(diabetes.data, diabetes.target) > 0.55
    # Check we used multiple estimators
    assert len(model.estimators_) > 1
    # Check for distinct random states (see issue #7408)
    assert len({est.random_state for est in model.estimators_}) == len(
        model.estimators_
    )
def test_staged_predict():
    # Check staged predictions: there must be exactly one stage per
    # estimator and the last stage must equal the final predict /
    # predict_proba / score output.
    rng = np.random.RandomState(0)
    iris_weights = rng.randint(10, size=iris.target.shape)
    diabetes_weights = rng.randint(10, size=diabetes.target.shape)
    # AdaBoost classification
    clf = AdaBoostClassifier(n_estimators=10)
    clf.fit(iris.data, iris.target, sample_weight=iris_weights)
    predictions = clf.predict(iris.data)
    staged_predictions = [p for p in clf.staged_predict(iris.data)]
    proba = clf.predict_proba(iris.data)
    staged_probas = [p for p in clf.staged_predict_proba(iris.data)]
    score = clf.score(iris.data, iris.target, sample_weight=iris_weights)
    staged_scores = [
        s for s in clf.staged_score(iris.data, iris.target, sample_weight=iris_weights)
    ]
    assert len(staged_predictions) == 10
    assert_array_almost_equal(predictions, staged_predictions[-1])
    assert len(staged_probas) == 10
    assert_array_almost_equal(proba, staged_probas[-1])
    assert len(staged_scores) == 10
    assert_array_almost_equal(score, staged_scores[-1])
    # AdaBoost regression
    clf = AdaBoostRegressor(n_estimators=10, random_state=0)
    clf.fit(diabetes.data, diabetes.target, sample_weight=diabetes_weights)
    predictions = clf.predict(diabetes.data)
    staged_predictions = [p for p in clf.staged_predict(diabetes.data)]
    score = clf.score(diabetes.data, diabetes.target, sample_weight=diabetes_weights)
    staged_scores = [
        s
        for s in clf.staged_score(
            diabetes.data, diabetes.target, sample_weight=diabetes_weights
        )
    ]
    assert len(staged_predictions) == 10
    assert_array_almost_equal(predictions, staged_predictions[-1])
    assert len(staged_scores) == 10
    assert_array_almost_equal(score, staged_scores[-1])
def test_gridsearch():
    """Grid-search over the base tree's depth and the ensemble size.

    Smoke test: the nested ``estimator__`` parameter syntax must work for
    both AdaBoost variants.
    """
    param_grid = {"n_estimators": (1, 2), "estimator__max_depth": (1, 2)}
    # AdaBoost classification
    search = GridSearchCV(
        AdaBoostClassifier(estimator=DecisionTreeClassifier()), param_grid
    )
    search.fit(iris.data, iris.target)
    # AdaBoost regression
    search = GridSearchCV(
        AdaBoostRegressor(estimator=DecisionTreeRegressor(), random_state=0),
        param_grid,
    )
    search.fit(diabetes.data, diabetes.target)
def test_pickle():
    """Round-trip both AdaBoost estimators through pickle."""
    import pickle
    cases = [
        (AdaBoostClassifier(), iris.data, iris.target),
        (AdaBoostRegressor(random_state=0), diabetes.data, diabetes.target),
    ]
    for estimator, data, target in cases:
        estimator.fit(data, target)
        original_score = estimator.score(data, target)
        restored = pickle.loads(pickle.dumps(estimator))
        # The unpickled object must be of the same class and score
        # identically on the training data.
        assert type(restored) == estimator.__class__
        assert restored.score(data, target) == original_score
def test_importances():
    # The three informative features (placed first because shuffle=False)
    # must dominate the feature importances.
    X_imp, y_imp = datasets.make_classification(
        n_samples=2000,
        n_features=10,
        n_informative=3,
        n_redundant=0,
        n_repeated=0,
        shuffle=False,
        random_state=1,
    )
    model = AdaBoostClassifier()
    model.fit(X_imp, y_imp)
    importances = model.feature_importances_
    assert importances.shape[0] == 10
    # Each of the first three features must outrank every later feature.
    assert (importances[:3, np.newaxis] >= importances[3:]).all()
def test_adaboost_classifier_sample_weight_error():
    # A sample_weight vector whose length does not match X must be rejected
    # with an informative shape message.
    model = AdaBoostClassifier()
    expected_msg = re.escape("sample_weight.shape == (1,), expected (6,)")
    with pytest.raises(ValueError, match=expected_msg):
        model.fit(X, y_class, sample_weight=np.asarray([-1]))
def test_estimator():
    # Test different estimators.
    from sklearn.ensemble import RandomForestClassifier
    # XXX doesn't work with y_class because RF doesn't support classes_
    # Shouldn't AdaBoost run a LabelBinarizer?
    clf = AdaBoostClassifier(RandomForestClassifier())
    clf.fit(X, y_regr)
    clf = AdaBoostClassifier(SVC())
    clf.fit(X, y_class)
    from sklearn.ensemble import RandomForestRegressor
    clf = AdaBoostRegressor(RandomForestRegressor(), random_state=0)
    clf.fit(X, y_regr)
    clf = AdaBoostRegressor(SVR(), random_state=0)
    clf.fit(X, y_regr)
    # Check that an empty discrete ensemble fails in fit, not predict.
    # All samples are identical, so no learner can beat chance and boosting
    # must abort with a "worse than random" ValueError.
    X_fail = [[1, 1], [1, 1], [1, 1], [1, 1]]
    y_fail = ["foo", "bar", 1, 2]
    clf = AdaBoostClassifier(SVC())
    with pytest.raises(ValueError, match="worse than random"):
        clf.fit(X_fail, y_fail)
def test_sample_weights_infinite():
    # An absurdly large learning rate makes the boosted sample weights blow
    # up; the fit must emit a warning instead of failing silently.
    model = AdaBoostClassifier(n_estimators=30, learning_rate=23.0)
    with pytest.warns(UserWarning, match="Sample weights have reached infinite values"):
        model.fit(iris.data, iris.target)
@pytest.mark.parametrize(
    "sparse_container, expected_internal_type",
    zip(
        [
            *CSC_CONTAINERS,
            *CSR_CONTAINERS,
            *LIL_CONTAINERS,
            *COO_CONTAINERS,
            *DOK_CONTAINERS,
        ],
        # CSC input stays CSC internally; every other sparse format is seen
        # as CSR by the base estimators.
        CSC_CONTAINERS + 4 * CSR_CONTAINERS,
    ),
)
def test_sparse_classification(sparse_container, expected_internal_type):
    # Check classification with sparse input.
    class CustomSVC(SVC):
        """SVC variant that records the nature of the training set."""
        def fit(self, X, y, sample_weight=None):
            """Modification on fit caries data type for later verification."""
            super().fit(X, y, sample_weight=sample_weight)
            self.data_type_ = type(X)
            return self
    X, y = datasets.make_multilabel_classification(
        n_classes=1, n_samples=15, n_features=5, random_state=42
    )
    # Flatten y to a 1d array
    y = np.ravel(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    X_train_sparse = sparse_container(X_train)
    X_test_sparse = sparse_container(X_test)
    # Trained on sparse format
    sparse_classifier = AdaBoostClassifier(
        estimator=CustomSVC(probability=True),
        random_state=1,
    ).fit(X_train_sparse, y_train)
    # Trained on dense format
    dense_classifier = AdaBoostClassifier(
        estimator=CustomSVC(probability=True),
        random_state=1,
    ).fit(X_train, y_train)
    # Every prediction-style method must agree between the sparse-trained
    # and dense-trained ensembles.
    # predict
    sparse_clf_results = sparse_classifier.predict(X_test_sparse)
    dense_clf_results = dense_classifier.predict(X_test)
    assert_array_equal(sparse_clf_results, dense_clf_results)
    # decision_function
    sparse_clf_results = sparse_classifier.decision_function(X_test_sparse)
    dense_clf_results = dense_classifier.decision_function(X_test)
    assert_array_almost_equal(sparse_clf_results, dense_clf_results)
    # predict_log_proba
    sparse_clf_results = sparse_classifier.predict_log_proba(X_test_sparse)
    dense_clf_results = dense_classifier.predict_log_proba(X_test)
    assert_array_almost_equal(sparse_clf_results, dense_clf_results)
    # predict_proba
    sparse_clf_results = sparse_classifier.predict_proba(X_test_sparse)
    dense_clf_results = dense_classifier.predict_proba(X_test)
    assert_array_almost_equal(sparse_clf_results, dense_clf_results)
    # score
    sparse_clf_results = sparse_classifier.score(X_test_sparse, y_test)
    dense_clf_results = dense_classifier.score(X_test, y_test)
    assert_array_almost_equal(sparse_clf_results, dense_clf_results)
    # staged_decision_function
    sparse_clf_results = sparse_classifier.staged_decision_function(X_test_sparse)
    dense_clf_results = dense_classifier.staged_decision_function(X_test)
    for sparse_clf_res, dense_clf_res in zip(sparse_clf_results, dense_clf_results):
        assert_array_almost_equal(sparse_clf_res, dense_clf_res)
    # staged_predict
    sparse_clf_results = sparse_classifier.staged_predict(X_test_sparse)
    dense_clf_results = dense_classifier.staged_predict(X_test)
    for sparse_clf_res, dense_clf_res in zip(sparse_clf_results, dense_clf_results):
        assert_array_equal(sparse_clf_res, dense_clf_res)
    # staged_predict_proba
    sparse_clf_results = sparse_classifier.staged_predict_proba(X_test_sparse)
    dense_clf_results = dense_classifier.staged_predict_proba(X_test)
    for sparse_clf_res, dense_clf_res in zip(sparse_clf_results, dense_clf_results):
        assert_array_almost_equal(sparse_clf_res, dense_clf_res)
    # staged_score
    sparse_clf_results = sparse_classifier.staged_score(X_test_sparse, y_test)
    dense_clf_results = dense_classifier.staged_score(X_test, y_test)
    for sparse_clf_res, dense_clf_res in zip(sparse_clf_results, dense_clf_results):
        assert_array_equal(sparse_clf_res, dense_clf_res)
    # Verify sparsity of data is maintained during training
    types = [i.data_type_ for i in sparse_classifier.estimators_]
    assert all([t == expected_internal_type for t in types])
@pytest.mark.parametrize(
    "sparse_container, expected_internal_type",
    zip(
        [
            *CSC_CONTAINERS,
            *CSR_CONTAINERS,
            *LIL_CONTAINERS,
            *COO_CONTAINERS,
            *DOK_CONTAINERS,
        ],
        # CSC input stays CSC internally; other sparse formats become CSR.
        CSC_CONTAINERS + 4 * CSR_CONTAINERS,
    ),
)
def test_sparse_regression(sparse_container, expected_internal_type):
    # Check regression with sparse input.
    class CustomSVR(SVR):
        """SVR variant that records the nature of the training set."""
        def fit(self, X, y, sample_weight=None):
            """Modification on fit caries data type for later verification."""
            super().fit(X, y, sample_weight=sample_weight)
            self.data_type_ = type(X)
            return self
    X, y = datasets.make_regression(
        n_samples=15, n_features=50, n_targets=1, random_state=42
    )
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    X_train_sparse = sparse_container(X_train)
    X_test_sparse = sparse_container(X_test)
    # Trained on sparse format
    sparse_regressor = AdaBoostRegressor(estimator=CustomSVR(), random_state=1).fit(
        X_train_sparse, y_train
    )
    # Trained on dense format
    dense_regressor = AdaBoostRegressor(estimator=CustomSVR(), random_state=1).fit(
        X_train, y_train
    )
    # predict: sparse- and dense-trained models must agree.
    sparse_regr_results = sparse_regressor.predict(X_test_sparse)
    dense_regr_results = dense_regressor.predict(X_test)
    assert_array_almost_equal(sparse_regr_results, dense_regr_results)
    # staged_predict
    sparse_regr_results = sparse_regressor.staged_predict(X_test_sparse)
    dense_regr_results = dense_regressor.staged_predict(X_test)
    for sparse_regr_res, dense_regr_res in zip(sparse_regr_results, dense_regr_results):
        assert_array_almost_equal(sparse_regr_res, dense_regr_res)
    # Verify the sparse format seen by each fitted base estimator.
    types = [i.data_type_ for i in sparse_regressor.estimators_]
    assert all([t == expected_internal_type for t in types])
def test_sample_weight_adaboost_regressor():
    """AdaBoostRegressor must accept base estimators without sample_weight.

    The random weighted sampling happens internally in AdaBoostRegressor's
    _boost method, so the underlying estimator's ``fit`` never needs a
    ``sample_weight`` argument.
    """
    class _PlainEstimator(BaseEstimator):
        # Deliberately omits sample_weight from the fit signature.
        def fit(self, X, y):
            pass
        def predict(self, X):
            return np.zeros(X.shape[0])
    booster = AdaBoostRegressor(_PlainEstimator(), n_estimators=3)
    booster.fit(X, y_regr)
    assert len(booster.estimator_weights_) == len(booster.estimator_errors_)
def test_multidimensional_X():
    """AdaBoost must accept an n-dimensional data matrix.

    Works here because the dummy base estimators ignore the feature layout.
    """
    local_rng = np.random.RandomState(0)
    X_3d = local_rng.randn(51, 3, 3)
    y_clf = local_rng.choice([0, 1], 51)
    y_reg = local_rng.randn(51)
    clf_boost = AdaBoostClassifier(DummyClassifier(strategy="most_frequent"))
    clf_boost.fit(X_3d, y_clf)
    clf_boost.predict(X_3d)
    clf_boost.predict_proba(X_3d)
    reg_boost = AdaBoostRegressor(DummyRegressor())
    reg_boost.fit(X_3d, y_reg)
    reg_boost.predict(X_3d)
def test_adaboostclassifier_without_sample_weight():
    # A base estimator that rejects sample_weight must trigger a clear error.
    data, target = iris.data, iris.target
    base = NoSampleWeightWrapper(DummyClassifier())
    model = AdaBoostClassifier(estimator=base)
    expected_msg = f"{base.__class__.__name__} doesn't support sample_weight"
    with pytest.raises(ValueError, match=expected_msg):
        model.fit(data, target)
def test_adaboostregressor_sample_weight():
    # check that giving weight will have an influence on the error computed
    # for a weak learner
    rng = np.random.RandomState(42)
    X = np.linspace(0, 100, num=1000)
    y = (0.8 * X + 0.2) + (rng.rand(X.shape[0]) * 0.0001)
    X = X.reshape(-1, 1)
    # add an arbitrary outlier
    X[-1] *= 10
    y[-1] = 10000
    # random_state=0 ensure that the underlying bootstrap will use the outlier
    regr_no_outlier = AdaBoostRegressor(
        estimator=LinearRegression(), n_estimators=1, random_state=0
    )
    regr_with_weight = clone(regr_no_outlier)
    regr_with_outlier = clone(regr_no_outlier)
    # fit 3 models:
    # - a model containing the outlier
    # - a model without the outlier
    # - a model containing the outlier but with a null sample-weight
    regr_with_outlier.fit(X, y)
    regr_no_outlier.fit(X[:-1], y[:-1])
    sample_weight = np.ones_like(y)
    sample_weight[-1] = 0
    regr_with_weight.fit(X, y, sample_weight=sample_weight)
    # All scores are computed on the inlier points only.
    score_with_outlier = regr_with_outlier.score(X[:-1], y[:-1])
    score_no_outlier = regr_no_outlier.score(X[:-1], y[:-1])
    score_with_weight = regr_with_weight.score(X[:-1], y[:-1])
    # The outlier must hurt the fit, and a zero weight must neutralise it
    # as effectively as removing the sample altogether.
    assert score_with_outlier < score_no_outlier
    assert score_with_outlier < score_with_weight
    assert score_no_outlier == pytest.approx(score_with_weight)
def test_adaboost_consistent_predict():
    # check that predict_proba and predict give consistent results
    # regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/14084
    X_train, X_test, y_train, _ = train_test_split(
        *datasets.load_digits(return_X_y=True), random_state=42
    )
    model = AdaBoostClassifier(random_state=42).fit(X_train, y_train)
    # argmax over probabilities must pick the same class as predict.
    proba_argmax = np.argmax(model.predict_proba(X_test), axis=1)
    assert_array_equal(proba_argmax, model.predict(X_test))
@pytest.mark.parametrize(
    "model, X, y",
    [
        (AdaBoostClassifier(), iris.data, iris.target),
        (AdaBoostRegressor(), diabetes.data, diabetes.target),
    ],
)
def test_adaboost_negative_weight_error(model, X, y):
    """Negative sample weights must be rejected with a ValueError."""
    weights = np.ones_like(y)
    weights[-1] = -10
    with pytest.raises(
        ValueError, match="Negative values in data passed to `sample_weight`"
    ):
        model.fit(X, y, sample_weight=weights)
def test_adaboost_numerically_stable_feature_importance_with_small_weights():
    """Check that we don't create NaN feature importance with numerically
    instable inputs.
    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/20320
    """
    local_rng = np.random.RandomState(42)
    features = local_rng.normal(size=(1000, 10))
    labels = local_rng.choice([0, 1], size=1000)
    # Tiny weights used to underflow intermediate computations.
    tiny_weights = np.ones_like(labels) * 1e-263
    base_tree = DecisionTreeClassifier(max_depth=10, random_state=12)
    model = AdaBoostClassifier(estimator=base_tree, n_estimators=20, random_state=12)
    model.fit(features, labels, sample_weight=tiny_weights)
    assert not np.isnan(model.feature_importances_).any()
def test_adaboost_decision_function(global_random_seed):
"""Check that the decision function respects the symmetric constraint for weak
learners.
Non-regression test for:
https://github.com/scikit-learn/scikit-learn/issues/26520
"""
n_classes = 3
X, y = datasets.make_classification(
n_classes=n_classes, n_clusters_per_class=1, random_state=global_random_seed
)
clf = AdaBoostClassifier(n_estimators=1, random_state=global_random_seed).fit(X, y)
y_score = clf.decision_function(X)
assert_allclose(y_score.sum(axis=1), 0, atol=1e-8)
# With a single learner, we expect to have a decision function in
# {1, - 1 / (n_classes - 1)}.
assert set(np.unique(y_score)) == {1, -1 / (n_classes - 1)}
# We can assert the same for staged_decision_function since we have a single learner
for y_score in clf.staged_decision_function(X):
assert_allclose(y_score.sum(axis=1), 0, atol=1e-8)
# With a single learner, we expect to have a decision function in
# {1, - 1 / (n_classes - 1)}.
assert set(np.unique(y_score)) == {1, -1 / (n_classes - 1)}
clf.set_params(n_estimators=5).fit(X, y)
y_score = clf.decision_function(X)
assert_allclose(y_score.sum(axis=1), 0, atol=1e-8)
for y_score in clf.staged_decision_function(X):
assert_allclose(y_score.sum(axis=1), 0, atol=1e-8)