This commit is contained in:
2026-04-10 15:06:59 +02:00
parent 3031b7153b
commit e5a4711004
7806 changed files with 1918528 additions and 335 deletions

View File

@@ -0,0 +1,106 @@
"""Tools for model selection, such as cross validation and hyper-parameter tuning."""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import typing
from sklearn.model_selection._classification_threshold import (
FixedThresholdClassifier,
TunedThresholdClassifierCV,
)
from sklearn.model_selection._plot import LearningCurveDisplay, ValidationCurveDisplay
from sklearn.model_selection._search import (
GridSearchCV,
ParameterGrid,
ParameterSampler,
RandomizedSearchCV,
)
from sklearn.model_selection._split import (
BaseCrossValidator,
BaseShuffleSplit,
GroupKFold,
GroupShuffleSplit,
KFold,
LeaveOneGroupOut,
LeaveOneOut,
LeavePGroupsOut,
LeavePOut,
PredefinedSplit,
RepeatedKFold,
RepeatedStratifiedKFold,
ShuffleSplit,
StratifiedGroupKFold,
StratifiedKFold,
StratifiedShuffleSplit,
TimeSeriesSplit,
check_cv,
train_test_split,
)
from sklearn.model_selection._validation import (
cross_val_predict,
cross_val_score,
cross_validate,
learning_curve,
permutation_test_score,
validation_curve,
)
if typing.TYPE_CHECKING:
# Avoid errors in type checkers (e.g. mypy) for experimental estimators.
# TODO: remove this check once the estimator is no longer experimental.
from sklearn.model_selection._search_successive_halving import (
HalvingGridSearchCV,
HalvingRandomSearchCV,
)
# Public names of this module, listed in ASCII order (capitalized class names
# first, then lowercase function names).
# NOTE: "HalvingGridSearchCV" and "HalvingRandomSearchCV" are listed here although
# this module only imports them under `typing.TYPE_CHECKING`. At runtime, looking
# them up on the module goes through the module-level `__getattr__`, which raises
# an `ImportError` explaining how to enable the experimental estimators.
__all__ = [
    "BaseCrossValidator",
    "BaseShuffleSplit",
    "FixedThresholdClassifier",
    "GridSearchCV",
    "GroupKFold",
    "GroupShuffleSplit",
    "HalvingGridSearchCV",
    "HalvingRandomSearchCV",
    "KFold",
    "LearningCurveDisplay",
    "LeaveOneGroupOut",
    "LeaveOneOut",
    "LeavePGroupsOut",
    "LeavePOut",
    "ParameterGrid",
    "ParameterSampler",
    "PredefinedSplit",
    "RandomizedSearchCV",
    "RepeatedKFold",
    "RepeatedStratifiedKFold",
    "ShuffleSplit",
    "StratifiedGroupKFold",
    "StratifiedKFold",
    "StratifiedShuffleSplit",
    "TimeSeriesSplit",
    "TunedThresholdClassifierCV",
    "ValidationCurveDisplay",
    "check_cv",
    "cross_val_predict",
    "cross_val_score",
    "cross_validate",
    "learning_curve",
    "permutation_test_score",
    "train_test_split",
    "validation_curve",
]
# TODO: remove this check once the estimator is no longer experimental.
def __getattr__(name):
    """Module-level attribute hook for names not found on the module.

    Python calls this function (PEP 562) when a regular attribute lookup on the
    module fails. It is used to give a helpful message for the experimental
    halving search estimators, which are not imported by default.

    Parameters
    ----------
    name : str
        Name of the attribute that was requested on the module.

    Raises
    ------
    ImportError
        If `name` is `"HalvingGridSearchCV"` or `"HalvingRandomSearchCV"`; the
        message explains which import enables them.
    AttributeError
        For any other unknown attribute name.
    """
    experimental_names = ("HalvingGridSearchCV", "HalvingRandomSearchCV")

    # Guard clause: anything that is not an experimental estimator is simply
    # a missing attribute.
    if name not in experimental_names:
        raise AttributeError(f"module {__name__} has no attribute {name}")

    raise ImportError(
        f"{name} is experimental and the API might change without any "
        "deprecation cycle. To use it, you need to explicitly import "
        "enable_halving_search_cv:\n"
        "from sklearn.experimental import enable_halving_search_cv"
    )

View File

@@ -0,0 +1,883 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from collections.abc import MutableMapping
from numbers import Integral, Real
import numpy as np
from sklearn.base import (
BaseEstimator,
ClassifierMixin,
MetaEstimatorMixin,
_fit_context,
clone,
)
from sklearn.exceptions import NotFittedError
from sklearn.metrics import check_scoring, get_scorer_names
from sklearn.metrics._scorer import _CurveScorer, _threshold_scores_to_class_labels
from sklearn.model_selection._split import StratifiedShuffleSplit, check_cv
from sklearn.utils import _safe_indexing, get_tags
from sklearn.utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions
from sklearn.utils._response import _get_response_values_binary
from sklearn.utils.metadata_routing import (
MetadataRouter,
MethodMapping,
_raise_for_params,
process_routing,
)
from sklearn.utils.metaestimators import available_if
from sklearn.utils.multiclass import type_of_target
from sklearn.utils.parallel import Parallel, delayed
from sklearn.utils.validation import (
_check_method_params,
_estimator_has,
_num_samples,
check_is_fitted,
indexable,
)
def _check_is_fitted(estimator):
    """Check that a threshold meta-classifier can be used for prediction.

    The wrapped classifier (`estimator.estimator`) is checked first, so that a
    prefit inner classifier is accepted. If it is not fitted, the
    meta-classifier itself must expose a fitted `estimator_` attribute.

    Parameters
    ----------
    estimator : estimator instance
        The meta-estimator, exposing an `estimator` attribute.

    Raises
    ------
    NotFittedError
        Raised by the second check if neither the wrapped classifier nor the
        meta-estimator is fitted.
    """
    try:
        check_is_fitted(estimator.estimator)
    except NotFittedError:
        # The inner classifier is not prefit: require `estimator_` instead.
        check_is_fitted(estimator, attributes="estimator_")
class BaseThresholdClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
    """Common base for binary classifiers using a non-default decision threshold.

    This base class gathers what the threshold meta-classifiers share:

    - the validation of the common parameters and of the target in `fit`;
    - the prediction methods (`predict_proba`, `predict_log_proba` and
      `decision_function`) that are delegated to the wrapped classifier.

    `fit` delegates the actual training to a `_fit` method and then reads
    `estimator_`, so subclasses are expected to provide `_fit` and to set
    `estimator_` in it.

    .. versionadded:: 1.5

    Parameters
    ----------
    estimator : estimator instance
        The binary classifier, fitted or not, for which we want to optimize
        the decision threshold used during `predict`.

    response_method : {"auto", "decision_function", "predict_proba"}, default="auto"
        Method of the classifier `estimator` whose output is thresholded.
        It can be:

        * `"auto"`: try `"predict_proba"` then `"decision_function"`, in that
          order;
        * otherwise, one of `"predict_proba"` or `"decision_function"`.
          If the method is not implemented by the classifier, an error is
          raised.
    """

    _parameter_constraints: dict = {
        "estimator": [
            HasMethods(["fit", "predict_proba"]),
            HasMethods(["fit", "decision_function"]),
        ],
        "response_method": [StrOptions({"auto", "predict_proba", "decision_function"})],
    }

    def __init__(self, estimator, *, response_method="auto"):
        self.estimator = estimator
        self.response_method = response_method

    def _get_response_method(self):
        """Return the response method(s) to try, expanding `"auto"` to a list."""
        if self.response_method != "auto":
            return self.response_method
        return ["predict_proba", "decision_function"]

    @_fit_context(
        # *ThresholdClassifier*.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, **params):
        """Fit the classifier.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values. Only binary targets are supported.

        **params : dict
            Parameters to pass to the `fit` method of the underlying
            classifier.

        Returns
        -------
        self : object
            Returns an instance of self.

        Raises
        ------
        ValueError
            If `y` is not a binary target.
        """
        _raise_for_params(params, self, None)

        X, y = indexable(X, y)

        y_type = type_of_target(y, input_name="y")
        if y_type != "binary":
            raise ValueError(
                f"Only binary classification is supported. Unknown label type: {y_type}"
            )

        self._fit(X, y, **params)

        # Mirror the feature metadata of the fitted classifier when it has any.
        fitted = self.estimator_
        for attr_name in ("n_features_in_", "feature_names_in_"):
            if hasattr(fitted, attr_name):
                setattr(self, attr_name, getattr(fitted, attr_name))

        return self

    @property
    def classes_(self):
        """Class labels."""
        return self.estimator_.classes_

    @available_if(_estimator_has("predict_proba"))
    def predict_proba(self, X):
        """Predict class probabilities for `X` using the fitted estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input samples, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Returns
        -------
        probabilities : ndarray of shape (n_samples, n_classes)
            The class probabilities of the input samples.
        """
        _check_is_fitted(self)
        # Use `estimator_` when `fit` was called, else the (prefit) `estimator`.
        return getattr(self, "estimator_", self.estimator).predict_proba(X)

    @available_if(_estimator_has("predict_log_proba"))
    def predict_log_proba(self, X):
        """Predict logarithm class probabilities for `X` using the fitted estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input samples, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Returns
        -------
        log_probabilities : ndarray of shape (n_samples, n_classes)
            The logarithm class probabilities of the input samples.
        """
        _check_is_fitted(self)
        # Use `estimator_` when `fit` was called, else the (prefit) `estimator`.
        return getattr(self, "estimator_", self.estimator).predict_log_proba(X)

    @available_if(_estimator_has("decision_function"))
    def decision_function(self, X):
        """Decision function for samples in `X` using the fitted estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Input samples, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Returns
        -------
        decisions : ndarray of shape (n_samples,)
            The decision function computed by the fitted estimator.
        """
        _check_is_fitted(self)
        # Use `estimator_` when `fit` was called, else the (prefit) `estimator`.
        return getattr(self, "estimator_", self.estimator).decision_function(X)

    def __sklearn_tags__(self):
        """Return estimator tags: binary-only, sparse support as the wrapped one."""
        tags = super().__sklearn_tags__()
        tags.classifier_tags.multi_class = False
        # Sparse input is supported only if the wrapped classifier supports it.
        inner_input_tags = get_tags(self.estimator).input_tags
        tags.input_tags.sparse = inner_input_tags.sparse
        return tags
class FixedThresholdClassifier(BaseThresholdClassifier):
    """Binary classifier that manually sets the decision threshold.

    This classifier allows to change the default decision threshold used for
    converting posterior probability estimates (i.e. output of `predict_proba`) or
    decision scores (i.e. output of `decision_function`) into a class label.

    Here, the threshold is not optimized and is set to a constant value.

    Read more in the :ref:`User Guide <FixedThresholdClassifier>`.

    .. versionadded:: 1.5

    Parameters
    ----------
    estimator : estimator instance
        The binary classifier, fitted or not, for which we want to optimize
        the decision threshold used during `predict`.

    threshold : {"auto"} or float, default="auto"
        The decision threshold to use when converting posterior probability estimates
        (i.e. output of `predict_proba`) or decision scores (i.e. output of
        `decision_function`) into a class label. When `"auto"`, the threshold is set
        to 0.5 if `predict_proba` is used as `response_method`, otherwise it is set to
        0 (i.e. the default threshold for `decision_function`).

    pos_label : int, float, bool or str, default=None
        The label of the positive class. Used to process the output of the
        `response_method` method. When `pos_label=None`, if `y_true` is in `{-1, 1}` or
        `{0, 1}`, `pos_label` is set to 1, otherwise an error will be raised.

    response_method : {"auto", "decision_function", "predict_proba"}, default="auto"
        Methods by the classifier `estimator` corresponding to the
        decision function for which we want to find a threshold. It can be:

        * if `"auto"`, it will try to invoke `"predict_proba"` or `"decision_function"`
          in that order.
        * otherwise, one of `"predict_proba"` or `"decision_function"`.
          If the method is not implemented by the classifier, it will raise an
          error.

    Attributes
    ----------
    estimator_ : estimator instance
        The fitted classifier used when predicting.

    classes_ : ndarray of shape (n_classes,)
        The class labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

    See Also
    --------
    sklearn.model_selection.TunedThresholdClassifierCV : Classifier that post-tunes
        the decision threshold based on some metrics and using cross-validation.
    sklearn.calibration.CalibratedClassifierCV : Estimator that calibrates
        probabilities.

    Examples
    --------
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.metrics import confusion_matrix
    >>> from sklearn.model_selection import FixedThresholdClassifier, train_test_split
    >>> X, y = make_classification(
    ...     n_samples=1_000, weights=[0.9, 0.1], class_sep=0.8, random_state=42
    ... )
    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...     X, y, stratify=y, random_state=42
    ... )
    >>> classifier = LogisticRegression(random_state=0).fit(X_train, y_train)
    >>> print(confusion_matrix(y_test, classifier.predict(X_test)))
    [[217   7]
     [ 19   7]]
    >>> classifier_other_threshold = FixedThresholdClassifier(
    ...     classifier, threshold=0.1, response_method="predict_proba"
    ... ).fit(X_train, y_train)
    >>> print(confusion_matrix(y_test, classifier_other_threshold.predict(X_test)))
    [[184  40]
     [  6  20]]
    """

    _parameter_constraints: dict = {
        **BaseThresholdClassifier._parameter_constraints,
        "threshold": [StrOptions({"auto"}), Real],
        "pos_label": [Real, str, "boolean", None],
    }

    def __init__(
        self,
        estimator,
        *,
        threshold="auto",
        pos_label=None,
        response_method="auto",
    ):
        super().__init__(estimator=estimator, response_method=response_method)
        self.pos_label = pos_label
        self.threshold = threshold

    @property
    def classes_(self):
        """Class labels of the fitted (or prefit) underlying classifier.

        Raises
        ------
        AttributeError
            If neither `estimator_` exists nor the wrapped `estimator` is
            fitted. The original `NotFittedError` is kept as `__cause__`.
        """
        # Explicit `is not None` test: an estimator defining `__len__` or
        # `__bool__` could otherwise be skipped for being falsy.
        estimator = getattr(self, "estimator_", None)
        if estimator is not None:
            return estimator.classes_
        try:
            check_is_fitted(self.estimator)
            return self.estimator.classes_
        except NotFittedError as exc:
            # Chain from the caught instance (not the exception class) so that
            # the original error message is preserved in the traceback.
            raise AttributeError(
                "The underlying estimator is not fitted yet."
            ) from exc

    def _fit(self, X, y, **params):
        """Fit the classifier.

        A clone of `estimator` is fitted and stored as `estimator_`; the
        original `estimator` is left untouched.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values.

        **params : dict
            Parameters to pass to the `fit` method of the underlying
            classifier.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        routed_params = process_routing(self, "fit", **params)
        self.estimator_ = clone(self.estimator).fit(X, y, **routed_params.estimator.fit)
        return self

    def predict(self, X):
        """Predict the target of new samples.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The samples, as accepted by `estimator.predict`.

        Returns
        -------
        class_labels : ndarray of shape (n_samples,)
            The predicted class.
        """
        _check_is_fitted(self)

        # Use `estimator_` when `fit` was called, else the (prefit) `estimator`.
        estimator = getattr(self, "estimator_", self.estimator)

        y_score, _, response_method_used = _get_response_values_binary(
            estimator,
            X,
            self._get_response_method(),
            pos_label=self.pos_label,
            return_response_method_used=True,
        )

        if self.threshold == "auto":
            # Default cut-off depends on which response method was really used.
            decision_threshold = 0.5 if response_method_used == "predict_proba" else 0.0
        else:
            decision_threshold = self.threshold

        return _threshold_scores_to_class_labels(
            y_score, decision_threshold, self.classes_, self.pos_label
        )

    def get_metadata_routing(self):
        """Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        """
        # Metadata passed to `fit` is routed to the `fit` of the wrapped classifier.
        router = MetadataRouter(owner=self).add(
            estimator=self.estimator,
            method_mapping=MethodMapping().add(callee="fit", caller="fit"),
        )
        return router
def _fit_and_score_over_thresholds(
    classifier,
    X,
    y,
    *,
    fit_params,
    train_idx,
    val_idx,
    curve_scorer,
    score_params,
):
    """Fit a classifier and compute the scores for different decision thresholds.

    Parameters
    ----------
    classifier : estimator instance
        The classifier to fit and use for scoring. If `train_idx` is `None`,
        `classifier` is used as is (it is not fitted here).

    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The entire dataset.

    y : array-like of shape (n_samples,)
        The entire target vector.

    fit_params : dict
        Parameters to pass to the `fit` method of the underlying classifier.

    train_idx : ndarray of shape (n_train_samples,) or None
        The indices of the training set. If `None`, `classifier` is expected to be
        already fitted.

    val_idx : ndarray of shape (n_val_samples,)
        The indices of the validation set used to score `classifier`. Ignored
        when `train_idx` is `None`: the entire set is then used for scoring.

    curve_scorer : scorer instance
        The scorer taking `classifier` and the validation set as input and outputting
        decision thresholds and scores as a curve. Note that this is different from
        the usual scorer that outputs a single score value: `curve_scorer`
        outputs one score value per threshold.

    score_params : dict
        Parameters to pass to the `score` method of the underlying scorer.

    Returns
    -------
    scores : ndarray of shape (thresholds,) or tuple of such arrays
        The scores computed for each decision threshold. When TPR/TNR or precision/
        recall are computed, `scores` is a tuple of two arrays.

    potential_thresholds : ndarray of shape (thresholds,)
        The decision thresholds used to compute the scores. They are returned in
        ascending order.
    """
    if train_idx is None:
        # prefit estimator, only a validation set is provided
        return curve_scorer(classifier, X, y, **score_params)

    # Slice the data and the sample-aligned parameters for this split.
    X_train = _safe_indexing(X, train_idx)
    X_val = _safe_indexing(X, val_idx)
    y_train = _safe_indexing(y, train_idx)
    y_val = _safe_indexing(y, val_idx)
    fit_params_train = _check_method_params(X, fit_params, indices=train_idx)
    score_params_val = _check_method_params(X, score_params, indices=val_idx)

    classifier.fit(X_train, y_train, **fit_params_train)
    return curve_scorer(classifier, X_val, y_val, **score_params_val)
def _mean_interpolated_score(target_thresholds, cv_thresholds, cv_scores):
    """Compute the mean interpolated score across folds by defining common thresholds.

    Each fold comes with its own thresholds; its score curve is linearly
    interpolated (`np.interp`) on `target_thresholds` so that the curves of
    all folds can be averaged point by point.

    Parameters
    ----------
    target_thresholds : ndarray of shape (thresholds,)
        The thresholds to use to compute the mean score.

    cv_thresholds : ndarray of shape (n_folds, thresholds_fold)
        The thresholds used to compute the scores for each fold.

    cv_scores : ndarray of shape (n_folds, thresholds_fold)
        The scores computed for each threshold for each fold.

    Returns
    -------
    mean_score : ndarray of shape (thresholds,)
        The mean score across all folds for each target threshold.
    """
    # One interpolated score curve per fold, all defined on the same grid.
    per_fold_scores = [
        np.interp(target_thresholds, fold_thresholds, fold_scores)
        for fold_thresholds, fold_scores in zip(cv_thresholds, cv_scores)
    ]
    return np.mean(per_fold_scores, axis=0)
class TunedThresholdClassifierCV(BaseThresholdClassifier):
    """Classifier that post-tunes the decision threshold using cross-validation.

    This estimator post-tunes the decision threshold (cut-off point) that is
    used for converting posterior probability estimates (i.e. output of
    `predict_proba`) or decision scores (i.e. output of `decision_function`)
    into a class label. The tuning is done by optimizing a binary metric,
    potentially constrained by another metric.

    Read more in the :ref:`User Guide <TunedThresholdClassifierCV>`.

    .. versionadded:: 1.5

    Parameters
    ----------
    estimator : estimator instance
        The classifier, fitted or not, for which we want to optimize
        the decision threshold used during `predict`.

    scoring : str or callable, default="balanced_accuracy"
        The objective metric to be optimized. Can be one of:

        - str: string associated to a scoring function for binary classification,
          see :ref:`scoring_string_names` for options.
        - callable: a scorer callable object (e.g., function) with signature
          ``scorer(estimator, X, y)``. See :ref:`scoring_callable` for details.

    response_method : {"auto", "decision_function", "predict_proba"}, default="auto"
        Methods by the classifier `estimator` corresponding to the
        decision function for which we want to find a threshold. It can be:

        * if `"auto"`, it will try to invoke, for each classifier,
          `"predict_proba"` or `"decision_function"` in that order.
        * otherwise, one of `"predict_proba"` or `"decision_function"`.
          If the method is not implemented by the classifier, it will raise an
          error.

    thresholds : int or array-like, default=100
        The number of decision thresholds to use when discretizing the output of
        the classifier `response_method`. Pass an array-like to manually specify
        the thresholds to use.

    cv : int, float, cross-validation generator, iterable or "prefit", default=None
        Determines the cross-validation splitting strategy to train classifier.
        Possible inputs for cv are:

        * `None`, to use the default 5-fold stratified K-fold cross validation;
        * An integer number, to specify the number of folds in a stratified k-fold;
        * A float number, to specify a single shuffle split. The floating number should
          be in (0, 1) and represent the size of the validation set;
        * An object to be used as a cross-validation generator;
        * An iterable yielding train, test splits;
        * `"prefit"`, to bypass the cross-validation.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. warning::
            Using `cv="prefit"` and passing the same dataset for fitting `estimator`
            and tuning the cut-off point is subject to undesired overfitting. You can
            refer to :ref:`TunedThresholdClassifierCV_no_cv` for an example.

            This option should only be used when the set used to fit `estimator` is
            different from the one used to tune the cut-off point (by calling
            :meth:`TunedThresholdClassifierCV.fit`).

    refit : bool, default=True
        Whether or not to refit the classifier on the entire training set once
        the decision threshold has been found.
        Note that forcing `refit=False` on cross-validation having more
        than a single split will raise an error. Similarly, `refit=True` in
        conjunction with `cv="prefit"` will raise an error.

    n_jobs : int, default=None
        The number of jobs to run in parallel. When `cv` represents a
        cross-validation strategy, the fitting and scoring on each data split
        is done in parallel. ``None`` means 1 unless in a
        :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors. See :term:`Glossary <n_jobs>` for more details.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness of cross-validation when `cv` is a float.
        See :term:`Glossary <random_state>`.

    store_cv_results : bool, default=False
        Whether to store all scores and thresholds computed during the cross-validation
        process.

    Attributes
    ----------
    estimator_ : estimator instance
        The fitted classifier used when predicting.

    best_threshold_ : float
        The new decision threshold.

    best_score_ : float or None
        The optimal score of the objective metric, evaluated at `best_threshold_`.

    cv_results_ : dict or None
        A dictionary containing the scores and thresholds computed during the
        cross-validation process. Only exists if `store_cv_results=True`. The
        keys are `"thresholds"` and `"scores"`.

    classes_ : ndarray of shape (n_classes,)
        The class labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

    See Also
    --------
    sklearn.model_selection.FixedThresholdClassifier : Classifier that uses a
        constant threshold.
    sklearn.calibration.CalibratedClassifierCV : Estimator that calibrates
        probabilities.

    Examples
    --------
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.metrics import classification_report
    >>> from sklearn.model_selection import TunedThresholdClassifierCV, train_test_split
    >>> X, y = make_classification(
    ...     n_samples=1_000, weights=[0.9, 0.1], class_sep=0.8, random_state=42
    ... )
    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...     X, y, stratify=y, random_state=42
    ... )
    >>> classifier = RandomForestClassifier(random_state=0).fit(X_train, y_train)
    >>> print(classification_report(y_test, classifier.predict(X_test)))
                  precision    recall  f1-score   support
    <BLANKLINE>
               0       0.94      0.99      0.96       224
               1       0.80      0.46      0.59        26
    <BLANKLINE>
        accuracy                           0.93       250
       macro avg       0.87      0.72      0.77       250
    weighted avg       0.93      0.93      0.92       250
    <BLANKLINE>
    >>> classifier_tuned = TunedThresholdClassifierCV(
    ...     classifier, scoring="balanced_accuracy"
    ... ).fit(X_train, y_train)
    >>> print(
    ...     f"Cut-off point found at {classifier_tuned.best_threshold_:.3f}"
    ... )
    Cut-off point found at 0.342
    >>> print(classification_report(y_test, classifier_tuned.predict(X_test)))
                  precision    recall  f1-score   support
    <BLANKLINE>
               0       0.96      0.95      0.96       224
               1       0.61      0.65      0.63        26
    <BLANKLINE>
        accuracy                           0.92       250
       macro avg       0.78      0.80      0.79       250
    weighted avg       0.92      0.92      0.92       250
    <BLANKLINE>
    """

    # Extends the constraints of the base class with the tuning parameters.
    _parameter_constraints: dict = {
        **BaseThresholdClassifier._parameter_constraints,
        "scoring": [
            StrOptions(set(get_scorer_names())),
            callable,
            MutableMapping,
        ],
        "thresholds": [Interval(Integral, 1, None, closed="left"), "array-like"],
        "cv": [
            "cv_object",
            StrOptions({"prefit"}),
            Interval(RealNotInt, 0.0, 1.0, closed="neither"),
        ],
        "refit": ["boolean"],
        "n_jobs": [Integral, None],
        "random_state": ["random_state"],
        "store_cv_results": ["boolean"],
    }

    def __init__(
        self,
        estimator,
        *,
        scoring="balanced_accuracy",
        response_method="auto",
        thresholds=100,
        cv=None,
        refit=True,
        n_jobs=None,
        random_state=None,
        store_cv_results=False,
    ):
        super().__init__(estimator=estimator, response_method=response_method)
        self.scoring = scoring
        self.thresholds = thresholds
        self.cv = cv
        self.refit = refit
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.store_cv_results = store_cv_results

    def _fit(self, X, y, **params):
        """Fit the classifier and post-tune the decision threshold.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values.

        **params : dict
            Parameters to pass to the `fit` method of the underlying
            classifier and to the `scoring` scorer.

        Returns
        -------
        self : object
            Returns an instance of self.

        Raises
        ------
        ValueError
            If `cv="prefit"` is combined with `refit=True`, if `refit=False` is
            combined with a cross-validation having several folds, or if the
            thresholds of a fold are (nearly) constant.
        NotFittedError
            If `cv="prefit"` and `estimator` is not fitted.
        """
        # Resolve `self.cv` into a splitter object or the "prefit" marker.
        if isinstance(self.cv, Real) and 0 < self.cv < 1:
            # A float in (0, 1) is the validation size of a single shuffle split.
            cv = StratifiedShuffleSplit(
                n_splits=1, test_size=self.cv, random_state=self.random_state
            )
        elif self.cv == "prefit":
            # The estimator is used as is: it must be fitted and cannot be refit.
            if self.refit is True:
                raise ValueError("When cv='prefit', refit cannot be True.")
            try:
                check_is_fitted(self.estimator, "classes_")
            except NotFittedError as exc:
                raise NotFittedError(
                    """When cv='prefit', `estimator` must be fitted."""
                ) from exc
            cv = self.cv
        else:
            cv = check_cv(self.cv, y=y, classifier=True)
            if self.refit is False and cv.get_n_splits() > 1:
                raise ValueError("When cv has several folds, refit cannot be False.")

        routed_params = process_routing(self, "fit", **params)
        # Kept on the instance because `predict` reads the positive label from it.
        self._curve_scorer = self._get_curve_scorer()

        # in the following block, we:
        # - define the final classifier `self.estimator_` and train it if necessary
        # - define `classifier` to be used to post-tune the decision threshold
        # - define `splits` to be used to fit/score `classifier`
        if cv == "prefit":
            self.estimator_ = self.estimator
            classifier = self.estimator_
            # `train_idx=None` marks the prefit case for
            # `_fit_and_score_over_thresholds`; every sample is used for scoring.
            splits = [(None, range(_num_samples(X)))]
        else:
            self.estimator_ = clone(self.estimator)
            classifier = clone(self.estimator)
            splits = cv.split(X, y, **routed_params.splitter.split)

            if self.refit:
                # train on the whole dataset
                X_train, y_train, fit_params_train = X, y, routed_params.estimator.fit
            else:
                # single split cross-validation
                train_idx, _ = next(cv.split(X, y, **routed_params.splitter.split))
                X_train = _safe_indexing(X, train_idx)
                y_train = _safe_indexing(y, train_idx)
                fit_params_train = _check_method_params(
                    X, routed_params.estimator.fit, indices=train_idx
                )

            self.estimator_.fit(X_train, y_train, **fit_params_train)

        # One (scores, thresholds) pair per split; `zip(*...)` regroups them into
        # a tuple of per-split scores and a tuple of per-split thresholds.
        cv_scores, cv_thresholds = zip(
            *Parallel(n_jobs=self.n_jobs)(
                delayed(_fit_and_score_over_thresholds)(
                    # every split fits its own clone; a prefit classifier is reused
                    clone(classifier) if cv != "prefit" else classifier,
                    X,
                    y,
                    fit_params=routed_params.estimator.fit,
                    train_idx=train_idx,
                    val_idx=val_idx,
                    curve_scorer=self._curve_scorer,
                    score_params=routed_params.scorer.score,
                )
                for train_idx, val_idx in splits
            )
        )

        # A split whose first and last thresholds (nearly) coincide offers no
        # range of thresholds to search over.
        if any(np.isclose(th[0], th[-1]) for th in cv_thresholds):
            raise ValueError(
                "The provided estimator makes constant predictions. Therefore, it is "
                "impossible to optimize the decision threshold."
            )

        # find the global min and max thresholds across all folds
        min_threshold = min(
            split_thresholds.min() for split_thresholds in cv_thresholds
        )
        max_threshold = max(
            split_thresholds.max() for split_thresholds in cv_thresholds
        )
        if isinstance(self.thresholds, Integral):
            # an integer is a number of evenly spaced thresholds over that range
            decision_thresholds = np.linspace(
                min_threshold, max_threshold, num=self.thresholds
            )
        else:
            # user-provided thresholds are used as given
            decision_thresholds = np.asarray(self.thresholds)

        # Average the per-split score curves on the common grid and keep the
        # threshold with the highest mean score.
        objective_scores = _mean_interpolated_score(
            decision_thresholds, cv_thresholds, cv_scores
        )
        best_idx = objective_scores.argmax()
        self.best_score_ = objective_scores[best_idx]
        self.best_threshold_ = decision_thresholds[best_idx]
        if self.store_cv_results:
            self.cv_results_ = {
                "thresholds": decision_thresholds,
                "scores": objective_scores,
            }

        return self

    def predict(self, X):
        """Predict the target of new samples.

        The scores of `estimator_` are thresholded at `best_threshold_`.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The samples, as accepted by `estimator.predict`.

        Returns
        -------
        class_labels : ndarray of shape (n_samples,)
            The predicted class.
        """
        check_is_fitted(self, "estimator_")
        # The positive label is the one of the curve scorer built during `fit`.
        pos_label = self._curve_scorer._get_pos_label()
        y_score, _ = _get_response_values_binary(
            self.estimator_,
            X,
            self._get_response_method(),
            pos_label=pos_label,
        )

        return _threshold_scores_to_class_labels(
            y_score, self.best_threshold_, self.classes_, pos_label
        )

    def get_metadata_routing(self):
        """Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        """
        # Metadata passed to `fit` is routed to three consumers: the `fit` of the
        # classifier, the `split` of the splitter and the `score` of the scorer.
        router = (
            MetadataRouter(owner=self)
            .add(
                estimator=self.estimator,
                method_mapping=MethodMapping().add(callee="fit", caller="fit"),
            )
            .add(
                splitter=self.cv,
                method_mapping=MethodMapping().add(callee="split", caller="fit"),
            )
            .add(
                scorer=self._get_curve_scorer(),
                method_mapping=MethodMapping().add(callee="score", caller="fit"),
            )
        )
        return router

    def _get_curve_scorer(self):
        """Get the curve scorer based on the objective metric used."""
        # Resolve `self.scoring` into a scorer, then wrap it into a curve scorer
        # built from the response method(s) and `self.thresholds`.
        scoring = check_scoring(self.estimator, scoring=self.scoring)
        curve_scorer = _CurveScorer.from_scorer(
            scoring, self._get_response_method(), self.thresholds
        )
        return curve_scorer

View File

@@ -0,0 +1,885 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
from sklearn.model_selection._validation import learning_curve, validation_curve
from sklearn.utils._optional_dependencies import check_matplotlib_support
from sklearn.utils._plotting import _interval_max_min_ratio, _validate_score_name
class _BaseCurveDisplay:
    """Shared plotting logic for curve displays.

    `_plot_curve` reads `train_scores`, `test_scores` and (when no
    `score_name` is given) `score_name` from the instance, so subclasses are
    expected to provide these attributes.
    """

    def _plot_curve(
        self,
        x_data,
        *,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="test",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot the mean score curve(s) and their standard deviation.

        Parameters
        ----------
        x_data : ndarray
            Values of the x-axis; the scores are averaged along their second
            axis, one point per value.

        ax : matplotlib Axes, default=None
            Axes to plot on. If `None`, a new figure and axes are created.

        negate_score : bool, default=False
            Whether to negate the stored scores before plotting.

        score_name : str, default=None
            Label of the y-axis. If `None`, `self.score_name` is used.

        score_type : {"test", "train", "both"}, default="test"
            Which scores to plot.

        std_display_style : {"errorbar", "fill_between"} or None, \
                default="fill_between"
            How to display the standard deviation around the mean score.
            With `None`, only the mean is plotted.

        line_kw : dict, default=None
            Extra keyword arguments passed to `ax.plot`.

        fill_between_kw : dict, default=None
            Extra keyword arguments passed to `ax.fill_between`; they override
            the default `alpha=0.5`.

        errorbar_kw : dict, default=None
            Extra keyword arguments passed to `ax.errorbar`.

        Raises
        ------
        ValueError
            If `std_display_style` or `score_type` has an unknown value.

        Notes
        -----
        Sets `lines_`, `errorbar_`, `fill_between_`, `ax_` and `figure_` on the
        instance.
        """
        check_matplotlib_support(f"{self.__class__.__name__}.plot")

        import matplotlib.pyplot as plt

        if ax is None:
            _, ax = plt.subplots()

        train_scores, test_scores = self.train_scores, self.test_scores
        if negate_score:
            train_scores, test_scores = -train_scores, -test_scores

        if std_display_style not in ("errorbar", "fill_between", None):
            raise ValueError(
                f"Unknown std_display_style: {std_display_style}. Should be one of"
                " 'errorbar', 'fill_between', or None."
            )

        if score_type not in ("test", "train", "both"):
            raise ValueError(
                f"Unknown score_type: {score_type}. Should be one of 'test', "
                "'train', or 'both'."
            )

        # Curves to draw, keyed by legend label ("Train" before "Test").
        scores = {}
        if score_type in ("train", "both"):
            scores["Train"] = train_scores
        if score_type in ("test", "both"):
            scores["Test"] = test_scores

        if std_display_style == "errorbar":
            # mean and standard deviation are drawn together as error bars
            errorbar_kw = {} if errorbar_kw is None else errorbar_kw
            self.errorbar_ = []
            for label, fold_scores in scores.items():
                container = ax.errorbar(
                    x_data,
                    fold_scores.mean(axis=1),
                    fold_scores.std(axis=1),
                    label=label,
                    **errorbar_kw,
                )
                self.errorbar_.append(container)
            self.lines_, self.fill_between_ = None, None
        else:
            # plot the mean score
            line_kw = {} if line_kw is None else line_kw
            self.lines_ = []
            for label, fold_scores in scores.items():
                self.lines_.append(
                    *ax.plot(
                        x_data,
                        fold_scores.mean(axis=1),
                        label=label,
                        **line_kw,
                    )
                )
            self.errorbar_ = None
            self.fill_between_ = None  # overwritten below by fill_between

            if std_display_style == "fill_between":
                user_fill_kw = {} if fill_between_kw is None else fill_between_kw
                # user-provided keywords take precedence over the default alpha
                fill_between_kw = {"alpha": 0.5, **user_fill_kw}
                self.fill_between_ = []
                for fold_scores in scores.values():
                    mean_score = fold_scores.mean(axis=1)
                    std_score = fold_scores.std(axis=1)
                    band = ax.fill_between(
                        x_data,
                        mean_score - std_score,
                        mean_score + std_score,
                        **fill_between_kw,
                    )
                    self.fill_between_.append(band)

        if score_name is None:
            score_name = self.score_name
        ax.legend()

        # We found that a ratio, smaller or bigger than 5, between the largest and
        # smallest gap of the x values is a good indicator to choose between linear
        # and log scale.
        xscale = "linear"
        if _interval_max_min_ratio(x_data) > 5:
            # symlog is used as soon as a value is not strictly positive
            xscale = "symlog" if x_data.min() <= 0 else "log"

        ax.set_xscale(xscale)
        ax.set_ylabel(f"{score_name}")

        self.ax_ = ax
        self.figure_ = ax.figure
class LearningCurveDisplay(_BaseCurveDisplay):
    """Learning Curve visualization.

    It is recommended to use
    :meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` to
    create a :class:`~sklearn.model_selection.LearningCurveDisplay` instance.
    All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>` for general information
    about the visualization API and
    :ref:`detailed documentation <learning_curve>` regarding the learning
    curve visualization.

    .. versionadded:: 1.2

    Parameters
    ----------
    train_sizes : ndarray of shape (n_unique_ticks,)
        Numbers of training examples that have been used to generate the
        learning curve.

    train_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on test set.

    score_name : str, default=None
        The name of the score used in `learning_curve`. It will override the name
        inferred from the `scoring` parameter. If `scoring` is `None`, we use
        `"Score"` if `negate_score` is `False` and `"Negative score"` otherwise. If
        `scoring` is a string or a callable, we infer the name. We replace `_` by
        spaces and capitalize the first letter. We remove `neg_` and replace it by
        `"Negative"` if `negate_score` is `False` or just remove it otherwise.

    Attributes
    ----------
    ax_ : matplotlib Axes
        Axes with the learning curve.

    figure_ : matplotlib Figure
        Figure containing the learning curve.

    errorbar_ : list of matplotlib Artist or None
        When the `std_display_style` is `"errorbar"`, this is a list of
        `matplotlib.container.ErrorbarContainer` objects. If another style is
        used, `errorbar_` is `None`.

    lines_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"` or `None`, this is a
        list of `matplotlib.lines.Line2D` objects corresponding to the mean
        train and test scores. If `"errorbar"` is used, `lines_` is `None`.

    fill_between_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.collections.PolyCollection` objects. If another style is
        used, `fill_between_` is `None`.

    See Also
    --------
    sklearn.model_selection.learning_curve : Compute the learning curve.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import LearningCurveDisplay, learning_curve
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> X, y = load_iris(return_X_y=True)
    >>> tree = DecisionTreeClassifier(random_state=0)
    >>> train_sizes, train_scores, test_scores = learning_curve(
    ...     tree, X, y)
    >>> display = LearningCurveDisplay(train_sizes=train_sizes,
    ...     train_scores=train_scores, test_scores=test_scores, score_name="Score")
    >>> display.plot()
    <...>
    >>> plt.show()
    """

    def __init__(self, *, train_sizes, train_scores, test_scores, score_name=None):
        # The display only stores the precomputed curve; nothing is validated
        # or plotted until `plot` is called.
        self.train_sizes = train_sizes
        self.train_scores = train_scores
        self.test_scores = test_scores
        self.score_name = score_name

    def plot(
        self,
        ax=None,
        *,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot visualization.

        Parameters
        ----------
        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.learning_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `scoring`
            is `None`, we use `"Score"` if `negate_score` is `False` and
            `"Negative score"` otherwise. If `scoring` is a string or a callable,
            we infer the name. We replace `_` by spaces and capitalize the first
            letter. We remove `neg_` and replace it by `"Negative"` if
            `negate_score` is `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If None, no standard deviation representation is
            displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.LearningCurveDisplay`
            Object that stores computed values.
        """
        # Everything except the x-axis data and label is delegated to the
        # shared curve-plotting routine of the base class.
        curve_options = {
            "ax": ax,
            "negate_score": negate_score,
            "score_name": score_name,
            "score_type": score_type,
            "std_display_style": std_display_style,
            "line_kw": line_kw,
            "fill_between_kw": fill_between_kw,
            "errorbar_kw": errorbar_kw,
        }
        self._plot_curve(self.train_sizes, **curve_options)
        self.ax_.set_xlabel("Number of samples in the training set")
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        y,
        *,
        groups=None,
        train_sizes=np.linspace(0.1, 1.0, 5),
        cv=None,
        scoring=None,
        exploit_incremental_learning=False,
        n_jobs=None,
        pre_dispatch="all",
        verbose=0,
        shuffle=False,
        random_state=None,
        error_score=np.nan,
        fit_params=None,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Create a learning curve display from an estimator.

        Read more in the :ref:`User Guide <visualizations>` for general
        information about the visualization API and :ref:`detailed
        documentation <learning_curve>` regarding the learning curve
        visualization.

        Parameters
        ----------
        estimator : object type that implements the "fit" and "predict" methods
            An object of that type which is cloned for each validation.

        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`GroupKFold`).

        train_sizes : array-like of shape (n_ticks,), \
                default=np.linspace(0.1, 1.0, 5)
            Relative or absolute numbers of training examples that will be used
            to generate the learning curve. If the dtype is float, it is
            regarded as a fraction of the maximum size of the training set
            (that is determined by the selected validation method), i.e. it has
            to be within (0, 1]. Otherwise it is interpreted as absolute sizes
            of the training sets. Note that for classification the number of
            samples usually has to be big enough to contain at least one
            sample from each class.

        cv : int, cross-validation generator or an iterable, default=None
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - None, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable yielding (train, test) splits as arrays of indices.

            For int/None inputs, if the estimator is a classifier and `y` is
            either binary or multiclass,
            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all
            other cases, :class:`~sklearn.model_selection.KFold` is used. These
            splitters are instantiated with `shuffle=False` so the splits will
            be the same across calls.

            Refer to the :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        scoring : str or callable, default=None
            The scoring method to use when calculating the learning curve. Options:

            - str: see :ref:`scoring_string_names` for options.
            - callable: a scorer callable object (e.g., function) with signature
              ``scorer(estimator, X, y)``. See :ref:`scoring_callable` for details.
            - `None`: the `estimator`'s
              :ref:`default evaluation criterion <scoring_api_overview>` is used.

        exploit_incremental_learning : bool, default=False
            If the estimator supports incremental learning, this will be
            used to speed up fitting for different training set sizes.

        n_jobs : int, default=None
            Number of jobs to run in parallel. Training the estimator and
            computing the score are parallelized over the different training
            and test sets. `None` means 1 unless in a
            :obj:`joblib.parallel_backend` context. `-1` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        pre_dispatch : int or str, default='all'
            Number of predispatched jobs for parallel execution (default is
            all). The option can reduce the allocated memory. The str can
            be an expression like '2*n_jobs'.

        verbose : int, default=0
            Controls the verbosity: the higher, the more messages.

        shuffle : bool, default=False
            Whether to shuffle training data before taking prefixes of it
            based on `train_sizes`.

        random_state : int, RandomState instance or None, default=None
            Used when `shuffle` is True. Pass an int for reproducible
            output across multiple function calls.
            See :term:`Glossary <random_state>`.

        error_score : 'raise' or numeric, default=np.nan
            Value to assign to the score if an error occurs in estimator
            fitting. If set to 'raise', the error is raised. If a numeric value
            is given, FitFailedWarning is raised.

        fit_params : dict, default=None
            Parameters to pass to the fit method of the estimator.

        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.learning_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `scoring`
            is `None`, we use `"Score"` if `negate_score` is `False` and
            `"Negative score"` otherwise. If `scoring` is a string or a callable,
            we infer the name. We replace `_` by spaces and capitalize the first
            letter. We remove `neg_` and replace it by `"Negative"` if
            `negate_score` is `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If `None`, no representation of the standard deviation
            is displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.LearningCurveDisplay`
            Object that stores computed values.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import LearningCurveDisplay
        >>> from sklearn.tree import DecisionTreeClassifier
        >>> X, y = load_iris(return_X_y=True)
        >>> tree = DecisionTreeClassifier(random_state=0)
        >>> LearningCurveDisplay.from_estimator(tree, X, y)
        <...>
        >>> plt.show()
        """
        check_matplotlib_support(f"{cls.__name__}.from_estimator")

        # Resolve the y-axis label before the (potentially long) curve
        # computation is started.
        score_name = _validate_score_name(score_name, scoring, negate_score)

        curve_options = {
            "groups": groups,
            "train_sizes": train_sizes,
            "cv": cv,
            "scoring": scoring,
            "exploit_incremental_learning": exploit_incremental_learning,
            "n_jobs": n_jobs,
            "pre_dispatch": pre_dispatch,
            "verbose": verbose,
            "shuffle": shuffle,
            "random_state": random_state,
            "error_score": error_score,
            "return_times": False,
            # `learning_curve` receives the fit parameters under `params`.
            "params": fit_params,
        }
        sizes, scores_on_train, scores_on_test = learning_curve(
            estimator, X, y, **curve_options
        )

        display = cls(
            train_sizes=sizes,
            train_scores=scores_on_train,
            test_scores=scores_on_test,
            score_name=score_name,
        )
        # `score_name` is not forwarded: `plot` falls back on the name stored
        # on the display instance.
        return display.plot(
            ax=ax,
            negate_score=negate_score,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
class ValidationCurveDisplay(_BaseCurveDisplay):
    """Validation Curve visualization.

    It is recommended to use
    :meth:`~sklearn.model_selection.ValidationCurveDisplay.from_estimator` to
    create a :class:`~sklearn.model_selection.ValidationCurveDisplay` instance.
    All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>` for general information
    about the visualization API and :ref:`detailed documentation
    <validation_curve>` regarding the validation curve visualization.

    .. versionadded:: 1.3

    Parameters
    ----------
    param_name : str
        Name of the parameter that has been varied.

    param_range : array-like of shape (n_ticks,)
        The values of the parameter that have been evaluated.

    train_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on test set.

    score_name : str, default=None
        The name of the score used in `validation_curve`. It will override the name
        inferred from the `scoring` parameter. If `scoring` is `None`, we use
        `"Score"` if `negate_score` is `False` and `"Negative score"` otherwise. If
        `scoring` is a string or a callable, we infer the name. We replace `_` by
        spaces and capitalize the first letter. We remove `neg_` and replace it by
        `"Negative"` if `negate_score` is `False` or just remove it otherwise.

    Attributes
    ----------
    ax_ : matplotlib Axes
        Axes with the validation curve.

    figure_ : matplotlib Figure
        Figure containing the validation curve.

    errorbar_ : list of matplotlib Artist or None
        When the `std_display_style` is `"errorbar"`, this is a list of
        `matplotlib.container.ErrorbarContainer` objects. If another style is
        used, `errorbar_` is `None`.

    lines_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"` or `None`, this is a
        list of `matplotlib.lines.Line2D` objects corresponding to the mean
        train and test scores. If `"errorbar"` is used, `lines_` is `None`.

    fill_between_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.collections.PolyCollection` objects. If another style is
        used, `fill_between_` is `None`.

    See Also
    --------
    sklearn.model_selection.validation_curve : Compute the validation curve.

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import ValidationCurveDisplay, validation_curve
    >>> from sklearn.linear_model import LogisticRegression
    >>> X, y = make_classification(n_samples=1_000, random_state=0)
    >>> logistic_regression = LogisticRegression()
    >>> param_name, param_range = "C", np.logspace(-8, 3, 10)
    >>> train_scores, test_scores = validation_curve(
    ...     logistic_regression, X, y, param_name=param_name, param_range=param_range
    ... )
    >>> display = ValidationCurveDisplay(
    ...     param_name=param_name, param_range=param_range,
    ...     train_scores=train_scores, test_scores=test_scores, score_name="Score"
    ... )
    >>> display.plot()
    <...>
    >>> plt.show()
    """

    def __init__(
        self, *, param_name, param_range, train_scores, test_scores, score_name=None
    ):
        # The display only stores the precomputed curve; nothing is validated
        # or plotted until `plot` is called.
        self.param_name = param_name
        self.param_range = param_range
        self.train_scores = train_scores
        self.test_scores = test_scores
        self.score_name = score_name

    def plot(
        self,
        ax=None,
        *,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot visualization.

        Parameters
        ----------
        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.validation_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `scoring`
            is `None`, we use `"Score"` if `negate_score` is `False` and
            `"Negative score"` otherwise. If `scoring` is a string or a callable,
            we infer the name. We replace `_` by spaces and capitalize the first
            letter. We remove `neg_` and replace it by `"Negative"` if
            `negate_score` is `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If None, no standard deviation representation is
            displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.ValidationCurveDisplay`
            Object that stores computed values.
        """
        # Everything except the x-axis data and label is delegated to the
        # shared curve-plotting routine of the base class.
        curve_options = {
            "ax": ax,
            "negate_score": negate_score,
            "score_name": score_name,
            "score_type": score_type,
            "std_display_style": std_display_style,
            "line_kw": line_kw,
            "fill_between_kw": fill_between_kw,
            "errorbar_kw": errorbar_kw,
        }
        self._plot_curve(self.param_range, **curve_options)
        self.ax_.set_xlabel(f"{self.param_name}")
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        y,
        *,
        param_name,
        param_range,
        groups=None,
        cv=None,
        scoring=None,
        n_jobs=None,
        pre_dispatch="all",
        verbose=0,
        error_score=np.nan,
        fit_params=None,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Create a validation curve display from an estimator.

        Read more in the :ref:`User Guide <visualizations>` for general
        information about the visualization API and :ref:`detailed
        documentation <validation_curve>` regarding the validation curve
        visualization.

        Parameters
        ----------
        estimator : object type that implements the "fit" and "predict" methods
            An object of that type which is cloned for each validation.

        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        param_name : str
            Name of the parameter that will be varied.

        param_range : array-like of shape (n_values,)
            The values of the parameter that will be evaluated.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`GroupKFold`).

        cv : int, cross-validation generator or an iterable, default=None
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - None, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable yielding (train, test) splits as arrays of indices.

            For int/None inputs, if the estimator is a classifier and `y` is
            either binary or multiclass,
            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all
            other cases, :class:`~sklearn.model_selection.KFold` is used. These
            splitters are instantiated with `shuffle=False` so the splits will
            be the same across calls.

            Refer to the :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        scoring : str or callable, default=None
            Scoring method to use when computing the validation curve. Options:

            - str: see :ref:`scoring_string_names` for options.
            - callable: a scorer callable object (e.g., function) with signature
              ``scorer(estimator, X, y)``. See :ref:`scoring_callable` for details.
            - `None`: the `estimator`'s
              :ref:`default evaluation criterion <scoring_api_overview>` is used.

        n_jobs : int, default=None
            Number of jobs to run in parallel. Training the estimator and
            computing the score are parallelized over the different training
            and test sets. `None` means 1 unless in a
            :obj:`joblib.parallel_backend` context. `-1` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        pre_dispatch : int or str, default='all'
            Number of predispatched jobs for parallel execution (default is
            all). The option can reduce the allocated memory. The str can
            be an expression like '2*n_jobs'.

        verbose : int, default=0
            Controls the verbosity: the higher, the more messages.

        error_score : 'raise' or numeric, default=np.nan
            Value to assign to the score if an error occurs in estimator
            fitting. If set to 'raise', the error is raised. If a numeric value
            is given, FitFailedWarning is raised.

        fit_params : dict, default=None
            Parameters to pass to the fit method of the estimator.

        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.validation_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `scoring`
            is `None`, we use `"Score"` if `negate_score` is `False` and
            `"Negative score"` otherwise. If `scoring` is a string or a callable,
            we infer the name. We replace `_` by spaces and capitalize the first
            letter. We remove `neg_` and replace it by `"Negative"` if
            `negate_score` is `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If `None`, no representation of the standard deviation
            is displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.ValidationCurveDisplay`
            Object that stores computed values.

        Examples
        --------
        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import make_classification
        >>> from sklearn.model_selection import ValidationCurveDisplay
        >>> from sklearn.linear_model import LogisticRegression
        >>> X, y = make_classification(n_samples=1_000, random_state=0)
        >>> logistic_regression = LogisticRegression()
        >>> param_name, param_range = "C", np.logspace(-8, 3, 10)
        >>> ValidationCurveDisplay.from_estimator(
        ...     logistic_regression, X, y, param_name=param_name,
        ...     param_range=param_range,
        ... )
        <...>
        >>> plt.show()
        """
        check_matplotlib_support(f"{cls.__name__}.from_estimator")

        # Resolve the y-axis label before the (potentially long) curve
        # computation is started.
        score_name = _validate_score_name(score_name, scoring, negate_score)

        curve_options = {
            "param_name": param_name,
            "param_range": param_range,
            "groups": groups,
            "cv": cv,
            "scoring": scoring,
            "n_jobs": n_jobs,
            "pre_dispatch": pre_dispatch,
            "verbose": verbose,
            "error_score": error_score,
            # `validation_curve` receives the fit parameters under `params`.
            "params": fit_params,
        }
        scores_on_train, scores_on_test = validation_curve(
            estimator, X, y, **curve_options
        )

        display = cls(
            param_name=param_name,
            # Stored as an ndarray regardless of the container passed in.
            param_range=np.asarray(param_range),
            train_scores=scores_on_train,
            test_scores=scores_on_test,
            score_name=score_name,
        )
        # `score_name` is not forwarded: `plot` falls back on the name stored
        # on the display instance.
        return display.plot(
            ax=ax,
            negate_score=negate_score,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,24 @@
"""
Common utilities for testing model selection.
"""
import numpy as np
from sklearn.model_selection import KFold
class OneTimeSplitter:
    """Single-use cv iterator built on top of `KFold`.

    The `KFold` splits are generated once, at construction time, and stored
    as one shared iterator. Every call to `split` draws from that same
    iterator, so the splits can only be consumed a single time.
    """

    def __init__(self, n_splits=4, n_samples=99):
        self.n_splits = n_splits
        self.n_samples = n_samples
        # Only the number of rows matters to `KFold`, hence the dummy data.
        placeholder_data = np.ones(n_samples)
        k_fold = KFold(n_splits=n_splits)
        self.indices = iter(k_fold.split(placeholder_data))

    def split(self, X=None, y=None, groups=None):
        """Yield the remaining splits; arguments are accepted but ignored."""
        # Deliberately a plain loop over the shared iterator: whatever is
        # consumed here is gone for any later call.
        for train_test_pair in self.indices:
            yield train_test_pair

    def get_n_splits(self, X=None, y=None, groups=None):
        """Return the number of splits given at construction time."""
        return self.n_splits

View File

@@ -0,0 +1,618 @@
import numpy as np
import pytest
from sklearn import config_context
from sklearn.base import clone
from sklearn.datasets import (
load_breast_cancer,
load_iris,
make_classification,
make_multilabel_classification,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
balanced_accuracy_score,
f1_score,
fbeta_score,
make_scorer,
)
from sklearn.metrics._scorer import _CurveScorer
from sklearn.model_selection import (
FixedThresholdClassifier,
StratifiedShuffleSplit,
TunedThresholdClassifierCV,
)
from sklearn.model_selection._classification_threshold import (
_fit_and_score_over_thresholds,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._mocking import CheckingClassifier
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_array_equal,
)
def test_fit_and_score_over_thresholds_curve_scorers():
    """Check that `_fit_and_score_over_thresholds` returns thresholds in ascending
    order and balanced-accuracy scores in [0, 1] for a curve scorer."""
    X, y = make_classification(n_samples=100, random_state=0)
    # first half of the samples to train, second half to validate
    train_idx = np.arange(50)
    val_idx = np.arange(50, 100)

    balanced_accuracy_curve = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        LogisticRegression(),
        X,
        y,
        fit_params={},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=balanced_accuracy_curve,
        score_params={},
    )

    lower, upper = thresholds[:-1], thresholds[1:]
    assert np.all(lower <= upper)
    assert isinstance(scores, np.ndarray)
    assert ((scores >= 0) & (scores <= 1)).all()
def test_fit_and_score_over_thresholds_prefit():
    """Check the behaviour with a prefit classifier."""
    X, y = make_classification(n_samples=100, random_state=0)
    # passing `train_idx=None` signals that the classifier is already fitted
    train_idx = None
    val_idx = np.arange(50, 100)

    fitted_tree = DecisionTreeClassifier(random_state=0).fit(X, y)
    # the tree must have memorized the whole dataset so that predictions are
    # perfect and the expected scores below hold
    validation_accuracy = fitted_tree.score(X[val_idx], y[val_idx])
    assert validation_accuracy == pytest.approx(1.0)

    balanced_accuracy_curve = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=2,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        fitted_tree,
        X,
        y,
        fit_params={},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=balanced_accuracy_curve,
        score_params={},
    )

    lower, upper = thresholds[:-1], thresholds[1:]
    assert np.all(lower <= upper)
    assert_allclose(scores, [0.5, 1.0])
@config_context(enable_metadata_routing=True)
def test_fit_and_score_over_thresholds_sample_weight():
    """Check that we dispatch the sample-weight to fit and score the classifier."""
    X, y = load_iris(return_X_y=True)
    X, y = X[:100], y[:100]  # only 2 classes

    # reference dataset: the samples of class #0 appear twice
    is_class_0 = y == 0
    X_repeated = np.vstack([X, X[is_class_0]])
    y_repeated = np.hstack([y, y[is_class_0]])

    # equivalent formulation: original dataset with a weight of 2 on class #0
    sample_weight = np.ones_like(y)
    sample_weight[:50] *= 2

    classifier = LogisticRegression()
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )

    # 1) unweighted run on the repeated dataset (train and validate on all of it)
    train_repeated_idx = np.arange(X_repeated.shape[0])
    val_repeated_idx = np.arange(X_repeated.shape[0])
    scores_repeated, thresholds_repeated = _fit_and_score_over_thresholds(
        classifier,
        X_repeated,
        y_repeated,
        fit_params={},
        train_idx=train_repeated_idx,
        val_idx=val_repeated_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )

    # 2) weighted run on the original dataset; the metadata requests are only
    # set now, after the unweighted run above
    train_idx = np.arange(X.shape[0])
    val_idx = np.arange(X.shape[0])
    weighted_classifier = classifier.set_fit_request(sample_weight=True)
    weighted_scorer = curve_scorer.set_score_request(sample_weight=True)
    scores, thresholds = _fit_and_score_over_thresholds(
        weighted_classifier,
        X,
        y,
        fit_params={"sample_weight": sample_weight},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=weighted_scorer,
        score_params={"sample_weight": sample_weight},
    )

    assert_allclose(thresholds_repeated, thresholds)
    assert_allclose(scores_repeated, scores)
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
@config_context(enable_metadata_routing=True)
def test_fit_and_score_over_thresholds_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    # two sample-aligned fit parameters, each an independent conversion of `y`
    fit_params = {
        name: _convert_container(y, fit_params_type) for name in ("a", "b")
    }

    # the mock classifier checks that the expected fit params reach `fit`
    classifier = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    classifier.set_fit_request(a=True, b=True)

    train_idx = np.arange(50)
    val_idx = np.arange(50, 100)
    balanced_accuracy_curve = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )

    _fit_and_score_over_thresholds(
        classifier,
        X,
        y,
        fit_params=fit_params,
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=balanced_accuracy_curve,
        score_params={},
    )
@pytest.mark.parametrize(
    "data",
    [
        make_classification(n_classes=3, n_clusters_per_class=1, random_state=0),
        make_multilabel_classification(random_state=0),
    ],
)
def test_tuned_threshold_classifier_no_binary(data):
    """Check that we raise an informative error message for non-binary problem."""
    expected_message = "Only binary classification is supported."
    with pytest.raises(ValueError, match=expected_message):
        tuned_model = TunedThresholdClassifierCV(LogisticRegression())
        tuned_model.fit(*data)
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        (
            {"cv": "prefit", "refit": True},
            ValueError,
            "When cv='prefit', refit cannot be True.",
        ),
        (
            {"cv": 10, "refit": False},
            ValueError,
            "When cv has several folds, refit cannot be False.",
        ),
        (
            {"cv": "prefit", "refit": False},
            NotFittedError,
            "`estimator` must be fitted.",
        ),
    ],
)
def test_tuned_threshold_classifier_conflict_cv_refit(params, err_type, err_msg):
    """Check that we raise an informative error message when `cv` and `refit`
    cannot be used together.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    with pytest.raises(err_type, match=err_msg):
        tuned_model = TunedThresholdClassifierCV(LogisticRegression(), **params)
        tuned_model.fit(X, y)
@pytest.mark.parametrize(
    "estimator",
    [LogisticRegression(), SVC(), GradientBoostingClassifier(n_estimators=4)],
)
@pytest.mark.parametrize(
    "response_method", ["predict_proba", "predict_log_proba", "decision_function"]
)
@pytest.mark.parametrize(
    "ThresholdClassifier", [FixedThresholdClassifier, TunedThresholdClassifierCV]
)
def test_threshold_classifier_estimator_response_methods(
    ThresholdClassifier, estimator, response_method
):
    """Check that `FixedThresholdClassifier` and `TunedThresholdClassifierCV` expose
    the same response methods as the underlying estimator.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    model = ThresholdClassifier(estimator=estimator)

    # availability of the method must match both before and after fitting
    assert hasattr(model, response_method) == hasattr(estimator, response_method)
    model.fit(X, y)
    assert hasattr(model, response_method) == hasattr(estimator, response_method)

    if not hasattr(model, response_method):
        return

    # when exposed, the method must return the inner estimator's output
    model_method = getattr(model, response_method)
    y_pred_cutoff = model_method(X)
    inner_method = getattr(model.estimator_, response_method)
    y_pred_underlying_estimator = inner_method(X)
    assert_allclose(y_pred_cutoff, y_pred_underlying_estimator)
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
def test_tuned_threshold_classifier_without_constraint_value(response_method):
    """Check that tuning the threshold improves the objective metric.

    On a strongly imbalanced problem, the balanced accuracy obtained with the
    tuned threshold must beat the one of the untuned pipeline, and the stored
    CV results must contain one entry per candidate threshold.
    """
    n_thresholds = 100
    X_full, y_full = load_breast_cancer(return_X_y=True)
    # keep only a few features to degrade performances
    X_full = X_full[:, :5]

    # keep every negative sample but only a tiny fraction of the positive ones so
    # that the problem is heavily imbalanced and the balanced accuracy is low
    negatives = np.flatnonzero(y_full == 0)
    positives = np.flatnonzero(y_full == 1)
    positives = positives[: positives.size // 50]
    kept = np.concatenate([negatives, positives])
    X, y = X_full[kept], y_full[kept]

    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)
    model = TunedThresholdClassifierCV(
        estimator=lr,
        scoring="balanced_accuracy",
        response_method=response_method,
        thresholds=n_thresholds,
        store_cv_results=True,
    )
    tuned_predictions = model.fit(X, y).predict(X)
    score_optimized = balanced_accuracy_score(y, tuned_predictions)
    score_baseline = balanced_accuracy_score(y, lr.predict(X))

    assert score_optimized > score_baseline
    for key in ("thresholds", "scores"):
        assert model.cv_results_[key].shape == (n_thresholds,)
def test_tuned_threshold_classifier_metric_with_parameter():
    """Check that a scorer carrying an extra parameter is honoured.

    The threshold found with `fbeta_score(beta=1)` must equal the one found
    with `f1_score`, and must differ from the one found with
    `fbeta_score(beta=2)`.
    """
    X, y = load_breast_cancer(return_X_y=True)
    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)

    def tune(scorer):
        # fit a fresh tuner around the same pipeline for the given scorer
        return TunedThresholdClassifierCV(estimator=lr, scoring=scorer).fit(X, y)

    model_fbeta_1 = tune(make_scorer(fbeta_score, beta=1))
    model_fbeta_2 = tune(make_scorer(fbeta_score, beta=2))
    model_f1 = tune(make_scorer(f1_score))

    threshold_fbeta_1 = model_fbeta_1.best_threshold_
    assert threshold_fbeta_1 == pytest.approx(model_f1.best_threshold_)
    assert threshold_fbeta_1 != pytest.approx(model_fbeta_2.best_threshold_)
@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
@pytest.mark.parametrize(
    "metric",
    [
        make_scorer(balanced_accuracy_score),
        make_scorer(f1_score, pos_label="cancer"),
    ],
)
def test_tuned_threshold_classifier_with_string_targets(response_method, metric):
    """Check that string-valued targets are handled correctly.

    Several metrics are exercised to make sure that `pos_label` is properly
    dispatched.
    """
    X, y_encoded = load_breast_cancer(return_X_y=True)
    # Map the numeric targets to meaningful strings. The names are chosen on
    # purpose so that the `pos_label` is the first class in alphabetical order
    # and is therefore encoded as 0.
    labels = np.array(["cancer", "healthy"], dtype=object)
    y = labels[y_encoded]
    sorted_labels = np.sort(labels)

    pipeline = make_pipeline(StandardScaler(), LogisticRegression())
    model = TunedThresholdClassifierCV(
        estimator=pipeline,
        scoring=metric,
        response_method=response_method,
        thresholds=100,
    )
    model.fit(X, y)

    assert_array_equal(model.classes_, sorted_labels)
    predicted = model.predict(X)
    assert_array_equal(np.unique(predicted), sorted_labels)
@pytest.mark.parametrize("with_sample_weight", [True, False])
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_refit(with_sample_weight, global_random_seed):
    """Check how `refit` and `cv` decide which data `estimator_` is trained on.

    Three scenarios are covered: refit on the full dataset, a prefit estimator
    that must be left untouched, and a single explicit split where only the
    training part is used.
    """
    rng = np.random.RandomState(global_random_seed)
    X, y = make_classification(n_samples=100, random_state=0)
    sample_weight = np.abs(rng.randn(X.shape[0])) if with_sample_weight else None

    # `refit=True`: `estimator_` is a new object fitted on the full dataset
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model = TunedThresholdClassifierCV(estimator, refit=True)
    model.fit(X, y, sample_weight=sample_weight)
    assert model.estimator_ is not estimator
    estimator.fit(X, y, sample_weight=sample_weight)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
    assert_allclose(model.estimator_.intercept_, estimator.intercept_)

    # `cv="prefit"` with `refit=False`: the given estimator is reused unaltered
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    estimator.fit(X, y, sample_weight=sample_weight)
    coef_before = estimator.coef_.copy()
    model = TunedThresholdClassifierCV(estimator, cv="prefit", refit=False)
    model.fit(X, y, sample_weight=sample_weight)
    assert model.estimator_ is estimator
    assert_allclose(model.estimator_.coef_, coef_before)

    # single explicit split with `refit=False`: `estimator_` is trained on the
    # training indices of that split only
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    cv = [(np.arange(50), np.arange(50, 100))]
    train_indices = cv[0][0]
    model = TunedThresholdClassifierCV(estimator, cv=cv, refit=False)
    model.fit(X, y, sample_weight=sample_weight)
    assert model.estimator_ is not estimator
    sw_train = sample_weight[train_indices] if with_sample_weight else None
    estimator.fit(X[train_indices], y[train_indices], sample_weight=sw_train)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
@pytest.mark.parametrize("fit_params_type", ["list", "array"])
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_fit_params(fit_params_type):
    """Check that extra `fit` keyword arguments reach the wrapped classifier.

    `CheckingClassifier` is configured to expect the parameters "a" and "b",
    which are passed as either lists or arrays.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    fit_params = {
        name: _convert_container(y, fit_params_type) for name in ("a", "b")
    }

    classifier = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    classifier.set_fit_request(a=True, b=True)

    TunedThresholdClassifierCV(classifier).fit(X, y, **fit_params)
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_cv_zeros_sample_weights_equivalence():
    """Check that dropping samples is equivalent to giving them a zero weight.

    One model is fitted on every sample with a weight of 0 on every other
    sample; the other is fitted on the remaining samples only. Both must end
    up with the same coefficients and the same predicted probabilities.
    """
    X, y = load_iris(return_X_y=True)
    # Scale the data to avoid any convergence issue
    X = StandardScaler().fit_transform(X)
    # Only use 2 classes and select samples such that 2-fold cross-validation
    # split will lead to an equivalence with a `sample_weight` of 0
    X = np.vstack((X[:40], X[50:90]))
    y = np.hstack((y[:40], y[50:90]))
    sample_weight = np.zeros_like(y)
    sample_weight[::2] = 1

    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model_without_weights = TunedThresholdClassifierCV(estimator, cv=2)
    model_with_weights = clone(model_without_weights)

    model_with_weights.fit(X, y, sample_weight=sample_weight)
    model_without_weights.fit(X[::2], y[::2])

    coef_weighted = model_with_weights.estimator_.coef_
    coef_subsampled = model_without_weights.estimator_.coef_
    assert_allclose(coef_weighted, coef_subsampled)

    proba_weighted = model_with_weights.predict_proba(X)
    proba_subsampled = model_without_weights.predict_proba(X)
    assert_allclose(proba_weighted, proba_subsampled)
def test_tuned_threshold_classifier_thresholds_array():
    """Check that an array given as `thresholds` is used as the candidate grid.

    The thresholds stored in `cv_results_` must match the array passed in.
    """
    X, y = make_classification(random_state=0)
    candidate_thresholds = np.linspace(0, 1, 11)
    tuned_model = TunedThresholdClassifierCV(
        LogisticRegression(),
        thresholds=candidate_thresholds,
        response_method="predict_proba",
        store_cv_results=True,
    )
    tuned_model.fit(X, y)
    stored_thresholds = tuned_model.cv_results_["thresholds"]
    assert_allclose(stored_thresholds, candidate_thresholds)
@pytest.mark.parametrize("store_cv_results", [True, False])
def test_tuned_threshold_classifier_store_cv_results(store_cv_results):
    """Check that `cv_results_` exists only when `store_cv_results` is True."""
    X, y = make_classification(random_state=0)
    tuned_model = TunedThresholdClassifierCV(
        LogisticRegression(), store_cv_results=store_cv_results
    )
    tuned_model.fit(X, y)
    has_cv_results = hasattr(tuned_model, "cv_results_")
    assert has_cv_results == store_cv_results
def test_tuned_threshold_classifier_cv_float():
    """Check the behaviour when `cv` is set to a float.

    With `refit=False`, the coefficients of `estimator_` must match those of an
    estimator trained on the training part of a single
    `StratifiedShuffleSplit` using the same `test_size` and `random_state`.
    With `refit=True`, they must match an estimator trained on the full
    dataset.
    """
    X, y = make_classification(random_state=0)

    # case where `refit=False` and cv is a float: the underlying estimator is
    # compared against one fitted on the training set given by a single
    # stratified shuffle split with the same `test_size` and seed.
    test_size = 0.3
    estimator = LogisticRegression()
    # A single fit is enough: `random_state` is fixed, so fitting a second time
    # would only repeat the same work.
    tuned_model = TunedThresholdClassifierCV(
        estimator, cv=test_size, refit=False, random_state=0
    ).fit(X, y)

    cv = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=0)
    # only the training indices are needed for the comparison
    train_idx, _ = next(cv.split(X, y))
    cloned_estimator = clone(estimator).fit(X[train_idx], y[train_idx])

    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)

    # case where `refit=True`, then the underlying estimator is fitted on the full
    # dataset.
    tuned_model.set_params(refit=True).fit(X, y)
    cloned_estimator = clone(estimator).fit(X, y)

    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)
def test_tuned_threshold_classifier_error_constant_predictor():
    """Check that a constant predictor makes `fit` raise a `ValueError`.

    A `DummyClassifier` that always predicts class 1 is wrapped, and the
    resulting error message is checked.
    """
    X, y = make_classification(random_state=0)
    constant_classifier = DummyClassifier(strategy="constant", constant=1)
    tuned_model = TunedThresholdClassifierCV(
        constant_classifier, response_method="predict_proba"
    )
    with pytest.raises(
        ValueError, match="The provided estimator makes constant predictions"
    ):
        tuned_model.fit(X, y)
@pytest.mark.parametrize(
    "response_method", ["auto", "predict_proba", "decision_function"]
)
def test_fixed_threshold_classifier_equivalence_default(response_method):
    """Check `FixedThresholdClassifier` predictions with its default threshold.

    The predictions must equal the result of thresholding the scores of the
    response method at 0.5 for probabilities and at 0.0 for the decision
    function.
    """
    X, y = make_classification(random_state=0)
    classifier = LogisticRegression().fit(X, y)
    wrapped = FixedThresholdClassifier(
        estimator=clone(classifier), response_method=response_method
    )
    wrapped.fit(X, y)

    # emulate the response method that should take into account the `pos_label`
    if response_method in ("auto", "predict_proba"):
        y_score, threshold = wrapped.predict_proba(X)[:, 1], 0.5
    else:  # response_method == "decision_function"
        y_score, threshold = wrapped.decision_function(X), 0.0

    expected_predictions = (y_score >= threshold).astype(int)
    assert_allclose(wrapped.predict(X), expected_predictions)
@pytest.mark.parametrize(
    "response_method, threshold", [("predict_proba", 0.7), ("decision_function", 2.0)]
)
@pytest.mark.parametrize("pos_label", [0, 1])
def test_fixed_threshold_classifier(response_method, threshold, pos_label):
    """Check that `predict` applies the threshold to the response method output.

    The inner estimator must match a plain `LogisticRegression`, and every
    response method must return the same values as that reference model.
    """
    X, y = make_classification(n_samples=50, random_state=0)
    logistic_regression = LogisticRegression().fit(X, y)
    model = FixedThresholdClassifier(
        estimator=clone(logistic_regression),
        threshold=threshold,
        response_method=response_method,
        pos_label=pos_label,
    )
    model.fit(X, y)

    # check that the underlying estimator is the same
    assert_allclose(model.estimator_.coef_, logistic_regression.coef_)

    # emulate the response method that should take into account the `pos_label`
    if response_method == "predict_proba":
        y_score = model.predict_proba(X)[:, pos_label]
    else:  # response_method == "decision_function"
        raw_score = model.decision_function(X)
        # the sign is flipped when class 0 is the positive label
        y_score = raw_score if pos_label == 1 else -raw_score

    # samples at or above the threshold get `pos_label`, the others get the
    # remaining class of this 0/1 problem
    expected_predictions = np.where(y_score >= threshold, pos_label, 1 - pos_label)
    assert_allclose(model.predict(X), expected_predictions)

    for method in ("predict_proba", "predict_log_proba", "decision_function"):
        reference = getattr(logistic_regression, method)
        assert_allclose(getattr(model, method)(X), reference(X))
        assert_allclose(getattr(model.estimator_, method)(X), reference(X))
@config_context(enable_metadata_routing=True)
def test_fixed_threshold_classifier_metadata_routing():
    """Check that `sample_weight` is routed to the wrapped estimator.

    The coefficients of the inner estimator must match those of a reference
    classifier fitted directly with the same weights.
    """
    X, y = make_classification(random_state=0)
    sample_weight = np.ones_like(y)
    sample_weight[::2] = 2

    reference = LogisticRegression().set_fit_request(sample_weight=True)
    reference.fit(X, y, sample_weight=sample_weight)

    wrapped = FixedThresholdClassifier(estimator=clone(reference))
    wrapped.fit(X, y, sample_weight=sample_weight)

    assert_allclose(wrapped.estimator_.coef_, reference.coef_)
@pytest.mark.parametrize(
    "method", ["predict_proba", "decision_function", "predict", "predict_log_proba"]
)
def test_fixed_threshold_classifier_fitted_estimator(method):
    """Check that a wrapper around an already fitted estimator needs no `fit`."""
    X, y = make_classification(random_state=0)
    fitted_classifier = LogisticRegression().fit(X, y)
    wrapper = FixedThresholdClassifier(estimator=fitted_classifier)
    # calling the method on the unfitted wrapper should not raise an error
    bound_method = getattr(wrapper, method)
    bound_method(X)
def test_fixed_threshold_classifier_classes_():
    """Check the `classes_` attribute of `FixedThresholdClassifier`.

    Accessing it around an unfitted estimator raises `AttributeError`; around a
    fitted estimator it equals that estimator's `classes_`.
    """
    X, y = make_classification(random_state=0)

    unfitted_wrapper = FixedThresholdClassifier(estimator=LogisticRegression())
    not_fitted_message = "The underlying estimator is not fitted yet."
    with pytest.raises(AttributeError, match=not_fitted_message):
        unfitted_wrapper.classes_

    classifier = LogisticRegression().fit(X, y)
    fitted_wrapper = FixedThresholdClassifier(estimator=classifier)
    assert_array_equal(fitted_wrapper.classes_, classifier.classes_)

View File

@@ -0,0 +1,572 @@
import numpy as np
import pytest
from sklearn.datasets import load_iris
from sklearn.model_selection import (
LearningCurveDisplay,
ValidationCurveDisplay,
learning_curve,
validation_curve,
)
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils import shuffle
from sklearn.utils._testing import assert_allclose, assert_array_equal
@pytest.fixture
def data():
    """Return the iris features and target, shuffled with a fixed seed."""
    features, target = load_iris(return_X_y=True)
    return shuffle(features, target, random_state=0)
@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        ({"std_display_style": "invalid"}, ValueError, "Unknown std_display_style:"),
        ({"score_type": "invalid"}, ValueError, "Unknown score_type:"),
    ],
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_parameters_validation(
    pyplot, data, params, err_type, err_msg, CurveDisplay, specific_params
):
    """Check that invalid plotting parameters raise the expected error.

    `specific_params` holds the arguments required by the display class under
    test, and `params` holds the invalid option.
    """
    X, y = data
    tree = DecisionTreeClassifier(random_state=0)
    all_kwargs = {**specific_params, **params}

    with pytest.raises(err_type, match=err_msg):
        CurveDisplay.from_estimator(tree, X, y, **all_kwargs)
def test_learning_curve_display_default_usage(pyplot, data):
    """Check `LearningCurveDisplay.from_estimator` with default options.

    Covers the artists created, the axis labels, the legend, and the stored
    sizes and scores, which must match a direct `learning_curve` call.
    """
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    train_sizes = [0.3, 0.6, 0.9]

    display = LearningCurveDisplay.from_estimator(
        estimator, X, y, train_sizes=train_sizes
    )

    import matplotlib as mpl

    # by default lines and filled bands are drawn, and no error bars
    assert display.errorbar_ is None

    assert isinstance(display.lines_, list)
    assert all(isinstance(line, mpl.lines.Line2D) for line in display.lines_)

    assert isinstance(display.fill_between_, list)
    for band in display.fill_between_:
        assert isinstance(band, mpl.collections.PolyCollection)
        assert band.get_alpha() == 0.5

    assert display.score_name == "Score"
    axes = display.ax_
    assert axes.get_xlabel() == "Number of samples in the training set"
    assert axes.get_ylabel() == "Score"

    legend_labels = axes.get_legend_handles_labels()[1]
    assert legend_labels == ["Train", "Test"]

    # the display must store the same values as a direct `learning_curve` call
    expected_sizes, expected_train, expected_test = learning_curve(
        estimator, X, y, train_sizes=train_sizes
    )
    assert_array_equal(display.train_sizes, expected_sizes)
    assert_allclose(display.train_scores, expected_train)
    assert_allclose(display.test_scores, expected_test)
def test_validation_curve_display_default_usage(pyplot, data):
    """Check `ValidationCurveDisplay.from_estimator` with default options.

    Covers the artists created, the axis labels, the legend, and the stored
    parameter range and scores, which must match a direct `validation_curve`
    call.
    """
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    param_name = "max_depth"
    param_range = [1, 3, 5]

    display = ValidationCurveDisplay.from_estimator(
        estimator, X, y, param_name=param_name, param_range=param_range
    )

    import matplotlib as mpl

    # by default lines and filled bands are drawn, and no error bars
    assert display.errorbar_ is None

    assert isinstance(display.lines_, list)
    assert all(isinstance(line, mpl.lines.Line2D) for line in display.lines_)

    assert isinstance(display.fill_between_, list)
    for band in display.fill_between_:
        assert isinstance(band, mpl.collections.PolyCollection)
        assert band.get_alpha() == 0.5

    assert display.score_name == "Score"
    axes = display.ax_
    assert axes.get_xlabel() == f"{param_name}"
    assert axes.get_ylabel() == "Score"

    legend_labels = axes.get_legend_handles_labels()[1]
    assert legend_labels == ["Train", "Test"]

    # the display must store the same values as a direct `validation_curve` call
    expected_train, expected_test = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range
    )
    assert_array_equal(display.param_range, param_range)
    assert_allclose(display.train_scores, expected_train)
    assert_allclose(display.test_scores, expected_test)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_negate_score(pyplot, data, CurveDisplay, specific_params):
    """Check `negate_score` when passed to `from_estimator` and to `plot`.

    Passed to `from_estimator`, it flips the sign of the plotted scores and
    changes the y-axis label. Passed to `plot` on an existing display, it flips
    the plotted scores and the y-axis label stays "Score".
    """
    X, y = data
    estimator = DecisionTreeClassifier(max_depth=1, random_state=0)

    def build_display(negate_score):
        return CurveDisplay.from_estimator(
            estimator, X, y, **specific_params, negate_score=negate_score
        )

    def first_curve_scores(display):
        # y-values of the first plotted line
        return display.lines_[0].get_data()[1]

    display = build_display(False)
    positive_scores = first_curve_scores(display)
    assert (positive_scores >= 0).all()
    assert display.ax_.get_ylabel() == "Score"

    display = build_display(True)
    negative_scores = first_curve_scores(display)
    assert (negative_scores <= 0).all()
    assert_allclose(negative_scores, -positive_scores)
    assert display.ax_.get_ylabel() == "Negative score"

    # re-plotting a non-negated display with `negate_score=True`
    display = build_display(False)
    assert display.ax_.get_ylabel() == "Score"
    display.plot(negate_score=True)
    assert display.ax_.get_ylabel() == "Score"
    assert (first_curve_scores(display) < 0).all()
@pytest.mark.parametrize(
    "score_name, ylabel", [(None, "Score"), ("Accuracy", "Accuracy")]
)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_score_name(
    pyplot, data, score_name, ylabel, CurveDisplay, specific_params
):
    """Check that `score_name` overrides the default y-axis label.

    The expected name must appear as the y-axis label and be stored in the
    `score_name` attribute of the display.
    """
    # the y-axis label reflects the requested score name
    X, y = data
    unconstrained_tree = DecisionTreeClassifier(random_state=0)
    display = CurveDisplay.from_estimator(
        unconstrained_tree, X, y, **specific_params, score_name=score_name
    )
    assert display.ax_.get_ylabel() == ylabel

    # the `score_name` attribute reflects it as well, here with a depth-1 tree
    X, y = data
    shallow_tree = DecisionTreeClassifier(max_depth=1, random_state=0)
    display = CurveDisplay.from_estimator(
        shallow_tree, X, y, **specific_params, score_name=score_name
    )
    assert display.score_name == ylabel
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_learning_curve_display_score_type(pyplot, data, std_display_style):
    """Check which curves `LearningCurveDisplay` draws for each `score_type`.

    For "train", "test" and "both", the legend, the number of artists and the
    plotted data are compared with the mean scores returned by
    `learning_curve`. Curves are read from `lines_` when `std_display_style`
    is None and from `errorbar_` otherwise.
    """
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    train_sizes = [0.3, 0.6, 0.9]
    train_sizes_abs, train_scores, test_scores = learning_curve(
        estimator, X, y, train_sizes=train_sizes
    )

    def build_display(score_type):
        return LearningCurveDisplay.from_estimator(
            estimator,
            X,
            y,
            train_sizes=train_sizes,
            score_type=score_type,
            std_display_style=std_display_style,
        )

    def curves_data(display, n_expected):
        # check the artist containers, then return one (x, y) pair per curve
        if std_display_style is None:
            assert len(display.lines_) == n_expected
            assert display.errorbar_ is None
            return [line.get_data() for line in display.lines_]
        assert display.lines_ is None
        assert len(display.errorbar_) == n_expected
        return [container.lines[0].get_data() for container in display.errorbar_]

    single_curve_cases = (
        ("train", "Train", train_scores),
        ("test", "Test", test_scores),
    )
    for score_type, expected_label, expected_scores in single_curve_cases:
        display = build_display(score_type)
        _, legend_label = display.ax_.get_legend_handles_labels()
        assert legend_label == [expected_label]

        [(x_data, y_data)] = curves_data(display, 1)
        assert_array_equal(x_data, train_sizes_abs)
        assert_allclose(y_data, expected_scores.mean(axis=1))

    display = build_display("both")
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]

    (x_data_train, y_data_train), (x_data_test, y_data_test) = curves_data(
        display, 2
    )
    assert_array_equal(x_data_train, train_sizes_abs)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, train_sizes_abs)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
def test_validation_curve_display_score_type(pyplot, data, std_display_style):
    """Check which curves `ValidationCurveDisplay` draws for each `score_type`.

    For "train", "test" and "both", the legend, the number of artists and the
    plotted data are compared with the mean scores returned by
    `validation_curve`. Curves are read from `lines_` when `std_display_style`
    is None and from `errorbar_` otherwise.
    """
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)
    param_name, param_range = "max_depth", [1, 3, 5]
    train_scores, test_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range
    )

    def build_display(score_type):
        return ValidationCurveDisplay.from_estimator(
            estimator,
            X,
            y,
            param_name=param_name,
            param_range=param_range,
            score_type=score_type,
            std_display_style=std_display_style,
        )

    def curves_data(display, n_expected):
        # check the artist containers, then return one (x, y) pair per curve
        if std_display_style is None:
            assert len(display.lines_) == n_expected
            assert display.errorbar_ is None
            return [line.get_data() for line in display.lines_]
        assert display.lines_ is None
        assert len(display.errorbar_) == n_expected
        return [container.lines[0].get_data() for container in display.errorbar_]

    single_curve_cases = (
        ("train", "Train", train_scores),
        ("test", "Test", test_scores),
    )
    for score_type, expected_label, expected_scores in single_curve_cases:
        display = build_display(score_type)
        _, legend_label = display.ax_.get_legend_handles_labels()
        assert legend_label == [expected_label]

        [(x_data, y_data)] = curves_data(display, 1)
        assert_array_equal(x_data, param_range)
        assert_allclose(y_data, expected_scores.mean(axis=1))

    display = build_display("both")
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert legend_label == ["Train", "Test"]

    (x_data_train, y_data_train), (x_data_test, y_data_test) = curves_data(
        display, 2
    )
    assert_array_equal(x_data_train, param_range)
    assert_allclose(y_data_train, train_scores.mean(axis=1))
    assert_array_equal(x_data_test, param_range)
    assert_allclose(y_data_test, test_scores.mean(axis=1))
@pytest.mark.parametrize(
    "CurveDisplay, specific_params, expected_xscale",
    [
        (
            ValidationCurveDisplay,
            {"param_name": "max_depth", "param_range": np.arange(1, 5)},
            "linear",
        ),
        (LearningCurveDisplay, {"train_sizes": np.linspace(0.1, 0.9, num=5)}, "linear"),
        (
            ValidationCurveDisplay,
            {
                "param_name": "max_depth",
                "param_range": np.round(np.logspace(0, 2, num=5)).astype(np.int64),
            },
            "log",
        ),
        (LearningCurveDisplay, {"train_sizes": np.logspace(-1, 0, num=5)}, "log"),
    ],
)
def test_curve_display_xscale_auto(
    pyplot, data, CurveDisplay, specific_params, expected_xscale
):
    """Check that the x-axis scale is inferred from the spacing of the x values.

    Linearly spaced values are expected to give a "linear" axis and
    logarithmically spaced values a "log" axis.
    """
    X, y = data
    tree = DecisionTreeClassifier(random_state=0)
    display = CurveDisplay.from_estimator(tree, X, y, **specific_params)
    actual_xscale = display.ax_.get_xscale()
    assert actual_xscale == expected_xscale
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_std_display_style(pyplot, data, CurveDisplay, specific_params):
    """Check which artists are created for each `std_display_style`.

    The three styles None, "fill_between" and "errorbar" are plotted in turn.
    For each one, the `lines_`, `fill_between_` and `errorbar_` attributes and
    the number of legend entries are checked.
    """
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    import matplotlib as mpl

    def build_display(std_display_style):
        return CurveDisplay.from_estimator(
            estimator,
            X,
            y,
            **specific_params,
            std_display_style=std_display_style,
        )

    def assert_two_plain_lines(display):
        assert len(display.lines_) == 2
        for line in display.lines_:
            assert isinstance(line, mpl.lines.Line2D)

    def assert_two_legend_entries(display):
        _, legend_label = display.ax_.get_legend_handles_labels()
        assert len(legend_label) == 2

    # no representation of the standard deviation: only the mean lines
    display = build_display(None)
    assert_two_plain_lines(display)
    assert display.errorbar_ is None
    assert display.fill_between_ is None
    assert_two_legend_entries(display)

    # filled bands drawn in addition to the mean lines
    display = build_display("fill_between")
    assert_two_plain_lines(display)
    assert display.errorbar_ is None
    assert len(display.fill_between_) == 2
    for band in display.fill_between_:
        assert isinstance(band, mpl.collections.PolyCollection)
    assert_two_legend_entries(display)

    # error bars: neither plain lines nor filled bands are stored
    display = build_display("errorbar")
    assert display.lines_ is None
    assert len(display.errorbar_) == 2
    for container in display.errorbar_:
        assert isinstance(container, mpl.container.ErrorbarContainer)
    assert display.fill_between_ is None
    assert_two_legend_entries(display)
@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_plot_kwargs(pyplot, data, CurveDisplay, specific_params):
    """Check that `line_kw`, `fill_between_kw` and `errorbar_kw` are applied.

    Each keyword dictionary requests the colour red, and the colour of the
    first created artist is then checked.
    """
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    # lines and filled bands
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style="fill_between",
        line_kw={"color": "red"},
        fill_between_kw={"color": "red", "alpha": 1.0},
    )
    assert display.lines_[0].get_color() == "red"
    opaque_red_rgba = [[1.0, 0.0, 0.0, 1.0]]
    band_facecolor = display.fill_between_[0].get_facecolor()
    assert_allclose(band_facecolor, opaque_red_rgba)

    # error bars
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style="errorbar",
        errorbar_kw={"color": "red"},
    )
    first_errorbar_line = display.errorbar_[0].lines[0]
    assert first_errorbar_line.get_color() == "red"
@pytest.mark.parametrize(
    "param_range, xscale",
    [([5, 10, 15], "linear"), ([-50, 5, 50, 500], "symlog"), ([5, 50, 500], "log")],
)
def test_validation_curve_xscale_from_param_range_provided_as_a_list(
    pyplot, data, param_range, xscale
):
    """Check the x-axis scale inferred from a `param_range` given as a list."""
    X, y = data
    tree = DecisionTreeClassifier(random_state=0)
    display = ValidationCurveDisplay.from_estimator(
        tree, X, y, param_name="max_depth", param_range=param_range
    )
    inferred_xscale = display.ax_.get_xscale()
    assert inferred_xscale == xscale
@pytest.mark.parametrize(
    "Display, params",
    [
        (LearningCurveDisplay, {}),
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
    ],
)
def test_subclassing_displays(pyplot, data, Display, params):
    """Check that `from_estimator` called on a subclass returns that subclass.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/27675
    """

    class SubclassOfDisplay(Display):
        """Trivial subclass used only to check the returned type."""

    X, y = data
    tree = DecisionTreeClassifier(random_state=0)
    display = SubclassOfDisplay.from_estimator(tree, X, y, **params)

    assert isinstance(display, SubclassOfDisplay)

View File

@@ -0,0 +1,853 @@
from math import ceil
import numpy as np
import pytest
from scipy.stats import expon, norm, randint
from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.experimental import enable_halving_search_cv # noqa: F401
from sklearn.model_selection import (
GroupKFold,
GroupShuffleSplit,
HalvingGridSearchCV,
HalvingRandomSearchCV,
KFold,
LeaveOneGroupOut,
LeavePGroupsOut,
ShuffleSplit,
StratifiedKFold,
StratifiedShuffleSplit,
)
from sklearn.model_selection._search_successive_halving import (
_SubsampleMetaSplitter,
_top_k,
)
from sklearn.model_selection.tests.test_search import (
check_cv_results_array_types,
check_cv_results_keys,
)
from sklearn.svm import SVC, LinearSVC
class FastClassifier(DummyClassifier):
    """Dummy classifier that tolerates hyper-parameters named `a` to `z`.

    The extra parameters are accepted by `__init__` and then dropped, so they
    never influence the predictions. `get_params` still reports each of them
    with a placeholder value.
    """

    # start from the parent's constraints, then register every letter from "a"
    # to "z" as a parameter that is not validated
    _parameter_constraints: dict = dict(DummyClassifier._parameter_constraints)
    _parameter_constraints.update(
        (chr(code), "no_validation") for code in range(ord("a"), ord("z") + 1)
    )

    def __init__(
        self, strategy="stratified", random_state=None, constant=None, **kwargs
    ):
        # `kwargs` is deliberately not stored: only the parent's parameters are kept
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )

    def get_params(self, deep=False):
        """Return the parent's parameters plus a placeholder for `a` to `z`."""
        params = super().get_params(deep=deep)
        params.update(
            {chr(code): "whatever" for code in range(ord("a"), ord("z") + 1)}
        )
        return params
class SometimesFailClassifier(DummyClassifier):
    """Dummy classifier whose `fit` or `predict` can be made to raise.

    `fail_fit` and `fail_predict` trigger an exception in the matching method.
    `n_estimators` and `a` are only stored and do not affect the predictions.
    """

    def __init__(
        self,
        strategy="stratified",
        random_state=None,
        constant=None,
        n_estimators=10,
        fail_fit=False,
        fail_predict=False,
        a=0,
    ):
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )
        self.n_estimators = n_estimators
        self.fail_fit = fail_fit
        self.fail_predict = fail_predict
        self.a = a

    def fit(self, X, y):
        """Fit as the parent does, unless `fail_fit` is set."""
        if not self.fail_fit:
            return super().fit(X, y)
        raise Exception("fitting failed")

    def predict(self, X):
        """Predict as the parent does, unless `fail_predict` is set."""
        if not self.fail_predict:
            return super().predict(X)
        raise Exception("predict failed")
@pytest.mark.filterwarnings("ignore::sklearn.exceptions.FitFailedWarning")
@pytest.mark.filterwarnings("ignore:Scoring failed:UserWarning")
@pytest.mark.filterwarnings("ignore:One or more of the:UserWarning")
@pytest.mark.parametrize("HalvingSearch", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize("fail_at", ("fit", "predict"))
def test_nan_handling(HalvingSearch, fail_at):
    """Check that candidates whose score is NaN (because fit or predict
    failed) are never selected and all share the last rank."""
    failure_flag = f"fail_{fail_at}"
    X, y = make_classification(n_samples=1_000, random_state=0)

    search = HalvingSearch(
        SometimesFailClassifier(),
        {failure_flag: [False, True], "a": range(3)},
        resource="n_estimators",
        max_resources=6,
        min_resources=1,
        factor=2,
    )
    search.fit(X, y)

    # a candidate that failed must never be picked over one that succeeded
    assert not search.best_params_[failure_flag]

    cv_results = search.cv_results_
    scores = cv_results["mean_test_score"]
    ranks = cv_results["rank_test_score"]
    nan_mask = np.isnan(scores)

    # the failing candidates must show up as NaN scores
    assert nan_mask.any()

    # every NaN score is given one and the same rank ...
    nan_ranks = np.unique(ranks[nan_mask])
    assert nan_ranks.shape[0] == 1

    # ... and no other candidate is ranked worse than that
    assert (nan_ranks[0] >= ranks).all()
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    "aggressive_elimination, max_resources, expected_n_iterations, "
    "expected_n_required_iterations, expected_n_possible_iterations, "
    "expected_n_remaining_candidates, expected_n_candidates, "
    "expected_n_resources",
    [
        # The first resource level (20) is used twice in a row, and the
        # number of candidates evaluated at the last iteration is <= factor.
        (True, "limited", 4, 4, 3, 1, [60, 20, 7, 3], [20, 20, 60, 180]),
        # Without aggressive elimination there are fewer iterations and the
        # last iteration still has more than `factor` candidates, which is
        # not ideal.
        (False, "limited", 3, 4, 3, 3, [60, 20, 7], [20, 60, 180]),
        # When the amount of resource isn't limited, aggressive_elimination
        # has no effect. Here the default min_resources='exhaust' takes over.
        (True, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
        (False, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
    ],
)
def test_aggressive_elimination(
    Est,
    aggressive_elimination,
    max_resources,
    expected_n_iterations,
    expected_n_required_iterations,
    expected_n_possible_iterations,
    expected_n_remaining_candidates,
    expected_n_candidates,
    expected_n_resources,
):
    """Exercise the ``aggressive_elimination`` parameter with a limited and an
    unlimited resource budget."""
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)

    # "limited" / "unlimited" are symbolic: map them onto an actual budget
    resource_budget = 180 if max_resources == "limited" else n_samples

    sh = Est(
        FastClassifier(),
        {"a": ("l1", "l2"), "b": list(range(30))},
        aggressive_elimination=aggressive_elimination,
        max_resources=resource_budget,
        factor=3,
    )
    sh.set_params(verbose=True)  # just for test coverage
    if Est is HalvingRandomSearchCV:
        # draw as many candidates as the grid contains
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")

    sh.fit(X, y)

    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == expected_n_required_iterations
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    assert sh.n_candidates_ == expected_n_candidates
    assert sh.n_remaining_candidates_ == expected_n_remaining_candidates
    # the survivors are the last round's candidates divided by factor, rounded up
    assert ceil(sh.n_candidates_[-1] / sh.factor) == sh.n_remaining_candidates_
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    "min_resources, max_resources, expected_n_iterations, "
    "expected_n_possible_iterations, expected_n_resources",
    [
        # enough resources are available
        ("smallest", "auto", 2, 4, [20, 60]),
        # enough resources are available, min_resources given explicitly
        (50, "auto", 2, 3, [50, 150]),
        # too few resources: a single iteration is all that can be run
        ("smallest", 30, 1, 1, [20]),
        # 'exhaust': the last iteration uses as many resources as possible
        ("exhaust", "auto", 2, 2, [333, 999]),
        ("exhaust", 1000, 2, 2, [333, 999]),
        ("exhaust", 999, 2, 2, [333, 999]),
        ("exhaust", 600, 2, 2, [200, 600]),
        ("exhaust", 599, 2, 2, [199, 597]),
        ("exhaust", 300, 2, 2, [100, 300]),
        ("exhaust", 60, 2, 2, [20, 60]),
        ("exhaust", 50, 1, 1, [20]),
        ("exhaust", 20, 1, 1, [20]),
    ],
)
def test_min_max_resources(
    Est,
    min_resources,
    max_resources,
    expected_n_iterations,
    expected_n_possible_iterations,
    expected_n_resources,
):
    """Check how ``min_resources`` and ``max_resources`` determine the number
    of iterations and the resources used at each of them."""
    X, y = make_classification(n_samples=1000, random_state=0)

    sh = Est(
        FastClassifier(),
        {"a": [1, 2], "b": [1, 2, 3]},
        factor=3,
        min_resources=min_resources,
        max_resources=max_resources,
    )
    if Est is HalvingRandomSearchCV:
        # the grid above holds 2 * 3 = 6 combinations; sample as many
        sh.set_params(n_candidates=6)
    sh.fit(X, y)

    # 6 candidates reduced by a factor of 3 need exactly 2 iterations
    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == 2
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    if min_resources == "exhaust":
        assert sh.n_possible_iterations_ == sh.n_iterations_ == len(sh.n_resources_)
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
@pytest.mark.parametrize(
    "max_resources, n_iterations, n_possible_iterations",
    [
        ("auto", 5, 9),  # every available resource is used
        (1024, 5, 9),
        (700, 5, 8),
        (512, 5, 8),
        (511, 5, 7),
        (32, 4, 4),
        (31, 3, 3),
        (16, 3, 3),
        # max_resources == min_resources: a single iteration is possible
        (4, 1, 1),
    ],
)
def test_n_iterations(Est, max_resources, n_iterations, n_possible_iterations):
    """Check how many iterations are actually run for a given
    ``max_resources``."""
    X, y = make_classification(n_samples=1024, random_state=1)

    sh = Est(
        FastClassifier(),
        {"a": [1, 2], "b": list(range(10))},
        cv=2,
        factor=2,
        max_resources=max_resources,
        min_resources=4,
    )
    if Est is HalvingRandomSearchCV:
        # 2 * 10 candidates, i.e. as many as the grid search evaluates
        sh.set_params(n_candidates=20)
    sh.fit(X, y)

    assert sh.n_required_iterations_ == 5
    assert sh.n_iterations_ == n_iterations
    assert sh.n_possible_iterations_ == n_possible_iterations
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_resource_parameter(Est):
    """Check the ``resource`` parameter.

    A regular estimator parameter ("c") is used as the resource, and the
    values reported in ``cv_results_`` must agree with each other. Two invalid
    settings must make ``fit`` raise a ``ValueError``: a resource name that is
    not supported, and a resource that is also part of the searched grid.
    """
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": [1, 2], "b": list(range(10))}
    base_estimator = FastClassifier()

    sh = Est(base_estimator, param_grid, cv=2, resource="c", max_resources=10, factor=3)
    sh.fit(X, y)
    assert set(sh.n_resources_) == {1, 3, 9}
    for r_i, params, param_c in zip(
        sh.cv_results_["n_resources"],
        sh.cv_results_["params"],
        sh.cv_results_["param_c"],
    ):
        assert r_i == params["c"] == param_c

    # The estimators are built outside of the `pytest.raises` blocks so that
    # only `fit` can satisfy the expected error.
    sh = HalvingGridSearchCV(
        base_estimator, param_grid, cv=2, resource="1234", max_resources=10
    )
    with pytest.raises(
        ValueError, match="Cannot use resource=1234 which is not supported "
    ):
        sh.fit(X, y)

    # "c" is both the resource and one of the searched parameters
    conflicting_grid = {"a": [1, 2], "b": [1, 2], "c": [1, 3]}
    sh = HalvingGridSearchCV(
        base_estimator, conflicting_grid, cv=2, resource="c", max_resources=10
    )
    with pytest.raises(
        ValueError,
        match=(
            "Cannot use parameter c as the resource since it is part "
            "of the searched parameters."
        ),
    ):
        sh.fit(X, y)
@pytest.mark.parametrize(
    "max_resources, n_candidates, expected_n_candidates",
    [
        (512, "exhaust", 128),  # exactly as many candidates as needed
        (32, "exhaust", 8),
        (32, 8, 8),
        (32, 7, 7),  # fewer candidates than we could afford
        (32, 9, 9),  # more candidates than would be 'reasonable'
    ],
)
def test_random_search(max_resources, n_candidates, expected_n_candidates):
    """Check that the random search generates the expected number of
    candidates for the first iteration."""
    X, y = make_classification(n_samples=1024, random_state=0)

    sh = HalvingRandomSearchCV(
        FastClassifier(),
        {"a": norm, "b": norm},
        n_candidates=n_candidates,
        cv=2,
        max_resources=max_resources,
        factor=2,
        min_resources=4,
    )
    sh.fit(X, y)

    first_round_candidates = sh.n_candidates_[0]
    assert first_round_candidates == expected_n_candidates
    if n_candidates == "exhaust":
        # with 'exhaust', the last iteration must use the whole budget
        last_round_resources = sh.n_resources_[-1]
        assert last_round_resources == max_resources
@pytest.mark.parametrize(
    "param_distributions, expected_n_candidates",
    [
        ({"a": [1, 2]}, 2),  # lists only: fewer samples than n_candidates
        ({"a": randint(1, 3)}, 10),  # not only lists: n_candidates is respected
    ],
)
def test_random_search_discrete_distributions(
    param_distributions, expected_n_candidates
):
    """Check the number of sampled candidates when more are requested than
    the parameter space can provide.

    How many parameters get sampled depends on whether the distributions are
    'all lists' or not (see ParameterSampler for details). This partly
    duplicates the ParameterSampler checks, but interaction bugs were found
    while successive halving was being developed.
    """
    X, y = make_classification(n_samples=1024, random_state=0)
    sh = HalvingRandomSearchCV(FastClassifier(), param_distributions, n_candidates=10)
    sh.fit(X, y)
    first_round_candidates = sh.n_candidates_[0]
    assert first_round_candidates == expected_n_candidates
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"resource": "not_a_parameter"},
            "Cannot use resource=not_a_parameter which is not supported",
        ),
        (
            {"resource": "a", "max_resources": 100},
            "Cannot use parameter a as the resource since it is part of",
        ),
        (
            {"max_resources": "auto", "resource": "b"},
            "resource can only be 'n_samples' when max_resources='auto'",
        ),
        (
            {"min_resources": 15, "max_resources": 14},
            "min_resources_=15 is greater than max_resources_=14",
        ),
        ({"cv": KFold(shuffle=True)}, "must yield consistent folds"),
        ({"cv": ShuffleSplit()}, "must yield consistent folds"),
    ],
)
def test_input_errors(Est, params, expected_error_message):
    """Check that invalid settings make ``fit`` raise a ``ValueError`` whose
    message matches the expected pattern, for both search classes."""
    X, y = make_classification(100)
    search = Est(FastClassifier(), {"a": [1]}, **params)
    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"n_candidates": "exhaust", "min_resources": "exhaust"},
            "cannot be both set to 'exhaust'",
        ),
    ],
)
def test_input_errors_randomized(params, expected_error_message):
    """Check the input errors that only apply to ``HalvingRandomSearchCV``."""
    X, y = make_classification(100)
    search = HalvingRandomSearchCV(FastClassifier(), {"a": [1]}, **params)
    with pytest.raises(ValueError, match=expected_error_message):
        search.fit(X, y)
@pytest.mark.parametrize(
    "fraction, subsample_test, expected_train_size, expected_test_size",
    [
        (0.5, True, 40, 10),
        (0.5, False, 40, 20),
        (0.2, True, 16, 4),
        (0.2, False, 16, 20),
    ],
)
def test_subsample_splitter_shapes(
    fraction, subsample_test, expected_train_size, expected_test_size
):
    """Check the sizes of the train and test splits produced by
    ``_SubsampleMetaSplitter`` on top of a 5-fold ``KFold``."""
    n_samples = 100
    X, y = make_classification(n_samples)
    cv = _SubsampleMetaSplitter(
        base_cv=KFold(5),
        fraction=fraction,
        subsample_test=subsample_test,
        random_state=None,
    )

    for train, test in cv.split(X, y):
        n_train, n_test = train.shape[0], test.shape[0]
        assert n_train == expected_train_size
        assert n_test == expected_test_size
        if subsample_test:
            # train and test together make up the requested fraction
            assert n_train + n_test == int(n_samples * fraction)
        else:
            # the test fold keeps the size given by the base splitter
            assert n_test == n_samples // cv.base_cv.get_n_splits()
@pytest.mark.parametrize("subsample_test", (True, False))
def test_subsample_splitter_determinism(subsample_test):
    """Check what is (and is not) stable across two calls to ``split()``.

    - Training sets are allowed to differ (they are always sampled with a
      different fraction anyway).
    - When the test set is not subsampled it must always be the same. This is
      the most important check; it relies on the determinism of the base_cv.

    Note: both train and test splits could be made identical across calls by
    drawing an int seed in ``_SubsampleMetaSplitter.__init__``.
    """
    X, y = make_classification(100)
    cv = _SubsampleMetaSplitter(
        base_cv=KFold(5), fraction=0.5, subsample_test=subsample_test, random_state=None
    )

    first_pass = list(cv.split(X, y, groups=None))
    second_pass = list(cv.split(X, y, groups=None))

    for (train_a, test_a), (train_b, test_b) in zip(first_pass, second_pass):
        assert not np.all(train_a == train_b)

        same_test_fold = np.all(test_a == test_b)
        if subsample_test:
            assert not same_test_fold
        else:
            assert same_test_fold
            assert np.all(X[test_a] == X[test_b])
@pytest.mark.parametrize(
    "k, itr, expected",
    [
        (1, 0, ["c"]),
        (2, 0, ["a", "c"]),
        (4, 0, ["d", "b", "a", "c"]),
        (10, 0, ["d", "b", "a", "c"]),
        (1, 1, ["e"]),
        (2, 1, ["f", "e"]),
        (10, 1, ["f", "e"]),
        (1, 2, ["i"]),
        (10, 2, ["g", "h", "i"]),
    ],
)
def test_top_k(k, itr, expected):
    """Check which params ``_top_k`` returns for iteration ``itr``.

    The expected lists hold the params of the (at most) ``k`` best candidates
    of that iteration, ordered from the lowest to the highest score.
    """
    results = {  # this isn't a 'real world' result dict
        "iter": [0, 0, 0, 0, 1, 1, 2, 2, 2],
        "mean_test_score": [4, 3, 5, 1, 11, 10, 5, 6, 9],
        "params": ["a", "b", "c", "d", "e", "f", "g", "h", "i"],
    }
    got = _top_k(results, k=k, itr=itr)
    # `np.all(got == expected)` would broadcast and give an opaque failure;
    # assert_array_equal also checks the shape and reports the mismatch.
    np.testing.assert_array_equal(got, expected)
@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_cv_results(Est):
    """Check that ``cv_results_`` reflects the logic of the tournament.

    In particular, the candidates carried over to each successive iteration
    must be those that scored best in the previous one, and the best candidate
    must be picked from the last iteration only.
    """
    pd = pytest.importorskip("pandas")

    rng = np.random.RandomState(0)

    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifier()

    # generate random scores: we want to avoid ties, which would otherwise
    # mess with the ordering and make testing harder
    def scorer(est, X, y):
        return rng.rand()

    sh = Est(base_estimator, param_grid, factor=2, scoring=scorer)
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")

    sh.fit(X, y)

    # non-regression check for
    # https://github.com/scikit-learn/scikit-learn/issues/19203
    assert isinstance(sh.cv_results_["iter"], np.ndarray)
    assert isinstance(sh.cv_results_["n_resources"], np.ndarray)

    cv_results_df = pd.DataFrame(sh.cv_results_)

    # just make sure we don't have ties
    assert len(cv_results_df["mean_test_score"].unique()) == len(cv_results_df)

    cv_results_df["params_str"] = cv_results_df["params"].apply(str)
    table = cv_results_df.pivot(
        index="params_str", columns="iter", values="mean_test_score"
    )

    # table looks something like this (values are illustrative):
    # iter                    0      1       2        3   4   5
    # params_str
    # {'a': 'l2', 'b': 23} 0.75    NaN     NaN      NaN NaN NaN
    # {'a': 'l1', 'b': 29} 0.90  0.875     NaN      NaN NaN NaN
    # {'a': 'l1', 'b': 0}  0.75    NaN     NaN      NaN NaN NaN
    # {'a': 'l2', 'b': 3}  0.85  0.925  0.9125  0.90625 NaN NaN
    # {'a': 'l1', 'b': 5}  0.80    NaN     NaN      NaN NaN NaN
    # ...

    # where a NaN indicates that the candidate wasn't evaluated at a given
    # iteration, because it wasn't part of the top-K at some previous
    # iteration. We here make sure that candidates that aren't in the top-k at
    # any given iteration are indeed not evaluated at the subsequent
    # iterations.
    nan_mask = pd.isna(table)
    n_iter = sh.n_iterations_
    for it in range(n_iter - 1):
        already_discarded_mask = nan_mask[it]

        # make sure that if a candidate is already discarded, we don't evaluate
        # it later
        assert (
            (already_discarded_mask & nan_mask[it + 1]) == already_discarded_mask
        ).all()

        # make sure that the number of discarded candidate is correct
        discarded_now_mask = ~already_discarded_mask & nan_mask[it + 1]
        kept_mask = ~already_discarded_mask & ~discarded_now_mask
        assert kept_mask.sum() == sh.n_candidates_[it + 1]

        # make sure that all discarded candidates have a lower score than the
        # kept candidates
        discarded_max_score = table[it].where(discarded_now_mask).max()
        kept_min_score = table[it].where(kept_mask).min()
        assert discarded_max_score < kept_min_score

    # We now make sure that the best candidate is chosen only from the last
    # iteration.
    # We also make sure this is true even if there were higher scores in
    # earlier rounds (this isn't generally the case, but worth ensuring it's
    # possible).
    last_iter = cv_results_df["iter"].max()
    idx_best_last_iter = cv_results_df[cv_results_df["iter"] == last_iter][
        "mean_test_score"
    ].idxmax()
    idx_best_all_iters = cv_results_df["mean_test_score"].idxmax()

    # `idxmax` returns index labels, so the rows are looked up with `.loc`
    # rather than the positional `.iloc`.
    best_last_iter = cv_results_df.loc[idx_best_last_iter]
    best_all_iters = cv_results_df.loc[idx_best_all_iters]

    assert sh.best_params_ == best_last_iter["params"]
    assert best_last_iter["mean_test_score"] < best_all_iters["mean_test_score"]
    assert best_last_iter["params"] != best_all_iters["params"]
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_base_estimator_inputs(Est):
    """Check that the base estimators receive the right parameters and the
    right number of samples at each iteration."""
    pd = pytest.importorskip("pandas")

    # filled in by the book-keeping estimator below, one entry per call
    fit_sizes = []
    predict_sizes = []
    params_log = []

    class FastClassifierBookKeeping(FastClassifier):
        def fit(self, X, y):
            fit_sizes.append(X.shape[0])
            return super().fit(X, y)

        def predict(self, X):
            predict_sizes.append(X.shape[0])
            return super().predict(X)

        def set_params(self, **params):
            params_log.append(params)
            return super().set_params(**params)

    n_splits = 2
    X, y = make_classification(n_samples=1024, random_state=0)

    sh = Est(
        FastClassifierBookKeeping(),
        {"a": ("l1", "l2"), "b": list(range(30))},
        factor=2,
        cv=n_splits,
        return_train_score=False,
        refit=False,
    )
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")
    sh.fit(X, y)

    assert len(fit_sizes) == len(predict_sizes)
    samples_per_call = [
        n_fit + n_predict for n_fit, n_predict in zip(fit_sizes, predict_sizes)
    ]

    # The recorded lists have length n_splits * n_iter * n_candidates_at_i.
    # Every run of n_splits consecutive entries belongs to the folds of one
    # candidate at one iteration and therefore holds equal values. Keep one
    # entry per run, which leaves n_iter * n_candidates_at_i entries.
    samples_per_candidate = samples_per_call[::n_splits]
    params_per_candidate = params_log[::n_splits]

    cv_results_df = pd.DataFrame(sh.cv_results_)
    assert (
        len(params_per_candidate) == len(samples_per_candidate) == len(cv_results_df)
    )

    uniques, counts = np.unique(samples_per_candidate, return_counts=True)
    assert (sh.n_resources_ == uniques).all()
    assert (sh.n_candidates_ == counts).all()

    assert (cv_results_df["params"] == params_per_candidate).all()
    assert (cv_results_df["n_resources"] == samples_per_candidate).all()
@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_groups_support(Est):
    """Check the handling of ``groups`` by both halving searches.

    With a group-based splitter, fitting without groups must raise a
    ``ValueError`` and fitting with groups must work. Splitters that do not
    need groups must fit without them.
    """
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=50, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 50)

    clf = LinearSVC(random_state=0)
    grid = {"C": [1]}

    error_msg = "The 'groups' parameter should not be None."
    for group_cv in (
        LeaveOneGroupOut(),
        LeavePGroupsOut(2),
        GroupKFold(n_splits=3),
        GroupShuffleSplit(random_state=0),
    ):
        gs = Est(clf, grid, cv=group_cv, random_state=0)
        with pytest.raises(ValueError, match=error_msg):
            gs.fit(X, y)
        gs.fit(X, y, groups=groups)

    for plain_cv in (StratifiedKFold(), StratifiedShuffleSplit(random_state=0)):
        gs = Est(clf, grid, cv=plain_cv)
        # Should not raise an error
        gs.fit(X, y)
@pytest.mark.parametrize("SearchCV", [HalvingRandomSearchCV, HalvingGridSearchCV])
def test_min_resources_null(SearchCV):
    """Check that fitting on an empty dataset raises the ``min_resources_=0``
    error."""
    empty_X = np.empty((0, 3))
    search = SearchCV(FastClassifier(), {"a": [1]}, min_resources="smallest")

    err_msg = "min_resources_=0: you might have passed an empty dataset X."
    with pytest.raises(ValueError, match=err_msg):
        search.fit(empty_X, [])
@pytest.mark.parametrize("SearchCV", [HalvingGridSearchCV, HalvingRandomSearchCV])
def test_select_best_index(SearchCV):
    """Check which candidate ``_select_best_index`` picks from a hand-made
    result dict."""
    # this isn't a 'real world' result dict
    iterations = np.array([0, 0, 0, 0, 1, 1, 2, 2, 2])
    mean_scores = np.array([4, 3, 5, 1, 11, 10, 5, 6, 9])
    candidates = np.array(["a", "b", "c", "d", "e", "f", "g", "h", "i"])
    results = {
        "iter": iterations,
        "mean_test_score": mean_scores,
        "params": candidates,
    }

    # 'i' is expected: it sits at index 8 and has the highest score (9) of
    # the last iteration, although 'e' and 'f' scored higher earlier on
    best_index = SearchCV._select_best_index(None, None, results)
    assert best_index == 8
def test_halving_random_search_list_of_dicts():
    """Check the behaviour of `HalvingRandomSearchCV` when `param_distributions`
    is a list of dictionaries.
    """
    X, y = make_classification(n_samples=150, n_features=4, random_state=42)
    params = [
        {"kernel": ["rbf"], "C": expon(scale=10), "gamma": expon(scale=0.1)},
        {"kernel": ["poly"], "degree": [2, 3]},
    ]

    search = HalvingRandomSearchCV(
        SVC(), cv=3, param_distributions=params, return_train_score=True, random_state=0
    )
    search.fit(X, y)
    n_candidates = sum(search.n_candidates_)
    cv_results = search.cv_results_

    # Check results structure
    param_keys = ("param_C", "param_degree", "param_gamma", "param_kernel")
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "mean_fit_time",
        "std_fit_time",
        "mean_score_time",
        "std_score_time",
    )
    extra_keys = ("n_resources", "iter")
    check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates, extra_keys)

    expected_cv_results_kinds = {
        "param_C": "f",
        "param_degree": "i",
        "param_gamma": "f",
        "param_kernel": "O",
    }
    check_cv_results_array_types(
        search, param_keys, score_keys, expected_cv_results_kinds
    )

    # Each candidate must only carry the parameters of the dictionary it was
    # drawn from; the parameters of the other dictionary must be masked.
    kernels = cv_results["param_kernel"]
    poly_rows = [i for i in range(n_candidates) if kernels[i] == "poly"]
    rbf_rows = [i for i in range(n_candidates) if kernels[i] == "rbf"]

    for i in poly_rows:
        assert (
            cv_results["param_C"].mask[i]
            and cv_results["param_gamma"].mask[i]
            and not cv_results["param_degree"].mask[i]
        )
    for i in rbf_rows:
        assert (
            not cv_results["param_C"].mask[i]
            and not cv_results["param_gamma"].mask[i]
            and cv_results["param_degree"].mask[i]
        )