@@ -0,0 +1,106 @@
"""Tools for model selection, such as cross validation and hyper-parameter tuning."""

# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

import typing

from sklearn.model_selection._classification_threshold import (
    FixedThresholdClassifier,
    TunedThresholdClassifierCV,
)
from sklearn.model_selection._plot import LearningCurveDisplay, ValidationCurveDisplay
from sklearn.model_selection._search import (
    GridSearchCV,
    ParameterGrid,
    ParameterSampler,
    RandomizedSearchCV,
)
from sklearn.model_selection._split import (
    BaseCrossValidator,
    BaseShuffleSplit,
    GroupKFold,
    GroupShuffleSplit,
    KFold,
    LeaveOneGroupOut,
    LeaveOneOut,
    LeavePGroupsOut,
    LeavePOut,
    PredefinedSplit,
    RepeatedKFold,
    RepeatedStratifiedKFold,
    ShuffleSplit,
    StratifiedGroupKFold,
    StratifiedKFold,
    StratifiedShuffleSplit,
    TimeSeriesSplit,
    check_cv,
    train_test_split,
)
from sklearn.model_selection._validation import (
    cross_val_predict,
    cross_val_score,
    cross_validate,
    learning_curve,
    permutation_test_score,
    validation_curve,
)

if typing.TYPE_CHECKING:
    # Avoid errors in type checkers (e.g. mypy) for experimental estimators.
    # TODO: remove this check once the estimator is no longer experimental.
    from sklearn.model_selection._search_successive_halving import (
        HalvingGridSearchCV,
        HalvingRandomSearchCV,
    )


__all__ = [
    "BaseCrossValidator",
    "BaseShuffleSplit",
    "FixedThresholdClassifier",
    "GridSearchCV",
    "GroupKFold",
    "GroupShuffleSplit",
    "HalvingGridSearchCV",
    "HalvingRandomSearchCV",
    "KFold",
    "LearningCurveDisplay",
    "LeaveOneGroupOut",
    "LeaveOneOut",
    "LeavePGroupsOut",
    "LeavePOut",
    "ParameterGrid",
    "ParameterSampler",
    "PredefinedSplit",
    "RandomizedSearchCV",
    "RepeatedKFold",
    "RepeatedStratifiedKFold",
    "ShuffleSplit",
    "StratifiedGroupKFold",
    "StratifiedKFold",
    "StratifiedShuffleSplit",
    "TimeSeriesSplit",
    "TunedThresholdClassifierCV",
    "ValidationCurveDisplay",
    "check_cv",
    "cross_val_predict",
    "cross_val_score",
    "cross_validate",
    "learning_curve",
    "permutation_test_score",
    "train_test_split",
    "validation_curve",
]


# TODO: remove this check once the estimator is no longer experimental.
def __getattr__(name):
    if name in {"HalvingGridSearchCV", "HalvingRandomSearchCV"}:
        raise ImportError(
            f"{name} is experimental and the API might change without any "
            "deprecation cycle. To use it, you need to explicitly import "
            "enable_halving_search_cv:\n"
            "from sklearn.experimental import enable_halving_search_cv"
        )
    raise AttributeError(f"module {__name__} has no attribute {name}")
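
A minimal usage sketch (not part of this commit) of the guard above: importing
`enable_halving_search_cv` first patches the module so the experimental halving
estimators become importable, exactly as the error message instructs.

from sklearn.experimental import enable_halving_search_cv  # noqa: F401
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import HalvingGridSearchCV

X, y = make_classification(random_state=0)
# Successive halving evaluates all candidates on a small budget first and
# keeps roughly the best 1/factor fraction at each iteration.
search = HalvingGridSearchCV(
    LogisticRegression(), {"C": [0.01, 0.1, 1.0, 10.0]}, factor=3
).fit(X, y)
print(search.best_params_)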
@@ -0,0 +1,883 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

from collections.abc import MutableMapping
from numbers import Integral, Real

import numpy as np

from sklearn.base import (
    BaseEstimator,
    ClassifierMixin,
    MetaEstimatorMixin,
    _fit_context,
    clone,
)
from sklearn.exceptions import NotFittedError
from sklearn.metrics import check_scoring, get_scorer_names
from sklearn.metrics._scorer import _CurveScorer, _threshold_scores_to_class_labels
from sklearn.model_selection._split import StratifiedShuffleSplit, check_cv
from sklearn.utils import _safe_indexing, get_tags
from sklearn.utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions
from sklearn.utils._response import _get_response_values_binary
from sklearn.utils.metadata_routing import (
    MetadataRouter,
    MethodMapping,
    _raise_for_params,
    process_routing,
)
from sklearn.utils.metaestimators import available_if
from sklearn.utils.multiclass import type_of_target
from sklearn.utils.parallel import Parallel, delayed
from sklearn.utils.validation import (
    _check_method_params,
    _estimator_has,
    _num_samples,
    check_is_fitted,
    indexable,
)


def _check_is_fitted(estimator):
    # The meta-estimator can wrap an already-fitted classifier (prefit use
    # case) or be fitted itself; accept either fitted state.
    try:
        check_is_fitted(estimator.estimator)
    except NotFittedError:
        check_is_fitted(estimator, "estimator_")


class BaseThresholdClassifier(ClassifierMixin, MetaEstimatorMixin, BaseEstimator):
    """Base class for binary classifiers that set a non-default decision threshold.

    In this base class, we define the following interface:

    - the validation of common parameters in `fit`;
    - the different prediction methods that can be used with the classifier.

    .. versionadded:: 1.5

    Parameters
    ----------
    estimator : estimator instance
        The binary classifier, fitted or not, for which we want to optimize
        the decision threshold used during `predict`.

    response_method : {"auto", "decision_function", "predict_proba"}, default="auto"
        Methods of the classifier `estimator` corresponding to the
        decision function for which we want to find a threshold. It can be:

        * if `"auto"`, it will try to invoke, for each classifier,
          `"predict_proba"` or `"decision_function"` in that order.
        * otherwise, one of `"predict_proba"` or `"decision_function"`.
          If the method is not implemented by the classifier, it will raise an
          error.
    """

    _parameter_constraints: dict = {
        "estimator": [
            HasMethods(["fit", "predict_proba"]),
            HasMethods(["fit", "decision_function"]),
        ],
        "response_method": [StrOptions({"auto", "predict_proba", "decision_function"})],
    }

    def __init__(self, estimator, *, response_method="auto"):
        self.estimator = estimator
        self.response_method = response_method

    def _get_response_method(self):
        """Define the response method."""
        if self.response_method == "auto":
            response_method = ["predict_proba", "decision_function"]
        else:
            response_method = self.response_method
        return response_method
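
    # Illustration (not part of the original code): with the default
    # `response_method="auto"`, the list ["predict_proba", "decision_function"]
    # is handed to `_get_response_values_binary`, which uses the first method
    # the classifier actually implements. Hypothetical snippet:
    #
    #     clf = LogisticRegression().fit(X, y)  # implements predict_proba
    #     y_score, _, used = _get_response_values_binary(
    #         clf, X, ["predict_proba", "decision_function"],
    #         pos_label=None, return_response_method_used=True,
    #     )
    #     # used == "predict_proba"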

    @_fit_context(
        # *ThresholdClassifier*.estimator is not validated yet
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, **params):
        """Fit the classifier.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values.

        **params : dict
            Parameters to pass to the `fit` method of the underlying
            classifier.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        _raise_for_params(params, self, None)

        X, y = indexable(X, y)

        y_type = type_of_target(y, input_name="y")
        if y_type != "binary":
            raise ValueError(
                f"Only binary classification is supported. Unknown label type: {y_type}"
            )

        self._fit(X, y, **params)

        if hasattr(self.estimator_, "n_features_in_"):
            self.n_features_in_ = self.estimator_.n_features_in_
        if hasattr(self.estimator_, "feature_names_in_"):
            self.feature_names_in_ = self.estimator_.feature_names_in_

        return self

    @property
    def classes_(self):
        """Class labels."""
        return self.estimator_.classes_

    @available_if(_estimator_has("predict_proba"))
    def predict_proba(self, X):
        """Predict class probabilities for `X` using the fitted estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Returns
        -------
        probabilities : ndarray of shape (n_samples, n_classes)
            The class probabilities of the input samples.
        """
        _check_is_fitted(self)
        estimator = getattr(self, "estimator_", self.estimator)
        return estimator.predict_proba(X)

    @available_if(_estimator_has("predict_log_proba"))
    def predict_log_proba(self, X):
        """Predict logarithm of class probabilities for `X` using the fitted estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Returns
        -------
        log_probabilities : ndarray of shape (n_samples, n_classes)
            The logarithm of the class probabilities of the input samples.
        """
        _check_is_fitted(self)
        estimator = getattr(self, "estimator_", self.estimator)
        return estimator.predict_log_proba(X)

    @available_if(_estimator_has("decision_function"))
    def decision_function(self, X):
        """Decision function for samples in `X` using the fitted estimator.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The input samples, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        Returns
        -------
        decisions : ndarray of shape (n_samples,)
            The decision function computed with the fitted estimator.
        """
        _check_is_fitted(self)
        estimator = getattr(self, "estimator_", self.estimator)
        return estimator.decision_function(X)

    def __sklearn_tags__(self):
        tags = super().__sklearn_tags__()
        tags.classifier_tags.multi_class = False
        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
        return tags


class FixedThresholdClassifier(BaseThresholdClassifier):
    """Binary classifier that manually sets the decision threshold.

    This classifier allows changing the default decision threshold used for
    converting posterior probability estimates (i.e. output of `predict_proba`) or
    decision scores (i.e. output of `decision_function`) into a class label.

    Here, the threshold is not optimized and is set to a constant value.

    Read more in the :ref:`User Guide <FixedThresholdClassifier>`.

    .. versionadded:: 1.5

    Parameters
    ----------
    estimator : estimator instance
        The binary classifier, fitted or not, for which we want to optimize
        the decision threshold used during `predict`.

    threshold : {"auto"} or float, default="auto"
        The decision threshold to use when converting posterior probability estimates
        (i.e. output of `predict_proba`) or decision scores (i.e. output of
        `decision_function`) into a class label. When `"auto"`, the threshold is set
        to 0.5 if `predict_proba` is used as `response_method`, otherwise it is set to
        0 (i.e. the default threshold for `decision_function`).

    pos_label : int, float, bool or str, default=None
        The label of the positive class. Used to process the output of the
        `response_method` method. When `pos_label=None`, if `y_true` is in `{-1, 1}` or
        `{0, 1}`, `pos_label` is set to 1, otherwise an error will be raised.

    response_method : {"auto", "decision_function", "predict_proba"}, default="auto"
        Methods of the classifier `estimator` corresponding to the
        decision function for which we want to find a threshold. It can be:

        * if `"auto"`, it will try to invoke `"predict_proba"` or
          `"decision_function"` in that order.
        * otherwise, one of `"predict_proba"` or `"decision_function"`.
          If the method is not implemented by the classifier, it will raise an
          error.

    Attributes
    ----------
    estimator_ : estimator instance
        The fitted classifier used when predicting.

    classes_ : ndarray of shape (n_classes,)
        The class labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

    See Also
    --------
    sklearn.model_selection.TunedThresholdClassifierCV : Classifier that post-tunes
        the decision threshold based on some metrics and using cross-validation.
    sklearn.calibration.CalibratedClassifierCV : Estimator that calibrates
        probabilities.

    Examples
    --------
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.linear_model import LogisticRegression
    >>> from sklearn.metrics import confusion_matrix
    >>> from sklearn.model_selection import FixedThresholdClassifier, train_test_split
    >>> X, y = make_classification(
    ...     n_samples=1_000, weights=[0.9, 0.1], class_sep=0.8, random_state=42
    ... )
    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...     X, y, stratify=y, random_state=42
    ... )
    >>> classifier = LogisticRegression(random_state=0).fit(X_train, y_train)
    >>> print(confusion_matrix(y_test, classifier.predict(X_test)))
    [[217   7]
     [ 19   7]]
    >>> classifier_other_threshold = FixedThresholdClassifier(
    ...     classifier, threshold=0.1, response_method="predict_proba"
    ... ).fit(X_train, y_train)
    >>> print(confusion_matrix(y_test, classifier_other_threshold.predict(X_test)))
    [[184  40]
     [  6  20]]
    """

    _parameter_constraints: dict = {
        **BaseThresholdClassifier._parameter_constraints,
        "threshold": [StrOptions({"auto"}), Real],
        "pos_label": [Real, str, "boolean", None],
    }

    def __init__(
        self,
        estimator,
        *,
        threshold="auto",
        pos_label=None,
        response_method="auto",
    ):
        super().__init__(estimator=estimator, response_method=response_method)
        self.pos_label = pos_label
        self.threshold = threshold

    @property
    def classes_(self):
        if estimator := getattr(self, "estimator_", None):
            return estimator.classes_
        try:
            check_is_fitted(self.estimator)
            return self.estimator.classes_
        except NotFittedError as exc:
            raise AttributeError(
                "The underlying estimator is not fitted yet."
            ) from exc

    def _fit(self, X, y, **params):
        """Fit the classifier.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values.

        **params : dict
            Parameters to pass to the `fit` method of the underlying
            classifier.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        routed_params = process_routing(self, "fit", **params)
        self.estimator_ = clone(self.estimator).fit(X, y, **routed_params.estimator.fit)
        return self

    def predict(self, X):
        """Predict the target of new samples.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The samples, as accepted by `estimator.predict`.

        Returns
        -------
        class_labels : ndarray of shape (n_samples,)
            The predicted class.
        """
        _check_is_fitted(self)

        estimator = getattr(self, "estimator_", self.estimator)

        y_score, _, response_method_used = _get_response_values_binary(
            estimator,
            X,
            self._get_response_method(),
            pos_label=self.pos_label,
            return_response_method_used=True,
        )

        if self.threshold == "auto":
            decision_threshold = 0.5 if response_method_used == "predict_proba" else 0.0
        else:
            decision_threshold = self.threshold

        return _threshold_scores_to_class_labels(
            y_score, decision_threshold, self.classes_, self.pos_label
        )
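
    # Sketch (an assumption about `_threshold_scores_to_class_labels`, which is
    # defined in sklearn.metrics._scorer rather than in this file): scores are
    # compared against the decision threshold and mapped back onto `classes_`,
    # conceptually something like:
    #
    #     pos_idx = 1 if pos_label == classes[1] else 0
    #     y_pred = np.where(
    #         y_score > decision_threshold, classes[pos_idx], classes[1 - pos_idx]
    #     )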

    def get_metadata_routing(self):
        """Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        """
        router = MetadataRouter(owner=self).add(
            estimator=self.estimator,
            method_mapping=MethodMapping().add(callee="fit", caller="fit"),
        )
        return router
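
    # Usage sketch (not part of this commit): the routing above forwards
    # metadata passed to `fit` on to the wrapped estimator's `fit`, e.g. sample
    # weights. Hypothetical snippet, requires metadata routing to be enabled:
    #
    #     sklearn.set_config(enable_metadata_routing=True)
    #     clf = FixedThresholdClassifier(
    #         LogisticRegression().set_fit_request(sample_weight=True)
    #     ).fit(X, y, sample_weight=w)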


def _fit_and_score_over_thresholds(
    classifier,
    X,
    y,
    *,
    fit_params,
    train_idx,
    val_idx,
    curve_scorer,
    score_params,
):
    """Fit a classifier and compute the scores for different decision thresholds.

    Parameters
    ----------
    classifier : estimator instance
        The classifier to fit and use for scoring. If `classifier` is already fitted,
        it will be used as is.

    X : {array-like, sparse matrix} of shape (n_samples, n_features)
        The entire dataset.

    y : array-like of shape (n_samples,)
        The entire target vector.

    fit_params : dict
        Parameters to pass to the `fit` method of the underlying classifier.

    train_idx : ndarray of shape (n_train_samples,) or None
        The indices of the training set. If `None`, `classifier` is expected to be
        already fitted.

    val_idx : ndarray of shape (n_val_samples,)
        The indices of the validation set used to score `classifier`. If `train_idx`
        is `None`, the entire set is used.

    curve_scorer : scorer instance
        The scorer taking `classifier` and the validation set as input and outputting
        decision thresholds and scores as a curve. Note that this is different from
        the usual scorer, which outputs a single score value: `curve_scorer` outputs
        one score value per threshold.

    score_params : dict
        Parameters to pass to the `score` method of the underlying scorer.

    Returns
    -------
    scores : ndarray of shape (thresholds,) or tuple of such arrays
        The scores computed for each decision threshold. When TPR/TNR or precision/
        recall are computed, `scores` is a tuple of two arrays.

    potential_thresholds : ndarray of shape (thresholds,)
        The decision thresholds used to compute the scores. They are returned in
        ascending order.
    """
    if train_idx is not None:
        X_train, X_val = _safe_indexing(X, train_idx), _safe_indexing(X, val_idx)
        y_train, y_val = _safe_indexing(y, train_idx), _safe_indexing(y, val_idx)
        fit_params_train = _check_method_params(X, fit_params, indices=train_idx)
        score_params_val = _check_method_params(X, score_params, indices=val_idx)
        classifier.fit(X_train, y_train, **fit_params_train)
    else:  # prefit estimator, only a validation set is provided
        X_val, y_val, score_params_val = X, y, score_params

    return curve_scorer(classifier, X_val, y_val, **score_params_val)
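
# Illustration (not part of the original code): each call returns a
# `(scores, potential_thresholds)` pair, which is why the caller below can
# separate the per-fold curves with `zip(*...)`:
#
#     scores, thresholds = _fit_and_score_over_thresholds(...)
#     # one score per candidate threshold, thresholds in ascending order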


def _mean_interpolated_score(target_thresholds, cv_thresholds, cv_scores):
    """Compute the mean interpolated score across folds by defining common thresholds.

    Parameters
    ----------
    target_thresholds : ndarray of shape (thresholds,)
        The thresholds to use to compute the mean score.

    cv_thresholds : ndarray of shape (n_folds, thresholds_fold)
        The thresholds used to compute the scores for each fold.

    cv_scores : ndarray of shape (n_folds, thresholds_fold)
        The scores computed for each threshold for each fold.

    Returns
    -------
    mean_score : ndarray of shape (thresholds,)
        The mean score across all folds for each target threshold.
    """
    return np.mean(
        [
            np.interp(target_thresholds, split_thresholds, split_score)
            for split_thresholds, split_score in zip(cv_thresholds, cv_scores)
        ],
        axis=0,
    )
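
# Worked example (illustrative only): two folds whose threshold grids differ
# are first brought onto the common `target_thresholds` grid via `np.interp`,
# then averaged pointwise:
#
#     cv_thresholds = [np.array([0.0, 1.0]), np.array([0.0, 0.5, 1.0])]
#     cv_scores = [np.array([0.2, 0.8]), np.array([0.1, 0.6, 0.9])]
#     target = np.array([0.0, 0.5, 1.0])
#     # fold 1 interpolates to [0.2, 0.5, 0.8]; fold 2 is already on the grid;
#     # _mean_interpolated_score(target, cv_thresholds, cv_scores)
#     # -> array([0.15, 0.55, 0.85])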


class TunedThresholdClassifierCV(BaseThresholdClassifier):
    """Classifier that post-tunes the decision threshold using cross-validation.

    This estimator post-tunes the decision threshold (cut-off point) that is
    used for converting posterior probability estimates (i.e. output of
    `predict_proba`) or decision scores (i.e. output of `decision_function`)
    into a class label. The tuning is done by optimizing a binary metric,
    potentially constrained by another metric.

    Read more in the :ref:`User Guide <TunedThresholdClassifierCV>`.

    .. versionadded:: 1.5

    Parameters
    ----------
    estimator : estimator instance
        The classifier, fitted or not, for which we want to optimize
        the decision threshold used during `predict`.

    scoring : str or callable, default="balanced_accuracy"
        The objective metric to be optimized. Can be one of:

        - str: string associated to a scoring function for binary classification,
          see :ref:`scoring_string_names` for options.
        - callable: a scorer callable object (e.g., function) with signature
          ``scorer(estimator, X, y)``. See :ref:`scoring_callable` for details.

    response_method : {"auto", "decision_function", "predict_proba"}, default="auto"
        Methods of the classifier `estimator` corresponding to the
        decision function for which we want to find a threshold. It can be:

        * if `"auto"`, it will try to invoke, for each classifier,
          `"predict_proba"` or `"decision_function"` in that order.
        * otherwise, one of `"predict_proba"` or `"decision_function"`.
          If the method is not implemented by the classifier, it will raise an
          error.

    thresholds : int or array-like, default=100
        The number of decision thresholds to use when discretizing the output of the
        classifier `method`. Pass an array-like to manually specify the thresholds
        to use.

    cv : int, float, cross-validation generator, iterable or "prefit", default=None
        Determines the cross-validation splitting strategy to train classifier.
        Possible inputs for cv are:

        * `None`, to use the default 5-fold stratified K-fold cross validation;
        * An integer number, to specify the number of folds in a stratified k-fold;
        * A float number, to specify a single shuffle split. The float should be
          in (0, 1) and represent the size of the validation set;
        * An object to be used as a cross-validation generator;
        * An iterable yielding train, test splits;
        * `"prefit"`, to bypass the cross-validation.

        Refer to the :ref:`User Guide <cross_validation>` for the various
        cross-validation strategies that can be used here.

        .. warning::
            Using `cv="prefit"` and passing the same dataset for fitting `estimator`
            and tuning the cut-off point is subject to undesired overfitting. You can
            refer to :ref:`TunedThresholdClassifierCV_no_cv` for an example.

            This option should only be used when the set used to fit `estimator` is
            different from the one used to tune the cut-off point (by calling
            :meth:`TunedThresholdClassifierCV.fit`).

    refit : bool, default=True
        Whether or not to refit the classifier on the entire training set once
        the decision threshold has been found.
        Note that `refit=False` with a cross-validation strategy that has more
        than a single split will raise an error. Similarly, `refit=True` in
        conjunction with `cv="prefit"` will raise an error.

    n_jobs : int, default=None
        The number of jobs to run in parallel. When `cv` represents a
        cross-validation strategy, the fitting and scoring on each data split
        is done in parallel. ``None`` means 1 unless in a
        :obj:`joblib.parallel_backend` context. ``-1`` means using all
        processors. See :term:`Glossary <n_jobs>` for more details.

    random_state : int, RandomState instance or None, default=None
        Controls the randomness of cross-validation when `cv` is a float.
        See :term:`Glossary <random_state>`.

    store_cv_results : bool, default=False
        Whether to store all scores and thresholds computed during the cross-validation
        process.

    Attributes
    ----------
    estimator_ : estimator instance
        The fitted classifier used when predicting.

    best_threshold_ : float
        The new decision threshold.

    best_score_ : float or None
        The optimal score of the objective metric, evaluated at `best_threshold_`.

    cv_results_ : dict or None
        A dictionary containing the scores and thresholds computed during the
        cross-validation process. It only exists if `store_cv_results=True`. The
        keys are `"thresholds"` and `"scores"`.

    classes_ : ndarray of shape (n_classes,)
        The class labels.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Only defined if the
        underlying estimator exposes such an attribute when fit.

    See Also
    --------
    sklearn.model_selection.FixedThresholdClassifier : Classifier that uses a
        constant threshold.
    sklearn.calibration.CalibratedClassifierCV : Estimator that calibrates
        probabilities.

    Examples
    --------
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.ensemble import RandomForestClassifier
    >>> from sklearn.metrics import classification_report
    >>> from sklearn.model_selection import TunedThresholdClassifierCV, train_test_split
    >>> X, y = make_classification(
    ...     n_samples=1_000, weights=[0.9, 0.1], class_sep=0.8, random_state=42
    ... )
    >>> X_train, X_test, y_train, y_test = train_test_split(
    ...     X, y, stratify=y, random_state=42
    ... )
    >>> classifier = RandomForestClassifier(random_state=0).fit(X_train, y_train)
    >>> print(classification_report(y_test, classifier.predict(X_test)))
                  precision    recall  f1-score   support
    <BLANKLINE>
               0       0.94      0.99      0.96       224
               1       0.80      0.46      0.59        26
    <BLANKLINE>
        accuracy                           0.93       250
       macro avg       0.87      0.72      0.77       250
    weighted avg       0.93      0.93      0.92       250
    <BLANKLINE>
    >>> classifier_tuned = TunedThresholdClassifierCV(
    ...     classifier, scoring="balanced_accuracy"
    ... ).fit(X_train, y_train)
    >>> print(
    ...     f"Cut-off point found at {classifier_tuned.best_threshold_:.3f}"
    ... )
    Cut-off point found at 0.342
    >>> print(classification_report(y_test, classifier_tuned.predict(X_test)))
                  precision    recall  f1-score   support
    <BLANKLINE>
               0       0.96      0.95      0.96       224
               1       0.61      0.65      0.63        26
    <BLANKLINE>
        accuracy                           0.92       250
       macro avg       0.78      0.80      0.79       250
    weighted avg       0.92      0.92      0.92       250
    <BLANKLINE>
    """

    _parameter_constraints: dict = {
        **BaseThresholdClassifier._parameter_constraints,
        "scoring": [
            StrOptions(set(get_scorer_names())),
            callable,
            MutableMapping,
        ],
        "thresholds": [Interval(Integral, 1, None, closed="left"), "array-like"],
        "cv": [
            "cv_object",
            StrOptions({"prefit"}),
            Interval(RealNotInt, 0.0, 1.0, closed="neither"),
        ],
        "refit": ["boolean"],
        "n_jobs": [Integral, None],
        "random_state": ["random_state"],
        "store_cv_results": ["boolean"],
    }

    def __init__(
        self,
        estimator,
        *,
        scoring="balanced_accuracy",
        response_method="auto",
        thresholds=100,
        cv=None,
        refit=True,
        n_jobs=None,
        random_state=None,
        store_cv_results=False,
    ):
        super().__init__(estimator=estimator, response_method=response_method)
        self.scoring = scoring
        self.thresholds = thresholds
        self.cv = cv
        self.refit = refit
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.store_cv_results = store_cv_results

    def _fit(self, X, y, **params):
        """Fit the classifier and post-tune the decision threshold.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data.

        y : array-like of shape (n_samples,)
            Target values.

        **params : dict
            Parameters to pass to the `fit` method of the underlying
            classifier and to the `scoring` scorer.

        Returns
        -------
        self : object
            Returns an instance of self.
        """
        if isinstance(self.cv, Real) and 0 < self.cv < 1:
            cv = StratifiedShuffleSplit(
                n_splits=1, test_size=self.cv, random_state=self.random_state
            )
        elif self.cv == "prefit":
            if self.refit is True:
                raise ValueError("When cv='prefit', refit cannot be True.")
            try:
                check_is_fitted(self.estimator, "classes_")
            except NotFittedError as exc:
                raise NotFittedError(
                    """When cv='prefit', `estimator` must be fitted."""
                ) from exc
            cv = self.cv
        else:
            cv = check_cv(self.cv, y=y, classifier=True)
            if self.refit is False and cv.get_n_splits() > 1:
                raise ValueError("When cv has several folds, refit cannot be False.")

        routed_params = process_routing(self, "fit", **params)
        self._curve_scorer = self._get_curve_scorer()

        # in the following block, we:
        # - define the final classifier `self.estimator_` and train it if necessary
        # - define `classifier` to be used to post-tune the decision threshold
        # - define `split` to be used to fit/score `classifier`
        if cv == "prefit":
            self.estimator_ = self.estimator
            classifier = self.estimator_
            splits = [(None, range(_num_samples(X)))]
        else:
            self.estimator_ = clone(self.estimator)
            classifier = clone(self.estimator)
            splits = cv.split(X, y, **routed_params.splitter.split)

            if self.refit:
                # train on the whole dataset
                X_train, y_train, fit_params_train = X, y, routed_params.estimator.fit
            else:
                # single split cross-validation
                train_idx, _ = next(cv.split(X, y, **routed_params.splitter.split))
                X_train = _safe_indexing(X, train_idx)
                y_train = _safe_indexing(y, train_idx)
                fit_params_train = _check_method_params(
                    X, routed_params.estimator.fit, indices=train_idx
                )

            self.estimator_.fit(X_train, y_train, **fit_params_train)

        cv_scores, cv_thresholds = zip(
            *Parallel(n_jobs=self.n_jobs)(
                delayed(_fit_and_score_over_thresholds)(
                    clone(classifier) if cv != "prefit" else classifier,
                    X,
                    y,
                    fit_params=routed_params.estimator.fit,
                    train_idx=train_idx,
                    val_idx=val_idx,
                    curve_scorer=self._curve_scorer,
                    score_params=routed_params.scorer.score,
                )
                for train_idx, val_idx in splits
            )
        )

        if any(np.isclose(th[0], th[-1]) for th in cv_thresholds):
            raise ValueError(
                "The provided estimator makes constant predictions. Therefore, it is "
                "impossible to optimize the decision threshold."
            )

        # find the global min and max thresholds across all folds
        min_threshold = min(
            split_thresholds.min() for split_thresholds in cv_thresholds
        )
        max_threshold = max(
            split_thresholds.max() for split_thresholds in cv_thresholds
        )
        if isinstance(self.thresholds, Integral):
            decision_thresholds = np.linspace(
                min_threshold, max_threshold, num=self.thresholds
            )
        else:
            decision_thresholds = np.asarray(self.thresholds)

        objective_scores = _mean_interpolated_score(
            decision_thresholds, cv_thresholds, cv_scores
        )
        best_idx = objective_scores.argmax()
        self.best_score_ = objective_scores[best_idx]
        self.best_threshold_ = decision_thresholds[best_idx]
        if self.store_cv_results:
            self.cv_results_ = {
                "thresholds": decision_thresholds,
                "scores": objective_scores,
            }

        return self
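
    # Usage sketch (not part of the original code): with `store_cv_results=True`,
    # the averaged objective curve computed above can be inspected or plotted
    # after fitting, e.g.:
    #
    #     tuned = TunedThresholdClassifierCV(
    #         LogisticRegression(), store_cv_results=True
    #     ).fit(X, y)
    #     plt.plot(tuned.cv_results_["thresholds"], tuned.cv_results_["scores"])
    #     plt.axvline(tuned.best_threshold_, linestyle="--")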

    def predict(self, X):
        """Predict the target of new samples.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            The samples, as accepted by `estimator.predict`.

        Returns
        -------
        class_labels : ndarray of shape (n_samples,)
            The predicted class.
        """
        check_is_fitted(self, "estimator_")
        pos_label = self._curve_scorer._get_pos_label()
        y_score, _ = _get_response_values_binary(
            self.estimator_,
            X,
            self._get_response_method(),
            pos_label=pos_label,
        )

        return _threshold_scores_to_class_labels(
            y_score, self.best_threshold_, self.classes_, pos_label
        )

    def get_metadata_routing(self):
        """Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        """
        router = (
            MetadataRouter(owner=self)
            .add(
                estimator=self.estimator,
                method_mapping=MethodMapping().add(callee="fit", caller="fit"),
            )
            .add(
                splitter=self.cv,
                method_mapping=MethodMapping().add(callee="split", caller="fit"),
            )
            .add(
                scorer=self._get_curve_scorer(),
                method_mapping=MethodMapping().add(callee="score", caller="fit"),
            )
        )
        return router

    def _get_curve_scorer(self):
        """Get the curve scorer based on the objective metric used."""
        scoring = check_scoring(self.estimator, scoring=self.scoring)
        curve_scorer = _CurveScorer.from_scorer(
            scoring, self._get_response_method(), self.thresholds
        )
        return curve_scorer
@@ -0,0 +1,885 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

import numpy as np

from sklearn.model_selection._validation import learning_curve, validation_curve
from sklearn.utils._optional_dependencies import check_matplotlib_support
from sklearn.utils._plotting import _interval_max_min_ratio, _validate_score_name


class _BaseCurveDisplay:
    def _plot_curve(
        self,
        x_data,
        *,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="test",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        check_matplotlib_support(f"{self.__class__.__name__}.plot")

        import matplotlib.pyplot as plt

        if ax is None:
            _, ax = plt.subplots()

        if negate_score:
            train_scores, test_scores = -self.train_scores, -self.test_scores
        else:
            train_scores, test_scores = self.train_scores, self.test_scores

        if std_display_style not in ("errorbar", "fill_between", None):
            raise ValueError(
                f"Unknown std_display_style: {std_display_style}. Should be one of"
                " 'errorbar', 'fill_between', or None."
            )

        if score_type not in ("test", "train", "both"):
            raise ValueError(
                f"Unknown score_type: {score_type}. Should be one of 'test', "
                "'train', or 'both'."
            )

        if score_type == "train":
            scores = {"Train": train_scores}
        elif score_type == "test":
            scores = {"Test": test_scores}
        else:  # score_type == "both"
            scores = {"Train": train_scores, "Test": test_scores}

        if std_display_style in ("fill_between", None):
            # plot the mean score
            if line_kw is None:
                line_kw = {}

            self.lines_ = []
            for line_label, score in scores.items():
                self.lines_.append(
                    *ax.plot(
                        x_data,
                        score.mean(axis=1),
                        label=line_label,
                        **line_kw,
                    )
                )
            self.errorbar_ = None
            self.fill_between_ = None  # overwritten below by fill_between

        if std_display_style == "errorbar":
            if errorbar_kw is None:
                errorbar_kw = {}

            self.errorbar_ = []
            for line_label, score in scores.items():
                self.errorbar_.append(
                    ax.errorbar(
                        x_data,
                        score.mean(axis=1),
                        score.std(axis=1),
                        label=line_label,
                        **errorbar_kw,
                    )
                )
            self.lines_, self.fill_between_ = None, None
        elif std_display_style == "fill_between":
            if fill_between_kw is None:
                fill_between_kw = {}
            default_fill_between_kw = {"alpha": 0.5}
            fill_between_kw = {**default_fill_between_kw, **fill_between_kw}

            self.fill_between_ = []
            for line_label, score in scores.items():
                self.fill_between_.append(
                    ax.fill_between(
                        x_data,
                        score.mean(axis=1) - score.std(axis=1),
                        score.mean(axis=1) + score.std(axis=1),
                        **fill_between_kw,
                    )
                )

        score_name = self.score_name if score_name is None else score_name

        ax.legend()

        # We found that a ratio above 5 between the largest and the smallest gap
        # of the x values is a good indicator for choosing a log scale over a
        # linear one.
        if _interval_max_min_ratio(x_data) > 5:
            xscale = "symlog" if x_data.min() <= 0 else "log"
        else:
            xscale = "linear"

        ax.set_xscale(xscale)
        ax.set_ylabel(f"{score_name}")

        self.ax_ = ax
        self.figure_ = ax.figure
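
    # Illustration (an assumption about `_interval_max_min_ratio`, which lives
    # in sklearn.utils._plotting): for x values such as [1, 10, 100, 1000] the
    # consecutive gaps are [9, 90, 900], a max/min gap ratio of 100 > 5, so a
    # log scale is chosen; evenly spaced values give a ratio of 1 and keep the
    # linear scale. Conceptually:
    #
    #     gaps = np.diff(np.sort(np.asarray(x_data)))
    #     ratio = gaps.max() / gaps.min()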


class LearningCurveDisplay(_BaseCurveDisplay):
    """Learning Curve visualization.

    It is recommended to use
    :meth:`~sklearn.model_selection.LearningCurveDisplay.from_estimator` to
    create a :class:`~sklearn.model_selection.LearningCurveDisplay` instance.
    All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>` for general information
    about the visualization API and
    :ref:`detailed documentation <learning_curve>` regarding the learning
    curve visualization.

    .. versionadded:: 1.2

    Parameters
    ----------
    train_sizes : ndarray of shape (n_unique_ticks,)
        Numbers of training examples that have been used to generate the
        learning curve.

    train_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on test sets.

    score_name : str, default=None
        The name of the score used in `learning_curve`. It will override the name
        inferred from the `scoring` parameter. If `score_name` is `None`, we use
        `"Score"` if `negate_score` is `False` and `"Negative score"` otherwise. If
        `scoring` is a string or a callable, we infer the name. We replace `_` by
        spaces and capitalize the first letter. We remove `neg_` and replace it by
        `"Negative"` if `negate_score` is `False` or just remove it otherwise.

    Attributes
    ----------
    ax_ : matplotlib Axes
        Axes with the learning curve.

    figure_ : matplotlib Figure
        Figure containing the learning curve.

    errorbar_ : list of matplotlib Artist or None
        When the `std_display_style` is `"errorbar"`, this is a list of
        `matplotlib.container.ErrorbarContainer` objects. If another style is
        used, `errorbar_` is `None`.

    lines_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.lines.Line2D` objects corresponding to the mean train and
        test scores. If another style is used, `lines_` is `None`.

    fill_between_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.collections.PolyCollection` objects. If another style is
        used, `fill_between_` is `None`.

    See Also
    --------
    sklearn.model_selection.learning_curve : Compute the learning curve.

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.model_selection import LearningCurveDisplay, learning_curve
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> X, y = load_iris(return_X_y=True)
    >>> tree = DecisionTreeClassifier(random_state=0)
    >>> train_sizes, train_scores, test_scores = learning_curve(
    ...     tree, X, y)
    >>> display = LearningCurveDisplay(train_sizes=train_sizes,
    ...     train_scores=train_scores, test_scores=test_scores, score_name="Score")
    >>> display.plot()
    <...>
    >>> plt.show()
    """

    def __init__(self, *, train_sizes, train_scores, test_scores, score_name=None):
        self.train_sizes = train_sizes
        self.train_scores = train_scores
        self.test_scores = test_scores
        self.score_name = score_name

    def plot(
        self,
        ax=None,
        *,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot visualization.

        Parameters
        ----------
        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes are
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.learning_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score_name`
            is `None`, we use `"Score"` if `negate_score` is `False` and
            `"Negative score"` otherwise. If `scoring` is a string or a callable, we
            infer the name. We replace `_` by spaces and capitalize the first letter.
            We remove `neg_` and replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If None, no standard deviation representation is
            displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.LearningCurveDisplay`
            Object that stores computed values.
        """
        self._plot_curve(
            self.train_sizes,
            ax=ax,
            negate_score=negate_score,
            score_name=score_name,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
        self.ax_.set_xlabel("Number of samples in the training set")
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        y,
        *,
        groups=None,
        train_sizes=np.linspace(0.1, 1.0, 5),
        cv=None,
        scoring=None,
        exploit_incremental_learning=False,
        n_jobs=None,
        pre_dispatch="all",
        verbose=0,
        shuffle=False,
        random_state=None,
        error_score=np.nan,
        fit_params=None,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Create a learning curve display from an estimator.

        Read more in the :ref:`User Guide <visualizations>` for general
        information about the visualization API and :ref:`detailed
        documentation <learning_curve>` regarding the learning curve
        visualization.

        Parameters
        ----------
        estimator : object type that implements the "fit" and "predict" methods
            An object of that type which is cloned for each validation.

        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`GroupKFold`).

        train_sizes : array-like of shape (n_ticks,), \
                default=np.linspace(0.1, 1.0, 5)
            Relative or absolute numbers of training examples that will be used
            to generate the learning curve. If the dtype is float, it is
            regarded as a fraction of the maximum size of the training set
            (that is determined by the selected validation method), i.e. it has
            to be within (0, 1]. Otherwise it is interpreted as absolute sizes
            of the training sets. Note that for classification the number of
            samples usually has to be big enough to contain at least one
            sample from each class.

        cv : int, cross-validation generator or an iterable, default=None
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - None, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable yielding (train, test) splits as arrays of indices.

            For int/None inputs, if the estimator is a classifier and `y` is
            either binary or multiclass,
            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all
            other cases, :class:`~sklearn.model_selection.KFold` is used. These
            splitters are instantiated with `shuffle=False` so the splits will
            be the same across calls.

            Refer to the :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        scoring : str or callable, default=None
            The scoring method to use when calculating the learning curve. Options:

            - str: see :ref:`scoring_string_names` for options.
            - callable: a scorer callable object (e.g., function) with signature
              ``scorer(estimator, X, y)``. See :ref:`scoring_callable` for details.
            - `None`: the `estimator`'s
              :ref:`default evaluation criterion <scoring_api_overview>` is used.

        exploit_incremental_learning : bool, default=False
            If the estimator supports incremental learning, this will be
            used to speed up fitting for different training set sizes.

        n_jobs : int, default=None
            Number of jobs to run in parallel. Training the estimator and
            computing the score are parallelized over the different training
            and test sets. `None` means 1 unless in a
            :obj:`joblib.parallel_backend` context. `-1` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        pre_dispatch : int or str, default='all'
            Number of predispatched jobs for parallel execution (default is
            all). The option can reduce the allocated memory. The str can
            be an expression like '2*n_jobs'.

        verbose : int, default=0
            Controls the verbosity: the higher, the more messages.

        shuffle : bool, default=False
            Whether to shuffle training data before taking prefixes of it
            based on `train_sizes`.

        random_state : int, RandomState instance or None, default=None
            Used when `shuffle` is True. Pass an int for reproducible
            output across multiple function calls.
            See :term:`Glossary <random_state>`.

        error_score : 'raise' or numeric, default=np.nan
            Value to assign to the score if an error occurs in estimator
            fitting. If set to 'raise', the error is raised. If a numeric value
            is given, FitFailedWarning is raised.

        fit_params : dict, default=None
            Parameters to pass to the fit method of the estimator.

        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes are
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.learning_curve`. This is
            particularly useful when using the error denoted by `neg_*` in
            `scikit-learn`.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It will
            override the name inferred from the `scoring` parameter. If `score_name`
            is `None`, we use `"Score"` if `negate_score` is `False` and
            `"Negative score"` otherwise. If `scoring` is a string or a callable, we
            infer the name. We replace `_` by spaces and capitalize the first letter.
            We remove `neg_` and replace it by `"Negative"` if `negate_score` is
            `False` or just remove it otherwise.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If `None`, no representation of the standard deviation
            is displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw mean score and standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.LearningCurveDisplay`
            Object that stores computed values.

        Examples
        --------
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import load_iris
        >>> from sklearn.model_selection import LearningCurveDisplay
        >>> from sklearn.tree import DecisionTreeClassifier
        >>> X, y = load_iris(return_X_y=True)
        >>> tree = DecisionTreeClassifier(random_state=0)
        >>> LearningCurveDisplay.from_estimator(tree, X, y)
        <...>
        >>> plt.show()
        """
        check_matplotlib_support(f"{cls.__name__}.from_estimator")

        score_name = _validate_score_name(score_name, scoring, negate_score)

        train_sizes, train_scores, test_scores = learning_curve(
            estimator,
            X,
            y,
            groups=groups,
            train_sizes=train_sizes,
            cv=cv,
            scoring=scoring,
            exploit_incremental_learning=exploit_incremental_learning,
            n_jobs=n_jobs,
            pre_dispatch=pre_dispatch,
            verbose=verbose,
            shuffle=shuffle,
            random_state=random_state,
            error_score=error_score,
            return_times=False,
            params=fit_params,
        )

        viz = cls(
            train_sizes=train_sizes,
            train_scores=train_scores,
            test_scores=test_scores,
            score_name=score_name,
        )
        return viz.plot(
            ax=ax,
            negate_score=negate_score,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
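
    # Usage sketch (not part of the original code): the keyword groups defined
    # above make it easy to switch the uncertainty style, e.g. error bars on
    # the test scores only:
    #
    #     LearningCurveDisplay.from_estimator(
    #         tree, X, y, score_type="test", std_display_style="errorbar",
    #         errorbar_kw={"capsize": 3},
    #     )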


class ValidationCurveDisplay(_BaseCurveDisplay):
    """Validation Curve visualization.

    It is recommended to use
    :meth:`~sklearn.model_selection.ValidationCurveDisplay.from_estimator` to
    create a :class:`~sklearn.model_selection.ValidationCurveDisplay` instance.
    All parameters are stored as attributes.

    Read more in the :ref:`User Guide <visualizations>` for general information
    about the visualization API and :ref:`detailed documentation
    <validation_curve>` regarding the validation curve visualization.

    .. versionadded:: 1.3

    Parameters
    ----------
    param_name : str
        Name of the parameter that has been varied.

    param_range : array-like of shape (n_ticks,)
        The values of the parameter that have been evaluated.

    train_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on training sets.

    test_scores : ndarray of shape (n_ticks, n_cv_folds)
        Scores on test sets.

    score_name : str, default=None
        The name of the score used in `validation_curve`. It will override the
        name inferred from the `scoring` parameter. If `score_name` is `None`,
        we use `"Score"` if `negate_score` is `False` and `"Negative score"`
        otherwise. If `scoring` is a string or a callable, we infer the name:
        we replace `_` by spaces and capitalize the first letter. If
        `negate_score` is `False`, we replace a leading `neg_` by `"Negative"`;
        otherwise we simply drop it.

    Attributes
    ----------
    ax_ : matplotlib Axes
        Axes with the validation curve.

    figure_ : matplotlib Figure
        Figure containing the validation curve.

    errorbar_ : list of matplotlib Artist or None
        When the `std_display_style` is `"errorbar"`, this is a list of
        `matplotlib.container.ErrorbarContainer` objects. If another style is
        used, `errorbar_` is `None`.

    lines_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.lines.Line2D` objects corresponding to the mean train and
        test scores. If another style is used, `lines_` is `None`.

    fill_between_ : list of matplotlib Artist or None
        When the `std_display_style` is `"fill_between"`, this is a list of
        `matplotlib.collections.PolyCollection` objects. If another style is
        used, `fill_between_` is `None`.

    See Also
    --------
    sklearn.model_selection.validation_curve : Compute the validation curve.

    Examples
    --------
    >>> import numpy as np
    >>> import matplotlib.pyplot as plt
    >>> from sklearn.datasets import make_classification
    >>> from sklearn.model_selection import ValidationCurveDisplay, validation_curve
    >>> from sklearn.linear_model import LogisticRegression
    >>> X, y = make_classification(n_samples=1_000, random_state=0)
    >>> logistic_regression = LogisticRegression()
    >>> param_name, param_range = "C", np.logspace(-8, 3, 10)
    >>> train_scores, test_scores = validation_curve(
    ...     logistic_regression, X, y, param_name=param_name, param_range=param_range
    ... )
    >>> display = ValidationCurveDisplay(
    ...     param_name=param_name, param_range=param_range,
    ...     train_scores=train_scores, test_scores=test_scores, score_name="Score"
    ... )
    >>> display.plot()
    <...>
    >>> plt.show()
    """

    def __init__(
        self, *, param_name, param_range, train_scores, test_scores, score_name=None
    ):
        self.param_name = param_name
        self.param_range = param_range
        self.train_scores = train_scores
        self.test_scores = test_scores
        self.score_name = score_name

    def plot(
        self,
        ax=None,
        *,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Plot visualization.

        Parameters
        ----------
        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.validation_curve`. This is
            particularly useful when using metrics denoted by `neg_*` in
            scikit-learn.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It
            will override the name inferred from the `scoring` parameter. If
            `score_name` is `None`, we use `"Score"` if `negate_score` is
            `False` and `"Negative score"` otherwise. If `scoring` is a string
            or a callable, we infer the name: we replace `_` by spaces and
            capitalize the first letter. If `negate_score` is `False`, we
            replace a leading `neg_` by `"Negative"`; otherwise we simply drop
            it.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If `None`, no standard deviation representation is
            displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw the mean score and the standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.ValidationCurveDisplay`
            Object that stores computed values.
        """
        self._plot_curve(
            self.param_range,
            ax=ax,
            negate_score=negate_score,
            score_name=score_name,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
        self.ax_.set_xlabel(f"{self.param_name}")
        return self

    @classmethod
    def from_estimator(
        cls,
        estimator,
        X,
        y,
        *,
        param_name,
        param_range,
        groups=None,
        cv=None,
        scoring=None,
        n_jobs=None,
        pre_dispatch="all",
        verbose=0,
        error_score=np.nan,
        fit_params=None,
        ax=None,
        negate_score=False,
        score_name=None,
        score_type="both",
        std_display_style="fill_between",
        line_kw=None,
        fill_between_kw=None,
        errorbar_kw=None,
    ):
        """Create a validation curve display from an estimator.

        Read more in the :ref:`User Guide <visualizations>` for general
        information about the visualization API and :ref:`detailed
        documentation <validation_curve>` regarding the validation curve
        visualization.

        Parameters
        ----------
        estimator : object type that implements the "fit" and "predict" methods
            An object of that type which is cloned for each validation.

        X : array-like of shape (n_samples, n_features)
            Training data, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,) or (n_samples, n_outputs) or None
            Target relative to X for classification or regression;
            None for unsupervised learning.

        param_name : str
            Name of the parameter that will be varied.

        param_range : array-like of shape (n_values,)
            The values of the parameter that will be evaluated.

        groups : array-like of shape (n_samples,), default=None
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`GroupKFold`).

        cv : int, cross-validation generator or an iterable, default=None
            Determines the cross-validation splitting strategy.
            Possible inputs for cv are:

            - None, to use the default 5-fold cross validation,
            - int, to specify the number of folds in a `(Stratified)KFold`,
            - :term:`CV splitter`,
            - An iterable yielding (train, test) splits as arrays of indices.

            For int/None inputs, if the estimator is a classifier and `y` is
            either binary or multiclass,
            :class:`~sklearn.model_selection.StratifiedKFold` is used. In all
            other cases, :class:`~sklearn.model_selection.KFold` is used. These
            splitters are instantiated with `shuffle=False` so the splits will
            be the same across calls.

            Refer to the :ref:`User Guide <cross_validation>` for the various
            cross-validation strategies that can be used here.

        scoring : str or callable, default=None
            Scoring method to use when computing the validation curve. Options:

            - str: see :ref:`scoring_string_names` for options.
            - callable: a scorer callable object (e.g., function) with signature
              ``scorer(estimator, X, y)``. See :ref:`scoring_callable` for details.
            - `None`: the `estimator`'s
              :ref:`default evaluation criterion <scoring_api_overview>` is used.

        n_jobs : int, default=None
            Number of jobs to run in parallel. Training the estimator and
            computing the score are parallelized over the different training
            and test sets. `None` means 1 unless in a
            :obj:`joblib.parallel_backend` context. `-1` means using all
            processors. See :term:`Glossary <n_jobs>` for more details.

        pre_dispatch : int or str, default='all'
            Number of predispatched jobs for parallel execution (default is
            all). This option can reduce the allocated memory. The str can
            be an expression like '2*n_jobs'.

        verbose : int, default=0
            Controls the verbosity: the higher, the more messages.

        error_score : 'raise' or numeric, default=np.nan
            Value to assign to the score if an error occurs in estimator
            fitting. If set to 'raise', the error is raised. If a numeric value
            is given, a FitFailedWarning is raised.

        fit_params : dict, default=None
            Parameters to pass to the fit method of the estimator.

        ax : matplotlib Axes, default=None
            Axes object to plot on. If `None`, a new figure and axes is
            created.

        negate_score : bool, default=False
            Whether or not to negate the scores obtained through
            :func:`~sklearn.model_selection.validation_curve`. This is
            particularly useful when using metrics denoted by `neg_*` in
            scikit-learn.

        score_name : str, default=None
            The name of the score used to decorate the y-axis of the plot. It
            will override the name inferred from the `scoring` parameter. If
            `score_name` is `None`, we use `"Score"` if `negate_score` is
            `False` and `"Negative score"` otherwise. If `scoring` is a string
            or a callable, we infer the name: we replace `_` by spaces and
            capitalize the first letter. If `negate_score` is `False`, we
            replace a leading `neg_` by `"Negative"`; otherwise we simply drop
            it.

        score_type : {"test", "train", "both"}, default="both"
            The type of score to plot. Can be one of `"test"`, `"train"`, or
            `"both"`.

        std_display_style : {"errorbar", "fill_between"} or None, default="fill_between"
            The style used to display the score standard deviation around the
            mean score. If `None`, no representation of the standard deviation
            is displayed.

        line_kw : dict, default=None
            Additional keyword arguments passed to the `plt.plot` used to draw
            the mean score.

        fill_between_kw : dict, default=None
            Additional keyword arguments passed to the `plt.fill_between` used
            to draw the score standard deviation.

        errorbar_kw : dict, default=None
            Additional keyword arguments passed to the `plt.errorbar` used to
            draw the mean score and the standard deviation score.

        Returns
        -------
        display : :class:`~sklearn.model_selection.ValidationCurveDisplay`
            Object that stores computed values.

        Examples
        --------
        >>> import numpy as np
        >>> import matplotlib.pyplot as plt
        >>> from sklearn.datasets import make_classification
        >>> from sklearn.model_selection import ValidationCurveDisplay
        >>> from sklearn.linear_model import LogisticRegression
        >>> X, y = make_classification(n_samples=1_000, random_state=0)
        >>> logistic_regression = LogisticRegression()
        >>> param_name, param_range = "C", np.logspace(-8, 3, 10)
        >>> ValidationCurveDisplay.from_estimator(
        ...     logistic_regression, X, y, param_name=param_name,
        ...     param_range=param_range,
        ... )
        <...>
        >>> plt.show()
        """
        check_matplotlib_support(f"{cls.__name__}.from_estimator")

        score_name = _validate_score_name(score_name, scoring, negate_score)

        train_scores, test_scores = validation_curve(
            estimator,
            X,
            y,
            param_name=param_name,
            param_range=param_range,
            groups=groups,
            cv=cv,
            scoring=scoring,
            n_jobs=n_jobs,
            pre_dispatch=pre_dispatch,
            verbose=verbose,
            error_score=error_score,
            params=fit_params,
        )

        viz = cls(
            param_name=param_name,
            param_range=np.asarray(param_range),
            train_scores=train_scores,
            test_scores=test_scores,
            score_name=score_name,
        )
        return viz.plot(
            ax=ax,
            negate_score=negate_score,
            score_type=score_type,
            std_display_style=std_display_style,
            line_kw=line_kw,
            fill_between_kw=fill_between_kw,
            errorbar_kw=errorbar_kw,
        )
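
    # A minimal re-plotting sketch (illustrative only): the returned display
    # stores the computed scores, so it can be drawn again on an existing Axes
    # without re-running cross-validation, e.g.
    #
    #   import matplotlib.pyplot as plt
    #   fig, ax = plt.subplots()
    #   display = ValidationCurveDisplay.from_estimator(
    #       logistic_regression, X, y, param_name="C", param_range=param_range
    #   )
    #   display.plot(ax=ax, std_display_style="errorbar")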
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because it is too large
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,24 @@
"""
Common utilities for testing model selection.
"""

import numpy as np

from sklearn.model_selection import KFold


class OneTimeSplitter:
    """A wrapper to make a KFold a single-entry cv iterator."""

    def __init__(self, n_splits=4, n_samples=99):
        self.n_splits = n_splits
        self.n_samples = n_samples
        self.indices = iter(KFold(n_splits=n_splits).split(np.ones(n_samples)))

    def split(self, X=None, y=None, groups=None):
        """Split can be called only once."""
        for index in self.indices:
            yield index

    def get_n_splits(self, X=None, y=None, groups=None):
        return self.n_splits
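

# A minimal sketch of the one-shot behaviour (illustrative values, not used by
# the tests): the KFold iterator is created once in `__init__`, so only the
# first call to `split` yields folds.
#
#   >>> splitter = OneTimeSplitter(n_splits=4, n_samples=8)
#   >>> len(list(splitter.split()))
#   4
#   >>> len(list(splitter.split()))  # iterator already exhausted
#   0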
@@ -0,0 +1,618 @@
import numpy as np
import pytest

from sklearn import config_context
from sklearn.base import clone
from sklearn.datasets import (
    load_breast_cancer,
    load_iris,
    make_classification,
    make_multilabel_classification,
)
from sklearn.dummy import DummyClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    balanced_accuracy_score,
    f1_score,
    fbeta_score,
    make_scorer,
)
from sklearn.metrics._scorer import _CurveScorer
from sklearn.model_selection import (
    FixedThresholdClassifier,
    StratifiedShuffleSplit,
    TunedThresholdClassifierCV,
)
from sklearn.model_selection._classification_threshold import (
    _fit_and_score_over_thresholds,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.utils._mocking import CheckingClassifier
from sklearn.utils._testing import (
    _convert_container,
    assert_allclose,
    assert_array_equal,
)


def test_fit_and_score_over_thresholds_curve_scorers():
    """Check that `_fit_and_score_over_thresholds` returns thresholds in
    ascending order for the different accepted curve scorers."""
    X, y = make_classification(n_samples=100, random_state=0)
    train_idx, val_idx = np.arange(50), np.arange(50, 100)
    classifier = LogisticRegression()

    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier,
        X,
        y,
        fit_params={},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )

    assert np.all(thresholds[:-1] <= thresholds[1:])
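    # An equivalent formulation of the ascending-order check above (purely
    # illustrative) would be: assert np.all(np.diff(thresholds) >= 0)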
    assert isinstance(scores, np.ndarray)
    assert np.logical_and(scores >= 0, scores <= 1).all()


def test_fit_and_score_over_thresholds_prefit():
    """Check the behaviour with a prefit classifier."""
    X, y = make_classification(n_samples=100, random_state=0)

    # `train_idx is None` to indicate that the classifier is prefit
    train_idx, val_idx = None, np.arange(50, 100)
    classifier = DecisionTreeClassifier(random_state=0).fit(X, y)
    # make sure that the classifier memorized the full dataset such that
    # we get perfect predictions and thus match the expected score
    assert classifier.score(X[val_idx], y[val_idx]) == pytest.approx(1.0)

    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=2,
        kwargs={},
    )
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier,
        X,
        y,
        fit_params={},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )
    assert np.all(thresholds[:-1] <= thresholds[1:])
    assert_allclose(scores, [0.5, 1.0])


@config_context(enable_metadata_routing=True)
def test_fit_and_score_over_thresholds_sample_weight():
    """Check that we dispatch the sample weights to fit and score the classifier."""
    X, y = load_iris(return_X_y=True)
    X, y = X[:100], y[:100]  # only 2 classes

    # create a dataset and repeat twice the samples of class #0
    X_repeated, y_repeated = np.vstack([X, X[y == 0]]), np.hstack([y, y[y == 0]])
    # create a sample weight vector that is equivalent to the repeated dataset
    sample_weight = np.ones_like(y)
    sample_weight[:50] *= 2

    classifier = LogisticRegression()
    train_repeated_idx = np.arange(X_repeated.shape[0])
    val_repeated_idx = np.arange(X_repeated.shape[0])
    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    scores_repeated, thresholds_repeated = _fit_and_score_over_thresholds(
        classifier,
        X_repeated,
        y_repeated,
        fit_params={},
        train_idx=train_repeated_idx,
        val_idx=val_repeated_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )

    train_idx, val_idx = np.arange(X.shape[0]), np.arange(X.shape[0])
    scores, thresholds = _fit_and_score_over_thresholds(
        classifier.set_fit_request(sample_weight=True),
        X,
        y,
        fit_params={"sample_weight": sample_weight},
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer.set_score_request(sample_weight=True),
        score_params={"sample_weight": sample_weight},
    )

    assert_allclose(thresholds_repeated, thresholds)
    assert_allclose(scores_repeated, scores)


@pytest.mark.parametrize("fit_params_type", ["list", "array"])
@config_context(enable_metadata_routing=True)
def test_fit_and_score_over_thresholds_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    fit_params = {
        "a": _convert_container(y, fit_params_type),
        "b": _convert_container(y, fit_params_type),
    }

    classifier = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    classifier.set_fit_request(a=True, b=True)
    train_idx, val_idx = np.arange(50), np.arange(50, 100)

    curve_scorer = _CurveScorer(
        score_func=balanced_accuracy_score,
        sign=1,
        response_method="predict_proba",
        thresholds=10,
        kwargs={},
    )
    _fit_and_score_over_thresholds(
        classifier,
        X,
        y,
        fit_params=fit_params,
        train_idx=train_idx,
        val_idx=val_idx,
        curve_scorer=curve_scorer,
        score_params={},
    )


@pytest.mark.parametrize(
    "data",
    [
        make_classification(n_classes=3, n_clusters_per_class=1, random_state=0),
        make_multilabel_classification(random_state=0),
    ],
)
def test_tuned_threshold_classifier_no_binary(data):
    """Check that we raise an informative error message for a non-binary problem."""
    err_msg = "Only binary classification is supported."
    with pytest.raises(ValueError, match=err_msg):
        TunedThresholdClassifierCV(LogisticRegression()).fit(*data)


@pytest.mark.parametrize(
    "params, err_type, err_msg",
    [
        (
            {"cv": "prefit", "refit": True},
            ValueError,
            "When cv='prefit', refit cannot be True.",
        ),
        (
            {"cv": 10, "refit": False},
            ValueError,
            "When cv has several folds, refit cannot be False.",
        ),
        (
            {"cv": "prefit", "refit": False},
            NotFittedError,
            "`estimator` must be fitted.",
        ),
    ],
)
def test_tuned_threshold_classifier_conflict_cv_refit(params, err_type, err_msg):
    """Check that we raise an informative error message when `cv` and `refit`
    cannot be used together.
    """
    X, y = make_classification(n_samples=100, random_state=0)
    with pytest.raises(err_type, match=err_msg):
        TunedThresholdClassifierCV(LogisticRegression(), **params).fit(X, y)


@pytest.mark.parametrize(
    "estimator",
    [LogisticRegression(), SVC(), GradientBoostingClassifier(n_estimators=4)],
)
@pytest.mark.parametrize(
    "response_method", ["predict_proba", "predict_log_proba", "decision_function"]
)
@pytest.mark.parametrize(
    "ThresholdClassifier", [FixedThresholdClassifier, TunedThresholdClassifierCV]
)
def test_threshold_classifier_estimator_response_methods(
    ThresholdClassifier, estimator, response_method
):
    """Check that the threshold classifiers expose the same response methods as
    the underlying estimator.
    """
    X, y = make_classification(n_samples=100, random_state=0)

    model = ThresholdClassifier(estimator=estimator)
    assert hasattr(model, response_method) == hasattr(estimator, response_method)

    model.fit(X, y)
    assert hasattr(model, response_method) == hasattr(estimator, response_method)

    if hasattr(model, response_method):
        y_pred_cutoff = getattr(model, response_method)(X)
        y_pred_underlying_estimator = getattr(model.estimator_, response_method)(X)

        assert_allclose(y_pred_cutoff, y_pred_underlying_estimator)


@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
def test_tuned_threshold_classifier_without_constraint_value(response_method):
    """Check that `TunedThresholdClassifierCV` is optimizing a given objective
    metric."""
    X, y = load_breast_cancer(return_X_y=True)
    # remove features to degrade performance
    X = X[:, :5]

    # make the problem heavily imbalanced such that the balanced accuracy is low
    indices_pos = np.flatnonzero(y == 1)
    indices_pos = indices_pos[: indices_pos.size // 50]
    indices_neg = np.flatnonzero(y == 0)

    X = np.vstack([X[indices_neg], X[indices_pos]])
    y = np.hstack([y[indices_neg], y[indices_pos]])

    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)
    thresholds = 100
    model = TunedThresholdClassifierCV(
        estimator=lr,
        scoring="balanced_accuracy",
        response_method=response_method,
        thresholds=thresholds,
        store_cv_results=True,
    )
    score_optimized = balanced_accuracy_score(y, model.fit(X, y).predict(X))
    score_baseline = balanced_accuracy_score(y, lr.predict(X))
    assert score_optimized > score_baseline
    assert model.cv_results_["thresholds"].shape == (thresholds,)
    assert model.cv_results_["scores"].shape == (thresholds,)


def test_tuned_threshold_classifier_metric_with_parameter():
    """Check that we can pass a metric with a parameter. In addition, check
    that `fbeta_score` with `beta=1` is equivalent to `f1_score` and different
    from `fbeta_score` with `beta=2`.
    """
    X, y = load_breast_cancer(return_X_y=True)
    lr = make_pipeline(StandardScaler(), LogisticRegression()).fit(X, y)
    model_fbeta_1 = TunedThresholdClassifierCV(
        estimator=lr, scoring=make_scorer(fbeta_score, beta=1)
    ).fit(X, y)
    model_fbeta_2 = TunedThresholdClassifierCV(
        estimator=lr, scoring=make_scorer(fbeta_score, beta=2)
    ).fit(X, y)
    model_f1 = TunedThresholdClassifierCV(
        estimator=lr, scoring=make_scorer(f1_score)
    ).fit(X, y)

    assert model_fbeta_1.best_threshold_ == pytest.approx(model_f1.best_threshold_)
    assert model_fbeta_1.best_threshold_ != pytest.approx(model_fbeta_2.best_threshold_)


@pytest.mark.parametrize(
    "response_method", ["auto", "decision_function", "predict_proba"]
)
@pytest.mark.parametrize(
    "metric",
    [
        make_scorer(balanced_accuracy_score),
        make_scorer(f1_score, pos_label="cancer"),
    ],
)
def test_tuned_threshold_classifier_with_string_targets(response_method, metric):
    """Check that targets represented by str are properly managed.

    Also, check with several metrics to be sure that `pos_label` is properly
    dispatched.
    """
    X, y = load_breast_cancer(return_X_y=True)
    # Encode numeric targets by meaningful strings. We purposely designed the
    # class names such that the `pos_label` is the first alphabetically sorted
    # class and thus encoded as 0.
    classes = np.array(["cancer", "healthy"], dtype=object)
    y = classes[y]
    model = TunedThresholdClassifierCV(
        estimator=make_pipeline(StandardScaler(), LogisticRegression()),
        scoring=metric,
        response_method=response_method,
        thresholds=100,
    ).fit(X, y)
    assert_array_equal(model.classes_, np.sort(classes))
    y_pred = model.predict(X)
    assert_array_equal(np.unique(y_pred), np.sort(classes))


@pytest.mark.parametrize("with_sample_weight", [True, False])
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_refit(with_sample_weight, global_random_seed):
    """Check the behaviour of the `refit` parameter."""
    rng = np.random.RandomState(global_random_seed)
    X, y = make_classification(n_samples=100, random_state=0)
    if with_sample_weight:
        sample_weight = rng.randn(X.shape[0])
        sample_weight = np.abs(sample_weight, out=sample_weight)
    else:
        sample_weight = None

    # check that `estimator_` is fitted on the full dataset when `refit=True`
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model = TunedThresholdClassifierCV(estimator, refit=True).fit(
        X, y, sample_weight=sample_weight
    )

    assert model.estimator_ is not estimator
    estimator.fit(X, y, sample_weight=sample_weight)
    assert_allclose(model.estimator_.coef_, estimator.coef_)
    assert_allclose(model.estimator_.intercept_, estimator.intercept_)

    # check that `estimator_` was not altered when `refit=False` and `cv="prefit"`
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    estimator.fit(X, y, sample_weight=sample_weight)
    coef = estimator.coef_.copy()
    model = TunedThresholdClassifierCV(estimator, cv="prefit", refit=False).fit(
        X, y, sample_weight=sample_weight
    )

    assert model.estimator_ is estimator
    assert_allclose(model.estimator_.coef_, coef)

    # check that we train `estimator_` on the training split of a given
    # cross-validation
    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    cv = [
        (np.arange(50), np.arange(50, 100)),
    ]  # single split
    model = TunedThresholdClassifierCV(estimator, cv=cv, refit=False).fit(
        X, y, sample_weight=sample_weight
    )

    assert model.estimator_ is not estimator
    if with_sample_weight:
        sw_train = sample_weight[cv[0][0]]
    else:
        sw_train = None
    estimator.fit(X[cv[0][0]], y[cv[0][0]], sample_weight=sw_train)
    assert_allclose(model.estimator_.coef_, estimator.coef_)


@pytest.mark.parametrize("fit_params_type", ["list", "array"])
@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_fit_params(fit_params_type):
    """Check that we pass `fit_params` to the classifier when calling `fit`."""
    X, y = make_classification(n_samples=100, random_state=0)
    fit_params = {
        "a": _convert_container(y, fit_params_type),
        "b": _convert_container(y, fit_params_type),
    }

    classifier = CheckingClassifier(expected_fit_params=["a", "b"], random_state=0)
    classifier.set_fit_request(a=True, b=True)
    model = TunedThresholdClassifierCV(classifier)
    model.fit(X, y, **fit_params)


@config_context(enable_metadata_routing=True)
def test_tuned_threshold_classifier_cv_zeros_sample_weights_equivalence():
    """Check that removing some samples from the dataset `X` is equivalent to
    passing a `sample_weight` with a factor 0."""
    X, y = load_iris(return_X_y=True)
    # Scale the data to avoid any convergence issue
    X = StandardScaler().fit_transform(X)
    # Only use 2 classes and select samples such that 2-fold cross-validation
    # split will lead to an equivalence with a `sample_weight` of 0
    X = np.vstack((X[:40], X[50:90]))
    y = np.hstack((y[:40], y[50:90]))
    sample_weight = np.zeros_like(y)
    sample_weight[::2] = 1

    estimator = LogisticRegression().set_fit_request(sample_weight=True)
    model_without_weights = TunedThresholdClassifierCV(estimator, cv=2)
    model_with_weights = clone(model_without_weights)

    model_with_weights.fit(X, y, sample_weight=sample_weight)
    model_without_weights.fit(X[::2], y[::2])

    assert_allclose(
        model_with_weights.estimator_.coef_, model_without_weights.estimator_.coef_
    )

    y_pred_with_weights = model_with_weights.predict_proba(X)
    y_pred_without_weights = model_without_weights.predict_proba(X)
    assert_allclose(y_pred_with_weights, y_pred_without_weights)


def test_tuned_threshold_classifier_thresholds_array():
    """Check that we can pass an array to `thresholds` and that it is used as
    the candidate thresholds internally."""
    X, y = make_classification(random_state=0)
    estimator = LogisticRegression()
    thresholds = np.linspace(0, 1, 11)
    tuned_model = TunedThresholdClassifierCV(
        estimator,
        thresholds=thresholds,
        response_method="predict_proba",
        store_cv_results=True,
    ).fit(X, y)
    assert_allclose(tuned_model.cv_results_["thresholds"], thresholds)


@pytest.mark.parametrize("store_cv_results", [True, False])
def test_tuned_threshold_classifier_store_cv_results(store_cv_results):
    """Check that `cv_results_` exists depending on `store_cv_results`."""
    X, y = make_classification(random_state=0)
    estimator = LogisticRegression()
    tuned_model = TunedThresholdClassifierCV(
        estimator, store_cv_results=store_cv_results
    ).fit(X, y)
    if store_cv_results:
        assert hasattr(tuned_model, "cv_results_")
    else:
        assert not hasattr(tuned_model, "cv_results_")


def test_tuned_threshold_classifier_cv_float():
    """Check the behaviour when `cv` is set to a float."""
    X, y = make_classification(random_state=0)

    # case where `refit=False` and cv is a float: the underlying estimator will
    # be fit on the training set given by a ShuffleSplit. We check that we get
    # the same model coefficients.
    test_size = 0.3
    estimator = LogisticRegression()
    tuned_model = TunedThresholdClassifierCV(
        estimator, cv=test_size, refit=False, random_state=0
    ).fit(X, y)

    cv = StratifiedShuffleSplit(n_splits=1, test_size=test_size, random_state=0)
    train_idx, val_idx = next(cv.split(X, y))
    cloned_estimator = clone(estimator).fit(X[train_idx], y[train_idx])

    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)

    # case where `refit=True`: the underlying estimator is fitted on the full
    # dataset.
    tuned_model.set_params(refit=True).fit(X, y)
    cloned_estimator = clone(estimator).fit(X, y)

    assert_allclose(tuned_model.estimator_.coef_, cloned_estimator.coef_)


def test_tuned_threshold_classifier_error_constant_predictor():
    """Check that we raise a ValueError if the underlying classifier returns
    constant probabilities such that we cannot find any threshold.
    """
    X, y = make_classification(random_state=0)
    estimator = DummyClassifier(strategy="constant", constant=1)
    tuned_model = TunedThresholdClassifierCV(estimator, response_method="predict_proba")
    err_msg = "The provided estimator makes constant predictions"
    with pytest.raises(ValueError, match=err_msg):
        tuned_model.fit(X, y)


@pytest.mark.parametrize(
    "response_method", ["auto", "predict_proba", "decision_function"]
)
def test_fixed_threshold_classifier_equivalence_default(response_method):
    """Check that `FixedThresholdClassifier` has the same behaviour as the
    vanilla classifier.
    """
    X, y = make_classification(random_state=0)
    classifier = LogisticRegression().fit(X, y)
    classifier_default_threshold = FixedThresholdClassifier(
        estimator=clone(classifier), response_method=response_method
    )
    classifier_default_threshold.fit(X, y)

    # emulate the response method that should take into account the `pos_label`
    if response_method in ("auto", "predict_proba"):
        y_score = classifier_default_threshold.predict_proba(X)[:, 1]
        threshold = 0.5
    else:  # response_method == "decision_function"
        y_score = classifier_default_threshold.decision_function(X)
        threshold = 0.0

    y_pred_lr = (y_score >= threshold).astype(int)
    assert_allclose(classifier_default_threshold.predict(X), y_pred_lr)


@pytest.mark.parametrize(
    "response_method, threshold", [("predict_proba", 0.7), ("decision_function", 2.0)]
)
@pytest.mark.parametrize("pos_label", [0, 1])
def test_fixed_threshold_classifier(response_method, threshold, pos_label):
    """Check that applying `predict` leads to the same prediction as applying
    the threshold to the output of the response method.
    """
    X, y = make_classification(n_samples=50, random_state=0)
    logistic_regression = LogisticRegression().fit(X, y)
    model = FixedThresholdClassifier(
        estimator=clone(logistic_regression),
        threshold=threshold,
        response_method=response_method,
        pos_label=pos_label,
    ).fit(X, y)

    # check that the underlying estimator is the same
    assert_allclose(model.estimator_.coef_, logistic_regression.coef_)

    # emulate the response method that should take into account the `pos_label`
    if response_method == "predict_proba":
        y_score = model.predict_proba(X)[:, pos_label]
    else:  # response_method == "decision_function"
        y_score = model.decision_function(X)
        y_score = y_score if pos_label == 1 else -y_score

    # create a mapping from boolean values to class labels
    map_to_label = np.array([0, 1]) if pos_label == 1 else np.array([1, 0])
    y_pred_lr = map_to_label[(y_score >= threshold).astype(int)]
    assert_allclose(model.predict(X), y_pred_lr)

    for method in ("predict_proba", "predict_log_proba", "decision_function"):
        assert_allclose(
            getattr(model, method)(X), getattr(logistic_regression, method)(X)
        )
        assert_allclose(
            getattr(model.estimator_, method)(X),
            getattr(logistic_regression, method)(X),
        )


@config_context(enable_metadata_routing=True)
def test_fixed_threshold_classifier_metadata_routing():
    """Check that everything works with metadata routing."""
    X, y = make_classification(random_state=0)
    sample_weight = np.ones_like(y)
    sample_weight[::2] = 2
    classifier = LogisticRegression().set_fit_request(sample_weight=True)
    classifier.fit(X, y, sample_weight=sample_weight)
    classifier_default_threshold = FixedThresholdClassifier(estimator=clone(classifier))
    classifier_default_threshold.fit(X, y, sample_weight=sample_weight)
    assert_allclose(classifier_default_threshold.estimator_.coef_, classifier.coef_)


@pytest.mark.parametrize(
    "method", ["predict_proba", "decision_function", "predict", "predict_log_proba"]
)
def test_fixed_threshold_classifier_fitted_estimator(method):
    """Check that if the underlying estimator is already fitted, no fit is required."""
    X, y = make_classification(random_state=0)
    classifier = LogisticRegression().fit(X, y)
    fixed_threshold_classifier = FixedThresholdClassifier(estimator=classifier)
    # This should not raise an error
    getattr(fixed_threshold_classifier, method)(X)


def test_fixed_threshold_classifier_classes_():
    """Check that the `classes_` attribute is properly set."""
    X, y = make_classification(random_state=0)
    with pytest.raises(
        AttributeError, match="The underlying estimator is not fitted yet."
    ):
        FixedThresholdClassifier(estimator=LogisticRegression()).classes_

    classifier = LogisticRegression().fit(X, y)
    fixed_threshold_classifier = FixedThresholdClassifier(estimator=classifier)
    assert_array_equal(fixed_threshold_classifier.classes_, classifier.classes_)
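

# A minimal end-to-end sketch (illustrative values) of the wrapper exercised
# above: predictions flip once `predict_proba` for the positive class crosses
# the chosen cut-off.
#
#   clf = FixedThresholdClassifier(
#       estimator=LogisticRegression(), threshold=0.7,
#       response_method="predict_proba",
#   ).fit(X, y)
#   expected = (clf.predict_proba(X)[:, 1] >= 0.7).astype(int)
#   assert_array_equal(clf.predict(X), expected)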
@@ -0,0 +1,572 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.model_selection import (
|
||||
LearningCurveDisplay,
|
||||
ValidationCurveDisplay,
|
||||
learning_curve,
|
||||
validation_curve,
|
||||
)
|
||||
from sklearn.tree import DecisionTreeClassifier
|
||||
from sklearn.utils import shuffle
|
||||
from sklearn.utils._testing import assert_allclose, assert_array_equal
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def data():
|
||||
return shuffle(*load_iris(return_X_y=True), random_state=0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"params, err_type, err_msg",
|
||||
[
|
||||
({"std_display_style": "invalid"}, ValueError, "Unknown std_display_style:"),
|
||||
({"score_type": "invalid"}, ValueError, "Unknown score_type:"),
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"CurveDisplay, specific_params",
|
||||
[
|
||||
(ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
|
||||
(LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
|
||||
],
|
||||
)
|
||||
def test_curve_display_parameters_validation(
|
||||
pyplot, data, params, err_type, err_msg, CurveDisplay, specific_params
|
||||
):
|
||||
"""Check that we raise a proper error when passing invalid parameters."""
|
||||
X, y = data
|
||||
estimator = DecisionTreeClassifier(random_state=0)
|
||||
|
||||
with pytest.raises(err_type, match=err_msg):
|
||||
CurveDisplay.from_estimator(estimator, X, y, **specific_params, **params)
|
||||
|
||||
|
||||
def test_learning_curve_display_default_usage(pyplot, data):
|
||||
"""Check the default usage of the LearningCurveDisplay class."""
|
||||
X, y = data
|
||||
estimator = DecisionTreeClassifier(random_state=0)
|
||||
|
||||
train_sizes = [0.3, 0.6, 0.9]
|
||||
display = LearningCurveDisplay.from_estimator(
|
||||
estimator, X, y, train_sizes=train_sizes
|
||||
)
|
||||
|
||||
import matplotlib as mpl
|
||||
|
||||
assert display.errorbar_ is None
|
||||
|
||||
assert isinstance(display.lines_, list)
|
||||
for line in display.lines_:
|
||||
assert isinstance(line, mpl.lines.Line2D)
|
||||
|
||||
assert isinstance(display.fill_between_, list)
|
||||
for fill in display.fill_between_:
|
||||
assert isinstance(fill, mpl.collections.PolyCollection)
|
||||
assert fill.get_alpha() == 0.5
|
||||
|
||||
assert display.score_name == "Score"
|
||||
assert display.ax_.get_xlabel() == "Number of samples in the training set"
|
||||
assert display.ax_.get_ylabel() == "Score"
|
||||
|
||||
_, legend_labels = display.ax_.get_legend_handles_labels()
|
||||
assert legend_labels == ["Train", "Test"]
|
||||
|
||||
train_sizes_abs, train_scores, test_scores = learning_curve(
|
||||
estimator, X, y, train_sizes=train_sizes
|
||||
)
|
||||
|
||||
assert_array_equal(display.train_sizes, train_sizes_abs)
|
||||
assert_allclose(display.train_scores, train_scores)
|
||||
assert_allclose(display.test_scores, test_scores)
|
||||
|
||||
|
||||
def test_validation_curve_display_default_usage(pyplot, data):
|
||||
"""Check the default usage of the ValidationCurveDisplay class."""
|
||||
X, y = data
|
||||
estimator = DecisionTreeClassifier(random_state=0)
|
||||
|
||||
param_name, param_range = "max_depth", [1, 3, 5]
|
||||
display = ValidationCurveDisplay.from_estimator(
|
||||
estimator, X, y, param_name=param_name, param_range=param_range
|
||||
)
|
||||
|
||||
import matplotlib as mpl
|
||||
|
||||
assert display.errorbar_ is None
|
||||
|
||||
assert isinstance(display.lines_, list)
|
||||
for line in display.lines_:
|
||||
assert isinstance(line, mpl.lines.Line2D)
|
||||
|
||||
assert isinstance(display.fill_between_, list)
|
||||
for fill in display.fill_between_:
|
||||
assert isinstance(fill, mpl.collections.PolyCollection)
|
||||
assert fill.get_alpha() == 0.5
|
||||
|
||||
assert display.score_name == "Score"
|
||||
assert display.ax_.get_xlabel() == f"{param_name}"
|
||||
assert display.ax_.get_ylabel() == "Score"
|
||||
|
||||
_, legend_labels = display.ax_.get_legend_handles_labels()
|
||||
assert legend_labels == ["Train", "Test"]
|
||||
|
||||
train_scores, test_scores = validation_curve(
|
||||
estimator, X, y, param_name=param_name, param_range=param_range
|
||||
)
|
||||
|
||||
assert_array_equal(display.param_range, param_range)
|
||||
assert_allclose(display.train_scores, train_scores)
|
||||
assert_allclose(display.test_scores, test_scores)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"CurveDisplay, specific_params",
|
||||
[
|
||||
(ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
|
||||
(LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
|
||||
],
|
||||
)
|
||||
def test_curve_display_negate_score(pyplot, data, CurveDisplay, specific_params):
|
||||
"""Check the behaviour of the `negate_score` parameter calling `from_estimator` and
|
||||
`plot`.
|
||||
"""
|
||||
X, y = data
|
||||
estimator = DecisionTreeClassifier(max_depth=1, random_state=0)
|
||||
|
||||
negate_score = False
|
||||
display = CurveDisplay.from_estimator(
|
||||
estimator, X, y, **specific_params, negate_score=negate_score
|
||||
)
|
||||
|
||||
positive_scores = display.lines_[0].get_data()[1]
|
||||
assert (positive_scores >= 0).all()
|
||||
assert display.ax_.get_ylabel() == "Score"
|
||||
|
||||
negate_score = True
|
||||
display = CurveDisplay.from_estimator(
|
||||
estimator, X, y, **specific_params, negate_score=negate_score
|
||||
)
|
||||
|
||||
negative_scores = display.lines_[0].get_data()[1]
|
||||
assert (negative_scores <= 0).all()
|
||||
assert_allclose(negative_scores, -positive_scores)
|
||||
assert display.ax_.get_ylabel() == "Negative score"
|
||||
|
||||
negate_score = False
|
||||
display = CurveDisplay.from_estimator(
|
||||
estimator, X, y, **specific_params, negate_score=negate_score
|
||||
)
|
||||
assert display.ax_.get_ylabel() == "Score"
|
||||
display.plot(negate_score=not negate_score)
|
||||
assert display.ax_.get_ylabel() == "Score"
|
||||
assert (display.lines_[0].get_data()[1] < 0).all()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"score_name, ylabel", [(None, "Score"), ("Accuracy", "Accuracy")]
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"CurveDisplay, specific_params",
|
||||
[
|
||||
(ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
|
||||
(LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
|
||||
],
|
||||
)
|
||||
def test_curve_display_score_name(
|
||||
pyplot, data, score_name, ylabel, CurveDisplay, specific_params
|
||||
):
|
||||
"""Check that we can overwrite the default score name shown on the y-axis."""
|
||||
X, y = data
|
||||
estimator = DecisionTreeClassifier(random_state=0)
|
||||
|
||||
display = CurveDisplay.from_estimator(
|
||||
estimator, X, y, **specific_params, score_name=score_name
|
||||
)
|
||||
|
||||
assert display.ax_.get_ylabel() == ylabel
|
||||
X, y = data
|
||||
estimator = DecisionTreeClassifier(max_depth=1, random_state=0)
|
||||
|
||||
display = CurveDisplay.from_estimator(
|
||||
estimator, X, y, **specific_params, score_name=score_name
|
||||
)
|
||||
|
||||
assert display.score_name == ylabel
|
||||
|
||||
|
||||
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
|
||||
def test_learning_curve_display_score_type(pyplot, data, std_display_style):
|
||||
"""Check the behaviour of setting the `score_type` parameter."""
|
||||
X, y = data
|
||||
estimator = DecisionTreeClassifier(random_state=0)
|
||||
|
||||
train_sizes = [0.3, 0.6, 0.9]
|
||||
train_sizes_abs, train_scores, test_scores = learning_curve(
|
||||
estimator, X, y, train_sizes=train_sizes
|
||||
)
|
||||
|
||||
score_type = "train"
|
||||
display = LearningCurveDisplay.from_estimator(
|
||||
estimator,
|
||||
X,
|
||||
y,
|
||||
train_sizes=train_sizes,
|
||||
score_type=score_type,
|
||||
std_display_style=std_display_style,
|
||||
)
|
||||
|
||||
_, legend_label = display.ax_.get_legend_handles_labels()
|
||||
assert legend_label == ["Train"]
|
||||
|
||||
if std_display_style is None:
|
||||
assert len(display.lines_) == 1
|
||||
assert display.errorbar_ is None
|
||||
x_data, y_data = display.lines_[0].get_data()
|
||||
else:
|
||||
assert display.lines_ is None
|
||||
assert len(display.errorbar_) == 1
|
||||
x_data, y_data = display.errorbar_[0].lines[0].get_data()
|
||||
|
||||
assert_array_equal(x_data, train_sizes_abs)
|
||||
assert_allclose(y_data, train_scores.mean(axis=1))
|
||||
|
||||
score_type = "test"
|
||||
display = LearningCurveDisplay.from_estimator(
|
||||
estimator,
|
||||
X,
|
||||
y,
|
||||
train_sizes=train_sizes,
|
||||
score_type=score_type,
|
||||
std_display_style=std_display_style,
|
||||
)
|
||||
|
||||
_, legend_label = display.ax_.get_legend_handles_labels()
|
||||
assert legend_label == ["Test"]
|
||||
|
||||
if std_display_style is None:
|
||||
assert len(display.lines_) == 1
|
||||
assert display.errorbar_ is None
|
||||
x_data, y_data = display.lines_[0].get_data()
|
||||
else:
|
||||
assert display.lines_ is None
|
||||
assert len(display.errorbar_) == 1
|
||||
x_data, y_data = display.errorbar_[0].lines[0].get_data()
|
||||
|
||||
assert_array_equal(x_data, train_sizes_abs)
|
||||
assert_allclose(y_data, test_scores.mean(axis=1))
|
||||
|
||||
score_type = "both"
|
||||
display = LearningCurveDisplay.from_estimator(
|
||||
estimator,
|
||||
X,
|
||||
y,
|
||||
train_sizes=train_sizes,
|
||||
score_type=score_type,
|
||||
std_display_style=std_display_style,
|
||||
)
|
||||
|
||||
_, legend_label = display.ax_.get_legend_handles_labels()
|
||||
assert legend_label == ["Train", "Test"]
|
||||
|
||||
if std_display_style is None:
|
||||
assert len(display.lines_) == 2
|
||||
assert display.errorbar_ is None
|
||||
x_data_train, y_data_train = display.lines_[0].get_data()
|
||||
x_data_test, y_data_test = display.lines_[1].get_data()
|
||||
else:
|
||||
assert display.lines_ is None
|
||||
assert len(display.errorbar_) == 2
|
||||
x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
|
||||
x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()
|
||||
|
||||
assert_array_equal(x_data_train, train_sizes_abs)
|
||||
assert_allclose(y_data_train, train_scores.mean(axis=1))
|
||||
assert_array_equal(x_data_test, train_sizes_abs)
|
||||
assert_allclose(y_data_test, test_scores.mean(axis=1))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("std_display_style", (None, "errorbar"))
|
||||
def test_validation_curve_display_score_type(pyplot, data, std_display_style):
|
||||
"""Check the behaviour of setting the `score_type` parameter."""
|
||||
X, y = data
|
||||
estimator = DecisionTreeClassifier(random_state=0)
|
||||
|
||||
param_name, param_range = "max_depth", [1, 3, 5]
|
||||
train_scores, test_scores = validation_curve(
|
||||
estimator, X, y, param_name=param_name, param_range=param_range
|
||||
)
|
||||
|
||||
score_type = "train"
|
||||
display = ValidationCurveDisplay.from_estimator(
|
||||
estimator,
|
||||
X,
|
||||
y,
|
||||
param_name=param_name,
|
||||
param_range=param_range,
|
||||
score_type=score_type,
|
||||
std_display_style=std_display_style,
|
||||
)
|
||||
|
||||
_, legend_label = display.ax_.get_legend_handles_labels()
|
||||
assert legend_label == ["Train"]
|
||||
|
||||
if std_display_style is None:
|
||||
assert len(display.lines_) == 1
|
||||
assert display.errorbar_ is None
|
||||
x_data, y_data = display.lines_[0].get_data()
|
||||
else:
|
||||
assert display.lines_ is None
|
||||
assert len(display.errorbar_) == 1
|
||||
x_data, y_data = display.errorbar_[0].lines[0].get_data()
|
||||
|
||||
assert_array_equal(x_data, param_range)
|
||||
assert_allclose(y_data, train_scores.mean(axis=1))
|
||||
|
||||
score_type = "test"
|
||||
display = ValidationCurveDisplay.from_estimator(
|
||||
estimator,
|
||||
X,
|
||||
y,
|
||||
param_name=param_name,
|
||||
param_range=param_range,
|
||||
score_type=score_type,
|
||||
std_display_style=std_display_style,
|
||||
)
|
||||
|
||||
_, legend_label = display.ax_.get_legend_handles_labels()
|
||||
assert legend_label == ["Test"]
|
||||
|
||||
if std_display_style is None:
|
||||
assert len(display.lines_) == 1
|
||||
assert display.errorbar_ is None
|
||||
x_data, y_data = display.lines_[0].get_data()
|
||||
else:
|
||||
assert display.lines_ is None
|
||||
assert len(display.errorbar_) == 1
|
||||
x_data, y_data = display.errorbar_[0].lines[0].get_data()
|
||||
|
||||
assert_array_equal(x_data, param_range)
|
||||
assert_allclose(y_data, test_scores.mean(axis=1))
|
||||
|
||||
score_type = "both"
|
||||
display = ValidationCurveDisplay.from_estimator(
|
||||
estimator,
|
||||
X,
|
||||
y,
|
||||
param_name=param_name,
|
||||
param_range=param_range,
|
||||
score_type=score_type,
|
||||
std_display_style=std_display_style,
|
||||
)
|
||||
|
||||
_, legend_label = display.ax_.get_legend_handles_labels()
|
||||
assert legend_label == ["Train", "Test"]
|
||||
|
||||
if std_display_style is None:
|
||||
assert len(display.lines_) == 2
|
||||
assert display.errorbar_ is None
|
||||
x_data_train, y_data_train = display.lines_[0].get_data()
|
||||
x_data_test, y_data_test = display.lines_[1].get_data()
|
||||
else:
|
||||
assert display.lines_ is None
|
||||
assert len(display.errorbar_) == 2
|
||||
x_data_train, y_data_train = display.errorbar_[0].lines[0].get_data()
|
||||
x_data_test, y_data_test = display.errorbar_[1].lines[0].get_data()
|
||||
|
||||
assert_array_equal(x_data_train, param_range)
|
||||
assert_allclose(y_data_train, train_scores.mean(axis=1))
|
||||
assert_array_equal(x_data_test, param_range)
|
||||
assert_allclose(y_data_test, test_scores.mean(axis=1))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"CurveDisplay, specific_params, expected_xscale",
|
||||
[
|
||||
(
|
||||
ValidationCurveDisplay,
|
||||
{"param_name": "max_depth", "param_range": np.arange(1, 5)},
|
||||
"linear",
|
||||
),
|
||||
(LearningCurveDisplay, {"train_sizes": np.linspace(0.1, 0.9, num=5)}, "linear"),
|
||||
(
|
||||
ValidationCurveDisplay,
|
||||
{
|
||||
"param_name": "max_depth",
|
||||
"param_range": np.round(np.logspace(0, 2, num=5)).astype(np.int64),
|
||||
},
|
||||
"log",
|
||||
),
|
||||
(LearningCurveDisplay, {"train_sizes": np.logspace(-1, 0, num=5)}, "log"),
|
||||
],
|
||||
)
|
||||
def test_curve_display_xscale_auto(
|
||||
pyplot, data, CurveDisplay, specific_params, expected_xscale
|
||||
):
|
||||
"""Check the behaviour of the x-axis scaling depending on the data provided."""
|
||||
X, y = data
|
||||
estimator = DecisionTreeClassifier(random_state=0)
|
||||
|
||||
display = CurveDisplay.from_estimator(estimator, X, y, **specific_params)
|
||||
assert display.ax_.get_xscale() == expected_xscale


@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_std_display_style(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the parameter `std_display_style`."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    import matplotlib as mpl

    std_display_style = None
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
    )

    assert len(display.lines_) == 2
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert display.errorbar_ is None
    assert display.fill_between_ is None
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert len(legend_label) == 2

    std_display_style = "fill_between"
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
    )

    assert len(display.lines_) == 2
    for line in display.lines_:
        assert isinstance(line, mpl.lines.Line2D)
    assert display.errorbar_ is None
    assert len(display.fill_between_) == 2
    for fill_between in display.fill_between_:
        assert isinstance(fill_between, mpl.collections.PolyCollection)
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert len(legend_label) == 2

    std_display_style = "errorbar"
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
    )

    assert display.lines_ is None
    assert len(display.errorbar_) == 2
    for errorbar in display.errorbar_:
        assert isinstance(errorbar, mpl.container.ErrorbarContainer)
    assert display.fill_between_ is None
    _, legend_label = display.ax_.get_legend_handles_labels()
    assert len(legend_label) == 2


@pytest.mark.parametrize(
    "CurveDisplay, specific_params",
    [
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
        (LearningCurveDisplay, {"train_sizes": [0.3, 0.6, 0.9]}),
    ],
)
def test_curve_display_plot_kwargs(pyplot, data, CurveDisplay, specific_params):
    """Check the behaviour of the different plotting keyword arguments: `line_kw`,
    `fill_between_kw`, and `errorbar_kw`."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    std_display_style = "fill_between"
    line_kw = {"color": "red"}
    fill_between_kw = {"color": "red", "alpha": 1.0}
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
        line_kw=line_kw,
        fill_between_kw=fill_between_kw,
    )

    assert display.lines_[0].get_color() == "red"
    assert_allclose(
        display.fill_between_[0].get_facecolor(),
        [[1.0, 0.0, 0.0, 1.0]],  # trust me, it's red
    )

    std_display_style = "errorbar"
    errorbar_kw = {"color": "red"}
    display = CurveDisplay.from_estimator(
        estimator,
        X,
        y,
        **specific_params,
        std_display_style=std_display_style,
        errorbar_kw=errorbar_kw,
    )

    assert display.errorbar_[0].lines[0].get_color() == "red"


@pytest.mark.parametrize(
    "param_range, xscale",
    [([5, 10, 15], "linear"), ([-50, 5, 50, 500], "symlog"), ([5, 50, 500], "log")],
)
def test_validation_curve_xscale_from_param_range_provided_as_a_list(
    pyplot, data, param_range, xscale
):
    """Check the induced xscale from the provided param_range values."""
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    param_name = "max_depth"
    display = ValidationCurveDisplay.from_estimator(
        estimator,
        X,
        y,
        param_name=param_name,
        param_range=param_range,
    )

    assert display.ax_.get_xscale() == xscale
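
# The three expectations above outline the inferred-scale behaviour for
# explicit lists: evenly spaced positive values ([5, 10, 15]) keep a linear
# axis; positive values spanning orders of magnitude ([5, 50, 500]) get a log
# axis; and a wide range that includes non-positive values
# ([-50, 5, 50, 500]) cannot use "log", so "symlog" is expected instead.
# This reading is inferred from the expected values, not from the plotting
# code itself.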


@pytest.mark.parametrize(
    "Display, params",
    [
        (LearningCurveDisplay, {}),
        (ValidationCurveDisplay, {"param_name": "max_depth", "param_range": [1, 3, 5]}),
    ],
)
def test_subclassing_displays(pyplot, data, Display, params):
    """Check that named constructors return the correct type when subclassed.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/pull/27675
    """
    X, y = data
    estimator = DecisionTreeClassifier(random_state=0)

    class SubclassOfDisplay(Display):
        pass

    display = SubclassOfDisplay.from_estimator(estimator, X, y, **params)
    assert isinstance(display, SubclassOfDisplay)
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -0,0 +1,853 @@
from math import ceil

import numpy as np
import pytest
from scipy.stats import expon, norm, randint

from sklearn.datasets import make_classification
from sklearn.dummy import DummyClassifier
from sklearn.experimental import enable_halving_search_cv  # noqa: F401
from sklearn.model_selection import (
    GroupKFold,
    GroupShuffleSplit,
    HalvingGridSearchCV,
    HalvingRandomSearchCV,
    KFold,
    LeaveOneGroupOut,
    LeavePGroupsOut,
    ShuffleSplit,
    StratifiedKFold,
    StratifiedShuffleSplit,
)
from sklearn.model_selection._search_successive_halving import (
    _SubsampleMetaSplitter,
    _top_k,
)
from sklearn.model_selection.tests.test_search import (
    check_cv_results_array_types,
    check_cv_results_keys,
)
from sklearn.svm import SVC, LinearSVC


class FastClassifier(DummyClassifier):
    """Dummy classifier that accepts parameters a, b, ... z.

    These parameters don't affect the predictions and are useful for fast
    grid searching."""

    # update the constraints such that we accept all parameters from a to z
    _parameter_constraints: dict = {
        **DummyClassifier._parameter_constraints,
        **{chr(key): "no_validation" for key in range(ord("a"), ord("z") + 1)},
    }

    def __init__(
        self, strategy="stratified", random_state=None, constant=None, **kwargs
    ):
        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )

    def get_params(self, deep=False):
        params = super().get_params(deep=deep)
        for char in range(ord("a"), ord("z") + 1):
            params[chr(char)] = "whatever"
        return params


class SometimesFailClassifier(DummyClassifier):
    def __init__(
        self,
        strategy="stratified",
        random_state=None,
        constant=None,
        n_estimators=10,
        fail_fit=False,
        fail_predict=False,
        a=0,
    ):
        self.fail_fit = fail_fit
        self.fail_predict = fail_predict
        self.n_estimators = n_estimators
        self.a = a

        super().__init__(
            strategy=strategy, random_state=random_state, constant=constant
        )

    def fit(self, X, y):
        if self.fail_fit:
            raise Exception("fitting failed")
        return super().fit(X, y)

    def predict(self, X):
        if self.fail_predict:
            raise Exception("predict failed")
        return super().predict(X)


@pytest.mark.filterwarnings("ignore::sklearn.exceptions.FitFailedWarning")
@pytest.mark.filterwarnings("ignore:Scoring failed:UserWarning")
@pytest.mark.filterwarnings("ignore:One or more of the:UserWarning")
@pytest.mark.parametrize("HalvingSearch", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize("fail_at", ("fit", "predict"))
def test_nan_handling(HalvingSearch, fail_at):
    """Check the selection of the best scores in the presence of failures
    represented by NaN values."""
    n_samples = 1_000
    X, y = make_classification(n_samples=n_samples, random_state=0)

    search = HalvingSearch(
        SometimesFailClassifier(),
        {f"fail_{fail_at}": [False, True], "a": range(3)},
        resource="n_estimators",
        max_resources=6,
        min_resources=1,
        factor=2,
    )

    search.fit(X, y)

    # estimators that failed during fit/predict should always rank lower
    # than ones where the fit/predict succeeded
    assert not search.best_params_[f"fail_{fail_at}"]
    scores = search.cv_results_["mean_test_score"]
    ranks = search.cv_results_["rank_test_score"]

    # some scores should be NaN
    assert np.isnan(scores).any()

    unique_nan_ranks = np.unique(ranks[np.isnan(scores)])
    # all NaN scores should have the same rank
    assert unique_nan_ranks.shape[0] == 1
    # NaN scores should all be assigned the worst (largest) rank value
    assert (unique_nan_ranks[0] >= ranks).all()


@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "aggressive_elimination,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_required_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_remaining_candidates,"
        "expected_n_candidates,"
        "expected_n_resources,"
    ),
    [
        # notice how it loops at the beginning
        # also, the number of candidates evaluated at the last iteration is
        # <= factor
        (True, "limited", 4, 4, 3, 1, [60, 20, 7, 3], [20, 20, 60, 180]),
        # no aggressive elimination: we end up with fewer iterations, and
        # the number of candidates at the last iter is > factor, which isn't
        # ideal
        (False, "limited", 3, 4, 3, 3, [60, 20, 7], [20, 60, 180]),
        # When the amount of resource isn't limited, aggressive_elimination
        # has no effect. Here the default min_resources='exhaust' will take
        # over.
        (True, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
        (False, "unlimited", 4, 4, 4, 1, [60, 20, 7, 3], [37, 111, 333, 999]),
    ],
)
def test_aggressive_elimination(
    Est,
    aggressive_elimination,
    max_resources,
    expected_n_iterations,
    expected_n_required_iterations,
    expected_n_possible_iterations,
    expected_n_remaining_candidates,
    expected_n_candidates,
    expected_n_resources,
):
    # Test the aggressive_elimination parameter.

    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifier()

    if max_resources == "limited":
        max_resources = 180
    else:
        max_resources = n_samples

    sh = Est(
        base_estimator,
        param_grid,
        aggressive_elimination=aggressive_elimination,
        max_resources=max_resources,
        factor=3,
    )
    sh.set_params(verbose=True)  # just for test coverage

    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")

    sh.fit(X, y)

    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == expected_n_required_iterations
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    assert sh.n_candidates_ == expected_n_candidates
    assert sh.n_remaining_candidates_ == expected_n_remaining_candidates
    assert ceil(sh.n_candidates_[-1] / sh.factor) == sh.n_remaining_candidates_
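
# A worked check of the first case above (factor=3, 2 * 30 = 60 candidates,
# max_resources=180), assuming the default min_resources="smallest" resolves
# to 2 * n_splits * n_classes = 2 * 5 * 2 = 20 samples here:
#
#     candidates per iter: 60 -> ceil(60/3) = 20 -> ceil(20/3) = 7 -> ceil(7/3) = 3
#     required iterations: ceil(log3(60)) = 4
#     possible iterations: floor(log3(180 / 20)) + 1 = 3
#
# With aggressive_elimination=True, the first two iterations both run at the
# minimum resources, hence n_resources_ == [20, 20, 60, 180]; without it,
# the search stops after 3 iterations with ceil(7/3) = 3 candidates left.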


@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    (
        "min_resources,"
        "max_resources,"
        "expected_n_iterations,"
        "expected_n_possible_iterations,"
        "expected_n_resources,"
    ),
    [
        # with enough resources
        ("smallest", "auto", 2, 4, [20, 60]),
        # with enough resources but min_resources set manually
        (50, "auto", 2, 3, [50, 150]),
        # without enough resources, only one iteration can be done
        ("smallest", 30, 1, 1, [20]),
        # with exhaust: use as much of the resource budget as possible at the
        # last iter
        ("exhaust", "auto", 2, 2, [333, 999]),
        ("exhaust", 1000, 2, 2, [333, 999]),
        ("exhaust", 999, 2, 2, [333, 999]),
        ("exhaust", 600, 2, 2, [200, 600]),
        ("exhaust", 599, 2, 2, [199, 597]),
        ("exhaust", 300, 2, 2, [100, 300]),
        ("exhaust", 60, 2, 2, [20, 60]),
        ("exhaust", 50, 1, 1, [20]),
        ("exhaust", 20, 1, 1, [20]),
    ],
)
def test_min_max_resources(
    Est,
    min_resources,
    max_resources,
    expected_n_iterations,
    expected_n_possible_iterations,
    expected_n_resources,
):
    # Test the min_resources and max_resources parameters, and how they affect
    # the number of resources used at each iteration
    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": [1, 2], "b": [1, 2, 3]}
    base_estimator = FastClassifier()

    sh = Est(
        base_estimator,
        param_grid,
        factor=3,
        min_resources=min_resources,
        max_resources=max_resources,
    )
    if Est is HalvingRandomSearchCV:
        sh.set_params(n_candidates=6)  # same number as with the grid

    sh.fit(X, y)

    expected_n_required_iterations = 2  # given 6 combinations and factor = 3
    assert sh.n_iterations_ == expected_n_iterations
    assert sh.n_required_iterations_ == expected_n_required_iterations
    assert sh.n_possible_iterations_ == expected_n_possible_iterations
    assert sh.n_resources_ == expected_n_resources
    if min_resources == "exhaust":
        assert sh.n_possible_iterations_ == sh.n_iterations_ == len(sh.n_resources_)
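
# A worked check of the "exhaust" rows above: with 6 candidates and factor=3,
# two iterations are required (ceil(log3(6)) == 2), and "exhaust" appears to
# choose min_resources = max_resources // factor ** (n_iterations - 1) so that
# the last iteration consumes as much of the budget as possible, e.g.
# 1000 // 3 = 333 -> [333, 999] and 599 // 3 = 199 -> [199, 597]. The formula
# is inferred from the expected values above, not quoted from the
# implementation.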


@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
@pytest.mark.parametrize(
    "max_resources, n_iterations, n_possible_iterations",
    [
        ("auto", 5, 9),  # all resources are used
        (1024, 5, 9),
        (700, 5, 8),
        (512, 5, 8),
        (511, 5, 7),
        (32, 4, 4),
        (31, 3, 3),
        (16, 3, 3),
        (4, 1, 1),  # max_resources == min_resources, only one iteration is possible
    ],
)
def test_n_iterations(Est, max_resources, n_iterations, n_possible_iterations):
    # test the number of actual iterations that were run depending on
    # max_resources

    n_samples = 1024
    X, y = make_classification(n_samples=n_samples, random_state=1)
    param_grid = {"a": [1, 2], "b": list(range(10))}
    base_estimator = FastClassifier()
    factor = 2

    sh = Est(
        base_estimator,
        param_grid,
        cv=2,
        factor=factor,
        max_resources=max_resources,
        min_resources=4,
    )
    if Est is HalvingRandomSearchCV:
        sh.set_params(n_candidates=20)  # same as for HalvingGridSearchCV
    sh.fit(X, y)
    assert sh.n_required_iterations_ == 5
    assert sh.n_iterations_ == n_iterations
    assert sh.n_possible_iterations_ == n_possible_iterations
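
# Worked numbers for the parametrization above (factor=2, min_resources=4,
# 2 * 10 = 20 candidates):
#
#     required iterations: ceil(log2(20)) = 5
#     possible iterations: floor(log2(max_resources / 4)) + 1
#                          e.g. 1024 -> 9, 511 -> 7, 32 -> 4
#     actual iterations:   min(required, possible)
#
# which reproduces every (max_resources, n_iterations, n_possible_iterations)
# triple in the list.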


@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_resource_parameter(Est):
    # Test the resource parameter

    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": [1, 2], "b": list(range(10))}
    base_estimator = FastClassifier()
    sh = Est(base_estimator, param_grid, cv=2, resource="c", max_resources=10, factor=3)
    sh.fit(X, y)
    assert set(sh.n_resources_) == {1, 3, 9}
    for r_i, params, param_c in zip(
        sh.cv_results_["n_resources"],
        sh.cv_results_["params"],
        sh.cv_results_["param_c"],
    ):
        assert r_i == params["c"] == param_c

    with pytest.raises(
        ValueError, match="Cannot use resource=1234 which is not supported "
    ):
        sh = HalvingGridSearchCV(
            base_estimator, param_grid, cv=2, resource="1234", max_resources=10
        )
        sh.fit(X, y)

    with pytest.raises(
        ValueError,
        match=(
            "Cannot use parameter c as the resource since it is part "
            "of the searched parameters."
        ),
    ):
        param_grid = {"a": [1, 2], "b": [1, 2], "c": [1, 3]}
        sh = HalvingGridSearchCV(
            base_estimator, param_grid, cv=2, resource="c", max_resources=10
        )
        sh.fit(X, y)
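
# Beyond the synthetic "c" parameter used above, the resource is typically a
# parameter that makes the estimator monotonically more expensive to fit. A
# plausible real-world configuration (illustrative only, not exercised by
# this test) would budget boosting iterations instead of samples:
#
#     HalvingGridSearchCV(
#         HistGradientBoostingClassifier(),
#         {"max_depth": [3, 5, 9]},
#         resource="max_iter",
#         max_resources=1000,
#         factor=3,
#     )
#
# so that early rounds fit cheap models and only survivors get the full budget.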


@pytest.mark.parametrize(
    "max_resources, n_candidates, expected_n_candidates",
    [
        (512, "exhaust", 128),  # generate exactly as many as needed
        (32, "exhaust", 8),
        (32, 8, 8),
        (32, 7, 7),  # ask for less than what we could
        (32, 9, 9),  # ask for more than 'reasonable'
    ],
)
def test_random_search(max_resources, n_candidates, expected_n_candidates):
    # Test random search and make sure the number of generated candidates is
    # as expected

    n_samples = 1024
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": norm, "b": norm}
    base_estimator = FastClassifier()
    sh = HalvingRandomSearchCV(
        base_estimator,
        param_grid,
        n_candidates=n_candidates,
        cv=2,
        max_resources=max_resources,
        factor=2,
        min_resources=4,
    )
    sh.fit(X, y)
    assert sh.n_candidates_[0] == expected_n_candidates
    if n_candidates == "exhaust":
        # Make sure 'exhaust' makes the last iteration use as many resources
        # as we can
        assert sh.n_resources_[-1] == max_resources
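
# Worked check of the 'exhaust' rows above: with min_resources=4 and factor=2,
# max_resources=512 allows floor(log2(512 / 4)) + 1 = 8 iterations, and
# n_candidates='exhaust' appears to generate factor ** (8 - 1) = 128 candidates
# so that exactly one remains after the full schedule; likewise max_resources=32
# allows 4 iterations, giving 2 ** 3 = 8 candidates. The formula is inferred
# from the expected values, not quoted from the implementation.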


@pytest.mark.parametrize(
    "param_distributions, expected_n_candidates",
    [
        ({"a": [1, 2]}, 2),  # all lists, sample less than n_candidates
        ({"a": randint(1, 3)}, 10),  # not all lists, respect n_candidates
    ],
)
def test_random_search_discrete_distributions(
    param_distributions, expected_n_candidates
):
    # Make sure random search samples the appropriate number of candidates when
    # we ask for more than what's possible. How many parameters are sampled
    # depends on whether the distributions are 'all lists' or not (see
    # ParameterSampler for details). This is somewhat redundant with the checks
    # in ParameterSampler but interaction bugs were discovered during
    # development of SH

    n_samples = 1024
    X, y = make_classification(n_samples=n_samples, random_state=0)
    base_estimator = FastClassifier()
    sh = HalvingRandomSearchCV(base_estimator, param_distributions, n_candidates=10)
    sh.fit(X, y)
    assert sh.n_candidates_[0] == expected_n_candidates


@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"resource": "not_a_parameter"},
            "Cannot use resource=not_a_parameter which is not supported",
        ),
        (
            {"resource": "a", "max_resources": 100},
            "Cannot use parameter a as the resource since it is part of",
        ),
        (
            {"max_resources": "auto", "resource": "b"},
            "resource can only be 'n_samples' when max_resources='auto'",
        ),
        (
            {"min_resources": 15, "max_resources": 14},
            "min_resources_=15 is greater than max_resources_=14",
        ),
        ({"cv": KFold(shuffle=True)}, "must yield consistent folds"),
        ({"cv": ShuffleSplit()}, "must yield consistent folds"),
    ],
)
def test_input_errors(Est, params, expected_error_message):
    base_estimator = FastClassifier()
    param_grid = {"a": [1]}
    X, y = make_classification(100)

    sh = Est(base_estimator, param_grid, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        sh.fit(X, y)


@pytest.mark.parametrize(
    "params, expected_error_message",
    [
        (
            {"n_candidates": "exhaust", "min_resources": "exhaust"},
            "cannot be both set to 'exhaust'",
        ),
    ],
)
def test_input_errors_randomized(params, expected_error_message):
    # tests specific to HalvingRandomSearchCV

    base_estimator = FastClassifier()
    param_grid = {"a": [1]}
    X, y = make_classification(100)

    sh = HalvingRandomSearchCV(base_estimator, param_grid, **params)

    with pytest.raises(ValueError, match=expected_error_message):
        sh.fit(X, y)


@pytest.mark.parametrize(
    "fraction, subsample_test, expected_train_size, expected_test_size",
    [
        (0.5, True, 40, 10),
        (0.5, False, 40, 20),
        (0.2, True, 16, 4),
        (0.2, False, 16, 20),
    ],
)
def test_subsample_splitter_shapes(
    fraction, subsample_test, expected_train_size, expected_test_size
):
    # Make sure splits returned by _SubsampleMetaSplitter are of appropriate
    # size

    n_samples = 100
    X, y = make_classification(n_samples)
    cv = _SubsampleMetaSplitter(
        base_cv=KFold(5),
        fraction=fraction,
        subsample_test=subsample_test,
        random_state=None,
    )

    for train, test in cv.split(X, y):
        assert train.shape[0] == expected_train_size
        assert test.shape[0] == expected_test_size
        if subsample_test:
            assert train.shape[0] + test.shape[0] == int(n_samples * fraction)
        else:
            assert test.shape[0] == n_samples // cv.base_cv.get_n_splits()
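
# Worked sizes for the parametrization above: KFold(5) on 100 samples yields
# 80 train / 20 test samples per split before subsampling. fraction=0.5
# shrinks the train side to 40, and the test side to 10 only when
# subsample_test=True; otherwise the original 20 test samples are kept, which
# is why train + test == int(100 * fraction) holds only in the
# subsampled-test cases.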


@pytest.mark.parametrize("subsample_test", (True, False))
def test_subsample_splitter_determinism(subsample_test):
    # Make sure _SubsampleMetaSplitter is consistent across calls to split():
    # - we're OK having training sets differ (they're always sampled with a
    #   different fraction anyway)
    # - when we don't subsample the test set, we want it to be always the same.
    #   This check is the most important. This is ensured by the determinism
    #   of the base_cv.

    # Note: we could force both train and test splits to be always the same if
    # we drew an int seed in _SubsampleMetaSplitter.__init__

    n_samples = 100
    X, y = make_classification(n_samples)
    cv = _SubsampleMetaSplitter(
        base_cv=KFold(5), fraction=0.5, subsample_test=subsample_test, random_state=None
    )

    folds_a = list(cv.split(X, y, groups=None))
    folds_b = list(cv.split(X, y, groups=None))

    for (train_a, test_a), (train_b, test_b) in zip(folds_a, folds_b):
        assert not np.all(train_a == train_b)

        if subsample_test:
            assert not np.all(test_a == test_b)
        else:
            assert np.all(test_a == test_b)
            assert np.all(X[test_a] == X[test_b])


@pytest.mark.parametrize(
    "k, itr, expected",
    [
        (1, 0, ["c"]),
        (2, 0, ["a", "c"]),
        (4, 0, ["d", "b", "a", "c"]),
        (10, 0, ["d", "b", "a", "c"]),
        (1, 1, ["e"]),
        (2, 1, ["f", "e"]),
        (10, 1, ["f", "e"]),
        (1, 2, ["i"]),
        (10, 2, ["g", "h", "i"]),
    ],
)
def test_top_k(k, itr, expected):
    results = {  # this isn't a 'real world' result dict
        "iter": [0, 0, 0, 0, 1, 1, 2, 2, 2],
        "mean_test_score": [4, 3, 5, 1, 11, 10, 5, 6, 9],
        "params": ["a", "b", "c", "d", "e", "f", "g", "h", "i"],
    }
    got = _top_k(results, k=k, itr=itr)
    assert np.all(got == expected)
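
# Reading the expectations above: _top_k restricts the results to the given
# iteration and returns the (at most) k best candidates by mean_test_score,
# ordered from worst to best. At iteration 0 the scores are
# {"a": 4, "b": 3, "c": 5, "d": 1}, so k=2 keeps "a" then "c", and any k >= 4
# returns all of ["d", "b", "a", "c"] in ascending-score order.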


@pytest.mark.parametrize("Est", (HalvingRandomSearchCV, HalvingGridSearchCV))
def test_cv_results(Est):
    # test that cv_results_ matches the logic of the tournament: in
    # particular, that the candidates continued in each successive iteration
    # are those that were best in the previous iteration
    pd = pytest.importorskip("pandas")

    rng = np.random.RandomState(0)

    n_samples = 1000
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifier()

    # generate random scores: we want to avoid ties, which would otherwise
    # mess with the ordering and make testing harder
    def scorer(est, X, y):
        return rng.rand()

    sh = Est(base_estimator, param_grid, factor=2, scoring=scorer)
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")

    sh.fit(X, y)

    # non-regression check for
    # https://github.com/scikit-learn/scikit-learn/issues/19203
    assert isinstance(sh.cv_results_["iter"], np.ndarray)
    assert isinstance(sh.cv_results_["n_resources"], np.ndarray)

    cv_results_df = pd.DataFrame(sh.cv_results_)

    # just make sure we don't have ties
    assert len(cv_results_df["mean_test_score"].unique()) == len(cv_results_df)

    cv_results_df["params_str"] = cv_results_df["params"].apply(str)
    table = cv_results_df.pivot(
        index="params_str", columns="iter", values="mean_test_score"
    )

    # the table looks something like this:
    # iter                     0      1       2        3    4    5
    # params_str
    # {'a': 'l2', 'b': 23}  0.75    NaN     NaN      NaN  NaN  NaN
    # {'a': 'l1', 'b': 30}  0.90  0.875     NaN      NaN  NaN  NaN
    # {'a': 'l1', 'b': 0}   0.75    NaN     NaN      NaN  NaN  NaN
    # {'a': 'l2', 'b': 3}   0.85  0.925  0.9125  0.90625  NaN  NaN
    # {'a': 'l1', 'b': 5}   0.80    NaN     NaN      NaN  NaN  NaN
    # ...

    # where a NaN indicates that the candidate wasn't evaluated at a given
    # iteration, because it wasn't part of the top-K at some previous
    # iteration. We here make sure that candidates that aren't in the top-k at
    # any given iteration are indeed not evaluated at the subsequent
    # iterations.
    nan_mask = pd.isna(table)
    n_iter = sh.n_iterations_
    for it in range(n_iter - 1):
        already_discarded_mask = nan_mask[it]

        # make sure that if a candidate is already discarded, we don't evaluate
        # it later
        assert (
            already_discarded_mask & nan_mask[it + 1] == already_discarded_mask
        ).all()

        # make sure that the number of discarded candidates is correct
        discarded_now_mask = ~already_discarded_mask & nan_mask[it + 1]
        kept_mask = ~already_discarded_mask & ~discarded_now_mask
        assert kept_mask.sum() == sh.n_candidates_[it + 1]

        # make sure that all discarded candidates have a lower score than the
        # kept candidates
        discarded_max_score = table[it].where(discarded_now_mask).max()
        kept_min_score = table[it].where(kept_mask).min()
        assert discarded_max_score < kept_min_score

    # We now make sure that the best candidate is chosen only from the last
    # iteration.
    # We also make sure this is true even if there were higher scores in
    # earlier rounds (this isn't generally the case, but worth ensuring it's
    # possible).

    last_iter = cv_results_df["iter"].max()
    idx_best_last_iter = cv_results_df[cv_results_df["iter"] == last_iter][
        "mean_test_score"
    ].idxmax()
    idx_best_all_iters = cv_results_df["mean_test_score"].idxmax()

    assert sh.best_params_ == cv_results_df.iloc[idx_best_last_iter]["params"]
    assert (
        cv_results_df.iloc[idx_best_last_iter]["mean_test_score"]
        < cv_results_df.iloc[idx_best_all_iters]["mean_test_score"]
    )
    assert (
        cv_results_df.iloc[idx_best_last_iter]["params"]
        != cv_results_df.iloc[idx_best_all_iters]["params"]
    )


@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_base_estimator_inputs(Est):
    # make sure that the base estimators are passed the correct parameters and
    # number of samples at each iteration.
    pd = pytest.importorskip("pandas")

    passed_n_samples_fit = []
    passed_n_samples_predict = []
    passed_params = []

    class FastClassifierBookKeeping(FastClassifier):
        def fit(self, X, y):
            passed_n_samples_fit.append(X.shape[0])
            return super().fit(X, y)

        def predict(self, X):
            passed_n_samples_predict.append(X.shape[0])
            return super().predict(X)

        def set_params(self, **params):
            passed_params.append(params)
            return super().set_params(**params)

    n_samples = 1024
    n_splits = 2
    X, y = make_classification(n_samples=n_samples, random_state=0)
    param_grid = {"a": ("l1", "l2"), "b": list(range(30))}
    base_estimator = FastClassifierBookKeeping()

    sh = Est(
        base_estimator,
        param_grid,
        factor=2,
        cv=n_splits,
        return_train_score=False,
        refit=False,
    )
    if Est is HalvingRandomSearchCV:
        # same number of candidates as with the grid
        sh.set_params(n_candidates=2 * 30, min_resources="exhaust")

    sh.fit(X, y)

    assert len(passed_n_samples_fit) == len(passed_n_samples_predict)
    passed_n_samples = [
        x + y for (x, y) in zip(passed_n_samples_fit, passed_n_samples_predict)
    ]

    # Lists are of length n_splits * n_iter * n_candidates_at_iter.
    # Each chunk of size n_splits corresponds to the n_splits folds for the
    # same candidate at the same iteration, so they contain equal values. We
    # subsample such that the lists are of length n_iter * n_candidates_at_iter
    passed_n_samples = passed_n_samples[::n_splits]
    passed_params = passed_params[::n_splits]

    cv_results_df = pd.DataFrame(sh.cv_results_)

    assert len(passed_params) == len(passed_n_samples) == len(cv_results_df)

    uniques, counts = np.unique(passed_n_samples, return_counts=True)
    assert (sh.n_resources_ == uniques).all()
    assert (sh.n_candidates_ == counts).all()

    assert (cv_results_df["params"] == passed_params).all()
    assert (cv_results_df["n_resources"] == passed_n_samples).all()


@pytest.mark.parametrize("Est", (HalvingGridSearchCV, HalvingRandomSearchCV))
def test_groups_support(Est):
    # Check that the ValueError raised when groups is None propagates to
    # HalvingGridSearchCV and HalvingRandomSearchCV, and that groups is
    # correctly passed to the cv object
    rng = np.random.RandomState(0)

    X, y = make_classification(n_samples=50, n_classes=2, random_state=0)
    groups = rng.randint(0, 3, 50)

    clf = LinearSVC(random_state=0)
    grid = {"C": [1]}

    group_cvs = [
        LeaveOneGroupOut(),
        LeavePGroupsOut(2),
        GroupKFold(n_splits=3),
        GroupShuffleSplit(random_state=0),
    ]
    error_msg = "The 'groups' parameter should not be None."
    for cv in group_cvs:
        gs = Est(clf, grid, cv=cv, random_state=0)
        with pytest.raises(ValueError, match=error_msg):
            gs.fit(X, y)
        gs.fit(X, y, groups=groups)

    non_group_cvs = [StratifiedKFold(), StratifiedShuffleSplit(random_state=0)]
    for cv in non_group_cvs:
        gs = Est(clf, grid, cv=cv)
        # Should not raise an error
        gs.fit(X, y)


@pytest.mark.parametrize("SearchCV", [HalvingRandomSearchCV, HalvingGridSearchCV])
def test_min_resources_null(SearchCV):
    """Check that we raise an error if the minimum amount of resources is 0."""
    base_estimator = FastClassifier()
    param_grid = {"a": [1]}
    X = np.empty(0).reshape(0, 3)

    search = SearchCV(base_estimator, param_grid, min_resources="smallest")

    err_msg = "min_resources_=0: you might have passed an empty dataset X."
    with pytest.raises(ValueError, match=err_msg):
        search.fit(X, [])


@pytest.mark.parametrize("SearchCV", [HalvingGridSearchCV, HalvingRandomSearchCV])
def test_select_best_index(SearchCV):
    """Check the selection strategy of the halving search."""
    results = {  # this isn't a 'real world' result dict
        "iter": np.array([0, 0, 0, 0, 1, 1, 2, 2, 2]),
        "mean_test_score": np.array([4, 3, 5, 1, 11, 10, 5, 6, 9]),
        "params": np.array(["a", "b", "c", "d", "e", "f", "g", "h", "i"]),
    }

    # we expect the index of 'i'
    best_index = SearchCV._select_best_index(None, None, results)
    assert best_index == 8
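
# The expected index follows from the tournament semantics: only candidates
# evaluated at the last iteration (iter == 2, i.e. "g", "h", "i") are
# eligible, and "i" has the best score (9) among them, even though "e"
# scored 11 at iteration 1. Hence index 8.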


def test_halving_random_search_list_of_dicts():
    """Check the behaviour of `HalvingRandomSearchCV` when `param_distributions`
    is a list of dictionaries.
    """
    X, y = make_classification(n_samples=150, n_features=4, random_state=42)

    params = [
        {"kernel": ["rbf"], "C": expon(scale=10), "gamma": expon(scale=0.1)},
        {"kernel": ["poly"], "degree": [2, 3]},
    ]
    param_keys = (
        "param_C",
        "param_degree",
        "param_gamma",
        "param_kernel",
    )
    score_keys = (
        "mean_test_score",
        "mean_train_score",
        "rank_test_score",
        "split0_test_score",
        "split1_test_score",
        "split2_test_score",
        "split0_train_score",
        "split1_train_score",
        "split2_train_score",
        "std_test_score",
        "std_train_score",
        "mean_fit_time",
        "std_fit_time",
        "mean_score_time",
        "std_score_time",
    )
    extra_keys = ("n_resources", "iter")

    search = HalvingRandomSearchCV(
        SVC(), cv=3, param_distributions=params, return_train_score=True, random_state=0
    )
    search.fit(X, y)
    n_candidates = sum(search.n_candidates_)
    cv_results = search.cv_results_
    # Check results structure
    check_cv_results_keys(cv_results, param_keys, score_keys, n_candidates, extra_keys)
    expected_cv_results_kinds = {
        "param_C": "f",
        "param_degree": "i",
        "param_gamma": "f",
        "param_kernel": "O",
    }
    check_cv_results_array_types(
        search, param_keys, score_keys, expected_cv_results_kinds
    )

    assert all(
        (
            cv_results["param_C"].mask[i]
            and cv_results["param_gamma"].mask[i]
            and not cv_results["param_degree"].mask[i]
        )
        for i in range(n_candidates)
        if cv_results["param_kernel"][i] == "poly"
    )
    assert all(
        (
            not cv_results["param_C"].mask[i]
            and not cv_results["param_gamma"].mask[i]
            and cv_results["param_degree"].mask[i]
        )
        for i in range(n_candidates)
        if cv_results["param_kernel"][i] == "rbf"
    )
File diff suppressed because it is too large