@@ -0,0 +1,23 @@
"""Meta-estimators for building composite models with transformers.

In addition to its current contents, this module will eventually be home to
refurbished versions of :class:`~sklearn.pipeline.Pipeline` and
:class:`~sklearn.pipeline.FeatureUnion`.
"""

# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

from sklearn.compose._column_transformer import (
    ColumnTransformer,
    make_column_selector,
    make_column_transformer,
)
from sklearn.compose._target import TransformedTargetRegressor

__all__ = [
    "ColumnTransformer",
    "TransformedTargetRegressor",
    "make_column_selector",
    "make_column_transformer",
]
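The hunk above appears to be `sklearn/compose/__init__.py`, re-exporting the package's public composition API. As orientation, a minimal sketch of how the exported names are typically combined; the toy DataFrame, column names, and preprocessing choices are illustrative assumptions, not part of this commit:

import pandas as pd

from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Illustrative toy frame (not from this commit); any mixed-dtype DataFrame
# works the same way.
X = pd.DataFrame({"age": [25.0, 32.0, 47.0], "city": ["Oslo", "Bergen", "Oslo"]})

ct = ColumnTransformer(
    transformers=[
        # scale every column selected by numeric dtype
        ("num", StandardScaler(), make_column_selector(dtype_include="number")),
        # one-hot encode everything object-typed
        ("cat", OneHotEncoder(), make_column_selector(dtype_include=object)),
    ]
)
print(ct.fit_transform(X))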
@@ -0,0 +1,397 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause

import warnings

import numpy as np

from sklearn.base import BaseEstimator, RegressorMixin, _fit_context, clone
from sklearn.exceptions import NotFittedError
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import FunctionTransformer
from sklearn.utils import Bunch, _safe_indexing, check_array
from sklearn.utils._metadata_requests import (
    MetadataRouter,
    MethodMapping,
    _routing_enabled,
    process_routing,
)
from sklearn.utils._param_validation import HasMethods
from sklearn.utils._tags import get_tags
from sklearn.utils.validation import check_is_fitted

__all__ = ["TransformedTargetRegressor"]


class TransformedTargetRegressor(RegressorMixin, BaseEstimator):
    """Meta-estimator to regress on a transformed target.

    Useful for applying a non-linear transformation to the target `y` in
    regression problems. This transformation can be given as a Transformer
    such as the :class:`~sklearn.preprocessing.QuantileTransformer` or as a
    function and its inverse such as `np.log` and `np.exp`.

    The computation during :meth:`fit` is::

        regressor.fit(X, func(y))

    or::

        regressor.fit(X, transformer.transform(y))

    The computation during :meth:`predict` is::

        inverse_func(regressor.predict(X))

    or::

        transformer.inverse_transform(regressor.predict(X))

    Read more in the :ref:`User Guide <transformed_target_regressor>`.

    .. versionadded:: 0.20

    Parameters
    ----------
    regressor : object, default=None
        Regressor object such as derived from
        :class:`~sklearn.base.RegressorMixin`. This regressor will
        automatically be cloned each time prior to fitting. If `regressor is
        None`, :class:`~sklearn.linear_model.LinearRegression` is created and used.

    transformer : object, default=None
        Estimator object such as derived from
        :class:`~sklearn.base.TransformerMixin`. Cannot be set at the same time
        as `func` and `inverse_func`. If `transformer is None` as well as
        `func` and `inverse_func`, the transformer will be an identity
        transformer. Note that the transformer will be cloned during fitting.
        Also, the transformer restricts `y` to be a numpy array.

    func : function, default=None
        Function to apply to `y` before passing to :meth:`fit`. Cannot be set
        at the same time as `transformer`. If `func is None`, the function used will be
        the identity function. If `func` is set, `inverse_func` also needs to be
        provided. The function needs to return a 2-dimensional array.

    inverse_func : function, default=None
        Function to apply to the prediction of the regressor. Cannot be set at
        the same time as `transformer`. The inverse function is used to return
        predictions to the same space as the original training labels. If
        `inverse_func` is set, `func` also needs to be provided. The inverse
        function needs to return a 2-dimensional array.

    check_inverse : bool, default=True
        Whether to check that `transform` followed by `inverse_transform`
        or `func` followed by `inverse_func` leads to the original targets.

    Attributes
    ----------
    regressor_ : object
        Fitted regressor.

    transformer_ : object
        Transformer used in :meth:`fit` and :meth:`predict`.

    n_features_in_ : int
        Number of features seen during :term:`fit`. Only defined if the
        underlying regressor exposes such an attribute when fit.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    sklearn.preprocessing.FunctionTransformer : Construct a transformer from an
        arbitrary callable.

    Notes
    -----
    Internally, the target `y` is always converted into a 2-dimensional array
    to be used by scikit-learn transformers. At the time of prediction, the
    output will be reshaped to have the same number of dimensions as `y`.

    Examples
    --------
    >>> import numpy as np
    >>> from sklearn.linear_model import LinearRegression
    >>> from sklearn.compose import TransformedTargetRegressor
    >>> tt = TransformedTargetRegressor(regressor=LinearRegression(),
    ...                                 func=np.log, inverse_func=np.exp)
    >>> X = np.arange(4).reshape(-1, 1)
    >>> y = np.exp(2 * X).ravel()
    >>> tt.fit(X, y)
    TransformedTargetRegressor(...)
    >>> tt.score(X, y)
    1.0
    >>> tt.regressor_.coef_
    array([2.])

    For a more detailed example use case refer to
    :ref:`sphx_glr_auto_examples_compose_plot_transformed_target.py`.
    """

    _parameter_constraints: dict = {
        "regressor": [HasMethods(["fit", "predict"]), None],
        "transformer": [HasMethods("transform"), None],
        "func": [callable, None],
        "inverse_func": [callable, None],
        "check_inverse": ["boolean"],
    }

    def __init__(
        self,
        regressor=None,
        *,
        transformer=None,
        func=None,
        inverse_func=None,
        check_inverse=True,
    ):
        self.regressor = regressor
        self.transformer = transformer
        self.func = func
        self.inverse_func = inverse_func
        self.check_inverse = check_inverse

    def _fit_transformer(self, y):
        """Check transformer and fit transformer.

        Create the default transformer, fit it and make additional inverse
        check on a subset (optional).

        """
        if self.transformer is not None and (
            self.func is not None or self.inverse_func is not None
        ):
            raise ValueError(
                "'transformer' and functions 'func'/'inverse_func' cannot both be set."
            )
        elif self.transformer is not None:
            self.transformer_ = clone(self.transformer)
        else:
            if (self.func is not None and self.inverse_func is None) or (
                self.func is None and self.inverse_func is not None
            ):
                lacking_param, existing_param = (
                    ("func", "inverse_func")
                    if self.func is None
                    else ("inverse_func", "func")
                )
                raise ValueError(
                    f"When '{existing_param}' is provided, '{lacking_param}' must also"
                    f" be provided. If {lacking_param} is supposed to be the default,"
                    " you need to explicitly pass it the identity function."
                )
            self.transformer_ = FunctionTransformer(
                func=self.func,
                inverse_func=self.inverse_func,
                validate=True,
                check_inverse=self.check_inverse,
            )
            # We are transforming the target here and not the features, so we set the
            # output of FunctionTransformer() to be a numpy array (default) and to not
            # depend on the global configuration:
            self.transformer_.set_output(transform="default")
        # XXX: sample_weight is not currently passed to the
        # transformer. However, if the transformer starts using sample_weight,
        # the code should be modified accordingly. When the sample_prop feature
        # is considered, this will also be a good use case to support.
        self.transformer_.fit(y)
        if self.check_inverse:
            idx_selected = slice(None, None, max(1, y.shape[0] // 10))
            y_sel = _safe_indexing(y, idx_selected)
            y_sel_t = self.transformer_.transform(y_sel)
            if not np.allclose(y_sel, self.transformer_.inverse_transform(y_sel_t)):
                warnings.warn(
                    (
                        "The provided functions or transformer are"
                        " not strictly inverse of each other. If"
                        " you are sure you want to proceed regardless"
                        ", set 'check_inverse=False'"
                    ),
                    UserWarning,
                )

    @_fit_context(
        # TransformedTargetRegressor.regressor/transformer are not validated yet.
        prefer_skip_nested_validation=False
    )
    def fit(self, X, y, **fit_params):
        """Fit the model according to the given training data.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training vector, where `n_samples` is the number of samples and
            `n_features` is the number of features.

        y : array-like of shape (n_samples,)
            Target values.

        **fit_params : dict
            - If `enable_metadata_routing=False` (default): Parameters directly passed
              to the `fit` method of the underlying regressor.

            - If `enable_metadata_routing=True`: Parameters safely routed to the `fit`
              method of the underlying regressor.

            .. versionchanged:: 1.6
                See :ref:`Metadata Routing User Guide <metadata_routing>` for
                more details.

        Returns
        -------
        self : object
            Fitted estimator.
        """
        if y is None:
            raise ValueError(
                f"This {self.__class__.__name__} estimator "
                "requires y to be passed, but the target y is None."
            )
        y = check_array(
            y,
            input_name="y",
            accept_sparse=False,
            ensure_all_finite=True,
            ensure_2d=False,
            dtype="numeric",
            allow_nd=True,
        )

        # store the number of dimensions of the target to predict an array of
        # similar shape at predict time
        self._training_dim = y.ndim

        # transformers are designed to modify X which is 2-dimensional, so we
        # need to modify y accordingly.
        if y.ndim == 1:
            y_2d = y.reshape(-1, 1)
        else:
            y_2d = y
        self._fit_transformer(y_2d)

        # transform y and convert back to 1d array if needed
        y_trans = self.transformer_.transform(y_2d)
        # FIXME: a FunctionTransformer can return a 1D array even when validate
        # is set to True. Therefore, we need to check the number of dimensions
        # first.
        if y_trans.ndim == 2 and y_trans.shape[1] == 1 and self._training_dim == 1:
            y_trans = y_trans.squeeze(axis=1)

        self.regressor_ = self._get_regressor(get_clone=True)
        if _routing_enabled():
            routed_params = process_routing(self, "fit", **fit_params)
        else:
            routed_params = Bunch(regressor=Bunch(fit=fit_params))

        self.regressor_.fit(X, y_trans, **routed_params.regressor.fit)

        if hasattr(self.regressor_, "feature_names_in_"):
            self.feature_names_in_ = self.regressor_.feature_names_in_

        return self

    def predict(self, X, **predict_params):
        """Predict using the base regressor, applying inverse.

        The regressor is used to predict and the `inverse_func` or
        `inverse_transform` is applied before returning the prediction.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Samples.

        **predict_params : dict of str -> object
            - If `enable_metadata_routing=False` (default): Parameters directly passed
              to the `predict` method of the underlying regressor.

            - If `enable_metadata_routing=True`: Parameters safely routed to the
              `predict` method of the underlying regressor.

            .. versionchanged:: 1.6
                See :ref:`Metadata Routing User Guide <metadata_routing>`
                for more details.

        Returns
        -------
        y_hat : ndarray of shape (n_samples,)
            Predicted values.
        """
        check_is_fitted(self)
        if _routing_enabled():
            routed_params = process_routing(self, "predict", **predict_params)
        else:
            routed_params = Bunch(regressor=Bunch(predict=predict_params))

        pred = self.regressor_.predict(X, **routed_params.regressor.predict)
        if pred.ndim == 1:
            pred_trans = self.transformer_.inverse_transform(pred.reshape(-1, 1))
        else:
            pred_trans = self.transformer_.inverse_transform(pred)
        if (
            self._training_dim == 1
            and pred_trans.ndim == 2
            and pred_trans.shape[1] == 1
        ):
            pred_trans = pred_trans.squeeze(axis=1)

        return pred_trans

    def __sklearn_tags__(self):
        regressor = self._get_regressor()
        tags = super().__sklearn_tags__()
        tags.regressor_tags.poor_score = True
        tags.input_tags.sparse = get_tags(regressor).input_tags.sparse
        tags.target_tags.multi_output = get_tags(regressor).target_tags.multi_output
        return tags

    @property
    def n_features_in_(self):
        """Number of features seen during :term:`fit`."""
        # For consistency with other estimators we raise an AttributeError so
        # that hasattr() returns False when the estimator isn't fitted.
        try:
            check_is_fitted(self)
        except NotFittedError as nfe:
            raise AttributeError(
                "{} object has no n_features_in_ attribute.".format(
                    self.__class__.__name__
                )
            ) from nfe

        return self.regressor_.n_features_in_

    def get_metadata_routing(self):
        """Get metadata routing of this object.

        Please check :ref:`User Guide <metadata_routing>` on how the routing
        mechanism works.

        .. versionadded:: 1.6

        Returns
        -------
        routing : MetadataRouter
            A :class:`~sklearn.utils.metadata_routing.MetadataRouter` encapsulating
            routing information.
        """
        router = MetadataRouter(owner=self).add(
            regressor=self._get_regressor(),
            method_mapping=MethodMapping()
            .add(caller="fit", callee="fit")
            .add(caller="predict", callee="predict"),
        )
        return router

    def _get_regressor(self, get_clone=False):
        if self.regressor is None:
            return LinearRegression()

        return clone(self.regressor) if get_clone else self.regressor
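The hunk above implements `TransformedTargetRegressor` itself. The class docstring demonstrates the `func`/`inverse_func` path; the `transformer` parameter accepts a full estimator instead. A minimal sketch of that usage, deliberately substituting `PowerTransformer` for the `QuantileTransformer` named in the docstring so the inverse round trip stays exact and the `check_inverse` check stays quiet:

import numpy as np

from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PowerTransformer

rng = np.random.RandomState(0)
X = rng.uniform(size=(200, 3))
# A right-skewed target, the typical motivation for transforming y.
y = np.exp(X @ np.array([1.0, 2.0, 3.0]) + 0.1 * rng.normal(size=200))

reg = TransformedTargetRegressor(
    regressor=LinearRegression(),
    transformer=PowerTransformer(),  # yeo-johnson by default, exact inverse
)
reg.fit(X, y)  # y is reshaped to 2D, transformed, and the regressor fit on it
print(reg.predict(X[:3]))  # predictions are mapped back via inverse_transform
print(reg.score(X, y))     # R^2 computed in the original y space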
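The final hunk appears to be the accompanying test module for `TransformedTargetRegressor`. Among other things it exercises the `check_inverse` warning; the following sketch shows that behavior in isolation, assuming a deliberately mismatched `func`/`inverse_func` pair:

import warnings

import numpy as np

from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import LinearRegression

X = np.random.RandomState(0).uniform(1.0, 2.0, size=(100, 2))
y = X.sum(axis=1)  # strictly positive, so sqrt and log are both defined

# np.sqrt and np.log are not inverses of each other, so fit() emits a
# UserWarning unless check_inverse=False is passed.
reg = TransformedTargetRegressor(
    regressor=LinearRegression(), func=np.sqrt, inverse_func=np.log
)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    reg.fit(X, y)
print([str(w.message) for w in caught])  # mentions "not strictly inverse"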
@@ -0,0 +1,439 @@
import warnings

import numpy as np
import pytest

from sklearn import config_context, datasets
from sklearn.base import BaseEstimator, TransformerMixin, clone
from sklearn.compose import TransformedTargetRegressor
from sklearn.dummy import DummyRegressor
from sklearn.linear_model import LinearRegression, OrthogonalMatchingPursuit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer, StandardScaler
from sklearn.utils._testing import assert_allclose

friedman = datasets.make_friedman1(random_state=0)


def test_transform_target_regressor_error():
    X, y = friedman
    # provide a transformer and functions at the same time
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(),
        transformer=StandardScaler(),
        func=np.exp,
        inverse_func=np.log,
    )
    with pytest.raises(
        ValueError,
        match="'transformer' and functions 'func'/'inverse_func' cannot both be set.",
    ):
        regr.fit(X, y)
    # fit with sample_weight with a regressor which does not support it
    sample_weight = np.ones((y.shape[0],))
    regr = TransformedTargetRegressor(
        regressor=OrthogonalMatchingPursuit(), transformer=StandardScaler()
    )
    with pytest.raises(
        TypeError,
        match=r"fit\(\) got an unexpected keyword argument 'sample_weight'",
    ):
        regr.fit(X, y, sample_weight=sample_weight)

    # one of (func, inverse_func) is given but the other one is not
    regr = TransformedTargetRegressor(func=np.exp)
    with pytest.raises(
        ValueError,
        match="When 'func' is provided, 'inverse_func' must also be provided",
    ):
        regr.fit(X, y)

    regr = TransformedTargetRegressor(inverse_func=np.log)
    with pytest.raises(
        ValueError,
        match="When 'inverse_func' is provided, 'func' must also be provided",
    ):
        regr.fit(X, y)


def test_transform_target_regressor_invertible():
    X, y = friedman
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(),
        func=np.sqrt,
        inverse_func=np.log,
        check_inverse=True,
    )
    with pytest.warns(
        UserWarning,
        match=(r"The provided functions.* are not strictly inverse of each other"),
    ):
        regr.fit(X, y)
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.sqrt, inverse_func=np.log
    )
    regr.set_params(check_inverse=False)

    with warnings.catch_warnings():
        warnings.simplefilter("error", UserWarning)
        regr.fit(X, y)


def _check_standard_scaled(y, y_pred):
    y_mean = np.mean(y, axis=0)
    y_std = np.std(y, axis=0)
    assert_allclose((y - y_mean) / y_std, y_pred)


def _check_shifted_by_one(y, y_pred):
    assert_allclose(y + 1, y_pred)


def test_transform_target_regressor_functions():
    X, y = friedman
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    y_pred = regr.fit(X, y).predict(X)
    # check the transformer output
    y_tran = regr.transformer_.transform(y.reshape(-1, 1)).squeeze()
    assert_allclose(np.log(y), y_tran)
    assert_allclose(
        y, regr.transformer_.inverse_transform(y_tran.reshape(-1, 1)).squeeze()
    )
    assert y.shape == y_pred.shape
    assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
    # check the regressor output
    lr = LinearRegression().fit(X, regr.func(y))
    assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())


def test_transform_target_regressor_functions_multioutput():
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), func=np.log, inverse_func=np.exp
    )
    y_pred = regr.fit(X, y).predict(X)
    # check the transformer output
    y_tran = regr.transformer_.transform(y)
    assert_allclose(np.log(y), y_tran)
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran))
    assert y.shape == y_pred.shape
    assert_allclose(y_pred, regr.inverse_func(regr.regressor_.predict(X)))
    # check the regressor output
    lr = LinearRegression().fit(X, regr.func(y))
    assert_allclose(regr.regressor_.coef_.ravel(), lr.coef_.ravel())


@pytest.mark.parametrize(
    "X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
)
def test_transform_target_regressor_1d_transformer(X, y):
    # All transformers in scikit-learn expect 2D data. FunctionTransformer with
    # validate=False lifts this constraint without checking that the input is a
    # 2D vector. We check the consistency of the data shape using a 1D and 2D y
    # array.
    transformer = FunctionTransformer(
        func=lambda x: x + 1, inverse_func=lambda x: x - 1
    )
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_shifted_by_one(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)


@pytest.mark.parametrize(
    "X,y", [friedman, (friedman[0], np.vstack((friedman[1], friedman[1] ** 2 + 1)).T)]
)
def test_transform_target_regressor_2d_transformer(X, y):
    # Check consistency with transformer accepting only 2D array and a 1D/2D y
    # array.
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    if y.ndim == 1:  # create a 2D array and squeeze results
        y_tran = regr.transformer_.transform(y.reshape(-1, 1))
    else:
        y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran.squeeze())
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    if y.ndim == 1:  # create a 2D array and squeeze results
        lr.fit(X, transformer2.fit_transform(y.reshape(-1, 1)).squeeze())
        y_lr_pred = lr.predict(X).reshape(-1, 1)
        y_pred2 = transformer2.inverse_transform(y_lr_pred).squeeze()
    else:
        lr.fit(X, transformer2.fit_transform(y))
        y_lr_pred = lr.predict(X)
        y_pred2 = transformer2.inverse_transform(y_lr_pred)

    assert_allclose(y_pred, y_pred2)
    assert_allclose(regr.regressor_.coef_, lr.coef_)


def test_transform_target_regressor_2d_transformer_multioutput():
    # Check consistency with transformer accepting only 2D array and a 2D y
    # array.
    X = friedman[0]
    y = np.vstack((friedman[1], friedman[1] ** 2 + 1)).T
    transformer = StandardScaler()
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape
    # consistency forward transform
    y_tran = regr.transformer_.transform(y)
    _check_standard_scaled(y, y_tran)
    assert y.shape == y_pred.shape
    # consistency inverse transform
    assert_allclose(y, regr.transformer_.inverse_transform(y_tran).squeeze())
    # consistency of the regressor
    lr = LinearRegression()
    transformer2 = clone(transformer)
    lr.fit(X, transformer2.fit_transform(y))
    y_lr_pred = lr.predict(X)
    assert_allclose(y_pred, transformer2.inverse_transform(y_lr_pred))
    assert_allclose(regr.regressor_.coef_, lr.coef_)


def test_transform_target_regressor_3d_target():
    # Non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/18866
    # Check with a 3D target with a transformer that reshapes the target
    X = friedman[0]
    y = np.tile(friedman[1].reshape(-1, 1, 1), [1, 3, 2])

    def flatten_data(data):
        return data.reshape(data.shape[0], -1)

    def unflatten_data(data):
        return data.reshape(data.shape[0], -1, 2)

    transformer = FunctionTransformer(func=flatten_data, inverse_func=unflatten_data)
    regr = TransformedTargetRegressor(
        regressor=LinearRegression(), transformer=transformer
    )
    y_pred = regr.fit(X, y).predict(X)
    assert y.shape == y_pred.shape


def test_transform_target_regressor_multi_to_single():
    X = friedman[0]
    y = np.transpose([friedman[1], (friedman[1] ** 2 + 1)])

    def func(y):
        out = np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)
        return out[:, np.newaxis]

    def inverse_func(y):
        return y

    tt = TransformedTargetRegressor(
        func=func, inverse_func=inverse_func, check_inverse=False
    )
    tt.fit(X, y)
    y_pred_2d_func = tt.predict(X)
    assert y_pred_2d_func.shape == (100, 1)

    # force the function to return only a 1D array
    def func(y):
        return np.sqrt(y[:, 0] ** 2 + y[:, 1] ** 2)

    tt = TransformedTargetRegressor(
        func=func, inverse_func=inverse_func, check_inverse=False
    )
    tt.fit(X, y)
    y_pred_1d_func = tt.predict(X)
    assert y_pred_1d_func.shape == (100, 1)

    assert_allclose(y_pred_1d_func, y_pred_2d_func)


class DummyCheckerArrayTransformer(TransformerMixin, BaseEstimator):
    def fit(self, X, y=None):
        assert isinstance(X, np.ndarray)
        return self

    def transform(self, X):
        assert isinstance(X, np.ndarray)
        return X

    def inverse_transform(self, X):
        assert isinstance(X, np.ndarray)
        return X


class DummyCheckerListRegressor(DummyRegressor):
    def fit(self, X, y, sample_weight=None):
        assert isinstance(X, list)
        return super().fit(X, y, sample_weight)

    def predict(self, X):
        assert isinstance(X, list)
        return super().predict(X)


def test_transform_target_regressor_ensure_y_array():
    # check that the target ``y`` passed to the transformer will always be a
    # numpy array. Similarly, if ``X`` is passed as a list, we check that the
    # predictor receives it as is.
    X, y = friedman
    tt = TransformedTargetRegressor(
        transformer=DummyCheckerArrayTransformer(),
        regressor=DummyCheckerListRegressor(),
        check_inverse=False,
    )
    tt.fit(X.tolist(), y.tolist())
    tt.predict(X.tolist())
    with pytest.raises(AssertionError):
        tt.fit(X, y.tolist())
    with pytest.raises(AssertionError):
        tt.predict(X)


class DummyTransformer(TransformerMixin, BaseEstimator):
    """Dummy transformer which counts how many times fit was called."""

    def __init__(self, fit_counter=0):
        self.fit_counter = fit_counter

    def fit(self, X, y=None):
        self.fit_counter += 1
        return self

    def transform(self, X):
        return X

    def inverse_transform(self, X):
        return X


@pytest.mark.parametrize("check_inverse", [False, True])
def test_transform_target_regressor_count_fit(check_inverse):
    # regression test for gh-issue #11618
    # check that fit is only called once on the transformer
    X, y = friedman
    ttr = TransformedTargetRegressor(
        transformer=DummyTransformer(), check_inverse=check_inverse
    )
    ttr.fit(X, y)
    assert ttr.transformer_.fit_counter == 1


class DummyRegressorWithExtraFitParams(DummyRegressor):
    def fit(self, X, y, sample_weight=None, check_input=True):
        # in the test below we force this to False and make sure it is
        # actually passed to the regressor
        assert not check_input
        return super().fit(X, y, sample_weight)


def test_transform_target_regressor_pass_fit_parameters():
    X, y = friedman
    regr = TransformedTargetRegressor(
        regressor=DummyRegressorWithExtraFitParams(), transformer=DummyTransformer()
    )

    regr.fit(X, y, check_input=False)
    assert regr.transformer_.fit_counter == 1


def test_transform_target_regressor_route_pipeline():
    X, y = friedman

    regr = TransformedTargetRegressor(
        regressor=DummyRegressorWithExtraFitParams(), transformer=DummyTransformer()
    )
    estimators = [("normalize", StandardScaler()), ("est", regr)]

    pip = Pipeline(estimators)
    pip.fit(X, y, **{"est__check_input": False})

    assert regr.transformer_.fit_counter == 1


class DummyRegressorWithExtraPredictParams(DummyRegressor):
    def predict(self, X, check_input=True):
        # in the test below we make sure that the check_input parameter is
        # passed as False
        self.predict_called = True
        assert not check_input
        return super().predict(X)


def test_transform_target_regressor_pass_extra_predict_parameters():
    # Checks that predict kwargs are passed to the regressor.
    X, y = friedman
    regr = TransformedTargetRegressor(
        regressor=DummyRegressorWithExtraPredictParams(), transformer=DummyTransformer()
    )

    regr.fit(X, y)
    regr.predict(X, check_input=False)
    assert regr.regressor_.predict_called


@pytest.mark.parametrize("output_format", ["pandas", "polars"])
def test_transform_target_regressor_not_warns_with_global_output_set(output_format):
    """Test that TransformedTargetRegressor will not raise warnings if
    set_config(transform_output="pandas"/"polars") is set globally; regression test for
    issue #29361."""
    X, y = datasets.make_regression()
    y = np.abs(y) + 1
    with config_context(transform_output=output_format):
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            TransformedTargetRegressor(
                regressor=LinearRegression(), func=np.log, inverse_func=np.exp
            ).fit(X, y)


class ValidateDimensionRegressor(BaseEstimator):
    """A regressor that expects the target to have a specific number of dimensions."""

    def __init__(self, ndim):
        self.ndim = ndim

    def fit(self, X, y):
        assert y.ndim == self.ndim

    def predict(self, X):
        pass  # pragma: no cover


@pytest.mark.parametrize("ndim", [1, 2])
def test_transform_target_regressor_preserves_input_shape(ndim):
    """Check that TransformedTargetRegressor internally preserves the shape of
    the input target.

    Non-regression test for issue #26530.
    """
    X, y = datasets.make_regression(n_samples=10, n_features=5, random_state=42)
    if ndim == 2:
        y = y.reshape(-1, 1)

    regr = TransformedTargetRegressor(regressor=ValidateDimensionRegressor(ndim))
    regr.fit(X, y)
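The fit and predict docstrings above document the `enable_metadata_routing` switch (versionchanged 1.6), which the tests do not demonstrate end to end. A hedged sketch of routing `sample_weight` to the inner regressor; the choice of `Ridge` and the weight values are assumptions for illustration only:

import numpy as np

from sklearn import config_context
from sklearn.compose import TransformedTargetRegressor
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.uniform(size=(50, 2))
y = X.sum(axis=1) + 1.0  # strictly positive, so np.log is safe
sample_weight = np.linspace(0.1, 1.0, num=50)

with config_context(enable_metadata_routing=True):
    # The inner regressor must explicitly request sample_weight; fit params
    # are then routed to it instead of being passed through blindly.
    reg = TransformedTargetRegressor(
        regressor=Ridge().set_fit_request(sample_weight=True),
        func=np.log,
        inverse_func=np.exp,
    )
    reg.fit(X, y, sample_weight=sample_weight)
    print(reg.predict(X[:3]))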