Videre
This commit is contained in:
@@ -0,0 +1,244 @@
|
||||
"""Kernel ridge regression."""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from numbers import Real
|
||||
|
||||
import numpy as np
|
||||
|
||||
from sklearn.base import BaseEstimator, MultiOutputMixin, RegressorMixin, _fit_context
|
||||
from sklearn.linear_model._ridge import _solve_cholesky_kernel
|
||||
from sklearn.metrics.pairwise import PAIRWISE_KERNEL_FUNCTIONS, pairwise_kernels
|
||||
from sklearn.utils._param_validation import Interval, StrOptions
|
||||
from sklearn.utils.validation import (
|
||||
_check_sample_weight,
|
||||
check_is_fitted,
|
||||
validate_data,
|
||||
)
|
||||
|
||||
|
||||
class KernelRidge(MultiOutputMixin, RegressorMixin, BaseEstimator):
    """Kernel ridge regression.

    Kernel ridge regression (KRR) is ridge regression (linear least squares
    with an l2-norm penalty) combined with the kernel trick. The model it
    learns is therefore linear in the space induced by the chosen kernel and
    the data; with a non-linear kernel, this amounts to a non-linear function
    in the original space.

    A KRR model has the same form as a support vector regression (SVR) model.
    The two differ in their loss function: KRR uses squared error loss, SVR
    uses epsilon-insensitive loss, and both are combined with l2
    regularization. Unlike SVR, a KRR model can be fitted in closed-form,
    which is typically faster for medium-sized datasets. In return, the
    learned model is non-sparse and thus slower at prediction-time than SVR,
    which learns a sparse model for epsilon > 0.

    Multi-variate regression is supported out of the box (i.e., when y is a
    2d-array of shape [n_samples, n_targets]).

    Read more in the :ref:`User Guide <kernel_ridge>`.

    Parameters
    ----------
    alpha : float or array-like of shape (n_targets,), default=1.0
        Regularization strength; must be a positive float. Regularization
        makes the problem better conditioned and lowers the variance of the
        estimates. The larger the value, the stronger the regularization.
        Alpha corresponds to ``1 / (2C)`` in other linear models such as
        :class:`~sklearn.linear_model.LogisticRegression` or
        :class:`~sklearn.svm.LinearSVC`. When an array is passed, each
        penalty is taken to be specific to one target, so the number of
        penalties must match the number of targets. See
        :ref:`ridge_regression` for formula.

    kernel : str or callable, default="linear"
        Kernel mapping used internally. This parameter is passed directly to
        :func:`~sklearn.metrics.pairwise.pairwise_kernels`.
        If `kernel` is a string, it must be one of the metrics
        in `pairwise.PAIRWISE_KERNEL_FUNCTIONS` or "precomputed".
        If `kernel` is "precomputed", X is assumed to be a kernel matrix.
        Alternatively, if `kernel` is a callable function, it is called on
        each pair of instances (rows) and the resulting value recorded. The
        callable should take two rows from X as input and return the
        corresponding kernel value as a single number. As a consequence,
        callables from :mod:`sklearn.metrics.pairwise` are not allowed,
        because they operate on matrices rather than on single samples. Use
        the string identifying the kernel instead.

    gamma : float, default=None
        Gamma parameter for the RBF, laplacian, polynomial, exponential chi2
        and sigmoid kernels. How the default value is interpreted is left to
        the kernel; see the documentation for sklearn.metrics.pairwise.
        Ignored by other kernels.

    degree : float, default=3
        Degree of the polynomial kernel. Ignored by other kernels.

    coef0 : float, default=1
        Zero coefficient for polynomial and sigmoid kernels.
        Ignored by other kernels.

    kernel_params : dict, default=None
        Additional parameters (keyword arguments) for a kernel function
        passed as a callable object.

    Attributes
    ----------
    dual_coef_ : ndarray of shape (n_samples,) or (n_samples, n_targets)
        Representation of weight vector(s) in kernel space

    X_fit_ : {ndarray, sparse matrix} of shape (n_samples, n_features)
        Training data, which is also required for prediction. If
        kernel == "precomputed" this is instead the precomputed
        training matrix, of shape (n_samples, n_samples).

    n_features_in_ : int
        Number of features seen during :term:`fit`.

        .. versionadded:: 0.24

    feature_names_in_ : ndarray of shape (`n_features_in_`,)
        Names of features seen during :term:`fit`. Defined only when `X`
        has feature names that are all strings.

        .. versionadded:: 1.0

    See Also
    --------
    sklearn.gaussian_process.GaussianProcessRegressor : Gaussian
        Process regressor providing automatic kernel hyperparameters
        tuning and predictions uncertainty.
    sklearn.linear_model.Ridge : Linear ridge regression.
    sklearn.linear_model.RidgeCV : Ridge regression with built-in
        cross-validation.
    sklearn.svm.SVR : Support Vector Regression accepting a large variety
        of kernels.

    References
    ----------
    * Kevin P. Murphy
      "Machine Learning: A Probabilistic Perspective", The MIT Press
      chapter 14.4.3, pp. 492-493

    Examples
    --------
    >>> from sklearn.kernel_ridge import KernelRidge
    >>> import numpy as np
    >>> n_samples, n_features = 10, 5
    >>> rng = np.random.RandomState(0)
    >>> y = rng.randn(n_samples)
    >>> X = rng.randn(n_samples, n_features)
    >>> krr = KernelRidge(alpha=1.0)
    >>> krr.fit(X, y)
    KernelRidge(alpha=1.0)
    """

    _parameter_constraints: dict = {
        "alpha": [Interval(Real, 0, None, closed="left"), "array-like"],
        "kernel": [
            # Every named pairwise kernel, plus the "precomputed" sentinel.
            StrOptions({*PAIRWISE_KERNEL_FUNCTIONS, "precomputed"}),
            callable,
        ],
        "gamma": [Interval(Real, 0, None, closed="left"), None],
        "degree": [Interval(Real, 0, None, closed="left")],
        "coef0": [Interval(Real, None, None, closed="neither")],
        "kernel_params": [dict, None],
    }

    def __init__(
        self,
        alpha=1,
        *,
        kernel="linear",
        gamma=None,
        degree=3,
        coef0=1,
        kernel_params=None,
    ):
        # Hyper-parameters are stored verbatim; nothing is validated here.
        self.alpha = alpha
        self.kernel = kernel
        self.gamma = gamma
        self.degree = degree
        self.coef0 = coef0
        self.kernel_params = kernel_params

    def _get_kernel(self, X, Y=None):
        """Evaluate the configured kernel between the rows of X and Y.

        A callable kernel receives `kernel_params` (or no extra keyword
        arguments when that is unset or empty); any other kernel receives
        `gamma`, `degree` and `coef0`. `filter_params=True` is passed to
        `pairwise_kernels` together with these keyword arguments.
        """
        kernel = self.kernel
        if not callable(kernel):
            kwargs = {
                "gamma": self.gamma,
                "degree": self.degree,
                "coef0": self.coef0,
            }
        elif self.kernel_params:
            kwargs = self.kernel_params
        else:
            kwargs = {}
        return pairwise_kernels(X, Y, metric=kernel, filter_params=True, **kwargs)

    def __sklearn_tags__(self):
        """Return the estimator tags, flagging sparse and pairwise input."""
        tags = super().__sklearn_tags__()
        input_tags = tags.input_tags
        input_tags.sparse = True
        # X is a pairwise (kernel) matrix only when the kernel is precomputed.
        input_tags.pairwise = self.kernel == "precomputed"
        return tags

    @_fit_context(prefer_skip_nested_validation=True)
    def fit(self, X, y, sample_weight=None):
        """Fit Kernel Ridge regression model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Training data. If kernel == "precomputed" this is instead
            a precomputed kernel matrix, of shape (n_samples, n_samples).

        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Target values.

        sample_weight : float or array-like of shape (n_samples,), default=None
            Individual weights for each sample, ignored if None is passed.

        Returns
        -------
        self : object
            Returns the instance itself.
        """
        # Validate and convert the inputs.
        X, y = validate_data(
            self, X, y, accept_sparse=("csr", "csc"), multi_output=True, y_numeric=True
        )

        # None and Python float scalars are forwarded to the solver untouched;
        # anything else is validated against X first.
        skip_weight_check = sample_weight is None or isinstance(sample_weight, float)
        if not skip_weight_check:
            sample_weight = _check_sample_weight(sample_weight, X)

        gram = self._get_kernel(X)
        penalties = np.atleast_1d(self.alpha)

        # The solver is always handed a 2d target; a 1d y is turned into a
        # single column here and the coefficients are flattened back below.
        single_target = y.ndim == 1
        targets = y.reshape(-1, 1) if single_target else y

        # NOTE(review): a copy is requested only for a precomputed kernel,
        # presumably so the caller's matrix is not modified by the solver --
        # confirm against `_solve_cholesky_kernel`.
        coefs = _solve_cholesky_kernel(
            gram, targets, penalties, sample_weight, self.kernel == "precomputed"
        )
        self.dual_coef_ = coefs.ravel() if single_target else coefs

        # The training data is kept because predict needs it to build the
        # kernel between new samples and the fitted ones.
        self.X_fit_ = X

        return self

    def predict(self, X):
        """Predict using the kernel ridge model.

        Parameters
        ----------
        X : {array-like, sparse matrix} of shape (n_samples, n_features)
            Samples. If kernel == "precomputed" this is instead a
            precomputed kernel matrix, shape = [n_samples,
            n_samples_fitted], where n_samples_fitted is the number of
            samples used in the fitting for this estimator.

        Returns
        -------
        C : ndarray of shape (n_samples,) or (n_samples, n_targets)
            Returns predicted values.
        """
        check_is_fitted(self)
        X = validate_data(self, X, accept_sparse=("csr", "csc"), reset=False)
        # Kernel between the new samples and the stored training data.
        cross_kernel = self._get_kernel(X, self.X_fit_)
        return np.dot(cross_kernel, self.dual_coef_)
|
||||
Reference in New Issue
Block a user