Videre
This commit is contained in:
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,846 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from scipy import linalg, sparse
|
||||
|
||||
from sklearn.datasets import load_iris, make_regression, make_sparse_uncorrelated
|
||||
from sklearn.linear_model import LinearRegression
|
||||
from sklearn.linear_model._base import (
|
||||
_preprocess_data,
|
||||
_rescale_data,
|
||||
make_dataset,
|
||||
)
|
||||
from sklearn.preprocessing import add_dummy_feature
|
||||
from sklearn.utils._testing import (
|
||||
assert_allclose,
|
||||
assert_array_almost_equal,
|
||||
assert_array_equal,
|
||||
)
|
||||
from sklearn.utils.fixes import (
|
||||
COO_CONTAINERS,
|
||||
CSC_CONTAINERS,
|
||||
CSR_CONTAINERS,
|
||||
LIL_CONTAINERS,
|
||||
)
|
||||
|
||||
rtol = 1e-6
|
||||
|
||||
|
||||
def test_linear_regression():
|
||||
# Test LinearRegression on a simple dataset.
|
||||
# a simple dataset
|
||||
X = [[1], [2]]
|
||||
Y = [1, 2]
|
||||
|
||||
reg = LinearRegression()
|
||||
reg.fit(X, Y)
|
||||
|
||||
assert_array_almost_equal(reg.coef_, [1])
|
||||
assert_array_almost_equal(reg.intercept_, [0])
|
||||
assert_array_almost_equal(reg.predict(X), [1, 2])
|
||||
|
||||
# test it also for degenerate input
|
||||
X = [[1]]
|
||||
Y = [0]
|
||||
|
||||
reg = LinearRegression()
|
||||
reg.fit(X, Y)
|
||||
assert_array_almost_equal(reg.coef_, [0])
|
||||
assert_array_almost_equal(reg.intercept_, [0])
|
||||
assert_array_almost_equal(reg.predict(X), [0])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
|
||||
@pytest.mark.parametrize("fit_intercept", [True, False])
|
||||
def test_linear_regression_sample_weights(
|
||||
sparse_container, fit_intercept, global_random_seed
|
||||
):
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
|
||||
# It would not work with under-determined systems
|
||||
n_samples, n_features = 6, 5
|
||||
|
||||
X = rng.normal(size=(n_samples, n_features))
|
||||
if sparse_container is not None:
|
||||
X = sparse_container(X)
|
||||
y = rng.normal(size=n_samples)
|
||||
|
||||
sample_weight = 1.0 + rng.uniform(size=n_samples)
|
||||
|
||||
# LinearRegression with explicit sample_weight
|
||||
reg = LinearRegression(fit_intercept=fit_intercept, tol=1e-16)
|
||||
reg.fit(X, y, sample_weight=sample_weight)
|
||||
coefs1 = reg.coef_
|
||||
inter1 = reg.intercept_
|
||||
|
||||
assert reg.coef_.shape == (X.shape[1],) # sanity checks
|
||||
|
||||
# Closed form of the weighted least square
|
||||
# theta = (X^T W X)^(-1) @ X^T W y
|
||||
W = np.diag(sample_weight)
|
||||
X_aug = X if not fit_intercept else add_dummy_feature(X)
|
||||
|
||||
Xw = X_aug.T @ W @ X_aug
|
||||
yw = X_aug.T @ W @ y
|
||||
coefs2 = linalg.solve(Xw, yw)
|
||||
|
||||
if not fit_intercept:
|
||||
assert_allclose(coefs1, coefs2)
|
||||
else:
|
||||
assert_allclose(coefs1, coefs2[1:])
|
||||
assert_allclose(inter1, coefs2[0])
|
||||
|
||||
|
||||
def test_raises_value_error_if_positive_and_sparse():
|
||||
error_msg = "Sparse data was passed for X, but dense data is required."
|
||||
# X must not be sparse if positive == True
|
||||
X = sparse.eye(10)
|
||||
y = np.ones(10)
|
||||
|
||||
reg = LinearRegression(positive=True)
|
||||
|
||||
with pytest.raises(TypeError, match=error_msg):
|
||||
reg.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_samples, n_features", [(2, 3), (3, 2)])
|
||||
def test_raises_value_error_if_sample_weights_greater_than_1d(n_samples, n_features):
|
||||
# Sample weights must be either scalar or 1D
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.randn(n_samples, n_features)
|
||||
y = rng.randn(n_samples)
|
||||
sample_weights_OK = rng.randn(n_samples) ** 2 + 1
|
||||
sample_weights_OK_1 = 1.0
|
||||
sample_weights_OK_2 = 2.0
|
||||
|
||||
reg = LinearRegression()
|
||||
|
||||
# make sure the "OK" sample weights actually work
|
||||
reg.fit(X, y, sample_weights_OK)
|
||||
reg.fit(X, y, sample_weights_OK_1)
|
||||
reg.fit(X, y, sample_weights_OK_2)
|
||||
|
||||
|
||||
def test_fit_intercept():
|
||||
# Test assertions on betas shape.
|
||||
X2 = np.array([[0.38349978, 0.61650022], [0.58853682, 0.41146318]])
|
||||
X3 = np.array(
|
||||
[[0.27677969, 0.70693172, 0.01628859], [0.08385139, 0.20692515, 0.70922346]]
|
||||
)
|
||||
y = np.array([1, 1])
|
||||
|
||||
lr2_without_intercept = LinearRegression(fit_intercept=False).fit(X2, y)
|
||||
lr2_with_intercept = LinearRegression().fit(X2, y)
|
||||
|
||||
lr3_without_intercept = LinearRegression(fit_intercept=False).fit(X3, y)
|
||||
lr3_with_intercept = LinearRegression().fit(X3, y)
|
||||
|
||||
assert lr2_with_intercept.coef_.shape == lr2_without_intercept.coef_.shape
|
||||
assert lr3_with_intercept.coef_.shape == lr3_without_intercept.coef_.shape
|
||||
assert lr2_without_intercept.coef_.ndim == lr3_without_intercept.coef_.ndim
|
||||
|
||||
|
||||
def test_linear_regression_sparse(global_random_seed):
|
||||
# Test that linear regression also works with sparse data
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
n = 100
|
||||
X = sparse.eye(n, n)
|
||||
beta = rng.rand(n)
|
||||
y = X @ beta
|
||||
|
||||
ols = LinearRegression()
|
||||
ols.fit(X, y.ravel())
|
||||
assert_array_almost_equal(beta, ols.coef_ + ols.intercept_)
|
||||
|
||||
assert_array_almost_equal(ols.predict(X) - y.ravel(), 0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fit_intercept", [True, False])
|
||||
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
|
||||
def test_linear_regression_sparse_equal_dense(fit_intercept, csr_container):
|
||||
# Test that linear regression agrees between sparse and dense
|
||||
rng = np.random.RandomState(0)
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.randn(n_samples, n_features)
|
||||
X[X < 0.1] = 0.0
|
||||
Xcsr = csr_container(X)
|
||||
y = rng.rand(n_samples)
|
||||
params = dict(fit_intercept=fit_intercept)
|
||||
clf_dense = LinearRegression(**params)
|
||||
clf_sparse = LinearRegression(**params)
|
||||
clf_dense.fit(X, y)
|
||||
clf_sparse.fit(Xcsr, y)
|
||||
assert clf_dense.intercept_ == pytest.approx(clf_sparse.intercept_)
|
||||
assert_allclose(clf_dense.coef_, clf_sparse.coef_)
|
||||
|
||||
|
||||
def test_linear_regression_multiple_outcome():
|
||||
# Test multiple-outcome linear regressions
|
||||
rng = np.random.RandomState(0)
|
||||
X, y = make_regression(random_state=rng)
|
||||
|
||||
Y = np.vstack((y, y)).T
|
||||
n_features = X.shape[1]
|
||||
|
||||
reg = LinearRegression()
|
||||
reg.fit((X), Y)
|
||||
assert reg.coef_.shape == (2, n_features)
|
||||
Y_pred = reg.predict(X)
|
||||
reg.fit(X, y)
|
||||
y_pred = reg.predict(X)
|
||||
assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
|
||||
def test_linear_regression_sparse_multiple_outcome(global_random_seed, coo_container):
|
||||
# Test multiple-outcome linear regressions with sparse data
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
X, y = make_sparse_uncorrelated(random_state=rng)
|
||||
X = coo_container(X)
|
||||
Y = np.vstack((y, y)).T
|
||||
n_features = X.shape[1]
|
||||
|
||||
ols = LinearRegression()
|
||||
ols.fit(X, Y)
|
||||
assert ols.coef_.shape == (2, n_features)
|
||||
Y_pred = ols.predict(X)
|
||||
ols.fit(X, y.ravel())
|
||||
y_pred = ols.predict(X)
|
||||
assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=3)
|
||||
|
||||
|
||||
def test_linear_regression_positive():
|
||||
# Test nonnegative LinearRegression on a simple dataset.
|
||||
X = [[1], [2]]
|
||||
y = [1, 2]
|
||||
|
||||
reg = LinearRegression(positive=True)
|
||||
reg.fit(X, y)
|
||||
|
||||
assert_array_almost_equal(reg.coef_, [1])
|
||||
assert_array_almost_equal(reg.intercept_, [0])
|
||||
assert_array_almost_equal(reg.predict(X), [1, 2])
|
||||
|
||||
# test it also for degenerate input
|
||||
X = [[1]]
|
||||
y = [0]
|
||||
|
||||
reg = LinearRegression(positive=True)
|
||||
reg.fit(X, y)
|
||||
assert_allclose(reg.coef_, [0])
|
||||
assert_allclose(reg.intercept_, [0])
|
||||
assert_allclose(reg.predict(X), [0])
|
||||
|
||||
|
||||
def test_linear_regression_positive_multiple_outcome(global_random_seed):
|
||||
# Test multiple-outcome nonnegative linear regressions
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
X, y = make_sparse_uncorrelated(random_state=rng)
|
||||
Y = np.vstack((y, y)).T
|
||||
n_features = X.shape[1]
|
||||
|
||||
ols = LinearRegression(positive=True)
|
||||
ols.fit(X, Y)
|
||||
assert ols.coef_.shape == (2, n_features)
|
||||
assert np.all(ols.coef_ >= 0.0)
|
||||
Y_pred = ols.predict(X)
|
||||
ols.fit(X, y.ravel())
|
||||
y_pred = ols.predict(X)
|
||||
assert_allclose(np.vstack((y_pred, y_pred)).T, Y_pred)
|
||||
|
||||
|
||||
def test_linear_regression_positive_vs_nonpositive(global_random_seed):
|
||||
# Test differences with LinearRegression when positive=False.
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
X, y = make_sparse_uncorrelated(random_state=rng)
|
||||
|
||||
reg = LinearRegression(positive=True)
|
||||
reg.fit(X, y)
|
||||
regn = LinearRegression(positive=False)
|
||||
regn.fit(X, y)
|
||||
|
||||
assert np.mean((reg.coef_ - regn.coef_) ** 2) > 1e-3
|
||||
|
||||
|
||||
def test_linear_regression_positive_vs_nonpositive_when_positive(global_random_seed):
|
||||
# Test LinearRegression fitted coefficients
|
||||
# when the problem is positive.
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
n_samples = 200
|
||||
n_features = 4
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = X[:, 0] + 2 * X[:, 1] + 3 * X[:, 2] + 1.5 * X[:, 3]
|
||||
|
||||
reg = LinearRegression(positive=True)
|
||||
reg.fit(X, y)
|
||||
regn = LinearRegression(positive=False)
|
||||
regn.fit(X, y)
|
||||
|
||||
assert np.mean((reg.coef_ - regn.coef_) ** 2) < 1e-6
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
|
||||
@pytest.mark.parametrize("use_sw", [True, False])
|
||||
def test_inplace_data_preprocessing(sparse_container, use_sw, global_random_seed):
|
||||
# Check that the data is not modified inplace by the linear regression
|
||||
# estimator.
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
original_X_data = rng.randn(10, 12)
|
||||
original_y_data = rng.randn(10, 2)
|
||||
orginal_sw_data = rng.rand(10)
|
||||
|
||||
if sparse_container is not None:
|
||||
X = sparse_container(original_X_data)
|
||||
else:
|
||||
X = original_X_data.copy()
|
||||
y = original_y_data.copy()
|
||||
# XXX: Note hat y_sparse is not supported (broken?) in the current
|
||||
# implementation of LinearRegression.
|
||||
|
||||
if use_sw:
|
||||
sample_weight = orginal_sw_data.copy()
|
||||
else:
|
||||
sample_weight = None
|
||||
|
||||
# Do not allow inplace preprocessing of X and y:
|
||||
reg = LinearRegression()
|
||||
reg.fit(X, y, sample_weight=sample_weight)
|
||||
if sparse_container is not None:
|
||||
assert_allclose(X.toarray(), original_X_data)
|
||||
else:
|
||||
assert_allclose(X, original_X_data)
|
||||
assert_allclose(y, original_y_data)
|
||||
|
||||
if use_sw:
|
||||
assert_allclose(sample_weight, orginal_sw_data)
|
||||
|
||||
# Allow inplace preprocessing of X and y
|
||||
reg = LinearRegression(copy_X=False)
|
||||
reg.fit(X, y, sample_weight=sample_weight)
|
||||
if sparse_container is not None:
|
||||
# No optimization relying on the inplace modification of sparse input
|
||||
# data has been implemented at this time.
|
||||
assert_allclose(X.toarray(), original_X_data)
|
||||
else:
|
||||
# X has been offset (and optionally rescaled by sample weights)
|
||||
# inplace. The 0.42 threshold is arbitrary and has been found to be
|
||||
# robust to any random seed in the admissible range.
|
||||
assert np.linalg.norm(X - original_X_data) > 0.42
|
||||
|
||||
# y should not have been modified inplace by LinearRegression.fit.
|
||||
assert_allclose(y, original_y_data)
|
||||
|
||||
if use_sw:
|
||||
# Sample weights have no reason to ever be modified inplace.
|
||||
assert_allclose(sample_weight, orginal_sw_data)
|
||||
|
||||
|
||||
def test_linear_regression_pd_sparse_dataframe_warning():
|
||||
pd = pytest.importorskip("pandas")
|
||||
|
||||
# Warning is raised only when some of the columns is sparse
|
||||
df = pd.DataFrame({"0": np.random.randn(10)})
|
||||
for col in range(1, 4):
|
||||
arr = np.random.randn(10)
|
||||
arr[:8] = 0
|
||||
# all columns but the first column is sparse
|
||||
if col != 0:
|
||||
arr = pd.arrays.SparseArray(arr, fill_value=0)
|
||||
df[str(col)] = arr
|
||||
|
||||
msg = "pandas.DataFrame with sparse columns found."
|
||||
|
||||
reg = LinearRegression()
|
||||
with pytest.warns(UserWarning, match=msg):
|
||||
reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
|
||||
|
||||
# does not warn when the whole dataframe is sparse
|
||||
df["0"] = pd.arrays.SparseArray(df["0"], fill_value=0)
|
||||
assert hasattr(df, "sparse")
|
||||
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("error", UserWarning)
|
||||
reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
|
||||
|
||||
|
||||
def test_preprocess_data(global_random_seed):
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
expected_X_mean = np.mean(X, axis=0)
|
||||
expected_y_mean = np.mean(y, axis=0)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_scale, sqrt_sw = _preprocess_data(
|
||||
X, y, fit_intercept=False
|
||||
)
|
||||
assert_array_almost_equal(X_mean, np.zeros(n_features))
|
||||
assert_array_almost_equal(y_mean, 0)
|
||||
assert_array_almost_equal(X_scale, np.ones(n_features))
|
||||
assert sqrt_sw is None
|
||||
assert_array_almost_equal(Xt, X)
|
||||
assert_array_almost_equal(yt, y)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_scale, sqrt_sw = _preprocess_data(
|
||||
X, y, fit_intercept=True
|
||||
)
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_scale, np.ones(n_features))
|
||||
assert sqrt_sw is None
|
||||
assert_array_almost_equal(Xt, X - expected_X_mean)
|
||||
assert_array_almost_equal(yt, y - expected_y_mean)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS)
|
||||
def test_preprocess_data_multioutput(global_random_seed, sparse_container):
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
n_samples = 200
|
||||
n_features = 3
|
||||
n_outputs = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples, n_outputs)
|
||||
expected_y_mean = np.mean(y, axis=0)
|
||||
|
||||
if sparse_container is not None:
|
||||
X = sparse_container(X)
|
||||
|
||||
_, yt, _, y_mean, _, _ = _preprocess_data(X, y, fit_intercept=False)
|
||||
assert_array_almost_equal(y_mean, np.zeros(n_outputs))
|
||||
assert_array_almost_equal(yt, y)
|
||||
|
||||
_, yt, _, y_mean, _, _ = _preprocess_data(X, y, fit_intercept=True)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(yt, y - y_mean)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("rescale_with_sw", [False, True])
|
||||
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
|
||||
def test_preprocess_data_weighted(
|
||||
rescale_with_sw, sparse_container, global_random_seed
|
||||
):
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
n_samples = 200
|
||||
n_features = 4
|
||||
# Generate random data with 50% of zero values to make sure
|
||||
# that the sparse variant of this test is actually sparse. This also
|
||||
# shifts the mean value for each columns in X further away from
|
||||
# zero.
|
||||
X = rng.rand(n_samples, n_features)
|
||||
X[X < 0.5] = 0.0
|
||||
|
||||
# Scale the first feature of X to be 10 larger than the other to
|
||||
# better check the impact of feature scaling.
|
||||
X[:, 0] *= 10
|
||||
|
||||
# Constant non-zero feature.
|
||||
X[:, 2] = 1.0
|
||||
|
||||
# Constant zero feature (non-materialized in the sparse case)
|
||||
X[:, 3] = 0.0
|
||||
y = rng.rand(n_samples)
|
||||
|
||||
sample_weight = np.abs(rng.rand(n_samples)) + 1
|
||||
expected_X_mean = np.average(X, axis=0, weights=sample_weight)
|
||||
expected_y_mean = np.average(y, axis=0, weights=sample_weight)
|
||||
|
||||
X_sample_weight_avg = np.average(X, weights=sample_weight, axis=0)
|
||||
X_sample_weight_var = np.average(
|
||||
(X - X_sample_weight_avg) ** 2, weights=sample_weight, axis=0
|
||||
)
|
||||
constant_mask = X_sample_weight_var < 10 * np.finfo(X.dtype).eps
|
||||
assert_array_equal(constant_mask, [0, 0, 1, 1])
|
||||
expected_X_scale = np.sqrt(X_sample_weight_var) * np.sqrt(sample_weight.sum())
|
||||
|
||||
# near constant features should not be scaled
|
||||
expected_X_scale[constant_mask] = 1
|
||||
|
||||
if sparse_container is not None:
|
||||
X = sparse_container(X)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_scale, sqrt_sw = _preprocess_data(
|
||||
X,
|
||||
y,
|
||||
fit_intercept=True,
|
||||
sample_weight=sample_weight,
|
||||
rescale_with_sw=rescale_with_sw,
|
||||
)
|
||||
if sparse_container is not None:
|
||||
# Simplifies asserts
|
||||
X = X.toarray()
|
||||
Xt = Xt.toarray()
|
||||
|
||||
assert_array_almost_equal(X_mean, expected_X_mean)
|
||||
assert_array_almost_equal(y_mean, expected_y_mean)
|
||||
assert_array_almost_equal(X_scale, np.ones(n_features))
|
||||
if rescale_with_sw:
|
||||
assert_allclose(sqrt_sw, np.sqrt(sample_weight))
|
||||
if sparse_container is not None:
|
||||
assert_allclose(Xt, sqrt_sw[:, None] * X)
|
||||
else:
|
||||
assert_allclose(Xt, sqrt_sw[:, None] * (X - expected_X_mean))
|
||||
assert_allclose(yt, sqrt_sw * (y - expected_y_mean))
|
||||
else:
|
||||
assert sqrt_sw is None
|
||||
if sparse_container is not None:
|
||||
assert_allclose(Xt, X)
|
||||
else:
|
||||
assert_allclose(Xt, X - expected_X_mean)
|
||||
assert_allclose(yt, y - expected_y_mean)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
|
||||
def test_sparse_preprocess_data_offsets(global_random_seed, lil_container):
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = sparse.rand(n_samples, n_features, density=0.5, random_state=rng)
|
||||
X = lil_container(X)
|
||||
y = rng.rand(n_samples)
|
||||
XA = X.toarray()
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_scale, sqrt_sw = _preprocess_data(
|
||||
X, y, fit_intercept=False
|
||||
)
|
||||
assert_array_almost_equal(X_mean, np.zeros(n_features))
|
||||
assert_array_almost_equal(y_mean, 0)
|
||||
assert_array_almost_equal(X_scale, np.ones(n_features))
|
||||
assert sqrt_sw is None
|
||||
assert_array_almost_equal(Xt.toarray(), XA)
|
||||
assert_array_almost_equal(yt, y)
|
||||
|
||||
Xt, yt, X_mean, y_mean, X_scale, sqrt_sw = _preprocess_data(
|
||||
X, y, fit_intercept=True
|
||||
)
|
||||
assert_array_almost_equal(X_mean, np.mean(XA, axis=0))
|
||||
assert_array_almost_equal(y_mean, np.mean(y, axis=0))
|
||||
assert_array_almost_equal(X_scale, np.ones(n_features))
|
||||
assert sqrt_sw is None
|
||||
assert_array_almost_equal(Xt.toarray(), XA)
|
||||
assert_array_almost_equal(yt, y - np.mean(y, axis=0))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
|
||||
def test_csr_preprocess_data(csr_container):
|
||||
# Test output format of _preprocess_data, when input is csr
|
||||
X, y = make_regression()
|
||||
X[X < 2.5] = 0.0
|
||||
csr = csr_container(X)
|
||||
csr_, y, _, _, _, _ = _preprocess_data(csr, y, fit_intercept=True)
|
||||
assert csr_.format == "csr"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
|
||||
@pytest.mark.parametrize("to_copy", (True, False))
|
||||
def test_preprocess_copy_data_no_checks(sparse_container, to_copy):
|
||||
X, y = make_regression()
|
||||
X[X < 2.5] = 0.0
|
||||
|
||||
if sparse_container is not None:
|
||||
X = sparse_container(X)
|
||||
|
||||
X_, y_, _, _, _, _ = _preprocess_data(
|
||||
X, y, fit_intercept=True, copy=to_copy, check_input=False
|
||||
)
|
||||
|
||||
if to_copy and sparse_container is not None:
|
||||
assert not np.may_share_memory(X_.data, X.data)
|
||||
elif to_copy:
|
||||
assert not np.may_share_memory(X_, X)
|
||||
elif sparse_container is not None:
|
||||
assert np.may_share_memory(X_.data, X.data)
|
||||
else:
|
||||
assert np.may_share_memory(X_, X)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("rescale_with_sw", [False, True])
|
||||
@pytest.mark.parametrize("fit_intercept", [False, True])
|
||||
def test_dtype_preprocess_data(rescale_with_sw, fit_intercept, global_random_seed):
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
sw = rng.rand(n_samples) + 1
|
||||
|
||||
X_32 = np.asarray(X, dtype=np.float32)
|
||||
y_32 = np.asarray(y, dtype=np.float32)
|
||||
sw_32 = np.asarray(sw, dtype=np.float32)
|
||||
X_64 = np.asarray(X, dtype=np.float64)
|
||||
y_64 = np.asarray(y, dtype=np.float64)
|
||||
sw_64 = np.asarray(sw, dtype=np.float64)
|
||||
|
||||
Xt_32, yt_32, X_mean_32, y_mean_32, X_scale_32, sqrt_sw_32 = _preprocess_data(
|
||||
X_32,
|
||||
y_32,
|
||||
fit_intercept=fit_intercept,
|
||||
sample_weight=sw_32,
|
||||
rescale_with_sw=rescale_with_sw,
|
||||
)
|
||||
|
||||
Xt_64, yt_64, X_mean_64, y_mean_64, X_scale_64, sqrt_sw_64 = _preprocess_data(
|
||||
X_64,
|
||||
y_64,
|
||||
fit_intercept=fit_intercept,
|
||||
sample_weight=sw_64,
|
||||
rescale_with_sw=rescale_with_sw,
|
||||
)
|
||||
|
||||
Xt_3264, yt_3264, X_mean_3264, y_mean_3264, X_scale_3264, sqrt_sw_3264 = (
|
||||
_preprocess_data(
|
||||
X_32,
|
||||
y_64,
|
||||
fit_intercept=fit_intercept,
|
||||
sample_weight=sw_32, # sample_weight must have same dtype as X
|
||||
rescale_with_sw=rescale_with_sw,
|
||||
)
|
||||
)
|
||||
|
||||
Xt_6432, yt_6432, X_mean_6432, y_mean_6432, X_scale_6432, sqrt_sw_6432 = (
|
||||
_preprocess_data(
|
||||
X_64,
|
||||
y_32,
|
||||
fit_intercept=fit_intercept,
|
||||
sample_weight=sw_64, # sample_weight must have same dtype as X
|
||||
rescale_with_sw=rescale_with_sw,
|
||||
)
|
||||
)
|
||||
|
||||
assert Xt_32.dtype == np.float32
|
||||
assert yt_32.dtype == np.float32
|
||||
assert X_mean_32.dtype == np.float32
|
||||
assert y_mean_32.dtype == np.float32
|
||||
assert X_scale_32.dtype == np.float32
|
||||
if rescale_with_sw:
|
||||
assert sqrt_sw_32.dtype == np.float32
|
||||
|
||||
assert Xt_64.dtype == np.float64
|
||||
assert yt_64.dtype == np.float64
|
||||
assert X_mean_64.dtype == np.float64
|
||||
assert y_mean_64.dtype == np.float64
|
||||
assert X_scale_64.dtype == np.float64
|
||||
if rescale_with_sw:
|
||||
assert sqrt_sw_64.dtype == np.float64
|
||||
|
||||
assert Xt_3264.dtype == np.float32
|
||||
assert yt_3264.dtype == np.float32
|
||||
assert X_mean_3264.dtype == np.float32
|
||||
assert y_mean_3264.dtype == np.float32
|
||||
assert X_scale_3264.dtype == np.float32
|
||||
if rescale_with_sw:
|
||||
assert sqrt_sw_3264.dtype == np.float32
|
||||
|
||||
assert Xt_6432.dtype == np.float64
|
||||
assert yt_6432.dtype == np.float64
|
||||
assert X_mean_6432.dtype == np.float64
|
||||
assert y_mean_6432.dtype == np.float64
|
||||
assert X_scale_3264.dtype == np.float32
|
||||
if rescale_with_sw:
|
||||
assert sqrt_sw_6432.dtype == np.float64
|
||||
|
||||
assert X_32.dtype == np.float32
|
||||
assert y_32.dtype == np.float32
|
||||
assert X_64.dtype == np.float64
|
||||
assert y_64.dtype == np.float64
|
||||
|
||||
assert_allclose(Xt_32, Xt_64, rtol=1e-3, atol=1e-6)
|
||||
assert_allclose(yt_32, yt_64, rtol=1e-3, atol=1e-6)
|
||||
assert_allclose(X_mean_32, X_mean_64, rtol=1e-6)
|
||||
assert_allclose(y_mean_32, y_mean_64, rtol=1e-6)
|
||||
assert_allclose(X_scale_32, X_scale_64)
|
||||
if rescale_with_sw:
|
||||
assert_allclose(sqrt_sw_32, sqrt_sw_64, rtol=1e-6)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_targets", [None, 2])
|
||||
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
|
||||
def test_rescale_data(n_targets, sparse_container, global_random_seed):
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
n_samples = 200
|
||||
n_features = 2
|
||||
|
||||
sample_weight = 1.0 + rng.rand(n_samples)
|
||||
X = rng.rand(n_samples, n_features)
|
||||
if n_targets is None:
|
||||
y = rng.rand(n_samples)
|
||||
else:
|
||||
y = rng.rand(n_samples, n_targets)
|
||||
|
||||
expected_sqrt_sw = np.sqrt(sample_weight)
|
||||
expected_rescaled_X = X * expected_sqrt_sw[:, np.newaxis]
|
||||
|
||||
if n_targets is None:
|
||||
expected_rescaled_y = y * expected_sqrt_sw
|
||||
else:
|
||||
expected_rescaled_y = y * expected_sqrt_sw[:, np.newaxis]
|
||||
|
||||
if sparse_container is not None:
|
||||
X = sparse_container(X)
|
||||
if n_targets is None:
|
||||
y = sparse_container(y.reshape(-1, 1))
|
||||
else:
|
||||
y = sparse_container(y)
|
||||
|
||||
rescaled_X, rescaled_y, sqrt_sw = _rescale_data(X, y, sample_weight)
|
||||
|
||||
assert_allclose(sqrt_sw, expected_sqrt_sw)
|
||||
|
||||
if sparse_container is not None:
|
||||
rescaled_X = rescaled_X.toarray()
|
||||
rescaled_y = rescaled_y.toarray()
|
||||
if n_targets is None:
|
||||
rescaled_y = rescaled_y.ravel()
|
||||
|
||||
assert_allclose(rescaled_X, expected_rescaled_X)
|
||||
assert_allclose(rescaled_y, expected_rescaled_y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
|
||||
def test_fused_types_make_dataset(csr_container):
|
||||
iris = load_iris()
|
||||
|
||||
X_32 = iris.data.astype(np.float32)
|
||||
y_32 = iris.target.astype(np.float32)
|
||||
X_csr_32 = csr_container(X_32)
|
||||
sample_weight_32 = np.arange(y_32.size, dtype=np.float32)
|
||||
|
||||
X_64 = iris.data.astype(np.float64)
|
||||
y_64 = iris.target.astype(np.float64)
|
||||
X_csr_64 = csr_container(X_64)
|
||||
sample_weight_64 = np.arange(y_64.size, dtype=np.float64)
|
||||
|
||||
# array
|
||||
dataset_32, _ = make_dataset(X_32, y_32, sample_weight_32)
|
||||
dataset_64, _ = make_dataset(X_64, y_64, sample_weight_64)
|
||||
xi_32, yi_32, _, _ = dataset_32._next_py()
|
||||
xi_64, yi_64, _, _ = dataset_64._next_py()
|
||||
xi_data_32, _, _ = xi_32
|
||||
xi_data_64, _, _ = xi_64
|
||||
|
||||
assert xi_data_32.dtype == np.float32
|
||||
assert xi_data_64.dtype == np.float64
|
||||
assert_allclose(yi_64, yi_32, rtol=rtol)
|
||||
|
||||
# csr
|
||||
datasetcsr_32, _ = make_dataset(X_csr_32, y_32, sample_weight_32)
|
||||
datasetcsr_64, _ = make_dataset(X_csr_64, y_64, sample_weight_64)
|
||||
xicsr_32, yicsr_32, _, _ = datasetcsr_32._next_py()
|
||||
xicsr_64, yicsr_64, _, _ = datasetcsr_64._next_py()
|
||||
xicsr_data_32, _, _ = xicsr_32
|
||||
xicsr_data_64, _, _ = xicsr_64
|
||||
|
||||
assert xicsr_data_32.dtype == np.float32
|
||||
assert xicsr_data_64.dtype == np.float64
|
||||
|
||||
assert_allclose(xicsr_data_64, xicsr_data_32, rtol=rtol)
|
||||
assert_allclose(yicsr_64, yicsr_32, rtol=rtol)
|
||||
|
||||
assert_array_equal(xi_data_32, xicsr_data_32)
|
||||
assert_array_equal(xi_data_64, xicsr_data_64)
|
||||
assert_array_equal(yi_32, yicsr_32)
|
||||
assert_array_equal(yi_64, yicsr_64)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("X_shape", [(10, 5), (10, 20), (100, 100)])
|
||||
@pytest.mark.parametrize(
|
||||
"sparse_container",
|
||||
[None]
|
||||
+ [
|
||||
pytest.param(
|
||||
container,
|
||||
marks=pytest.mark.xfail(
|
||||
reason="Known to fail for CSR arrays, see issue #30131."
|
||||
),
|
||||
)
|
||||
for container in CSR_CONTAINERS
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize("fit_intercept", [False, True])
|
||||
def test_linear_regression_sample_weight_consistency(
|
||||
X_shape, sparse_container, fit_intercept, global_random_seed
|
||||
):
|
||||
"""Test that the impact of sample_weight is consistent.
|
||||
|
||||
Note that this test is stricter than the common test
|
||||
check_sample_weight_equivalence alone and also tests sparse X.
|
||||
It is very similar to test_enet_sample_weight_consistency.
|
||||
"""
|
||||
rng = np.random.RandomState(global_random_seed)
|
||||
n_samples, n_features = X_shape
|
||||
|
||||
X = rng.rand(n_samples, n_features)
|
||||
y = rng.rand(n_samples)
|
||||
if sparse_container is not None:
|
||||
X = sparse_container(X)
|
||||
params = dict(fit_intercept=fit_intercept)
|
||||
|
||||
reg = LinearRegression(**params).fit(X, y, sample_weight=None)
|
||||
coef = reg.coef_.copy()
|
||||
if fit_intercept:
|
||||
intercept = reg.intercept_
|
||||
|
||||
# 1) sample_weight=np.ones(..) must be equivalent to sample_weight=None,
|
||||
# a special case of check_sample_weight_equivalence(name, reg), but we also
|
||||
# test with sparse input.
|
||||
sample_weight = np.ones_like(y)
|
||||
reg.fit(X, y, sample_weight=sample_weight)
|
||||
assert_allclose(reg.coef_, coef, rtol=1e-6)
|
||||
if fit_intercept:
|
||||
assert_allclose(reg.intercept_, intercept)
|
||||
|
||||
# 2) sample_weight=None should be equivalent to sample_weight = number
|
||||
sample_weight = 123.0
|
||||
reg.fit(X, y, sample_weight=sample_weight)
|
||||
assert_allclose(reg.coef_, coef, rtol=1e-6)
|
||||
if fit_intercept:
|
||||
assert_allclose(reg.intercept_, intercept)
|
||||
|
||||
# 3) scaling of sample_weight should have no effect, cf. np.average()
|
||||
sample_weight = rng.uniform(low=0.01, high=2, size=X.shape[0])
|
||||
reg = reg.fit(X, y, sample_weight=sample_weight)
|
||||
coef = reg.coef_.copy()
|
||||
if fit_intercept:
|
||||
intercept = reg.intercept_
|
||||
|
||||
reg.fit(X, y, sample_weight=np.pi * sample_weight)
|
||||
assert_allclose(reg.coef_, coef, rtol=1e-6 if sparse_container is None else 1e-5)
|
||||
if fit_intercept:
|
||||
assert_allclose(reg.intercept_, intercept)
|
||||
|
||||
# 4) setting elements of sample_weight to 0 is equivalent to removing these samples
|
||||
sample_weight_0 = sample_weight.copy()
|
||||
sample_weight_0[-5:] = 0
|
||||
y[-5:] *= 1000 # to make excluding those samples important
|
||||
reg.fit(X, y, sample_weight=sample_weight_0)
|
||||
coef_0 = reg.coef_.copy()
|
||||
if fit_intercept:
|
||||
intercept_0 = reg.intercept_
|
||||
reg.fit(X[:-5], y[:-5], sample_weight=sample_weight[:-5])
|
||||
assert_allclose(reg.coef_, coef_0, rtol=1e-5)
|
||||
if fit_intercept:
|
||||
assert_allclose(reg.intercept_, intercept_0)
|
||||
|
||||
# 5) check that multiplying sample_weight by 2 is equivalent to repeating
|
||||
# corresponding samples twice
|
||||
if sparse_container is not None:
|
||||
X2 = sparse.vstack([X, X[: n_samples // 2]], format="csc")
|
||||
else:
|
||||
X2 = np.concatenate([X, X[: n_samples // 2]], axis=0)
|
||||
y2 = np.concatenate([y, y[: n_samples // 2]])
|
||||
sample_weight_1 = sample_weight.copy()
|
||||
sample_weight_1[: n_samples // 2] *= 2
|
||||
sample_weight_2 = np.concatenate(
|
||||
[sample_weight, sample_weight[: n_samples // 2]], axis=0
|
||||
)
|
||||
|
||||
reg1 = LinearRegression(**params).fit(X, y, sample_weight=sample_weight_1)
|
||||
reg2 = LinearRegression(**params).fit(X2, y2, sample_weight=sample_weight_2)
|
||||
assert_allclose(reg1.coef_, reg2.coef_, rtol=1e-6)
|
||||
if fit_intercept:
|
||||
assert_allclose(reg1.intercept_, reg2.intercept_)
|
||||
@@ -0,0 +1,314 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
from math import log
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn import datasets
|
||||
from sklearn.linear_model import ARDRegression, BayesianRidge, Ridge
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.utils._testing import (
|
||||
_convert_container,
|
||||
assert_allclose,
|
||||
assert_almost_equal,
|
||||
assert_array_almost_equal,
|
||||
assert_array_less,
|
||||
)
|
||||
from sklearn.utils.extmath import fast_logdet
|
||||
|
||||
diabetes = datasets.load_diabetes()
|
||||
|
||||
|
||||
def test_bayesian_ridge_scores():
    """scores_ must hold one entry per iteration plus the initial score."""
    X, y = diabetes.data, diabetes.target

    model = BayesianRidge(compute_score=True).fit(X, y)

    assert model.scores_.shape == (model.n_iter_ + 1,)
|
||||
|
||||
|
||||
def test_bayesian_ridge_score_values():
    """Check value of score on toy example.

    Compute log marginal likelihood with equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001):

    - 0.5 * (log |Id/alpha + X.X^T/lambda| +
             y^T.(Id/alpha + X.X^T/lambda)^-1.y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and check equality with the score computed during training.
    """

    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]
    # check with initial values of alpha and lambda (see code for the values)
    eps = np.finfo(np.float64).eps
    alpha_ = 1.0 / (np.var(y) + eps)
    lambda_ = 1.0

    # value of the parameters of the Gamma hyperpriors
    alpha_1 = 0.1
    alpha_2 = 0.1
    lambda_1 = 0.1
    lambda_2 = 0.1

    # compute score using formula of docstring
    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
    # M = Id/alpha + X.X^T/lambda is the marginal covariance of y.
    M = 1.0 / alpha_ * np.eye(n_samples) + 1.0 / lambda_ * np.dot(X, X.T)
    # Solve M.z = y rather than forming M^-1 explicitly (more stable).
    M_inv_dot_y = np.linalg.solve(M, y)
    score += -0.5 * (
        fast_logdet(M) + np.dot(y.T, M_inv_dot_y) + n_samples * log(2 * np.pi)
    )

    # compute score with BayesianRidge
    clf = BayesianRidge(
        alpha_1=alpha_1,
        alpha_2=alpha_2,
        lambda_1=lambda_1,
        lambda_2=lambda_2,
        max_iter=1,
        fit_intercept=False,
        compute_score=True,
    )
    clf.fit(X, y)

    # scores_[0] is the score at the initial hyperparameter values.
    assert_almost_equal(clf.scores_[0], score, decimal=9)
|
||||
|
||||
|
||||
def test_bayesian_ridge_parameter():
    # Non-regression test for GitHub issue #8224: a Ridge model whose alpha
    # equals the learned lambda_/alpha_ ratio must match the Bayesian fit.
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T

    bayesian = BayesianRidge(compute_score=True).fit(X, y)
    equivalent_ridge = Ridge(alpha=bayesian.lambda_ / bayesian.alpha_).fit(X, y)

    assert_array_almost_equal(equivalent_ridge.coef_, bayesian.coef_)
    assert_almost_equal(equivalent_ridge.intercept_, bayesian.intercept_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_samples, n_features", [(10, 20), (20, 10)])
def test_bayesian_covariance_matrix(n_samples, n_features, global_random_seed):
    """Check the posterior covariance matrix sigma_

    Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/31093
    """
    X, y = datasets.make_regression(
        n_samples, n_features, random_state=global_random_seed
    )
    reg = BayesianRidge(fit_intercept=False).fit(X, y)
    # Posterior covariance in closed form: (lambda * I + alpha * X^T X)^-1.
    expected = np.linalg.inv(
        reg.lambda_ * np.identity(n_features) + reg.alpha_ * (X.T @ X)
    )
    assert_allclose(reg.sigma_, expected, rtol=1e-6)
|
||||
|
||||
|
||||
def test_bayesian_sample_weights():
    # A weighted BayesianRidge fit must match a weighted Ridge fit whose
    # alpha equals the learned lambda_/alpha_ ratio.
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T
    w = np.array([4, 3, 3, 1, 1, 2, 3]).T

    bayesian = BayesianRidge(compute_score=True).fit(X, y, sample_weight=w)
    ridge = Ridge(alpha=bayesian.lambda_ / bayesian.alpha_)
    ridge.fit(X, y, sample_weight=w)

    assert_array_almost_equal(ridge.coef_, bayesian.coef_)
    assert_almost_equal(ridge.intercept_, bayesian.intercept_)
|
||||
|
||||
|
||||
def test_toy_bayesian_ridge_object():
    # BayesianRidge should approximately recover the identity function on a
    # tiny 1-d dataset.
    X = np.array([[1], [2], [6], [8], [10]])
    y = np.array([1, 2, 6, 8, 10])
    model = BayesianRidge(compute_score=True).fit(X, y)

    X_test = [[1], [3], [4]]
    assert_array_almost_equal(model.predict(X_test), [1, 3, 4], 2)
|
||||
|
||||
|
||||
def test_bayesian_initial_params():
    # BayesianRidge must honor user-supplied initial values (alpha_init,
    # lambda_init).
    X = np.vander(np.linspace(0, 4, 5), 4)
    y = np.array([0.0, 1.0, 0.0, -1.0, 0.0])  # y = (x^3 - 6x^2 + 8x) / 3

    # Starting from the default initial values would bias the fitted curve,
    # so a small lambda_init is needed for a near-perfect fit here.
    reg = BayesianRidge(alpha_init=1.0, lambda_init=1e-3).fit(X, y)
    # The R2 score should be nearly one.
    assert_almost_equal(reg.score(X, y), 1.0)
|
||||
|
||||
|
||||
def test_prediction_bayesian_ridge_ard_with_constant_input():
    # Edge case: a constant target vector must be predicted (almost) exactly
    # by both BayesianRidge and ARDRegression.
    n_samples, n_features = 4, 5
    rng = check_random_state(42)
    constant_value = rng.rand()
    X = rng.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value, dtype=np.array(constant_value).dtype)
    expected = y.copy()

    for model in (BayesianRidge(), ARDRegression()):
        assert_array_almost_equal(model.fit(X, y).predict(X), expected)
|
||||
|
||||
|
||||
def test_std_bayesian_ridge_ard_with_constant_input():
    # Edge case: for a constant target vector, the predictive standard
    # deviation of both models should be small (< 0.01 is checked here).
    n_samples, n_features = 10, 5
    rng = check_random_state(42)
    constant_value = rng.rand()
    X = rng.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value, dtype=np.array(constant_value).dtype)
    upper_bound = 0.01

    for model in (BayesianRidge(), ARDRegression()):
        _, y_std = model.fit(X, y).predict(X, return_std=True)
        assert_array_less(y_std, upper_bound)
|
||||
|
||||
|
||||
def test_update_of_sigma_in_ard():
    # `sigma_` must be updated correctly after the final ARDRegression
    # iteration; see issue #10128.
    X = np.array([[1, 0], [0, 0]])
    y = np.array([0, 0])
    model = ARDRegression(max_iter=1).fit(X, y)
    # For this input both coefficients are pruned in the first iteration,
    # hence `sigma_` must be empty.
    assert model.sigma_.shape == (0, 0)
    # Predicting with return_std must not raise on the fully pruned model.
    model.predict(X, return_std=True)
|
||||
|
||||
|
||||
def test_toy_ard_object():
    # ARDRegression should approximately recover the identity function on a
    # tiny 1-d dataset.
    X = np.array([[1], [2], [3]])
    y = np.array([1, 2, 3])
    model = ARDRegression(compute_score=True).fit(X, y)

    X_test = [[1], [3], [4]]
    assert_array_almost_equal(model.predict(X_test), [1, 3, 4], 2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("n_samples, n_features", ((10, 100), (100, 10)))
def test_ard_accuracy_on_easy_problem(global_random_seed, n_samples, n_features):
    # ARD must converge with high accuracy on an easy problem
    # (GitHub issue #14055).
    # NOTE(review): the parametrized n_samples/n_features are currently
    # unused — the data shape is hard-coded below; confirm whether the
    # parameters were meant to drive the shape.
    rng = np.random.RandomState(global_random_seed)
    X = rng.normal(size=(250, 3))
    y = X[:, 1]

    regressor = ARDRegression().fit(X, y)

    assert np.abs(1 - regressor.coef_[1]) < 1e-10
|
||||
|
||||
|
||||
@pytest.mark.parametrize("constructor_name", ["array", "dataframe"])
def test_return_std(constructor_name):
    # Test return_std option for both Bayesian regressors

    # True generating process: linear model with known weights w and bias b.
    def f(X):
        return np.dot(X, w) + b

    # Same process plus Gaussian noise with scale `noise_mult`.
    def f_noise(X, noise_mult):
        return f(X) + np.random.randn(X.shape[0]) * noise_mult

    d = 5
    n_train = 50
    n_test = 10

    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    X = np.random.random((n_train, d))
    X = _convert_container(X, constructor_name)

    X_test = np.random.random((n_test, d))
    X_test = _convert_container(X_test, constructor_name)

    # The estimated predictive std should recover the injected noise level;
    # smaller noise is expected to match to more decimal places.
    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        y = f_noise(X, noise_mult)

        m1 = BayesianRidge()
        m1.fit(X, y)
        y_mean1, y_std1 = m1.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std1, noise_mult, decimal=decimal)

        m2 = ARDRegression()
        m2.fit(X, y)
        y_mean2, y_std2 = m2.predict(X_test, return_std=True)
        assert_array_almost_equal(y_std2, noise_mult, decimal=decimal)
|
||||
|
||||
|
||||
def test_update_sigma(global_random_seed):
    # The two _update_sigma* helpers must agree. The Woodbury variant is
    # used when n_samples < n_features, the direct variant otherwise.

    rng = np.random.RandomState(global_random_seed)

    # Use n_samples == n_features so both formulas are numerically stable
    # when inverting (Woodbury would be unstable for n_samples > n_features).
    n = 10
    X = rng.randn(n, n)
    alpha = 1
    lmbda = np.arange(1, n + 1)
    keep_lambda = np.full(n, True)

    reg = ARDRegression()

    sigma_direct = reg._update_sigma(X, alpha, lmbda, keep_lambda)
    sigma_woodbury = reg._update_sigma_woodbury(X, alpha, lmbda, keep_lambda)

    np.testing.assert_allclose(sigma_direct, sigma_woodbury)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("Estimator", [BayesianRidge, ARDRegression])
def test_dtype_match(dtype, Estimator):
    # np.float32 input must not be silently upcast to np.float64.
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]], dtype=dtype)
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T

    model = Estimator()
    model.fit(X, y)
    # Fitted attributes keep the input dtype...
    for attribute in ("coef_", "sigma_"):
        assert getattr(model, attribute).dtype == X.dtype

    # ...and so do the predictions and their standard deviations.
    y_mean, y_std = model.predict(X, return_std=True)
    assert y_mean.dtype == X.dtype
    assert y_std.dtype == X.dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize("Estimator", [BayesianRidge, ARDRegression])
def test_dtype_correctness(Estimator):
    # float32 and float64 fits must agree to reasonable precision.
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T
    model = Estimator()
    coefs = {}
    for dtype in (np.float32, np.float64):
        coefs[dtype] = model.fit(X.astype(dtype), y).coef_
    np.testing.assert_allclose(coefs[np.float32], coefs[np.float64], rtol=1e-4)
|
||||
@@ -0,0 +1,291 @@
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import inspect
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.base import clone, is_classifier
|
||||
from sklearn.datasets import make_classification, make_low_rank_matrix, make_regression
|
||||
from sklearn.linear_model import (
|
||||
ARDRegression,
|
||||
BayesianRidge,
|
||||
ElasticNet,
|
||||
ElasticNetCV,
|
||||
GammaRegressor,
|
||||
HuberRegressor,
|
||||
Lars,
|
||||
LarsCV,
|
||||
Lasso,
|
||||
LassoCV,
|
||||
LassoLars,
|
||||
LassoLarsCV,
|
||||
LassoLarsIC,
|
||||
LinearRegression,
|
||||
LogisticRegression,
|
||||
LogisticRegressionCV,
|
||||
MultiTaskElasticNet,
|
||||
MultiTaskElasticNetCV,
|
||||
MultiTaskLasso,
|
||||
MultiTaskLassoCV,
|
||||
OrthogonalMatchingPursuit,
|
||||
OrthogonalMatchingPursuitCV,
|
||||
PassiveAggressiveClassifier,
|
||||
PassiveAggressiveRegressor,
|
||||
Perceptron,
|
||||
PoissonRegressor,
|
||||
Ridge,
|
||||
RidgeClassifier,
|
||||
RidgeClassifierCV,
|
||||
RidgeCV,
|
||||
SGDClassifier,
|
||||
SGDRegressor,
|
||||
TheilSenRegressor,
|
||||
TweedieRegressor,
|
||||
)
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import MinMaxScaler, StandardScaler
|
||||
from sklearn.svm import LinearSVC, LinearSVR
|
||||
from sklearn.utils._testing import assert_allclose, set_random_state
|
||||
from sklearn.utils.fixes import CSR_CONTAINERS
|
||||
|
||||
|
||||
# Note: GammaRegressor() and TweedieRegressor(power != 1) have a non-canonical link.
@pytest.mark.parametrize(
    "model",
    [
        ARDRegression(),
        BayesianRidge(),
        ElasticNet(),
        ElasticNetCV(),
        Lars(),
        LarsCV(),
        Lasso(),
        LassoCV(),
        LassoLarsCV(),
        LassoLarsIC(),
        LinearRegression(),
        # TODO: Fix SAGA which fails badly with sample_weights.
        # This is a known limitation, see:
        # https://github.com/scikit-learn/scikit-learn/issues/21305
        pytest.param(
            LogisticRegression(l1_ratio=0.5, solver="saga", tol=1e-15),
            marks=pytest.mark.xfail(reason="Missing importance sampling scheme"),
        ),
        LogisticRegressionCV(tol=1e-6, use_legacy_attributes=False, l1_ratios=(0,)),
        MultiTaskElasticNet(),
        MultiTaskElasticNetCV(),
        MultiTaskLasso(),
        MultiTaskLassoCV(),
        OrthogonalMatchingPursuit(),
        OrthogonalMatchingPursuitCV(),
        PoissonRegressor(),
        Ridge(),
        RidgeCV(),
        pytest.param(
            SGDRegressor(tol=1e-15),
            marks=pytest.mark.xfail(reason="Insufficient precision."),
        ),
        SGDRegressor(penalty="elasticnet", max_iter=10_000),
        TweedieRegressor(power=0),  # same as Ridge
    ],
    ids=lambda x: x.__class__.__name__,
)
@pytest.mark.parametrize("with_sample_weight", [False, True])
def test_balance_property(model, with_sample_weight, global_random_seed):
    # Test that sum(y_predicted) == sum(y_observed) on the training set.
    # This must hold for all linear models with deviance of an exponential disperson
    # family as loss and the corresponding canonical link if fit_intercept=True.
    # Examples:
    #     - squared error and identity link (most linear models)
    #     - Poisson deviance with log link
    #     - log loss with logit link
    # This is known as balance property or unconditional calibration/unbiasedness.
    # For reference, see Corollary 3.18, 3.20 and Chapter 5.1.5 of
    # M.V. Wuthrich and M. Merz, "Statistical Foundations of Actuarial Learning and its
    # Applications" (June 3, 2022). http://doi.org/10.2139/ssrn.3822407
    model = clone(model)  # Avoid side effects from shared instances.
    if (
        with_sample_weight
        and "sample_weight" not in inspect.signature(model.fit).parameters.keys()
    ):
        pytest.skip("Estimator does not support sample_weight.")

    # Relax the test precision for the solvers that converge less tightly.
    rel = 2e-4  # test precision
    if isinstance(model, SGDRegressor):
        rel = 1e-1
    elif hasattr(model, "solver") and model.solver == "saga":
        rel = 1e-2

    rng = np.random.RandomState(global_random_seed)
    n_train, n_features, n_targets = 100, 10, None
    # Multi-task estimators need a 2-d target.
    if isinstance(
        model,
        (MultiTaskElasticNet, MultiTaskElasticNetCV, MultiTaskLasso, MultiTaskLassoCV),
    ):
        n_targets = 3
    X = make_low_rank_matrix(n_samples=n_train, n_features=n_features, random_state=rng)
    if n_targets:
        coef = (
            rng.uniform(low=-2, high=2, size=(n_features, n_targets))
            / np.max(X, axis=0)[:, None]
        )
    else:
        coef = rng.uniform(low=-2, high=2, size=n_features) / np.max(X, axis=0)

    expectation = np.exp(X @ coef + 0.5)
    y = rng.poisson(lam=expectation) + 1  # strict positive, i.e. y > 0
    if is_classifier(model):
        # Binarize the target for classifiers.
        y = (y > expectation + 1).astype(np.float64)

    if with_sample_weight:
        sw = rng.uniform(low=1, high=10, size=y.shape[0])
    else:
        sw = None

    model.set_params(fit_intercept=True)  # to be sure
    if with_sample_weight:
        model.fit(X, y, sample_weight=sw)
    else:
        model.fit(X, y)
    # Assert balance property.
    if is_classifier(model):
        assert np.average(model.predict_proba(X)[:, 1], weights=sw) == pytest.approx(
            np.average(y, weights=sw), rel=rel
        )
    else:
        assert np.average(model.predict(X), weights=sw, axis=0) == pytest.approx(
            np.average(y, weights=sw, axis=0), rel=rel
        )
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:The default of 'normalize'")
@pytest.mark.filterwarnings("ignore:lbfgs failed to converge")
@pytest.mark.filterwarnings("ignore:A column-vector y was passed when a 1d array.*")
@pytest.mark.parametrize(
    "Regressor",
    [
        ARDRegression,
        BayesianRidge,
        ElasticNet,
        ElasticNetCV,
        GammaRegressor,
        HuberRegressor,
        Lars,
        LarsCV,
        Lasso,
        LassoCV,
        LassoLars,
        LassoLarsCV,
        LassoLarsIC,
        LinearSVR,
        LinearRegression,
        OrthogonalMatchingPursuit,
        OrthogonalMatchingPursuitCV,
        PassiveAggressiveRegressor,
        PoissonRegressor,
        Ridge,
        RidgeCV,
        SGDRegressor,
        TheilSenRegressor,
        TweedieRegressor,
    ],
)
@pytest.mark.parametrize("ndim", [1, 2])
def test_linear_model_regressor_coef_shape(Regressor, ndim):
    """Check the consistency of linear models `coef` shape."""
    if Regressor is LinearRegression:
        pytest.xfail("LinearRegression does not follow `coef_` shape contract!")

    X, y = make_regression(random_state=0, n_samples=200, n_features=20)
    # Rescale/shift y to be strictly positive so that the regressors with a
    # positivity requirement on the target (Gamma/Poisson/Tweedie) accept it.
    y = MinMaxScaler().fit_transform(y.reshape(-1, 1))[:, 0] + 1
    y = y[:, np.newaxis] if ndim == 2 else y

    regressor = Regressor()
    set_random_state(regressor)
    regressor.fit(X, y)
    # Single-output regressors must expose a 1-d coef_ of length n_features.
    assert regressor.coef_.shape == (X.shape[1],)
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["Classifier", "params"],
    [
        (LinearSVC, {}),
        (LogisticRegression, {}),
        (
            LogisticRegressionCV,
            {
                "solver": "newton-cholesky",
                "use_legacy_attributes": False,
                "l1_ratios": (0,),
            },
        ),
        (PassiveAggressiveClassifier, {}),
        (Perceptron, {}),
        (RidgeClassifier, {}),
        (RidgeClassifierCV, {}),
        (SGDClassifier, {}),
    ],
)
@pytest.mark.parametrize("n_classes", [2, 3])
def test_linear_model_classifier_coef_shape(Classifier, params, n_classes):
    """Check that linear classifiers expose `coef_` with the agreed shape."""
    if Classifier in (RidgeClassifier, RidgeClassifierCV):
        pytest.xfail(f"{Classifier} does not follow `coef_` shape contract!")

    X, y = make_classification(n_informative=10, n_classes=n_classes, random_state=0)
    n_features = X.shape[1]

    classifier = Classifier(**params)
    set_random_state(classifier)
    classifier.fit(X, y)
    # Binary problems use a single row of coefficients; multiclass problems
    # use one row per class.
    expected_shape = (1, n_features) if n_classes == 2 else (n_classes, n_features)
    assert classifier.coef_.shape == expected_shape
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "LinearModel, params",
    [
        (Lasso, {"tol": 1e-15, "alpha": 0.01}),
        (LassoCV, {"tol": 1e-15}),
        (ElasticNetCV, {"tol": 1e-15}),
        (RidgeClassifier, {"solver": "sparse_cg", "alpha": 0.1}),
        (ElasticNet, {"tol": 1e-15, "l1_ratio": 1, "alpha": 0.01}),
        (ElasticNet, {"tol": 1e-15, "l1_ratio": 1e-5, "alpha": 0.01}),
        (Ridge, {"solver": "sparse_cg", "tol": 1e-12, "alpha": 0.1}),
        (LinearRegression, {}),
        (RidgeCV, {}),
        (RidgeClassifierCV, {}),
    ],
)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_model_pipeline_same_dense_and_sparse(LinearModel, params, csr_container):
    """Test that sparse and dense linear models give same results.

    Models use a preprocessing pipeline with a StandardScaler.
    """
    # with_mean=False keeps the scaler applicable to sparse input as well.
    model_dense = make_pipeline(StandardScaler(with_mean=False), LinearModel(**params))

    model_sparse = make_pipeline(StandardScaler(with_mean=False), LinearModel(**params))

    # prepare the data
    rng = np.random.RandomState(0)
    n_samples = 100
    n_features = 2
    X = rng.randn(n_samples, n_features)
    # Threshold to zero so the matrix is genuinely sparse.
    X[X < 0.1] = 0.0

    X_sparse = csr_container(X)
    y = rng.rand(n_samples)

    if is_classifier(model_dense):
        # Classifiers need discrete labels: use the sign of y.
        y = np.sign(y)

    model_dense.fit(X, y)
    model_sparse.fit(X_sparse, y)

    # Coefficients, predictions and intercepts must all agree.
    assert_allclose(model_sparse[1].coef_, model_dense[1].coef_, atol=1e-15)
    y_pred_dense = model_dense.predict(X)
    y_pred_sparse = model_sparse.predict(X_sparse)
    assert_allclose(y_pred_dense, y_pred_sparse)

    assert_allclose(model_dense[1].intercept_, model_sparse[1].intercept_)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,216 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from scipy import optimize
|
||||
|
||||
from sklearn.datasets import make_regression
|
||||
from sklearn.linear_model import HuberRegressor, LinearRegression, Ridge, SGDRegressor
|
||||
from sklearn.linear_model._huber import _huber_loss_and_gradient
|
||||
from sklearn.utils._testing import (
|
||||
assert_almost_equal,
|
||||
assert_array_almost_equal,
|
||||
assert_array_equal,
|
||||
)
|
||||
from sklearn.utils.fixes import CSR_CONTAINERS
|
||||
|
||||
|
||||
def make_regression_with_outliers(n_samples=50, n_features=20):
    """Regression data where 10% of the rows are replaced by pure noise."""
    rng = np.random.RandomState(0)
    X, y = make_regression(
        n_samples=n_samples, n_features=n_features, random_state=0, noise=0.05
    )

    # Overwrite a random 10% of the rows with noise to create outliers.
    n_outliers = int(0.1 * n_samples)
    outlier_rows = rng.randint(0, n_samples, n_outliers)
    X[outlier_rows, :] = 2.0 * rng.normal(0, 1, (n_outliers, X.shape[1]))
    return X, y
|
||||
|
||||
|
||||
def test_huber_equals_lr_for_high_epsilon():
    # With a very large epsilon no residual is treated as an outlier, so
    # HuberRegressor must match plain LinearRegression.
    X, y = make_regression_with_outliers()
    lr = LinearRegression().fit(X, y)
    huber = HuberRegressor(epsilon=1e3, alpha=0.0).fit(X, y)
    assert_almost_equal(huber.coef_, lr.coef_, 3)
    assert_almost_equal(huber.intercept_, lr.intercept_, 2)
|
||||
|
||||
|
||||
def test_huber_max_iter():
    # When the solver cannot converge it must stop exactly at max_iter.
    X, y = make_regression_with_outliers()
    huber = HuberRegressor(max_iter=1).fit(X, y)
    assert huber.n_iter_ == huber.max_iter
|
||||
|
||||
|
||||
def test_huber_gradient():
    # The analytic gradient of _huber_loss_and_gradient must match a
    # numerical gradient of its loss.
    rng = np.random.RandomState(1)
    X, y = make_regression_with_outliers()
    sample_weight = rng.randint(1, 3, (y.shape[0]))

    def loss_func(x, *args):
        return _huber_loss_and_gradient(x, *args)[0]

    def grad_func(x, *args):
        return _huber_loss_and_gradient(x, *args)[1]

    # Repeat for several random parameter vectors, with and without the
    # intercept term (n_features + 1 vs n_features + 2 parameters).
    for _ in range(5):
        for n_params in (X.shape[1] + 1, X.shape[1] + 2):
            w = rng.randn(n_params)
            w[-1] = np.abs(w[-1])  # the scale parameter must be positive
            grad_error = optimize.check_grad(
                loss_func, grad_func, w, X, y, 0.01, 0.1, sample_weight
            )
            assert_almost_equal(grad_error, 1e-6, 4)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_huber_sample_weights(csr_container):
    # Test the sample_weight implementation in HuberRegressor.

    X, y = make_regression_with_outliers()
    huber = HuberRegressor()
    huber.fit(X, y)
    huber_coef = huber.coef_
    huber_intercept = huber.intercept_

    # Rescale coefs before comparing with assert_array_almost_equal to make
    # sure that the number of decimal places used is somewhat insensitive to
    # the amplitude of the coefficients and therefore to the scale of the
    # data and the regularization parameter
    scale = max(np.mean(np.abs(huber.coef_)), np.mean(np.abs(huber.intercept_)))

    # Unit weights must reproduce the unweighted fit.
    huber.fit(X, y, sample_weight=np.ones(y.shape[0]))
    assert_array_almost_equal(huber.coef_ / scale, huber_coef / scale)
    assert_array_almost_equal(huber.intercept_ / scale, huber_intercept / scale)

    # Integer weights must be equivalent to duplicating the weighted rows.
    X, y = make_regression_with_outliers(n_samples=5, n_features=20)
    X_new = np.vstack((X, np.vstack((X[1], X[1], X[3]))))
    y_new = np.concatenate((y, [y[1]], [y[1]], [y[3]]))
    huber.fit(X_new, y_new)
    huber_coef = huber.coef_
    huber_intercept = huber.intercept_
    sample_weight = np.ones(X.shape[0])
    sample_weight[1] = 3
    sample_weight[3] = 2
    huber.fit(X, y, sample_weight=sample_weight)

    assert_array_almost_equal(huber.coef_ / scale, huber_coef / scale)
    assert_array_almost_equal(huber.intercept_ / scale, huber_intercept / scale)

    # Test sparse implementation with sample weights.
    X_csr = csr_container(X)
    huber_sparse = HuberRegressor()
    huber_sparse.fit(X_csr, y, sample_weight=sample_weight)
    assert_array_almost_equal(huber_sparse.coef_ / scale, huber_coef / scale)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_huber_sparse(csr_container):
    # A sparse fit must reproduce the dense fit, outlier mask included.
    X, y = make_regression_with_outliers()
    dense_model = HuberRegressor(alpha=0.1).fit(X, y)
    sparse_model = HuberRegressor(alpha=0.1).fit(csr_container(X), y)
    assert_array_almost_equal(sparse_model.coef_, dense_model.coef_)
    assert_array_equal(dense_model.outliers_, sparse_model.outliers_)
|
||||
|
||||
|
||||
def test_huber_scaling_invariant():
    # The outlier mask must be invariant under rescaling of y alone and of
    # X and y jointly.
    X, y = make_regression_with_outliers()
    huber = HuberRegressor(fit_intercept=False, alpha=0.0)

    huber.fit(X, y)
    reference_mask = huber.outliers_
    # Sanity check: not every sample may be flagged as an outlier.
    assert not np.all(reference_mask)

    for X_scaled, y_scaled in ((X, 2.0 * y), (2.0 * X, 2.0 * y)):
        huber.fit(X_scaled, y_scaled)
        assert_array_equal(huber.outliers_, reference_mask)
|
||||
|
||||
|
||||
def test_huber_and_sgd_same_results():
    # HuberRegressor and SGDRegressor with the huber loss should converge to
    # the same coefficients when given the same parameters.

    X, y = make_regression_with_outliers(n_samples=10, n_features=2)

    # Fit once to find out the scale parameter. Scale down X and y by scale
    # so that the scale parameter is optimized to 1.0
    huber = HuberRegressor(fit_intercept=False, alpha=0.0, epsilon=1.35)
    huber.fit(X, y)
    X_scale = X / huber.scale_
    y_scale = y / huber.scale_
    huber.fit(X_scale, y_scale)
    assert_almost_equal(huber.scale_, 1.0, 3)

    sgdreg = SGDRegressor(
        alpha=0.0,
        loss="huber",
        shuffle=True,
        random_state=0,
        max_iter=10000,
        fit_intercept=False,
        epsilon=1.35,
        tol=None,
    )
    sgdreg.fit(X_scale, y_scale)
    # Only 1 decimal of agreement is expected: SGD is a stochastic solver.
    assert_array_almost_equal(huber.coef_, sgdreg.coef_, 1)
|
||||
|
||||
|
||||
def test_huber_warm_start():
    # A warm-started refit on identical data should already be converged.
    X, y = make_regression_with_outliers()
    huber_warm = HuberRegressor(alpha=1.0, max_iter=10000, warm_start=True, tol=1e-1)

    huber_warm.fit(X, y)
    coef_first_fit = huber_warm.coef_.copy()
    huber_warm.fit(X, y)

    # SciPy performs the tol check after updating the coefficients, so the
    # two solutions are almost equal but not byte-identical.
    assert_array_almost_equal(huber_warm.coef_, coef_first_fit, 1)

    # The second fit must not need any additional iterations.
    assert huber_warm.n_iter_ == 0
|
||||
|
||||
|
||||
def test_huber_better_r2_score():
    # Test that Huber gets a better R2 score on the inliers than Ridge does.
    X, y = make_regression_with_outliers()
    huber = HuberRegressor(alpha=0.01)
    huber.fit(X, y)
    linear_loss = np.dot(X, huber.coef_) + huber.intercept_ - y
    # Samples whose absolute residual is below epsilon * scale_ are inliers.
    mask = np.abs(linear_loss) < huber.epsilon * huber.scale_
    huber_score = huber.score(X[mask], y[mask])
    huber_outlier_score = huber.score(X[~mask], y[~mask])

    # The Ridge regressor should be influenced by the outliers and hence
    # give a worse score on the non-outliers as compared to the huber
    # regressor.
    ridge = Ridge(alpha=0.01)
    ridge.fit(X, y)
    ridge_score = ridge.score(X[mask], y[mask])
    ridge_outlier_score = ridge.score(X[~mask], y[~mask])
    assert huber_score > ridge_score

    # The huber model should also fit poorly on the outliers.
    assert ridge_outlier_score > huber_outlier_score
|
||||
|
||||
|
||||
def test_huber_bool():
    # Fitting on boolean features must not raise.
    X, y = make_regression(n_samples=200, n_features=2, noise=4.0, random_state=0)
    HuberRegressor().fit(X > 0, y)
|
||||
@@ -0,0 +1,870 @@
|
||||
import warnings
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from scipy import linalg
|
||||
|
||||
from sklearn import datasets, linear_model
|
||||
from sklearn.base import clone
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.linear_model import (
|
||||
Lars,
|
||||
LarsCV,
|
||||
LassoLars,
|
||||
LassoLarsCV,
|
||||
LassoLarsIC,
|
||||
lars_path,
|
||||
)
|
||||
from sklearn.linear_model._least_angle import _lars_path_residues
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.pipeline import make_pipeline
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
from sklearn.utils._testing import (
|
||||
TempMemmap,
|
||||
assert_allclose,
|
||||
assert_array_almost_equal,
|
||||
ignore_warnings,
|
||||
)
|
||||
|
||||
# TODO: use another dataset that has multiple drops
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target
# Precomputed Gram matrix (X^T X) and X^T y, shared by the precomputed-path
# tests below.
G = np.dot(X.T, X)
Xy = np.dot(X.T, y)
n_samples = y.size
|
||||
|
||||
|
||||
def test_simple():
    # Principle of Lars is to keep covariances tied and decreasing

    # also test verbose output
    import sys
    from io import StringIO

    old_stdout = sys.stdout
    try:
        # Capture stdout so the verbose progress output does not pollute the
        # test log.
        sys.stdout = StringIO()

        _, _, coef_path_ = linear_model.lars_path(X, y, method="lar", verbose=10)

        sys.stdout = old_stdout

        for i, coef_ in enumerate(coef_path_.T):
            res = y - np.dot(X, coef_)
            cov = np.dot(X.T, res)
            C = np.max(abs(cov))
            eps = 1e-3
            # Number of covariances tied (within eps) to the maximum; Lars
            # adds one variable to the active set per step.
            ocur = len(cov[C - eps < abs(cov)])
            if i < X.shape[1]:
                assert ocur == i + 1
            else:
                # no more than max_pred variables can go into the active set
                assert ocur == X.shape[1]
    finally:
        # Restore stdout even when an assertion above fails.
        sys.stdout = old_stdout
|
||||
|
||||
|
||||
def test_simple_precomputed():
|
||||
# The same, with precomputed Gram matrix
|
||||
|
||||
_, _, coef_path_ = linear_model.lars_path(X, y, Gram=G, method="lar")
|
||||
|
||||
for i, coef_ in enumerate(coef_path_.T):
|
||||
res = y - np.dot(X, coef_)
|
||||
cov = np.dot(X.T, res)
|
||||
C = np.max(abs(cov))
|
||||
eps = 1e-3
|
||||
ocur = len(cov[C - eps < abs(cov)])
|
||||
if i < X.shape[1]:
|
||||
assert ocur == i + 1
|
||||
else:
|
||||
# no more than max_pred variables can go into the active set
|
||||
assert ocur == X.shape[1]
|
||||
|
||||
|
||||
def _assert_same_lars_path_result(output1, output2):
|
||||
assert len(output1) == len(output2)
|
||||
for o1, o2 in zip(output1, output2):
|
||||
assert_allclose(o1, o2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("method", ["lar", "lasso"])
|
||||
@pytest.mark.parametrize("return_path", [True, False])
|
||||
def test_lars_path_gram_equivalent(method, return_path):
|
||||
_assert_same_lars_path_result(
|
||||
linear_model.lars_path_gram(
|
||||
Xy=Xy, Gram=G, n_samples=n_samples, method=method, return_path=return_path
|
||||
),
|
||||
linear_model.lars_path(X, y, Gram=G, method=method, return_path=return_path),
|
||||
)
|
||||
|
||||
|
||||
def test_x_none_gram_none_raises_value_error():
|
||||
# Test that lars_path with no X and Gram raises exception
|
||||
Xy = np.dot(X.T, y)
|
||||
with pytest.raises(ValueError, match="X and Gram cannot both be unspecified"):
|
||||
linear_model.lars_path(None, y, Gram=None, Xy=Xy)
|
||||
|
||||
|
||||
def test_all_precomputed():
|
||||
# Test that lars_path with precomputed Gram and Xy gives the right answer
|
||||
G = np.dot(X.T, X)
|
||||
Xy = np.dot(X.T, y)
|
||||
for method in "lar", "lasso":
|
||||
output = linear_model.lars_path(X, y, method=method)
|
||||
output_pre = linear_model.lars_path(X, y, Gram=G, Xy=Xy, method=method)
|
||||
for expected, got in zip(output, output_pre):
|
||||
assert_array_almost_equal(expected, got)
|
||||
|
||||
|
||||
# TODO: remove warning filter when numpy min version >= 2.0.0
|
||||
@pytest.mark.filterwarnings("ignore: `rcond` parameter will change")
|
||||
def test_lars_lstsq():
|
||||
# Test that Lars gives least square solution at the end
|
||||
# of the path
|
||||
X1 = 3 * X # use un-normalized dataset
|
||||
clf = linear_model.LassoLars(alpha=0.0)
|
||||
clf.fit(X1, y)
|
||||
coef_lstsq = np.linalg.lstsq(X1, y)[0]
|
||||
assert_array_almost_equal(clf.coef_, coef_lstsq)
|
||||
|
||||
|
||||
# TODO: remove warning filter when numpy min version >= 2.0.0
|
||||
@pytest.mark.filterwarnings("ignore: `rcond` parameter will change")
|
||||
def test_lasso_gives_lstsq_solution():
|
||||
# Test that Lars Lasso gives least square solution at the end
|
||||
# of the path
|
||||
_, _, coef_path_ = linear_model.lars_path(X, y, method="lasso")
|
||||
coef_lstsq = np.linalg.lstsq(X, y)[0]
|
||||
assert_array_almost_equal(coef_lstsq, coef_path_[:, -1])
|
||||
|
||||
|
||||
def test_collinearity():
|
||||
# Check that lars_path is robust to collinearity in input
|
||||
X = np.array([[3.0, 3.0, 1.0], [2.0, 2.0, 0.0], [1.0, 1.0, 0]])
|
||||
y = np.array([1.0, 0.0, 0])
|
||||
rng = np.random.RandomState(0)
|
||||
|
||||
f = ignore_warnings
|
||||
_, _, coef_path_ = f(linear_model.lars_path)(X, y, alpha_min=0.01)
|
||||
assert not np.isnan(coef_path_).any()
|
||||
residual = np.dot(X, coef_path_[:, -1]) - y
|
||||
assert (residual**2).sum() < 1.0 # just make sure it's bounded
|
||||
|
||||
n_samples = 10
|
||||
X = rng.rand(n_samples, 5)
|
||||
y = np.zeros(n_samples)
|
||||
_, _, coef_path_ = linear_model.lars_path(
|
||||
X,
|
||||
y,
|
||||
Gram="auto",
|
||||
copy_X=False,
|
||||
copy_Gram=False,
|
||||
alpha_min=0.0,
|
||||
method="lasso",
|
||||
verbose=0,
|
||||
max_iter=500,
|
||||
)
|
||||
assert_array_almost_equal(coef_path_, np.zeros_like(coef_path_))
|
||||
|
||||
|
||||
def test_no_path():
|
||||
# Test that the ``return_path=False`` option returns the correct output
|
||||
alphas_, _, coef_path_ = linear_model.lars_path(X, y, method="lar")
|
||||
alpha_, _, coef = linear_model.lars_path(X, y, method="lar", return_path=False)
|
||||
|
||||
assert_array_almost_equal(coef, coef_path_[:, -1])
|
||||
assert alpha_ == alphas_[-1]
|
||||
|
||||
|
||||
def test_no_path_precomputed():
|
||||
# Test that the ``return_path=False`` option with Gram remains correct
|
||||
alphas_, _, coef_path_ = linear_model.lars_path(X, y, method="lar", Gram=G)
|
||||
alpha_, _, coef = linear_model.lars_path(
|
||||
X, y, method="lar", Gram=G, return_path=False
|
||||
)
|
||||
|
||||
assert_array_almost_equal(coef, coef_path_[:, -1])
|
||||
assert alpha_ == alphas_[-1]
|
||||
|
||||
|
||||
def test_no_path_all_precomputed():
|
||||
# Test that the ``return_path=False`` option with Gram and Xy remains
|
||||
# correct
|
||||
X, y = 3 * diabetes.data, diabetes.target
|
||||
G = np.dot(X.T, X)
|
||||
Xy = np.dot(X.T, y)
|
||||
alphas_, _, coef_path_ = linear_model.lars_path(
|
||||
X, y, method="lasso", Xy=Xy, Gram=G, alpha_min=0.9
|
||||
)
|
||||
alpha_, _, coef = linear_model.lars_path(
|
||||
X, y, method="lasso", Gram=G, Xy=Xy, alpha_min=0.9, return_path=False
|
||||
)
|
||||
|
||||
assert_array_almost_equal(coef, coef_path_[:, -1])
|
||||
assert alpha_ == alphas_[-1]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"classifier", [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC]
|
||||
)
|
||||
def test_lars_precompute(classifier):
|
||||
# Check for different values of precompute
|
||||
G = np.dot(X.T, X)
|
||||
|
||||
clf = classifier(precompute=G)
|
||||
output_1 = ignore_warnings(clf.fit)(X, y).coef_
|
||||
for precompute in [True, False, "auto", None]:
|
||||
clf = classifier(precompute=precompute)
|
||||
output_2 = clf.fit(X, y).coef_
|
||||
assert_array_almost_equal(output_1, output_2, decimal=8)
|
||||
|
||||
|
||||
def test_singular_matrix():
|
||||
# Test when input is a singular matrix
|
||||
X1 = np.array([[1, 1.0], [1.0, 1.0]])
|
||||
y1 = np.array([1, 1])
|
||||
_, _, coef_path = linear_model.lars_path(X1, y1)
|
||||
assert_array_almost_equal(coef_path.T, [[0, 0], [1, 0]])
|
||||
|
||||
|
||||
def test_rank_deficient_design():
|
||||
# consistency test that checks that LARS Lasso is handling rank
|
||||
# deficient input data (with n_features < rank) in the same way
|
||||
# as coordinate descent Lasso
|
||||
y = [5, 0, 5]
|
||||
for X in ([[5, 0], [0, 5], [10, 10]], [[10, 10, 0], [1e-32, 0, 0], [0, 0, 1]]):
|
||||
# To be able to use the coefs to compute the objective function,
|
||||
# we need to turn off normalization
|
||||
lars = linear_model.LassoLars(0.1)
|
||||
coef_lars_ = lars.fit(X, y).coef_
|
||||
obj_lars = 1.0 / (2.0 * 3.0) * linalg.norm(
|
||||
y - np.dot(X, coef_lars_)
|
||||
) ** 2 + 0.1 * linalg.norm(coef_lars_, 1)
|
||||
coord_descent = linear_model.Lasso(0.1, tol=1e-6)
|
||||
coef_cd_ = coord_descent.fit(X, y).coef_
|
||||
obj_cd = (1.0 / (2.0 * 3.0)) * linalg.norm(
|
||||
y - np.dot(X, coef_cd_)
|
||||
) ** 2 + 0.1 * linalg.norm(coef_cd_, 1)
|
||||
assert obj_lars < obj_cd * (1.0 + 1e-8)
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd():
|
||||
# Test that LassoLars and Lasso using coordinate descent give the
|
||||
# same results.
|
||||
X = 3 * diabetes.data
|
||||
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso")
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
||||
for c, a in zip(lasso_path.T, alphas):
|
||||
if a == 0:
|
||||
continue
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
# similar test, with the classifiers
|
||||
for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
|
||||
clf1 = linear_model.LassoLars(alpha=alpha).fit(X, y)
|
||||
clf2 = linear_model.Lasso(alpha=alpha, tol=1e-8).fit(X, y)
|
||||
err = linalg.norm(clf1.coef_ - clf2.coef_)
|
||||
assert err < 1e-3
|
||||
|
||||
# same test, with normalized data
|
||||
X = diabetes.data
|
||||
X = X - X.sum(axis=0)
|
||||
X /= np.linalg.norm(X, axis=0)
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso")
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
||||
for c, a in zip(lasso_path.T, alphas):
|
||||
if a == 0:
|
||||
continue
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_early_stopping():
|
||||
# Test that LassoLars and Lasso using coordinate descent give the
|
||||
# same results when early stopping is used.
|
||||
# (test : before, in the middle, and in the last part of the path)
|
||||
alphas_min = [10, 0.9, 1e-4]
|
||||
|
||||
X = diabetes.data
|
||||
|
||||
for alpha_min in alphas_min:
|
||||
alphas, _, lasso_path = linear_model.lars_path(
|
||||
X, y, method="lasso", alpha_min=alpha_min
|
||||
)
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
|
||||
lasso_cd.alpha = alphas[-1]
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
# same test, with normalization
|
||||
X = diabetes.data - diabetes.data.sum(axis=0)
|
||||
X /= np.linalg.norm(X, axis=0)
|
||||
|
||||
for alpha_min in alphas_min:
|
||||
alphas, _, lasso_path = linear_model.lars_path(
|
||||
X, y, method="lasso", alpha_min=alpha_min
|
||||
)
|
||||
lasso_cd = linear_model.Lasso(tol=1e-8)
|
||||
lasso_cd.alpha = alphas[-1]
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
|
||||
def test_lasso_lars_path_length():
|
||||
# Test that the path length of the LassoLars is right
|
||||
lasso = linear_model.LassoLars()
|
||||
lasso.fit(X, y)
|
||||
lasso2 = linear_model.LassoLars(alpha=lasso.alphas_[2])
|
||||
lasso2.fit(X, y)
|
||||
assert_array_almost_equal(lasso.alphas_[:3], lasso2.alphas_)
|
||||
# Also check that the sequence of alphas is always decreasing
|
||||
assert np.all(np.diff(lasso.alphas_) < 0)
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
|
||||
# Test lasso lars on a very ill-conditioned design, and check that
|
||||
# it does not blow up, and stays somewhat close to a solution given
|
||||
# by the coordinate descent solver
|
||||
# Also test that lasso_path (using lars_path output style) gives
|
||||
# the same result as lars_path and previous lasso output style
|
||||
# under these conditions.
|
||||
rng = np.random.RandomState(42)
|
||||
|
||||
# Generate data
|
||||
n, m = 70, 100
|
||||
k = 5
|
||||
X = rng.randn(n, m)
|
||||
w = np.zeros((m, 1))
|
||||
i = np.arange(0, m)
|
||||
rng.shuffle(i)
|
||||
supp = i[:k]
|
||||
w[supp] = np.sign(rng.randn(k, 1)) * (rng.rand(k, 1) + 1)
|
||||
y = np.dot(X, w)
|
||||
sigma = 0.2
|
||||
y += sigma * rng.rand(*y.shape)
|
||||
y = y.squeeze()
|
||||
lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method="lasso")
|
||||
|
||||
_, lasso_coef2, _ = linear_model.lasso_path(X, y, alphas=lars_alphas, tol=1e-6)
|
||||
|
||||
assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
|
||||
# Create an ill-conditioned situation in which the LARS has to go
|
||||
# far in the path to converge, and check that LARS and coordinate
|
||||
# descent give the same answers
|
||||
# Note it used to be the case that Lars had to use the drop for good
|
||||
# strategy for this but this is no longer the case with the
|
||||
# equality_tolerance checks
|
||||
X = [[1e20, 1e20, 0], [-1e-32, 0, 0], [1, 1, 1]]
|
||||
y = [10, 10, 1]
|
||||
alpha = 0.0001
|
||||
|
||||
def objective_function(coef):
|
||||
return 1.0 / (2.0 * len(X)) * linalg.norm(
|
||||
y - np.dot(X, coef)
|
||||
) ** 2 + alpha * linalg.norm(coef, 1)
|
||||
|
||||
lars = linear_model.LassoLars(alpha=alpha)
|
||||
warning_message = "Regressors in active set degenerate."
|
||||
with pytest.warns(ConvergenceWarning, match=warning_message):
|
||||
lars.fit(X, y)
|
||||
lars_coef_ = lars.coef_
|
||||
lars_obj = objective_function(lars_coef_)
|
||||
|
||||
coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4)
|
||||
cd_coef_ = coord_descent.fit(X, y).coef_
|
||||
cd_obj = objective_function(cd_coef_)
|
||||
|
||||
assert lars_obj < cd_obj * (1.0 + 1e-8)
|
||||
|
||||
|
||||
def test_lars_add_features():
|
||||
# assure that at least some features get added if necessary
|
||||
# test for 6d2b4c
|
||||
# Hilbert matrix
|
||||
n = 5
|
||||
H = 1.0 / (np.arange(1, n + 1) + np.arange(n)[:, np.newaxis])
|
||||
clf = linear_model.Lars(fit_intercept=False).fit(H, np.arange(n))
|
||||
assert np.all(np.isfinite(clf.coef_))
|
||||
|
||||
|
||||
def test_lars_n_nonzero_coefs(verbose=False):
|
||||
lars = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose)
|
||||
lars.fit(X, y)
|
||||
assert len(lars.coef_.nonzero()[0]) == 6
|
||||
# The path should be of length 6 + 1 in a Lars going down to 6
|
||||
# non-zero coefs
|
||||
assert len(lars.alphas_) == 7
|
||||
|
||||
|
||||
def test_multitarget():
|
||||
# Assure that estimators receiving multidimensional y do the right thing
|
||||
Y = np.vstack([y, y**2]).T
|
||||
n_targets = Y.shape[1]
|
||||
estimators = [
|
||||
linear_model.LassoLars(),
|
||||
linear_model.Lars(),
|
||||
# regression test for gh-1615
|
||||
linear_model.LassoLars(fit_intercept=False),
|
||||
linear_model.Lars(fit_intercept=False),
|
||||
]
|
||||
|
||||
for estimator in estimators:
|
||||
estimator.fit(X, Y)
|
||||
Y_pred = estimator.predict(X)
|
||||
alphas, active, coef, path = (
|
||||
estimator.alphas_,
|
||||
estimator.active_,
|
||||
estimator.coef_,
|
||||
estimator.coef_path_,
|
||||
)
|
||||
for k in range(n_targets):
|
||||
estimator.fit(X, Y[:, k])
|
||||
y_pred = estimator.predict(X)
|
||||
assert_array_almost_equal(alphas[k], estimator.alphas_)
|
||||
assert_array_almost_equal(active[k], estimator.active_)
|
||||
assert_array_almost_equal(coef[k], estimator.coef_)
|
||||
assert_array_almost_equal(path[k], estimator.coef_path_)
|
||||
assert_array_almost_equal(Y_pred[:, k], y_pred)
|
||||
|
||||
|
||||
def test_lars_cv():
|
||||
# Test the LassoLarsCV object by checking that the optimal alpha
|
||||
# increases as the number of samples increases.
|
||||
# This property is not actually guaranteed in general and is just a
|
||||
# property of the given dataset, with the given steps chosen.
|
||||
old_alpha = 0
|
||||
lars_cv = linear_model.LassoLarsCV()
|
||||
for length in (400, 200, 100):
|
||||
X = diabetes.data[:length]
|
||||
y = diabetes.target[:length]
|
||||
lars_cv.fit(X, y)
|
||||
np.testing.assert_array_less(old_alpha, lars_cv.alpha_)
|
||||
old_alpha = lars_cv.alpha_
|
||||
assert not hasattr(lars_cv, "n_nonzero_coefs")
|
||||
|
||||
|
||||
def test_lars_cv_max_iter(recwarn):
|
||||
warnings.simplefilter("always")
|
||||
with np.errstate(divide="raise", invalid="raise"):
|
||||
X = diabetes.data
|
||||
y = diabetes.target
|
||||
rng = np.random.RandomState(42)
|
||||
x = rng.randn(len(y))
|
||||
X = diabetes.data
|
||||
X = np.c_[X, x, x] # add correlated features
|
||||
X = StandardScaler().fit_transform(X)
|
||||
lars_cv = linear_model.LassoLarsCV(max_iter=5, cv=5)
|
||||
lars_cv.fit(X, y)
|
||||
|
||||
# Check that there is no warning in general and no ConvergenceWarning
|
||||
# in particular.
|
||||
# Materialize the string representation of the warning to get a more
|
||||
# informative error message in case of AssertionError.
|
||||
recorded_warnings = [str(w) for w in recwarn]
|
||||
assert len(recorded_warnings) == 0
|
||||
|
||||
|
||||
def test_lasso_lars_ic():
|
||||
# Test the LassoLarsIC object by checking that
|
||||
# - some good features are selected.
|
||||
# - alpha_bic > alpha_aic
|
||||
# - n_nonzero_bic < n_nonzero_aic
|
||||
lars_bic = linear_model.LassoLarsIC("bic")
|
||||
lars_aic = linear_model.LassoLarsIC("aic")
|
||||
rng = np.random.RandomState(42)
|
||||
X = diabetes.data
|
||||
X = np.c_[X, rng.randn(X.shape[0], 5)] # add 5 bad features
|
||||
X = StandardScaler().fit_transform(X)
|
||||
lars_bic.fit(X, y)
|
||||
lars_aic.fit(X, y)
|
||||
nonzero_bic = np.where(lars_bic.coef_)[0]
|
||||
nonzero_aic = np.where(lars_aic.coef_)[0]
|
||||
assert lars_bic.alpha_ > lars_aic.alpha_
|
||||
assert len(nonzero_bic) < len(nonzero_aic)
|
||||
assert np.max(nonzero_bic) < diabetes.data.shape[1]
|
||||
|
||||
|
||||
def test_lars_path_readonly_data():
|
||||
# When using automated memory mapping on large input, the
|
||||
# fold data is in read-only mode
|
||||
# This is a non-regression test for:
|
||||
# https://github.com/scikit-learn/scikit-learn/issues/4597
|
||||
splitted_data = train_test_split(X, y, random_state=42)
|
||||
with TempMemmap(splitted_data) as (X_train, X_test, y_train, y_test):
|
||||
# The following should not fail despite copy=False
|
||||
_lars_path_residues(X_train, y_train, X_test, y_test, copy=False)
|
||||
|
||||
|
||||
def test_lars_path_positive_constraint():
|
||||
# this is the main test for the positive parameter on the lars_path method
|
||||
# the estimator classes just make use of this function
|
||||
|
||||
# we do the test on the diabetes dataset
|
||||
|
||||
# ensure that we get negative coefficients when positive=False
|
||||
# and all positive when positive=True
|
||||
# for method 'lar' (default) and lasso
|
||||
|
||||
err_msg = "Positive constraint not supported for 'lar' coding method."
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
linear_model.lars_path(
|
||||
diabetes["data"], diabetes["target"], method="lar", positive=True
|
||||
)
|
||||
|
||||
method = "lasso"
|
||||
_, _, coefs = linear_model.lars_path(
|
||||
X, y, return_path=True, method=method, positive=False
|
||||
)
|
||||
assert coefs.min() < 0
|
||||
|
||||
_, _, coefs = linear_model.lars_path(
|
||||
X, y, return_path=True, method=method, positive=True
|
||||
)
|
||||
assert coefs.min() >= 0
|
||||
|
||||
|
||||
# now we gonna test the positive option for all estimator classes
|
||||
|
||||
default_parameter = {"fit_intercept": False}
|
||||
|
||||
estimator_parameter_map = {
|
||||
"LassoLars": {"alpha": 0.1},
|
||||
"LassoLarsCV": {},
|
||||
"LassoLarsIC": {},
|
||||
}
|
||||
|
||||
|
||||
def test_estimatorclasses_positive_constraint():
|
||||
# testing the transmissibility for the positive option of all estimator
|
||||
# classes in this same function here
|
||||
default_parameter = {"fit_intercept": False}
|
||||
|
||||
estimator_parameter_map = {
|
||||
"LassoLars": {"alpha": 0.1},
|
||||
"LassoLarsCV": {},
|
||||
"LassoLarsIC": {},
|
||||
}
|
||||
for estname in estimator_parameter_map:
|
||||
params = default_parameter.copy()
|
||||
params.update(estimator_parameter_map[estname])
|
||||
estimator = getattr(linear_model, estname)(positive=False, **params)
|
||||
estimator.fit(X, y)
|
||||
assert estimator.coef_.min() < 0
|
||||
estimator = getattr(linear_model, estname)(positive=True, **params)
|
||||
estimator.fit(X, y)
|
||||
assert min(estimator.coef_) >= 0
|
||||
|
||||
|
||||
def test_lasso_lars_vs_lasso_cd_positive():
|
||||
# Test that LassoLars and Lasso using coordinate descent give the
|
||||
# same results when using the positive option
|
||||
|
||||
# This test is basically a copy of the above with additional positive
|
||||
# option. However for the middle part, the comparison of coefficient values
|
||||
# for a range of alphas, we had to make an adaptations. See below.
|
||||
|
||||
# not normalized data
|
||||
X = 3 * diabetes.data
|
||||
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso", positive=True)
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
|
||||
for c, a in zip(lasso_path.T, alphas):
|
||||
if a == 0:
|
||||
continue
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
# The range of alphas chosen for coefficient comparison here is restricted
|
||||
# as compared with the above test without the positive option. This is due
|
||||
# to the circumstance that the Lars-Lasso algorithm does not converge to
|
||||
# the least-squares-solution for small alphas, see 'Least Angle Regression'
|
||||
# by Efron et al 2004. The coefficients are typically in congruence up to
|
||||
# the smallest alpha reached by the Lars-Lasso algorithm and start to
|
||||
# diverge thereafter. See
|
||||
# https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff
|
||||
|
||||
for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
|
||||
clf1 = linear_model.LassoLars(
|
||||
fit_intercept=False, alpha=alpha, positive=True
|
||||
).fit(X, y)
|
||||
clf2 = linear_model.Lasso(
|
||||
fit_intercept=False, alpha=alpha, tol=1e-8, positive=True
|
||||
).fit(X, y)
|
||||
err = linalg.norm(clf1.coef_ - clf2.coef_)
|
||||
assert err < 1e-3
|
||||
|
||||
# normalized data
|
||||
X = diabetes.data - diabetes.data.sum(axis=0)
|
||||
X /= np.linalg.norm(X, axis=0)
|
||||
alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso", positive=True)
|
||||
lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
|
||||
for c, a in zip(lasso_path.T[:-1], alphas[:-1]): # don't include alpha=0
|
||||
lasso_cd.alpha = a
|
||||
lasso_cd.fit(X, y)
|
||||
error = linalg.norm(c - lasso_cd.coef_)
|
||||
assert error < 0.01
|
||||
|
||||
|
||||
def test_lasso_lars_vs_R_implementation():
|
||||
# Test that sklearn LassoLars implementation agrees with the LassoLars
|
||||
# implementation available in R (lars library) when fit_intercept=False.
|
||||
|
||||
# Let's generate the data used in the bug report 7778
|
||||
y = np.array([-6.45006793, -3.51251449, -8.52445396, 6.12277822, -19.42109366])
|
||||
x = np.array(
|
||||
[
|
||||
[0.47299829, 0, 0, 0, 0],
|
||||
[0.08239882, 0.85784863, 0, 0, 0],
|
||||
[0.30114139, -0.07501577, 0.80895216, 0, 0],
|
||||
[-0.01460346, -0.1015233, 0.0407278, 0.80338378, 0],
|
||||
[-0.69363927, 0.06754067, 0.18064514, -0.0803561, 0.40427291],
|
||||
]
|
||||
)
|
||||
|
||||
X = x.T
|
||||
|
||||
# The R result was obtained using the following code:
|
||||
#
|
||||
# library(lars)
|
||||
# model_lasso_lars = lars(X, t(y), type="lasso", intercept=FALSE,
|
||||
# trace=TRUE, normalize=FALSE)
|
||||
# r = t(model_lasso_lars$beta)
|
||||
#
|
||||
|
||||
r = np.array(
|
||||
[
|
||||
[
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
-79.810362809499026,
|
||||
-83.528788732782829,
|
||||
-83.777653739190711,
|
||||
-83.784156932888934,
|
||||
-84.033390591756657,
|
||||
],
|
||||
[0, 0, 0, 0, -0.476624256777266, 0, 0, 0, 0, 0.025219751009936],
|
||||
[
|
||||
0,
|
||||
-3.577397088285891,
|
||||
-4.702795355871871,
|
||||
-7.016748621359461,
|
||||
-7.614898471899412,
|
||||
-0.336938391359179,
|
||||
0,
|
||||
0,
|
||||
0.001213370600853,
|
||||
0.048162321585148,
|
||||
],
|
||||
[
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
2.231558436628169,
|
||||
2.723267514525966,
|
||||
2.811549786389614,
|
||||
2.813766976061531,
|
||||
2.817462468949557,
|
||||
2.817368178703816,
|
||||
2.816221090636795,
|
||||
],
|
||||
[
|
||||
0,
|
||||
0,
|
||||
-1.218422599914637,
|
||||
-3.457726183014808,
|
||||
-4.021304522060710,
|
||||
-45.827461592423745,
|
||||
-47.776608869312305,
|
||||
-47.911561610746404,
|
||||
-47.914845922736234,
|
||||
-48.039562334265717,
|
||||
],
|
||||
]
|
||||
)
|
||||
|
||||
model_lasso_lars = linear_model.LassoLars(alpha=0, fit_intercept=False)
|
||||
model_lasso_lars.fit(X, y)
|
||||
skl_betas = model_lasso_lars.coef_path_
|
||||
|
||||
assert_array_almost_equal(r, skl_betas, decimal=12)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy_X", [True, False])
|
||||
def test_lasso_lars_copyX_behaviour(copy_X):
|
||||
"""
|
||||
Test that user input regarding copy_X is not being overridden (it was until
|
||||
at least version 0.21)
|
||||
|
||||
"""
|
||||
lasso_lars = LassoLarsIC(copy_X=copy_X, precompute=False)
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(0, 1, (100, 5))
|
||||
X_copy = X.copy()
|
||||
y = X[:, 2]
|
||||
lasso_lars.fit(X, y)
|
||||
assert copy_X == np.array_equal(X, X_copy)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy_X", [True, False])
|
||||
def test_lasso_lars_fit_copyX_behaviour(copy_X):
|
||||
"""
|
||||
Test that user input to .fit for copy_X overrides default __init__ value
|
||||
|
||||
"""
|
||||
lasso_lars = LassoLarsIC(precompute=False)
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.normal(0, 1, (100, 5))
|
||||
X_copy = X.copy()
|
||||
y = X[:, 2]
|
||||
lasso_lars.fit(X, y, copy_X=copy_X)
|
||||
assert copy_X == np.array_equal(X, X_copy)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("est", (LassoLars(alpha=1e-3), Lars()))
|
||||
def test_lars_with_jitter(est):
|
||||
est = clone(est) # Avoid side effects from previous tests.
|
||||
# Test that a small amount of jitter helps stability,
|
||||
# using example provided in issue #2746
|
||||
|
||||
X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0], [0.0, -1.0, 0.0, 0.0, 0.0]])
|
||||
y = [-2.5, -2.5]
|
||||
expected_coef = [0, 2.5, 0, 2.5, 0]
|
||||
|
||||
# set to fit_intercept to False since target is constant and we want check
|
||||
# the value of coef. coef would be all zeros otherwise.
|
||||
est.set_params(fit_intercept=False)
|
||||
est_jitter = clone(est).set_params(jitter=10e-8, random_state=0)
|
||||
|
||||
est.fit(X, y)
|
||||
est_jitter.fit(X, y)
|
||||
|
||||
assert np.mean((est.coef_ - est_jitter.coef_) ** 2) > 0.1
|
||||
np.testing.assert_allclose(est_jitter.coef_, expected_coef, rtol=1e-3)
|
||||
|
||||
|
||||
def test_X_none_gram_not_none():
|
||||
with pytest.raises(ValueError, match="X cannot be None if Gram is not None"):
|
||||
lars_path(X=None, y=np.array([1]), Gram=True)
|
||||
|
||||
|
||||
def test_copy_X_with_auto_gram():
|
||||
# Non-regression test for #17789, `copy_X=True` and Gram='auto' does not
|
||||
# overwrite X
|
||||
rng = np.random.RandomState(42)
|
||||
X = rng.rand(6, 6)
|
||||
y = rng.rand(6)
|
||||
|
||||
X_before = X.copy()
|
||||
linear_model.lars_path(X, y, Gram="auto", copy_X=True, method="lasso")
|
||||
# X did not change
|
||||
assert_allclose(X, X_before)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"LARS, has_coef_path, args",
|
||||
(
|
||||
(Lars, True, {}),
|
||||
(LassoLars, True, {}),
|
||||
(LassoLarsIC, False, {}),
|
||||
(LarsCV, True, {}),
|
||||
# max_iter=5 is for avoiding ConvergenceWarning
|
||||
(LassoLarsCV, True, {"max_iter": 5}),
|
||||
),
|
||||
)
|
||||
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
|
||||
def test_lars_dtype_match(LARS, has_coef_path, args, dtype):
|
||||
# The test ensures that the fit method preserves input dtype
|
||||
rng = np.random.RandomState(0)
|
||||
X = rng.rand(20, 6).astype(dtype)
|
||||
y = rng.rand(20).astype(dtype)
|
||||
|
||||
model = LARS(**args)
|
||||
model.fit(X, y)
|
||||
assert model.coef_.dtype == dtype
|
||||
if has_coef_path:
|
||||
assert model.coef_path_.dtype == dtype
|
||||
assert model.intercept_.dtype == dtype
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"LARS, has_coef_path, args",
|
||||
(
|
||||
(Lars, True, {}),
|
||||
(LassoLars, True, {}),
|
||||
(LassoLarsIC, False, {}),
|
||||
(LarsCV, True, {}),
|
||||
# max_iter=5 is for avoiding ConvergenceWarning
|
||||
(LassoLarsCV, True, {"max_iter": 5}),
|
||||
),
|
||||
)
|
||||
def test_lars_numeric_consistency(LARS, has_coef_path, args):
|
||||
# The test ensures numerical consistency between trained coefficients
|
||||
# of float32 and float64.
|
||||
rtol = 1e-5
|
||||
atol = 1e-5
|
||||
|
||||
rng = np.random.RandomState(0)
|
||||
X_64 = rng.rand(10, 6)
|
||||
y_64 = rng.rand(10)
|
||||
|
||||
model_64 = LARS(**args).fit(X_64, y_64)
|
||||
model_32 = LARS(**args).fit(X_64.astype(np.float32), y_64.astype(np.float32))
|
||||
|
||||
assert_allclose(model_64.coef_, model_32.coef_, rtol=rtol, atol=atol)
|
||||
if has_coef_path:
|
||||
assert_allclose(model_64.coef_path_, model_32.coef_path_, rtol=rtol, atol=atol)
|
||||
assert_allclose(model_64.intercept_, model_32.intercept_, rtol=rtol, atol=atol)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("criterion", ["aic", "bic"])
|
||||
def test_lassolarsic_alpha_selection(criterion):
|
||||
"""Check that we properly compute the AIC and BIC score.
|
||||
|
||||
In this test, we reproduce the example of the Fig. 2 of Zou et al.
|
||||
(reference [1] in LassoLarsIC) In this example, only 7 features should be
|
||||
selected.
|
||||
"""
|
||||
model = make_pipeline(StandardScaler(), LassoLarsIC(criterion=criterion))
|
||||
model.fit(X, y)
|
||||
|
||||
best_alpha_selected = np.argmin(model[-1].criterion_)
|
||||
assert best_alpha_selected == 7
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fit_intercept", [True, False])
|
||||
def test_lassolarsic_noise_variance(fit_intercept):
|
||||
"""Check the behaviour when `n_samples` < `n_features` and that one needs
|
||||
to provide the noise variance."""
|
||||
rng = np.random.RandomState(0)
|
||||
X, y = datasets.make_regression(
|
||||
n_samples=10, n_features=11 - fit_intercept, random_state=rng
|
||||
)
|
||||
|
||||
model = make_pipeline(StandardScaler(), LassoLarsIC(fit_intercept=fit_intercept))
|
||||
|
||||
err_msg = (
|
||||
"You are using LassoLarsIC in the case where the number of samples is smaller"
|
||||
" than the number of features"
|
||||
)
|
||||
with pytest.raises(ValueError, match=err_msg):
|
||||
model.fit(X, y)
|
||||
|
||||
model.set_params(lassolarsic__noise_variance=1.0)
|
||||
model.fit(X, y).predict(X)
|
||||
@@ -0,0 +1,510 @@
|
||||
"""
|
||||
Tests for LinearModelLoss
|
||||
|
||||
Note that correctness of losses (which compose LinearModelLoss) is already well
|
||||
covered in the _loss module.
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose
|
||||
from scipy import linalg, optimize
|
||||
|
||||
from sklearn._loss.loss import (
|
||||
HalfBinomialLoss,
|
||||
HalfMultinomialLoss,
|
||||
HalfPoissonLoss,
|
||||
)
|
||||
from sklearn.datasets import make_low_rank_matrix
|
||||
from sklearn.linear_model._linear_loss import LinearModelLoss
|
||||
from sklearn.utils.extmath import squared_norm
|
||||
from sklearn.utils.fixes import CSR_CONTAINERS
|
||||
|
||||
# We do not need to test all losses, just what LinearModelLoss does on top of the
|
||||
# base losses.
|
||||
LOSSES = [HalfBinomialLoss, HalfMultinomialLoss, HalfPoissonLoss]
|
||||
|
||||
|
||||
def random_X_y_coef(
    linear_model_loss, n_samples, n_features, coef_bound=(-2, 2), seed=42
):
    """Random generate y, X and coef in valid range.

    Draws X from a low-rank matrix, fills coef uniformly within coef_bound,
    and generates y consistent with the loss's link (class labels for
    multiclass, inverse-link values otherwise).
    """
    rng = np.random.RandomState(seed)
    # Degrees of freedom per class: one extra column if an intercept is fit.
    n_dof = n_features + linear_model_loss.fit_intercept
    X = make_low_rank_matrix(
        n_samples=n_samples,
        n_features=n_features,
        random_state=rng,
    )
    coef = linear_model_loss.init_zero_coef(X)

    if linear_model_loss.base_loss.is_multiclass:
        n_classes = linear_model_loss.base_loss.n_classes
        coef.flat[:] = rng.uniform(
            low=coef_bound[0],
            high=coef_bound[1],
            size=n_classes * n_dof,
        )
        if linear_model_loss.fit_intercept:
            # Last column of coef holds the per-class intercept.
            raw_prediction = X @ coef[:, :-1].T + coef[:, -1]
        else:
            raw_prediction = X @ coef.T
        proba = linear_model_loss.base_loss.link.inverse(raw_prediction)

        # y = rng.choice(np.arange(n_classes), p=proba) does not work.
        # See https://stackoverflow.com/a/34190035/16761084
        def choice_vectorized(items, p):
            # Row-wise categorical sampling: invert each row's CDF with one
            # uniform draw per sample.
            s = p.cumsum(axis=1)
            r = rng.rand(p.shape[0])[:, None]
            k = (s < r).sum(axis=1)
            return items[k]

        y = choice_vectorized(np.arange(n_classes), p=proba).astype(np.float64)
    else:
        coef.flat[:] = rng.uniform(
            low=coef_bound[0],
            high=coef_bound[1],
            size=n_dof,
        )
        if linear_model_loss.fit_intercept:
            raw_prediction = X @ coef[:-1] + coef[-1]
        else:
            raw_prediction = X @ coef
        # Perturb the raw prediction so y is noisy rather than an exact
        # model output.
        y = linear_model_loss.base_loss.link.inverse(
            raw_prediction + rng.uniform(low=-1, high=1, size=n_samples)
        )

    return X, y, coef
|
||||
|
||||
|
||||
@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("n_features", [0, 1, 10])
@pytest.mark.parametrize("dtype", [None, np.float32, np.float64, np.int64])
def test_init_zero_coef(
    base_loss, fit_intercept, n_features, dtype, global_random_seed
):
    """Test that init_zero_coef initializes coef correctly."""
    loss = LinearModelLoss(base_loss=base_loss(), fit_intercept=fit_intercept)
    rng = np.random.RandomState(global_random_seed)
    X = rng.normal(size=(5, n_features))
    coef = loss.init_zero_coef(X, dtype=dtype)

    # The intercept, if requested, adds one degree of freedom (per class).
    n_dof = n_features + fit_intercept
    if loss.base_loss.is_multiclass:
        assert coef.shape == (loss.base_loss.n_classes, n_dof)
        assert coef.flags["F_CONTIGUOUS"]
    else:
        assert coef.shape == (n_dof,)

    # dtype=None means "inherit from X".
    expected_dtype = X.dtype if dtype is None else dtype
    assert coef.dtype == expected_dtype

    # Freshly initialized coefficients must be all zero.
    assert not np.any(coef)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("sample_weight", [None, "range"])
@pytest.mark.parametrize("l2_reg_strength", [0, 1])
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_loss_grad_hess_are_the_same(
    base_loss,
    fit_intercept,
    sample_weight,
    l2_reg_strength,
    csr_container,
    global_random_seed,
):
    """Test that loss and gradient are the same across different functions."""
    loss = LinearModelLoss(base_loss=base_loss(), fit_intercept=fit_intercept)
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss, n_samples=10, n_features=5, seed=global_random_seed
    )
    # Keep copies so we can verify at the end that no input was mutated.
    X_old, y_old, coef_old = X.copy(), y.copy(), coef.copy()

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])

    # Compute the same quantities through every available entry point.
    l1 = loss.loss(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g1 = loss.gradient(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    l2, g2 = loss.loss_gradient(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g3, h3 = loss.gradient_hessian_product(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g4, h4, _ = loss.gradient_hessian(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    assert_allclose(l1, l2)
    assert_allclose(g1, g2)
    assert_allclose(g1, g3)
    assert_allclose(g1, g4)
    # Full hessian (h4) applied to a vector must agree with the hessp
    # operator (h3).
    # The ravelling only takes effect for multiclass.
    assert_allclose(h4 @ g4.ravel(order="F"), h3(g3).ravel(order="F"))
    # Test that gradient_out and hessian_out are considered properly.
    g_out = np.empty_like(coef)
    h_out = np.empty_like(coef, shape=(coef.size, coef.size))
    g5, h5, _ = loss.gradient_hessian(
        coef,
        X,
        y,
        sample_weight=sample_weight,
        l2_reg_strength=l2_reg_strength,
        gradient_out=g_out,
        hessian_out=h_out,
    )
    # The returned arrays must reuse the provided buffers.
    assert np.shares_memory(g5, g_out)
    assert np.shares_memory(h5, h_out)
    assert_allclose(g5, g_out)
    assert_allclose(h5, h_out)
    assert_allclose(g1, g5)
    assert_allclose(h5, h4)

    # same for sparse X
    Xs = csr_container(X)
    l1_sp = loss.loss(
        coef, Xs, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g1_sp = loss.gradient(
        coef, Xs, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    l2_sp, g2_sp = loss.loss_gradient(
        coef, Xs, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g3_sp, h3_sp = loss.gradient_hessian_product(
        coef, Xs, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    g4_sp, h4_sp, _ = loss.gradient_hessian(
        coef, Xs, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    # Sparse results must match the dense ones computed above.
    assert_allclose(l1, l1_sp)
    assert_allclose(l1, l2_sp)
    assert_allclose(g1, g1_sp)
    assert_allclose(g1, g2_sp)
    assert_allclose(g1, g3_sp)
    assert_allclose(h3(g1), h3_sp(g1_sp))
    assert_allclose(g1, g4_sp)
    assert_allclose(h4, h4_sp)

    # X, y and coef should not have changed
    assert_allclose(X, X_old)
    assert_allclose(Xs.toarray(), X_old)
    assert_allclose(y, y_old)
    assert_allclose(coef, coef_old)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("sample_weight", [None, "range"])
@pytest.mark.parametrize("l2_reg_strength", [0, 1])
@pytest.mark.parametrize("X_container", CSR_CONTAINERS + [None])
def test_loss_gradients_hessp_intercept(
    base_loss, sample_weight, l2_reg_strength, X_container, global_random_seed
):
    """Test that loss and gradient handle intercept correctly.

    Fitting an intercept must be equivalent to appending a constant column
    of ones to X, up to the L2 penalty (which never applies to the
    intercept).
    """
    loss = LinearModelLoss(base_loss=base_loss(), fit_intercept=False)
    loss_inter = LinearModelLoss(base_loss=base_loss(), fit_intercept=True)
    n_samples, n_features = 10, 5
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss,
        n_samples=n_samples,
        n_features=n_features,
        seed=global_random_seed,
    )

    X[:, -1] = 1  # make last column of 1 to mimic intercept term
    X_inter = X[
        :, :-1
    ]  # exclude intercept column as it is added automatically by loss_inter

    if X_container is not None:
        X = X_container(X)

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])

    l, g = loss.loss_gradient(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    _, hessp = loss.gradient_hessian_product(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    l_inter, g_inter = loss_inter.loss_gradient(
        coef, X_inter, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    _, hessp_inter = loss_inter.gradient_hessian_product(
        coef, X_inter, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )

    # Note, that intercept gets no L2 penalty.
    assert l == pytest.approx(
        l_inter + 0.5 * l2_reg_strength * squared_norm(coef.T[-1])
    )

    # Re-apply the missing penalty term on the intercept coordinate before
    # comparing (modifies g_inter in place, which is fine here).
    g_inter_corrected = g_inter
    g_inter_corrected.T[-1] += l2_reg_strength * coef.T[-1]
    assert_allclose(g, g_inter_corrected)

    s = np.random.RandomState(global_random_seed).randn(*coef.shape)
    h = hessp(s)
    h_inter = hessp_inter(s)
    # Same penalty correction for the hessian-vector product.
    h_inter_corrected = h_inter
    h_inter_corrected.T[-1] += l2_reg_strength * s.T[-1]
    assert_allclose(h, h_inter_corrected)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("sample_weight", [None, "range"])
@pytest.mark.parametrize("l2_reg_strength", [0, 1])
def test_gradients_hessians_numerically(
    base_loss, fit_intercept, sample_weight, l2_reg_strength, global_random_seed
):
    """Test gradients and hessians with numerical derivatives.

    Gradient should equal the numerical derivatives of the loss function.
    Hessians should equal the numerical derivatives of gradients.
    """
    loss = LinearModelLoss(base_loss=base_loss(), fit_intercept=fit_intercept)
    n_samples, n_features = 10, 5
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss,
        n_samples=n_samples,
        n_features=n_features,
        seed=global_random_seed,
    )
    coef = coef.ravel(order="F")  # this is important only for multinomial loss

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])

    # 1. Check gradients numerically
    eps = 1e-6
    g, hessp = loss.gradient_hessian_product(
        coef, X, y, sample_weight=sample_weight, l2_reg_strength=l2_reg_strength
    )
    # Use a trick to get central finite difference of accuracy 4 (five-point stencil)
    # https://en.wikipedia.org/wiki/Numerical_differentiation
    # https://en.wikipedia.org/wiki/Finite_difference_coefficient
    # approx_g1 = (f(x + eps) - f(x - eps)) / (2*eps)
    # The `coef - eps` shift turns approx_fprime's forward difference into a
    # central difference.
    approx_g1 = optimize.approx_fprime(
        coef,
        lambda coef: loss.loss(
            coef - eps,
            X,
            y,
            sample_weight=sample_weight,
            l2_reg_strength=l2_reg_strength,
        ),
        2 * eps,
    )
    # approx_g2 = (f(x + 2*eps) - f(x - 2*eps)) / (4*eps)
    approx_g2 = optimize.approx_fprime(
        coef,
        lambda coef: loss.loss(
            coef - 2 * eps,
            X,
            y,
            sample_weight=sample_weight,
            l2_reg_strength=l2_reg_strength,
        ),
        4 * eps,
    )
    # Five-point stencil approximation
    # See: https://en.wikipedia.org/wiki/Five-point_stencil#1D_first_derivative
    approx_g = (4 * approx_g1 - approx_g2) / 3
    assert_allclose(g, approx_g, rtol=1e-2, atol=1e-8)

    # 2. Check hessp numerically along the second direction of the gradient
    vector = np.zeros_like(g)
    vector[1] = 1
    hess_col = hessp(vector)
    # Computation of the Hessian is particularly fragile to numerical errors when doing
    # simple finite differences. Here we compute the grad along a path in the direction
    # of the vector and then use a least-square regression to estimate the slope
    eps = 1e-3
    d_x = np.linspace(-eps, eps, 30)
    d_grad = np.array(
        [
            loss.gradient(
                coef + t * vector,
                X,
                y,
                sample_weight=sample_weight,
                l2_reg_strength=l2_reg_strength,
            )
            for t in d_x
        ]
    )
    # Center the gradients, then fit the slope d_grad ~ d_x without intercept.
    d_grad -= d_grad.mean(axis=0)
    approx_hess_col = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel()
    assert_allclose(approx_hess_col, hess_col, rtol=1e-3)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fit_intercept", [False, True])
def test_multinomial_coef_shape(fit_intercept, global_random_seed):
    """Test that multinomial LinearModelLoss respects shape of coef.

    Both the 2d (n_classes, n_dof) and the F-ravelled 1d representation of
    coef must be accepted and must yield outputs of the matching shape.
    """
    loss = LinearModelLoss(base_loss=HalfMultinomialLoss(), fit_intercept=fit_intercept)
    n_samples, n_features = 10, 5
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss,
        n_samples=n_samples,
        n_features=n_features,
        seed=global_random_seed,
    )
    s = np.random.RandomState(global_random_seed).randn(*coef.shape)

    # 2d coef: all outputs keep the 2d shape.
    l, g = loss.loss_gradient(coef, X, y)
    g1 = loss.gradient(coef, X, y)
    g2, hessp = loss.gradient_hessian_product(coef, X, y)
    h = hessp(s)
    assert g.shape == coef.shape
    assert h.shape == coef.shape
    assert_allclose(g, g1)
    assert_allclose(g, g2)
    g3, hess, _ = loss.gradient_hessian(coef, X, y)
    assert g3.shape == coef.shape
    # But full hessian is always 2d.
    assert hess.shape == (coef.size, coef.size)

    # Ravelled coef: outputs are ravelled as well.
    coef_r = coef.ravel(order="F")
    s_r = s.ravel(order="F")
    l_r, g_r = loss.loss_gradient(coef_r, X, y)
    g1_r = loss.gradient(coef_r, X, y)
    g2_r, hessp_r = loss.gradient_hessian_product(coef_r, X, y)
    h_r = hessp_r(s_r)
    assert g_r.shape == coef_r.shape
    assert h_r.shape == coef_r.shape
    assert_allclose(g_r, g1_r)
    assert_allclose(g_r, g2_r)

    # Both representations agree up to (F-order) reshaping.
    assert_allclose(g, g_r.reshape(loss.base_loss.n_classes, -1, order="F"))
    assert_allclose(h, h_r.reshape(loss.base_loss.n_classes, -1, order="F"))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("sample_weight", [None, "range"])
def test_multinomial_hessian_3_classes(sample_weight, global_random_seed):
    """Test multinomial hessian for 3 classes and 2 points.

    For n_classes = 3 and n_samples = 2, we have
        p0 = [p0_0, p0_1]
        p1 = [p1_0, p1_1]
        p2 = [p2_0, p2_1]
    and with 2 x 2 diagonal subblocks
        H = [p0 * (1-p0),    -p0 * p1,    -p0 * p2]
            [   -p0 * p1, p1 * (1-p1),    -p1 * p2]
            [   -p0 * p2,    -p1 * p2, p2 * (1-p2)]
    hess = X' H X
    """
    n_samples, n_features, n_classes = 2, 5, 3
    loss = LinearModelLoss(
        base_loss=HalfMultinomialLoss(n_classes=n_classes), fit_intercept=False
    )
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss,
        n_samples=n_samples,
        n_features=n_features,
        seed=global_random_seed,
    )
    coef = coef.ravel(order="F")  # this is important only for multinomial loss

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])

    grad, hess, _ = loss.gradient_hessian(
        coef,
        X,
        y,
        sample_weight=sample_weight,
        l2_reg_strength=0,
    )
    # Hessian must be a symmetric matrix.
    assert_allclose(hess, hess.T)

    weights, intercept, raw_prediction = loss.weight_intercept_raw(coef, X)
    grad_pointwise, proba = loss.base_loss.gradient_proba(
        y_true=y,
        raw_prediction=raw_prediction,
        sample_weight=sample_weight,
    )
    # Build the block matrix H from the per-sample predicted probabilities.
    p0d, p1d, p2d, oned = (
        np.diag(proba[:, 0]),
        np.diag(proba[:, 1]),
        np.diag(proba[:, 2]),
        np.diag(np.ones(2)),
    )
    h = np.block(
        [
            [p0d * (oned - p0d), -p0d * p1d, -p0d * p2d],
            [-p0d * p1d, p1d * (oned - p1d), -p1d * p2d],
            [-p0d * p2d, -p1d * p2d, p2d * (oned - p2d)],
        ]
    )
    h = h.reshape((n_classes, n_samples, n_classes, n_samples))
    # Apply the same sample averaging/weighting as the loss itself.
    if sample_weight is None:
        h /= n_samples
    else:
        h *= sample_weight / np.sum(sample_weight)
    # hess_expected.shape = (n_features, n_classes, n_classes, n_features)
    hess_expected = np.einsum("ij, mini, ik->jmnk", X, h, X)
    hess_expected = np.moveaxis(hess_expected, 2, 3)
    hess_expected = hess_expected.reshape(
        n_classes * n_features, n_classes * n_features, order="C"
    )
    assert_allclose(hess_expected, hess_expected.T)
    assert_allclose(hess, hess_expected)
|
||||
|
||||
|
||||
def test_linear_loss_gradient_hessian_raises_wrong_out_parameters():
    """Test that wrong gradient_out and hessian_out raises errors."""
    n_samples, n_features, n_classes = 5, 2, 3
    loss = LinearModelLoss(base_loss=HalfBinomialLoss(), fit_intercept=False)
    X = np.ones((n_samples, n_features))
    y = np.ones(n_samples)
    coef = loss.init_zero_coef(X)
    # A gradient_out buffer with the wrong shape must raise.
    gradient_out = np.zeros(1)
    with pytest.raises(
        ValueError, match="gradient_out is required to have shape coef.shape"
    ):
        loss.gradient_hessian(
            coef=coef,
            X=X,
            y=y,
            gradient_out=gradient_out,
            hessian_out=None,
        )
    # A hessian_out buffer with the wrong shape must raise.
    hessian_out = np.zeros(1)
    with pytest.raises(ValueError, match="hessian_out is required to have shape"):
        loss.gradient_hessian(
            coef=coef,
            X=X,
            y=y,
            gradient_out=None,
            hessian_out=hessian_out,
        )

    # Multiclass case: the out buffers additionally need the right memory
    # layout. Slicing with a step of 2 produces non-contiguous arrays.
    loss = LinearModelLoss(base_loss=HalfMultinomialLoss(), fit_intercept=False)
    coef = loss.init_zero_coef(X)
    gradient_out = np.zeros((2 * n_classes, n_features))[::2]
    with pytest.raises(ValueError, match="gradient_out must be F-contiguous"):
        loss.gradient_hessian(
            coef=coef,
            X=X,
            y=y,
            gradient_out=gradient_out,
        )
    hessian_out = np.zeros((2 * n_classes * n_features, n_classes * n_features))[::2]
    with pytest.raises(ValueError, match="hessian_out must be contiguous"):
        loss.gradient_hessian(
            coef=coef,
            X=X,
            y=y,
            gradient_out=None,
            hessian_out=hessian_out,
        )
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,273 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.datasets import make_sparse_coded_signal
|
||||
from sklearn.linear_model import (
|
||||
LinearRegression,
|
||||
OrthogonalMatchingPursuit,
|
||||
OrthogonalMatchingPursuitCV,
|
||||
orthogonal_mp,
|
||||
orthogonal_mp_gram,
|
||||
)
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.utils._testing import (
|
||||
assert_allclose,
|
||||
assert_array_almost_equal,
|
||||
assert_array_equal,
|
||||
ignore_warnings,
|
||||
)
|
||||
|
||||
# Module-level fixture data shared by all OMP tests below.
n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3
y, X, gamma = make_sparse_coded_signal(
    n_samples=n_targets,
    n_components=n_features,
    n_features=n_samples,
    n_nonzero_coefs=n_nonzero_coefs,
    random_state=0,
)
# Transpose so samples are rows (make_sparse_coded_signal returns the
# transposed layout).
y, X, gamma = y.T, X.T, gamma.T
# Make X not of norm 1 for testing
X *= 10
y *= 10
# Precomputed Gram matrix and X'y products used by the *_gram tests.
G, Xy = np.dot(X.T, X), np.dot(X.T, y)
# this makes X (n_samples, n_features)
# and y (n_samples, 3)
|
||||
|
||||
|
||||
def test_correct_shapes():
    """Solutions are (n_features,) for a 1d target, (n_features, k) for 2d."""
    single_target = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)
    multi_target = orthogonal_mp(X, y, n_nonzero_coefs=5)
    assert single_target.shape == (n_features,)
    assert multi_target.shape == (n_features, 3)


def test_correct_shapes_gram():
    """The Gram variant produces the same solution shapes."""
    single_target = orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5)
    multi_target = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5)
    assert single_target.shape == (n_features,)
    assert multi_target.shape == (n_features, 3)


def test_n_nonzero_coefs():
    """The solution never uses more atoms than requested."""
    plain = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)
    with_gram = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5, precompute=True)
    assert np.count_nonzero(plain) <= 5
    assert np.count_nonzero(with_gram) <= 5
|
||||
|
||||
|
||||
def test_tol():
    """Stopping on `tol` bounds the residual sum of squares by `tol`."""
    tol = 0.5
    target = y[:, 0]
    for precompute in (False, True):
        solution = orthogonal_mp(X, target, tol=tol, precompute=precompute)
        residual = target - np.dot(X, solution)
        assert np.sum(residual**2) <= tol


def test_with_without_gram():
    """Precomputing the Gram matrix must not change the solution."""
    plain = orthogonal_mp(X, y, n_nonzero_coefs=5)
    with_gram = orthogonal_mp(X, y, n_nonzero_coefs=5, precompute=True)
    assert_array_almost_equal(plain, with_gram)


def test_with_without_gram_tol():
    """Same as above, with a tolerance-based stopping criterion."""
    plain = orthogonal_mp(X, y, tol=1.0)
    with_gram = orthogonal_mp(X, y, tol=1.0, precompute=True)
    assert_array_almost_equal(plain, with_gram)
|
||||
|
||||
|
||||
def test_unreachable_accuracy():
    """tol=0 is unreachable and selects the maximal number of atoms."""
    assert_array_almost_equal(
        orthogonal_mp(X, y, tol=0), orthogonal_mp(X, y, n_nonzero_coefs=n_features)
    )
    warning_message = (
        "Orthogonal matching pursuit ended prematurely "
        "due to linear dependence in the dictionary. "
        "The requested precision might not have been met."
    )
    # With precompute=True the premature-end warning is emitted.
    with pytest.warns(RuntimeWarning, match=warning_message):
        assert_array_almost_equal(
            orthogonal_mp(X, y, tol=0, precompute=True),
            orthogonal_mp(X, y, precompute=True, n_nonzero_coefs=n_features),
        )


@pytest.mark.parametrize("positional_params", [(X, y), (G, Xy)])
@pytest.mark.parametrize(
    "keyword_params",
    [{"n_nonzero_coefs": n_features + 1}],
)
def test_bad_input(positional_params, keyword_params):
    # Requesting more atoms than there are features must raise.
    with pytest.raises(ValueError):
        orthogonal_mp(*positional_params, **keyword_params)
|
||||
|
||||
|
||||
def test_perfect_signal_recovery():
    """OMP recovers the true support and coefficients of the sparse signal."""
    (true_support,) = gamma[:, 0].nonzero()
    recovered = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)
    recovered_gram = orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5)
    for estimate in (recovered, recovered_gram):
        assert_array_equal(true_support, np.flatnonzero(estimate))
        assert_array_almost_equal(gamma[:, 0], estimate, decimal=2)


def test_orthogonal_mp_gram_readonly():
    # Non-regression test for:
    # https://github.com/scikit-learn/scikit-learn/issues/5956
    (true_support,) = gamma[:, 0].nonzero()
    gram_readonly = G.copy()
    gram_readonly.setflags(write=False)
    xy_readonly = Xy.copy()
    xy_readonly.setflags(write=False)
    # copy_Gram=False / copy_Xy=False must still work on read-only inputs.
    estimate = orthogonal_mp_gram(
        gram_readonly,
        xy_readonly[:, 0],
        n_nonzero_coefs=5,
        copy_Gram=False,
        copy_Xy=False,
    )
    assert_array_equal(true_support, np.flatnonzero(estimate))
    assert_array_almost_equal(gamma[:, 0], estimate, decimal=2)
|
||||
|
||||
|
||||
def test_estimator():
    """Check coef_/intercept_ shapes and sparsity of the estimator API."""
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    omp.fit(X, y[:, 0])
    assert omp.coef_.shape == (n_features,)
    assert omp.intercept_.shape == ()
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs

    omp.fit(X, y)
    assert omp.coef_.shape == (n_targets, n_features)
    assert omp.intercept_.shape == (n_targets,)
    assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs

    # NOTE(review): fit_intercept=True is already the default, so this refit
    # should reproduce the coefficients of the multi-target fit's first row —
    # confirm this is the intended check.
    coef_normalized = omp.coef_[0].copy()
    omp.set_params(fit_intercept=True)
    omp.fit(X, y[:, 0])
    assert_array_almost_equal(coef_normalized, omp.coef_)

    # Without an intercept the attribute is exactly 0.
    omp.set_params(fit_intercept=False)
    omp.fit(X, y[:, 0])
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
    assert omp.coef_.shape == (n_features,)
    assert omp.intercept_ == 0

    omp.fit(X, y)
    assert omp.coef_.shape == (n_targets, n_features)
    assert omp.intercept_ == 0
    assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs


def test_estimator_n_nonzero_coefs():
    """Check `n_nonzero_coefs_` correct when `tol` is and isn't set."""
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    omp.fit(X, y[:, 0])
    assert omp.n_nonzero_coefs_ == n_nonzero_coefs

    # When tol is set it takes precedence and n_nonzero_coefs_ stays None.
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs, tol=0.5)
    omp.fit(X, y[:, 0])
    assert omp.n_nonzero_coefs_ is None
|
||||
|
||||
|
||||
def test_identical_regressors():
    """Duplicate dictionary columns trigger the linear-dependence warning."""
    newX = X.copy()
    newX[:, 1] = newX[:, 0]
    gamma = np.zeros(n_features)
    gamma[0] = gamma[1] = 1.0
    newy = np.dot(newX, gamma)
    warning_message = (
        "Orthogonal matching pursuit ended prematurely "
        "due to linear dependence in the dictionary. "
        "The requested precision might not have been met."
    )
    with pytest.warns(RuntimeWarning, match=warning_message):
        orthogonal_mp(newX, newy, n_nonzero_coefs=2)


def test_swapped_regressors():
    gamma = np.zeros(n_features)
    # X[:, 21] should be selected first, then X[:, 0] selected second,
    # which will take X[:, 21]'s place in case the algorithm does
    # column swapping for optimization (which is the case at the moment)
    gamma[21] = 1.0
    gamma[0] = 0.5
    new_y = np.dot(X, gamma)
    new_Xy = np.dot(X.T, new_y)
    gamma_hat = orthogonal_mp(X, new_y, n_nonzero_coefs=2)
    gamma_hat_gram = orthogonal_mp_gram(G, new_Xy, n_nonzero_coefs=2)
    assert_array_equal(np.flatnonzero(gamma_hat), [0, 21])
    assert_array_equal(np.flatnonzero(gamma_hat_gram), [0, 21])


def test_no_atoms():
    """An all-zero target yields an all-zero solution."""
    y_empty = np.zeros_like(y)
    Xy_empty = np.dot(X.T, y_empty)
    gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty, n_nonzero_coefs=1)
    # NOTE(review): this call passes G/Xy_empty through orthogonal_mp rather
    # than orthogonal_mp_gram — presumably fine for a zero target; confirm.
    gamma_empty_gram = ignore_warnings(orthogonal_mp)(G, Xy_empty, n_nonzero_coefs=1)
    assert np.all(gamma_empty == 0)
    assert np.all(gamma_empty_gram == 0)
|
||||
|
||||
|
||||
def test_omp_path():
    """return_path=True yields the full path whose last step is the solution."""
    for solver, lhs, rhs in (
        (orthogonal_mp, X, y),
        (orthogonal_mp_gram, G, Xy),
    ):
        path = solver(lhs, rhs, n_nonzero_coefs=5, return_path=True)
        final = solver(lhs, rhs, n_nonzero_coefs=5, return_path=False)
        assert path.shape == (n_features, n_targets, 5)
        assert_array_almost_equal(path[:, :, -1], final)


def test_omp_return_path_prop_with_gram():
    """The path property also holds when the Gram matrix is precomputed."""
    path = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=True, precompute=True)
    final = orthogonal_mp(X, y, n_nonzero_coefs=5, return_path=False, precompute=True)
    assert path.shape == (n_features, n_targets, 5)
    assert_array_almost_equal(path[:, :, -1], final)
|
||||
|
||||
|
||||
def test_omp_cv():
    """OrthogonalMatchingPursuitCV selects the true sparsity level."""
    y_ = y[:, 0]
    gamma_ = gamma[:, 0]
    ompcv = OrthogonalMatchingPursuitCV(fit_intercept=False, max_iter=10)
    ompcv.fit(X, y_)
    assert ompcv.n_nonzero_coefs_ == n_nonzero_coefs
    assert_array_almost_equal(ompcv.coef_, gamma_)
    # A plain OMP with the CV-selected sparsity must agree with the CV fit.
    omp = OrthogonalMatchingPursuit(
        fit_intercept=False, n_nonzero_coefs=ompcv.n_nonzero_coefs_
    )
    omp.fit(X, y_)
    assert_array_almost_equal(ompcv.coef_, omp.coef_)


def test_omp_reaches_least_squares():
    # Use small simple data; it's a sanity check but OMP can stop early
    rng = check_random_state(0)
    n_samples, n_features = (10, 8)
    n_targets = 3
    X = rng.randn(n_samples, n_features)
    Y = rng.randn(n_samples, n_targets)
    # With as many atoms as features, OMP solves the full least-squares
    # problem and must match LinearRegression.
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_features)
    lstsq = LinearRegression()
    omp.fit(X, Y)
    lstsq.fit(X, Y)
    assert_array_almost_equal(omp.coef_, lstsq.coef_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("data_type", (np.float32, np.float64))
def test_omp_gram_dtype_match(data_type):
    # verify matching input data type and output data type
    coef = orthogonal_mp_gram(
        G.astype(data_type), Xy.astype(data_type), n_nonzero_coefs=5
    )
    assert coef.dtype == data_type


def test_omp_gram_numerical_consistency():
    # verify numerical consistency among np.float32 and np.float64
    coef_32 = orthogonal_mp_gram(
        G.astype(np.float32), Xy.astype(np.float32), n_nonzero_coefs=5
    )
    # NOTE(review): G is cast to float32 here while Xy is float64 —
    # presumably a deliberate mixed-precision check, but confirm it should
    # not be G.astype(np.float64) to exercise the pure float64 path.
    coef_64 = orthogonal_mp_gram(
        G.astype(np.float32), Xy.astype(np.float64), n_nonzero_coefs=5
    )
    assert_allclose(coef_32, coef_64)
|
||||
@@ -0,0 +1,345 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from numpy.testing import assert_allclose
|
||||
from scipy.sparse import issparse
|
||||
|
||||
from sklearn.base import ClassifierMixin
|
||||
from sklearn.datasets import load_iris, make_classification, make_regression
|
||||
from sklearn.linear_model import (
|
||||
PassiveAggressiveClassifier,
|
||||
PassiveAggressiveRegressor,
|
||||
SGDClassifier,
|
||||
SGDRegressor,
|
||||
)
|
||||
from sklearn.linear_model._base import SPARSE_INTERCEPT_DECAY
|
||||
from sklearn.linear_model._stochastic_gradient import DEFAULT_EPSILON
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.utils._testing import (
|
||||
assert_almost_equal,
|
||||
assert_array_equal,
|
||||
)
|
||||
from sklearn.utils.fixes import CSR_CONTAINERS
|
||||
|
||||
# Module-level fixture: the iris dataset, shuffled with a fixed seed so all
# tests below see the same sample order.
iris = load_iris()
random_state = check_random_state(12)
indices = np.arange(iris.data.shape[0])
random_state.shuffle(indices)
X = iris.data[indices]
y = iris.target[indices]
|
||||
|
||||
|
||||
# TODO(1.10): Move to test_sgd.py
class MyPassiveAggressive(ClassifierMixin):
    """Plain-Python reference implementation of the passive-aggressive updates.

    Used to check PassiveAggressiveClassifier/Regressor against an
    independent implementation of the same per-sample update rule.
    """

    def __init__(
        self,
        C=1.0,
        epsilon=DEFAULT_EPSILON,
        loss="hinge",
        fit_intercept=True,
        n_iter=1,
        random_state=None,
    ):
        # NOTE(review): random_state is accepted but never stored or used —
        # presumably kept for signature parity with the real estimator.
        self.C = C
        self.epsilon = epsilon
        self.loss = loss
        self.fit_intercept = fit_intercept
        self.n_iter = n_iter

    def fit(self, X, y):
        """Run n_iter passes of per-sample passive-aggressive updates."""
        n_samples, n_features = X.shape
        # w: weight vector, b: intercept.
        self.w = np.zeros(n_features, dtype=np.float64)
        self.b = 0.0

        # Mimic SGD's behavior for intercept
        intercept_decay = 1.0
        if issparse(X):
            intercept_decay = SPARSE_INTERCEPT_DECAY
            X = X.toarray()

        for t in range(self.n_iter):
            for i in range(n_samples):
                p = self.project(X[i])
                # Classification losses use the margin; regression losses use
                # the epsilon-insensitive absolute error.
                if self.loss in ("hinge", "squared_hinge"):
                    loss = max(1 - y[i] * p, 0)
                else:
                    loss = max(np.abs(p - y[i]) - self.epsilon, 0)

                sqnorm = np.dot(X[i], X[i])

                # Step size: clipped at C for the linear losses, smoothed by
                # 1/(2C) for the squared losses.
                if self.loss in ("hinge", "epsilon_insensitive"):
                    step = min(self.C, loss / sqnorm)
                elif self.loss in ("squared_hinge", "squared_epsilon_insensitive"):
                    step = loss / (sqnorm + 1.0 / (2 * self.C))

                # Step direction: label for classification, sign of the
                # residual for regression.
                if self.loss in ("hinge", "squared_hinge"):
                    step *= y[i]
                else:
                    step *= np.sign(y[i] - p)

                self.w += step * X[i]
                if self.fit_intercept:
                    self.b += intercept_decay * step

    def project(self, X):
        """Return the decision value(s) w @ X + b."""
        return np.dot(X, self.w) + self.b
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("average", [False, True])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
def test_classifier_accuracy(csr_container, fit_intercept, average):
    """Fitting on iris reaches a reasonable accuracy, dense or sparse."""
    if csr_container is None:
        data = X
    else:
        data = csr_container(X)
    clf = PassiveAggressiveClassifier(
        C=1.0,
        max_iter=30,
        fit_intercept=fit_intercept,
        random_state=1,
        average=average,
        tol=None,
    )
    clf.fit(data, y)
    assert clf.score(data, y) > 0.79
    if average:
        # Averaging keeps both the running average and the last solution.
        for attr in (
            "_average_coef",
            "_average_intercept",
            "_standard_intercept",
            "_standard_coef",
        ):
            assert hasattr(clf, attr)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("average", [False, True])
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
def test_classifier_partial_fit(csr_container, average):
    """Repeated partial_fit reaches a reasonable accuracy, dense or sparse."""
    classes = np.unique(y)
    if csr_container is None:
        data = X
    else:
        data = csr_container(X)
    clf = PassiveAggressiveClassifier(random_state=0, average=average, max_iter=5)
    for _ in range(30):
        clf.partial_fit(data, y, classes)
    assert clf.score(data, y) > 0.79
    if average:
        # Averaging keeps both the running average and the last solution.
        for attr in (
            "_average_coef",
            "_average_intercept",
            "_standard_intercept",
            "_standard_coef",
        ):
            assert hasattr(clf, attr)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_classifier_refit():
    """A fitted classifier can be retrained on different labels and features."""
    clf = PassiveAggressiveClassifier(max_iter=5)
    clf.fit(X, y)
    assert_array_equal(clf.classes_, np.unique(y))

    # Refit with one feature dropped and string labels instead of integers.
    clf.fit(X[:, :-1], iris.target_names[y])
    assert_array_equal(clf.classes_, iris.target_names)
|
||||
|
||||
|
||||
# TODO(1.10): Move to test_sgd.py
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
@pytest.mark.parametrize("loss", ("hinge", "squared_hinge"))
def test_classifier_correctness(loss, csr_container):
    """Compare PassiveAggressiveClassifier against the reference class."""
    # Binarize the problem: class 1 vs. the rest.
    y_bin = y.copy()
    y_bin[y != 1] = -1
    data = csr_container(X) if csr_container is not None else X

    clf1 = MyPassiveAggressive(loss=loss, n_iter=4)
    clf1.fit(data, y_bin)

    # shuffle=False and tol=None make the update order match the reference.
    clf2 = PassiveAggressiveClassifier(loss=loss, max_iter=4, shuffle=False, tol=None)
    clf2.fit(data, y_bin)

    assert_allclose(clf1.w, clf2.coef_.ravel())
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize(
    "response_method", ["predict_proba", "predict_log_proba", "transform"]
)
def test_classifier_undefined_methods(response_method):
    """Probability/transform accessors are not part of the PA classifier API."""
    clf = PassiveAggressiveClassifier(max_iter=100)
    # merely accessing the attribute must raise, not just calling it
    with pytest.raises(AttributeError):
        getattr(clf, response_method)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_class_weights():
    """Down-weighting one class should flip the prediction of a border point."""
    X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y2 = [1, 1, 1, -1, -1]
    probe = [[0.2, -1.0]]

    unweighted = PassiveAggressiveClassifier(
        C=0.1, max_iter=100, class_weight=None, random_state=100
    )
    unweighted.fit(X2, y2)
    assert_array_equal(unweighted.predict(probe), np.array([1]))

    # With class 1 nearly ignored, the hyperplane rotates clockwise and the
    # prediction at the probe point flips.
    downweighted = PassiveAggressiveClassifier(
        C=0.1, max_iter=100, class_weight={1: 0.001}, random_state=100
    )
    downweighted.fit(X2, y2)
    assert_array_equal(downweighted.predict(probe), np.array([-1]))
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_partial_fit_weight_class_balanced():
    """class_weight='balanced' cannot be computed incrementally, so
    partial_fit has to reject it."""
    clf = PassiveAggressiveClassifier(class_weight="balanced", max_iter=100)
    with pytest.raises(ValueError):
        clf.partial_fit(X, y, classes=np.unique(y))
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_equal_class_weight():
    """On balanced data, None, 'balanced' and explicit equal weights agree."""
    X2 = [[1, 0], [1, 0], [0, 1], [0, 1]]
    y2 = [0, 0, 1, 1]

    unweighted = PassiveAggressiveClassifier(C=0.1, tol=None, class_weight=None)
    unweighted.fit(X2, y2)

    # data is already balanced, so "balanced" should have no effect
    balanced = PassiveAggressiveClassifier(C=0.1, tol=None, class_weight="balanced")
    balanced.fit(X2, y2)

    uniform = PassiveAggressiveClassifier(
        C=0.1, tol=None, class_weight={0: 0.5, 1: 0.5}
    )
    uniform.fit(X2, y2)

    # only approximate equality, because of the learning-rate schedule
    assert_almost_equal(unweighted.coef_, uniform.coef_, decimal=2)
    assert_almost_equal(unweighted.coef_, balanced.coef_, decimal=2)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_wrong_class_weight_label():
    """A class_weight key that is not an actual label must raise ValueError."""
    X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y2 = [1, 1, 1, -1, -1]

    # label 0 does not occur in y2
    clf = PassiveAggressiveClassifier(class_weight={0: 0.5}, max_iter=100)
    with pytest.raises(ValueError):
        clf.fit(X2, y2)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("average", [False, True])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
def test_regressor_mse(csr_container, fit_intercept, average):
    """PA regression reaches a small MSE on binarized targets."""
    y_bin = np.where(y == 1, 1, -1)
    X_train = X if csr_container is None else csr_container(X)

    reg = PassiveAggressiveRegressor(
        C=1.0,
        fit_intercept=fit_intercept,
        random_state=0,
        average=average,
        max_iter=5,
    )
    reg.fit(X_train, y_bin)
    residuals = reg.predict(X_train) - y_bin
    assert np.mean(residuals**2) < 1.7
    if average:
        # averaging keeps both the running-average and raw weight buffers
        for attr in (
            "_average_coef",
            "_average_intercept",
            "_standard_intercept",
            "_standard_coef",
        ):
            assert hasattr(reg, attr)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("average", [False, True])
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
def test_regressor_partial_fit(csr_container, average):
    """Repeated partial_fit epochs should reach a small MSE."""
    y_bin = np.where(y == 1, 1, -1)
    X_train = X if csr_container is None else csr_container(X)

    reg = PassiveAggressiveRegressor(random_state=0, average=average, max_iter=100)
    for _ in range(50):
        reg.partial_fit(X_train, y_bin)
    residuals = reg.predict(X_train) - y_bin
    assert np.mean(residuals**2) < 1.7
    if average:
        # averaging keeps both the running-average and raw weight buffers
        for attr in (
            "_average_coef",
            "_average_intercept",
            "_standard_intercept",
            "_standard_coef",
        ):
            assert hasattr(reg, attr)
|
||||
|
||||
|
||||
# TODO(1.10): Move to test_sgd.py
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
@pytest.mark.parametrize("loss", ("epsilon_insensitive", "squared_epsilon_insensitive"))
def test_regressor_correctness(loss, csr_container):
    """The reference PA implementation and sklearn's agree coefficient-wise."""
    y_bin = np.where(y == 1, 1, -1)
    X_train = X if csr_container is None else csr_container(X)

    reference = MyPassiveAggressive(loss=loss, n_iter=4)
    reference.fit(X_train, y_bin)

    reg = PassiveAggressiveRegressor(loss=loss, max_iter=4, shuffle=False, tol=None)
    reg.fit(X_train, y_bin)

    assert_allclose(reference.w, reg.coef_.ravel())
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_regressor_undefined_methods():
    """The PA regressor does not expose a transform method."""
    reg = PassiveAggressiveRegressor(max_iter=100)
    with pytest.raises(AttributeError):
        reg.transform(X)
|
||||
|
||||
|
||||
# TODO(1.10): remove
@pytest.mark.parametrize(
    "Estimator", [PassiveAggressiveClassifier, PassiveAggressiveRegressor]
)
def test_class_deprecation(Estimator):
    """Instantiating either PA estimator emits the deprecation warning."""
    with pytest.warns(FutureWarning, match="Class PassiveAggressive.+is deprecated"):
        Estimator()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(["loss", "lr"], [("hinge", "pa1"), ("squared_hinge", "pa2")])
def test_passive_aggressive_classifier_vs_sgd(loss, lr):
    """PA classification is equivalent to SGD with the matching pa* schedule."""
    X, y = make_classification(
        n_samples=100, n_features=10, n_informative=5, random_state=1234
    )
    pa = PassiveAggressiveClassifier(loss=loss, C=0.987, random_state=42)
    pa.fit(X, y)
    # C of PA maps to eta0 of the pa1/pa2 learning-rate schedules
    sgd = SGDClassifier(
        loss="hinge", penalty=None, learning_rate=lr, eta0=0.987, random_state=42
    )
    sgd.fit(X, y)
    assert_allclose(pa.decision_function(X), sgd.decision_function(X))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ["loss", "lr"],
    [("epsilon_insensitive", "pa1"), ("squared_epsilon_insensitive", "pa2")],
)
def test_passive_aggressive_regressor_vs_sgd(loss, lr):
    """PA regression is equivalent to SGD with the matching pa* schedule."""
    X, y = make_regression(
        n_samples=100, n_features=10, n_informative=5, random_state=1234
    )
    pa = PassiveAggressiveRegressor(
        loss=loss, epsilon=0.123, C=0.987, random_state=42
    )
    pa.fit(X, y)
    # C of PA maps to eta0 of the pa1/pa2 learning-rate schedules
    sgd = SGDRegressor(
        loss="epsilon_insensitive",
        epsilon=0.123,
        penalty=None,
        learning_rate=lr,
        eta0=0.987,
        random_state=42,
    )
    sgd.fit(X, y)
    assert_allclose(pa.predict(X), sgd.predict(X))
|
||||
@@ -0,0 +1,88 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.datasets import load_iris
|
||||
from sklearn.linear_model import Perceptron
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.utils._testing import assert_allclose, assert_array_almost_equal
|
||||
from sklearn.utils.fixes import CSR_CONTAINERS
|
||||
|
||||
# Shared fixtures: iris samples shuffled once with a fixed seed so class
# labels are interleaved for the online learners below.
iris = load_iris()
random_state = check_random_state(12)
# permutation(n) consumes the RNG exactly like arange(n) + shuffle
indices = random_state.permutation(iris.data.shape[0])
X = iris.data[indices]
y = iris.target[indices]
|
||||
|
||||
|
||||
class MyPerceptron:
    """Minimal reference perceptron used to validate sklearn's Perceptron."""

    def __init__(self, n_iter=1):
        # number of full passes over the training data
        self.n_iter = n_iter

    def fit(self, X, y):
        """Run the classic perceptron update for ``n_iter`` passes."""
        _, n_features = X.shape
        self.w = np.zeros(n_features, dtype=np.float64)
        self.b = 0.0

        for _ in range(self.n_iter):
            for xi, target in zip(X, y):
                # update only on misclassified samples
                if self.predict(xi)[0] != target:
                    self.w += target * xi
                    self.b += target

    def project(self, X):
        """Signed decision value for each row of ``X``."""
        return X @ self.w + self.b

    def predict(self, X):
        """Predict labels in {-1, 0, +1} (0 only on the boundary)."""
        return np.sign(self.project(np.atleast_2d(X)))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("container", CSR_CONTAINERS + [np.array])
def test_perceptron_accuracy(container):
    """Perceptron fits shuffled iris reasonably well, dense or sparse."""
    X_train = container(X)
    clf = Perceptron(max_iter=100, tol=None, shuffle=False)
    clf.fit(X_train, y)
    assert clf.score(X_train, y) > 0.7
|
||||
|
||||
|
||||
def test_perceptron_correctness():
    """sklearn's Perceptron matches the reference implementation."""
    y_bin = np.where(y == 1, 1, -1)

    reference = MyPerceptron(n_iter=2)
    reference.fit(X, y_bin)

    clf = Perceptron(max_iter=2, shuffle=False, tol=None)
    clf.fit(X, y_bin)

    assert_array_almost_equal(reference.w, clf.coef_.ravel())
|
||||
|
||||
|
||||
def test_undefined_methods():
    """Perceptron does not expose probability estimates."""
    clf = Perceptron(max_iter=100)
    for accessor in ("predict_proba", "predict_log_proba"):
        with pytest.raises(AttributeError):
            getattr(clf, accessor)
|
||||
|
||||
|
||||
def test_perceptron_l1_ratio():
    """Check that `l1_ratio` has an impact when `penalty='elasticnet'`"""
    clf_a = Perceptron(l1_ratio=0, penalty="elasticnet")
    clf_a.fit(X, y)

    clf_b = Perceptron(l1_ratio=0.15, penalty="elasticnet")
    clf_b.fit(X, y)

    assert clf_a.score(X, y) != clf_b.score(X, y)

    # At its extremes, elastic net degenerates to a pure l1 or l2 penalty.
    clf_l1 = Perceptron(penalty="l1").fit(X, y)
    clf_elasticnet = Perceptron(l1_ratio=1, penalty="elasticnet").fit(X, y)
    assert_allclose(clf_l1.coef_, clf_elasticnet.coef_)

    clf_l2 = Perceptron(penalty="l2").fit(X, y)
    clf_elasticnet = Perceptron(l1_ratio=0, penalty="elasticnet").fit(X, y)
    assert_allclose(clf_l2.coef_, clf_elasticnet.coef_)
|
||||
@@ -0,0 +1,283 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from pytest import approx
|
||||
from scipy.optimize import minimize
|
||||
|
||||
from sklearn.datasets import make_regression
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.linear_model import HuberRegressor, QuantileRegressor
|
||||
from sklearn.metrics import mean_pinball_loss
|
||||
from sklearn.utils._testing import assert_allclose
|
||||
from sklearn.utils.fixes import (
|
||||
COO_CONTAINERS,
|
||||
CSC_CONTAINERS,
|
||||
CSR_CONTAINERS,
|
||||
parse_version,
|
||||
sp_version,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture
def X_y_data():
    """Tiny single-feature regression problem shared across tests."""
    return make_regression(n_samples=10, n_features=1, random_state=0, noise=1)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    parse_version(sp_version.base_version) >= parse_version("1.11"),
    reason="interior-point solver is not available in SciPy 1.11",
)
@pytest.mark.parametrize("solver", ["interior-point", "revised simplex"])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_incompatible_solver_for_sparse_input(X_y_data, solver, csc_container):
    """Legacy linprog solvers cannot consume sparse matrices and must say so."""
    X, y = X_y_data
    err_msg = (
        f"Solver {solver} does not support sparse X. Use solver 'highs' for example."
    )
    with pytest.raises(ValueError, match=err_msg):
        QuantileRegressor(solver=solver).fit(csc_container(X), y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "quantile, alpha, intercept, coef",
    [
        # for 50% quantile w/o regularization, any slope in [1, 10] is okay
        [0.5, 0, 1, None],
        # if positive error costs more, the slope is maximal
        [0.51, 0, 1, 10],
        # if negative error costs more, the slope is minimal
        [0.49, 0, 1, 1],
        # for a small lasso penalty, the slope is also minimal
        [0.5, 0.01, 1, 1],
        # for a large lasso penalty, the model predicts the constant median
        [0.5, 100, 2, 0],
    ],
)
def test_quantile_toy_example(quantile, alpha, intercept, coef):
    """Check parameter effects on a small, hand-checkable example."""
    X = [[0], [1], [1]]
    y = [1, 2, 11]
    model = QuantileRegressor(quantile=quantile, alpha=alpha).fit(X, y)
    assert_allclose(model.intercept_, intercept, atol=1e-2)
    if coef is not None:
        assert_allclose(model.coef_[0], coef, atol=1e-2)
    if alpha < 100:
        # without the heavy penalty the slope stays within the feasible band
        assert 1 <= model.coef_[0] <= 10
|
||||
|
||||
|
||||
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_quantile_equals_huber_for_low_epsilon(fit_intercept):
    """Huber with epsilon close to 1 approximates the median regressor."""
    X, y = make_regression(n_samples=100, n_features=20, random_state=0, noise=1.0)
    alpha = 1e-4
    huber = HuberRegressor(
        epsilon=1 + 1e-4, alpha=alpha, fit_intercept=fit_intercept
    ).fit(X, y)
    quant = QuantileRegressor(alpha=alpha, fit_intercept=fit_intercept).fit(X, y)
    assert_allclose(huber.coef_, quant.coef_, atol=1e-1)
    if fit_intercept:
        assert huber.intercept_ == approx(quant.intercept_, abs=1e-1)
        # check that we still predict fraction
        assert np.mean(y < quant.predict(X)) == approx(0.5, abs=1e-1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("q", [0.5, 0.9, 0.05])
def test_quantile_estimates_calibration(q):
    """The fitted quantile matches the empirical fraction below predictions."""
    X, y = make_regression(n_samples=1000, n_features=20, random_state=0, noise=1.0)
    quant = QuantileRegressor(quantile=q, alpha=0).fit(X, y)
    assert np.mean(y < quant.predict(X)) == approx(q, abs=1e-2)
|
||||
|
||||
|
||||
def test_quantile_sample_weight():
    """Unequal sample weights still calibrate the *weighted* fraction."""
    n = 1000
    X, y = make_regression(n_samples=n, n_features=5, random_state=0, noise=10.0)
    weight = np.ones(n)
    # Upweighting the upper half of the targets pushes the estimate up ...
    weight[y > y.mean()] = 100
    quant = QuantileRegressor(quantile=0.5, alpha=1e-8)
    quant.fit(X, y, sample_weight=weight)
    below = y < quant.predict(X)
    assert np.mean(below) > 0.5
    # ... while the weighted fraction below stays at the requested 0.5.
    assert np.average(below, weights=weight) == approx(0.5, abs=3e-2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("quantile", [0.2, 0.5, 0.8])
def test_asymmetric_error(quantile):
    """Quantile regression recovers the parameters of exponential targets."""
    n_samples = 1000
    rng = np.random.RandomState(42)
    # one positive continuous feature plus one binary feature in {-1, 0}
    X = np.concatenate(
        (
            np.abs(rng.randn(n_samples)[:, None]),
            -rng.randint(2, size=(n_samples, 1)),
        ),
        axis=1,
    )
    intercept = 1.23
    coef = np.array([0.5, -2])
    # the exponential scale below must stay strictly positive
    assert np.min(X @ coef + intercept) > 0
    # For an exponential distribution with rate lambda, e.g. exp(-lambda * x),
    # the quantile at level q is:
    #   quantile(q) = -log(1 - q) / lambda
    #   scale = 1/lambda = -quantile(q) / log(1 - q)
    y = rng.exponential(
        scale=-(X @ coef + intercept) / np.log(1 - quantile), size=n_samples
    )
    model = QuantileRegressor(quantile=quantile, alpha=0).fit(X, y)
    # Any solver would pass this test; only the fastest one is exercised
    # to spare continuous-integration resources.
    assert model.intercept_ == approx(intercept, rel=0.2)
    assert_allclose(model.coef_, coef, rtol=0.6)
    assert_allclose(np.mean(model.predict(X) > y), quantile, atol=1e-2)

    # Cross-check against Nelder-Mead minimization of the L1-penalized
    # pinball loss.
    alpha = 0.01
    model.set_params(alpha=alpha).fit(X, y)
    model_coef = np.r_[model.intercept_, model.coef_]

    def objective(params):
        # params[0] is the intercept, params[1:] the slopes
        pinball = mean_pinball_loss(y, X @ params[1:] + params[0], alpha=quantile)
        return pinball + alpha * np.sum(np.abs(params[1:]))

    res = minimize(
        fun=objective,
        x0=[1, 0, -1],
        method="Nelder-Mead",
        tol=1e-12,
        options={"maxiter": 2000},
    )

    assert objective(model_coef) == approx(objective(res.x))
    assert_allclose(model.intercept_, res.x[0])
    assert_allclose(model.coef_, res.x[1:])
    assert_allclose(np.mean(model.predict(X) > y), quantile, atol=1e-2)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("quantile", [0.2, 0.5, 0.8])
def test_equivariance(quantile):
    """Test equivariance of quantile regression.

    See Koenker (2005) Quantile Regression, Chapter 2.2.3.
    """
    rng = np.random.RandomState(42)
    n_samples, n_features = 100, 5
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_features,
        noise=0,
        random_state=rng,
        shuffle=False,
    )
    # make y asymmetric
    y += rng.exponential(scale=100, size=y.shape)
    params = dict(alpha=0)
    model1 = QuantileRegressor(quantile=quantile, **params).fit(X, y)

    # coef(q; a*y, X) = a * coef(q; y, X)
    a = 2.5
    model2 = QuantileRegressor(quantile=quantile, **params).fit(X, a * y)
    assert model2.intercept_ == approx(a * model1.intercept_, rel=1e-5)
    assert_allclose(model2.coef_, a * model1.coef_, rtol=1e-5)

    # coef(1-q; -a*y, X) = -a * coef(q; y, X)
    model2 = QuantileRegressor(quantile=1 - quantile, **params).fit(X, -a * y)
    assert model2.intercept_ == approx(-a * model1.intercept_, rel=1e-5)
    assert_allclose(model2.coef_, -a * model1.coef_, rtol=1e-5)

    # coef(q; y + X @ g, X) = coef(q; y, X) + g
    g_intercept, g_coef = rng.randn(), rng.randn(n_features)
    model2 = QuantileRegressor(quantile=quantile, **params)
    model2.fit(X, y + X @ g_coef + g_intercept)
    assert model2.intercept_ == approx(model1.intercept_ + g_intercept)
    assert_allclose(model2.coef_, model1.coef_ + g_coef, rtol=1e-6)

    # coef(q; y, X @ A) = A^-1 @ coef(q; y, X)
    A = rng.randn(n_features, n_features)
    model2 = QuantileRegressor(quantile=quantile, **params)
    model2.fit(X @ A, y)
    assert model2.intercept_ == approx(model1.intercept_, rel=1e-5)
    assert_allclose(model2.coef_, np.linalg.solve(A, model1.coef_), rtol=1e-5)
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    parse_version(sp_version.base_version) >= parse_version("1.11"),
    reason="interior-point solver is not available in SciPy 1.11",
)
@pytest.mark.filterwarnings("ignore:`method='interior-point'` is deprecated")
def test_linprog_failure():
    """A non-converged linprog run must raise a ConvergenceWarning."""
    X = np.linspace(0, 10, num=10).reshape(-1, 1)
    y = np.linspace(0, 10, num=10)
    # maxiter=1 guarantees the solver stops before converging
    reg = QuantileRegressor(
        alpha=0, solver="interior-point", solver_options={"maxiter": 1}
    )
    msg = "Linear programming for QuantileRegressor did not succeed."
    with pytest.warns(ConvergenceWarning, match=msg):
        reg.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "sparse_container", CSC_CONTAINERS + CSR_CONTAINERS + COO_CONTAINERS
)
@pytest.mark.parametrize("solver", ["highs", "highs-ds", "highs-ipm"])
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_sparse_input(sparse_container, solver, fit_intercept, global_random_seed):
    """Test that sparse and dense X give same results."""
    n_informative = 10
    quantile_level = 0.6
    X, y = make_regression(
        n_samples=300,
        n_features=20,
        # use the named constant instead of a duplicated literal so the
        # support-size assertions below stay in sync with the generated data
        n_informative=n_informative,
        random_state=global_random_seed,
        noise=1.0,
    )
    X_sparse = sparse_container(X)
    alpha = 0.1
    quant_dense = QuantileRegressor(
        quantile=quantile_level, alpha=alpha, fit_intercept=fit_intercept
    ).fit(X, y)
    quant_sparse = QuantileRegressor(
        quantile=quantile_level, alpha=alpha, fit_intercept=fit_intercept, solver=solver
    ).fit(X_sparse, y)
    assert_allclose(quant_sparse.coef_, quant_dense.coef_, rtol=1e-2)
    # both fits should select (roughly) the informative features only
    sparse_support = quant_sparse.coef_ != 0
    dense_support = quant_dense.coef_ != 0
    assert dense_support.sum() == pytest.approx(n_informative, abs=1)
    assert sparse_support.sum() == pytest.approx(n_informative, abs=1)
    if fit_intercept:
        assert quant_sparse.intercept_ == approx(quant_dense.intercept_)
        # check that we still predict fraction
        empirical_coverage = np.mean(y < quant_sparse.predict(X_sparse))
        assert empirical_coverage == approx(quantile_level, abs=3e-2)
|
||||
|
||||
|
||||
def test_error_interior_point_future(X_y_data, monkeypatch):
    """Requesting solver='interior-point' under SciPy >= 1.11 must raise."""
    X, y = X_y_data
    import sklearn.linear_model._quantile

    with monkeypatch.context() as m:
        # pretend SciPy 1.11 is installed, where the solver was removed
        m.setattr(sklearn.linear_model._quantile, "sp_version", parse_version("1.11.0"))
        err_msg = "Solver interior-point is not anymore available in SciPy >= 1.11.0."
        with pytest.raises(ValueError, match=err_msg):
            QuantileRegressor(solver="interior-point").fit(X, y)
|
||||
@@ -0,0 +1,543 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
from numpy.testing import assert_array_almost_equal, assert_array_equal
|
||||
|
||||
from sklearn.datasets import make_regression
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.linear_model import (
|
||||
LinearRegression,
|
||||
OrthogonalMatchingPursuit,
|
||||
RANSACRegressor,
|
||||
Ridge,
|
||||
)
|
||||
from sklearn.linear_model._ransac import _dynamic_max_trials
|
||||
from sklearn.utils import check_random_state
|
||||
from sklearn.utils._testing import assert_allclose
|
||||
from sklearn.utils.fixes import COO_CONTAINERS, CSC_CONTAINERS, CSR_CONTAINERS
|
||||
|
||||
# Shared fixture: points on the line y = 0.2 * x + 20 ...
X = np.arange(-200, 200)
y = 0.2 * X + 20
data = np.column_stack((X, y))

# ... with a random subset corrupted so RANSAC has outliers to reject.
rng = np.random.RandomState(1000)
outliers = np.unique(rng.randint(len(X), size=200))
data[outliers, :] += 50 + rng.rand(len(outliers), 2) * 10

X = data[:, 0][:, np.newaxis]
y = data[:, 1]
|
||||
|
||||
|
||||
def test_ransac_inliers_outliers():
    """RANSAC flags exactly the injected outliers."""
    ransac_estimator = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=5, random_state=0
    )
    ransac_estimator.fit(X, y)

    # ground-truth mask: everything is an inlier except the corrupted rows
    expected_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    expected_mask[outliers] = False
    assert_array_equal(ransac_estimator.inlier_mask_, expected_mask)
|
||||
|
||||
|
||||
def test_ransac_is_data_valid():
    """A data-validity callback that always rejects makes fit() fail."""

    def is_data_valid(X, y):
        # each subsample must have exactly min_samples rows
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    rng = np.random.RandomState(0)
    X_local = rng.rand(10, 2)
    y_local = rng.rand(10, 1)

    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=5,
        is_data_valid=is_data_valid,
        random_state=0,
    )
    with pytest.raises(ValueError):
        ransac_estimator.fit(X_local, y_local)
|
||||
|
||||
|
||||
def test_ransac_is_model_valid():
    """A model-validity callback that always rejects makes fit() fail."""

    def is_model_valid(estimator, X, y):
        # each subsample must have exactly min_samples rows
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=5,
        is_model_valid=is_model_valid,
        random_state=0,
    )
    with pytest.raises(ValueError):
        ransac_estimator.fit(X, y)
|
||||
|
||||
|
||||
def test_ransac_max_trials():
    """`max_trials` bounds the number of sampling iterations."""
    estimator = LinearRegression()

    # max_trials=0 leaves no chance to find a consensus set
    ransac_estimator = RANSACRegressor(
        estimator,
        min_samples=2,
        residual_threshold=5,
        max_trials=0,
        random_state=0,
    )
    with pytest.raises(ValueError):
        ransac_estimator.fit(X, y)

    # there is a 1e-9 chance it will take these many trials. No good reason
    # 1e-2 isn't enough, can still happen
    # 2 is the what ransac defines as min_samples = X.shape[1] + 1
    max_trials = _dynamic_max_trials(len(X) - len(outliers), X.shape[0], 2, 1 - 1e-9)
    ransac_estimator = RANSACRegressor(estimator, min_samples=2)
    for seed in range(50):
        ransac_estimator.set_params(min_samples=2, random_state=seed)
        ransac_estimator.fit(X, y)
        assert ransac_estimator.n_trials_ < max_trials + 1
|
||||
|
||||
|
||||
def test_ransac_stop_n_inliers():
    """A trivially small `stop_n_inliers` stops after the first trial."""
    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=5,
        stop_n_inliers=2,
        random_state=0,
    )
    ransac_estimator.fit(X, y)
    assert ransac_estimator.n_trials_ == 1
|
||||
|
||||
|
||||
def test_ransac_stop_score():
    """A trivially low `stop_score` stops after the first trial."""
    ransac_estimator = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=5,
        stop_score=0,
        random_state=0,
    )
    ransac_estimator.fit(X, y)
    assert ransac_estimator.n_trials_ == 1
|
||||
|
||||
|
||||
def test_ransac_score():
    """Score is perfect on inliers and degraded on the two outliers."""
    # two gross outliers at the start, the rest lies exactly on y == 0
    X_local = np.arange(100)[:, None]
    y_local = np.zeros((100,))
    y_local[0] = 1
    y_local[1] = 100

    ransac_estimator = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=0.5, random_state=0
    )
    ransac_estimator.fit(X_local, y_local)

    assert ransac_estimator.score(X_local[2:], y_local[2:]) == 1
    assert ransac_estimator.score(X_local[:2], y_local[:2]) < 1
|
||||
|
||||
|
||||
def test_ransac_predict():
    """Predictions come from the consensus model, ignoring the outliers."""
    # two gross outliers at the start, the rest lies exactly on y == 0
    X_local = np.arange(100)[:, None]
    y_local = np.zeros((100,))
    y_local[0] = 1
    y_local[1] = 100

    ransac_estimator = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=0.5, random_state=0
    )
    ransac_estimator.fit(X_local, y_local)

    assert_array_equal(ransac_estimator.predict(X_local), np.zeros(100))
|
||||
|
||||
|
||||
def test_ransac_no_valid_data():
    """All-invalid data counts only towards the invalid-data skip counter."""

    def is_data_valid(X, y):
        return False

    ransac_estimator = RANSACRegressor(
        LinearRegression(), is_data_valid=is_data_valid, max_trials=5
    )

    with pytest.raises(ValueError, match="RANSAC could not find a valid consensus set"):
        ransac_estimator.fit(X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 5
    assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_no_valid_model():
    """All-invalid models count only towards the invalid-model skip counter."""

    def is_model_valid(estimator, X, y):
        return False

    ransac_estimator = RANSACRegressor(
        LinearRegression(), is_model_valid=is_model_valid, max_trials=5
    )

    with pytest.raises(ValueError, match="RANSAC could not find a valid consensus set"):
        ransac_estimator.fit(X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 0
    assert ransac_estimator.n_skips_invalid_model_ == 5
|
||||
|
||||
|
||||
def test_ransac_exceed_max_skips():
    """Exceeding `max_skips` with no consensus set raises an error."""

    def is_data_valid(X, y):
        return False

    ransac_estimator = RANSACRegressor(
        LinearRegression(), is_data_valid=is_data_valid, max_trials=5, max_skips=3
    )

    with pytest.raises(ValueError, match="RANSAC skipped more iterations than `max_skips`"):
        ransac_estimator.fit(X, y)
    # max_skips=3 means the 4th skip aborts the search
    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 4
    assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
def test_ransac_warn_exceed_max_skips():
    """Exceeding `max_skips` after a valid consensus set only warns."""

    class IsDataValid:
        # accept only the very first subsample, reject all later ones
        def __init__(self):
            self.call_counter = 0

        def __call__(self, X, y):
            accept = self.call_counter == 0
            self.call_counter += 1
            return accept

    ransac_estimator = RANSACRegressor(
        LinearRegression(), is_data_valid=IsDataValid(), max_skips=3, max_trials=5
    )
    warning_message = (
        "RANSAC found a valid consensus set but exited "
        "early due to skipping more iterations than "
        "`max_skips`. See estimator attributes for "
        "diagnostics."
    )
    with pytest.warns(ConvergenceWarning, match=warning_message):
        ransac_estimator.fit(X, y)
    assert ransac_estimator.n_skips_no_inliers_ == 0
    assert ransac_estimator.n_skips_invalid_data_ == 4
    assert ransac_estimator.n_skips_invalid_model_ == 0
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "sparse_container", COO_CONTAINERS + CSR_CONTAINERS + CSC_CONTAINERS
)
def test_ransac_sparse(sparse_container):
    """Sparse X yields the same inlier mask as dense input."""
    ransac_estimator = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=5, random_state=0
    )
    ransac_estimator.fit(sparse_container(X), y)

    expected_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    expected_mask[outliers] = False
    assert_array_equal(ransac_estimator.inlier_mask_, expected_mask)
|
||||
|
||||
|
||||
def test_ransac_none_estimator():
    """estimator=None falls back to a default equivalent to LinearRegression."""
    ransac_estimator = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=5, random_state=0
    )
    ransac_none_estimator = RANSACRegressor(
        None, min_samples=2, residual_threshold=5, random_state=0
    )

    ransac_estimator.fit(X, y)
    ransac_none_estimator.fit(X, y)

    assert_array_almost_equal(
        ransac_estimator.predict(X), ransac_none_estimator.predict(X)
    )
|
||||
|
||||
|
||||
def test_ransac_min_n_samples():
    """Check the accepted forms and error cases of the `min_samples` parameter.

    Integer counts, equivalent fractions of n_samples, and the default must
    all give identical fits; a count larger than n_samples must raise, and
    `min_samples=None` with a non-LinearRegression estimator must raise with
    an explicit message.
    """
    estimator = LinearRegression()
    # Absolute number of samples.
    ransac_estimator1 = RANSACRegressor(
        estimator, min_samples=2, residual_threshold=5, random_state=0
    )
    # Same subset size expressed as a fraction of n_samples.
    ransac_estimator2 = RANSACRegressor(
        estimator,
        min_samples=2.0 / X.shape[0],
        residual_threshold=5,
        random_state=0,
    )
    ransac_estimator5 = RANSACRegressor(
        estimator, min_samples=2, residual_threshold=5, random_state=0
    )
    # Default min_samples (derived from the data / estimator).
    ransac_estimator6 = RANSACRegressor(estimator, residual_threshold=5, random_state=0)
    # More samples requested than available: must fail at fit time.
    ransac_estimator7 = RANSACRegressor(
        estimator, min_samples=X.shape[0] + 1, residual_threshold=5, random_state=0
    )
    # GH #19390
    ransac_estimator8 = RANSACRegressor(
        Ridge(), min_samples=None, residual_threshold=5, random_state=0
    )

    ransac_estimator1.fit(X, y)
    ransac_estimator2.fit(X, y)
    ransac_estimator5.fit(X, y)
    ransac_estimator6.fit(X, y)

    assert_array_almost_equal(
        ransac_estimator1.predict(X), ransac_estimator2.predict(X)
    )
    assert_array_almost_equal(
        ransac_estimator1.predict(X), ransac_estimator5.predict(X)
    )
    assert_array_almost_equal(
        ransac_estimator1.predict(X), ransac_estimator6.predict(X)
    )

    with pytest.raises(ValueError):
        ransac_estimator7.fit(X, y)

    err_msg = "`min_samples` needs to be explicitly set"
    with pytest.raises(ValueError, match=err_msg):
        ransac_estimator8.fit(X, y)
|
||||
|
||||
|
||||
def test_ransac_multi_dimensional_targets():
    """RANSAC must handle multi-output targets and still identify the outliers."""
    estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(
        estimator, min_samples=2, residual_threshold=5, random_state=0
    )

    # 3-D target values
    yyy = np.column_stack([y, y, y])

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, yyy)

    # Ground truth / reference inlier mask
    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False

    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_residual_loss():
    """Equivalent custom loss callables and the string loss must yield the
    same fit as the default loss.

    Two per-sample multi-output losses (summed absolute and summed squared
    residuals), a mono-output absolute loss, and the built-in
    ``"squared_error"`` are each checked against the default configuration.
    """

    def loss_multi1(y_true, y_pred):
        return np.sum(np.abs(y_true - y_pred), axis=1)

    def loss_multi2(y_true, y_pred):
        return np.sum((y_true - y_pred) ** 2, axis=1)

    def loss_mono(y_true, y_pred):
        return np.abs(y_true - y_pred)

    yyy = np.column_stack([y, y, y])

    estimator = LinearRegression()
    # Reference: default loss.
    ransac_estimator0 = RANSACRegressor(
        estimator, min_samples=2, residual_threshold=5, random_state=0
    )
    ransac_estimator1 = RANSACRegressor(
        estimator,
        min_samples=2,
        residual_threshold=5,
        random_state=0,
        loss=loss_multi1,
    )
    ransac_estimator2 = RANSACRegressor(
        estimator,
        min_samples=2,
        residual_threshold=5,
        random_state=0,
        loss=loss_multi2,
    )

    # multi-dimensional
    ransac_estimator0.fit(X, yyy)
    ransac_estimator1.fit(X, yyy)
    ransac_estimator2.fit(X, yyy)
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator1.predict(X)
    )
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator2.predict(X)
    )

    # one-dimensional
    ransac_estimator0.fit(X, y)
    ransac_estimator2.loss = loss_mono
    ransac_estimator2.fit(X, y)
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator2.predict(X)
    )
    ransac_estimator3 = RANSACRegressor(
        estimator,
        min_samples=2,
        residual_threshold=5,
        random_state=0,
        loss="squared_error",
    )
    ransac_estimator3.fit(X, y)
    # Bug fix: previously this re-compared estimator2, so the
    # loss="squared_error" fit of estimator3 was never actually verified.
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator3.predict(X)
    )
|
||||
|
||||
|
||||
def test_ransac_default_residual_threshold():
    """With residual_threshold left at its default, RANSAC must still
    separate the known outliers from the inliers on the module fixture data.
    """
    estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(estimator, min_samples=2, random_state=0)

    # Estimate parameters of corrupted data
    ransac_estimator.fit(X, y)

    # Ground truth / reference inlier mask
    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False

    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)
|
||||
|
||||
|
||||
def test_ransac_dynamic_max_trials():
    """Check `_dynamic_max_trials` against hand-calculated reference values.

    Arguments are (n_inliers, n_samples, min_samples, probability).
    """
    # Numbers hand-calculated and confirmed on page 119 (Table 4.3) in
    # Hartley, R.~I. and Zisserman, A., 2004,
    # Multiple View Geometry in Computer Vision, Second Edition,
    # Cambridge University Press, ISBN: 0521540518

    # e = 0%, min_samples = X
    assert _dynamic_max_trials(100, 100, 2, 0.99) == 1

    # e = 5%, min_samples = 2
    assert _dynamic_max_trials(95, 100, 2, 0.99) == 2
    # e = 10%, min_samples = 2
    assert _dynamic_max_trials(90, 100, 2, 0.99) == 3
    # e = 30%, min_samples = 2
    assert _dynamic_max_trials(70, 100, 2, 0.99) == 7
    # e = 50%, min_samples = 2
    assert _dynamic_max_trials(50, 100, 2, 0.99) == 17

    # e = 5%, min_samples = 8
    assert _dynamic_max_trials(95, 100, 8, 0.99) == 5
    # e = 10%, min_samples = 8
    assert _dynamic_max_trials(90, 100, 8, 0.99) == 9
    # e = 30%, min_samples = 8
    assert _dynamic_max_trials(70, 100, 8, 0.99) == 78
    # e = 50%, min_samples = 8
    assert _dynamic_max_trials(50, 100, 8, 0.99) == 1177

    # Degenerate probabilities: 0 needs no trials, 1 can never be guaranteed.
    # e = 0%, min_samples = 10
    assert _dynamic_max_trials(1, 100, 10, 0) == 0
    assert _dynamic_max_trials(1, 100, 10, 1) == float("inf")
|
||||
|
||||
|
||||
def test_ransac_fit_sample_weight():
    """Check sample_weight support in RANSACRegressor.fit.

    Verifies that unit weights reproduce the reference inlier mask, that
    integer weights are equivalent to repeating the corresponding rows, and
    that a base estimator without sample_weight support raises a ValueError.
    """
    ransac_estimator = RANSACRegressor(random_state=0)
    n_samples = y.shape[0]
    weights = np.ones(n_samples)
    ransac_estimator.fit(X, y, sample_weight=weights)
    # sanity check
    assert ransac_estimator.inlier_mask_.shape[0] == n_samples

    ref_inlier_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    ref_inlier_mask[outliers] = False
    # check that mask is correct
    assert_array_equal(ransac_estimator.inlier_mask_, ref_inlier_mask)

    # check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where
    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
    random_state = check_random_state(0)
    X_ = random_state.randint(0, 200, [10, 1])
    y_ = np.ndarray.flatten(0.2 * X_ + 2)
    sample_weight = random_state.randint(0, 10, 10)
    outlier_X = random_state.randint(0, 1000, [1, 1])
    outlier_weight = random_state.randint(0, 10, 1)
    outlier_y = random_state.randint(-1000, 0, 1)

    # Materialize the weights by repeating each row weight-many times.
    X_flat = np.append(
        np.repeat(X_, sample_weight, axis=0),
        np.repeat(outlier_X, outlier_weight, axis=0),
        axis=0,
    )
    y_flat = np.ndarray.flatten(
        np.append(
            np.repeat(y_, sample_weight, axis=0),
            np.repeat(outlier_y, outlier_weight, axis=0),
            axis=0,
        )
    )
    ransac_estimator.fit(X_flat, y_flat)
    ref_coef_ = ransac_estimator.estimator_.coef_

    # Same data expressed through sample_weight must give the same coef.
    sample_weight = np.append(sample_weight, outlier_weight)
    X_ = np.append(X_, outlier_X, axis=0)
    y_ = np.append(y_, outlier_y)
    ransac_estimator.fit(X_, y_, sample_weight=sample_weight)

    assert_allclose(ransac_estimator.estimator_.coef_, ref_coef_)

    # check that if estimator.fit doesn't support
    # sample_weight, raises error
    estimator = OrthogonalMatchingPursuit()
    ransac_estimator = RANSACRegressor(estimator, min_samples=10)

    err_msg = f"{estimator.__class__.__name__} does not support sample_weight."
    with pytest.raises(ValueError, match=err_msg):
        ransac_estimator.fit(X, y, sample_weight=weights)
|
||||
|
||||
|
||||
def test_ransac_final_model_fit_sample_weight():
    """The final refit on the consensus set must honor sample_weight.

    Refitting a LinearRegression manually on the inlier subset with the
    corresponding weights must reproduce the coefficients of the internal
    final estimator.
    """
    X, y = make_regression(n_samples=1000, random_state=10)
    rng = check_random_state(42)
    sample_weight = rng.randint(1, 4, size=y.shape[0])
    sample_weight = sample_weight / sample_weight.sum()
    ransac = RANSACRegressor(random_state=0)
    ransac.fit(X, y, sample_weight=sample_weight)

    final_model = LinearRegression()
    mask_samples = ransac.inlier_mask_
    final_model.fit(
        X[mask_samples], y[mask_samples], sample_weight=sample_weight[mask_samples]
    )

    assert_allclose(ransac.estimator_.coef_, final_model.coef_, atol=1e-12)
|
||||
|
||||
|
||||
def test_perfect_horizontal_line():
    """Check that we can fit a line where all samples are inliers.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19497
    """
    X = np.arange(100)[:, None]
    y = np.zeros((100,))

    estimator = LinearRegression()
    ransac_estimator = RANSACRegressor(estimator, random_state=0)
    ransac_estimator.fit(X, y)

    # A perfectly flat line: zero slope and zero intercept.
    assert_allclose(ransac_estimator.estimator_.coef_, 0.0)
    assert_allclose(ransac_estimator.estimator_.intercept_, 0.0)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,861 @@
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import math
|
||||
import re
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
from sklearn.base import clone
|
||||
from sklearn.datasets import load_iris, make_blobs, make_classification
|
||||
from sklearn.linear_model import LogisticRegression, Ridge
|
||||
from sklearn.linear_model._sag import get_auto_step_size
|
||||
from sklearn.multiclass import OneVsRestClassifier
|
||||
from sklearn.preprocessing import LabelEncoder
|
||||
from sklearn.utils import check_random_state, compute_class_weight
|
||||
from sklearn.utils._testing import (
|
||||
assert_allclose,
|
||||
assert_almost_equal,
|
||||
assert_array_almost_equal,
|
||||
)
|
||||
from sklearn.utils.extmath import row_norms
|
||||
from sklearn.utils.fixes import CSR_CONTAINERS
|
||||
|
||||
iris = load_iris()
|
||||
|
||||
|
||||
# this is used for sag classification
def log_dloss(p, y):
    """Derivative of the logistic loss w.r.t. the raw prediction ``p``.

    ``y`` is expected in {-1, +1}. For extreme margins the asymptotic form is
    used to avoid overflow in ``exp``.
    """
    margin = p * y
    if margin > 18.0:
        # exp(margin) would dominate; approximately equal and saves the log
        return -y * math.exp(-margin)
    if margin < -18.0:
        # sigmoid saturates at 1
        return -y
    return -y / (1.0 + math.exp(margin))
|
||||
|
||||
|
||||
def log_loss(p, y):
    """Mean logistic loss of raw predictions ``p`` against labels ``y`` in {-1, +1}."""
    margins = -y * p
    return np.mean(np.log(1.0 + np.exp(margins)))
|
||||
|
||||
|
||||
# this is used for sag regression
def squared_dloss(p, y):
    """Derivative of the squared loss 0.5 * (p - y)**2 w.r.t. ``p``."""
    residual = p - y
    return residual
|
||||
|
||||
|
||||
def squared_loss(p, y):
    """Mean of the per-sample squared loss 0.5 * (p - y)**2."""
    residual = p - y
    return np.mean(0.5 * residual * residual)
|
||||
|
||||
|
||||
# function for measuring the log loss
def get_pobj(w, alpha, myX, myy, loss):
    """Primal objective: data loss at ``w`` plus the L2 penalty alpha * ||w||^2 / 2."""
    coef = w.ravel()
    data_loss = loss(np.dot(myX, coef), myy)
    penalty = alpha * coef.dot(coef) / 2.0
    return data_loss + penalty
|
||||
|
||||
|
||||
def sag(
    X,
    y,
    step_size,
    alpha,
    n_iter=1,
    dloss=None,
    sparse=False,
    sample_weight=None,
    fit_intercept=True,
    saga=False,
):
    """Dense reference implementation of the SAG/SAGA update rule.

    Stores one full gradient per sample in ``gradient_memory`` and keeps the
    running sum of stored gradients in ``sum_gradient``; each step replaces
    one sample's stored gradient and moves the weights by the averaged sum.
    Returns ``(weights, intercept)``.

    Parameters mirror the Cython solver being tested: ``dloss`` is the
    per-sample loss derivative, ``saga=True`` adds the SAGA variance-reduction
    correction term, and ``sparse=True`` only changes the intercept decay.
    """
    n_samples, n_features = X.shape[0], X.shape[1]

    weights = np.zeros(X.shape[1])
    sum_gradient = np.zeros(X.shape[1])
    gradient_memory = np.zeros((n_samples, n_features))

    intercept = 0.0
    intercept_sum_gradient = 0.0
    intercept_gradient_memory = np.zeros(n_samples)

    # Fixed seed so the sampled index sequence matches the solver under test.
    rng = np.random.RandomState(77)
    decay = 1.0
    seen = set()

    # sparse data has a fixed decay of .01
    if sparse:
        decay = 0.01

    for epoch in range(n_iter):
        for k in range(n_samples):
            # Sample one index uniformly at random per inner step.
            idx = int(rng.rand() * n_samples)
            # idx = k
            entry = X[idx]
            seen.add(idx)
            p = np.dot(entry, weights) + intercept
            gradient = dloss(p, y[idx])
            if sample_weight is not None:
                gradient *= sample_weight[idx]
            # Regularized per-sample gradient; the correction is the change
            # relative to the gradient previously stored for this sample.
            update = entry * gradient + alpha * weights
            gradient_correction = update - gradient_memory[idx]
            sum_gradient += gradient_correction
            gradient_memory[idx] = update
            if saga:
                # SAGA applies the correction immediately (variance reduction).
                weights -= gradient_correction * step_size * (1 - 1.0 / len(seen))

            if fit_intercept:
                gradient_correction = gradient - intercept_gradient_memory[idx]
                intercept_gradient_memory[idx] = gradient
                intercept_sum_gradient += gradient_correction
                gradient_correction *= step_size * (1.0 - 1.0 / len(seen))
                if saga:
                    intercept -= (
                        step_size * intercept_sum_gradient / len(seen) * decay
                    ) + gradient_correction
                else:
                    intercept -= step_size * intercept_sum_gradient / len(seen) * decay

            # Average over samples seen so far, not over n_samples.
            weights -= step_size * sum_gradient / len(seen)

    return weights, intercept
|
||||
|
||||
|
||||
def sag_sparse(
    X,
    y,
    step_size,
    alpha,
    n_iter=1,
    dloss=None,
    sample_weight=None,
    sparse=False,
    fit_intercept=True,
    saga=False,
    random_state=0,
):
    """SAG/SAGA reference implementation with lazy ("just-in-time") updates.

    Mirrors the sparse code path of the Cython solver: the L2 shrinkage is
    folded into a running scale ``wscale`` instead of touching every weight,
    and per-feature updates are deferred via the cumulative step sums
    ``c_sum`` together with ``last_updated`` until a feature is actually
    needed. Returns ``(weights, intercept)``.

    Raises ZeroDivisionError when ``step_size * alpha == 1`` since the scale
    update ``wscale *= 1 - alpha * step_size`` would zero out the weights.
    """
    if step_size * alpha == 1.0:
        raise ZeroDivisionError(
            "Sparse sag does not handle the case step_size * alpha == 1"
        )
    n_samples, n_features = X.shape[0], X.shape[1]

    weights = np.zeros(n_features)
    sum_gradient = np.zeros(n_features)
    # last_updated[j] is the counter value at which feature j was last synced.
    last_updated = np.zeros(n_features, dtype=int)
    gradient_memory = np.zeros(n_samples)
    rng = check_random_state(random_state)
    intercept = 0.0
    intercept_sum_gradient = 0.0
    wscale = 1.0
    decay = 1.0
    seen = set()

    # Cumulative sums of scaled step sizes, one slot per inner iteration.
    c_sum = np.zeros(n_iter * n_samples)

    # sparse data has a fixed decay of .01
    if sparse:
        decay = 0.01

    counter = 0
    for epoch in range(n_iter):
        for k in range(n_samples):
            # idx = k
            idx = int(rng.rand() * n_samples)
            entry = X[idx]
            seen.add(idx)

            # Lazily apply all deferred averaged-gradient steps to every
            # feature before it is read.
            if counter >= 1:
                for j in range(n_features):
                    if last_updated[j] == 0:
                        weights[j] -= c_sum[counter - 1] * sum_gradient[j]
                    else:
                        weights[j] -= (
                            c_sum[counter - 1] - c_sum[last_updated[j] - 1]
                        ) * sum_gradient[j]
                    last_updated[j] = counter

            # Prediction uses the raw weights times the running scale.
            p = (wscale * np.dot(entry, weights)) + intercept
            gradient = dloss(p, y[idx])

            if sample_weight is not None:
                gradient *= sample_weight[idx]

            update = entry * gradient
            gradient_correction = update - (gradient_memory[idx] * entry)
            sum_gradient += gradient_correction
            if saga:
                # SAGA's immediate correction, undone by wscale so it is
                # expressed in the raw (unscaled) coordinates.
                for j in range(n_features):
                    weights[j] -= (
                        gradient_correction[j]
                        * step_size
                        * (1 - 1.0 / len(seen))
                        / wscale
                    )

            if fit_intercept:
                gradient_correction = gradient - gradient_memory[idx]
                intercept_sum_gradient += gradient_correction
                gradient_correction *= step_size * (1.0 - 1.0 / len(seen))
                if saga:
                    intercept -= (
                        step_size * intercept_sum_gradient / len(seen) * decay
                    ) + gradient_correction
                else:
                    intercept -= step_size * intercept_sum_gradient / len(seen) * decay

            gradient_memory[idx] = gradient

            # Fold the L2 shrinkage into the scale; record the cumulative
            # step for the deferred updates above.
            wscale *= 1.0 - alpha * step_size
            if counter == 0:
                c_sum[0] = step_size / (wscale * len(seen))
            else:
                c_sum[counter] = c_sum[counter - 1] + step_size / (wscale * len(seen))

            # Re-normalize before wscale underflows: flush all deferred
            # updates, bake the scale into the weights, and reset.
            if counter >= 1 and wscale < 1e-9:
                for j in range(n_features):
                    if last_updated[j] == 0:
                        weights[j] -= c_sum[counter] * sum_gradient[j]
                    else:
                        weights[j] -= (
                            c_sum[counter] - c_sum[last_updated[j] - 1]
                        ) * sum_gradient[j]
                    last_updated[j] = counter + 1
                c_sum[counter] = 0
                weights *= wscale
                wscale = 1.0

            counter += 1

    # Final flush of the deferred updates, then apply the remaining scale.
    for j in range(n_features):
        if last_updated[j] == 0:
            weights[j] -= c_sum[counter - 1] * sum_gradient[j]
        else:
            weights[j] -= (
                c_sum[counter - 1] - c_sum[last_updated[j] - 1]
            ) * sum_gradient[j]
    weights *= wscale
    return weights, intercept
|
||||
|
||||
|
||||
def get_step_size(X, alpha, fit_intercept, classification=True):
    """Constant SAG step size from the max squared row norm of ``X``.

    Uses the 4/L Lipschitz bound for the logistic loss and 1/L for the
    squared loss; ``fit_intercept`` (0 or 1) augments the squared norm.
    """
    max_row_sq_norm = np.max(np.sum(X * X, axis=1))
    if classification:
        return 4.0 / (max_row_sq_norm + fit_intercept + 4.0 * alpha)
    return 1.0 / (max_row_sq_norm + fit_intercept + alpha)
|
||||
|
||||
|
||||
def test_classifier_matching():
    """LogisticRegression with sag/saga must match both Python reference solvers.

    Labels are remapped from {0, 1} to {-1, +1} for the reference
    implementations, and C is converted to the per-sample alpha.
    """
    n_samples = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0, cluster_std=0.1)
    # y must be 0 or 1
    alpha = 1.1
    fit_intercept = True
    step_size = get_step_size(X, alpha, fit_intercept)
    for solver in ["sag", "saga"]:
        if solver == "sag":
            n_iter = 80
        else:
            # SAGA variance w.r.t. stream order is higher
            n_iter = 300
        clf = LogisticRegression(
            solver=solver,
            fit_intercept=fit_intercept,
            tol=1e-11,
            C=1.0 / alpha / n_samples,
            max_iter=n_iter,
            random_state=10,
        )
        clf.fit(X, y)

        weights, intercept = sag_sparse(
            X,
            2 * y - 1,  # y must be -1 or +1
            step_size,
            alpha,
            n_iter=n_iter,
            dloss=log_dloss,
            fit_intercept=fit_intercept,
            saga=solver == "saga",
        )
        weights2, intercept2 = sag(
            X,
            2 * y - 1,  # y must be -1 or +1
            step_size,
            alpha,
            n_iter=n_iter,
            dloss=log_dloss,
            fit_intercept=fit_intercept,
            saga=solver == "saga",
        )
        # Match the 2-D coef_ / 1-D intercept_ layout of LogisticRegression.
        weights = np.atleast_2d(weights)
        intercept = np.atleast_1d(intercept)
        weights2 = np.atleast_2d(weights2)
        intercept2 = np.atleast_1d(intercept2)

        assert_array_almost_equal(weights, clf.coef_, decimal=9)
        assert_array_almost_equal(intercept, clf.intercept_, decimal=9)
        assert_array_almost_equal(weights2, clf.coef_, decimal=9)
        assert_array_almost_equal(intercept2, clf.intercept_, decimal=9)
|
||||
|
||||
|
||||
def test_regressor_matching():
    """Ridge with solver='sag' must match both Python reference solvers
    on noiseless linear data.
    """
    n_samples = 10
    n_features = 5

    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    alpha = 1.0
    n_iter = 100
    fit_intercept = True

    step_size = get_step_size(X, alpha, fit_intercept, classification=False)
    clf = Ridge(
        fit_intercept=fit_intercept,
        tol=0.00000000001,
        solver="sag",
        # Ridge's alpha is not per-sample, hence the n_samples factor.
        alpha=alpha * n_samples,
        max_iter=n_iter,
    )
    clf.fit(X, y)

    weights1, intercept1 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=squared_dloss,
        fit_intercept=fit_intercept,
    )
    weights2, intercept2 = sag(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=squared_dloss,
        fit_intercept=fit_intercept,
    )

    assert_allclose(weights1, clf.coef_)
    assert_allclose(intercept1, clf.intercept_)
    assert_allclose(weights2, clf.coef_)
    assert_allclose(intercept2, clf.intercept_)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_pobj_matches_logistic_regression(csr_container):
    """tests if the sag pobj matches log reg

    The primal objective reached by sag on dense data, sag on sparse data,
    and the default solver must agree to 4 decimals.
    """
    n_samples = 100
    alpha = 1.0
    max_iter = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0, cluster_std=0.1)

    clf1 = LogisticRegression(
        solver="sag",
        fit_intercept=False,
        tol=0.0000001,
        C=1.0 / alpha / n_samples,
        max_iter=max_iter,
        random_state=10,
    )
    clf2 = clone(clf1)
    # Same problem, default (non-sag) solver.
    clf3 = LogisticRegression(
        fit_intercept=False,
        tol=0.0000001,
        C=1.0 / alpha / n_samples,
        max_iter=max_iter,
        random_state=10,
    )

    clf1.fit(X, y)
    clf2.fit(csr_container(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, log_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, log_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, log_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj2, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj1, decimal=4)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_pobj_matches_ridge_regression(csr_container):
    """tests if the sag pobj matches ridge reg

    The primal objective reached by sag on dense data, sag on sparse data,
    and the lsqr solver must agree to 4 decimals.
    """
    n_samples = 100
    n_features = 10
    alpha = 1.0
    n_iter = 100
    fit_intercept = False
    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    clf1 = Ridge(
        fit_intercept=fit_intercept,
        tol=0.00000000001,
        solver="sag",
        alpha=alpha,
        max_iter=n_iter,
        random_state=42,
    )
    clf2 = clone(clf1)
    # Reference: direct lsqr solver on the same problem.
    clf3 = Ridge(
        fit_intercept=fit_intercept,
        tol=0.00001,
        solver="lsqr",
        alpha=alpha,
        max_iter=n_iter,
        random_state=42,
    )

    clf1.fit(X, y)
    clf2.fit(csr_container(X), y)
    clf3.fit(X, y)

    pobj1 = get_pobj(clf1.coef_, alpha, X, y, squared_loss)
    pobj2 = get_pobj(clf2.coef_, alpha, X, y, squared_loss)
    pobj3 = get_pobj(clf3.coef_, alpha, X, y, squared_loss)

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj1, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj2, decimal=4)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_regressor_computed_correctly(csr_container):
    """tests if the sag regressor is computed correctly

    Compares Ridge(solver='sag') coefficients and intercept against the
    sag_sparse reference implementation, on dense and sparse input.
    """
    alpha = 0.1
    n_features = 10
    n_samples = 40
    max_iter = 100
    tol = 0.000001
    fit_intercept = True
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    # Linear target with a constant offset of 2.
    y = np.dot(X, w) + 2.0
    step_size = get_step_size(X, alpha, fit_intercept, classification=False)

    clf1 = Ridge(
        fit_intercept=fit_intercept,
        tol=tol,
        solver="sag",
        alpha=alpha * n_samples,
        max_iter=max_iter,
        random_state=rng,
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(csr_container(X), y)

    spweights1, spintercept1 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=max_iter,
        dloss=squared_dloss,
        fit_intercept=fit_intercept,
        random_state=rng,
    )

    spweights2, spintercept2 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=max_iter,
        dloss=squared_dloss,
        sparse=True,
        fit_intercept=fit_intercept,
        random_state=rng,
    )

    assert_array_almost_equal(clf1.coef_.ravel(), spweights1.ravel(), decimal=3)
    assert_almost_equal(clf1.intercept_, spintercept1, decimal=1)

    # TODO: uncomment when sparse Ridge with intercept will be fixed (#4710)
    # assert_array_almost_equal(clf2.coef_.ravel(),
    #                           spweights2.ravel(),
    #                           decimal=3)
    # assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
|
||||
|
||||
|
||||
def test_get_auto_step_size():
    """Check `get_auto_step_size` against hand-computed SAG/SAGA step sizes
    for the squared and log losses, and that an unknown loss name raises.
    """
    X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
    alpha = 1.2
    fit_intercept = False
    # sum the squares of the second sample because that's the largest
    max_squared_sum = 4 + 9 + 16
    max_squared_sum_ = row_norms(X, squared=True).max()
    n_samples = X.shape[0]
    assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)

    for saga in [True, False]:
        for fit_intercept in (True, False):
            if saga:
                # SAGA step: 1 / (2L + mu_n), with mu_n capped by 2*n*alpha.
                L_sqr = max_squared_sum + alpha + int(fit_intercept)
                L_log = (max_squared_sum + 4.0 * alpha + int(fit_intercept)) / 4.0
                mun_sqr = min(2 * n_samples * alpha, L_sqr)
                mun_log = min(2 * n_samples * alpha, L_log)
                step_size_sqr = 1 / (2 * L_sqr + mun_sqr)
                step_size_log = 1 / (2 * L_log + mun_log)
            else:
                # SAG step: 1/L (squared) and 4/L-style bound (log).
                step_size_sqr = 1.0 / (max_squared_sum + alpha + int(fit_intercept))
                step_size_log = 4.0 / (
                    max_squared_sum + 4.0 * alpha + int(fit_intercept)
                )

            step_size_sqr_ = get_auto_step_size(
                max_squared_sum_,
                alpha,
                "squared",
                fit_intercept,
                n_samples=n_samples,
                is_saga=saga,
            )
            step_size_log_ = get_auto_step_size(
                max_squared_sum_,
                alpha,
                "log",
                fit_intercept,
                n_samples=n_samples,
                is_saga=saga,
            )

            assert_almost_equal(step_size_sqr, step_size_sqr_, decimal=4)
            assert_almost_equal(step_size_log, step_size_log_, decimal=4)

    msg = "Unknown loss function for SAG solver, got wrong instead of"
    with pytest.raises(ValueError, match=msg):
        get_auto_step_size(max_squared_sum_, alpha, "wrong", fit_intercept)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("seed", range(3))  # locally tested with 1000 seeds
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_regressor(seed, csr_container):
    """tests if the sag regressor performs well

    Checks R^2 thresholds on a noiseless and a noisy linear problem, for
    both dense and sparse input.
    """
    xmin, xmax = -5, 5
    n_samples = 300
    tol = 0.001
    max_iter = 100
    alpha = 0.1
    rng = np.random.RandomState(seed)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    # simple linear function without noise
    y = 0.5 * X.ravel()

    clf1 = Ridge(
        tol=tol,
        solver="sag",
        max_iter=max_iter,
        alpha=alpha * n_samples,
        random_state=rng,
    )
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(csr_container(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    assert score1 > 0.98
    assert score2 > 0.98

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()

    clf1 = Ridge(tol=tol, solver="sag", max_iter=max_iter, alpha=alpha * n_samples)
    clf2 = clone(clf1)
    clf1.fit(X, y)
    clf2.fit(csr_container(X), y)
    score1 = clf1.score(X, y)
    score2 = clf2.score(X, y)
    # Noisy target: only a loose fit quality can be guaranteed.
    assert score1 > 0.45
    assert score2 > 0.45
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_classifier_computed_correctly(csr_container):
    """tests if the binary classifier is computed correctly

    Compares LogisticRegression(solver='sag') against the sag_sparse
    reference, with labels remapped to {-1, +1}.
    """
    alpha = 0.1
    n_samples = 50
    n_iter = 50
    tol = 0.00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)
    # Remap labels to {-1, +1} as expected by the reference solver.
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    clf1 = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=n_iter,
        tol=tol,
        random_state=77,
        fit_intercept=fit_intercept,
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(csr_container(X), y)

    spweights, spintercept = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=log_dloss,
        fit_intercept=fit_intercept,
    )
    spweights2, spintercept2 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=log_dloss,
        sparse=True,
        fit_intercept=fit_intercept,
    )

    assert_array_almost_equal(clf1.coef_.ravel(), spweights.ravel(), decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(), spweights2.ravel(), decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_multiclass_computed_correctly(csr_container):
    """tests if the multiclass classifier is computed correctly

    One-vs-rest LogisticRegression(solver='sag') is compared, class by
    class, against binary sag_sparse fits on {-1, +1}-encoded labels.
    """
    alpha = 0.1
    n_samples = 20
    tol = 1e-5
    max_iter = 70
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    clf1 = OneVsRestClassifier(
        LogisticRegression(
            solver="sag",
            C=1.0 / alpha / n_samples,
            max_iter=max_iter,
            tol=tol,
            random_state=77,
            fit_intercept=fit_intercept,
        )
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(csr_container(X), y)

    coef1 = []
    intercept1 = []
    coef2 = []
    intercept2 = []
    for cl in classes:
        # One-vs-rest encoding for the current class.
        y_encoded = np.ones(n_samples)
        y_encoded[y != cl] = -1

        spweights1, spintercept1 = sag_sparse(
            X,
            y_encoded,
            step_size,
            alpha,
            dloss=log_dloss,
            n_iter=max_iter,
            fit_intercept=fit_intercept,
        )
        spweights2, spintercept2 = sag_sparse(
            X,
            y_encoded,
            step_size,
            alpha,
            dloss=log_dloss,
            n_iter=max_iter,
            sparse=True,
            fit_intercept=fit_intercept,
        )
        coef1.append(spweights1)
        intercept1.append(spintercept1)

        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i, cl in enumerate(classes):
        assert_allclose(clf1.estimators_[i].coef_.ravel(), coef1[i], rtol=1e-2)
        assert_allclose(clf1.estimators_[i].intercept_, intercept1[i], rtol=1e-1)

        assert_allclose(clf2.estimators_[i].coef_.ravel(), coef2[i], rtol=1e-2)
        # Note the very crude accuracy, i.e. high rtol.
        assert_allclose(clf2.estimators_[i].intercept_, intercept2[i], rtol=5e-1)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_classifier_results(csr_container):
    """tests if classifier results match target

    On linearly separable sign labels, sag must recover the labels exactly,
    for both dense and sparse input.
    """
    alpha = 0.1
    n_features = 20
    n_samples = 10
    tol = 0.01
    max_iter = 200
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    w = rng.normal(size=n_features)
    # Targets are the signs of a noiseless linear model: separable data.
    y = np.dot(X, w)
    y = np.sign(y)
    clf1 = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=max_iter,
        tol=tol,
        random_state=77,
    )
    clf2 = clone(clf1)

    clf1.fit(X, y)
    clf2.fit(csr_container(X), y)
    pred1 = clf1.predict(X)
    pred2 = clf2.predict(X)
    assert_almost_equal(pred1, y, decimal=12)
    assert_almost_equal(pred2, y, decimal=12)
|
||||
|
||||
|
||||
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_binary_classifier_class_weight(csr_container):
    """tests binary classifier with classweights for each class

    Compares LogisticRegression(solver="sag") with class weights against a
    reference implementation (``sag_sparse``, presumably defined earlier in
    this file) that is fed the equivalent per-sample weights.
    """
    alpha = 0.1
    n_samples = 50
    n_iter = 20
    tol = 0.00001
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)
    # Re-encode the two blob labels as {-1, +1}.
    y_tmp = np.ones(n_samples)
    y_tmp[y != classes[1]] = -1
    y = y_tmp

    class_weight = {1: 0.45, -1: 0.55}
    clf1 = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=n_iter,
        tol=tol,
        random_state=77,
        fit_intercept=fit_intercept,
        class_weight=class_weight,
    )
    clf2 = clone(clf1)

    # Dense vs. sparse input for the same estimator configuration.
    clf1.fit(X, y)
    clf2.fit(csr_container(X), y)

    # Translate the class weights into per-sample weights for the
    # reference implementation.
    le = LabelEncoder()
    class_weight_ = compute_class_weight(class_weight, classes=np.unique(y), y=y)
    sample_weight = class_weight_[le.fit_transform(y)]
    spweights, spintercept = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=log_dloss,
        sample_weight=sample_weight,
        fit_intercept=fit_intercept,
    )
    spweights2, spintercept2 = sag_sparse(
        X,
        y,
        step_size,
        alpha,
        n_iter=n_iter,
        dloss=log_dloss,
        sparse=True,
        sample_weight=sample_weight,
        fit_intercept=fit_intercept,
    )

    # Loose tolerances: SAG only approximately matches the reference.
    assert_array_almost_equal(clf1.coef_.ravel(), spweights.ravel(), decimal=2)
    assert_almost_equal(clf1.intercept_, spintercept, decimal=1)

    assert_array_almost_equal(clf2.coef_.ravel(), spweights2.ravel(), decimal=2)
    assert_almost_equal(clf2.intercept_, spintercept2, decimal=1)
|
||||
|
||||
|
||||
def test_classifier_single_class():
    """tests if ValueError is thrown with only one class

    The SAG solver cannot fit a classification problem whose training set
    contains a single class.
    """
    X = [[1, 2], [3, 4]]
    y = [1, 1]

    msg = "This solver needs samples of at least 2 classes in the data"
    with pytest.raises(ValueError, match=msg):
        LogisticRegression(solver="sag").fit(X, y)
|
||||
|
||||
|
||||
def test_step_size_alpha_error():
    """Check that SAG raises on the degenerate step_size * alpha == 1 case.

    An all-zero design matrix drives the SAG step size into the one
    configuration the implementation explicitly refuses to handle, for both
    the classification (LogisticRegression) and regression (Ridge) paths.
    """
    X = [[0, 0], [0, 0]]
    y = [1, -1]
    fit_intercept = False
    alpha = 1.0
    msg = re.escape(
        "Current sag implementation does not handle the case"
        " step_size * alpha_scaled == 1"
    )

    clf1 = LogisticRegression(solver="sag", C=1.0 / alpha, fit_intercept=fit_intercept)
    with pytest.raises(ZeroDivisionError, match=msg):
        clf1.fit(X, y)

    clf2 = Ridge(fit_intercept=fit_intercept, solver="sag", alpha=alpha)
    with pytest.raises(ZeroDivisionError, match=msg):
        clf2.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("solver", ["sag", "saga"])
def test_sag_classifier_raises_error(solver):
    # Following #13316, the error handling behavior changed in cython sag. This
    # is simply a non-regression test to make sure numerical errors are
    # properly raised.

    # Train a classifier on a simple problem
    rng = np.random.RandomState(42)
    X, y = make_classification(random_state=rng)
    clf = LogisticRegression(solver=solver, random_state=rng, warm_start=True)
    clf.fit(X, y)

    # Trigger a numerical error by:
    # - corrupting the fitted coefficients of the classifier
    # - fit it again starting from its current state thanks to warm_start
    clf.coef_[:] = np.nan

    with pytest.raises(ValueError, match="Floating-point under-/overflow"):
        clf.fit(X, y)
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,387 @@
|
||||
import numpy as np
|
||||
import pytest
|
||||
import scipy.sparse as sp
|
||||
from numpy.testing import assert_allclose
|
||||
|
||||
from sklearn.datasets import make_regression
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.linear_model import ElasticNet, ElasticNetCV, Lasso, LassoCV
|
||||
from sklearn.utils._testing import (
|
||||
assert_almost_equal,
|
||||
assert_array_almost_equal,
|
||||
create_memmap_backed_data,
|
||||
ignore_warnings,
|
||||
)
|
||||
from sklearn.utils.fixes import COO_CONTAINERS, CSC_CONTAINERS, LIL_CONTAINERS
|
||||
|
||||
|
||||
def test_sparse_coef():
    # Check that the sparse_coef property works
    """Check that ``ElasticNet.sparse_coef_`` mirrors ``coef_`` as sparse."""
    clf = ElasticNet()
    # Assign coefficients directly without fitting; sparse_coef_ is a
    # derived read-only view of coef_.
    clf.coef_ = [1, 2, 3]

    assert sp.issparse(clf.sparse_coef_)
    assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_lasso_zero(csc_container):
    # Check that the sparse lasso can handle zero data without crashing
    # An empty (all-zero) sparse design matrix must fit to the trivial
    # model: zero coefficients, zero predictions, zero duality gap.
    X = csc_container((3, 1))
    y = [0, 0, 0]
    T = np.array([[1], [2], [3]])
    clf = Lasso().fit(X, y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0])
    assert_array_almost_equal(pred, [0, 0, 0])
    assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("with_sample_weight", [True, False])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_enet_toy_list_input(with_sample_weight, csc_container):
    # Test ElasticNet for various values of alpha and l1_ratio with list X
    # The expected coefficient/prediction values below are known-good
    # constants for this tiny identity-line problem.

    X = np.array([[-1], [0], [1]])
    X = csc_container(X)
    Y = [-1, 0, 1]  # just a straight line
    T = np.array([[2], [3], [4]])  # test sample
    if with_sample_weight:
        # Uniform weights: must not change the solution.
        sw = np.array([2.0, 2, 2])
    else:
        sw = None

    # this should be the same as unregularized least squares
    clf = ElasticNet(alpha=0, l1_ratio=1.0)
    # catch warning about alpha=0.
    # this is discouraged but should work.
    ignore_warnings(clf.fit)(X, Y, sample_weight=sw)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(pred, [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.3)
    clf.fit(X, Y, sample_weight=sw)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
    assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
    clf.fit(X, Y, sample_weight=sw)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.45454], 3)
    assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
    assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_enet_toy_explicit_sparse_input(lil_container):
    # Test ElasticNet for various values of alpha and l1_ratio with sparse X
    # Same toy problem as the list-input variant, but X and T are built as
    # explicitly sparse LIL matrices; expected values are identical.
    # training samples
    X = lil_container((3, 1))
    X[0, 0] = -1
    # X[1, 0] = 0
    X[2, 0] = 1
    Y = [-1, 0, 1]  # just a straight line (the identity function)

    # test samples
    T = lil_container((3, 1))
    T[0, 0] = 2
    T[1, 0] = 3
    T[2, 0] = 4

    # this should be the same as lasso
    clf = ElasticNet(alpha=0, l1_ratio=1.0)
    ignore_warnings(clf.fit)(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(pred, [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.3)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.50819], decimal=3)
    assert_array_almost_equal(pred, [1.0163, 1.5245, 2.0327], decimal=3)
    assert_almost_equal(clf.dual_gap_, 0)

    clf = ElasticNet(alpha=0.5, l1_ratio=0.5)
    clf.fit(X, Y)
    pred = clf.predict(T)
    assert_array_almost_equal(clf.coef_, [0.45454], 3)
    assert_array_almost_equal(pred, [0.9090, 1.3636, 1.8181], 3)
    assert_almost_equal(clf.dual_gap_, 0)
|
||||
|
||||
|
||||
def make_sparse_data(
    sparse_container,
    n_samples=100,
    n_features=100,
    n_informative=10,
    seed=42,
    positive=False,
    n_targets=1,
):
    """Build an ill-posed sparse regression problem.

    Many noisy features, comparatively few samples, and a ground-truth
    model in which only the first ``n_informative`` features matter.
    Returns ``(X, y)`` with ``X`` wrapped by ``sparse_container`` and
    ``y`` flattened to 1d when ``n_targets == 1``.
    """
    rng = np.random.RandomState(seed)

    # Ground-truth coefficients: only the leading ``n_informative`` rows
    # are non-zero, so the remaining features carry no signal.
    coef = rng.randn(n_features, n_targets)
    coef[n_informative:] = 0.0
    if positive:
        coef = np.abs(coef)

    # Dense design matrix with roughly half of the entries zeroed out.
    dense_X = rng.randn(n_samples, n_features)
    mask = rng.uniform(size=(n_samples, n_features))
    dense_X[mask > 0.5] = 0.0

    # Noise-free targets from the ground-truth model.
    y = dense_X @ coef
    X = sparse_container(dense_X)
    if n_targets == 1:
        y = y.ravel()
    return X, y
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize(
    "alpha, fit_intercept, positive",
    [(0.1, False, False), (0.1, True, False), (1e-3, False, True), (1e-3, True, True)],
)
def test_sparse_enet_not_as_toy_dataset(csc_container, alpha, fit_intercept, positive):
    """Check sparse and dense ElasticNet agree on a non-trivial problem."""
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(
        csc_container, n_samples, n_features, n_informative, positive=positive
    )

    # Second half of the rows for training, first half for testing.
    X_train, X_test = X[n_samples // 2 :], X[: n_samples // 2]
    y_train, y_test = y[n_samples // 2 :], y[: n_samples // 2]

    s_clf = ElasticNet(
        alpha=alpha,
        l1_ratio=0.8,
        fit_intercept=fit_intercept,
        max_iter=max_iter,
        tol=1e-7,
        positive=positive,
        warm_start=True,
    )
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(
        alpha=alpha,
        l1_ratio=0.8,
        fit_intercept=fit_intercept,
        max_iter=max_iter,
        tol=1e-7,
        positive=positive,
        warm_start=True,
    )
    d_clf.fit(X_train.toarray(), y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_sparse_lasso_not_as_toy_dataset(csc_container):
    """Check sparse and dense Lasso agree and recover the sparse support."""
    n_samples = 100
    max_iter = 1000
    n_informative = 10
    X, y = make_sparse_data(
        csc_container, n_samples=n_samples, n_informative=n_informative
    )

    # Second half of the rows for training, first half for testing.
    X_train, X_test = X[n_samples // 2 :], X[: n_samples // 2]
    y_train, y_test = y[n_samples // 2 :], y[: n_samples // 2]

    s_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = Lasso(alpha=0.1, fit_intercept=False, max_iter=max_iter, tol=1e-7)
    d_clf.fit(X_train.toarray(), y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    # check that the coefs are sparse
    assert np.sum(s_clf.coef_ != 0.0) == n_informative
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_enet_multitarget(csc_container):
    """Check multi-target ElasticNet equals per-target single fits."""
    n_targets = 3
    X, y = make_sparse_data(csc_container, n_targets=n_targets)

    estimator = ElasticNet(alpha=0.01, precompute=False)
    # XXX: There is a bug when precompute is not False!
    estimator.fit(X, y)
    coef, intercept, dual_gap = (
        estimator.coef_,
        estimator.intercept_,
        estimator.dual_gap_,
    )

    # Refit one target at a time and compare against the joint fit.
    for k in range(n_targets):
        estimator.fit(X, y[:, k])
        assert_array_almost_equal(coef[k, :], estimator.coef_)
        assert_array_almost_equal(intercept[k], estimator.intercept_)
        assert_array_almost_equal(dual_gap[k], estimator.dual_gap_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_path_parameters(csc_container):
    """Check ElasticNetCV path parameters and sparse/dense MSE agreement."""
    X, y = make_sparse_data(csc_container)
    max_iter = 50
    n_alphas = 10
    clf = ElasticNetCV(
        # NOTE(review): an int here is taken as the *number* of alphas on the
        # regularization path, not a list of alpha values — confirm against
        # the ElasticNetCV version in use.
        alphas=n_alphas,
        eps=1e-3,
        max_iter=max_iter,
        l1_ratio=0.5,
        fit_intercept=False,
    )
    clf.fit(X, y)
    # Constructor parameters must be stored unchanged.
    assert_almost_equal(0.5, clf.l1_ratio)
    assert clf.alphas == n_alphas
    assert len(clf.alphas_) == n_alphas
    sparse_mse_path = clf.mse_path_
    # compare with dense data
    clf.fit(X.toarray(), y)
    assert_almost_equal(clf.mse_path_, sparse_mse_path)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("Model", [Lasso, ElasticNet, LassoCV, ElasticNetCV])
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("n_samples, n_features", [(24, 6), (6, 24)])
@pytest.mark.parametrize("with_sample_weight", [True, False])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_sparse_dense_equality(
    Model, fit_intercept, n_samples, n_features, with_sample_weight, csc_container
):
    """Check sparse and dense fits agree for several coordinate-descent models,
    in both the over- and under-determined regimes, with and without weights.
    """
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        effective_rank=n_features // 2,
        n_informative=n_features // 2,
        bias=4 * fit_intercept,
        noise=1,
        random_state=42,
    )
    if with_sample_weight:
        sw = np.abs(np.random.RandomState(42).normal(scale=10, size=y.shape))
    else:
        sw = None
    Xs = csc_container(X)
    params = {"fit_intercept": fit_intercept, "tol": 1e-6}
    reg_dense = Model(**params).fit(X, y, sample_weight=sw)
    reg_sparse = Model(**params).fit(Xs, y, sample_weight=sw)
    if fit_intercept:
        assert reg_sparse.intercept_ == pytest.approx(reg_dense.intercept_)
        # balance property
        assert np.average(reg_sparse.predict(X), weights=sw) == pytest.approx(
            np.average(y, weights=sw)
        )
    assert_allclose(reg_sparse.coef_, reg_dense.coef_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_same_output_sparse_dense_lasso_and_enet_cv(csc_container):
    """Check the CV estimators pick identical models on sparse vs. dense X."""
    X, y = make_sparse_data(csc_container, n_samples=40, n_features=10)
    clfs = ElasticNetCV(max_iter=100, tol=1e-7)
    clfs.fit(X, y)
    clfd = ElasticNetCV(max_iter=100, tol=1e-7)
    clfd.fit(X.toarray(), y)
    assert_allclose(clfs.alpha_, clfd.alpha_)
    assert_allclose(clfs.intercept_, clfd.intercept_)
    assert_allclose(clfs.mse_path_, clfd.mse_path_)
    assert_allclose(clfs.alphas_, clfd.alphas_)

    # Same check for LassoCV.
    clfs = LassoCV(max_iter=100, cv=4, tol=1e-8)
    clfs.fit(X, y)
    clfd = LassoCV(max_iter=100, cv=4, tol=1e-8)
    clfd.fit(X.toarray(), y)
    assert_allclose(clfs.alpha_, clfd.alpha_)
    assert_allclose(clfs.intercept_, clfd.intercept_)
    assert_allclose(clfs.mse_path_, clfd.mse_path_)
    assert_allclose(clfs.alphas_, clfd.alphas_)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_same_multiple_output_sparse_dense(coo_container):
    """Multi-output ElasticNet predicts identically for dense and COO input."""
    X = [
        [0, 1, 2, 3, 4],
        [0, 2, 5, 8, 11],
        [9, 10, 11, 12, 13],
        [10, 11, 12, 13, 14],
    ]
    y = [
        [1, 2, 3, 4, 5],
        [1, 3, 6, 9, 12],
        [10, 11, 12, 13, 14],
        [11, 12, 13, 14, 15],
    ]
    sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1)

    # Fit on the dense lists and predict a single sample.
    dense_model = ElasticNet()
    dense_model.fit(X, y)
    predict_dense = dense_model.predict(sample)

    # The same data in COO format must produce the same prediction.
    sparse_model = ElasticNet()
    sparse_model.fit(coo_container(X), y)
    predict_sparse = sparse_model.predict(coo_container(sample))

    assert_array_almost_equal(predict_sparse, predict_dense)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_sparse_enet_coordinate_descent(csc_container):
    """Test that a warning is issued if model does not converge"""
    clf = Lasso(
        alpha=1e-10, fit_intercept=False, warm_start=True, max_iter=2, tol=1e-10
    )
    # Set initial coefficients to very bad values.
    # warm_start=True makes the solver start from this corrupted state, so
    # two iterations with a tiny tolerance cannot converge.
    clf.coef_ = np.array([1, 1, 1, 1000])
    X = np.array([[-1, -1, 1, 1], [1, 1, -1, -1]])
    X = csc_container(X)
    y = np.array([-1, 1])
    warning_message = (
        "Objective did not converge. You might want "
        "to increase the number of iterations."
    )
    with pytest.warns(ConvergenceWarning, match=warning_message):
        clf.fit(X, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("copy_X", (True, False))
def test_sparse_read_only_buffer(copy_X):
    """Test that sparse coordinate descent works for read-only buffers"""
    rng = np.random.RandomState(0)

    clf = ElasticNet(alpha=0.1, copy_X=copy_X, random_state=rng)
    X = sp.random(100, 20, format="csc", random_state=rng)

    # Make X.data read-only
    # (memmap-backed arrays are not writable; fit must not try to write).
    X.data = create_memmap_backed_data(X.data)

    y = rng.rand(100)
    clf.fit(X, y)
|
||||
@@ -0,0 +1,296 @@
|
||||
"""
|
||||
Testing for Theil-Sen module (sklearn.linear_model.theil_sen)
|
||||
"""
|
||||
|
||||
# Authors: The scikit-learn developers
|
||||
# SPDX-License-Identifier: BSD-3-Clause
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
from numpy.testing import (
|
||||
assert_array_almost_equal,
|
||||
assert_array_equal,
|
||||
assert_array_less,
|
||||
)
|
||||
from scipy.linalg import norm
|
||||
from scipy.optimize import fmin_bfgs
|
||||
|
||||
from sklearn.exceptions import ConvergenceWarning
|
||||
from sklearn.linear_model import LinearRegression, TheilSenRegressor
|
||||
from sklearn.linear_model._theil_sen import (
|
||||
_breakdown_point,
|
||||
_modified_weiszfeld_step,
|
||||
_spatial_median,
|
||||
)
|
||||
from sklearn.utils._testing import assert_almost_equal
|
||||
|
||||
|
||||
@contextmanager
def no_stdout_stderr():
    """Temporarily silence ``sys.stdout`` and ``sys.stderr``.

    Redirects both streams to ``os.devnull`` for the duration of the
    ``with`` block and always restores the original streams — even when the
    body raises. (The previous implementation skipped the restore on an
    exception, leaving stdout/stderr pointing at a closed devnull handle.)
    """
    old_stdout = sys.stdout
    old_stderr = sys.stderr
    with open(os.devnull, "w") as devnull:
        sys.stdout = devnull
        sys.stderr = devnull
        try:
            yield
            devnull.flush()
        finally:
            # Restore the real streams no matter how the body exits.
            sys.stdout = old_stdout
            sys.stderr = old_stderr
|
||||
|
||||
|
||||
def gen_toy_problem_1d(intercept=True):
    """Generate a noisy 1d linear problem ``y = 3*x + c`` with outliers.

    Returns ``(X, y, w, c)`` where ``X`` has shape ``(n_samples, 1)``,
    ``w`` is the true slope, and ``c`` the true offset.
    """
    rng = np.random.RandomState(0)
    slope = 3.0
    # Smaller sample with a real intercept, larger sample with a tiny one.
    if intercept:
        offset, n_samples = 2.0, 50
    else:
        offset, n_samples = 0.1, 100
    x = rng.normal(size=n_samples)
    # Linear model y = 3*x + offset + N(0, 0.1**2)
    y = slope * x + offset + 0.1 * rng.normal(size=n_samples)
    # Plant a handful of outliers that least squares cannot resist.
    if intercept:
        corruptions = [(42, -2, 4), (43, -2.5, 8), (33, 2.5, 1), (49, 2.1, 2)]
    else:
        corruptions = [
            (42, -2, 4),
            (43, -2.5, 8),
            (53, 2.5, 1),
            (60, 2.1, 2),
            (72, 1.8, -7),
        ]
    for idx, xi, yi in corruptions:
        x[idx], y[idx] = xi, yi
    return x[:, np.newaxis], y, slope, offset
|
||||
|
||||
|
||||
def gen_toy_problem_2d():
    """Generate ``y = 5*x_1 + 10*x_2 + 1`` with noise and ~10% gross outliers."""
    rng = np.random.RandomState(0)
    n_samples = 100
    X = rng.normal(size=(n_samples, 2))
    true_coef = np.array([5.0, 10.0])
    true_offset = 1.0
    # Linear model plus N(0, 0.1**2) noise.
    y = X @ true_coef + true_offset + 0.1 * rng.normal(size=n_samples)
    # Corrupt roughly one tenth of the targets with large random values.
    n_outliers = n_samples // 10
    bad_rows = rng.randint(0, n_samples, size=n_outliers)
    y[bad_rows] = 50 * rng.normal(size=n_outliers)
    return X, y, true_coef, true_offset
|
||||
|
||||
|
||||
def gen_toy_problem_4d():
    """Generate ``y = 5*x_1 + 10*x_2 + 42*x_3 + 7*x_4 + 1`` with ~10% outliers."""
    rng = np.random.RandomState(0)
    n_samples = 10000
    X = rng.normal(size=(n_samples, 4))
    true_coef = np.array([5.0, 10.0, 42.0, 7.0])
    true_offset = 1.0
    # Linear model plus N(0, 0.1**2) noise.
    y = X @ true_coef + true_offset + 0.1 * rng.normal(size=n_samples)
    # Corrupt roughly one tenth of the targets with large random values.
    n_outliers = n_samples // 10
    bad_rows = rng.randint(0, n_samples, size=n_outliers)
    y[bad_rows] = 50 * rng.normal(size=n_outliers)
    return X, y, true_coef, true_offset
|
||||
|
||||
|
||||
def test_modweiszfeld_step_1d():
    """Check one modified Weiszfeld step on 1d data and the fixed points."""
    X = np.array([1.0, 2.0, 3.0]).reshape(3, 1)
    # Check startvalue is element of X and solution
    median = 2.0
    new_y = _modified_weiszfeld_step(X, median)
    assert_array_almost_equal(new_y, median)
    # Check startvalue is not the solution
    # One step must move strictly toward the median.
    y = 2.5
    new_y = _modified_weiszfeld_step(X, y)
    assert_array_less(median, new_y)
    assert_array_less(new_y, y)
    # Check startvalue is not the solution but element of X
    y = 3.0
    new_y = _modified_weiszfeld_step(X, y)
    assert_array_less(median, new_y)
    assert_array_less(new_y, y)
    # Check that a single vector is identity
    X = np.array([1.0, 2.0, 3.0]).reshape(1, 3)
    y = X[0]
    new_y = _modified_weiszfeld_step(X, y)
    assert_array_equal(y, new_y)
|
||||
|
||||
|
||||
def test_modweiszfeld_step_2d():
    """Check modified Weiszfeld iterates and fixed point on a 2d triangle."""
    X = np.array([0.0, 0.0, 1.0, 1.0, 0.0, 1.0]).reshape(3, 2)
    y = np.array([0.5, 0.5])
    # Check first two iterations
    # (expected values are precomputed constants for this start point).
    new_y = _modified_weiszfeld_step(X, y)
    assert_array_almost_equal(new_y, np.array([1 / 3, 2 / 3]))
    new_y = _modified_weiszfeld_step(X, new_y)
    assert_array_almost_equal(new_y, np.array([0.2792408, 0.7207592]))
    # Check fix point
    y = np.array([0.21132505, 0.78867497])
    new_y = _modified_weiszfeld_step(X, y)
    assert_array_almost_equal(new_y, y)
|
||||
|
||||
|
||||
def test_spatial_median_1d():
    """In 1d the spatial median must coincide with the ordinary median."""
    X = np.array([1.0, 2.0, 3.0]).reshape(3, 1)
    true_median = 2.0
    _, median = _spatial_median(X)
    assert_array_almost_equal(median, true_median)
    # Test larger problem and for exact solution in 1d case
    random_state = np.random.RandomState(0)
    X = random_state.randint(100, size=(1000, 1))
    true_median = np.median(X.ravel())
    _, median = _spatial_median(X)
    assert_array_equal(median, true_median)
|
||||
|
||||
|
||||
def test_spatial_median_2d():
    """Check the 2d spatial median against a direct BFGS optimization."""
    X = np.array([0.0, 0.0, 1.0, 1.0, 0.0, 1.0]).reshape(3, 2)
    _, median = _spatial_median(X, max_iter=100, tol=1.0e-6)

    def cost_func(y):
        # Sum of Euclidean distances: the Fermat-Weber objective.
        dists = np.array([norm(x - y) for x in X])
        return np.sum(dists)

    # Check if median is solution of the Fermat-Weber location problem
    fermat_weber = fmin_bfgs(cost_func, median, disp=False)
    assert_array_almost_equal(median, fermat_weber)
    # Check when maximum iteration is exceeded a warning is emitted
    warning_message = "Maximum number of iterations 30 reached in spatial median."
    with pytest.warns(ConvergenceWarning, match=warning_message):
        _spatial_median(X, max_iter=30, tol=0.0)
|
||||
|
||||
|
||||
def test_theil_sen_1d():
    """Theil-Sen recovers the 1d model where least squares is thrown off."""
    X, y, w, c = gen_toy_problem_1d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert np.abs(lstq.coef_ - w) > 0.9
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_theil_sen_1d_no_intercept():
    """Same as the 1d test but with fit_intercept=False."""
    X, y, w, c = gen_toy_problem_1d(intercept=False)
    # Check that Least Squares fails
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert np.abs(lstq.coef_ - w - c) > 0.5
    # Check that Theil-Sen works
    # Without an intercept the small true offset folds into the slope.
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w + c, 1)
    assert_almost_equal(theil_sen.intercept_, 0.0)

    # non-regression test for #18104
    theil_sen.score(X, y)
|
||||
|
||||
|
||||
def test_theil_sen_2d():
    """Theil-Sen recovers the 2d model despite the planted outliers."""
    X, y, w, c = gen_toy_problem_2d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert norm(lstq.coef_ - w) > 1.0
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(max_subpopulation=1e3, random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_calc_breakdown_point():
    """For n -> inf and 2 subsamples the breakdown point tends to 1 - 1/sqrt(2)."""
    bp = _breakdown_point(1e10, 2)
    assert np.abs(bp - 1 + 1 / (np.sqrt(2))) < 1.0e-6
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "param, ExceptionCls, match",
    [
        # n_subsamples smaller than n_features + 1 is rejected.
        (
            {"n_subsamples": 1},
            ValueError,
            re.escape("Invalid parameter since n_features+1 > n_subsamples (2 > 1)"),
        ),
        # n_subsamples larger than the number of samples is rejected.
        (
            {"n_subsamples": 101},
            ValueError,
            re.escape("Invalid parameter since n_subsamples > n_samples (101 > 50)"),
        ),
    ],
)
def test_checksubparams_invalid_input(param, ExceptionCls, match):
    """Invalid ``n_subsamples`` settings must raise with a clear message."""
    X, y, w, c = gen_toy_problem_1d()
    theil_sen = TheilSenRegressor(**param, random_state=0)
    with pytest.raises(ExceptionCls, match=match):
        theil_sen.fit(X, y)
|
||||
|
||||
|
||||
def test_checksubparams_n_subsamples_if_less_samples_than_features():
    """n_subsamples < n_samples is invalid when n_samples < n_features."""
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0)
    with pytest.raises(ValueError):
        theil_sen.fit(X, y)
|
||||
|
||||
|
||||
def test_subpopulation():
    """A small max_subpopulation still recovers the 4d model approximately."""
    X, y, w, c = gen_toy_problem_4d()
    theil_sen = TheilSenRegressor(max_subpopulation=250, random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_subsamples():
    """With n_subsamples == n_samples Theil-Sen degenerates to least squares."""
    X, y, w, c = gen_toy_problem_4d()
    theil_sen = TheilSenRegressor(n_subsamples=X.shape[0], random_state=0).fit(X, y)
    lstq = LinearRegression().fit(X, y)
    # Check for exact the same results as Least Squares
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 9)
|
||||
|
||||
|
||||
@pytest.mark.thread_unsafe  # manually captured stdout
def test_verbosity():
    """Smoke test: verbose fitting must not crash (output is discarded)."""
    X, y, w, c = gen_toy_problem_1d()
    # Check that Theil-Sen can be verbose
    with no_stdout_stderr():
        TheilSenRegressor(verbose=True, random_state=0).fit(X, y)
        TheilSenRegressor(verbose=True, max_subpopulation=10, random_state=0).fit(X, y)
|
||||
|
||||
|
||||
def test_theil_sen_parallel():
    """Parallel fitting (n_jobs=2) gives the same robust estimate."""
    X, y, w, c = gen_toy_problem_2d()
    # Check that Least Squares fails
    lstq = LinearRegression().fit(X, y)
    assert norm(lstq.coef_ - w) > 1.0
    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(n_jobs=2, random_state=0, max_subpopulation=2e3).fit(
        X, y
    )
    assert_array_almost_equal(theil_sen.coef_, w, 1)
    assert_array_almost_equal(theil_sen.intercept_, c, 1)
|
||||
|
||||
|
||||
def test_less_samples_than_features():
    """Underdetermined problems: Theil-Sen must fall back gracefully."""
    random_state = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = random_state.normal(size=(n_samples, n_features))
    y = random_state.normal(size=n_samples)
    # Check that Theil-Sen falls back to Least Squares if fit_intercept=False
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    lstq = LinearRegression(fit_intercept=False).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, lstq.coef_, 12)
    # Check fit_intercept=True case. This will not be equal to the Least
    # Squares solution since the intercept is calculated differently.
    theil_sen = TheilSenRegressor(fit_intercept=True, random_state=0).fit(X, y)
    y_pred = theil_sen.predict(X)
    assert_array_almost_equal(y_pred, y, 12)
|
||||
Reference in New Issue
Block a user