This commit is contained in:
2026-04-10 15:06:59 +02:00
parent 3031b7153b
commit e5a4711004
7806 changed files with 1918528 additions and 335 deletions

View File

@@ -0,0 +1,846 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import warnings
import numpy as np
import pytest
from scipy import linalg, sparse
from sklearn.datasets import load_iris, make_regression, make_sparse_uncorrelated
from sklearn.linear_model import LinearRegression
from sklearn.linear_model._base import (
_preprocess_data,
_rescale_data,
make_dataset,
)
from sklearn.preprocessing import add_dummy_feature
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import (
COO_CONTAINERS,
CSC_CONTAINERS,
CSR_CONTAINERS,
LIL_CONTAINERS,
)
# Relative tolerance used when comparing float32 results against float64 ones.
rtol = 1e-6
def test_linear_regression():
    """Check LinearRegression on tiny hand-made datasets."""
    # Each case holds: training inputs, training targets, expected
    # coefficients, expected intercept and expected training predictions.
    cases = (
        # a simple dataset: two points on the identity line
        ([[1], [2]], [1, 2], [1], [0], [1, 2]),
        # degenerate input: a single sample
        ([[1]], [0], [0], [0], [0]),
    )
    for features, targets, coef, intercept, predictions in cases:
        model = LinearRegression()
        model.fit(features, targets)
        assert_array_almost_equal(model.coef_, coef)
        assert_array_almost_equal(model.intercept_, intercept)
        assert_array_almost_equal(model.predict(features), predictions)
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_linear_regression_sample_weights(
    sparse_container, fit_intercept, global_random_seed
):
    """Compare a weighted fit with the closed-form weighted least squares."""
    rng = np.random.RandomState(global_random_seed)

    # More samples than features: the comparison would not work with
    # under-determined systems.
    n_samples, n_features = 6, 5
    X = rng.normal(size=(n_samples, n_features))
    if sparse_container is not None:
        X = sparse_container(X)
    y = rng.normal(size=n_samples)
    sample_weight = 1.0 + rng.uniform(size=n_samples)

    # Estimator fitted with explicit sample weights.
    model = LinearRegression(fit_intercept=fit_intercept, tol=1e-16)
    model.fit(X, y, sample_weight=sample_weight)
    fitted_coef = model.coef_
    fitted_intercept = model.intercept_
    assert model.coef_.shape == (X.shape[1],)  # sanity check

    # Closed form of the weighted least squares:
    # theta = (X^T W X)^(-1) @ X^T W y
    weight_matrix = np.diag(sample_weight)
    if fit_intercept:
        design = add_dummy_feature(X)
    else:
        design = X
    weighted_design_t = design.T @ weight_matrix
    gram = weighted_design_t @ design
    moment = weighted_design_t @ y
    solution = linalg.solve(gram, moment)

    if fit_intercept:
        # The first entry of the solution belongs to the dummy feature, i.e.
        # it plays the role of the intercept.
        assert_allclose(fitted_coef, solution[1:])
        assert_allclose(fitted_intercept, solution[0])
    else:
        assert_allclose(fitted_coef, solution)
def test_raises_value_error_if_positive_and_sparse():
    """Fitting with positive=True on sparse X must raise a TypeError."""
    expected_message = "Sparse data was passed for X, but dense data is required."

    # X must not be sparse if positive == True
    sparse_X = sparse.eye(10)
    targets = np.ones(10)
    model = LinearRegression(positive=True)

    with pytest.raises(TypeError, match=expected_message):
        model.fit(sparse_X, targets)
@pytest.mark.parametrize("n_samples, n_features", [(2, 3), (3, 2)])
def test_raises_value_error_if_sample_weights_greater_than_1d(n_samples, n_features):
    """Sample weights must be either scalar or 1D.

    Valid weights (a 1D array and two scalars) must be accepted by
    ``LinearRegression.fit``; 2D weights, whether shaped as a column or as a
    row, must be rejected with a ``ValueError``.
    """
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    sample_weights_OK = rng.randn(n_samples) ** 2 + 1
    sample_weights_OK_1 = 1.0
    sample_weights_OK_2 = 2.0
    # 2D views of otherwise valid weights: column vector and row vector.
    sample_weights_not_OK = sample_weights_OK[:, np.newaxis]
    sample_weights_not_OK_2 = sample_weights_OK[np.newaxis, :]

    reg = LinearRegression()

    # make sure the "OK" sample weights actually work
    reg.fit(X, y, sample_weights_OK)
    reg.fit(X, y, sample_weights_OK_1)
    reg.fit(X, y, sample_weights_OK_2)

    # This is the check the test name promises: anything that is not 1D
    # must raise.
    msg = "Sample weights must be 1D array or scalar"
    for bad_weights in (sample_weights_not_OK, sample_weights_not_OK_2):
        with pytest.raises(ValueError, match=msg):
            reg.fit(X, y, bad_weights)
def test_fit_intercept():
    """Check that the shape of coef_ does not depend on fit_intercept."""
    X_two = np.array([[0.38349978, 0.61650022], [0.58853682, 0.41146318]])
    X_three = np.array(
        [[0.27677969, 0.70693172, 0.01628859], [0.08385139, 0.20692515, 0.70922346]]
    )
    targets = np.array([1, 1])

    def coef_for(data, **params):
        # Fit a fresh estimator and hand back its coefficients only.
        return LinearRegression(**params).fit(data, targets).coef_

    coef2_no_intercept = coef_for(X_two, fit_intercept=False)
    coef2_intercept = coef_for(X_two)
    coef3_no_intercept = coef_for(X_three, fit_intercept=False)
    coef3_intercept = coef_for(X_three)

    # Same shape with and without intercept, for both feature counts.
    assert coef2_intercept.shape == coef2_no_intercept.shape
    assert coef3_intercept.shape == coef3_no_intercept.shape
    # The number of dimensions does not depend on the number of features.
    assert coef2_no_intercept.ndim == coef3_no_intercept.ndim
def test_linear_regression_sparse(global_random_seed):
    """Check that linear regression also works with sparse data."""
    rng = np.random.RandomState(global_random_seed)
    n_dims = 100
    identity = sparse.eye(n_dims, n_dims)
    true_beta = rng.rand(n_dims)
    flat_targets = (identity @ true_beta).ravel()

    model = LinearRegression()
    model.fit(identity, flat_targets)

    assert_array_almost_equal(true_beta, model.coef_ + model.intercept_)
    residuals = model.predict(identity) - flat_targets
    assert_array_almost_equal(residuals, 0)
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_linear_regression_sparse_equal_dense(fit_intercept, csr_container):
    """Check that dense and CSR inputs lead to the same fitted model."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 200, 2
    X_dense = rng.randn(n_samples, n_features)
    # Zero out every entry below 0.1 before building the CSR version.
    X_dense[X_dense < 0.1] = 0.0
    X_sparse = csr_container(X_dense)
    y = rng.rand(n_samples)

    dense_model = LinearRegression(fit_intercept=fit_intercept)
    sparse_model = LinearRegression(fit_intercept=fit_intercept)
    dense_model.fit(X_dense, y)
    sparse_model.fit(X_sparse, y)

    assert dense_model.intercept_ == pytest.approx(sparse_model.intercept_)
    assert_allclose(dense_model.coef_, sparse_model.coef_)
def test_linear_regression_multiple_outcome():
    """Check that a two-target fit agrees with the single-target fit."""
    rng = np.random.RandomState(0)
    X, y = make_regression(random_state=rng)
    # Duplicate the target into two identical output columns.
    Y_two = np.vstack((y, y)).T

    model = LinearRegression()
    model.fit(X, Y_two)
    assert model.coef_.shape == (2, X.shape[1])
    multi_pred = model.predict(X)

    # Refit the same estimator on the single target and compare predictions.
    model.fit(X, y)
    single_pred = model.predict(X)
    stacked_single = np.vstack((single_pred, single_pred)).T
    assert_array_almost_equal(stacked_single, multi_pred, decimal=3)
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_linear_regression_sparse_multiple_outcome(global_random_seed, coo_container):
    """Check multiple-outcome linear regression with sparse (COO) data."""
    rng = np.random.RandomState(global_random_seed)
    X_dense, y = make_sparse_uncorrelated(random_state=rng)
    X = coo_container(X_dense)
    # Duplicate the target into two identical output columns.
    Y_two = np.vstack((y, y)).T

    model = LinearRegression()
    model.fit(X, Y_two)
    assert model.coef_.shape == (2, X.shape[1])
    multi_pred = model.predict(X)

    # Refit the same estimator on the flattened single target.
    model.fit(X, y.ravel())
    single_pred = model.predict(X)
    stacked_single = np.vstack((single_pred, single_pred)).T
    assert_array_almost_equal(stacked_single, multi_pred, decimal=3)
def test_linear_regression_positive():
    """Check nonnegative LinearRegression on tiny hand-made datasets."""
    # Each case holds: inputs, targets, expected coefficients, expected
    # predictions and the comparison helper used for that case. The expected
    # intercept is [0] in both cases.
    cases = (
        # a simple dataset: two points on the identity line
        ([[1], [2]], [1, 2], [1], [1, 2], assert_array_almost_equal),
        # degenerate input: a single sample
        ([[1]], [0], [0], [0], assert_allclose),
    )
    for features, targets, coef, predictions, check in cases:
        model = LinearRegression(positive=True)
        model.fit(features, targets)
        check(model.coef_, coef)
        check(model.intercept_, [0])
        check(model.predict(features), predictions)
def test_linear_regression_positive_multiple_outcome(global_random_seed):
    """Check multiple-outcome nonnegative linear regression."""
    rng = np.random.RandomState(global_random_seed)
    X, y = make_sparse_uncorrelated(random_state=rng)
    # Duplicate the target into two identical output columns.
    Y_two = np.vstack((y, y)).T

    model = LinearRegression(positive=True)
    model.fit(X, Y_two)
    assert model.coef_.shape == (2, X.shape[1])
    # The positivity constraint must hold for every coefficient.
    assert np.all(model.coef_ >= 0.0)
    multi_pred = model.predict(X)

    # Refit the same estimator on the flattened single target.
    model.fit(X, y.ravel())
    single_pred = model.predict(X)
    assert_allclose(np.vstack((single_pred, single_pred)).T, multi_pred)
def test_linear_regression_positive_vs_nonpositive(global_random_seed):
    """Check that positive=True and positive=False give different fits here."""
    rng = np.random.RandomState(global_random_seed)
    X, y = make_sparse_uncorrelated(random_state=rng)

    constrained = LinearRegression(positive=True)
    constrained.fit(X, y)
    unconstrained = LinearRegression(positive=False)
    unconstrained.fit(X, y)

    squared_gap = (constrained.coef_ - unconstrained.coef_) ** 2
    assert np.mean(squared_gap) > 1e-3
def test_linear_regression_positive_vs_nonpositive_when_positive(global_random_seed):
    """Check that both variants agree when the true coefficients are positive."""
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features = 200, 4
    X = rng.rand(n_samples, n_features)
    # Noise-free target built with strictly positive weights.
    y = X[:, 0] + 2 * X[:, 1] + 3 * X[:, 2] + 1.5 * X[:, 3]

    constrained = LinearRegression(positive=True)
    constrained.fit(X, y)
    unconstrained = LinearRegression(positive=False)
    unconstrained.fit(X, y)

    squared_gap = (constrained.coef_ - unconstrained.coef_) ** 2
    assert np.mean(squared_gap) < 1e-6
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
@pytest.mark.parametrize("use_sw", [True, False])
def test_inplace_data_preprocessing(sparse_container, use_sw, global_random_seed):
    """Check which inputs LinearRegression.fit may modify inplace."""
    rng = np.random.RandomState(global_random_seed)
    original_X_data = rng.randn(10, 12)
    original_y_data = rng.randn(10, 2)
    original_sw_data = rng.rand(10)

    is_sparse = sparse_container is not None
    X = sparse_container(original_X_data) if is_sparse else original_X_data.copy()
    y = original_y_data.copy()
    # XXX: Note that y_sparse is not supported (broken?) in the current
    # implementation of LinearRegression.
    sample_weight = original_sw_data.copy() if use_sw else None

    # With the default parameters, no input may be modified inplace.
    LinearRegression().fit(X, y, sample_weight=sample_weight)
    assert_allclose(X.toarray() if is_sparse else X, original_X_data)
    assert_allclose(y, original_y_data)
    if use_sw:
        assert_allclose(sample_weight, original_sw_data)

    # With copy_X=False, inplace preprocessing of X is allowed.
    LinearRegression(copy_X=False).fit(X, y, sample_weight=sample_weight)
    if is_sparse:
        # No optimization relying on the inplace modification of sparse input
        # data has been implemented at this time.
        assert_allclose(X.toarray(), original_X_data)
    else:
        # X has been offset (and optionally rescaled by sample weights)
        # inplace. The 0.42 threshold is arbitrary and has been found to be
        # robust to any random seed in the admissible range.
        assert np.linalg.norm(X - original_X_data) > 0.42

    # y should not have been modified inplace by LinearRegression.fit.
    assert_allclose(y, original_y_data)
    if use_sw:
        # Sample weights have no reason to ever be modified inplace.
        assert_allclose(sample_weight, original_sw_data)
def test_linear_regression_pd_sparse_dataframe_warning():
    """Check the warning raised for DataFrames mixing dense and sparse columns.

    A ``UserWarning`` is expected when only some of the columns passed as X
    are sparse; no ``UserWarning`` may be emitted once every column of the
    dataframe is sparse.
    """
    pd = pytest.importorskip("pandas")
    # Seeded generator so that the test data is reproducible.
    rng = np.random.RandomState(0)

    # Column "0" starts dense; columns "1" to "3" are sparse.
    df = pd.DataFrame({"0": rng.randn(10)})
    for col in range(1, 4):
        arr = rng.randn(10)
        arr[:8] = 0
        df[str(col)] = pd.arrays.SparseArray(arr, fill_value=0)

    msg = "pandas.DataFrame with sparse columns found."
    reg = LinearRegression()
    # X mixes the dense column "0" with the sparse column "1".
    with pytest.warns(UserWarning, match=msg):
        reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])

    # does not warn when the whole dataframe is sparse
    df["0"] = pd.arrays.SparseArray(df["0"], fill_value=0)
    assert hasattr(df, "sparse")

    with warnings.catch_warnings():
        # Turn any UserWarning into an error so that it fails the test.
        warnings.simplefilter("error", UserWarning)
        reg.fit(df.iloc[:, 0:2], df.iloc[:, 3])
def test_preprocess_data(global_random_seed):
    """Check the offsets and outputs of _preprocess_data on dense data."""
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features = 200, 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    expected_X_mean = X.mean(axis=0)
    expected_y_mean = y.mean(axis=0)

    # Without intercept the reported offsets are zero and the data comes
    # back unchanged; with intercept the means are reported and subtracted.
    scenarios = (
        (False, np.zeros(n_features), 0),
        (True, expected_X_mean, expected_y_mean),
    )
    for fit_intercept, X_offset, y_offset in scenarios:
        X_out, y_out, X_mean, y_mean, X_scale, sqrt_sw = _preprocess_data(
            X, y, fit_intercept=fit_intercept
        )
        assert_array_almost_equal(X_mean, X_offset)
        assert_array_almost_equal(y_mean, y_offset)
        assert_array_almost_equal(X_scale, np.ones(n_features))
        assert sqrt_sw is None
        assert_array_almost_equal(X_out, X - X_offset)
        assert_array_almost_equal(y_out, y - y_offset)
@pytest.mark.parametrize("sparse_container", [None] + CSC_CONTAINERS)
def test_preprocess_data_multioutput(global_random_seed, sparse_container):
    """Check the centering of a 2D target by _preprocess_data."""
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features, n_outputs = 200, 3, 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples, n_outputs)
    expected_y_mean = np.mean(y, axis=0)
    if sparse_container is not None:
        X = sparse_container(X)

    # Only the transformed target (index 1) and its offset (index 3) are
    # inspected here.
    outputs = _preprocess_data(X, y, fit_intercept=False)
    y_out, y_offset = outputs[1], outputs[3]
    assert_array_almost_equal(y_offset, np.zeros(n_outputs))
    assert_array_almost_equal(y_out, y)

    outputs = _preprocess_data(X, y, fit_intercept=True)
    y_out, y_offset = outputs[1], outputs[3]
    assert_array_almost_equal(y_offset, expected_y_mean)
    assert_array_almost_equal(y_out, y - y_offset)
@pytest.mark.parametrize("rescale_with_sw", [False, True])
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
def test_preprocess_data_weighted(
    rescale_with_sw, sparse_container, global_random_seed
):
    """Check _preprocess_data with sample weights on dense and sparse X.

    The reported offsets must be the weighted means of X and y, and
    ``X_scale`` must be all ones. Dense X is expected to be centered, sparse
    X is expected to be left uncentered, and both X and y are expected to be
    multiplied by ``sqrt(sample_weight)`` only when ``rescale_with_sw`` is
    true.
    """
    rng = np.random.RandomState(global_random_seed)
    n_samples = 200
    n_features = 4
    # Generate random data with 50% of zero values to make sure
    # that the sparse variant of this test is actually sparse. This also
    # shifts the mean value for each columns in X further away from
    # zero.
    X = rng.rand(n_samples, n_features)
    X[X < 0.5] = 0.0

    # Scale the first feature of X to be 10 larger than the other to
    # better check the impact of feature scaling.
    X[:, 0] *= 10

    # Constant non-zero feature.
    X[:, 2] = 1.0

    # Constant zero feature (non-materialized in the sparse case)
    X[:, 3] = 0.0
    y = rng.rand(n_samples)

    sample_weight = np.abs(rng.rand(n_samples)) + 1
    expected_X_mean = np.average(X, axis=0, weights=sample_weight)
    expected_y_mean = np.average(y, axis=0, weights=sample_weight)

    # Sanity check of the fixture itself: only the last two features have a
    # (numerically) zero weighted variance.
    X_sample_weight_var = np.average(
        (X - expected_X_mean) ** 2, weights=sample_weight, axis=0
    )
    constant_mask = X_sample_weight_var < 10 * np.finfo(X.dtype).eps
    assert_array_equal(constant_mask, [0, 0, 1, 1])

    is_sparse = sparse_container is not None
    if is_sparse:
        X = sparse_container(X)

    Xt, yt, X_mean, y_mean, X_scale, sqrt_sw = _preprocess_data(
        X,
        y,
        fit_intercept=True,
        sample_weight=sample_weight,
        rescale_with_sw=rescale_with_sw,
    )
    if is_sparse:
        # Simplifies asserts
        X = X.toarray()
        Xt = Xt.toarray()

    assert_array_almost_equal(X_mean, expected_X_mean)
    assert_array_almost_equal(y_mean, expected_y_mean)
    assert_array_almost_equal(X_scale, np.ones(n_features))

    # Sparse X is not centered; dense X is.
    expected_Xt = X if is_sparse else X - expected_X_mean
    expected_yt = y - expected_y_mean
    if rescale_with_sw:
        assert_allclose(sqrt_sw, np.sqrt(sample_weight))
        expected_Xt = sqrt_sw[:, None] * expected_Xt
        expected_yt = sqrt_sw * expected_yt
    else:
        assert sqrt_sw is None

    assert_allclose(Xt, expected_Xt)
    assert_allclose(yt, expected_yt)
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_sparse_preprocess_data_offsets(global_random_seed, lil_container):
    """Check the offsets reported by _preprocess_data for sparse (LIL) input."""
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features = 200, 2
    X = lil_container(
        sparse.rand(n_samples, n_features, density=0.5, random_state=rng)
    )
    y = rng.rand(n_samples)
    X_dense = X.toarray()

    def check(fit_intercept, X_offset, y_offset):
        X_out, y_out, X_mean, y_mean, X_scale, sqrt_sw = _preprocess_data(
            X, y, fit_intercept=fit_intercept
        )
        assert_array_almost_equal(X_mean, X_offset)
        assert_array_almost_equal(y_mean, y_offset)
        assert_array_almost_equal(X_scale, np.ones(n_features))
        assert sqrt_sw is None
        # The sparse X comes back with unchanged values in both settings.
        assert_array_almost_equal(X_out.toarray(), X_dense)
        assert_array_almost_equal(y_out, y - y_offset)

    check(False, np.zeros(n_features), 0)
    check(True, np.mean(X_dense, axis=0), np.mean(y, axis=0))
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_csr_preprocess_data(csr_container):
    """Check that _preprocess_data keeps the CSR format of its input."""
    X, y = make_regression()
    # Zero out every entry below 2.5 before building the CSR input.
    X[X < 2.5] = 0.0
    X_csr = csr_container(X)

    outputs = _preprocess_data(X_csr, y, fit_intercept=True)
    assert outputs[0].format == "csr"
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
@pytest.mark.parametrize("to_copy", (True, False))
def test_preprocess_copy_data_no_checks(sparse_container, to_copy):
    """Check that the copy flag controls memory sharing between X and its output."""
    X, y = make_regression()
    X[X < 2.5] = 0.0
    is_sparse = sparse_container is not None
    if is_sparse:
        X = sparse_container(X)

    outputs = _preprocess_data(
        X, y, fit_intercept=True, copy=to_copy, check_input=False
    )
    X_out = outputs[0]

    # For sparse containers, compare the underlying data buffers.
    if is_sparse:
        shares_memory = np.may_share_memory(X_out.data, X.data)
    else:
        shares_memory = np.may_share_memory(X_out, X)

    if to_copy:
        assert not shares_memory
    else:
        assert shares_memory
@pytest.mark.parametrize("rescale_with_sw", [False, True])
@pytest.mark.parametrize("fit_intercept", [False, True])
def test_dtype_preprocess_data(rescale_with_sw, fit_intercept, global_random_seed):
    """Check that the outputs of _preprocess_data follow the dtype of X.

    Four input combinations are exercised: X and y both float32, both
    float64, X float32 with y float64, and X float64 with y float32. In every
    case all the returned arrays (and ``sqrt_sw`` when rescaling with sample
    weights) must have the dtype of X, the inputs must keep their dtype, and
    the float32 and float64 results must agree up to a tolerance.
    """
    rng = np.random.RandomState(global_random_seed)
    n_samples = 200
    n_features = 2
    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    sw = rng.rand(n_samples) + 1

    X_32 = np.asarray(X, dtype=np.float32)
    y_32 = np.asarray(y, dtype=np.float32)
    sw_32 = np.asarray(sw, dtype=np.float32)
    X_64 = np.asarray(X, dtype=np.float64)
    y_64 = np.asarray(y, dtype=np.float64)
    sw_64 = np.asarray(sw, dtype=np.float64)

    def preprocess(X_in, y_in, sw_in):
        # Thin wrapper so that the four calls only differ by their inputs.
        return _preprocess_data(
            X_in,
            y_in,
            fit_intercept=fit_intercept,
            sample_weight=sw_in,
            rescale_with_sw=rescale_with_sw,
        )

    # sample_weight must have the same dtype as X in every combination.
    out_32 = preprocess(X_32, y_32, sw_32)
    out_64 = preprocess(X_64, y_64, sw_64)
    out_3264 = preprocess(X_32, y_64, sw_32)
    out_6432 = preprocess(X_64, y_32, sw_64)

    # Checking every output through the same loop avoids the copy-paste slip
    # where the X_scale of the (X float64, y float32) case was never checked.
    expectations = (
        (out_32, np.float32),
        (out_64, np.float64),
        (out_3264, np.float32),
        (out_6432, np.float64),
    )
    for outputs, expected_dtype in expectations:
        Xt, yt, X_mean, y_mean, X_scale, sqrt_sw = outputs
        assert Xt.dtype == expected_dtype
        assert yt.dtype == expected_dtype
        assert X_mean.dtype == expected_dtype
        assert y_mean.dtype == expected_dtype
        assert X_scale.dtype == expected_dtype
        if rescale_with_sw:
            assert sqrt_sw.dtype == expected_dtype

    # The inputs must keep their original dtype.
    assert X_32.dtype == np.float32
    assert y_32.dtype == np.float32
    assert X_64.dtype == np.float64
    assert y_64.dtype == np.float64

    # float32 and float64 results must agree up to float32 precision.
    Xt_32, yt_32, X_mean_32, y_mean_32, X_scale_32, sqrt_sw_32 = out_32
    Xt_64, yt_64, X_mean_64, y_mean_64, X_scale_64, sqrt_sw_64 = out_64
    assert_allclose(Xt_32, Xt_64, rtol=1e-3, atol=1e-6)
    assert_allclose(yt_32, yt_64, rtol=1e-3, atol=1e-6)
    assert_allclose(X_mean_32, X_mean_64, rtol=1e-6)
    assert_allclose(y_mean_32, y_mean_64, rtol=1e-6)
    assert_allclose(X_scale_32, X_scale_64)
    if rescale_with_sw:
        assert_allclose(sqrt_sw_32, sqrt_sw_64, rtol=1e-6)
@pytest.mark.parametrize("n_targets", [None, 2])
@pytest.mark.parametrize("sparse_container", [None] + CSR_CONTAINERS)
def test_rescale_data(n_targets, sparse_container, global_random_seed):
    """Check that _rescale_data scales X and y by sqrt(sample_weight)."""
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features = 200, 2
    single_target = n_targets is None
    is_sparse = sparse_container is not None

    sample_weight = 1.0 + rng.rand(n_samples)
    X = rng.rand(n_samples, n_features)
    if single_target:
        y = rng.rand(n_samples)
    else:
        y = rng.rand(n_samples, n_targets)

    # Expected values are computed on the dense data, one scale per row.
    expected_sqrt_sw = np.sqrt(sample_weight)
    row_scale = expected_sqrt_sw[:, np.newaxis]
    expected_rescaled_X = X * row_scale
    expected_rescaled_y = y * (expected_sqrt_sw if single_target else row_scale)

    if is_sparse:
        X = sparse_container(X)
        # A 1D target is wrapped as a single sparse column.
        y = sparse_container(y.reshape(-1, 1) if single_target else y)

    rescaled_X, rescaled_y, sqrt_sw = _rescale_data(X, y, sample_weight)
    assert_allclose(sqrt_sw, expected_sqrt_sw)

    if is_sparse:
        rescaled_X = rescaled_X.toarray()
        rescaled_y = rescaled_y.toarray()
        if single_target:
            rescaled_y = rescaled_y.ravel()

    assert_allclose(rescaled_X, expected_rescaled_X)
    assert_allclose(rescaled_y, expected_rescaled_y)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_fused_types_make_dataset(csr_container):
    """Check make_dataset for float32/float64 with dense and CSR inputs."""
    iris = load_iris()

    X_32 = iris.data.astype(np.float32)
    y_32 = iris.target.astype(np.float32)
    X_csr_32 = csr_container(X_32)
    sample_weight_32 = np.arange(y_32.size, dtype=np.float32)

    X_64 = iris.data.astype(np.float64)
    y_64 = iris.target.astype(np.float64)
    X_csr_64 = csr_container(X_64)
    sample_weight_64 = np.arange(y_64.size, dtype=np.float64)

    # Dense arrays: draw the first sample of each dataset.
    dense_ds_32, _ = make_dataset(X_32, y_32, sample_weight_32)
    dense_ds_64, _ = make_dataset(X_64, y_64, sample_weight_64)
    dense_sample_32 = dense_ds_32._next_py()
    dense_sample_64 = dense_ds_64._next_py()
    dense_x_32, dense_y_32 = dense_sample_32[0][0], dense_sample_32[1]
    dense_x_64, dense_y_64 = dense_sample_64[0][0], dense_sample_64[1]

    assert dense_x_32.dtype == np.float32
    assert dense_x_64.dtype == np.float64
    assert_allclose(dense_y_64, dense_y_32, rtol=rtol)

    # CSR containers: same procedure.
    csr_ds_32, _ = make_dataset(X_csr_32, y_32, sample_weight_32)
    csr_ds_64, _ = make_dataset(X_csr_64, y_64, sample_weight_64)
    csr_sample_32 = csr_ds_32._next_py()
    csr_sample_64 = csr_ds_64._next_py()
    csr_x_32, csr_y_32 = csr_sample_32[0][0], csr_sample_32[1]
    csr_x_64, csr_y_64 = csr_sample_64[0][0], csr_sample_64[1]

    assert csr_x_32.dtype == np.float32
    assert csr_x_64.dtype == np.float64
    assert_allclose(csr_x_64, csr_x_32, rtol=rtol)
    assert_allclose(csr_y_64, csr_y_32, rtol=rtol)

    # Dense and CSR datasets must yield the very same first sample.
    assert_array_equal(dense_x_32, csr_x_32)
    assert_array_equal(dense_x_64, csr_x_64)
    assert_array_equal(dense_y_32, csr_y_32)
    assert_array_equal(dense_y_64, csr_y_64)
@pytest.mark.parametrize("X_shape", [(10, 5), (10, 20), (100, 100)])
@pytest.mark.parametrize(
    "sparse_container",
    [None]
    + [
        pytest.param(
            container,
            marks=pytest.mark.xfail(
                reason="Known to fail for CSR arrays, see issue #30131."
            ),
        )
        for container in CSR_CONTAINERS
    ],
)
@pytest.mark.parametrize("fit_intercept", [False, True])
def test_linear_regression_sample_weight_consistency(
    X_shape, sparse_container, fit_intercept, global_random_seed
):
    """Check that sample_weight affects LinearRegression consistently.

    This test is stricter than the common test
    check_sample_weight_equivalence alone and it also covers sparse X.
    It is very similar to test_enet_sample_weight_consistency.
    """
    rng = np.random.RandomState(global_random_seed)
    n_samples, n_features = X_shape
    half = n_samples // 2
    is_sparse = sparse_container is not None

    X = rng.rand(n_samples, n_features)
    y = rng.rand(n_samples)
    if is_sparse:
        X = sparse_container(X)

    def snapshot(estimator):
        # Keep the fitted parameters before the estimator gets refitted.
        saved_intercept = estimator.intercept_ if fit_intercept else None
        return estimator.coef_.copy(), saved_intercept

    def check_fit(estimator, ref_coef, ref_intercept, coef_rtol=1e-6):
        # The intercept is only compared when one is actually fitted.
        assert_allclose(estimator.coef_, ref_coef, rtol=coef_rtol)
        if fit_intercept:
            assert_allclose(estimator.intercept_, ref_intercept)

    reg = LinearRegression(fit_intercept=fit_intercept)
    reg = reg.fit(X, y, sample_weight=None)
    coef, intercept = snapshot(reg)

    # 1) sample_weight=np.ones(..) must be equivalent to sample_weight=None,
    # a special case of check_sample_weight_equivalence(name, reg), but we also
    # test with sparse input.
    reg.fit(X, y, sample_weight=np.ones_like(y))
    check_fit(reg, coef, intercept)

    # 2) sample_weight=None should be equivalent to sample_weight = number
    reg.fit(X, y, sample_weight=123.0)
    check_fit(reg, coef, intercept)

    # 3) scaling of sample_weight should have no effect, cf. np.average()
    sample_weight = rng.uniform(low=0.01, high=2, size=X.shape[0])
    reg = reg.fit(X, y, sample_weight=sample_weight)
    coef, intercept = snapshot(reg)
    reg.fit(X, y, sample_weight=np.pi * sample_weight)
    check_fit(reg, coef, intercept, coef_rtol=1e-5 if is_sparse else 1e-6)

    # 4) setting elements of sample_weight to 0 is equivalent to removing these
    # samples
    sample_weight_0 = sample_weight.copy()
    sample_weight_0[-5:] = 0
    y[-5:] *= 1000  # to make excluding those samples important
    reg.fit(X, y, sample_weight=sample_weight_0)
    coef_0, intercept_0 = snapshot(reg)
    reg.fit(X[:-5], y[:-5], sample_weight=sample_weight[:-5])
    check_fit(reg, coef_0, intercept_0, coef_rtol=1e-5)

    # 5) check that multiplying sample_weight by 2 is equivalent to repeating
    # corresponding samples twice
    if is_sparse:
        X2 = sparse.vstack([X, X[:half]], format="csc")
    else:
        X2 = np.concatenate([X, X[:half]], axis=0)
    y2 = np.concatenate([y, y[:half]])
    sample_weight_1 = sample_weight.copy()
    sample_weight_1[:half] *= 2
    sample_weight_2 = np.concatenate([sample_weight, sample_weight[:half]], axis=0)

    reg1 = LinearRegression(fit_intercept=fit_intercept).fit(
        X, y, sample_weight=sample_weight_1
    )
    reg2 = LinearRegression(fit_intercept=fit_intercept).fit(
        X2, y2, sample_weight=sample_weight_2
    )
    check_fit(reg1, reg2.coef_, reg2.intercept_)

View File

@@ -0,0 +1,314 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
from math import log
import numpy as np
import pytest
from sklearn import datasets
from sklearn.linear_model import ARDRegression, BayesianRidge, Ridge
from sklearn.utils import check_random_state
from sklearn.utils._testing import (
_convert_container,
assert_allclose,
assert_almost_equal,
assert_array_almost_equal,
assert_array_less,
)
from sklearn.utils.extmath import fast_logdet
# Diabetes dataset, loaded once at import time and shared by the tests of this
# module that read diabetes.data / diabetes.target.
diabetes = datasets.load_diabetes()
def test_bayesian_ridge_scores():
    """Check the shape of the scores_ attribute."""
    model = BayesianRidge(compute_score=True)
    model.fit(diabetes.data, diabetes.target)
    # One score per iteration plus one extra entry.
    expected_shape = (model.n_iter_ + 1,)
    assert model.scores_.shape == expected_shape
def test_bayesian_ridge_score_values():
    """Check the value of the first score on the diabetes data.

    The log marginal likelihood follows equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001). With
    M = Id/alpha + X.X^T/lambda, the value computed below is:

    - 0.5 * (log |M| + y^T.M^-1.y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and it must match the score computed during training.
    """
    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]

    # Initial values of alpha and lambda (see the estimator code for these).
    eps = np.finfo(np.float64).eps
    alpha_ = 1.0 / (np.var(y) + eps)
    lambda_ = 1.0

    # Parameters of the Gamma hyperpriors.
    hyperpriors = dict(alpha_1=0.1, alpha_2=0.1, lambda_1=0.1, lambda_2=0.1)

    # Score from the docstring formula, accumulated term by term.
    lambda_term = (
        hyperpriors["lambda_1"] * log(lambda_) - hyperpriors["lambda_2"] * lambda_
    )
    alpha_term = (
        hyperpriors["alpha_1"] * log(alpha_) - hyperpriors["alpha_2"] * alpha_
    )
    M = 1.0 / alpha_ * np.eye(n_samples) + 1.0 / lambda_ * np.dot(X, X.T)
    M_inv_dot_y = np.linalg.solve(M, y)
    likelihood_term = -0.5 * (
        fast_logdet(M) + np.dot(y.T, M_inv_dot_y) + n_samples * log(2 * np.pi)
    )
    expected_score = lambda_term + alpha_term
    expected_score += likelihood_term

    # Score reported by BayesianRidge, limited to a single iteration.
    model = BayesianRidge(
        max_iter=1,
        fit_intercept=False,
        compute_score=True,
        **hyperpriors,
    )
    model.fit(X, y)

    assert_almost_equal(model.scores_[0], expected_score, decimal=9)
def test_bayesian_ridge_parameter():
    """Check lambda_ and alpha_ against an equivalent Ridge (GitHub issue #8224)."""
    features = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    targets = np.array([1, 2, 3, 2, 0, 4, 5]).T

    # A Ridge regression model whose alpha equals the ratio of lambda_ and
    # alpha_ from the Bayesian Ridge model must give the same fit.
    bayes_model = BayesianRidge(compute_score=True).fit(features, targets)
    ridge_alpha = bayes_model.lambda_ / bayes_model.alpha_
    ridge_model = Ridge(alpha=ridge_alpha).fit(features, targets)

    assert_array_almost_equal(ridge_model.coef_, bayes_model.coef_)
    assert_almost_equal(ridge_model.intercept_, bayes_model.intercept_)
@pytest.mark.parametrize("n_samples, n_features", [(10, 20), (20, 10)])
def test_bayesian_covariance_matrix(n_samples, n_features, global_random_seed):
    """Check the posterior covariance matrix sigma_.

    Non-regression test for https://github.com/scikit-learn/scikit-learn/issues/31093
    """
    X, y = datasets.make_regression(
        n_samples, n_features, random_state=global_random_seed
    )
    model = BayesianRidge(fit_intercept=False).fit(X, y)

    # Expected covariance: inverse of lambda * I + alpha * X^T X.
    precision = model.lambda_ * np.identity(n_features) + model.alpha_ * np.dot(
        X.T, X
    )
    expected_sigma = np.linalg.inv(precision)
    assert_allclose(model.sigma_, expected_sigma, rtol=1e-6)
def test_bayesian_sample_weights():
    """Check BayesianRidge with sample weights against an equivalent Ridge."""
    features = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    targets = np.array([1, 2, 3, 2, 0, 4, 5]).T
    weights = np.array([4, 3, 3, 1, 1, 2, 3]).T

    # A Ridge regression model whose alpha equals the ratio of lambda_ and
    # alpha_ from the Bayesian Ridge model must give the same fit.
    bayes_model = BayesianRidge(compute_score=True)
    bayes_model.fit(features, targets, sample_weight=weights)
    ridge_model = Ridge(alpha=bayes_model.lambda_ / bayes_model.alpha_)
    ridge_model.fit(features, targets, sample_weight=weights)

    assert_array_almost_equal(ridge_model.coef_, bayes_model.coef_)
    assert_almost_equal(ridge_model.intercept_, bayes_model.intercept_)
def test_toy_bayesian_ridge_object():
    """Check BayesianRidge on a toy identity problem."""
    train_X = np.array([[1], [2], [6], [8], [10]])
    train_y = np.array([1, 2, 6, 8, 10])
    model = BayesianRidge(compute_score=True)
    model.fit(train_X, train_y)

    # The model should approximately learn the identity function.
    queries = [[1], [3], [4]]
    predictions = model.predict(queries)
    assert_array_almost_equal(predictions, [1, 3, 4], 2)
def test_bayesian_initial_params():
    """Check BayesianRidge with initial values (alpha_init, lambda_init)."""
    grid = np.linspace(0, 4, 5)
    X = np.vander(grid, 4)
    y = np.array([0.0, 1.0, 0.0, -1.0, 0.0])  # y = (x^3 - 6x^2 + 8x) / 3

    # In this case, starting from the default initial values will increase
    # the bias of the fitted curve. So, lambda_init should be small.
    model = BayesianRidge(alpha_init=1.0, lambda_init=1e-3)
    model.fit(X, y)

    # The R2 score must be nearly equal to one.
    assert_almost_equal(model.score(X, y), 1.0)
def test_prediction_bayesian_ridge_ard_with_constant_input():
    """Check BayesianRidge and ARDRegression predictions on a constant target."""
    n_samples, n_features = 4, 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    value_dtype = np.array(constant_value).dtype
    y = np.full(n_samples, constant_value, dtype=value_dtype)
    expected = np.full(n_samples, constant_value, dtype=value_dtype)

    # Both estimators must predict the constant back on the training data.
    for estimator_cls in (BayesianRidge, ARDRegression):
        model = estimator_cls()
        predictions = model.fit(X, y).predict(X)
        assert_array_almost_equal(predictions, expected)
def test_std_bayesian_ridge_ard_with_constant_input():
    """Check BayesianRidge and ARDRegression standard dev. on a constant target.

    The standard deviation should be relatively small (< 0.01 is tested here).
    """
    n_samples, n_features = 10, 5
    random_state = check_random_state(42)
    constant_value = random_state.rand()
    X = random_state.random_sample((n_samples, n_features))
    y = np.full(n_samples, constant_value, dtype=np.array(constant_value).dtype)
    std_upper_bound = 0.01

    for estimator_cls in (BayesianRidge, ARDRegression):
        model = estimator_cls()
        predictions_and_std = model.fit(X, y).predict(X, return_std=True)
        assert_array_less(predictions_and_std[1], std_upper_bound)
def test_update_of_sigma_in_ard():
    """Check that sigma_ is updated after the last ARDRegression iteration.

    See issue #10128.
    """
    features = np.array([[1, 0], [0, 0]])
    targets = np.array([0, 0])
    model = ARDRegression(max_iter=1)
    model.fit(features, targets)

    # With the inputs above, ARDRegression prunes both of the two coefficients
    # in the first iteration. Hence, the expected shape of `sigma_` is (0, 0).
    assert model.sigma_.shape == (0, 0)

    # Ensure that no error is thrown at prediction stage
    model.predict(features, return_std=True)
def test_toy_ard_object():
    """Fit ARDRegression on a tiny dataset following the identity function."""
    X_train = np.array([[1], [2], [3]])
    y_train = np.array([1, 2, 3])
    ard = ARDRegression(compute_score=True)
    ard.fit(X_train, y_train)

    # The model should approximately reproduce the identity (2 decimals),
    # including on the input 4 that was not part of the training data.
    X_new = [[1], [3], [4]]
    assert_array_almost_equal(ard.predict(X_new), [1, 3, 4], 2)
@pytest.mark.parametrize("n_samples, n_features", ((10, 100), (100, 10)))
def test_ard_accuracy_on_easy_problem(global_random_seed, n_samples, n_features):
    """ARD must converge with good accuracy on an easy problem.

    Non-regression test for GitHub issue #14055.
    """
    # NOTE(review): `n_samples` and `n_features` are parametrized but not
    # used below; the data shape is fixed to (250, 3) -- confirm the intent.
    rng = np.random.RandomState(global_random_seed)
    X = rng.normal(size=(250, 3))
    # The target is exactly the second column, so its coefficient should be 1.
    y = X[:, 1]

    ard = ARDRegression()
    ard.fit(X, y)

    assert np.abs(1 - ard.coef_[1]) < 1e-10
@pytest.mark.parametrize("constructor_name", ["array", "dataframe"])
def test_return_std(constructor_name):
    """Check the ``return_std`` option of both Bayesian regressors.

    Targets follow a known linear model plus Gaussian noise. The predicted
    standard deviation is compared with the noise scale, at a precision
    (number of decimals) that tightens as the noise level shrinks.
    """
    n_features, n_train, n_test = 5, 50, 10
    w = np.array([1.0, 0.0, 1.0, -1.0, 0.0])
    b = 1.0

    # The global NumPy RNG is used; draws happen in the order: train data,
    # test data, then one noise vector per noise level.
    X = np.random.random((n_train, n_features))
    X = _convert_container(X, constructor_name)
    X_test = np.random.random((n_test, n_features))
    X_test = _convert_container(X_test, constructor_name)

    for decimal, noise_mult in enumerate([1, 0.1, 0.01]):
        noise = np.random.randn(X.shape[0]) * noise_mult
        y = np.dot(X, w) + b + noise

        for Estimator in (BayesianRidge, ARDRegression):
            model = Estimator()
            model.fit(X, y)
            _, y_std = model.predict(X_test, return_std=True)
            assert_array_almost_equal(y_std, noise_mult, decimal=decimal)
def test_update_sigma(global_random_seed):
    """The two ``sigma`` update helpers of ARDRegression must be equivalent.

    The Woodbury-formula variant is the one used when
    n_samples < n_features; the other helper is used otherwise.
    """
    rng = np.random.RandomState(global_random_seed)

    # A square problem (n_samples == n_features) avoids instability issues
    # when inverting the matrices: the Woodbury formula would be unstable
    # for n_samples > n_features.
    n_samples = n_features = 10
    X = rng.randn(n_samples, n_features)
    alpha = 1
    lmbda = np.arange(1, n_features + 1)
    keep_lambda = np.ones(n_features, dtype=bool)

    ard = ARDRegression()
    sigma_direct = ard._update_sigma(X, alpha, lmbda, keep_lambda)
    sigma_woodbury = ard._update_sigma_woodbury(X, alpha, lmbda, keep_lambda)

    np.testing.assert_allclose(sigma_direct, sigma_woodbury)
@pytest.mark.parametrize("dtype", [np.float32, np.float64])
@pytest.mark.parametrize("Estimator", [BayesianRidge, ARDRegression])
def test_dtype_match(dtype, Estimator):
    """np.float32 input must not be cast to np.float64 when possible."""
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]], dtype=dtype)
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T

    model = Estimator()
    model.fit(X, y)

    # Fitted attributes keep the dtype of the training data ...
    for attribute in ("coef_", "sigma_"):
        assert getattr(model, attribute).dtype == X.dtype

    # ... and so do the predictive mean and standard deviation.
    y_mean, y_std = model.predict(X, return_std=True)
    for prediction in (y_mean, y_std):
        assert prediction.dtype == X.dtype
@pytest.mark.parametrize("Estimator", [BayesianRidge, ARDRegression])
def test_dtype_correctness(Estimator):
    """Coefficients fitted on float32 and on float64 data must be close."""
    X = np.array([[1, 1], [3, 4], [5, 7], [4, 1], [2, 6], [3, 10], [3, 2]])
    y = np.array([1, 2, 3, 2, 0, 4, 5]).T

    # The same estimator instance is refitted, first in single precision
    # and then in double precision.
    model = Estimator()
    X_single = X.astype(np.float32)
    coef_32 = model.fit(X_single, y).coef_
    X_double = X.astype(np.float64)
    coef_64 = model.fit(X_double, y).coef_

    np.testing.assert_allclose(coef_32, coef_64, rtol=1e-4)

View File

@@ -0,0 +1,291 @@
# SPDX-License-Identifier: BSD-3-Clause
import inspect
import numpy as np
import pytest
from sklearn.base import clone, is_classifier
from sklearn.datasets import make_classification, make_low_rank_matrix, make_regression
from sklearn.linear_model import (
ARDRegression,
BayesianRidge,
ElasticNet,
ElasticNetCV,
GammaRegressor,
HuberRegressor,
Lars,
LarsCV,
Lasso,
LassoCV,
LassoLars,
LassoLarsCV,
LassoLarsIC,
LinearRegression,
LogisticRegression,
LogisticRegressionCV,
MultiTaskElasticNet,
MultiTaskElasticNetCV,
MultiTaskLasso,
MultiTaskLassoCV,
OrthogonalMatchingPursuit,
OrthogonalMatchingPursuitCV,
PassiveAggressiveClassifier,
PassiveAggressiveRegressor,
Perceptron,
PoissonRegressor,
Ridge,
RidgeClassifier,
RidgeClassifierCV,
RidgeCV,
SGDClassifier,
SGDRegressor,
TheilSenRegressor,
TweedieRegressor,
)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.svm import LinearSVC, LinearSVR
from sklearn.utils._testing import assert_allclose, set_random_state
from sklearn.utils.fixes import CSR_CONTAINERS
# Note: GammaRegressor() and TweedieRegressor(power != 1) have a non-canonical link.
@pytest.mark.parametrize(
    "model",
    [
        ARDRegression(),
        BayesianRidge(),
        ElasticNet(),
        ElasticNetCV(),
        Lars(),
        LarsCV(),
        Lasso(),
        LassoCV(),
        LassoLarsCV(),
        LassoLarsIC(),
        LinearRegression(),
        # TODO: Fix SAGA, which fails badly with sample_weights.
        # This is a known limitation, see:
        # https://github.com/scikit-learn/scikit-learn/issues/21305
        pytest.param(
            LogisticRegression(l1_ratio=0.5, solver="saga", tol=1e-15),
            marks=pytest.mark.xfail(reason="Missing importance sampling scheme"),
        ),
        LogisticRegressionCV(tol=1e-6, use_legacy_attributes=False, l1_ratios=(0,)),
        MultiTaskElasticNet(),
        MultiTaskElasticNetCV(),
        MultiTaskLasso(),
        MultiTaskLassoCV(),
        OrthogonalMatchingPursuit(),
        OrthogonalMatchingPursuitCV(),
        PoissonRegressor(),
        Ridge(),
        RidgeCV(),
        pytest.param(
            SGDRegressor(tol=1e-15),
            marks=pytest.mark.xfail(reason="Insufficient precision."),
        ),
        SGDRegressor(penalty="elasticnet", max_iter=10_000),
        TweedieRegressor(power=0),  # same as Ridge
    ],
    ids=lambda x: x.__class__.__name__,
)
@pytest.mark.parametrize("with_sample_weight", [False, True])
def test_balance_property(model, with_sample_weight, global_random_seed):
    """Check that sum(y_predicted) == sum(y_observed) on the training set.

    This must hold for all linear models with deviance of an exponential
    dispersion family as loss and the corresponding canonical link if
    fit_intercept=True. Examples:

    - squared error and identity link (most linear models)
    - Poisson deviance with log link
    - log loss with logit link

    This is known as balance property or unconditional
    calibration/unbiasedness. For reference, see Corollary 3.18, 3.20 and
    Chapter 5.1.5 of M.V. Wuthrich and M. Merz, "Statistical Foundations of
    Actuarial Learning and its Applications" (June 3, 2022).
    http://doi.org/10.2139/ssrn.3822407
    """
    model = clone(model)  # Avoid side effects from shared instances.

    accepts_sample_weight = "sample_weight" in inspect.signature(model.fit).parameters
    if with_sample_weight and not accepts_sample_weight:
        pytest.skip("Estimator does not support sample_weight.")

    # Relative test precision, loosened for the stochastic solvers.
    if isinstance(model, SGDRegressor):
        rel = 1e-1
    elif getattr(model, "solver", None) == "saga":
        rel = 1e-2
    else:
        rel = 2e-4

    rng = np.random.RandomState(global_random_seed)
    n_train, n_features = 100, 10
    multi_task_models = (
        MultiTaskElasticNet,
        MultiTaskElasticNetCV,
        MultiTaskLasso,
        MultiTaskLassoCV,
    )
    n_targets = 3 if isinstance(model, multi_task_models) else None

    # The RNG is consumed in this order: X, coef, y and (optionally) weights.
    X = make_low_rank_matrix(n_samples=n_train, n_features=n_features, random_state=rng)
    column_max = np.max(X, axis=0)
    if n_targets:
        raw_coef = rng.uniform(low=-2, high=2, size=(n_features, n_targets))
        coef = raw_coef / column_max[:, None]
    else:
        coef = rng.uniform(low=-2, high=2, size=n_features) / column_max

    expectation = np.exp(X @ coef + 0.5)
    y = rng.poisson(lam=expectation) + 1  # strict positive, i.e. y > 0
    if is_classifier(model):
        y = (y > expectation + 1).astype(np.float64)

    sw = rng.uniform(low=1, high=10, size=y.shape[0]) if with_sample_weight else None

    model.set_params(fit_intercept=True)  # to be sure
    fit_kwargs = {"sample_weight": sw} if with_sample_weight else {}
    model.fit(X, y, **fit_kwargs)

    # Assert balance property.
    if is_classifier(model):
        predicted_mean = np.average(model.predict_proba(X)[:, 1], weights=sw)
        observed_mean = np.average(y, weights=sw)
    else:
        predicted_mean = np.average(model.predict(X), weights=sw, axis=0)
        observed_mean = np.average(y, weights=sw, axis=0)
    assert predicted_mean == pytest.approx(observed_mean, rel=rel)
@pytest.mark.filterwarnings("ignore:The default of 'normalize'")
@pytest.mark.filterwarnings("ignore:lbfgs failed to converge")
@pytest.mark.filterwarnings("ignore:A column-vector y was passed when a 1d array.*")
@pytest.mark.parametrize(
    "Regressor",
    [
        ARDRegression,
        BayesianRidge,
        ElasticNet,
        ElasticNetCV,
        GammaRegressor,
        HuberRegressor,
        Lars,
        LarsCV,
        Lasso,
        LassoCV,
        LassoLars,
        LassoLarsCV,
        LassoLarsIC,
        LinearSVR,
        LinearRegression,
        OrthogonalMatchingPursuit,
        OrthogonalMatchingPursuitCV,
        PassiveAggressiveRegressor,
        PoissonRegressor,
        Ridge,
        RidgeCV,
        SGDRegressor,
        TheilSenRegressor,
        TweedieRegressor,
    ],
)
@pytest.mark.parametrize("ndim", [1, 2])
def test_linear_model_regressor_coef_shape(Regressor, ndim):
    """Check that `coef_` is 1d, whether `y` is passed as 1d or as a column."""
    if Regressor is LinearRegression:
        pytest.xfail("LinearRegression does not follow `coef_` shape contract!")

    X, y = make_regression(random_state=0, n_samples=200, n_features=20)
    # Rescale the target into [1, 2]; presumably so that it stays strictly
    # positive for the regressors that need it -- TODO confirm.
    y_column = y.reshape(-1, 1)
    y = MinMaxScaler().fit_transform(y_column)[:, 0] + 1
    if ndim == 2:
        y = y[:, np.newaxis]

    regressor = Regressor()
    set_random_state(regressor)
    regressor.fit(X, y)

    n_features = X.shape[1]
    assert regressor.coef_.shape == (n_features,)
@pytest.mark.parametrize(
    ["Classifier", "params"],
    [
        (LinearSVC, {}),
        (LogisticRegression, {}),
        (
            LogisticRegressionCV,
            {
                "solver": "newton-cholesky",
                "use_legacy_attributes": False,
                "l1_ratios": (0,),
            },
        ),
        (PassiveAggressiveClassifier, {}),
        (Perceptron, {}),
        (RidgeClassifier, {}),
        (RidgeClassifierCV, {}),
        (SGDClassifier, {}),
    ],
)
@pytest.mark.parametrize("n_classes", [2, 3])
def test_linear_model_classifier_coef_shape(Classifier, params, n_classes):
    """Check the shape of `coef_` for binary and multiclass linear classifiers.

    The expected shape is (1, n_features) for 2 classes and
    (n_classes, n_features) otherwise.
    """
    if Classifier in (RidgeClassifier, RidgeClassifierCV):
        pytest.xfail(f"{Classifier} does not follow `coef_` shape contract!")

    X, y = make_classification(n_informative=10, n_classes=n_classes, random_state=0)
    n_features = X.shape[1]

    classifier = Classifier(**params)
    set_random_state(classifier)
    classifier.fit(X, y)

    n_rows = 1 if n_classes == 2 else n_classes
    expected_shape = (n_rows, n_features)
    assert classifier.coef_.shape == expected_shape
@pytest.mark.parametrize(
    "LinearModel, params",
    [
        (Lasso, {"tol": 1e-15, "alpha": 0.01}),
        (LassoCV, {"tol": 1e-15}),
        (ElasticNetCV, {"tol": 1e-15}),
        (RidgeClassifier, {"solver": "sparse_cg", "alpha": 0.1}),
        (ElasticNet, {"tol": 1e-15, "l1_ratio": 1, "alpha": 0.01}),
        (ElasticNet, {"tol": 1e-15, "l1_ratio": 1e-5, "alpha": 0.01}),
        (Ridge, {"solver": "sparse_cg", "tol": 1e-12, "alpha": 0.1}),
        (LinearRegression, {}),
        (RidgeCV, {}),
        (RidgeClassifierCV, {}),
    ],
)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_model_pipeline_same_dense_and_sparse(LinearModel, params, csr_container):
    """Check that linear models give the same results on dense and sparse data.

    Each model is wrapped in a pipeline whose first step is a
    StandardScaler without centering.
    """
    # Two independent but identically configured pipelines.
    model_dense, model_sparse = (
        make_pipeline(StandardScaler(with_mean=False), LinearModel(**params))
        for _ in range(2)
    )

    # Prepare the data: entries below 0.1 are zeroed out before building
    # the CSR version of X.
    rng = np.random.RandomState(0)
    n_samples, n_features = 100, 2
    X = rng.randn(n_samples, n_features)
    X[X < 0.1] = 0.0
    X_sparse = csr_container(X)

    y = rng.rand(n_samples)
    if is_classifier(model_dense):
        y = np.sign(y)

    model_dense.fit(X, y)
    model_sparse.fit(X_sparse, y)

    dense_estimator, sparse_estimator = model_dense[1], model_sparse[1]
    assert_allclose(sparse_estimator.coef_, dense_estimator.coef_, atol=1e-15)

    y_pred_dense = model_dense.predict(X)
    y_pred_sparse = model_sparse.predict(X_sparse)
    assert_allclose(y_pred_dense, y_pred_sparse)

    assert_allclose(dense_estimator.intercept_, sparse_estimator.intercept_)

View File

@@ -0,0 +1,216 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
import pytest
from scipy import optimize
from sklearn.datasets import make_regression
from sklearn.linear_model import HuberRegressor, LinearRegression, Ridge, SGDRegressor
from sklearn.linear_model._huber import _huber_loss_and_gradient
from sklearn.utils._testing import (
assert_almost_equal,
assert_array_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import CSR_CONTAINERS
def make_regression_with_outliers(n_samples=50, n_features=20):
    """Build a regression problem whose design matrix contains outlier rows.

    A dataset is generated with ``make_regression`` (noise=0.05), then
    ``int(0.1 * n_samples)`` row indices are drawn (with replacement) and the
    corresponding rows of ``X`` are overwritten with scaled Gaussian noise.
    The targets are left untouched.

    Returns
    -------
    X : ndarray of shape (n_samples, n_features)
    y : ndarray of shape (n_samples,)
    """
    rng = np.random.RandomState(0)
    X, y = make_regression(
        n_samples=n_samples, n_features=n_features, random_state=0, noise=0.05
    )

    # Replace 10% of the samples with noise.
    n_outliers = int(0.1 * n_samples)
    outlier_rows = rng.randint(0, n_samples, n_outliers)
    noise = rng.normal(0, 1, (n_outliers, X.shape[1]))
    X[outlier_rows, :] = 2.0 * noise
    return X, y
def test_huber_equals_lr_for_high_epsilon():
    """HuberRegressor must match LinearRegression for a large epsilon.

    The Huber model is fitted with epsilon=1e3 and no regularization
    (alpha=0.0) and compared to an ordinary least squares fit.
    """
    X, y = make_regression_with_outliers()

    ols = LinearRegression()
    ols.fit(X, y)

    huber = HuberRegressor(epsilon=1e3, alpha=0.0)
    huber.fit(X, y)

    assert_almost_equal(huber.coef_, ols.coef_, 3)
    assert_almost_equal(huber.intercept_, ols.intercept_, 2)
def test_huber_max_iter():
    """With max_iter=1, the reported n_iter_ must equal max_iter."""
    X, y = make_regression_with_outliers()
    regressor = HuberRegressor(max_iter=1)
    regressor.fit(X, y)
    assert regressor.n_iter_ == regressor.max_iter
def test_huber_gradient():
    """Check the gradient returned by `_huber_loss_and_gradient`.

    The analytical gradient is compared to a finite-difference estimate
    through `scipy.optimize.check_grad`.
    """
    rng = np.random.RandomState(1)
    X, y = make_regression_with_outliers()
    sample_weight = rng.randint(1, 3, (y.shape[0]))

    # `_huber_loss_and_gradient` returns (loss, gradient); split it into the
    # two callables expected by check_grad.
    def loss_only(x, *args):
        loss, _ = _huber_loss_and_gradient(x, *args)
        return loss

    def gradient_only(x, *args):
        _, gradient = _huber_loss_and_gradient(x, *args)
        return gradient

    n_columns = X.shape[1]
    for _ in range(5):
        # Check for both fit_intercept and otherwise: the parameter vector
        # has one or two entries more than the number of features.
        for n_params in (n_columns + 1, n_columns + 2):
            w = rng.randn(n_params)
            # The last parameter is made non-negative.
            w[-1] = np.abs(w[-1])
            gradient_error = optimize.check_grad(
                loss_only, gradient_only, w, X, y, 0.01, 0.1, sample_weight
            )
            assert_almost_equal(gradient_error, 1e-6, 4)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_huber_sample_weights(csr_container):
    """Check the sample_weight implementation of HuberRegressor.

    Three properties are verified: unit weights give the unweighted fit,
    integer weights are equivalent to repeating samples, and the sparse
    code path agrees with the dense one when weights are given.
    """
    X, y = make_regression_with_outliers()
    huber = HuberRegressor()
    huber.fit(X, y)
    reference_coef = huber.coef_
    reference_intercept = huber.intercept_

    # Rescale coefs before comparing with assert_array_almost_equal to make
    # sure that the number of decimal places used is somewhat insensitive to
    # the amplitude of the coefficients and therefore to the scale of the
    # data and the regularization parameter.
    scale = max(np.mean(np.abs(huber.coef_)), np.mean(np.abs(huber.intercept_)))

    # 1) Unit weights must not change the fit.
    unit_weights = np.ones(y.shape[0])
    huber.fit(X, y, sample_weight=unit_weights)
    assert_array_almost_equal(huber.coef_ / scale, reference_coef / scale)
    assert_array_almost_equal(huber.intercept_ / scale, reference_intercept / scale)

    # 2) Repeating sample 1 twice and sample 3 once must be equivalent to
    # giving them weights 3 and 2 respectively.
    X, y = make_regression_with_outliers(n_samples=5, n_features=20)
    X_new = np.vstack((X, np.vstack((X[1], X[1], X[3]))))
    y_new = np.concatenate((y, [y[1]], [y[1]], [y[3]]))
    huber.fit(X_new, y_new)
    reference_coef = huber.coef_
    reference_intercept = huber.intercept_

    sample_weight = np.ones(X.shape[0])
    sample_weight[1] = 3
    sample_weight[3] = 2
    huber.fit(X, y, sample_weight=sample_weight)
    assert_array_almost_equal(huber.coef_ / scale, reference_coef / scale)
    assert_array_almost_equal(huber.intercept_ / scale, reference_intercept / scale)

    # 3) Test sparse implementation with sample weights.
    X_csr = csr_container(X)
    huber_sparse = HuberRegressor()
    huber_sparse.fit(X_csr, y, sample_weight=sample_weight)
    assert_array_almost_equal(huber_sparse.coef_ / scale, reference_coef / scale)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_huber_sparse(csr_container):
    """Dense and CSR inputs must give the same coefficients and outliers."""
    X, y = make_regression_with_outliers()

    huber_dense = HuberRegressor(alpha=0.1)
    huber_dense.fit(X, y)

    huber_sparse = HuberRegressor(alpha=0.1)
    huber_sparse.fit(csr_container(X), y)

    assert_array_almost_equal(huber_sparse.coef_, huber_dense.coef_)
    assert_array_equal(huber_dense.outliers_, huber_sparse.outliers_)
def test_huber_scaling_invariant():
    """Outlier filtering must not depend on the scale of the data."""
    X, y = make_regression_with_outliers()
    huber = HuberRegressor(fit_intercept=False, alpha=0.0)

    huber.fit(X, y)
    reference_mask = huber.outliers_
    # Not every sample may be flagged as an outlier.
    assert not np.all(reference_mask)

    # Scaling only the target.
    huber.fit(X, 2.0 * y)
    assert_array_equal(huber.outliers_, reference_mask)

    # Scaling both the features and the target.
    huber.fit(2.0 * X, 2.0 * y)
    assert_array_equal(huber.outliers_, reference_mask)
def test_huber_and_sgd_same_results():
    """HuberRegressor and SGDRegressor(loss="huber") must agree.

    Both estimators should converge to the same coefficients when given the
    same parameters.
    """
    X, y = make_regression_with_outliers(n_samples=10, n_features=2)

    # Fit once to find out the scale parameter. Scale down X and y by scale
    # so that the scale parameter is optimized to 1.0
    huber = HuberRegressor(fit_intercept=False, alpha=0.0, epsilon=1.35)
    huber.fit(X, y)
    estimated_scale = huber.scale_
    X_scale = X / estimated_scale
    y_scale = y / estimated_scale
    huber.fit(X_scale, y_scale)
    assert_almost_equal(huber.scale_, 1.0, 3)

    sgd_params = dict(
        alpha=0.0,
        loss="huber",
        shuffle=True,
        random_state=0,
        max_iter=10000,
        fit_intercept=False,
        epsilon=1.35,
        tol=None,
    )
    sgdreg = SGDRegressor(**sgd_params)
    sgdreg.fit(X_scale, y_scale)
    assert_array_almost_equal(huber.coef_, sgdreg.coef_, 1)
def test_huber_warm_start():
    """Refitting with warm_start=True must converge without any iteration."""
    X, y = make_regression_with_outliers()
    huber_warm = HuberRegressor(alpha=1.0, max_iter=10000, warm_start=True, tol=1e-1)

    huber_warm.fit(X, y)
    first_fit_coef = huber_warm.coef_.copy()

    huber_warm.fit(X, y)

    # SciPy performs the tol check after doing the coef updates, so
    # these would be almost same but not equal.
    assert_array_almost_equal(huber_warm.coef_, first_fit_coef, 1)
    assert huber_warm.n_iter_ == 0
def test_huber_better_r2_score():
    """Huber must score better than Ridge on the non-outlier samples.

    Conversely, the Huber model is expected to score worse than Ridge on
    the samples it treats as outliers.
    """
    X, y = make_regression_with_outliers()
    huber = HuberRegressor(alpha=0.01)
    huber.fit(X, y)

    # Samples whose absolute residual is below epsilon * scale_ are the
    # non-outliers according to the fitted Huber model.
    residuals = np.dot(X, huber.coef_) + huber.intercept_ - y
    inlier_mask = np.abs(residuals) < huber.epsilon * huber.scale_
    outlier_mask = ~inlier_mask

    huber_score = huber.score(X[inlier_mask], y[inlier_mask])
    huber_outlier_score = huber.score(X[outlier_mask], y[outlier_mask])

    # The Ridge regressor should be influenced by the outliers and hence
    # give a worse score on the non-outliers as compared to the huber
    # regressor.
    ridge = Ridge(alpha=0.01)
    ridge.fit(X, y)
    ridge_score = ridge.score(X[inlier_mask], y[inlier_mask])
    ridge_outlier_score = ridge.score(X[outlier_mask], y[outlier_mask])
    assert huber_score > ridge_score

    # The huber model should also fit poorly on the outliers.
    assert ridge_outlier_score > huber_outlier_score
def test_huber_bool():
    """Fitting HuberRegressor on boolean features must not crash."""
    X, y = make_regression(n_samples=200, n_features=2, noise=4.0, random_state=0)
    # Boolean design matrix: True where the original feature is positive.
    X_bool = X > 0
    regressor = HuberRegressor()
    regressor.fit(X_bool, y)

View File

@@ -0,0 +1,870 @@
import warnings
import numpy as np
import pytest
from scipy import linalg
from sklearn import datasets, linear_model
from sklearn.base import clone
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import (
Lars,
LarsCV,
LassoLars,
LassoLarsCV,
LassoLarsIC,
lars_path,
)
from sklearn.linear_model._least_angle import _lars_path_residues
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils._testing import (
TempMemmap,
assert_allclose,
assert_array_almost_equal,
ignore_warnings,
)
# TODO: use another dataset that has multiple drops
# Module-level data used by the tests below: the diabetes dataset, its Gram
# matrix (X.T X), the product X.T y and the number of samples.
diabetes = datasets.load_diabetes()
X, y = diabetes.data, diabetes.target
G = np.dot(X.T, X)
Xy = np.dot(X.T, y)
n_samples = y.size
def test_simple():
    """Check the LARS principle: covariances stay tied and decreasing.

    Also exercises the verbose output of ``lars_path``; whatever it prints
    is captured in a throwaway buffer so that it does not pollute the test
    output.
    """
    from contextlib import redirect_stdout
    from io import StringIO

    # redirect_stdout restores sys.stdout on exit, even if lars_path raises,
    # so no manual save/restore is needed.
    with redirect_stdout(StringIO()):
        _, _, coef_path_ = linear_model.lars_path(X, y, method="lar", verbose=10)

    for i, coef_ in enumerate(coef_path_.T):
        res = y - np.dot(X, coef_)
        cov = np.dot(X.T, res)
        C = np.max(abs(cov))
        eps = 1e-3
        # Number of features whose absolute covariance with the residual is
        # (up to eps) tied with the maximum one.
        ocur = len(cov[C - eps < abs(cov)])
        if i < X.shape[1]:
            assert ocur == i + 1
        else:
            # no more than max_pred variables can go into the active set
            assert ocur == X.shape[1]
def test_simple_precomputed():
    """Same check as `test_simple`, with a precomputed Gram matrix."""
    _, _, coef_path_ = linear_model.lars_path(X, y, Gram=G, method="lar")

    n_features = X.shape[1]
    tolerance = 1e-3
    for step, coef in enumerate(coef_path_.T):
        residual = y - np.dot(X, coef)
        abs_cov = np.abs(np.dot(X.T, residual))
        max_cov = np.max(abs_cov)
        # Count the features tied (up to the tolerance) with the maximum
        # absolute covariance.
        n_tied = np.count_nonzero(abs_cov > max_cov - tolerance)
        # One more feature is tied at each step; no more than max_pred
        # variables can go into the active set.
        assert n_tied == min(step + 1, n_features)
def _assert_same_lars_path_result(output1, output2):
    """Assert that two `lars_path`-style outputs match element by element.

    Both outputs must have the same length, and each pair of corresponding
    items must pass `assert_allclose`.
    """
    assert len(output1) == len(output2)
    for first, second in zip(output1, output2):
        assert_allclose(first, second)
@pytest.mark.parametrize("method", ["lar", "lasso"])
@pytest.mark.parametrize("return_path", [True, False])
def test_lars_path_gram_equivalent(method, return_path):
    """`lars_path_gram` must agree with `lars_path` given the same Gram."""
    from_gram = linear_model.lars_path_gram(
        Xy=Xy, Gram=G, n_samples=n_samples, method=method, return_path=return_path
    )
    from_data = linear_model.lars_path(
        X, y, Gram=G, method=method, return_path=return_path
    )
    _assert_same_lars_path_result(from_gram, from_data)
def test_x_none_gram_none_raises_value_error():
    """`lars_path` must raise when neither X nor Gram is provided."""
    Xy = np.dot(X.T, y)
    expected_message = "X and Gram cannot both be unspecified"
    with pytest.raises(ValueError, match=expected_message):
        linear_model.lars_path(None, y, Gram=None, Xy=Xy)
def test_all_precomputed():
    """`lars_path` with precomputed Gram and Xy must give the right answer.

    The reference is the result computed from X and y alone.
    """
    G = np.dot(X.T, X)
    Xy = np.dot(X.T, y)
    for method in ("lar", "lasso"):
        reference = linear_model.lars_path(X, y, method=method)
        precomputed = linear_model.lars_path(X, y, Gram=G, Xy=Xy, method=method)
        for expected, got in zip(reference, precomputed):
            assert_array_almost_equal(expected, got)
# TODO: remove warning filter when numpy min version >= 2.0.0
@pytest.mark.filterwarnings("ignore: `rcond` parameter will change")
def test_lars_lstsq():
    """LassoLars with alpha=0 must reach the least squares solution."""
    X_scaled = 3 * X  # use un-normalized dataset
    lasso_lars = linear_model.LassoLars(alpha=0.0)
    lasso_lars.fit(X_scaled, y)

    lstsq_solution = np.linalg.lstsq(X_scaled, y)
    coef_lstsq = lstsq_solution[0]
    assert_array_almost_equal(lasso_lars.coef_, coef_lstsq)
# TODO: remove warning filter when numpy min version >= 2.0.0
@pytest.mark.filterwarnings("ignore: `rcond` parameter will change")
def test_lasso_gives_lstsq_solution():
    """The end of the Lars Lasso path must be the least squares solution."""
    _, _, coef_path_ = linear_model.lars_path(X, y, method="lasso")
    end_of_path = coef_path_[:, -1]

    lstsq_solution = np.linalg.lstsq(X, y)
    coef_lstsq = lstsq_solution[0]
    assert_array_almost_equal(coef_lstsq, end_of_path)
def test_collinearity():
    """`lars_path` must be robust to collinearity in the input."""
    # The first two columns are identical.
    X = np.array([[3.0, 3.0, 1.0], [2.0, 2.0, 0.0], [1.0, 1.0, 0]])
    y = np.array([1.0, 0.0, 0])
    rng = np.random.RandomState(0)

    quiet_lars_path = ignore_warnings(linear_model.lars_path)
    _, _, coef_path_ = quiet_lars_path(X, y, alpha_min=0.01)
    assert not np.isnan(coef_path_).any()
    residual = np.dot(X, coef_path_[:, -1]) - y
    assert (residual**2).sum() < 1.0  # just make sure it's bounded

    # With an all-zero target, every coefficient along the path must be zero.
    n_samples = 10
    X = rng.rand(n_samples, 5)
    y = np.zeros(n_samples)
    path_options = dict(
        Gram="auto",
        copy_X=False,
        copy_Gram=False,
        alpha_min=0.0,
        method="lasso",
        verbose=0,
        max_iter=500,
    )
    _, _, coef_path_ = linear_model.lars_path(X, y, **path_options)
    assert_array_almost_equal(coef_path_, np.zeros_like(coef_path_))
def test_no_path():
    """``return_path=False`` must return the last point of the full path."""
    alphas_, _, coef_path_ = linear_model.lars_path(X, y, method="lar")
    alpha_, _, coef = linear_model.lars_path(X, y, method="lar", return_path=False)

    last_coef = coef_path_[:, -1]
    assert_array_almost_equal(coef, last_coef)
    assert alpha_ == alphas_[-1]
def test_no_path_precomputed():
    """``return_path=False`` must stay correct with a precomputed Gram."""
    alphas_, _, coef_path_ = linear_model.lars_path(X, y, method="lar", Gram=G)
    alpha_, _, coef = linear_model.lars_path(
        X, y, method="lar", Gram=G, return_path=False
    )

    last_coef = coef_path_[:, -1]
    assert_array_almost_equal(coef, last_coef)
    assert alpha_ == alphas_[-1]
def test_no_path_all_precomputed():
    """``return_path=False`` must stay correct with both Gram and Xy given."""
    X, y = 3 * diabetes.data, diabetes.target
    G = np.dot(X.T, X)
    Xy = np.dot(X.T, y)

    shared_options = dict(method="lasso", Xy=Xy, Gram=G, alpha_min=0.9)
    alphas_, _, coef_path_ = linear_model.lars_path(X, y, **shared_options)
    alpha_, _, coef = linear_model.lars_path(
        X, y, return_path=False, **shared_options
    )

    last_coef = coef_path_[:, -1]
    assert_array_almost_equal(coef, last_coef)
    assert alpha_ == alphas_[-1]
@pytest.mark.parametrize(
    "classifier", [linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC]
)
def test_lars_precompute(classifier):
    """All accepted values of `precompute` must give the same coefficients."""
    G = np.dot(X.T, X)

    # Reference: an explicit Gram matrix (warnings ignored for this fit).
    reference_estimator = classifier(precompute=G)
    reference_coef = ignore_warnings(reference_estimator.fit)(X, y).coef_

    for precompute in (True, False, "auto", None):
        estimator = classifier(precompute=precompute)
        coef = estimator.fit(X, y).coef_
        assert_array_almost_equal(reference_coef, coef, decimal=8)
def test_singular_matrix():
    """Check `lars_path` when the input is a singular matrix."""
    X_singular = np.array([[1, 1.0], [1.0, 1.0]])
    y_ones = np.array([1, 1])
    _, _, coef_path = linear_model.lars_path(X_singular, y_ones)
    # Only the first feature enters the path.
    expected_path = [[0, 0], [1, 0]]
    assert_array_almost_equal(coef_path.T, expected_path)
def test_rank_deficient_design():
    """LARS Lasso must handle rank deficient designs like coordinate descent.

    Consistency test: for each design, the Lasso objective reached by
    LassoLars must not exceed the one reached by the coordinate descent
    Lasso (up to a relative tolerance of 1e-8).
    """
    y = [5, 0, 5]

    def lasso_objective(design, coef):
        # Lasso objective for 3 samples and alpha=0.1.
        data_fit = (1.0 / (2.0 * 3.0)) * linalg.norm(y - np.dot(design, coef)) ** 2
        return data_fit + 0.1 * linalg.norm(coef, 1)

    designs = (
        [[5, 0], [0, 5], [10, 10]],
        [[10, 10, 0], [1e-32, 0, 0], [0, 0, 1]],
    )
    for X in designs:
        # To be able to use the coefs to compute the objective function,
        # we need to turn off normalization
        lars = linear_model.LassoLars(0.1)
        coef_lars_ = lars.fit(X, y).coef_
        obj_lars = lasso_objective(X, coef_lars_)

        coord_descent = linear_model.Lasso(0.1, tol=1e-6)
        coef_cd_ = coord_descent.fit(X, y).coef_
        obj_cd = lasso_objective(X, coef_cd_)

        assert obj_lars < obj_cd * (1.0 + 1e-8)
def test_lasso_lars_vs_lasso_cd():
    """LassoLars and coordinate descent Lasso must give the same results."""

    def check_path_against_cd(design):
        # Refit a coordinate descent Lasso at every non-zero alpha of the
        # LARS path and compare its coefficients with the path's.
        alphas, _, lasso_path = linear_model.lars_path(design, y, method="lasso")
        lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8)
        for path_coef, alpha in zip(lasso_path.T, alphas):
            if alpha != 0:
                lasso_cd.alpha = alpha
                lasso_cd.fit(design, y)
                error = linalg.norm(path_coef - lasso_cd.coef_)
                assert error < 0.01

    X = 3 * diabetes.data
    check_path_against_cd(X)

    # similar test, with the classifiers
    for alpha in np.linspace(1e-2, 1 - 1e-2, 20):
        lars_model = linear_model.LassoLars(alpha=alpha).fit(X, y)
        cd_model = linear_model.Lasso(alpha=alpha, tol=1e-8).fit(X, y)
        err = linalg.norm(lars_model.coef_ - cd_model.coef_)
        assert err < 1e-3

    # same test, with normalized data
    X = diabetes.data
    X = X - X.sum(axis=0)
    X /= np.linalg.norm(X, axis=0)
    check_path_against_cd(X)
def test_lasso_lars_vs_lasso_cd_early_stopping():
    """LassoLars and Lasso CD must agree when the path is stopped early.

    The path is stopped before, in the middle of, and in the last part of
    the full path.
    """
    alphas_min = [10, 0.9, 1e-4]

    def check_end_of_path(design, **lasso_params):
        # Compare the end of each early-stopped LARS path with a coordinate
        # descent Lasso fitted at the last alpha of that path.
        for alpha_min in alphas_min:
            alphas, _, lasso_path = linear_model.lars_path(
                design, y, method="lasso", alpha_min=alpha_min
            )
            lasso_cd = linear_model.Lasso(tol=1e-8, **lasso_params)
            lasso_cd.alpha = alphas[-1]
            lasso_cd.fit(design, y)
            error = linalg.norm(lasso_path[:, -1] - lasso_cd.coef_)
            assert error < 0.01

    X = diabetes.data
    check_end_of_path(X, fit_intercept=False)

    # same test, with normalization
    X = diabetes.data - diabetes.data.sum(axis=0)
    X /= np.linalg.norm(X, axis=0)
    check_end_of_path(X)
def test_lasso_lars_path_length():
    """Check that the path length of LassoLars is right."""
    full_model = linear_model.LassoLars()
    full_model.fit(X, y)

    # Stopping at the third alpha of the full path must give a path made of
    # the first three alphas.
    truncated_model = linear_model.LassoLars(alpha=full_model.alphas_[2])
    truncated_model.fit(X, y)
    assert_array_almost_equal(full_model.alphas_[:3], truncated_model.alphas_)

    # Also check that the sequence of alphas is always decreasing
    alpha_steps = np.diff(full_model.alphas_)
    assert np.all(alpha_steps < 0)
def test_lasso_lars_vs_lasso_cd_ill_conditioned():
    """Lasso LARS must not blow up on a very ill-conditioned design.

    It should stay somewhat close to the solution given by the coordinate
    descent solver. This also tests that `lasso_path` (using the
    `lars_path` output style) gives the same result as `lars_path` and the
    previous lasso output style under these conditions.
    """
    rng = np.random.RandomState(42)

    # Generate data: a sparse ground truth with k non-zero entries.
    n, m = 70, 100
    k = 5
    X = rng.randn(n, m)
    w = np.zeros((m, 1))
    shuffled_indices = np.arange(0, m)
    rng.shuffle(shuffled_indices)
    support = shuffled_indices[:k]
    # Draw the signs first, then the magnitudes (same RNG order).
    signs = np.sign(rng.randn(k, 1))
    magnitudes = rng.rand(k, 1) + 1
    w[support] = signs * magnitudes

    y = np.dot(X, w)
    sigma = 0.2
    y += sigma * rng.rand(*y.shape)
    y = y.squeeze()

    lars_alphas, _, lars_coef = linear_model.lars_path(X, y, method="lasso")
    _, lasso_coef2, _ = linear_model.lasso_path(X, y, alphas=lars_alphas, tol=1e-6)

    assert_array_almost_equal(lars_coef, lasso_coef2, decimal=1)
def test_lasso_lars_vs_lasso_cd_ill_conditioned2():
    """LARS and coordinate descent must agree on an ill-conditioned problem.

    The situation is built so that LARS has to go far in the path to
    converge. Note it used to be the case that Lars had to use the drop for
    good strategy for this but this is no longer the case with the
    equality_tolerance checks.
    """
    X = [[1e20, 1e20, 0], [-1e-32, 0, 0], [1, 1, 1]]
    y = [10, 10, 1]
    alpha = 0.0001

    def objective_function(coef):
        # Lasso objective for the data and alpha defined above.
        squared_error = linalg.norm(y - np.dot(X, coef)) ** 2
        return 1.0 / (2.0 * len(X)) * squared_error + alpha * linalg.norm(coef, 1)

    lars = linear_model.LassoLars(alpha=alpha)
    warning_message = "Regressors in active set degenerate."
    with pytest.warns(ConvergenceWarning, match=warning_message):
        lars.fit(X, y)
    lars_obj = objective_function(lars.coef_)

    coord_descent = linear_model.Lasso(alpha=alpha, tol=1e-4)
    coord_descent.fit(X, y)
    cd_obj = objective_function(coord_descent.coef_)

    assert lars_obj < cd_obj * (1.0 + 1e-8)
def test_lars_add_features():
    """Assure that at least some features get added if necessary.

    Test for 6d2b4c, using a Hilbert matrix as design.
    """
    n = 5
    rows = np.arange(n)[:, np.newaxis]
    columns = np.arange(1, n + 1)
    hilbert = 1.0 / (columns + rows)

    model = linear_model.Lars(fit_intercept=False).fit(hilbert, np.arange(n))
    assert np.all(np.isfinite(model.coef_))
def test_lars_n_nonzero_coefs(verbose=False):
    """Lars must stop once `n_nonzero_coefs` coefficients are non-zero."""
    model = linear_model.Lars(n_nonzero_coefs=6, verbose=verbose)
    model.fit(X, y)

    assert np.count_nonzero(model.coef_) == 6
    # The path should be of length 6 + 1 in a Lars going down to 6
    # non-zero coefs
    assert len(model.alphas_) == 7
def test_multitarget():
    """Estimators receiving a multidimensional y must do the right thing.

    The results of a fit on a two-column target are compared, column by
    column, with the results of separate single-target fits.
    """
    Y = np.vstack([y, y**2]).T
    n_targets = Y.shape[1]
    estimators = [
        linear_model.LassoLars(),
        linear_model.Lars(),
        # regression test for gh-1615
        linear_model.LassoLars(fit_intercept=False),
        linear_model.Lars(fit_intercept=False),
    ]
    compared_attributes = ("alphas_", "active_", "coef_", "coef_path_")

    for estimator in estimators:
        estimator.fit(X, Y)
        Y_pred = estimator.predict(X)
        # Keep the multi-target results before the estimator is refitted.
        multi_target = {
            name: getattr(estimator, name) for name in compared_attributes
        }

        for k in range(n_targets):
            estimator.fit(X, Y[:, k])
            y_pred = estimator.predict(X)
            for name in compared_attributes:
                assert_array_almost_equal(
                    multi_target[name][k], getattr(estimator, name)
                )
            assert_array_almost_equal(Y_pred[:, k], y_pred)
def test_lars_cv():
    """Check how the alpha selected by LassoLarsCV varies with sample count.

    The estimator is refitted on the first 400, 200 and then 100 samples,
    and the selected alpha must grow at each step. This property is not
    actually guaranteed in general and is just a property of the given
    dataset, with the given steps chosen.
    """
    lars_cv = linear_model.LassoLarsCV()
    previous_alpha = 0
    for length in (400, 200, 100):
        X_subset = diabetes.data[:length]
        y_subset = diabetes.target[:length]
        lars_cv.fit(X_subset, y_subset)
        np.testing.assert_array_less(previous_alpha, lars_cv.alpha_)
        previous_alpha = lars_cv.alpha_

    assert not hasattr(lars_cv, "n_nonzero_coefs")
def test_lars_cv_max_iter(recwarn):
    """Check that LassoLarsCV with ``max_iter=5`` fits without emitting warnings."""
    warnings.simplefilter("always")
    # Turn floating-point divide/invalid events into errors during the fit.
    with np.errstate(divide="raise", invalid="raise"):
        y = diabetes.target
        rng = np.random.RandomState(42)
        extra_column = rng.randn(len(y))
        # Append the same random column twice to add correlated features.
        X = np.c_[diabetes.data, extra_column, extra_column]
        X = StandardScaler().fit_transform(X)
        lars_cv = linear_model.LassoLarsCV(max_iter=5, cv=5)
        lars_cv.fit(X, y)

    # Check that there is no warning in general and no ConvergenceWarning
    # in particular.
    # Materialize the string representation of the warnings to get a more
    # informative error message in case of AssertionError.
    recorded_warnings = [str(w) for w in recwarn]
    assert len(recorded_warnings) == 0
def test_lasso_lars_ic():
    """Check LassoLarsIC model selection with the BIC and AIC criteria.

    Verifies that:
    - only good (original) features are selected by BIC,
    - alpha_bic > alpha_aic,
    - n_nonzero_bic < n_nonzero_aic.
    """
    lars_bic = linear_model.LassoLarsIC("bic")
    lars_aic = linear_model.LassoLarsIC("aic")
    rng = np.random.RandomState(42)
    n_original_features = diabetes.data.shape[1]
    bad_features = rng.randn(diabetes.data.shape[0], 5)  # 5 pure-noise columns
    X = StandardScaler().fit_transform(np.c_[diabetes.data, bad_features])

    lars_bic.fit(X, y)
    lars_aic.fit(X, y)
    selected_bic = np.where(lars_bic.coef_)[0]
    selected_aic = np.where(lars_aic.coef_)[0]

    assert lars_bic.alpha_ > lars_aic.alpha_
    assert len(selected_bic) < len(selected_aic)
    # Every column picked by BIC must be one of the original features.
    assert np.max(selected_bic) < n_original_features
def test_lars_path_readonly_data():
    """Check that ``_lars_path_residues`` accepts memmapped fold data.

    When using automated memory mapping on large input, the fold data is in
    read-only mode. Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/4597
    """
    folds = train_test_split(X, y, random_state=42)
    with TempMemmap(folds) as memmapped_folds:
        X_train, X_test, y_train, y_test = memmapped_folds
        # The following should not fail despite copy=False
        _lars_path_residues(X_train, y_train, X_test, y_test, copy=False)
def test_lars_path_positive_constraint():
    """Check the ``positive`` parameter of ``lars_path``.

    This is the main test for the positive option; the estimator classes just
    make use of this function. With ``method="lar"`` the option must be
    rejected. With ``method="lasso"`` some coefficients are negative when
    ``positive=False`` and none are when ``positive=True``.
    """
    expected_msg = "Positive constraint not supported for 'lar' coding method."
    with pytest.raises(ValueError, match=expected_msg):
        linear_model.lars_path(
            diabetes["data"], diabetes["target"], method="lar", positive=True
        )

    lasso_kwargs = {"return_path": True, "method": "lasso"}

    _, _, unconstrained = linear_model.lars_path(X, y, positive=False, **lasso_kwargs)
    assert unconstrained.min() < 0

    _, _, constrained = linear_model.lars_path(X, y, positive=True, **lasso_kwargs)
    assert constrained.min() >= 0
# Parameter sets for testing the ``positive`` option on the estimator classes:
# ``default_parameter`` holds keyword arguments common to all of them, and
# ``estimator_parameter_map`` maps an estimator class name to its extra
# keyword arguments.
# NOTE(review): test_estimatorclasses_positive_constraint defines locals with
# these same names and values — confirm whether this copy is still needed.
default_parameter = {"fit_intercept": False}
estimator_parameter_map = {
"LassoLars": {"alpha": 0.1},
"LassoLarsCV": {},
"LassoLarsIC": {},
}
def test_estimatorclasses_positive_constraint():
    """Check that ``positive`` is honoured by each LassoLars-style estimator.

    For every estimator class, fitting with ``positive=False`` must give at
    least one negative coefficient, and fitting with ``positive=True`` none.
    """
    default_parameter = {"fit_intercept": False}
    estimator_parameter_map = {
        "LassoLars": {"alpha": 0.1},
        "LassoLarsCV": {},
        "LassoLarsIC": {},
    }
    for estname, extra_params in estimator_parameter_map.items():
        # Per-estimator settings override the shared defaults.
        params = {**default_parameter, **extra_params}
        estimator_cls = getattr(linear_model, estname)

        unconstrained = estimator_cls(positive=False, **params)
        unconstrained.fit(X, y)
        assert unconstrained.coef_.min() < 0

        constrained = estimator_cls(positive=True, **params)
        constrained.fit(X, y)
        assert min(constrained.coef_) >= 0
def test_lasso_lars_vs_lasso_cd_positive():
    """Check that LassoLars and coordinate-descent Lasso agree when positive=True.

    Three comparisons are made: along the full lars path on unnormalized data,
    for a restricted range of fixed alphas, and along the lars path on
    rescaled data. The fixed-alpha comparison had to be adapted for the
    positive option; see the comment in the body.
    """
    # not normalized data
    X = 3 * diabetes.data

    alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso", positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
    for path_coef, path_alpha in zip(lasso_path.T, alphas):
        if path_alpha != 0:
            lasso_cd.alpha = path_alpha
            lasso_cd.fit(X, y)
            assert linalg.norm(path_coef - lasso_cd.coef_) < 0.01

    # The range of alphas chosen for coefficient comparison here is restricted
    # compared with the test without the positive option. This is because the
    # Lars-Lasso algorithm does not converge to the least-squares solution for
    # small alphas, see 'Least Angle Regression' by Efron et al 2004. The
    # coefficients typically agree up to the smallest alpha reached by the
    # Lars-Lasso algorithm and start to diverge thereafter. See
    # https://gist.github.com/michigraber/7e7d7c75eca694c7a6ff
    for alpha in np.linspace(6e-1, 1 - 1e-2, 20):
        lars_model = linear_model.LassoLars(
            fit_intercept=False, alpha=alpha, positive=True
        )
        lars_model.fit(X, y)
        cd_model = linear_model.Lasso(
            fit_intercept=False, alpha=alpha, tol=1e-8, positive=True
        )
        cd_model.fit(X, y)
        assert linalg.norm(lars_model.coef_ - cd_model.coef_) < 1e-3

    # normalized data
    # NOTE(review): this subtracts the column sums (not the means) before
    # scaling each column to unit norm; kept exactly as in the original.
    X = diabetes.data - diabetes.data.sum(axis=0)
    X /= np.linalg.norm(X, axis=0)
    alphas, _, lasso_path = linear_model.lars_path(X, y, method="lasso", positive=True)
    lasso_cd = linear_model.Lasso(fit_intercept=False, tol=1e-8, positive=True)
    # The last path entry is dropped: don't include alpha=0.
    for path_coef, path_alpha in zip(lasso_path.T[:-1], alphas[:-1]):
        lasso_cd.alpha = path_alpha
        lasso_cd.fit(X, y)
        assert linalg.norm(path_coef - lasso_cd.coef_) < 0.01
def test_lasso_lars_vs_R_implementation():
    """Check that LassoLars matches R's ``lars`` library when fit_intercept=False.

    The data is the one used in bug report 7778. The reference coefficient
    path was obtained in R with:

        library(lars)
        model_lasso_lars = lars(X, t(y), type="lasso", intercept=FALSE,
                                trace=TRUE, normalize=FALSE)
        r = t(model_lasso_lars$beta)
    """
    y = np.array([-6.45006793, -3.51251449, -8.52445396, 6.12277822, -19.42109366])
    design_rows = [
        [0.47299829, 0, 0, 0, 0],
        [0.08239882, 0.85784863, 0, 0, 0],
        [0.30114139, -0.07501577, 0.80895216, 0, 0],
        [-0.01460346, -0.1015233, 0.0407278, 0.80338378, 0],
        [-0.69363927, 0.06754067, 0.18064514, -0.0803561, 0.40427291],
    ]
    X = np.array(design_rows).T

    # One row per feature, one column per step of the path.
    expected_path = np.array(
        [
            [
                0, 0, 0, 0, 0,
                -79.810362809499026,
                -83.528788732782829,
                -83.777653739190711,
                -83.784156932888934,
                -84.033390591756657,
            ],
            [0, 0, 0, 0, -0.476624256777266, 0, 0, 0, 0, 0.025219751009936],
            [
                0,
                -3.577397088285891,
                -4.702795355871871,
                -7.016748621359461,
                -7.614898471899412,
                -0.336938391359179,
                0,
                0,
                0.001213370600853,
                0.048162321585148,
            ],
            [
                0, 0, 0,
                2.231558436628169,
                2.723267514525966,
                2.811549786389614,
                2.813766976061531,
                2.817462468949557,
                2.817368178703816,
                2.816221090636795,
            ],
            [
                0, 0,
                -1.218422599914637,
                -3.457726183014808,
                -4.021304522060710,
                -45.827461592423745,
                -47.776608869312305,
                -47.911561610746404,
                -47.914845922736234,
                -48.039562334265717,
            ],
        ]
    )

    model = linear_model.LassoLars(alpha=0, fit_intercept=False)
    model.fit(X, y)
    assert_array_almost_equal(expected_path, model.coef_path_, decimal=12)
@pytest.mark.parametrize("copy_X", [True, False])
def test_lasso_lars_copyX_behaviour(copy_X):
    """
    Test that the ``copy_X`` given to the constructor is not overridden (it
    was until at least version 0.21): X stays intact exactly when copy_X is
    True.
    """
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_before = X.copy()
    y = X[:, 2]
    estimator = LassoLarsIC(copy_X=copy_X, precompute=False)
    estimator.fit(X, y)
    X_untouched = np.array_equal(X, X_before)
    assert copy_X == X_untouched
@pytest.mark.parametrize("copy_X", [True, False])
def test_lasso_lars_fit_copyX_behaviour(copy_X):
    """
    Test that the ``copy_X`` passed to ``.fit`` overrides the default
    ``__init__`` value: X stays intact exactly when copy_X is True.
    """
    rng = np.random.RandomState(0)
    X = rng.normal(0, 1, (100, 5))
    X_before = X.copy()
    y = X[:, 2]
    estimator = LassoLarsIC(precompute=False)
    estimator.fit(X, y, copy_X=copy_X)
    X_untouched = np.array_equal(X, X_before)
    assert copy_X == X_untouched
@pytest.mark.parametrize("est", (LassoLars(alpha=1e-3), Lars()))
def test_lars_with_jitter(est):
    """Test that a small amount of jitter helps stability.

    Uses the example provided in issue #2746.
    """
    plain = clone(est)  # Avoid side effects from previous tests.

    X = np.array([[0.0, 0.0, 0.0, -1.0, 0.0], [0.0, -1.0, 0.0, 0.0, 0.0]])
    y = [-2.5, -2.5]
    expected_coef = [0, 2.5, 0, 2.5, 0]

    # fit_intercept is set to False since the target is constant and we want
    # to check the value of coef; coef would be all zeros otherwise.
    plain.set_params(fit_intercept=False)
    jittered = clone(plain).set_params(jitter=10e-8, random_state=0)

    plain.fit(X, y)
    jittered.fit(X, y)

    mean_squared_gap = np.mean((plain.coef_ - jittered.coef_) ** 2)
    assert mean_squared_gap > 0.1
    np.testing.assert_allclose(jittered.coef_, expected_coef, rtol=1e-3)
def test_X_none_gram_not_none():
    """Check that ``lars_path`` rejects ``X=None`` when ``Gram`` is given."""
    expected_msg = "X cannot be None if Gram is not None"
    with pytest.raises(ValueError, match=expected_msg):
        lars_path(X=None, y=np.array([1]), Gram=True)
def test_copy_X_with_auto_gram():
    """Check that ``copy_X=True`` with ``Gram="auto"`` does not overwrite X.

    Non-regression test for #17789.
    """
    rng = np.random.RandomState(42)
    X = rng.rand(6, 6)
    y = rng.rand(6)
    X_snapshot = X.copy()

    linear_model.lars_path(X, y, Gram="auto", copy_X=True, method="lasso")

    # X did not change
    assert_allclose(X, X_snapshot)
@pytest.mark.parametrize(
    "LARS, has_coef_path, args",
    (
        (Lars, True, {}),
        (LassoLars, True, {}),
        (LassoLarsIC, False, {}),
        (LarsCV, True, {}),
        # max_iter=5 is for avoiding ConvergenceWarning
        (LassoLarsCV, True, {"max_iter": 5}),
    ),
)
@pytest.mark.parametrize("dtype", (np.float32, np.float64))
def test_lars_dtype_match(LARS, has_coef_path, args, dtype):
    """Check that the fit method preserves the input dtype."""
    rng = np.random.RandomState(0)
    X = rng.rand(20, 6).astype(dtype)
    y = rng.rand(20).astype(dtype)

    model = LARS(**args)
    model.fit(X, y)

    assert model.coef_.dtype == dtype
    # ``coef_path_`` is only checked for estimators flagged as exposing it.
    if has_coef_path:
        assert model.coef_path_.dtype == dtype
    assert model.intercept_.dtype == dtype
@pytest.mark.parametrize(
    "LARS, has_coef_path, args",
    (
        (Lars, True, {}),
        (LassoLars, True, {}),
        (LassoLarsIC, False, {}),
        (LarsCV, True, {}),
        # max_iter=5 is for avoiding ConvergenceWarning
        (LassoLarsCV, True, {"max_iter": 5}),
    ),
)
def test_lars_numeric_consistency(LARS, has_coef_path, args):
    """Check numerical consistency between float32 and float64 fits."""
    tolerances = {"rtol": 1e-5, "atol": 1e-5}

    rng = np.random.RandomState(0)
    X_64 = rng.rand(10, 6)
    y_64 = rng.rand(10)
    X_32 = X_64.astype(np.float32)
    y_32 = y_64.astype(np.float32)

    model_64 = LARS(**args).fit(X_64, y_64)
    model_32 = LARS(**args).fit(X_32, y_32)

    assert_allclose(model_64.coef_, model_32.coef_, **tolerances)
    if has_coef_path:
        assert_allclose(model_64.coef_path_, model_32.coef_path_, **tolerances)
    assert_allclose(model_64.intercept_, model_32.intercept_, **tolerances)
@pytest.mark.parametrize("criterion", ["aic", "bic"])
def test_lassolarsic_alpha_selection(criterion):
    """Check that we properly compute the AIC and BIC score.

    In this test, we reproduce the example of the Fig. 2 of Zou et al.
    (reference [1] in LassoLarsIC). In this example, only 7 features should be
    selected.
    """
    pipeline = make_pipeline(StandardScaler(), LassoLarsIC(criterion=criterion))
    pipeline.fit(X, y)
    fitted_lars = pipeline[-1]
    # Index of the smallest criterion value along the path.
    best_alpha_selected = np.argmin(fitted_lars.criterion_)
    assert best_alpha_selected == 7
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_lassolarsic_noise_variance(fit_intercept):
    """Check the behaviour when `n_samples` < `n_features` and that one needs
    to provide the noise variance."""
    rng = np.random.RandomState(0)
    # 10 samples with 10 features (intercept fitted) or 11 features (no intercept).
    X, y = datasets.make_regression(
        n_samples=10, n_features=11 - fit_intercept, random_state=rng
    )
    lars_ic = LassoLarsIC(fit_intercept=fit_intercept)
    model = make_pipeline(StandardScaler(), lars_ic)

    err_msg = (
        "You are using LassoLarsIC in the case where the number of samples is smaller"
        " than the number of features"
    )
    with pytest.raises(ValueError, match=err_msg):
        model.fit(X, y)

    # Once a noise variance is supplied, fitting and predicting must work.
    model.set_params(lassolarsic__noise_variance=1.0)
    model.fit(X, y).predict(X)

View File

@@ -0,0 +1,510 @@
"""
Tests for LinearModelLoss
Note that correctness of losses (which compose LinearModelLoss) is already well
covered in the _loss module.
"""
import numpy as np
import pytest
from numpy.testing import assert_allclose
from scipy import linalg, optimize
from sklearn._loss.loss import (
HalfBinomialLoss,
HalfMultinomialLoss,
HalfPoissonLoss,
)
from sklearn.datasets import make_low_rank_matrix
from sklearn.linear_model._linear_loss import LinearModelLoss
from sklearn.utils.extmath import squared_norm
from sklearn.utils.fixes import CSR_CONTAINERS
# We do not need to test all losses, just what LinearModelLoss does on top of the
# base losses.
# The parametrized tests in this module instantiate each of these base loss
# classes and wrap the instance in a LinearModelLoss.
LOSSES = [HalfBinomialLoss, HalfMultinomialLoss, HalfPoissonLoss]
def random_X_y_coef(
    linear_model_loss, n_samples, n_features, coef_bound=(-2, 2), seed=42
):
    """Randomly generate X, y and coef in a valid range.

    Parameters
    ----------
    linear_model_loss : LinearModelLoss
        Loss object; its ``fit_intercept``, ``base_loss`` and
        ``init_zero_coef`` are used to shape and fill the outputs.
    n_samples : int
        Number of rows of X.
    n_features : int
        Number of columns of X.
    coef_bound : tuple of (low, high), default=(-2, 2)
        Bounds of the uniform distribution the coefficients are drawn from.
    seed : int, default=42
        Seed of the random number generator.

    Returns
    -------
    X, y, coef
        The design matrix, the target and the coefficients.
    """
    rng = np.random.RandomState(seed)
    base_loss = linear_model_loss.base_loss
    link = base_loss.link
    # One extra degree of freedom per target when an intercept is fitted.
    n_dof = n_features + linear_model_loss.fit_intercept

    X = make_low_rank_matrix(
        n_samples=n_samples,
        n_features=n_features,
        random_state=rng,
    )
    coef = linear_model_loss.init_zero_coef(X)

    if not base_loss.is_multiclass:
        coef.flat[:] = rng.uniform(
            low=coef_bound[0],
            high=coef_bound[1],
            size=n_dof,
        )
        if linear_model_loss.fit_intercept:
            raw_prediction = X @ coef[:-1] + coef[-1]
        else:
            raw_prediction = X @ coef
        # Perturb the linear predictor before mapping it through the inverse link.
        perturbation = rng.uniform(low=-1, high=1, size=n_samples)
        y = link.inverse(raw_prediction + perturbation)
        return X, y, coef

    n_classes = base_loss.n_classes
    coef.flat[:] = rng.uniform(
        low=coef_bound[0],
        high=coef_bound[1],
        size=n_classes * n_dof,
    )
    if linear_model_loss.fit_intercept:
        raw_prediction = X @ coef[:, :-1].T + coef[:, -1]
    else:
        raw_prediction = X @ coef.T
    proba = link.inverse(raw_prediction)

    # y = rng.choice(np.arange(n_classes), p=proba) does not work.
    # See https://stackoverflow.com/a/34190035/16761084
    # Instead, draw one uniform number per row and count how many cumulative
    # probabilities lie strictly below it; that count is the class index.
    cumulative = proba.cumsum(axis=1)
    draws = rng.rand(proba.shape[0])[:, None]
    class_index = (cumulative < draws).sum(axis=1)
    y = np.arange(n_classes)[class_index].astype(np.float64)

    return X, y, coef
@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("n_features", [0, 1, 10])
@pytest.mark.parametrize("dtype", [None, np.float32, np.float64, np.int64])
def test_init_zero_coef(
    base_loss, fit_intercept, n_features, dtype, global_random_seed
):
    """Test that init_zero_coef initializes coef correctly."""
    loss = LinearModelLoss(base_loss=base_loss(), fit_intercept=fit_intercept)
    rng = np.random.RandomState(global_random_seed)
    X = rng.normal(size=(5, n_features))
    coef = loss.init_zero_coef(X, dtype=dtype)

    n_dof = n_features + fit_intercept
    if not loss.base_loss.is_multiclass:
        assert coef.shape == (n_dof,)
    else:
        assert coef.shape == (loss.base_loss.n_classes, n_dof)
        assert coef.flags["F_CONTIGUOUS"]

    # Without an explicit dtype, coef takes the dtype of X.
    expected_dtype = X.dtype if dtype is None else dtype
    assert coef.dtype == expected_dtype

    assert np.count_nonzero(coef) == 0
@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("sample_weight", [None, "range"])
@pytest.mark.parametrize("l2_reg_strength", [0, 1])
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_loss_grad_hess_are_the_same(
    base_loss,
    fit_intercept,
    sample_weight,
    l2_reg_strength,
    csr_container,
    global_random_seed,
):
    """Test that loss, gradient and hessian agree across different functions.

    The same quantities are computed through every method of LinearModelLoss
    that exposes them, for dense and sparse X, and the inputs must be left
    unmodified.
    """
    loss = LinearModelLoss(base_loss=base_loss(), fit_intercept=fit_intercept)
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss, n_samples=10, n_features=5, seed=global_random_seed
    )
    X_old, y_old, coef_old = X.copy(), y.copy(), coef.copy()

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])
    # Keyword arguments shared by every call below.
    common = {"sample_weight": sample_weight, "l2_reg_strength": l2_reg_strength}

    l1 = loss.loss(coef, X, y, **common)
    g1 = loss.gradient(coef, X, y, **common)
    l2, g2 = loss.loss_gradient(coef, X, y, **common)
    g3, h3 = loss.gradient_hessian_product(coef, X, y, **common)
    g4, h4, _ = loss.gradient_hessian(coef, X, y, **common)

    assert_allclose(l1, l2)
    for other_gradient in (g2, g3, g4):
        assert_allclose(g1, other_gradient)
    # The ravelling only takes effect for multiclass.
    assert_allclose(h4 @ g4.ravel(order="F"), h3(g3).ravel(order="F"))

    # Test that gradient_out and hessian_out are considered properly.
    g_out = np.empty_like(coef)
    h_out = np.empty_like(coef, shape=(coef.size, coef.size))
    g5, h5, _ = loss.gradient_hessian(
        coef, X, y, gradient_out=g_out, hessian_out=h_out, **common
    )
    assert np.shares_memory(g5, g_out)
    assert np.shares_memory(h5, h_out)
    assert_allclose(g5, g_out)
    assert_allclose(h5, h_out)
    assert_allclose(g1, g5)
    assert_allclose(h5, h4)

    # same for sparse X
    Xs = csr_container(X)
    l1_sp = loss.loss(coef, Xs, y, **common)
    g1_sp = loss.gradient(coef, Xs, y, **common)
    l2_sp, g2_sp = loss.loss_gradient(coef, Xs, y, **common)
    g3_sp, h3_sp = loss.gradient_hessian_product(coef, Xs, y, **common)
    g4_sp, h4_sp, _ = loss.gradient_hessian(coef, Xs, y, **common)

    assert_allclose(l1, l1_sp)
    assert_allclose(l1, l2_sp)
    assert_allclose(g1, g1_sp)
    assert_allclose(g1, g2_sp)
    assert_allclose(g1, g3_sp)
    assert_allclose(h3(g1), h3_sp(g1_sp))
    assert_allclose(g1, g4_sp)
    assert_allclose(h4, h4_sp)

    # X, y and coef should not have changed
    assert_allclose(X, X_old)
    assert_allclose(Xs.toarray(), X_old)
    assert_allclose(y, y_old)
    assert_allclose(coef, coef_old)
@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("sample_weight", [None, "range"])
@pytest.mark.parametrize("l2_reg_strength", [0, 1])
@pytest.mark.parametrize("X_container", CSR_CONTAINERS + [None])
def test_loss_gradients_hessp_intercept(
    base_loss, sample_weight, l2_reg_strength, X_container, global_random_seed
):
    """Test that loss and gradient handle intercept correctly.

    A loss without intercept on an X whose last column is all ones is compared
    with a loss with intercept on X without that column.
    """
    loss = LinearModelLoss(base_loss=base_loss(), fit_intercept=False)
    loss_inter = LinearModelLoss(base_loss=base_loss(), fit_intercept=True)
    n_samples, n_features = 10, 5
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss,
        n_samples=n_samples,
        n_features=n_features,
        seed=global_random_seed,
    )

    X[:, -1] = 1  # make last column of 1 to mimic intercept term
    # exclude intercept column as it is added automatically by loss_inter
    X_inter = X[:, :-1]

    if X_container is not None:
        X = X_container(X)

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])
    common = {"sample_weight": sample_weight, "l2_reg_strength": l2_reg_strength}

    l, g = loss.loss_gradient(coef, X, y, **common)
    _, hessp = loss.gradient_hessian_product(coef, X, y, **common)
    l_inter, g_inter = loss_inter.loss_gradient(coef, X_inter, y, **common)
    _, hessp_inter = loss_inter.gradient_hessian_product(coef, X_inter, y, **common)

    # Note, that intercept gets no L2 penalty.
    intercept_penalty = 0.5 * l2_reg_strength * squared_norm(coef.T[-1])
    assert l == pytest.approx(l_inter + intercept_penalty)

    # Add the missing penalty term in place (g_inter itself is modified).
    g_inter.T[-1] += l2_reg_strength * coef.T[-1]
    assert_allclose(g, g_inter)

    s = np.random.RandomState(global_random_seed).randn(*coef.shape)
    h = hessp(s)
    h_inter = hessp_inter(s)
    # Same in-place correction for the hessian product.
    h_inter.T[-1] += l2_reg_strength * s.T[-1]
    assert_allclose(h, h_inter)
@pytest.mark.parametrize("base_loss", LOSSES)
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("sample_weight", [None, "range"])
@pytest.mark.parametrize("l2_reg_strength", [0, 1])
def test_gradients_hessians_numerically(
    base_loss, fit_intercept, sample_weight, l2_reg_strength, global_random_seed
):
    """Test gradients and hessians with numerical derivatives.

    Gradient should equal the numerical derivatives of the loss function.
    Hessians should equal the numerical derivatives of gradients.
    """
    loss = LinearModelLoss(base_loss=base_loss(), fit_intercept=fit_intercept)
    n_samples, n_features = 10, 5
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss,
        n_samples=n_samples,
        n_features=n_features,
        seed=global_random_seed,
    )
    coef = coef.ravel(order="F")  # this is important only for multinomial loss

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])
    common = {"sample_weight": sample_weight, "l2_reg_strength": l2_reg_strength}

    # 1. Check gradients numerically
    grad_eps = 1e-6
    g, hessp = loss.gradient_hessian_product(coef, X, y, **common)

    # Use a trick to get central finite difference of accuracy 4 (five-point stencil)
    # https://en.wikipedia.org/wiki/Numerical_differentiation
    # https://en.wikipedia.org/wiki/Finite_difference_coefficient
    # Shifting the evaluation point by -h and stepping by 2*h turns the forward
    # difference into a central one.
    # approx_g1 = (f(x + eps) - f(x - eps)) / (2*eps)
    approx_g1 = optimize.approx_fprime(
        coef,
        lambda coef: loss.loss(coef - grad_eps, X, y, **common),
        2 * grad_eps,
    )
    # approx_g2 = (f(x + 2*eps) - f(x - 2*eps)) / (4*eps)
    approx_g2 = optimize.approx_fprime(
        coef,
        lambda coef: loss.loss(coef - 2 * grad_eps, X, y, **common),
        4 * grad_eps,
    )
    # Five-point stencil approximation
    # See: https://en.wikipedia.org/wiki/Five-point_stencil#1D_first_derivative
    approx_g = (4 * approx_g1 - approx_g2) / 3
    assert_allclose(g, approx_g, rtol=1e-2, atol=1e-8)

    # 2. Check hessp numerically along the second direction of the gradient
    vector = np.zeros_like(g)
    vector[1] = 1
    hess_col = hessp(vector)

    # Computation of the Hessian is particularly fragile to numerical errors when
    # doing simple finite differences. Here we compute the grad along a path in the
    # direction of the vector and then use a least-square regression to estimate
    # the slope
    hess_eps = 1e-3
    d_x = np.linspace(-hess_eps, hess_eps, 30)
    d_grad = np.array(
        [loss.gradient(coef + t * vector, X, y, **common) for t in d_x]
    )
    d_grad -= d_grad.mean(axis=0)
    approx_hess_col = linalg.lstsq(d_x[:, np.newaxis], d_grad)[0].ravel()
    assert_allclose(approx_hess_col, hess_col, rtol=1e-3)
@pytest.mark.parametrize("fit_intercept", [False, True])
def test_multinomial_coef_shape(fit_intercept, global_random_seed):
    """Test that multinomial LinearModelLoss respects shape of coef.

    Gradients and hessian products must come back in the shape of the coef
    they were given, whether coef is 2d or ravelled in F-order.
    """
    loss = LinearModelLoss(base_loss=HalfMultinomialLoss(), fit_intercept=fit_intercept)
    n_samples, n_features = 10, 5
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss,
        n_samples=n_samples,
        n_features=n_features,
        seed=global_random_seed,
    )
    s = np.random.RandomState(global_random_seed).randn(*coef.shape)
    n_classes = loss.base_loss.n_classes

    # 2d coef
    _, g = loss.loss_gradient(coef, X, y)
    g1 = loss.gradient(coef, X, y)
    g2, hessp = loss.gradient_hessian_product(coef, X, y)
    h = hessp(s)
    assert g.shape == coef.shape
    assert h.shape == coef.shape
    assert_allclose(g, g1)
    assert_allclose(g, g2)
    g3, hess, _ = loss.gradient_hessian(coef, X, y)
    assert g3.shape == coef.shape
    # But full hessian is always 2d.
    assert hess.shape == (coef.size, coef.size)

    # ravelled (1d) coef
    coef_r = coef.ravel(order="F")
    s_r = s.ravel(order="F")
    _, g_r = loss.loss_gradient(coef_r, X, y)
    g1_r = loss.gradient(coef_r, X, y)
    g2_r, hessp_r = loss.gradient_hessian_product(coef_r, X, y)
    h_r = hessp_r(s_r)
    assert g_r.shape == coef_r.shape
    assert h_r.shape == coef_r.shape
    assert_allclose(g_r, g1_r)
    assert_allclose(g_r, g2_r)

    # Both layouts must hold the same values.
    assert_allclose(g, g_r.reshape(n_classes, -1, order="F"))
    assert_allclose(h, h_r.reshape(n_classes, -1, order="F"))
@pytest.mark.parametrize("sample_weight", [None, "range"])
def test_multinomial_hessian_3_classes(sample_weight, global_random_seed):
    """Test multinomial hessian for 3 classes and 2 points.

    For n_classes = 3 and n_samples = 2, we have
        p0 = [p0_0, p0_1]
        p1 = [p1_0, p1_1]
        p2 = [p2_0, p2_1]
    and with 2 x 2 diagonal subblocks
        H = [p0 * (1-p0),    -p0 * p1,    -p0 * p2]
            [   -p0 * p1, p1 * (1-p1),    -p1 * p2]
            [   -p0 * p2,    -p1 * p2, p2 * (1-p2)]
        hess = X' H X
    """
    n_samples, n_features, n_classes = 2, 5, 3
    loss = LinearModelLoss(
        base_loss=HalfMultinomialLoss(n_classes=n_classes), fit_intercept=False
    )
    X, y, coef = random_X_y_coef(
        linear_model_loss=loss,
        n_samples=n_samples,
        n_features=n_features,
        seed=global_random_seed,
    )
    coef = coef.ravel(order="F")  # this is important only for multinomial loss

    if sample_weight == "range":
        sample_weight = np.linspace(1, y.shape[0], num=y.shape[0])

    _, hess, _ = loss.gradient_hessian(
        coef,
        X,
        y,
        sample_weight=sample_weight,
        l2_reg_strength=0,
    )
    # Hessian must be a symmetric matrix.
    assert_allclose(hess, hess.T)

    _, _, raw_prediction = loss.weight_intercept_raw(coef, X)
    _, proba = loss.base_loss.gradient_proba(
        y_true=y,
        raw_prediction=raw_prediction,
        sample_weight=sample_weight,
    )
    # Diagonal matrices of the per-class probabilities, plus the identity.
    d0 = np.diag(proba[:, 0])
    d1 = np.diag(proba[:, 1])
    d2 = np.diag(proba[:, 2])
    eye = np.eye(n_samples)
    h = np.block(
        [
            [d0 * (eye - d0), -d0 * d1, -d0 * d2],
            [-d0 * d1, d1 * (eye - d1), -d1 * d2],
            [-d0 * d2, -d1 * d2, d2 * (eye - d2)],
        ]
    )
    h = h.reshape((n_classes, n_samples, n_classes, n_samples))
    if sample_weight is None:
        h /= n_samples
    else:
        h *= sample_weight / np.sum(sample_weight)
    # hess_expected.shape = (n_features, n_classes, n_classes, n_features)
    hess_expected = np.einsum("ij, mini, ik->jmnk", X, h, X)
    hess_expected = np.moveaxis(hess_expected, 2, 3)
    hess_expected = hess_expected.reshape(
        n_classes * n_features, n_classes * n_features, order="C"
    )
    assert_allclose(hess_expected, hess_expected.T)
    assert_allclose(hess, hess_expected)
def test_linear_loss_gradient_hessian_raises_wrong_out_parameters():
    """Test that wrong gradient_out and hessian_out raises errors."""
    n_samples, n_features, n_classes = 5, 2, 3
    X = np.ones((n_samples, n_features))
    y = np.ones(n_samples)

    # Binomial loss: output buffers of the wrong shape.
    loss = LinearModelLoss(base_loss=HalfBinomialLoss(), fit_intercept=False)
    coef = loss.init_zero_coef(X)

    bad_shape_gradient = np.zeros(1)
    with pytest.raises(
        ValueError, match="gradient_out is required to have shape coef.shape"
    ):
        loss.gradient_hessian(
            coef=coef,
            X=X,
            y=y,
            gradient_out=bad_shape_gradient,
            hessian_out=None,
        )

    bad_shape_hessian = np.zeros(1)
    with pytest.raises(ValueError, match="hessian_out is required to have shape"):
        loss.gradient_hessian(
            coef=coef,
            X=X,
            y=y,
            gradient_out=None,
            hessian_out=bad_shape_hessian,
        )

    # Multinomial loss: output buffers with a rejected memory layout.
    loss = LinearModelLoss(base_loss=HalfMultinomialLoss(), fit_intercept=False)
    coef = loss.init_zero_coef(X)

    # Taking every other row yields a strided view of the buffer.
    strided_gradient = np.zeros((2 * n_classes, n_features))[::2]
    with pytest.raises(ValueError, match="gradient_out must be F-contiguous"):
        loss.gradient_hessian(
            coef=coef,
            X=X,
            y=y,
            gradient_out=strided_gradient,
        )

    n_dof = n_classes * n_features
    strided_hessian = np.zeros((2 * n_dof, n_dof))[::2]
    with pytest.raises(ValueError, match="hessian_out must be contiguous"):
        loss.gradient_hessian(
            coef=coef,
            X=X,
            y=y,
            gradient_out=None,
            hessian_out=strided_hessian,
        )

View File

@@ -0,0 +1,273 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
import pytest
from sklearn.datasets import make_sparse_coded_signal
from sklearn.linear_model import (
LinearRegression,
OrthogonalMatchingPursuit,
OrthogonalMatchingPursuitCV,
orthogonal_mp,
orthogonal_mp_gram,
)
from sklearn.utils import check_random_state
from sklearn.utils._testing import (
assert_allclose,
assert_array_almost_equal,
assert_array_equal,
ignore_warnings,
)
# Shared problem sizes and data used by the tests in this module.
n_samples, n_features, n_nonzero_coefs, n_targets = 25, 35, 5, 3
# Note the argument mapping: the generator's ``n_samples`` is our number of
# targets, its ``n_components`` our number of features and its ``n_features``
# our number of samples.
y, X, gamma = make_sparse_coded_signal(
n_samples=n_targets,
n_components=n_features,
n_features=n_samples,
n_nonzero_coefs=n_nonzero_coefs,
random_state=0,
)
# Transpose everything returned by the generator.
y, X, gamma = y.T, X.T, gamma.T
# Make X not of norm 1 for testing
X *= 10
y *= 10
# Gram matrix and X.T @ y, computed after the rescaling above.
G, Xy = np.dot(X.T, X), np.dot(X.T, y)
# this makes X (n_samples, n_features)
# and y (n_samples, 3)
def test_correct_shapes():
    """Check the shape of ``orthogonal_mp`` output for 1D and 2D targets."""
    single_target_coef = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)
    assert single_target_coef.shape == (n_features,)

    multi_target_coef = orthogonal_mp(X, y, n_nonzero_coefs=5)
    assert multi_target_coef.shape == (n_features, 3)
def test_correct_shapes_gram():
    """Check the shape of ``orthogonal_mp_gram`` output for 1D and 2D ``Xy``."""
    single_target_coef = orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5)
    assert single_target_coef.shape == (n_features,)

    multi_target_coef = orthogonal_mp_gram(G, Xy, n_nonzero_coefs=5)
    assert multi_target_coef.shape == (n_features, 3)
def test_n_nonzero_coefs():
    """Check that at most ``n_nonzero_coefs`` coefficients are non-zero."""
    # First with the default settings, then with precompute=True.
    for extra_kwargs in ({}, {"precompute": True}):
        coef = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5, **extra_kwargs)
        assert np.count_nonzero(coef) <= 5
def test_tol():
    """Check that the squared residual norm does not exceed the requested ``tol``."""
    tol = 0.5
    target = y[:, 0]
    coef_plain = orthogonal_mp(X, target, tol=tol)
    coef_gram = orthogonal_mp(X, target, tol=tol, precompute=True)
    for coef in (coef_plain, coef_gram):
        residual = target - np.dot(X, coef)
        assert np.sum(residual**2) <= tol
def test_with_without_gram():
    """Check that ``precompute=True`` does not change the result."""
    coef_plain = orthogonal_mp(X, y, n_nonzero_coefs=5)
    coef_precomputed = orthogonal_mp(X, y, n_nonzero_coefs=5, precompute=True)
    assert_array_almost_equal(coef_plain, coef_precomputed)
def test_with_without_gram_tol():
    """Check that ``precompute=True`` does not change the result when using ``tol``."""
    coef_plain = orthogonal_mp(X, y, tol=1.0)
    coef_precomputed = orthogonal_mp(X, y, tol=1.0, precompute=True)
    assert_array_almost_equal(coef_plain, coef_precomputed)
def test_unreachable_accuracy():
    """Check that ``tol=0`` gives the same result as selecting ``n_features`` atoms.

    With ``precompute=True`` a RuntimeWarning about premature termination is
    additionally expected.
    """
    coef_tol = orthogonal_mp(X, y, tol=0)
    coef_all_atoms = orthogonal_mp(X, y, n_nonzero_coefs=n_features)
    assert_array_almost_equal(coef_tol, coef_all_atoms)

    warning_message = (
        "Orthogonal matching pursuit ended prematurely "
        "due to linear dependence in the dictionary. "
        "The requested precision might not have been met."
    )
    with pytest.warns(RuntimeWarning, match=warning_message):
        coef_tol_gram = orthogonal_mp(X, y, tol=0, precompute=True)
        coef_all_atoms_gram = orthogonal_mp(
            X, y, precompute=True, n_nonzero_coefs=n_features
        )
        assert_array_almost_equal(coef_tol_gram, coef_all_atoms_gram)
@pytest.mark.parametrize("positional_params", [(X, y), (G, Xy)])
@pytest.mark.parametrize(
    "keyword_params",
    [{"n_nonzero_coefs": n_features + 1}],
)
def test_bad_input(positional_params, keyword_params):
    """Check that ``n_nonzero_coefs = n_features + 1`` raises a ValueError."""
    # NOTE(review): the (G, Xy) case is also passed to orthogonal_mp rather
    # than orthogonal_mp_gram — confirm that this is intended.
    first_arg, second_arg = positional_params
    with pytest.raises(ValueError):
        orthogonal_mp(first_arg, second_arg, **keyword_params)
def test_perfect_signal_recovery():
    """Check that both OMP variants recover the support and values of ``gamma``."""
    true_coef = gamma[:, 0]
    true_support = np.flatnonzero(true_coef)

    recovered = orthogonal_mp(X, y[:, 0], n_nonzero_coefs=5)
    recovered_gram = orthogonal_mp_gram(G, Xy[:, 0], n_nonzero_coefs=5)

    # The selected atoms must match exactly ...
    assert_array_equal(true_support, np.flatnonzero(recovered))
    assert_array_equal(true_support, np.flatnonzero(recovered_gram))
    # ... and the coefficient values up to 2 decimals.
    assert_array_almost_equal(true_coef, recovered, decimal=2)
    assert_array_almost_equal(true_coef, recovered_gram, decimal=2)
def test_orthogonal_mp_gram_readonly():
    """Check that ``orthogonal_mp_gram`` works on read-only inputs without copies.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/5956
    """
    true_coef = gamma[:, 0]
    true_support = np.flatnonzero(true_coef)

    # Work on write-protected copies so the module-level arrays stay writable.
    G_readonly = G.copy()
    Xy_readonly = Xy.copy()
    for array in (G_readonly, Xy_readonly):
        array.setflags(write=False)

    gamma_gram = orthogonal_mp_gram(
        G_readonly, Xy_readonly[:, 0], n_nonzero_coefs=5, copy_Gram=False, copy_Xy=False
    )

    assert_array_equal(true_support, np.flatnonzero(gamma_gram))
    assert_array_almost_equal(true_coef, gamma_gram, decimal=2)
def test_estimator():
    """Check fitted attribute shapes and sparsity of OrthogonalMatchingPursuit.

    Covers single- and multi-target fits, with the constructor default for
    ``fit_intercept``, then explicitly True and False.
    """
    single_target = y[:, 0]
    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)

    # Constructor defaults, single target.
    omp.fit(X, single_target)
    assert omp.coef_.shape == (n_features,)
    assert omp.intercept_.shape == ()
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs

    # Constructor defaults, multiple targets.
    omp.fit(X, y)
    assert omp.coef_.shape == (n_targets, n_features)
    assert omp.intercept_.shape == (n_targets,)
    assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs

    # The first row of the multi-target fit must match a single-target fit
    # made with fit_intercept=True.
    coef_normalized = omp.coef_[0].copy()
    omp.set_params(fit_intercept=True)
    omp.fit(X, single_target)
    assert_array_almost_equal(coef_normalized, omp.coef_)

    # Without intercept, single target.
    omp.set_params(fit_intercept=False)
    omp.fit(X, single_target)
    assert np.count_nonzero(omp.coef_) <= n_nonzero_coefs
    assert omp.coef_.shape == (n_features,)
    assert omp.intercept_ == 0

    # Without intercept, multiple targets.
    omp.fit(X, y)
    assert omp.coef_.shape == (n_targets, n_features)
    assert omp.intercept_ == 0
    assert np.count_nonzero(omp.coef_) <= n_targets * n_nonzero_coefs
def test_estimator_n_nonzero_coefs():
    """Check `n_nonzero_coefs_` correct when `tol` is and isn't set."""
    target = y[:, 0]

    without_tol = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs)
    without_tol.fit(X, target)
    assert without_tol.n_nonzero_coefs_ == n_nonzero_coefs

    with_tol = OrthogonalMatchingPursuit(n_nonzero_coefs=n_nonzero_coefs, tol=0.5)
    with_tol.fit(X, target)
    assert with_tol.n_nonzero_coefs_ is None
def test_identical_regressors():
    """Check that duplicated dictionary columns trigger the early-stop warning."""
    # Make the second column an exact copy of the first one.
    X_dup = X.copy()
    X_dup[:, 1] = X_dup[:, 0]
    # Target built from the two identical columns only.
    coef = np.zeros(n_features)
    coef[0] = 1.0
    coef[1] = 1.0
    target = np.dot(X_dup, coef)
    expected_warning = (
        "Orthogonal matching pursuit ended prematurely "
        "due to linear dependence in the dictionary. "
        "The requested precision might not have been met."
    )
    with pytest.warns(RuntimeWarning, match=expected_warning):
        orthogonal_mp(X_dup, target, n_nonzero_coefs=2)
def test_swapped_regressors():
    """Check the reported support when atoms are selected out of index order."""
    # X[:, 21] should be selected first, then X[:, 0] selected second,
    # which will take X[:, 21]'s place in case the algorithm does
    # column swapping for optimization (which is the case at the moment)
    true_coef = np.zeros(n_features)
    true_coef[21] = 1.0
    true_coef[0] = 0.5
    target = np.dot(X, true_coef)
    target_Xy = np.dot(X.T, target)
    expected_support = [0, 21]

    coef_design = orthogonal_mp(X, target, n_nonzero_coefs=2)
    coef_gram = orthogonal_mp_gram(G, target_Xy, n_nonzero_coefs=2)
    assert_array_equal(np.flatnonzero(coef_design), expected_support)
    assert_array_equal(np.flatnonzero(coef_gram), expected_support)
def test_no_atoms():
    """Check that both OMP solvers return all-zero codes for an all-zero target.

    Warnings raised by the solvers on this degenerate input are silenced
    through `ignore_warnings`.
    """
    y_empty = np.zeros_like(y)
    Xy_empty = np.dot(X.T, y_empty)
    gamma_empty = ignore_warnings(orthogonal_mp)(X, y_empty, n_nonzero_coefs=1)
    # The precomputed inputs (G, Xy) belong to `orthogonal_mp_gram`, as in the
    # other tests of this module; feeding them to `orthogonal_mp` would treat G
    # as a design matrix and leave the Gram solver untested.
    gamma_empty_gram = ignore_warnings(orthogonal_mp_gram)(
        G, Xy_empty, n_nonzero_coefs=1
    )
    assert np.all(gamma_empty == 0)
    assert np.all(gamma_empty_gram == 0)
def test_omp_path():
    """Check that the last step of the returned path equals the final solution."""
    solvers_and_inputs = (
        (orthogonal_mp, (X, y)),
        (orthogonal_mp_gram, (G, Xy)),
    )
    for solver, inputs in solvers_and_inputs:
        full_path = solver(*inputs, n_nonzero_coefs=5, return_path=True)
        final = solver(*inputs, n_nonzero_coefs=5, return_path=False)
        assert full_path.shape == (n_features, n_targets, 5)
        assert_array_almost_equal(full_path[:, :, -1], final)
def test_omp_return_path_prop_with_gram():
    """Check `return_path` of `orthogonal_mp` when `precompute=True`."""
    shared = dict(n_nonzero_coefs=5, precompute=True)
    full_path = orthogonal_mp(X, y, return_path=True, **shared)
    final = orthogonal_mp(X, y, return_path=False, **shared)
    assert full_path.shape == (n_features, n_targets, 5)
    assert_array_almost_equal(full_path[:, :, -1], final)
def test_omp_cv():
    """Check OrthogonalMatchingPursuitCV against the known code and plain OMP."""
    target = y[:, 0]
    expected_coef = gamma[:, 0]

    ompcv = OrthogonalMatchingPursuitCV(fit_intercept=False, max_iter=10)
    ompcv.fit(X, target)
    assert ompcv.n_nonzero_coefs_ == n_nonzero_coefs
    assert_array_almost_equal(ompcv.coef_, expected_coef)

    # A plain OMP given the selected sparsity level must agree with the CV fit.
    plain_omp = OrthogonalMatchingPursuit(
        fit_intercept=False, n_nonzero_coefs=ompcv.n_nonzero_coefs_
    )
    plain_omp.fit(X, target)
    assert_array_almost_equal(ompcv.coef_, plain_omp.coef_)
def test_omp_reaches_least_squares():
    """Check that OMP using every feature matches the LinearRegression fit."""
    # Use small simple data; it's a sanity check but OMP can stop early
    rng = check_random_state(0)
    n_rows, n_cols, n_outputs = 10, 8, 3
    design = rng.randn(n_rows, n_cols)
    targets = rng.randn(n_rows, n_outputs)

    omp = OrthogonalMatchingPursuit(n_nonzero_coefs=n_cols)
    least_squares = LinearRegression()
    omp.fit(design, targets)
    least_squares.fit(design, targets)
    assert_array_almost_equal(omp.coef_, least_squares.coef_)
@pytest.mark.parametrize("data_type", (np.float32, np.float64))
def test_omp_gram_dtype_match(data_type):
    """Check that `orthogonal_mp_gram` returns coefficients in the input dtype."""
    gram_cast = G.astype(data_type)
    Xy_cast = Xy.astype(data_type)
    coef = orthogonal_mp_gram(gram_cast, Xy_cast, n_nonzero_coefs=5)
    assert coef.dtype == data_type
def test_omp_gram_numerical_consistency():
    """Check that `orthogonal_mp_gram` agrees between float32 and float64 inputs."""
    # verify numerical consistency among np.float32 and np.float64
    coef_32 = orthogonal_mp_gram(
        G.astype(np.float32), Xy.astype(np.float32), n_nonzero_coefs=5
    )
    # Cast *both* inputs to float64: with a float32 Gram matrix the comparison
    # would never exercise the double-precision code path.
    coef_64 = orthogonal_mp_gram(
        G.astype(np.float64), Xy.astype(np.float64), n_nonzero_coefs=5
    )
    # Explicit single-precision tolerance, since one side is float32.
    assert_allclose(coef_32, coef_64, rtol=1e-4)

View File

@@ -0,0 +1,345 @@
import numpy as np
import pytest
from numpy.testing import assert_allclose
from scipy.sparse import issparse
from sklearn.base import ClassifierMixin
from sklearn.datasets import load_iris, make_classification, make_regression
from sklearn.linear_model import (
PassiveAggressiveClassifier,
PassiveAggressiveRegressor,
SGDClassifier,
SGDRegressor,
)
from sklearn.linear_model._base import SPARSE_INTERCEPT_DECAY
from sklearn.linear_model._stochastic_gradient import DEFAULT_EPSILON
from sklearn.utils import check_random_state
from sklearn.utils._testing import (
assert_almost_equal,
assert_array_equal,
)
from sklearn.utils.fixes import CSR_CONTAINERS
# Shared data for the tests below: the iris samples and targets, reordered by
# a permutation drawn from a RandomState seeded with 12.
iris = load_iris()
random_state = check_random_state(12)
indices = np.arange(iris.data.shape[0])
random_state.shuffle(indices)  # in-place shuffle of the row indices
X = iris.data[indices]
y = iris.target[indices]
# TODO(1.10): Move to test_sgd.py
class MyPassiveAggressive(ClassifierMixin):
    """Reference passive-aggressive learner written with plain NumPy loops.

    The tests compare its weight vector ``w`` with the coefficients learned by
    the library estimators.

    Parameters
    ----------
    C : float, default=1.0
        Caps the step size for the non-squared losses; for the squared losses
        ``1 / (2 * C)`` is added to the squared norm of the sample.
    epsilon : float, default=DEFAULT_EPSILON
        Width of the insensitive zone of the epsilon-insensitive losses.
    loss : {"hinge", "squared_hinge", "epsilon_insensitive", \
            "squared_epsilon_insensitive"}, default="hinge"
        Loss driving the update rule.
    fit_intercept : bool, default=True
        Whether the intercept ``b`` is updated.
    n_iter : int, default=1
        Number of passes over the samples.
    random_state : object, default=None
        Stored as given; the update loop visits samples in order and never
        draws random numbers.
    """

    _MARGIN_LOSSES = ("hinge", "squared_hinge")
    _CAPPED_STEP_LOSSES = ("hinge", "epsilon_insensitive")
    _DAMPED_STEP_LOSSES = ("squared_hinge", "squared_epsilon_insensitive")

    def __init__(
        self,
        C=1.0,
        epsilon=DEFAULT_EPSILON,
        loss="hinge",
        fit_intercept=True,
        n_iter=1,
        random_state=None,
    ):
        self.C = C
        self.epsilon = epsilon
        self.loss = loss
        self.fit_intercept = fit_intercept
        self.n_iter = n_iter
        # Previously accepted but silently dropped.
        self.random_state = random_state

    def fit(self, X, y):
        """Run ``n_iter`` in-order passes of passive-aggressive updates.

        Sets ``self.w`` (weights) and ``self.b`` (intercept). Sparse ``X`` is
        densified and uses ``SPARSE_INTERCEPT_DECAY`` for the intercept.

        Raises
        ------
        ValueError
            If ``self.loss`` is not one of the four supported losses.
        """
        supported = self._CAPPED_STEP_LOSSES + self._DAMPED_STEP_LOSSES
        if self.loss not in supported:
            # Fail early instead of hitting an unbound `step` inside the loop.
            raise ValueError(
                f"Unsupported loss {self.loss!r}; expected one of {supported}."
            )

        n_samples, n_features = X.shape
        self.w = np.zeros(n_features, dtype=np.float64)
        self.b = 0.0

        # Mimic SGD's behavior for intercept
        intercept_decay = 1.0
        if issparse(X):
            intercept_decay = SPARSE_INTERCEPT_DECAY
            X = X.toarray()

        is_margin_loss = self.loss in self._MARGIN_LOSSES
        for t in range(self.n_iter):
            for i in range(n_samples):
                p = self.project(X[i])
                if is_margin_loss:
                    loss = max(1 - y[i] * p, 0)
                else:
                    loss = max(np.abs(p - y[i]) - self.epsilon, 0)

                sqnorm = np.dot(X[i], X[i])

                if self.loss in self._CAPPED_STEP_LOSSES:
                    step = min(self.C, loss / sqnorm)
                else:
                    step = loss / (sqnorm + 1.0 / (2 * self.C))

                # Direction of the update: the label for margin losses, the
                # sign of the residual for the regression losses.
                if is_margin_loss:
                    step *= y[i]
                else:
                    step *= np.sign(y[i] - p)

                self.w += step * X[i]
                if self.fit_intercept:
                    self.b += intercept_decay * step

    def project(self, X):
        """Return the linear score ``X @ w + b``."""
        return np.dot(X, self.w) + self.b
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("average", [False, True])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
def test_classifier_accuracy(csr_container, fit_intercept, average):
    """Check the training accuracy of PassiveAggressiveClassifier on iris."""
    data = X if csr_container is None else csr_container(X)
    params = dict(
        C=1.0,
        max_iter=30,
        fit_intercept=fit_intercept,
        random_state=1,
        average=average,
        tol=None,
    )
    clf = PassiveAggressiveClassifier(**params)
    clf.fit(data, y)
    assert clf.score(data, y) > 0.79
    if average:
        # Averaging must expose both the averaged and the standard weights.
        for attribute in (
            "_average_coef",
            "_average_intercept",
            "_standard_intercept",
            "_standard_coef",
        ):
            assert hasattr(clf, attribute)
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("average", [False, True])
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
def test_classifier_partial_fit(csr_container, average):
    """Check the accuracy reached after 30 `partial_fit` calls on iris."""
    labels = np.unique(y)
    data = X if csr_container is None else csr_container(X)
    clf = PassiveAggressiveClassifier(random_state=0, average=average, max_iter=5)
    for _ in range(30):
        clf.partial_fit(data, y, labels)
    assert clf.score(data, y) > 0.79
    if average:
        for attribute in (
            "_average_coef",
            "_average_intercept",
            "_standard_intercept",
            "_standard_coef",
        ):
            assert hasattr(clf, attribute)
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_classifier_refit():
    """Check that the classifier can be retrained on other labels and features."""
    clf = PassiveAggressiveClassifier(max_iter=5)
    clf.fit(X, y)
    assert_array_equal(clf.classes_, np.unique(y))

    # Refit with one feature fewer and string labels instead of integers.
    fewer_features = X[:, :-1]
    string_labels = iris.target_names[y]
    clf.fit(fewer_features, string_labels)
    assert_array_equal(clf.classes_, iris.target_names)
# TODO(1.10): Move to test_sgd.py
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
@pytest.mark.parametrize("loss", ("hinge", "squared_hinge"))
def test_classifier_correctness(loss, csr_container):
    """Compare PassiveAggressiveClassifier weights with MyPassiveAggressive."""
    # One-vs-rest labels: class 1 stays 1, every other class becomes -1.
    y_bin = np.where(y == 1, y, -1)
    data = X if csr_container is None else csr_container(X)

    reference = MyPassiveAggressive(loss=loss, n_iter=4)
    reference.fit(data, y_bin)

    estimator = PassiveAggressiveClassifier(
        loss=loss, max_iter=4, shuffle=False, tol=None
    )
    estimator.fit(data, y_bin)

    assert_allclose(reference.w, estimator.coef_.ravel())
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize(
    "response_method", ["predict_proba", "predict_log_proba", "transform"]
)
def test_classifier_undefined_methods(response_method):
    """Check that the classifier does not expose the given response method."""
    classifier = PassiveAggressiveClassifier(max_iter=100)
    with pytest.raises(AttributeError):
        getattr(classifier, response_method)
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_class_weights():
    """Check that `class_weight` changes the prediction on a probe point."""
    X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]])
    y2 = [1, 1, 1, -1, -1]
    probe = [[0.2, -1.0]]
    shared = dict(C=0.1, max_iter=100, random_state=100)

    unweighted = PassiveAggressiveClassifier(class_weight=None, **shared)
    unweighted.fit(X2, y2)
    assert_array_equal(unweighted.predict(probe), np.array([1]))

    # we give a small weights to class 1
    downweighted = PassiveAggressiveClassifier(class_weight={1: 0.001}, **shared)
    downweighted.fit(X2, y2)

    # now the hyperplane should rotate clock-wise and
    # the prediction on this point should shift
    assert_array_equal(downweighted.predict(probe), np.array([-1]))
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_partial_fit_weight_class_balanced():
    """Check that `partial_fit` rejects `class_weight='balanced'`."""
    # partial_fit with class_weight='balanced' not supported
    balanced_clf = PassiveAggressiveClassifier(class_weight="balanced", max_iter=100)
    all_classes = np.unique(y)
    with pytest.raises(ValueError):
        balanced_clf.partial_fit(X, y, classes=all_classes)
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_equal_class_weight():
    """Check that equal class weights leave the coefficients nearly unchanged."""
    X2 = [[1, 0], [1, 0], [0, 1], [0, 1]]
    y2 = [0, 0, 1, 1]
    shared = dict(C=0.1, tol=None)

    clf = PassiveAggressiveClassifier(class_weight=None, **shared)
    clf.fit(X2, y2)

    # Already balanced, so "balanced" weights should have no effect
    clf_balanced = PassiveAggressiveClassifier(class_weight="balanced", **shared)
    clf_balanced.fit(X2, y2)

    clf_weighted = PassiveAggressiveClassifier(
        class_weight={0: 0.5, 1: 0.5}, **shared
    )
    clf_weighted.fit(X2, y2)

    # should be similar up to some epsilon due to learning rate schedule
    assert_almost_equal(clf.coef_, clf_weighted.coef_, decimal=2)
    assert_almost_equal(clf.coef_, clf_balanced.coef_, decimal=2)
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_wrong_class_weight_label():
    """Check that a `class_weight` key absent from the labels raises ValueError."""
    samples = np.array(
        [[-1.0, -1.0], [-1.0, 0], [-0.8, -1.0], [1.0, 1.0], [1.0, 0.0]]
    )
    labels = [1, 1, 1, -1, -1]
    # Label 0 does not occur in `labels`.
    clf = PassiveAggressiveClassifier(class_weight={0: 0.5}, max_iter=100)
    with pytest.raises(ValueError):
        clf.fit(samples, labels)
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("average", [False, True])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
def test_regressor_mse(csr_container, fit_intercept, average):
    """Check the training MSE of PassiveAggressiveRegressor on +/-1 targets."""
    # Targets: class 1 stays 1, every other class becomes -1.
    y_bin = np.where(y == 1, y, -1)
    data = X if csr_container is None else csr_container(X)

    reg = PassiveAggressiveRegressor(
        C=1.0,
        fit_intercept=fit_intercept,
        random_state=0,
        average=average,
        max_iter=5,
    )
    reg.fit(data, y_bin)
    residuals = reg.predict(data) - y_bin
    assert np.mean(residuals**2) < 1.7
    if average:
        for attribute in (
            "_average_coef",
            "_average_intercept",
            "_standard_intercept",
            "_standard_coef",
        ):
            assert hasattr(reg, attribute)
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("average", [False, True])
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
def test_regressor_partial_fit(csr_container, average):
    """Check the training MSE reached after 50 `partial_fit` calls."""
    y_bin = np.where(y == 1, y, -1)
    data = X if csr_container is None else csr_container(X)

    reg = PassiveAggressiveRegressor(random_state=0, average=average, max_iter=100)
    for _ in range(50):
        reg.partial_fit(data, y_bin)
    residuals = reg.predict(data) - y_bin
    assert np.mean(residuals**2) < 1.7
    if average:
        for attribute in (
            "_average_coef",
            "_average_intercept",
            "_standard_intercept",
            "_standard_coef",
        ):
            assert hasattr(reg, attribute)
# TODO(1.10): Move to test_sgd.py
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize("csr_container", [None, *CSR_CONTAINERS])
@pytest.mark.parametrize("loss", ("epsilon_insensitive", "squared_epsilon_insensitive"))
def test_regressor_correctness(loss, csr_container):
    """Compare PassiveAggressiveRegressor weights with MyPassiveAggressive."""
    y_bin = np.where(y == 1, y, -1)
    data = X if csr_container is None else csr_container(X)

    reference = MyPassiveAggressive(loss=loss, n_iter=4)
    reference.fit(data, y_bin)

    estimator = PassiveAggressiveRegressor(
        loss=loss, max_iter=4, shuffle=False, tol=None
    )
    estimator.fit(data, y_bin)

    assert_allclose(reference.w, estimator.coef_.ravel())
@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_regressor_undefined_methods():
    """Check that the regressor has no `transform` method."""
    regressor = PassiveAggressiveRegressor(max_iter=100)
    with pytest.raises(AttributeError):
        regressor.transform(X)
# TODO(1.10): remove
@pytest.mark.parametrize(
    "Estimator", [PassiveAggressiveClassifier, PassiveAggressiveRegressor]
)
def test_class_deprecation(Estimator):
    """Check that instantiating the estimator emits the deprecation warning."""
    expected = "Class PassiveAggressive.+is deprecated"
    with pytest.warns(FutureWarning, match=expected):
        Estimator()
# The deprecated class is instantiated on purpose here; silence its
# FutureWarning like the other tests of this module do.
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize(["loss", "lr"], [("hinge", "pa1"), ("squared_hinge", "pa2")])
def test_passive_aggressive_classifier_vs_sgd(loss, lr):
    """Test that both are equivalent.

    PassiveAggressiveClassifier with `loss` is compared with SGDClassifier
    using the hinge loss, no penalty and the matching `learning_rate=lr`.
    """
    X, y = make_classification(
        n_samples=100, n_features=10, n_informative=5, random_state=1234
    )
    pa = PassiveAggressiveClassifier(loss=loss, C=0.987, random_state=42).fit(X, y)
    sgd = SGDClassifier(
        loss="hinge", penalty=None, learning_rate=lr, eta0=0.987, random_state=42
    ).fit(X, y)
    assert_allclose(pa.decision_function(X), sgd.decision_function(X))
# The deprecated class is instantiated on purpose here; silence its
# FutureWarning like the other tests of this module do.
@pytest.mark.filterwarnings("ignore::FutureWarning")
@pytest.mark.parametrize(
    ["loss", "lr"],
    [("epsilon_insensitive", "pa1"), ("squared_epsilon_insensitive", "pa2")],
)
def test_passive_aggressive_regressor_vs_sgd(loss, lr):
    """Test that both are equivalent.

    PassiveAggressiveRegressor with `loss` is compared with SGDRegressor using
    the epsilon-insensitive loss, no penalty and the matching
    `learning_rate=lr`.
    """
    X, y = make_regression(
        n_samples=100, n_features=10, n_informative=5, random_state=1234
    )
    pa = PassiveAggressiveRegressor(
        loss=loss, epsilon=0.123, C=0.987, random_state=42
    ).fit(X, y)
    sgd = SGDRegressor(
        loss="epsilon_insensitive",
        epsilon=0.123,
        penalty=None,
        learning_rate=lr,
        eta0=0.987,
        random_state=42,
    ).fit(X, y)
    assert_allclose(pa.predict(X), sgd.predict(X))

View File

@@ -0,0 +1,88 @@
import numpy as np
import pytest
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_allclose, assert_array_almost_equal
from sklearn.utils.fixes import CSR_CONTAINERS
# Shared data for the tests below: the iris samples and targets, reordered by
# a permutation drawn from a RandomState seeded with 12.
iris = load_iris()
random_state = check_random_state(12)
indices = np.arange(iris.data.shape[0])
random_state.shuffle(indices)  # in-place shuffle of the row indices
X = iris.data[indices]
y = iris.target[indices]
class MyPerceptron:
    """Minimal perceptron used as a reference implementation in the tests.

    Parameters
    ----------
    n_iter : int, default=1
        Number of in-order passes over the training samples.
    """

    def __init__(self, n_iter=1):
        self.n_iter = n_iter

    def fit(self, X, y):
        """Learn ``w`` and ``b`` by updating on every misclassified sample."""
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features, dtype=np.float64)
        self.b = 0.0

        for _epoch in range(self.n_iter):
            for row in range(n_samples):
                sample, target = X[row], y[row]
                if self.predict(sample)[0] == target:
                    # Correctly classified: nothing to update.
                    continue
                self.w += target * sample
                self.b += target

    def project(self, X):
        """Return the linear score of `X` under the current weights."""
        scores = np.dot(X, self.w)
        return scores + self.b

    def predict(self, X):
        """Return the sign of the linear score for each row of `X`."""
        rows = np.atleast_2d(X)
        return np.sign(self.project(rows))
@pytest.mark.parametrize("container", CSR_CONTAINERS + [np.array])
def test_perceptron_accuracy(container):
    """Check the training accuracy of Perceptron on iris for each container."""
    data = container(X)
    perceptron = Perceptron(max_iter=100, tol=None, shuffle=False)
    perceptron.fit(data, y)
    accuracy = perceptron.score(data, y)
    assert accuracy > 0.7
def test_perceptron_correctness():
    """Compare Perceptron weights with the MyPerceptron reference."""
    # One-vs-rest labels: class 1 stays 1, every other class becomes -1.
    y_bin = np.where(y == 1, y, -1)

    reference = MyPerceptron(n_iter=2)
    reference.fit(X, y_bin)

    estimator = Perceptron(max_iter=2, shuffle=False, tol=None)
    estimator.fit(X, y_bin)

    assert_array_almost_equal(reference.w, estimator.coef_.ravel())
def test_undefined_methods():
    """Check that Perceptron exposes no probability prediction methods."""
    perceptron = Perceptron(max_iter=100)
    missing_methods = ("predict_proba", "predict_log_proba")
    for method_name in missing_methods:
        with pytest.raises(AttributeError):
            getattr(perceptron, method_name)
def test_perceptron_l1_ratio():
    """Check that `l1_ratio` has an impact when `penalty='elasticnet'`"""
    pure_l2_mix = Perceptron(l1_ratio=0, penalty="elasticnet")
    pure_l2_mix.fit(X, y)
    partial_l1_mix = Perceptron(l1_ratio=0.15, penalty="elasticnet")
    partial_l1_mix.fit(X, y)
    assert pure_l2_mix.score(X, y) != partial_l1_mix.score(X, y)

    # check that the bounds of elastic net which should correspond to an l1 or
    # l2 penalty depending of `l1_ratio` value.
    l1_only = Perceptron(penalty="l1").fit(X, y)
    elasticnet_as_l1 = Perceptron(l1_ratio=1, penalty="elasticnet").fit(X, y)
    assert_allclose(l1_only.coef_, elasticnet_as_l1.coef_)

    l2_only = Perceptron(penalty="l2").fit(X, y)
    elasticnet_as_l2 = Perceptron(l1_ratio=0, penalty="elasticnet").fit(X, y)
    assert_allclose(l2_only.coef_, elasticnet_as_l2.coef_)

View File

@@ -0,0 +1,283 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import numpy as np
import pytest
from pytest import approx
from scipy.optimize import minimize
from sklearn.datasets import make_regression
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import HuberRegressor, QuantileRegressor
from sklearn.metrics import mean_pinball_loss
from sklearn.utils._testing import assert_allclose
from sklearn.utils.fixes import (
COO_CONTAINERS,
CSC_CONTAINERS,
CSR_CONTAINERS,
parse_version,
sp_version,
)
@pytest.fixture
def X_y_data():
    """Return a small single-feature regression problem as an `(X, y)` pair."""
    features, target = make_regression(
        n_samples=10, n_features=1, random_state=0, noise=1
    )
    return features, target
@pytest.mark.skipif(
    parse_version(sp_version.base_version) >= parse_version("1.11"),
    reason="interior-point solver is not available in SciPy 1.11",
)
@pytest.mark.parametrize("solver", ["interior-point", "revised simplex"])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_incompatible_solver_for_sparse_input(X_y_data, solver, csc_container):
    """Check the error raised when `solver` is used with sparse X."""
    dense_X, target = X_y_data
    sparse_X = csc_container(dense_X)
    regressor = QuantileRegressor(solver=solver)
    err_msg = (
        f"Solver {solver} does not support sparse X. Use solver 'highs' for example."
    )
    with pytest.raises(ValueError, match=err_msg):
        regressor.fit(sparse_X, target)
@pytest.mark.parametrize(
    "quantile, alpha, intercept, coef",
    [
        # for 50% quantile w/o regularization, any slope in [1, 10] is okay
        [0.5, 0, 1, None],
        # if positive error costs more, the slope is maximal
        [0.51, 0, 1, 10],
        # if negative error costs more, the slope is minimal
        [0.49, 0, 1, 1],
        # for a small lasso penalty, the slope is also minimal
        [0.5, 0.01, 1, 1],
        # for a large lasso penalty, the model predicts the constant median
        [0.5, 100, 2, 0],
    ],
)
def test_quantile_toy_example(quantile, alpha, intercept, coef):
    """Check how `quantile` and `alpha` affect the fit on a three-point example."""
    features = [[0], [1], [1]]
    target = [1, 2, 11]
    model = QuantileRegressor(quantile=quantile, alpha=alpha)
    model.fit(features, target)

    assert_allclose(model.intercept_, intercept, atol=1e-2)
    slope = model.coef_[0]
    if coef is not None:
        assert_allclose(slope, coef, atol=1e-2)
    if alpha < 100:
        # Below the large-penalty case the slope must stay within [1, 10].
        assert 1 <= slope <= 10
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_quantile_equals_huber_for_low_epsilon(fit_intercept):
    """Compare QuantileRegressor with HuberRegressor for epsilon close to 1."""
    X, y = make_regression(n_samples=100, n_features=20, random_state=0, noise=1.0)
    shared = dict(alpha=1e-4, fit_intercept=fit_intercept)

    huber = HuberRegressor(epsilon=1 + 1e-4, **shared).fit(X, y)
    quant = QuantileRegressor(**shared).fit(X, y)

    assert_allclose(huber.coef_, quant.coef_, atol=1e-1)
    if fit_intercept:
        assert huber.intercept_ == approx(quant.intercept_, abs=1e-1)
        # check that we still predict fraction
        fraction_below = np.mean(y < quant.predict(X))
        assert fraction_below == approx(0.5, abs=1e-1)
@pytest.mark.parametrize("q", [0.5, 0.9, 0.05])
def test_quantile_estimates_calibration(q):
    """Check that the fraction of targets below the prediction is close to `q`."""
    X, y = make_regression(n_samples=1000, n_features=20, random_state=0, noise=1.0)
    model = QuantileRegressor(quantile=q, alpha=0)
    model.fit(X, y)
    fraction_below = np.mean(y < model.predict(X))
    assert fraction_below == approx(q, abs=1e-2)
def test_quantile_sample_weight():
    """Check that unequal sample weights yield the weighted quantile."""
    n_samples = 1000
    X, y = make_regression(
        n_samples=n_samples, n_features=5, random_state=0, noise=10.0
    )
    # when we increase weight of upper observations,
    # estimate of quantile should go up
    weight = np.ones(n_samples)
    weight[y > y.mean()] = 100

    model = QuantileRegressor(quantile=0.5, alpha=1e-8)
    model.fit(X, y, sample_weight=weight)

    is_below = y < model.predict(X)
    assert np.mean(is_below) > 0.5
    assert np.average(is_below, weights=weight) == approx(0.5, abs=3e-2)
@pytest.mark.parametrize("quantile", [0.2, 0.5, 0.8])
def test_asymmetric_error(quantile):
    """Test quantile regression for asymmetric distributed targets."""
    n_samples = 1000
    rng = np.random.RandomState(42)
    # The draw order below (randn, then randint) is part of the fixture.
    positive_feature = np.abs(rng.randn(n_samples)[:, None])
    binary_feature = -rng.randint(2, size=(n_samples, 1))
    X = np.concatenate((positive_feature, binary_feature), axis=1)
    intercept = 1.23
    coef = np.array([0.5, -2])
    # Take care that X @ coef + intercept > 0
    linear_term = X @ coef + intercept
    assert np.min(linear_term) > 0
    # For an exponential distribution with rate lambda, e.g. exp(-lambda * x),
    # the quantile at level q is:
    #   quantile(q) = - log(1 - q) / lambda
    #   scale = 1/lambda = -quantile(q) / log(1 - q)
    y = rng.exponential(scale=-linear_term / np.log(1 - quantile), size=n_samples)

    model = QuantileRegressor(quantile=quantile, alpha=0)
    model.fit(X, y)
    # This test can be made to pass with any solver but in the interest
    # of sparing continuous integration resources, the test is performed
    # with the fastest solver only.
    assert model.intercept_ == approx(intercept, rel=0.2)
    assert_allclose(model.coef_, coef, rtol=0.6)
    assert_allclose(np.mean(model.predict(X) > y), quantile, atol=1e-2)

    # Now compare to Nelder-Mead optimization with L1 penalty
    alpha = 0.01
    model.set_params(alpha=alpha).fit(X, y)
    fitted_params = np.r_[model.intercept_, model.coef_]

    def penalized_pinball(params):
        # params[0] is the intercept, params[1:] the coefficients.
        prediction = X @ params[1:] + params[0]
        data_term = mean_pinball_loss(y, prediction, alpha=quantile)
        l1_term = np.sum(np.abs(params[1:]))
        return data_term + alpha * l1_term

    res = minimize(
        fun=penalized_pinball,
        x0=[1, 0, -1],
        method="Nelder-Mead",
        tol=1e-12,
        options={"maxiter": 2000},
    )

    assert penalized_pinball(fitted_params) == approx(penalized_pinball(res.x))
    assert_allclose(model.intercept_, res.x[0])
    assert_allclose(model.coef_, res.x[1:])
    assert_allclose(np.mean(model.predict(X) > y), quantile, atol=1e-2)
@pytest.mark.parametrize("quantile", [0.2, 0.5, 0.8])
def test_equivariance(quantile):
    """Test equivariance of quantile regression.

    See Koenker (2005) Quantile Regression, Chapter 2.2.3.
    """
    rng = np.random.RandomState(42)
    n_samples, n_features = 100, 5
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        n_informative=n_features,
        noise=0,
        random_state=rng,
        shuffle=False,
    )
    # make y asymmetric
    y += rng.exponential(scale=100, size=y.shape)
    params = dict(alpha=0)
    baseline = QuantileRegressor(quantile=quantile, **params).fit(X, y)

    # coef(q; a*y, X) = a * coef(q; y, X)
    a = 2.5
    scaled = QuantileRegressor(quantile=quantile, **params).fit(X, a * y)
    assert scaled.intercept_ == approx(a * baseline.intercept_, rel=1e-5)
    assert_allclose(scaled.coef_, a * baseline.coef_, rtol=1e-5)

    # coef(1-q; -a*y, X) = -a * coef(q; y, X)
    mirrored = QuantileRegressor(quantile=1 - quantile, **params).fit(X, -a * y)
    assert mirrored.intercept_ == approx(-a * baseline.intercept_, rel=1e-5)
    assert_allclose(mirrored.coef_, -a * baseline.coef_, rtol=1e-5)

    # coef(q; y + X @ g, X) = coef(q; y, X) + g
    g_intercept = rng.randn()
    g_coef = rng.randn(n_features)
    shifted = QuantileRegressor(quantile=quantile, **params)
    shifted.fit(X, y + X @ g_coef + g_intercept)
    assert shifted.intercept_ == approx(baseline.intercept_ + g_intercept)
    assert_allclose(shifted.coef_, baseline.coef_ + g_coef, rtol=1e-6)

    # coef(q; y, X @ A) = A^-1 @ coef(q; y, X)
    A = rng.randn(n_features, n_features)
    reparametrized = QuantileRegressor(quantile=quantile, **params)
    reparametrized.fit(X @ A, y)
    assert reparametrized.intercept_ == approx(baseline.intercept_, rel=1e-5)
    assert_allclose(
        reparametrized.coef_, np.linalg.solve(A, baseline.coef_), rtol=1e-5
    )
@pytest.mark.skipif(
    parse_version(sp_version.base_version) >= parse_version("1.11"),
    reason="interior-point solver is not available in SciPy 1.11",
)
@pytest.mark.filterwarnings("ignore:`method='interior-point'` is deprecated")
def test_linprog_failure():
    """Test that linprog fails."""
    target = np.linspace(0, 10, num=10)
    features = np.linspace(0, 10, num=10).reshape(-1, 1)

    # A single solver iteration is allowed, so the solve cannot succeed.
    single_iteration = {"maxiter": 1}
    reg = QuantileRegressor(
        alpha=0, solver="interior-point", solver_options=single_iteration
    )

    msg = "Linear programming for QuantileRegressor did not succeed."
    with pytest.warns(ConvergenceWarning, match=msg):
        reg.fit(features, target)
@pytest.mark.parametrize(
    "sparse_container", CSC_CONTAINERS + CSR_CONTAINERS + COO_CONTAINERS
)
@pytest.mark.parametrize("solver", ["highs", "highs-ds", "highs-ipm"])
@pytest.mark.parametrize("fit_intercept", [True, False])
def test_sparse_input(sparse_container, solver, fit_intercept, global_random_seed):
    """Test that sparse and dense X give same results."""
    n_informative = 10
    quantile_level = 0.6
    X, y = make_regression(
        n_samples=300,
        n_features=20,
        # Reuse the variable so the data and the support-size assertions
        # below cannot drift apart.
        n_informative=n_informative,
        random_state=global_random_seed,
        noise=1.0,
    )
    X_sparse = sparse_container(X)
    alpha = 0.1
    # The dense fit uses the estimator's default solver; only the sparse fit
    # receives the parametrized `solver`.
    quant_dense = QuantileRegressor(
        quantile=quantile_level, alpha=alpha, fit_intercept=fit_intercept
    ).fit(X, y)
    quant_sparse = QuantileRegressor(
        quantile=quantile_level, alpha=alpha, fit_intercept=fit_intercept, solver=solver
    ).fit(X_sparse, y)
    assert_allclose(quant_sparse.coef_, quant_dense.coef_, rtol=1e-2)
    sparse_support = quant_sparse.coef_ != 0
    dense_support = quant_dense.coef_ != 0
    assert dense_support.sum() == pytest.approx(n_informative, abs=1)
    assert sparse_support.sum() == pytest.approx(n_informative, abs=1)
    if fit_intercept:
        assert quant_sparse.intercept_ == approx(quant_dense.intercept_)
        # check that we still predict fraction
        empirical_coverage = np.mean(y < quant_sparse.predict(X_sparse))
        assert empirical_coverage == approx(quantile_level, abs=3e-2)
def test_error_interior_point_future(X_y_data, monkeypatch):
    """Check that we will raise a proper error when requesting
    `solver='interior-point'` in SciPy >= 1.11.
    """
    import sklearn.linear_model._quantile

    features, target = X_y_data
    quantile_module = sklearn.linear_model._quantile
    err_msg = "Solver interior-point is not anymore available in SciPy >= 1.11.0."
    with monkeypatch.context() as patcher:
        # Pretend that SciPy 1.11.0 is installed for the duration of the fit.
        patcher.setattr(quantile_module, "sp_version", parse_version("1.11.0"))
        with pytest.raises(ValueError, match=err_msg):
            QuantileRegressor(solver="interior-point").fit(features, target)

View File

@@ -0,0 +1,543 @@
import numpy as np
import pytest
from numpy.testing import assert_array_almost_equal, assert_array_equal
from sklearn.datasets import make_regression
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import (
LinearRegression,
OrthogonalMatchingPursuit,
RANSACRegressor,
Ridge,
)
from sklearn.linear_model._ransac import _dynamic_max_trials
from sklearn.utils import check_random_state
from sklearn.utils._testing import assert_allclose
from sklearn.utils.fixes import COO_CONTAINERS, CSC_CONTAINERS, CSR_CONTAINERS
# Generate coordinates of line: y = 0.2 * x + 20 for the integers x in [-200, 200)
X = np.arange(-200, 200)
y = 0.2 * X + 20
data = np.column_stack([X, y])
# Add some faulty data: shift both coordinates of a random subset of points
# by an offset in [50, 60); `outliers` holds the sorted unique row indices
rng = np.random.RandomState(1000)
outliers = np.unique(rng.randint(len(X), size=200))
data[outliers, :] += 50 + rng.rand(len(outliers), 2) * 10
# Split back into a (n_samples, 1) design matrix and a 1D target
X = data[:, 0][:, np.newaxis]
y = data[:, 1]
def test_ransac_inliers_outliers():
    """Check that RANSAC flags exactly the corrupted samples as outliers."""
    ransac = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=5, random_state=0
    )
    # Estimate parameters of corrupted data
    ransac.fit(X, y)

    # Ground truth / reference inlier mask
    expected_mask = np.ones_like(ransac.inlier_mask_).astype(np.bool_)
    expected_mask[outliers] = False
    assert_array_equal(ransac.inlier_mask_, expected_mask)
def test_ransac_is_data_valid():
    """Check that `is_data_valid` receives `min_samples` rows and can veto them."""

    def is_data_valid(X, y):
        # Every candidate subset must have exactly `min_samples` rows.
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    local_rng = np.random.RandomState(0)
    features = local_rng.rand(10, 2)
    target = local_rng.rand(10, 1)

    ransac = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=5,
        is_data_valid=is_data_valid,
        random_state=0,
    )
    with pytest.raises(ValueError):
        ransac.fit(features, target)
def test_ransac_is_model_valid():
    """Check that `is_model_valid` receives `min_samples` rows and can veto."""

    def is_model_valid(estimator, X, y):
        # Every candidate subset must have exactly `min_samples` rows.
        assert X.shape[0] == 2
        assert y.shape[0] == 2
        return False

    ransac = RANSACRegressor(
        LinearRegression(),
        min_samples=2,
        residual_threshold=5,
        is_model_valid=is_model_valid,
        random_state=0,
    )
    with pytest.raises(ValueError):
        ransac.fit(X, y)
def test_ransac_max_trials():
    """Check `max_trials=0` is rejected and `n_trials_` stays within its bound."""
    base = LinearRegression()
    no_trials = RANSACRegressor(
        base,
        min_samples=2,
        residual_threshold=5,
        max_trials=0,
        random_state=0,
    )
    with pytest.raises(ValueError):
        no_trials.fit(X, y)

    # there is a 1e-9 chance it will take these many trials. No good reason
    # 1e-2 isn't enough, can still happen
    # 2 is what ransac defines as min_samples = X.shape[1] + 1
    n_inliers = len(X) - len(outliers)
    max_trials = _dynamic_max_trials(n_inliers, X.shape[0], 2, 1 - 1e-9)
    ransac = RANSACRegressor(base, min_samples=2)
    for seed in range(50):
        ransac.set_params(min_samples=2, random_state=seed)
        ransac.fit(X, y)
        assert ransac.n_trials_ < max_trials + 1
def test_ransac_stop_n_inliers():
    """Check that `stop_n_inliers=2` ends the search after one trial."""
    settings = dict(
        min_samples=2,
        residual_threshold=5,
        stop_n_inliers=2,
        random_state=0,
    )
    ransac = RANSACRegressor(LinearRegression(), **settings)
    ransac.fit(X, y)
    assert ransac.n_trials_ == 1
def test_ransac_stop_score():
    """Check that `stop_score=0` ends the search after one trial."""
    settings = dict(
        min_samples=2,
        residual_threshold=5,
        stop_score=0,
        random_state=0,
    )
    ransac = RANSACRegressor(LinearRegression(), **settings)
    ransac.fit(X, y)
    assert ransac.n_trials_ == 1
def test_ransac_score():
    """Check `score` on the all-zero samples and on the two perturbed ones."""
    features = np.arange(100)[:, None]
    # All-zero target except for the first two samples.
    target = np.zeros((100,))
    target[0] = 1
    target[1] = 100

    ransac = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=0.5, random_state=0
    )
    ransac.fit(features, target)

    assert ransac.score(features[2:], target[2:]) == 1
    assert ransac.score(features[:2], target[:2]) < 1
def test_ransac_predict():
    """Check that predictions ignore the two perturbed samples."""
    features = np.arange(100)[:, None]
    # All-zero target except for the first two samples.
    target = np.zeros((100,))
    target[0] = 1
    target[1] = 100

    ransac = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=0.5, random_state=0
    )
    ransac.fit(features, target)

    assert_array_equal(ransac.predict(features), np.zeros(100))
def test_ransac_no_valid_data():
    """Check the error and skip counters when every data subset is rejected."""

    def is_data_valid(X, y):
        return False

    ransac = RANSACRegressor(
        LinearRegression(), is_data_valid=is_data_valid, max_trials=5
    )
    expected_error = "RANSAC could not find a valid consensus set"
    with pytest.raises(ValueError, match=expected_error):
        ransac.fit(X, y)

    # All five trials were skipped because of invalid data.
    assert ransac.n_skips_no_inliers_ == 0
    assert ransac.n_skips_invalid_data_ == 5
    assert ransac.n_skips_invalid_model_ == 0
def test_ransac_no_valid_model():
    """Check the error and skip counters when every fitted model is rejected."""

    def is_model_valid(estimator, X, y):
        return False

    ransac = RANSACRegressor(
        LinearRegression(), is_model_valid=is_model_valid, max_trials=5
    )
    expected_error = "RANSAC could not find a valid consensus set"
    with pytest.raises(ValueError, match=expected_error):
        ransac.fit(X, y)

    # All five trials were skipped because of an invalid model.
    assert ransac.n_skips_no_inliers_ == 0
    assert ransac.n_skips_invalid_data_ == 0
    assert ransac.n_skips_invalid_model_ == 5
def test_ransac_exceed_max_skips():
    """Check the error and skip counters when `max_skips` is exceeded."""

    def is_data_valid(X, y):
        return False

    ransac = RANSACRegressor(
        LinearRegression(), is_data_valid=is_data_valid, max_trials=5, max_skips=3
    )
    expected_error = "RANSAC skipped more iterations than `max_skips`"
    with pytest.raises(ValueError, match=expected_error):
        ransac.fit(X, y)

    # The fit stopped on the fourth skip, one more than `max_skips`.
    assert ransac.n_skips_no_inliers_ == 0
    assert ransac.n_skips_invalid_data_ == 4
    assert ransac.n_skips_invalid_model_ == 0
def test_ransac_warn_exceed_max_skips():
    """Check the warning when `max_skips` is exceeded after a valid trial."""
    n_calls = 0

    def accept_first_call_only(X, y):
        # Only the very first data subset is declared valid.
        nonlocal n_calls
        is_first = n_calls == 0
        n_calls += 1
        return is_first

    ransac = RANSACRegressor(
        LinearRegression(),
        is_data_valid=accept_first_call_only,
        max_skips=3,
        max_trials=5,
    )
    warning_message = (
        "RANSAC found a valid consensus set but exited "
        "early due to skipping more iterations than "
        "`max_skips`. See estimator attributes for "
        "diagnostics."
    )
    with pytest.warns(ConvergenceWarning, match=warning_message):
        ransac.fit(X, y)

    assert ransac.n_skips_no_inliers_ == 0
    assert ransac.n_skips_invalid_data_ == 4
    assert ransac.n_skips_invalid_model_ == 0
@pytest.mark.parametrize(
    "sparse_container", COO_CONTAINERS + CSR_CONTAINERS + CSC_CONTAINERS
)
def test_ransac_sparse(sparse_container):
    """Check the inlier mask obtained when fitting on sparse input."""
    ransac = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=5, random_state=0
    )
    ransac.fit(sparse_container(X), y)

    # Every sample except the known outliers is expected to be an inlier.
    expected_mask = np.ones_like(ransac.inlier_mask_).astype(np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(ransac.inlier_mask_, expected_mask)
def test_ransac_none_estimator():
    """`estimator=None` must predict the same as an explicit LinearRegression."""
    shared_params = dict(min_samples=2, residual_threshold=5, random_state=0)
    ransac_linear = RANSACRegressor(LinearRegression(), **shared_params)
    ransac_default = RANSACRegressor(None, **shared_params)

    ransac_linear.fit(X, y)
    ransac_default.fit(X, y)

    assert_array_almost_equal(
        ransac_linear.predict(X), ransac_default.predict(X)
    )
def test_ransac_min_n_samples():
    """Check equivalent and invalid ways of specifying `min_samples`."""
    estimator = LinearRegression()
    common = dict(residual_threshold=5, random_state=0)

    absolute = RANSACRegressor(estimator, min_samples=2, **common)
    fractional = RANSACRegressor(estimator, min_samples=2.0 / X.shape[0], **common)
    absolute_again = RANSACRegressor(estimator, min_samples=2, **common)
    unspecified = RANSACRegressor(estimator, **common)
    more_than_available = RANSACRegressor(
        estimator, min_samples=X.shape[0] + 1, **common
    )
    # GH #19390
    ridge_without_min_samples = RANSACRegressor(Ridge(), min_samples=None, **common)

    valid_variants = (absolute, fractional, absolute_again, unspecified)
    for ransac in valid_variants:
        ransac.fit(X, y)

    # Every valid variant must agree with the absolute-count reference.
    for ransac in valid_variants[1:]:
        assert_array_almost_equal(absolute.predict(X), ransac.predict(X))

    with pytest.raises(ValueError):
        more_than_available.fit(X, y)

    err_msg = "`min_samples` needs to be explicitly set"
    with pytest.raises(ValueError, match=err_msg):
        ridge_without_min_samples.fit(X, y)
def test_ransac_multi_dimensional_targets():
    """Check the inlier mask when the target has three identical columns."""
    ransac = RANSACRegressor(
        LinearRegression(), min_samples=2, residual_threshold=5, random_state=0
    )

    # 3-D target values built by stacking the same target three times
    stacked_target = np.column_stack([y, y, y])

    # Estimate parameters of corrupted data
    ransac.fit(X, stacked_target)

    # Ground truth / reference inlier mask
    expected_mask = np.ones_like(ransac.inlier_mask_).astype(np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(ransac.inlier_mask_, expected_mask)
def test_ransac_residual_loss():
    """Check that custom and named `loss` settings give the default predictions.

    The reference is a RANSACRegressor using the default loss. It is compared
    against callables for multi-output and single-output targets and against
    the string option ``"squared_error"``.
    """

    def loss_multi1(y_true, y_pred):
        # Sum of absolute errors over the output columns.
        return np.sum(np.abs(y_true - y_pred), axis=1)

    def loss_multi2(y_true, y_pred):
        # Sum of squared errors over the output columns.
        return np.sum((y_true - y_pred) ** 2, axis=1)

    def loss_mono(y_true, y_pred):
        # Absolute error for a one-dimensional target.
        return np.abs(y_true - y_pred)

    yyy = np.column_stack([y, y, y])

    estimator = LinearRegression()
    ransac_estimator0 = RANSACRegressor(
        estimator, min_samples=2, residual_threshold=5, random_state=0
    )
    ransac_estimator1 = RANSACRegressor(
        estimator,
        min_samples=2,
        residual_threshold=5,
        random_state=0,
        loss=loss_multi1,
    )
    ransac_estimator2 = RANSACRegressor(
        estimator,
        min_samples=2,
        residual_threshold=5,
        random_state=0,
        loss=loss_multi2,
    )

    # multi-dimensional
    ransac_estimator0.fit(X, yyy)
    ransac_estimator1.fit(X, yyy)
    ransac_estimator2.fit(X, yyy)
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator1.predict(X)
    )
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator2.predict(X)
    )

    # one-dimensional
    ransac_estimator0.fit(X, y)
    ransac_estimator2.loss = loss_mono
    ransac_estimator2.fit(X, y)
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator2.predict(X)
    )

    ransac_estimator3 = RANSACRegressor(
        estimator,
        min_samples=2,
        residual_threshold=5,
        random_state=0,
        loss="squared_error",
    )
    ransac_estimator3.fit(X, y)
    # Compare against the estimator that was just fitted; checking
    # `ransac_estimator2` again would leave `loss="squared_error"` untested.
    assert_array_almost_equal(
        ransac_estimator0.predict(X), ransac_estimator3.predict(X)
    )
def test_ransac_default_residual_threshold():
    """Check the inlier mask when `residual_threshold` is left unset."""
    ransac = RANSACRegressor(LinearRegression(), min_samples=2, random_state=0)

    # Estimate parameters of corrupted data
    ransac.fit(X, y)

    # Ground truth / reference inlier mask
    expected_mask = np.ones_like(ransac.inlier_mask_).astype(np.bool_)
    expected_mask[outliers] = False

    assert_array_equal(ransac.inlier_mask_, expected_mask)
def test_ransac_dynamic_max_trials():
    """Check `_dynamic_max_trials` against tabulated reference values."""
    # Numbers hand-calculated and confirmed on page 119 (Table 4.3) in
    # Hartley, R.~I. and Zisserman, A., 2004,
    # Multiple View Geometry in Computer Vision, Second Edition,
    # Cambridge University Press, ISBN: 0521540518
    #
    # Each entry pairs the positional arguments with the expected result.
    reference_cases = [
        ((100, 100, 2, 0.99), 1),  # e = 0%, min_samples = X
        ((95, 100, 2, 0.99), 2),  # e = 5%, min_samples = 2
        ((90, 100, 2, 0.99), 3),  # e = 10%, min_samples = 2
        ((70, 100, 2, 0.99), 7),  # e = 30%, min_samples = 2
        ((50, 100, 2, 0.99), 17),  # e = 50%, min_samples = 2
        ((95, 100, 8, 0.99), 5),  # e = 5%, min_samples = 8
        ((90, 100, 8, 0.99), 9),  # e = 10%, min_samples = 8
        ((70, 100, 8, 0.99), 78),  # e = 30%, min_samples = 8
        ((50, 100, 8, 0.99), 1177),  # e = 50%, min_samples = 8
        # e = 0%, min_samples = 10
        ((1, 100, 10, 0), 0),
        ((1, 100, 10, 1), float("inf")),
    ]
    for args, expected in reference_cases:
        assert _dynamic_max_trials(*args) == expected
def test_ransac_fit_sample_weight():
    """Check `sample_weight` handling in `RANSACRegressor.fit`."""
    ransac_estimator = RANSACRegressor(random_state=0)
    n_samples = y.shape[0]
    weights = np.ones(n_samples)
    ransac_estimator.fit(X, y, sample_weight=weights)

    # sanity check
    assert ransac_estimator.inlier_mask_.shape[0] == n_samples

    # check that mask is correct
    expected_mask = np.ones_like(ransac_estimator.inlier_mask_).astype(np.bool_)
    expected_mask[outliers] = False
    assert_array_equal(ransac_estimator.inlier_mask_, expected_mask)

    # check that fit(X) = fit([X1, X2, X3],sample_weight = [n1, n2, n3]) where
    # X = X1 repeated n1 times, X2 repeated n2 times and so forth
    random_state = check_random_state(0)
    # The order of the draws below determines the generated data.
    base_X = random_state.randint(0, 200, [10, 1])
    base_y = (0.2 * base_X + 2).flatten()
    base_weight = random_state.randint(0, 10, 10)
    outlier_X = random_state.randint(0, 1000, [1, 1])
    outlier_weight = random_state.randint(0, 10, 1)
    outlier_y = random_state.randint(-1000, 0, 1)

    # Reference: every sample is physically repeated `weight` times.
    X_flat = np.concatenate(
        (
            np.repeat(base_X, base_weight, axis=0),
            np.repeat(outlier_X, outlier_weight, axis=0),
        ),
        axis=0,
    )
    y_flat = np.concatenate(
        (
            np.repeat(base_y, base_weight, axis=0),
            np.repeat(outlier_y, outlier_weight, axis=0),
        ),
        axis=0,
    ).flatten()
    ransac_estimator.fit(X_flat, y_flat)
    ref_coef_ = ransac_estimator.estimator_.coef_

    # Same data, expressed once per sample together with its weight.
    weighted_X = np.concatenate((base_X, outlier_X), axis=0)
    weighted_y = np.concatenate((base_y, outlier_y))
    all_weights = np.concatenate((base_weight, outlier_weight))
    ransac_estimator.fit(weighted_X, weighted_y, sample_weight=all_weights)

    assert_allclose(ransac_estimator.estimator_.coef_, ref_coef_)

    # check that if estimator.fit doesn't support
    # sample_weight, raises error
    estimator = OrthogonalMatchingPursuit()
    ransac_estimator = RANSACRegressor(estimator, min_samples=10)

    err_msg = f"{estimator.__class__.__name__} does not support sample_weight."
    with pytest.raises(ValueError, match=err_msg):
        ransac_estimator.fit(X, y, sample_weight=weights)
def test_ransac_final_model_fit_sample_weight():
    """The final RANSAC model must match a weighted fit on the inliers."""
    X, y = make_regression(n_samples=1000, random_state=10)
    rng = check_random_state(42)
    raw_weight = rng.randint(1, 4, size=y.shape[0])
    sample_weight = raw_weight / raw_weight.sum()

    ransac = RANSACRegressor(random_state=0)
    ransac.fit(X, y, sample_weight=sample_weight)

    # Refit a plain linear model on the inliers with their weights.
    inliers = ransac.inlier_mask_
    reference_model = LinearRegression()
    reference_model.fit(X[inliers], y[inliers], sample_weight=sample_weight[inliers])

    assert_allclose(ransac.estimator_.coef_, reference_model.coef_, atol=1e-12)
def test_perfect_horizontal_line():
    """Check that we can fit a line where all samples are inliers.

    Non-regression test for:
    https://github.com/scikit-learn/scikit-learn/issues/19497
    """
    n_points = 100
    features = np.arange(n_points)[:, None]
    target = np.zeros((n_points,))

    ransac = RANSACRegressor(LinearRegression(), random_state=0)
    ransac.fit(features, target)

    fitted = ransac.estimator_
    assert_allclose(fitted.coef_, 0.0)
    assert_allclose(fitted.intercept_, 0.0)

View File

@@ -0,0 +1,861 @@
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import math
import re
import numpy as np
import pytest
from sklearn.base import clone
from sklearn.datasets import load_iris, make_blobs, make_classification
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.linear_model._sag import get_auto_step_size
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import check_random_state, compute_class_weight
from sklearn.utils._testing import (
assert_allclose,
assert_almost_equal,
assert_array_almost_equal,
)
from sklearn.utils.extmath import row_norms
from sklearn.utils.fixes import CSR_CONTAINERS
iris = load_iris()
# this is used for sag classification
def log_dloss(p, y):
    """Return ``-y / (exp(p * y) + 1)``, the logistic loss derivative at `p`.

    Outside the range ``|p * y| <= 18`` a cheaper approximation of the same
    expression is returned.
    """
    margin = p * y
    if margin > 18.0:
        # exp(margin) dominates the denominator, so 1 / (exp(margin) + 1)
        # is approximately exp(-margin)
        return math.exp(-margin) * -y
    elif margin < -18.0:
        # exp(margin) is negligible, so the denominator is approximately 1
        return -y
    else:
        return -y / (math.exp(margin) + 1.0)
def log_loss(p, y):
    """Return the mean logistic loss ``mean(log(1 + exp(-y * p)))``.

    The value is computed with ``np.logaddexp(0, -y * p)``, which equals
    ``log(exp(0) + exp(-y * p))`` but does not overflow when ``-y * p`` is
    large, where ``np.exp`` would produce ``inf``.
    """
    return np.mean(np.logaddexp(0.0, -y * p))
# this is used for sag regression
def squared_dloss(p, y):
    """Return ``p - y``, the derivative of ``0.5 * (p - y) ** 2`` w.r.t. `p`."""
    residual = p - y
    return residual
def squared_loss(p, y):
    """Return the mean of ``0.5 * (p - y) ** 2`` over all entries."""
    half_squared_residuals = 0.5 * (p - y) * (p - y)
    return np.mean(half_squared_residuals)
# function for measuring the penalized objective: loss plus L2 penalty
def get_pobj(w, alpha, myX, myy, loss):
    """Return ``loss(myX @ w, myy) + alpha * ||w||^2 / 2``.

    `w` is flattened first, so a 2-D coefficient array with a single row is
    accepted. `loss` is called as ``loss(predictions, myy)``.
    """
    coef = w.ravel()
    data_term = loss(np.dot(myX, coef), myy)
    l2_penalty = alpha * coef.dot(coef) / 2.0
    return data_term + l2_penalty
def sag(
    X,
    y,
    step_size,
    alpha,
    n_iter=1,
    dloss=None,
    sparse=False,
    sample_weight=None,
    fit_intercept=True,
    saga=False,
):
    """Dense pure-Python SAG/SAGA reference solver used by the tests.

    Parameters
    ----------
    X : 2-D array whose rows ``X[idx]`` are dotted with the weights.
    y : targets, read one at a time as ``y[idx]``.
    step_size : step size applied to every update.
    alpha : penalty strength; adds ``alpha * weights`` to each stored gradient.
    n_iter : number of passes; each pass draws ``n_samples`` random indices.
    dloss : callable ``dloss(p, y_i)`` giving the loss derivative at the
        current prediction ``p``.
    sparse : when true, the averaged intercept step is scaled by 0.01
        instead of 1.0.
    sample_weight : optional per-sample factors multiplied into the loss
        derivative.
    fit_intercept : whether the intercept is updated at all.
    saga : when true, the extra SAGA correction steps are applied.

    Returns
    -------
    tuple ``(weights, intercept)``.
    """
    n_samples, n_features = X.shape[0], X.shape[1]
    weights = np.zeros(X.shape[1])
    # Running sum of the most recent gradient stored for each seen sample.
    sum_gradient = np.zeros(X.shape[1])
    # Last full gradient (loss part plus penalty part) stored per sample.
    gradient_memory = np.zeros((n_samples, n_features))
    intercept = 0.0
    intercept_sum_gradient = 0.0
    intercept_gradient_memory = np.zeros(n_samples)
    # Fixed seed: the sequence of sampled indices is identical on every call.
    rng = np.random.RandomState(77)
    decay = 1.0
    # Indices visited so far; its size is the divisor of the averaged step.
    seen = set()
    # sparse data has a fixed decay of .01
    if sparse:
        decay = 0.01
    for epoch in range(n_iter):
        for k in range(n_samples):
            # Sample uniformly with replacement.
            idx = int(rng.rand() * n_samples)
            # idx = k
            entry = X[idx]
            seen.add(idx)
            p = np.dot(entry, weights) + intercept
            gradient = dloss(p, y[idx])
            if sample_weight is not None:
                gradient *= sample_weight[idx]
            update = entry * gradient + alpha * weights
            # Difference between the new gradient and the one remembered
            # for this sample.
            gradient_correction = update - gradient_memory[idx]
            sum_gradient += gradient_correction
            gradient_memory[idx] = update
            if saga:
                # SAGA-only step taken before the averaged step below.
                weights -= gradient_correction * step_size * (1 - 1.0 / len(seen))
            if fit_intercept:
                # Same bookkeeping for the intercept; `gradient_correction`
                # is reused here as a scalar.
                gradient_correction = gradient - intercept_gradient_memory[idx]
                intercept_gradient_memory[idx] = gradient
                intercept_sum_gradient += gradient_correction
                gradient_correction *= step_size * (1.0 - 1.0 / len(seen))
                if saga:
                    intercept -= (
                        step_size * intercept_sum_gradient / len(seen) * decay
                    ) + gradient_correction
                else:
                    intercept -= step_size * intercept_sum_gradient / len(seen) * decay
            # Averaged-gradient step over the samples seen so far.
            weights -= step_size * sum_gradient / len(seen)
    return weights, intercept
def sag_sparse(
    X,
    y,
    step_size,
    alpha,
    n_iter=1,
    dloss=None,
    sample_weight=None,
    sparse=False,
    fit_intercept=True,
    saga=False,
    random_state=0,
):
    """Pure-Python SAG/SAGA reference solver using lazy, rescaled updates.

    The weights are stored divided by a running scale `wscale`, and the
    averaged-gradient steps are accumulated in `c_sum`. They are applied to
    each coordinate just before it is used and once more before returning.

    Parameters
    ----------
    X : 2-D array whose rows ``X[idx]`` are dotted with the weights.
    y : targets, read one at a time as ``y[idx]``.
    step_size : step size applied to every update.
    alpha : penalty strength; shrinks `wscale` by ``1 - alpha * step_size``
        on every iteration.
    n_iter : number of passes; each pass draws ``n_samples`` random indices.
    dloss : callable ``dloss(p, y_i)`` giving the loss derivative at the
        current prediction ``p``.
    sample_weight : optional per-sample factors multiplied into the loss
        derivative.
    sparse : when true, the averaged intercept step is scaled by 0.01
        instead of 1.0.
    fit_intercept : whether the intercept is updated at all.
    saga : when true, the extra SAGA correction steps are applied.
    random_state : passed to `check_random_state` to build the sampler.

    Returns
    -------
    tuple ``(weights, intercept)``.

    Raises
    ------
    ZeroDivisionError
        If ``step_size * alpha == 1.0``.
    """
    if step_size * alpha == 1.0:
        raise ZeroDivisionError(
            "Sparse sag does not handle the case step_size * alpha == 1"
        )
    n_samples, n_features = X.shape[0], X.shape[1]
    weights = np.zeros(n_features)
    sum_gradient = np.zeros(n_features)
    # For each feature, the value of `counter` at its last lazy update
    # (0 means it has not been updated yet).
    last_updated = np.zeros(n_features, dtype=int)
    # Last scalar loss derivative stored per sample.
    gradient_memory = np.zeros(n_samples)
    rng = check_random_state(random_state)
    intercept = 0.0
    intercept_sum_gradient = 0.0
    # True weights equal `wscale * weights`.
    wscale = 1.0
    decay = 1.0
    # Indices visited so far; its size is the divisor of the averaged step.
    seen = set()
    # Cumulative sum of ``step_size / (wscale * len(seen))``, one slot per
    # iteration.
    c_sum = np.zeros(n_iter * n_samples)
    # sparse data has a fixed decay of .01
    if sparse:
        decay = 0.01
    counter = 0
    for epoch in range(n_iter):
        for k in range(n_samples):
            # idx = k
            idx = int(rng.rand() * n_samples)
            entry = X[idx]
            seen.add(idx)
            if counter >= 1:
                # Apply the steps accumulated since each feature was last
                # brought up to date.
                for j in range(n_features):
                    if last_updated[j] == 0:
                        weights[j] -= c_sum[counter - 1] * sum_gradient[j]
                    else:
                        weights[j] -= (
                            c_sum[counter - 1] - c_sum[last_updated[j] - 1]
                        ) * sum_gradient[j]
                    last_updated[j] = counter
            p = (wscale * np.dot(entry, weights)) + intercept
            gradient = dloss(p, y[idx])
            if sample_weight is not None:
                gradient *= sample_weight[idx]
            update = entry * gradient
            # Only the scalar derivative is remembered per sample, so the
            # previous gradient is rebuilt as ``gradient_memory[idx] * entry``.
            gradient_correction = update - (gradient_memory[idx] * entry)
            sum_gradient += gradient_correction
            if saga:
                # SAGA-only step, divided by `wscale` because the stored
                # weights are rescaled.
                for j in range(n_features):
                    weights[j] -= (
                        gradient_correction[j]
                        * step_size
                        * (1 - 1.0 / len(seen))
                        / wscale
                    )
            if fit_intercept:
                # `gradient_correction` is reused here as a scalar.
                gradient_correction = gradient - gradient_memory[idx]
                intercept_sum_gradient += gradient_correction
                gradient_correction *= step_size * (1.0 - 1.0 / len(seen))
                if saga:
                    intercept -= (
                        step_size * intercept_sum_gradient / len(seen) * decay
                    ) + gradient_correction
                else:
                    intercept -= step_size * intercept_sum_gradient / len(seen) * decay
            gradient_memory[idx] = gradient
            # The penalty is applied by shrinking the scale, not the weights.
            wscale *= 1.0 - alpha * step_size
            if counter == 0:
                c_sum[0] = step_size / (wscale * len(seen))
            else:
                c_sum[counter] = c_sum[counter - 1] + step_size / (wscale * len(seen))
            if counter >= 1 and wscale < 1e-9:
                # The scale has become tiny: apply all pending updates,
                # fold the scale into the weights and reset it to 1.
                for j in range(n_features):
                    if last_updated[j] == 0:
                        weights[j] -= c_sum[counter] * sum_gradient[j]
                    else:
                        weights[j] -= (
                            c_sum[counter] - c_sum[last_updated[j] - 1]
                        ) * sum_gradient[j]
                    last_updated[j] = counter + 1
                c_sum[counter] = 0
                weights *= wscale
                wscale = 1.0
            counter += 1
    # Final pass: apply the updates still pending for every feature, then
    # undo the scaling.
    for j in range(n_features):
        if last_updated[j] == 0:
            weights[j] -= c_sum[counter - 1] * sum_gradient[j]
        else:
            weights[j] -= (
                c_sum[counter - 1] - c_sum[last_updated[j] - 1]
            ) * sum_gradient[j]
    weights *= wscale
    return weights, intercept
def get_step_size(X, alpha, fit_intercept, classification=True):
    """Return the step size derived from the largest squared row norm of `X`.

    With ``M = max_i ||X[i]||^2`` the result is
    ``4 / (M + fit_intercept + 4 * alpha)`` when `classification` is true and
    ``1 / (M + fit_intercept + alpha)`` otherwise.
    """
    max_squared_norm = np.max(np.sum(X * X, axis=1))
    if not classification:
        return 1.0 / (max_squared_norm + fit_intercept + alpha)
    return 4.0 / (max_squared_norm + fit_intercept + 4.0 * alpha)
def test_classifier_matching():
    """Compare LogisticRegression (sag and saga) with both reference solvers."""
    n_samples = 20
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0, cluster_std=0.1)
    # y must be 0 or 1 for the estimator and -1 or +1 for the references
    y_signed = 2 * y - 1
    alpha = 1.1
    fit_intercept = True
    step_size = get_step_size(X, alpha, fit_intercept)
    # SAGA variance w.r.t. stream order is higher, hence more iterations
    iterations_per_solver = {"sag": 80, "saga": 300}

    for solver, n_iter in iterations_per_solver.items():
        clf = LogisticRegression(
            solver=solver,
            fit_intercept=fit_intercept,
            tol=1e-11,
            C=1.0 / alpha / n_samples,
            max_iter=n_iter,
            random_state=10,
        )
        clf.fit(X, y)

        reference_kwargs = dict(
            n_iter=n_iter,
            dloss=log_dloss,
            fit_intercept=fit_intercept,
            saga=solver == "saga",
        )
        lazy_reference = sag_sparse(X, y_signed, step_size, alpha, **reference_kwargs)
        dense_reference = sag(X, y_signed, step_size, alpha, **reference_kwargs)

        for weights, intercept in (lazy_reference, dense_reference):
            assert_array_almost_equal(np.atleast_2d(weights), clf.coef_, decimal=9)
            assert_array_almost_equal(
                np.atleast_1d(intercept), clf.intercept_, decimal=9
            )
def test_regressor_matching():
    """Compare Ridge (sag) with both pure-Python reference solvers."""
    n_samples, n_features = 10, 5
    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    alpha = 1.0
    n_iter = 100
    fit_intercept = True
    step_size = get_step_size(X, alpha, fit_intercept, classification=False)

    ridge = Ridge(
        fit_intercept=fit_intercept,
        tol=0.00000000001,
        solver="sag",
        alpha=alpha * n_samples,
        max_iter=n_iter,
    )
    ridge.fit(X, y)

    reference_kwargs = dict(
        n_iter=n_iter, dloss=squared_dloss, fit_intercept=fit_intercept
    )
    lazy_reference = sag_sparse(X, y, step_size, alpha, **reference_kwargs)
    dense_reference = sag(X, y, step_size, alpha, **reference_kwargs)

    for weights, intercept in (lazy_reference, dense_reference):
        assert_allclose(weights, ridge.coef_)
        assert_allclose(intercept, ridge.intercept_)
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_pobj_matches_logistic_regression(csr_container):
    """tests if the sag pobj matches log reg"""
    n_samples = 100
    alpha = 1.0
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0, cluster_std=0.1)

    shared_params = dict(
        fit_intercept=False,
        tol=0.0000001,
        C=1.0 / alpha / n_samples,
        max_iter=20,
        random_state=10,
    )
    sag_on_dense = LogisticRegression(solver="sag", **shared_params)
    sag_on_csr = clone(sag_on_dense)
    default_solver = LogisticRegression(**shared_params)

    sag_on_dense.fit(X, y)
    sag_on_csr.fit(csr_container(X), y)
    default_solver.fit(X, y)

    pobj1, pobj2, pobj3 = (
        get_pobj(clf.coef_, alpha, X, y, log_loss)
        for clf in (sag_on_dense, sag_on_csr, default_solver)
    )

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj2, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj1, decimal=4)
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_pobj_matches_ridge_regression(csr_container):
    """tests if the sag pobj matches ridge reg"""
    n_samples, n_features = 100, 10
    alpha = 1.0
    rng = np.random.RandomState(10)
    X = rng.normal(size=(n_samples, n_features))
    true_w = rng.normal(size=n_features)
    y = X.dot(true_w)

    shared_params = dict(
        fit_intercept=False, alpha=alpha, max_iter=100, random_state=42
    )
    sag_on_dense = Ridge(tol=0.00000000001, solver="sag", **shared_params)
    sag_on_csr = clone(sag_on_dense)
    lsqr_solver = Ridge(tol=0.00001, solver="lsqr", **shared_params)

    sag_on_dense.fit(X, y)
    sag_on_csr.fit(csr_container(X), y)
    lsqr_solver.fit(X, y)

    pobj1, pobj2, pobj3 = (
        get_pobj(clf.coef_, alpha, X, y, squared_loss)
        for clf in (sag_on_dense, sag_on_csr, lsqr_solver)
    )

    assert_array_almost_equal(pobj1, pobj2, decimal=4)
    assert_array_almost_equal(pobj1, pobj3, decimal=4)
    assert_array_almost_equal(pobj3, pobj2, decimal=4)
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_regressor_computed_correctly(csr_container):
    """tests if the sag regressor is computed correctly"""
    alpha = 0.1
    n_samples, n_features = 40, 10
    max_iter = 100
    fit_intercept = True
    # `rng` is shared by the estimators and the reference solver, so the
    # order of the statements below matters.
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    true_coef = rng.normal(size=n_features)
    y = np.dot(X, true_coef) + 2.0
    step_size = get_step_size(X, alpha, fit_intercept, classification=False)

    ridge_on_dense = Ridge(
        fit_intercept=fit_intercept,
        tol=0.000001,
        solver="sag",
        alpha=alpha * n_samples,
        max_iter=max_iter,
        random_state=rng,
    )
    ridge_on_csr = clone(ridge_on_dense)

    ridge_on_dense.fit(X, y)
    ridge_on_csr.fit(csr_container(X), y)

    reference_kwargs = dict(
        n_iter=max_iter,
        dloss=squared_dloss,
        fit_intercept=fit_intercept,
        random_state=rng,
    )
    spweights1, spintercept1 = sag_sparse(X, y, step_size, alpha, **reference_kwargs)
    spweights2, spintercept2 = sag_sparse(
        X, y, step_size, alpha, sparse=True, **reference_kwargs
    )

    assert_array_almost_equal(
        ridge_on_dense.coef_.ravel(), spweights1.ravel(), decimal=3
    )
    assert_almost_equal(ridge_on_dense.intercept_, spintercept1, decimal=1)

    # TODO: uncomment when sparse Ridge with intercept will be fixed (#4710)
    # assert_array_almost_equal(ridge_on_csr.coef_.ravel(),
    #                          spweights2.ravel(),
    #                          decimal=3)
    # assert_almost_equal(ridge_on_csr.intercept_, spintercept2, decimal=1)
def test_get_auto_step_size():
    """Check `get_auto_step_size` against hand-computed step sizes."""
    X = np.array([[1, 2, 3], [2, 3, 4], [2, 3, 2]], dtype=np.float64)
    alpha = 1.2
    n_samples = X.shape[0]
    # sum the squares of the second sample because that's the largest
    max_squared_sum = 4 + 9 + 16
    max_squared_sum_ = row_norms(X, squared=True).max()
    assert_almost_equal(max_squared_sum, max_squared_sum_, decimal=4)

    for saga in [True, False]:
        for fit_intercept in (True, False):
            sqr_denominator = max_squared_sum + alpha + int(fit_intercept)
            log_denominator = max_squared_sum + 4.0 * alpha + int(fit_intercept)
            if not saga:
                step_size_sqr = 1.0 / sqr_denominator
                step_size_log = 4.0 / log_denominator
            else:
                L_sqr = sqr_denominator
                L_log = log_denominator / 4.0
                mun_sqr = min(2 * n_samples * alpha, L_sqr)
                mun_log = min(2 * n_samples * alpha, L_log)
                step_size_sqr = 1 / (2 * L_sqr + mun_sqr)
                step_size_log = 1 / (2 * L_log + mun_log)

            step_size_sqr_, step_size_log_ = (
                get_auto_step_size(
                    max_squared_sum_,
                    alpha,
                    loss_name,
                    fit_intercept,
                    n_samples=n_samples,
                    is_saga=saga,
                )
                for loss_name in ("squared", "log")
            )

            assert_almost_equal(step_size_sqr, step_size_sqr_, decimal=4)
            assert_almost_equal(step_size_log, step_size_log_, decimal=4)

    # `fit_intercept` keeps the value of the last loop iteration here.
    msg = "Unknown loss function for SAG solver, got wrong instead of"
    with pytest.raises(ValueError, match=msg):
        get_auto_step_size(max_squared_sum_, alpha, "wrong", fit_intercept)
@pytest.mark.parametrize("seed", range(3))  # locally tested with 1000 seeds
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_regressor(seed, csr_container):
    """tests if the sag regressor performs well"""
    xmin, xmax = -5, 5
    n_samples = 300
    alpha = 0.1
    ridge_params = dict(
        tol=0.001, solver="sag", max_iter=100, alpha=alpha * n_samples
    )
    rng = np.random.RandomState(seed)
    X = np.linspace(xmin, xmax, n_samples).reshape(n_samples, 1)

    def fit_dense_and_sparse(model, target):
        # Clone before fitting, fit on dense and CSR input, score on dense X.
        model_on_csr = clone(model)
        model.fit(X, target)
        model_on_csr.fit(csr_container(X), target)
        return model.score(X, target), model_on_csr.score(X, target)

    # simple linear function without noise
    y = 0.5 * X.ravel()
    score1, score2 = fit_dense_and_sparse(Ridge(random_state=rng, **ridge_params), y)
    assert score1 > 0.98
    assert score2 > 0.98

    # simple linear function with noise
    y = 0.5 * X.ravel() + rng.randn(n_samples, 1).ravel()
    score1, score2 = fit_dense_and_sparse(Ridge(**ridge_params), y)
    assert score1 > 0.45
    assert score2 > 0.45
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_classifier_computed_correctly(csr_container):
    """tests if the binary classifier is computed correctly"""
    alpha = 0.1
    n_samples = 50
    n_iter = 50
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=0, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)

    # Encode the second class as +1 and everything else as -1.
    classes = np.unique(y)
    y = np.where(y != classes[1], -1.0, 1.0)

    clf_on_dense = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=n_iter,
        tol=0.00001,
        random_state=77,
        fit_intercept=fit_intercept,
    )
    clf_on_csr = clone(clf_on_dense)
    clf_on_dense.fit(X, y)
    clf_on_csr.fit(csr_container(X), y)

    reference_kwargs = dict(
        n_iter=n_iter, dloss=log_dloss, fit_intercept=fit_intercept
    )
    spweights, spintercept = sag_sparse(X, y, step_size, alpha, **reference_kwargs)
    spweights2, spintercept2 = sag_sparse(
        X, y, step_size, alpha, sparse=True, **reference_kwargs
    )

    comparisons = (
        (clf_on_dense, spweights, spintercept),
        (clf_on_csr, spweights2, spintercept2),
    )
    for clf, ref_weights, ref_intercept in comparisons:
        assert_array_almost_equal(clf.coef_.ravel(), ref_weights.ravel(), decimal=2)
        assert_almost_equal(clf.intercept_, ref_intercept, decimal=1)
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_sag_multiclass_computed_correctly(csr_container):
    """tests if the multiclass classifier is computed correctly"""
    alpha = 0.1
    n_samples = 20
    max_iter = 70
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=3, random_state=0, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)
    classes = np.unique(y)

    ovr_on_dense = OneVsRestClassifier(
        LogisticRegression(
            solver="sag",
            C=1.0 / alpha / n_samples,
            max_iter=max_iter,
            tol=1e-5,
            random_state=77,
            fit_intercept=fit_intercept,
        )
    )
    ovr_on_csr = clone(ovr_on_dense)
    ovr_on_dense.fit(X, y)
    ovr_on_csr.fit(csr_container(X), y)

    def reference_fit(targets, sparse):
        return sag_sparse(
            X,
            targets,
            step_size,
            alpha,
            dloss=log_dloss,
            n_iter=max_iter,
            sparse=sparse,
            fit_intercept=fit_intercept,
        )

    coef1, intercept1, coef2, intercept2 = [], [], [], []
    for cl in classes:
        # One-vs-rest encoding: +1 for the current class, -1 otherwise.
        y_encoded = np.where(y != cl, -1.0, 1.0)
        spweights1, spintercept1 = reference_fit(y_encoded, sparse=False)
        spweights2, spintercept2 = reference_fit(y_encoded, sparse=True)
        coef1.append(spweights1)
        intercept1.append(spintercept1)
        coef2.append(spweights2)
        intercept2.append(spintercept2)

    coef1 = np.vstack(coef1)
    intercept1 = np.array(intercept1)
    coef2 = np.vstack(coef2)
    intercept2 = np.array(intercept2)

    for i in range(len(classes)):
        dense_binary = ovr_on_dense.estimators_[i]
        csr_binary = ovr_on_csr.estimators_[i]
        assert_allclose(dense_binary.coef_.ravel(), coef1[i], rtol=1e-2)
        assert_allclose(dense_binary.intercept_, intercept1[i], rtol=1e-1)
        assert_allclose(csr_binary.coef_.ravel(), coef2[i], rtol=1e-2)
        # Note the very crude accuracy, i.e. high rtol.
        assert_allclose(csr_binary.intercept_, intercept2[i], rtol=5e-1)
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_classifier_results(csr_container):
    """tests if classifier results match target"""
    alpha = 0.1
    n_samples, n_features = 10, 20
    rng = np.random.RandomState(0)
    X = rng.normal(size=(n_samples, n_features))
    true_coef = rng.normal(size=n_features)
    # Labels are the signs of a noiseless linear model.
    y = np.sign(np.dot(X, true_coef))

    clf_on_dense = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=200,
        tol=0.01,
        random_state=77,
    )
    clf_on_csr = clone(clf_on_dense)

    clf_on_dense.fit(X, y)
    clf_on_csr.fit(csr_container(X), y)

    for clf in (clf_on_dense, clf_on_csr):
        assert_almost_equal(clf.predict(X), y, decimal=12)
@pytest.mark.filterwarnings("ignore:The max_iter was reached")
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
def test_binary_classifier_class_weight(csr_container):
    """tests binary classifier with classweights for each class"""
    alpha = 0.1
    n_samples = 50
    n_iter = 20
    fit_intercept = True
    X, y = make_blobs(n_samples=n_samples, centers=2, random_state=10, cluster_std=0.1)
    step_size = get_step_size(X, alpha, fit_intercept, classification=True)

    # Encode the second class as +1 and everything else as -1.
    classes = np.unique(y)
    y = np.where(y != classes[1], -1.0, 1.0)

    class_weight = {1: 0.45, -1: 0.55}
    clf_on_dense = LogisticRegression(
        solver="sag",
        C=1.0 / alpha / n_samples,
        max_iter=n_iter,
        tol=0.00001,
        random_state=77,
        fit_intercept=fit_intercept,
        class_weight=class_weight,
    )
    clf_on_csr = clone(clf_on_dense)
    clf_on_dense.fit(X, y)
    clf_on_csr.fit(csr_container(X), y)

    # Expand the per-class weights into one weight per sample.
    class_weight_ = compute_class_weight(class_weight, classes=np.unique(y), y=y)
    sample_weight = class_weight_[LabelEncoder().fit_transform(y)]

    reference_kwargs = dict(
        n_iter=n_iter,
        dloss=log_dloss,
        sample_weight=sample_weight,
        fit_intercept=fit_intercept,
    )
    spweights, spintercept = sag_sparse(X, y, step_size, alpha, **reference_kwargs)
    spweights2, spintercept2 = sag_sparse(
        X, y, step_size, alpha, sparse=True, **reference_kwargs
    )

    comparisons = (
        (clf_on_dense, spweights, spintercept),
        (clf_on_csr, spweights2, spintercept2),
    )
    for clf, ref_weights, ref_intercept in comparisons:
        assert_array_almost_equal(clf.coef_.ravel(), ref_weights.ravel(), decimal=2)
        assert_almost_equal(clf.intercept_, ref_intercept, decimal=1)
def test_classifier_single_class():
    """tests if ValueError is thrown with only one class"""
    features = [[1, 2], [3, 4]]
    single_class_labels = [1, 1]
    clf = LogisticRegression(solver="sag")

    msg = "This solver needs samples of at least 2 classes in the data"
    with pytest.raises(ValueError, match=msg):
        clf.fit(features, single_class_labels)
def test_step_size_alpha_error():
    """Both sag estimators must raise ZeroDivisionError on this all-zero input."""
    X = [[0, 0], [0, 0]]
    y = [1, -1]
    fit_intercept = False
    alpha = 1.0
    msg = re.escape(
        "Current sag implementation does not handle the case"
        " step_size * alpha_scaled == 1"
    )

    estimators = (
        LogisticRegression(solver="sag", C=1.0 / alpha, fit_intercept=fit_intercept),
        Ridge(fit_intercept=fit_intercept, solver="sag", alpha=alpha),
    )
    for estimator in estimators:
        with pytest.raises(ZeroDivisionError, match=msg):
            estimator.fit(X, y)
@pytest.mark.parametrize("solver", ["sag", "saga"])
def test_sag_classifier_raises_error(solver):
    """Non-regression test: numerical errors in sag/saga must be raised."""
    # Following #13316, the error handling behavior changed in cython sag. This
    # is simply a non-regression test to make sure numerical errors are
    # properly raised.

    # Train a classifier on a simple problem
    rng = np.random.RandomState(42)
    X, y = make_classification(random_state=rng)
    warm_started = LogisticRegression(solver=solver, random_state=rng, warm_start=True)
    warm_started.fit(X, y)

    # Trigger a numerical error by:
    # - corrupting the fitted coefficients of the classifier
    # - fit it again starting from its current state thanks to warm_start
    warm_started.coef_[:] = np.nan

    expected_error = "Floating-point under-/overflow"
    with pytest.raises(ValueError, match=expected_error):
        warm_started.fit(X, y)

View File

@@ -0,0 +1,387 @@
import numpy as np
import pytest
import scipy.sparse as sp
from numpy.testing import assert_allclose
from sklearn.datasets import make_regression
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import ElasticNet, ElasticNetCV, Lasso, LassoCV
from sklearn.utils._testing import (
assert_almost_equal,
assert_array_almost_equal,
create_memmap_backed_data,
ignore_warnings,
)
from sklearn.utils.fixes import COO_CONTAINERS, CSC_CONTAINERS, LIL_CONTAINERS
def test_sparse_coef():
    """Check that the sparse_coef property works."""
    model = ElasticNet()
    model.coef_ = [1, 2, 3]

    assert sp.issparse(model.sparse_coef_)
    first_row = model.sparse_coef_.toarray().tolist()[0]
    assert first_row == model.coef_
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_lasso_zero(csc_container):
    """Check that the sparse lasso can handle zero data without crashing."""
    empty_X = csc_container((3, 1))
    zero_y = [0, 0, 0]
    test_points = np.array([[1], [2], [3]])

    lasso = Lasso().fit(empty_X, zero_y)
    predictions = lasso.predict(test_points)

    assert_array_almost_equal(lasso.coef_, [0])
    assert_array_almost_equal(predictions, [0, 0, 0])
    assert_almost_equal(lasso.dual_gap_, 0)
@pytest.mark.parametrize("with_sample_weight", [True, False])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_enet_toy_list_input(with_sample_weight, csc_container):
    """Test ElasticNet for several alpha and l1_ratio values on CSC input."""
    X = csc_container(np.array([[-1], [0], [1]]))
    Y = [-1, 0, 1]  # just a straight line
    T = np.array([[2], [3], [4]])  # test sample
    sw = np.array([2.0, 2, 2]) if with_sample_weight else None

    # this should be the same as unregularized least squares
    clf = ElasticNet(alpha=0, l1_ratio=1.0)
    # catch warning about alpha=0.
    # this is discouraged but should work.
    ignore_warnings(clf.fit)(X, Y, sample_weight=sw)
    assert_array_almost_equal(clf.coef_, [1])
    assert_array_almost_equal(clf.predict(T), [2, 3, 4])
    assert_almost_equal(clf.dual_gap_, 0)

    # (l1_ratio, expected coefficient, expected predictions) for alpha=0.5
    regularized_cases = [
        (0.3, [0.50819], [1.0163, 1.5245, 2.0327]),
        (0.5, [0.45454], [0.9090, 1.3636, 1.8181]),
    ]
    for l1_ratio, expected_coef, expected_pred in regularized_cases:
        clf = ElasticNet(alpha=0.5, l1_ratio=l1_ratio)
        clf.fit(X, Y, sample_weight=sw)
        assert_array_almost_equal(clf.coef_, expected_coef, decimal=3)
        assert_array_almost_equal(clf.predict(T), expected_pred, decimal=3)
        assert_almost_equal(clf.dual_gap_, 0)
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
def test_enet_toy_explicit_sparse_input(lil_container):
    """Check ElasticNet on a toy problem whose inputs are built as sparse LIL."""
    # Training samples: only the non-zero entries are written explicitly.
    X = lil_container((3, 1))
    X[0, 0] = -1
    # X[1, 0] is left as an implicit zero
    X[2, 0] = 1
    Y = [-1, 0, 1]  # the targets lie on a straight line (identity function)

    # Test samples, filled in row by row.
    T = lil_container((3, 1))
    for row, value in enumerate([2, 3, 4]):
        T[row, 0] = value

    # alpha=0 with l1_ratio=1.0: this should be the same as lasso. The
    # warning raised by the fit is silenced.
    enet = ElasticNet(alpha=0, l1_ratio=1.0)
    ignore_warnings(enet.fit)(X, Y)
    predictions = enet.predict(T)
    assert_array_almost_equal(enet.coef_, [1])
    assert_array_almost_equal(predictions, [2, 3, 4])
    assert_almost_equal(enet.dual_gap_, 0)

    # Regularized fits: (l1_ratio, expected coef_, expected predictions).
    regularized_cases = [
        (0.3, [0.50819], [1.0163, 1.5245, 2.0327]),
        (0.5, [0.45454], [0.9090, 1.3636, 1.8181]),
    ]
    for l1_ratio, expected_coef, expected_pred in regularized_cases:
        enet = ElasticNet(alpha=0.5, l1_ratio=l1_ratio)
        enet.fit(X, Y)
        predictions = enet.predict(T)
        assert_array_almost_equal(enet.coef_, expected_coef, decimal=3)
        assert_array_almost_equal(predictions, expected_pred, decimal=3)
        assert_almost_equal(enet.dual_gap_, 0)
def make_sparse_data(
    sparse_container,
    n_samples=100,
    n_features=100,
    n_informative=10,
    seed=42,
    positive=False,
    n_targets=1,
):
    """Build a noiseless linear regression problem with a half-zeroed design.

    Parameters
    ----------
    sparse_container : callable
        Applied to the dense design matrix to produce the returned ``X``.
    n_samples, n_features : int
        Shape of the design matrix.
    n_informative : int
        Number of leading features with a non-zero ground-truth weight.
    seed : int
        Seed of the ``RandomState`` driving every random draw.
    positive : bool
        If True, the ground-truth weights are replaced by their absolute value.
    n_targets : int
        Number of target columns; with a single target ``y`` is flattened.

    Returns
    -------
    X : output of ``sparse_container``
        The design matrix after conversion.
    y : ndarray
        ``X @ w`` computed on the dense data, of shape ``(n_samples,)`` when
        ``n_targets == 1`` and ``(n_samples, n_targets)`` otherwise.
    """
    rng = np.random.RandomState(seed)

    # Ground-truth model: only the first `n_informative` features matter.
    coef = rng.randn(n_features, n_targets)
    coef[n_informative:] = 0.0
    if positive:
        coef = np.abs(coef)

    # Dense design matrix in which about 50% of the entries are zeroed out.
    X_dense = rng.randn(n_samples, n_features)
    zero_mask = rng.uniform(size=(n_samples, n_features)) > 0.5
    X_dense[zero_mask] = 0.0

    # Targets are generated from the dense data, before the conversion.
    y = np.dot(X_dense, coef)
    X = sparse_container(X_dense)
    if n_targets == 1:
        y = np.ravel(y)
    return X, y
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
@pytest.mark.parametrize(
    "alpha, fit_intercept, positive",
    [(0.1, False, False), (0.1, True, False), (1e-3, False, True), (1e-3, True, True)],
)
def test_sparse_enet_not_as_toy_dataset(csc_container, alpha, fit_intercept, positive):
    """ElasticNet converges to the same model on sparse and dense inputs."""
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(
        csc_container, n_samples, n_features, n_informative, positive=positive
    )

    # The second half of the data is used for training, the first for testing.
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    # Both estimators share exactly the same hyper-parameters.
    enet_params = dict(
        alpha=alpha,
        l1_ratio=0.8,
        fit_intercept=fit_intercept,
        max_iter=max_iter,
        tol=1e-7,
        positive=positive,
        warm_start=True,
    )

    s_clf = ElasticNet(**enet_params)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(**enet_params)
    d_clf.fit(X_train.toarray(), y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    n_nonzero = np.sum(s_clf.coef_ != 0.0)
    assert n_nonzero < 2 * n_informative
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_sparse_lasso_not_as_toy_dataset(csc_container):
    """Lasso converges on sparse and dense inputs and yields sparse coefs."""
    n_samples = 100
    n_informative = 10
    lasso_params = dict(alpha=0.1, fit_intercept=False, max_iter=1000, tol=1e-7)

    X, y = make_sparse_data(
        csc_container, n_samples=n_samples, n_informative=n_informative
    )

    # The second half of the data is used for training, the first for testing.
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    s_clf = Lasso(**lasso_params)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = Lasso(**lasso_params)
    d_clf.fit(X_train.toarray(), y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    # check that the coefs are sparse: exactly the informative ones remain
    n_nonzero = np.sum(s_clf.coef_ != 0.0)
    assert n_nonzero == n_informative
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_enet_multitarget(csc_container):
    """A multi-target ElasticNet fit matches one single-target fit per column."""
    n_targets = 3
    X, y = make_sparse_data(csc_container, n_targets=n_targets)

    # XXX: There is a bug when precompute is not False!
    estimator = ElasticNet(alpha=0.01, precompute=False)
    estimator.fit(X, y)

    # Keep the joint results before the estimator is refitted below.
    joint_coef = estimator.coef_
    joint_intercept = estimator.intercept_
    joint_dual_gap = estimator.dual_gap_

    for target_idx in range(n_targets):
        estimator.fit(X, y[:, target_idx])
        assert_array_almost_equal(joint_coef[target_idx, :], estimator.coef_)
        assert_array_almost_equal(joint_intercept[target_idx], estimator.intercept_)
        assert_array_almost_equal(joint_dual_gap[target_idx], estimator.dual_gap_)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_path_parameters(csc_container):
    """ElasticNetCV keeps its path parameters and its MSE path matches dense."""
    X, y = make_sparse_data(csc_container)
    n_alphas = 10

    enet_cv = ElasticNetCV(
        alphas=n_alphas,
        eps=1e-3,
        max_iter=50,
        l1_ratio=0.5,
        fit_intercept=False,
    )
    enet_cv.fit(X, y)

    # Constructor parameters are untouched and the path has n_alphas entries.
    assert_almost_equal(0.5, enet_cv.l1_ratio)
    assert enet_cv.alphas == n_alphas
    assert len(enet_cv.alphas_) == n_alphas

    mse_path_sparse = enet_cv.mse_path_

    # compare with dense data: refit the same estimator on the dense matrix
    enet_cv.fit(X.toarray(), y)
    assert_almost_equal(enet_cv.mse_path_, mse_path_sparse)
@pytest.mark.parametrize("Model", [Lasso, ElasticNet, LassoCV, ElasticNetCV])
@pytest.mark.parametrize("fit_intercept", [False, True])
@pytest.mark.parametrize("n_samples, n_features", [(24, 6), (6, 24)])
@pytest.mark.parametrize("with_sample_weight", [True, False])
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_sparse_dense_equality(
    Model, fit_intercept, n_samples, n_features, with_sample_weight, csc_container
):
    """Fitting on sparse or dense versions of the same data gives equal models."""
    half_features = n_features // 2
    X, y = make_regression(
        n_samples=n_samples,
        n_features=n_features,
        effective_rank=half_features,
        n_informative=half_features,
        bias=4 * fit_intercept,
        noise=1,
        random_state=42,
    )

    sw = None
    if with_sample_weight:
        sw = np.abs(np.random.RandomState(42).normal(scale=10, size=y.shape))

    Xs = csc_container(X)
    params = {"fit_intercept": fit_intercept, "tol": 1e-6}
    reg_dense = Model(**params).fit(X, y, sample_weight=sw)
    reg_sparse = Model(**params).fit(Xs, y, sample_weight=sw)

    if fit_intercept:
        assert reg_sparse.intercept_ == pytest.approx(reg_dense.intercept_)
        # balance property: the weighted mean of the predictions equals the
        # weighted mean of the targets
        mean_prediction = np.average(reg_sparse.predict(X), weights=sw)
        assert mean_prediction == pytest.approx(np.average(y, weights=sw))

    assert_allclose(reg_sparse.coef_, reg_dense.coef_)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_same_output_sparse_dense_lasso_and_enet_cv(csc_container):
    """ElasticNetCV and LassoCV give the same results on sparse and dense X."""
    X, y = make_sparse_data(csc_container, n_samples=40, n_features=10)

    # (estimator class, constructor arguments) checked in turn.
    cv_models = [
        (ElasticNetCV, dict(max_iter=100, tol=1e-7)),
        (LassoCV, dict(max_iter=100, cv=4, tol=1e-8)),
    ]
    compared_attributes = ("alpha_", "intercept_", "mse_path_", "alphas_")

    for Model, model_params in cv_models:
        clfs = Model(**model_params)
        clfs.fit(X, y)
        clfd = Model(**model_params)
        clfd.fit(X.toarray(), y)
        for attribute in compared_attributes:
            assert_allclose(getattr(clfs, attribute), getattr(clfd, attribute))
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
def test_same_multiple_output_sparse_dense(coo_container):
    """Multi-output ElasticNet predicts the same from dense and sparse inputs."""
    X = [
        [0, 1, 2, 3, 4],
        [0, 2, 5, 8, 11],
        [9, 10, 11, 12, 13],
        [10, 11, 12, 13, 14],
    ]
    y = [
        [1, 2, 3, 4, 5],
        [1, 3, 6, 9, 12],
        [10, 11, 12, 13, 14],
        [11, 12, 13, 14, 15],
    ]

    # Dense reference: fit and predict on plain lists / arrays.
    enet_dense = ElasticNet()
    enet_dense.fit(X, y)
    sample = np.array([1, 2, 3, 4, 5]).reshape(1, -1)
    predict_dense = enet_dense.predict(sample)

    # Sparse counterpart: both the training data and the sample are converted.
    enet_sparse = ElasticNet()
    enet_sparse.fit(coo_container(X), y)
    predict_sparse = enet_sparse.predict(coo_container(sample))

    assert_array_almost_equal(predict_sparse, predict_dense)
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
def test_sparse_enet_coordinate_descent(csc_container):
    """A ConvergenceWarning is raised when the sparse fit does not converge."""
    lasso = Lasso(
        alpha=1e-10, fit_intercept=False, warm_start=True, max_iter=2, tol=1e-10
    )
    # With warm_start=True, start from very bad coefficients so that the two
    # allowed iterations are not enough.
    lasso.coef_ = np.array([1, 1, 1, 1000])

    X = csc_container(np.array([[-1, -1, 1, 1], [1, 1, -1, -1]]))
    y = np.array([-1, 1])

    warning_message = (
        "Objective did not converge. You might want "
        "to increase the number of iterations."
    )
    with pytest.warns(ConvergenceWarning, match=warning_message):
        lasso.fit(X, y)
@pytest.mark.parametrize("copy_X", (True, False))
def test_sparse_read_only_buffer(copy_X):
    """Sparse coordinate descent must accept a read-only ``X.data`` buffer."""
    rng = np.random.RandomState(0)
    enet = ElasticNet(alpha=0.1, copy_X=copy_X, random_state=rng)

    X = sp.random(100, 20, format="csc", random_state=rng)
    # Swap the data buffer for a memmap-backed (read-only) copy.
    X.data = create_memmap_backed_data(X.data)
    y = rng.rand(100)

    # The test passes as long as fitting does not raise.
    enet.fit(X, y)

View File

@@ -0,0 +1,296 @@
"""
Testing for Theil-Sen module (sklearn.linear_model._theil_sen)
"""
# Authors: The scikit-learn developers
# SPDX-License-Identifier: BSD-3-Clause
import os
import re
import sys
from contextlib import contextmanager
import numpy as np
import pytest
from numpy.testing import (
assert_array_almost_equal,
assert_array_equal,
assert_array_less,
)
from scipy.linalg import norm
from scipy.optimize import fmin_bfgs
from sklearn.exceptions import ConvergenceWarning
from sklearn.linear_model import LinearRegression, TheilSenRegressor
from sklearn.linear_model._theil_sen import (
_breakdown_point,
_modified_weiszfeld_step,
_spatial_median,
)
from sklearn.utils._testing import assert_almost_equal
@contextmanager
def no_stdout_stderr():
    """Temporarily redirect ``sys.stdout`` and ``sys.stderr`` to ``os.devnull``.

    The original streams are restored when the ``with`` block exits, including
    when the block raises: without the ``try``/``finally`` an exception would
    leave both streams bound to the already-closed devnull handle.
    """
    old_stdout = sys.stdout
    old_stderr = sys.stderr
    with open(os.devnull, "w") as devnull:
        sys.stdout = devnull
        sys.stderr = devnull
        try:
            yield
        finally:
            # Flush while the handle is still open, then put the streams back.
            devnull.flush()
            sys.stdout = old_stdout
            sys.stderr = old_stderr
def gen_toy_problem_1d(intercept=True):
    """Generate a noisy 1D linear problem ``y = w * x + c`` with a few outliers.

    Parameters
    ----------
    intercept : bool, default=True
        If True, 50 samples are drawn with ``c = 2.0`` and four outliers are
        injected; otherwise 100 samples with ``c = 0.1`` and five outliers.

    Returns
    -------
    X : ndarray of shape (n_samples, 1)
    y : ndarray of shape (n_samples,)
    w : float
        Slope of the underlying model (3.0).
    c : float
        Offset of the underlying model.
    """
    rng = np.random.RandomState(0)
    w = 3.0
    c, n_samples = (2.0, 50) if intercept else (0.1, 100)

    # Linear model with Gaussian noise of standard deviation 0.1.
    x = rng.normal(size=n_samples)
    noise = 0.1 * rng.normal(size=n_samples)
    y = w * x + c + noise

    # Outliers as (index, x value, y value); the first two are shared.
    outliers = [(42, -2, 4), (43, -2.5, 8)]
    if intercept:
        outliers += [(33, 2.5, 1), (49, 2.1, 2)]
    else:
        outliers += [(53, 2.5, 1), (60, 2.1, 2), (72, 1.8, -7)]
    for index, x_outlier, y_outlier in outliers:
        x[index] = x_outlier
        y[index] = y_outlier

    return x[:, np.newaxis], y, w, c
def gen_toy_problem_2d():
    """Generate a noisy 2D linear problem with outliers in the targets.

    Returns
    -------
    X : ndarray of shape (100, 2)
    y : ndarray of shape (100,)
    w : ndarray of shape (2,)
        Coefficients of the underlying model, ``[5.0, 10.0]``.
    c : float
        Offset of the underlying model (1.0).
    """
    rng = np.random.RandomState(0)
    n_samples = 100
    n_outliers = n_samples // 10
    w = np.array([5.0, 10.0])
    c = 1.0

    # Linear model with Gaussian noise of standard deviation 0.1.
    X = rng.normal(size=(n_samples, 2))
    noise = 0.1 * rng.normal(size=n_samples)
    y = np.dot(X, w) + c + noise

    # Overwrite randomly chosen targets (indices may repeat) with large values.
    outlier_idx = rng.randint(0, n_samples, size=n_outliers)
    y[outlier_idx] = 50 * rng.normal(size=n_outliers)
    return X, y, w, c
def gen_toy_problem_4d():
    """Generate a large noisy 4D linear problem with outliers in the targets.

    Returns
    -------
    X : ndarray of shape (10000, 4)
    y : ndarray of shape (10000,)
    w : ndarray of shape (4,)
        Coefficients of the underlying model, ``[5.0, 10.0, 42.0, 7.0]``.
    c : float
        Offset of the underlying model (1.0).
    """
    rng = np.random.RandomState(0)
    n_samples = 10000
    n_outliers = n_samples // 10
    w = np.array([5.0, 10.0, 42.0, 7.0])
    c = 1.0

    # Linear model with Gaussian noise of standard deviation 0.1.
    X = rng.normal(size=(n_samples, 4))
    noise = 0.1 * rng.normal(size=n_samples)
    y = np.dot(X, w) + c + noise

    # Overwrite randomly chosen targets (indices may repeat) with large values.
    outlier_idx = rng.randint(0, n_samples, size=n_outliers)
    y[outlier_idx] = 50 * rng.normal(size=n_outliers)
    return X, y, w, c
def test_modweiszfeld_step_1d():
    """Check one modified Weiszfeld step on 1D data from several start values."""
    X = np.array([1.0, 2.0, 3.0]).reshape(3, 1)
    median = 2.0

    # Start value is an element of X and already the solution: no move.
    step = _modified_weiszfeld_step(X, median)
    assert_array_almost_equal(step, median)

    # Start value is not the solution, first outside of X (2.5), then an
    # element of X (3.0): the step lands strictly between median and start.
    for start in (2.5, 3.0):
        step = _modified_weiszfeld_step(X, start)
        assert_array_less(median, step)
        assert_array_less(step, start)

    # Check that a single vector is left unchanged by the step.
    single_row = np.array([1.0, 2.0, 3.0]).reshape(1, 3)
    y = single_row[0]
    step = _modified_weiszfeld_step(single_row, y)
    assert_array_equal(y, step)
def test_modweiszfeld_step_2d():
    """Check the first two modified Weiszfeld steps and a fixed point in 2D."""
    X = np.array([0.0, 0.0, 1.0, 1.0, 0.0, 1.0]).reshape(3, 2)
    start = np.array([0.5, 0.5])

    # Check first two iterations
    first = _modified_weiszfeld_step(X, start)
    assert_array_almost_equal(first, np.array([1 / 3, 2 / 3]))
    second = _modified_weiszfeld_step(X, first)
    assert_array_almost_equal(second, np.array([0.2792408, 0.7207592]))

    # Check fixed point: a step from it returns the same point
    fixed_point = np.array([0.21132505, 0.78867497])
    after_step = _modified_weiszfeld_step(X, fixed_point)
    assert_array_almost_equal(after_step, fixed_point)
def test_spatial_median_1d():
    """In 1D the spatial median equals the ordinary median."""
    small_X = np.array([1.0, 2.0, 3.0]).reshape(3, 1)
    _, median = _spatial_median(small_X)
    assert_array_almost_equal(median, 2.0)

    # Test larger problem and for exact solution in 1d case
    rng = np.random.RandomState(0)
    large_X = rng.randint(100, size=(1000, 1))
    expected_median = np.median(large_X.ravel())
    _, median = _spatial_median(large_X)
    assert_array_equal(median, expected_median)
def test_spatial_median_2d():
    """The 2D spatial median minimizes the sum of distances to the points."""
    X = np.array([0.0, 0.0, 1.0, 1.0, 0.0, 1.0]).reshape(3, 2)
    _, median = _spatial_median(X, max_iter=100, tol=1.0e-6)

    def sum_of_distances(point):
        # Objective of the Fermat-Weber location problem.
        distances = np.array([norm(row - point) for row in X])
        return np.sum(distances)

    # Check if median is solution of the Fermat-Weber location problem:
    # a BFGS search started at the median must stay there.
    fermat_weber = fmin_bfgs(sum_of_distances, median, disp=False)
    assert_array_almost_equal(median, fermat_weber)

    # Check that a warning is emitted when the iteration budget is exhausted
    warning_message = "Maximum number of iterations 30 reached in spatial median."
    with pytest.warns(ConvergenceWarning, match=warning_message):
        _spatial_median(X, max_iter=30, tol=0.0)
def test_theil_sen_1d():
    """Theil-Sen recovers the 1D model where least squares is thrown off."""
    X, y, true_coef, true_intercept = gen_toy_problem_1d()

    # Check that Least Squares fails
    least_squares = LinearRegression().fit(X, y)
    assert np.abs(least_squares.coef_ - true_coef) > 0.9

    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, true_coef, 1)
    assert_array_almost_equal(theil_sen.intercept_, true_intercept, 1)
def test_theil_sen_1d_no_intercept():
    """Check Theil-Sen with ``fit_intercept=False`` on the 1D toy problem."""
    X, y, w, c = gen_toy_problem_1d(intercept=False)
    # Reference value the fitted coefficient is compared against.
    expected_coef = w + c

    # Check that Least Squares fails
    least_squares = LinearRegression(fit_intercept=False).fit(X, y)
    assert np.abs(least_squares.coef_ - w - c) > 0.5

    # Check that Theil-Sen works
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, expected_coef, 1)
    assert_almost_equal(theil_sen.intercept_, 0.0)

    # non-regression test for #18104: scoring must not raise
    theil_sen.score(X, y)
def test_theil_sen_2d():
    """Theil-Sen recovers the 2D model where least squares is thrown off."""
    X, y, true_coef, true_intercept = gen_toy_problem_2d()

    # Check that Least Squares fails
    least_squares = LinearRegression().fit(X, y)
    assert norm(least_squares.coef_ - true_coef) > 1.0

    # Check that Theil-Sen works
    regressor = TheilSenRegressor(max_subpopulation=1e3, random_state=0)
    theil_sen = regressor.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, true_coef, 1)
    assert_array_almost_equal(theil_sen.intercept_, true_intercept, 1)
def test_calc_breakdown_point():
    """For a huge sample count and 2 subsamples the value is ~ 1 - 1/sqrt(2)."""
    breakdown = _breakdown_point(1e10, 2)
    deviation = breakdown - 1 + 1 / (np.sqrt(2))
    assert np.abs(deviation) < 1.0e-6
@pytest.mark.parametrize(
    "param, ExceptionCls, match",
    [
        (
            {"n_subsamples": 1},
            ValueError,
            re.escape("Invalid parameter since n_features+1 > n_subsamples (2 > 1)"),
        ),
        (
            {"n_subsamples": 101},
            ValueError,
            re.escape("Invalid parameter since n_subsamples > n_samples (101 > 50)"),
        ),
    ],
)
def test_checksubparams_invalid_input(param, ExceptionCls, match):
    """Invalid ``n_subsamples`` values are rejected at fit time."""
    X, y, _, _ = gen_toy_problem_1d()
    regressor = TheilSenRegressor(**param, random_state=0)
    # The error is raised by fit, not by the constructor.
    with pytest.raises(ExceptionCls, match=match):
        regressor.fit(X, y)
def test_checksubparams_n_subsamples_if_less_samples_than_features():
    """``n_subsamples=9`` is rejected when there are 10 samples, 20 features."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = rng.normal(size=(n_samples, n_features))
    y = rng.normal(size=n_samples)

    regressor = TheilSenRegressor(n_subsamples=9, random_state=0)
    with pytest.raises(ValueError):
        regressor.fit(X, y)
def test_subpopulation():
    """Theil-Sen still recovers the 4D model with ``max_subpopulation=250``."""
    X, y, true_coef, true_intercept = gen_toy_problem_4d()
    regressor = TheilSenRegressor(max_subpopulation=250, random_state=0)
    theil_sen = regressor.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, true_coef, 1)
    assert_array_almost_equal(theil_sen.intercept_, true_intercept, 1)
def test_subsamples():
    """With ``n_subsamples == n_samples`` Theil-Sen matches least squares."""
    X, y, _, _ = gen_toy_problem_4d()
    n_samples = X.shape[0]
    theil_sen = TheilSenRegressor(n_subsamples=n_samples, random_state=0).fit(X, y)
    least_squares = LinearRegression().fit(X, y)
    # Check for exactly the same results as Least Squares
    assert_array_almost_equal(theil_sen.coef_, least_squares.coef_, 9)
@pytest.mark.thread_unsafe  # manually captured stdout
def test_verbosity():
    """Fitting with ``verbose=True`` runs, with its output silenced."""
    X, y, _, _ = gen_toy_problem_1d()
    # Check that Theil-Sen can be verbose, with and without a small
    # max_subpopulation.
    with no_stdout_stderr():
        for extra_params in ({}, {"max_subpopulation": 10}):
            TheilSenRegressor(verbose=True, random_state=0, **extra_params).fit(X, y)
def test_theil_sen_parallel():
    """Theil-Sen recovers the 2D model when fitted with ``n_jobs=2``."""
    X, y, true_coef, true_intercept = gen_toy_problem_2d()

    # Check that Least Squares fails
    least_squares = LinearRegression().fit(X, y)
    assert norm(least_squares.coef_ - true_coef) > 1.0

    # Check that Theil-Sen works
    regressor = TheilSenRegressor(n_jobs=2, random_state=0, max_subpopulation=2e3)
    theil_sen = regressor.fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, true_coef, 1)
    assert_array_almost_equal(theil_sen.intercept_, true_intercept, 1)
def test_less_samples_than_features():
    """Check Theil-Sen when there are fewer samples (10) than features (20)."""
    rng = np.random.RandomState(0)
    n_samples, n_features = 10, 20
    X = rng.normal(size=(n_samples, n_features))
    y = rng.normal(size=n_samples)

    # Check that Theil-Sen falls back to Least Squares if fit_intercept=False
    theil_sen = TheilSenRegressor(fit_intercept=False, random_state=0).fit(X, y)
    least_squares = LinearRegression(fit_intercept=False).fit(X, y)
    assert_array_almost_equal(theil_sen.coef_, least_squares.coef_, 12)

    # Check fit_intercept=True case. This will not be equal to the Least
    # Squares solution since the intercept is calculated differently, so only
    # the training predictions are compared with the targets.
    theil_sen = TheilSenRegressor(fit_intercept=True, random_state=0).fit(X, y)
    predictions = theil_sen.predict(X)
    assert_array_almost_equal(predictions, y, 12)