This commit is contained in:
2026-04-10 15:06:59 +02:00
parent 3031b7153b
commit e5a4711004
7806 changed files with 1918528 additions and 335 deletions

View File

@@ -0,0 +1,31 @@
Copyright (c) 2007-2014 The LIBLINEAR Project.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither name of copyright holders nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@@ -0,0 +1,16 @@
#ifndef _CYTHON_BLAS_HELPERS_H
#define _CYTHON_BLAS_HELPERS_H

/*
 * Function-pointer types mirroring the double-precision BLAS level-1
 * routines (dot, axpy, scal, nrm2).  The Cython layer fills a
 * BlasFunctions table with scipy's CBLAS wrappers so the C/C++ solver
 * code can call optimized BLAS without linking against it directly.
 * Argument order follows CBLAS: (n, [alpha,] x, incx[, y, incy]).
 */
typedef double (*dot_func)(int, const double*, int, const double*, int);
typedef void (*axpy_func)(int, double, const double*, int, double*, int);
/* NOTE(review): dscal overwrites its vector argument in place, so the
   const qualifier on the array looks suspicious — confirm against the
   Cython wrapper that provides this function. */
typedef void (*scal_func)(int, double, const double*, int);
typedef double (*nrm2_func)(int, const double*, int);

/* Table of BLAS entry points passed down into train()/TRON. */
typedef struct BlasFunctions{
    dot_func dot;
    axpy_func axpy;
    scal_func scal;
    nrm2_func nrm2;
} BlasFunctions;
#endif

View File

@@ -0,0 +1,236 @@
#include <stdlib.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "linear.h"
/*
* Convert matrix to sparse representation suitable for liblinear. x is
* expected to be an array of length n_samples*n_features.
*
* Whether the matrix is densely or sparsely populated, the fastest way to
* convert it to liblinear's sparse format is to calculate the amount of memory
* needed and allocate a single big block.
*
* Special care must be taken with indices, since liblinear indices start at 1
* and not at 0.
*
* If bias is > 0, we append an item at the end.
*/
static struct feature_node **dense_to_sparse(char *x, int double_precision,
    int n_samples, int n_features, int n_nonzero, double bias)
{
    /*
     * Convert a dense row-major (n_samples x n_features) matrix — float32
     * or float64 depending on double_precision — into liblinear's sparse
     * row format.  All feature_nodes live in ONE malloc'd block (T);
     * sparse[i] points at the start of row i inside that block, so
     * freeing sparse[0] releases every node (see free_problem).
     * Returns NULL on allocation failure.
     */
    float *x32 = (float *)x;
    double *x64 = (double *)x;
    struct feature_node **sparse;
    int i, j; /* i: sample index; j: 1-based feature index (liblinear convention) */
    struct feature_node *T; /* pointer to the top of the stack */
    int have_bias = (bias > 0);

    sparse = malloc (n_samples * sizeof(struct feature_node *));
    if (sparse == NULL)
        return NULL;

    /* +1 per row for the sentinel, +1 more if a bias column is appended */
    n_nonzero += (have_bias+1) * n_samples;
    T = malloc (n_nonzero * sizeof(struct feature_node));
    if (T == NULL) {
        free(sparse);
        return NULL;
    }

    for (i=0; i<n_samples; ++i) {
        sparse[i] = T;

        for (j=1; j<=n_features; ++j) {
            if (double_precision) {
                if (*x64 != 0) {    /* store only non-zero entries */
                    T->value = *x64;
                    T->index = j;
                    ++ T;
                }
                ++ x64; /* go to next element */
            } else {
                if (*x32 != 0) {
                    T->value = *x32;
                    T->index = j;
                    ++ T;
                }
                ++ x32; /* go to next element */
            }
        }

        /* set bias element (j == n_features + 1 after the loop) */
        if (have_bias) {
            T->value = bias;
            T->index = j;
            ++ T;
        }

        /* set sentinel: index -1 terminates the row */
        T->index = -1;
        ++ T;
    }

    return sparse;
}
/*
* Convert scipy.sparse.csr to liblinear's sparse data structure
*/
static struct feature_node **csr_to_sparse(char *x, int double_precision,
    int *indices, int *indptr, int n_samples, int n_features, int n_nonzero,
    double bias)
{
    /*
     * Convert CSR data (values x, column indices, row pointers indptr)
     * into liblinear's sparse row format.  As in dense_to_sparse, all
     * nodes are placed in a single malloc'd block so free_problem can
     * release them with free(sparse[0]).  Returns NULL on OOM.
     */
    float *x32 = (float *)x;
    double *x64 = (double *)x;
    struct feature_node **sparse;
    int i, j=0, k=0, n;   /* k: running position in the CSR value/index arrays */
    struct feature_node *T;
    int have_bias = (bias > 0);

    sparse = malloc (n_samples * sizeof(struct feature_node *));
    if (sparse == NULL)
        return NULL;

    /* +1 per row for the sentinel, +1 more for the optional bias column */
    n_nonzero += (have_bias+1) * n_samples;
    T = malloc (n_nonzero * sizeof(struct feature_node));
    if (T == NULL) {
        free(sparse);
        return NULL;
    }

    for (i=0; i<n_samples; ++i) {
        sparse[i] = T;
        n = indptr[i+1] - indptr[i]; /* count elements in row i */

        for (j=0; j<n; ++j) {
            T->value = double_precision ? x64[k] : x32[k];
            T->index = indices[k] + 1; /* liblinear uses 1-based indexing */
            ++T;
            ++k;
        }

        if (have_bias) {
            T->value = bias;
            T->index = n_features + 1;   /* bias is an extra trailing column */
            ++T;
            ++j;
        }

        /* set sentinel */
        T->index = -1;
        ++T;
    }

    return sparse;
}
struct problem * set_problem(char *X, int double_precision_X, int n_samples,
    int n_features, int n_nonzero, double bias, char* sample_weight,
    char *Y)
{
    /*
     * Allocate a liblinear problem from a dense feature matrix.
     * Y and sample_weight are aliased, not copied; X is converted via
     * dense_to_sparse.  Returns NULL on allocation failure.
     */
    struct problem *prob = malloc(sizeof(struct problem));
    if (prob == NULL)
        return NULL;

    prob->x = dense_to_sparse(X, double_precision_X, n_samples, n_features,
                              n_nonzero, bias);
    if (prob->x == NULL) {
        free(prob);
        return NULL;
    }

    prob->l = n_samples;
    /* a positive bias adds one implicit feature column */
    prob->n = n_features + (bias > 0);
    prob->y = (double *) Y;
    prob->W = (double *) sample_weight;
    prob->bias = bias;
    return prob;
}
struct problem * csr_set_problem (char *X, int double_precision_X,
    char *indices, char *indptr, int n_samples, int n_features,
    int n_nonzero, double bias, char *sample_weight, char *Y)
{
    /*
     * Allocate a liblinear problem from a CSR feature matrix.
     * Y and sample_weight are aliased, not copied.  Returns NULL on OOM.
     */
    struct problem *prob = malloc (sizeof (struct problem));
    if (prob == NULL)
        return NULL;

    prob->x = csr_to_sparse(X, double_precision_X, (int *) indices,
                            (int *) indptr, n_samples, n_features,
                            n_nonzero, bias);
    if (prob->x == NULL) {
        free(prob);
        return NULL;
    }

    prob->l = n_samples;
    /* a positive bias adds one implicit feature column */
    prob->n = n_features + (bias > 0);
    prob->y = (double *) Y;
    prob->W = (double *) sample_weight;
    prob->bias = bias;
    return prob;
}
/* Create a parameter struct, fill it, and return it */
struct parameter *set_parameter(int solver_type, double eps, double C,
                                Py_ssize_t nr_weight, char *weight_label,
                                char *weight, int max_iter, unsigned seed,
                                double epsilon)
{
    /*
     * Allocate and populate a liblinear parameter struct.  weight_label
     * and weight are aliased, not copied.  Also seeds liblinear's RNG.
     * Returns NULL on allocation failure.
     */
    struct parameter *p = malloc(sizeof(struct parameter));
    if (p == NULL)
        return NULL;

    set_seed(seed);

    p->solver_type  = solver_type;
    p->eps          = eps;
    p->C            = C;
    p->p            = epsilon;              /* epsilon for epsilon-SVR */
    p->nr_weight    = (int) nr_weight;
    p->weight_label = (int *) weight_label;
    p->weight       = (double *) weight;
    p->max_iter     = max_iter;
    return p;
}
void copy_w(void *data, struct model *model, int len)
{
    /* Copy the first len coefficients of the trained model into the
       caller-provided buffer (which must hold len doubles). */
    memcpy(data, model->w, (size_t) len * sizeof(double));
}
/* Accessor for the model's bias value (< 0 means no bias term). */
double get_bias(struct model *model)
{
    return model->bias;
}
void free_problem(struct problem *problem)
{
    /*
     * Release a problem built by set_problem/csr_set_problem.
     * All feature_nodes were allocated as one block whose start is
     * problem->x[0], so two frees release every row.
     * y, W and the underlying matrix data are owned by the caller
     * (numpy buffers) and are NOT freed here.
     */
    if (problem == NULL)   /* tolerate NULL, like free() and free_param() */
        return;
    free(problem->x[0]);
    free(problem->x);
    free(problem);
}
/* Release a parameter struct from set_parameter.  The weight arrays it
   aliases belong to the caller and are not freed.  free(NULL) is a no-op. */
void free_parameter(struct parameter *param)
{
    free(param);
}
/* rely on built-in facility to control verbose output */

/* Sink that discards all solver output (verbose off). */
static void print_null(const char *s) {}

/* Sink that forwards solver output to stdout, flushing so messages
   interleave correctly with Python-level output. */
static void print_string_stdout(const char *s)
{
    fputs(s ,stdout);
    fflush(stdout);
}
/* provide convenience wrapper */
/* Route liblinear's diagnostic printing to stdout (non-zero flag) or
   silence it entirely (zero flag). */
void set_verbosity(int verbosity_flag){
    if (verbosity_flag)
        set_print_string_function(&print_string_stdout);
    else
        set_print_string_function(&print_null);
}

View File

@@ -0,0 +1,86 @@
#ifndef _LIBLINEAR_H
#define _LIBLINEAR_H

#ifdef __cplusplus
extern "C" {
#endif

#include "_cython_blas_helpers.h"

/* One (index, value) entry of a sparse feature vector.  Indices are
   1-based; an entry with index == -1 terminates each row. */
struct feature_node
{
    int index;
    double value;
};

/* A training set of l samples over n features. */
struct problem
{
    int l, n;                 /* number of samples / features (incl. bias column) */
    double *y;                /* targets, length l */
    struct feature_node **x;  /* per-sample sparse rows */
    double bias; /* < 0 if no bias term */
    double *W;                /* per-sample weights (scikit-learn extension) */
};

/* Solver identifiers; values 8-10 are intentionally unused upstream. */
enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */

/* Training hyper-parameters. */
struct parameter
{
    int solver_type;

    /* these are for training only */
    double eps;          /* stopping criteria */
    double C;            /* regularization strength (inverse) */
    int nr_weight;       /* number of per-class weight overrides */
    int *weight_label;   /* class labels the overrides apply to */
    double* weight;      /* the override factors */
    int max_iter;
    double p;            /* epsilon for epsilon-SVR */
};

/* A trained linear model. */
struct model
{
    struct parameter param;
    int nr_class;   /* number of classes */
    int nr_feature;
    double *w;      /* weight matrix, nr_feature (+1 for bias) x nr_class */
    int *label;     /* label of each class */
    double bias;
    int *n_iter;    /* no. of iterations of each class */
};

/* Seed liblinear's internal RNG (used by the dual solvers). */
void set_seed(unsigned seed);

/* Training / prediction entry points. */
struct model* train(const struct problem *prob, const struct parameter *param, BlasFunctions *blas_functions);
void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
double predict(const struct model *model_, const struct feature_node *x);
double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);

/* Model persistence and introspection. */
int save_model(const char *model_file_name, const struct model *model_);
struct model *load_model(const char *model_file_name);
int get_nr_feature(const struct model *model_);
int get_nr_class(const struct model *model_);
void get_labels(const struct model *model_, int* label);
void get_n_iter(const struct model *model_, int* n_iter);
#if 0
double get_decfun_coef(const struct model *model_, int feat_idx, int label_idx);
double get_decfun_bias(const struct model *model_, int label_idx);
#endif

/* Destruction and validation helpers. */
void free_model_content(struct model *model_ptr);
void free_and_destroy_model(struct model **model_ptr_ptr);
void destroy_param(struct parameter *param);
const char *check_parameter(const struct problem *prob, const struct parameter *param);
int check_probability_model(const struct model *model);
int check_regression_model(const struct model *model);
void set_print_string_function(void (*print_func) (const char*));

#ifdef __cplusplus
}
#endif

#endif /* _LIBLINEAR_H */

View File

@@ -0,0 +1,223 @@
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdarg.h>
#include "tron.h"
/* Local min/max templates, guarded in case a platform header already
   defines macros with these names. */
#ifndef min
template <class T> static inline T min(T x,T y) { return (x<y)?x:y; }
#endif
#ifndef max
template <class T> static inline T max(T x,T y) { return (x>y)?x:y; }
#endif
// Default output sink for TRON diagnostics: write to stdout and flush
// immediately so messages interleave with the caller's output.
static void default_print(const char *buf)
{
    fputs(buf,stdout);
    fflush(stdout);
}
// printf-style diagnostic output, routed through the configurable
// tron_print_string sink.  Output longer than BUFSIZ is truncated
// safely by vsnprintf.
void TRON::info(const char *fmt,...)
{
    char buf[BUFSIZ];
    va_list ap;
    va_start(ap,fmt);
    vsnprintf(buf,sizeof buf,fmt,ap);
    va_end(ap);
    (*tron_print_string)(buf);
}
// Construct a solver for the given objective.  The objective and BLAS
// table are aliased, not owned; both must outlive this TRON instance.
TRON::TRON(const function *fun_obj, double eps, int max_iter, BlasFunctions *blas)
{
    this->fun_obj=const_cast<function *>(fun_obj);
    this->eps=eps;
    this->max_iter=max_iter;
    this->blas=blas;
    tron_print_string = default_print;   // stdout until overridden
}
// Nothing to release: fun_obj and blas are borrowed, not owned.
TRON::~TRON()
{
}
// Trust Region Newton method: minimize fun_obj starting from w = 0.
// On return w holds the solution; the return value is the number of
// completed (accepted) iterations.
int TRON::tron(double *w)
{
    // Parameters for updating the iterates.
    double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75;
    // Parameters for updating the trust region size delta.
    double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4;

    int n = fun_obj->get_nr_variable();
    int i, cg_iter;
    double delta, snorm;
    double alpha, f, fnew, prered, actred, gs;
    int search = 1, iter = 1, inc = 1;
    double *s = new double[n];        // trial step
    double *r = new double[n];        // CG residual
    double *w_new = new double[n];    // candidate iterate
    double *g = new double[n];        // gradient

    // Start from the origin.
    for (i=0; i<n; i++)
        w[i] = 0;

    f = fun_obj->fun(w);
    fun_obj->grad(w, g);
    delta = blas->nrm2(n, g, inc);    // initial trust-region radius = ||g||
    double gnorm1 = delta;
    double gnorm = gnorm1;

    if (gnorm <= eps*gnorm1)          // degenerate: already stationary
        search = 0;

    iter = 1;

    while (iter <= max_iter && search)
    {
        // Approximately solve the trust-region subproblem by CG.
        cg_iter = trcg(delta, g, s, r);

        memcpy(w_new, w, sizeof(double)*n);
        blas->axpy(n, 1.0, s, inc, w_new, inc);

        gs = blas->dot(n, g, inc, s, inc);
        // Predicted reduction of the quadratic model along s.
        prered = -0.5*(gs - blas->dot(n, s, inc, r, inc));
        fnew = fun_obj->fun(w_new);

        // Compute the actual reduction.
        actred = f - fnew;

        // On the first iteration, adjust the initial step bound.
        snorm = blas->nrm2(n, s, inc);
        if (iter == 1)
            delta = min(delta, snorm);

        // Compute prediction alpha*snorm of the step.
        if (fnew - f - gs <= 0)
            alpha = sigma3;
        else
            alpha = max(sigma1, -0.5*(gs/(fnew - f - gs)));

        // Update the trust region bound according to the ratio of actual to predicted reduction.
        if (actred < eta0*prered)
            delta = min(max(alpha, sigma1)*snorm, sigma2*delta);
        else if (actred < eta1*prered)
            delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta));
        else if (actred < eta2*prered)
            delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta));
        else
            delta = max(delta, min(alpha*snorm, sigma3*delta));

        info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter);

        // Accept the step only on sufficient actual decrease.
        if (actred > eta0*prered)
        {
            iter++;
            memcpy(w, w_new, sizeof(double)*n);
            f = fnew;
            fun_obj->grad(w, g);

            gnorm = blas->nrm2(n, g, inc);
            if (gnorm <= eps*gnorm1)     // relative gradient-norm stop rule
                break;
        }
        if (f < -1.0e+32)
        {
            info("WARNING: f < -1.0e+32\n");
            break;
        }
        if (fabs(actred) <= 0 && prered <= 0)
        {
            info("WARNING: actred and prered <= 0\n");
            break;
        }
        if (fabs(actred) <= 1.0e-12*fabs(f) &&
            fabs(prered) <= 1.0e-12*fabs(f))
        {
            info("WARNING: actred and prered too small\n");
            break;
        }
    }

    delete[] g;
    delete[] r;
    delete[] w_new;
    delete[] s;
    return --iter;   // iter was pre-incremented; report completed iterations
}
// Conjugate gradient solver for the trust-region subproblem:
// approximately minimize the quadratic model within radius delta.
// Outputs: s = step, r = residual (-g - H*s).  Returns CG iterations.
int TRON::trcg(double delta, double *g, double *s, double *r)
{
    int i, inc = 1;
    int n = fun_obj->get_nr_variable();
    double *d = new double[n];    // search direction
    double *Hd = new double[n];   // Hessian-vector product H*d
    double rTr, rnewTrnew, alpha, beta, cgtol;

    // Start from s = 0, r = d = -g.
    for (i=0; i<n; i++)
    {
        s[i] = 0;
        r[i] = -g[i];
        d[i] = r[i];
    }
    cgtol = 0.1 * blas->nrm2(n, g, inc);   // inexact-Newton tolerance

    int cg_iter = 0;
    rTr = blas->dot(n, r, inc, r, inc);

    while (1)
    {
        if (blas->nrm2(n, r, inc) <= cgtol)
            break;
        cg_iter++;
        fun_obj->Hv(d, Hd);

        alpha = rTr / blas->dot(n, d, inc, Hd, inc);
        blas->axpy(n, alpha, d, inc, s, inc);
        if (blas->nrm2(n, s, inc) > delta)
        {
            // Step left the trust region: back it out and instead move to
            // the boundary along d by solving ||s + alpha*d|| = delta.
            info("cg reaches trust region boundary\n");
            alpha = -alpha;
            blas->axpy(n, alpha, d, inc, s, inc);

            double std = blas->dot(n, s, inc, d, inc);
            double sts = blas->dot(n, s, inc, s, inc);
            double dtd = blas->dot(n, d, inc, d, inc);
            double dsq = delta*delta;
            double rad = sqrt(std*std + dtd*(dsq-sts));
            // Two algebraically equivalent roots; pick the numerically
            // stable one depending on the sign of s.d.
            if (std >= 0)
                alpha = (dsq - sts)/(std + rad);
            else
                alpha = (rad - std)/dtd;
            blas->axpy(n, alpha, d, inc, s, inc);
            alpha = -alpha;
            blas->axpy(n, alpha, Hd, inc, r, inc);
            break;
        }
        alpha = -alpha;
        blas->axpy(n, alpha, Hd, inc, r, inc);

        // Standard CG update of the search direction.
        rnewTrnew = blas->dot(n, r, inc, r, inc);
        beta = rnewTrnew/rTr;
        blas->scal(n, beta, d, inc);
        blas->axpy(n, 1.0, r, inc, d, inc);
        rTr = rnewTrnew;
    }

    delete[] d;
    delete[] Hd;

    return(cg_iter);
}
// Infinity norm: the largest absolute entry of x[0..n-1] (n >= 1).
double TRON::norm_inf(int n, double *x)
{
    double best = fabs(x[0]);
    for (int i = 1; i < n; i++)
    {
        double a = fabs(x[i]);
        if (a > best)
            best = a;
    }
    return best;
}
// Redirect TRON's diagnostic output to a caller-supplied sink.
void TRON::set_print_string(void (*print_string) (const char *buf))
{
    tron_print_string = print_string;
}

View File

@@ -0,0 +1,37 @@
#ifndef _TRON_H
#define _TRON_H

#include "_cython_blas_helpers.h"

/* Abstract objective to be minimized: value, gradient and Hessian-vector
   product, plus the number of variables. */
class function
{
public:
    virtual double fun(double *w) = 0 ;
    virtual void grad(double *w, double *g) = 0 ;
    virtual void Hv(double *s, double *Hs) = 0 ;

    virtual int get_nr_variable(void) = 0 ;
    virtual ~function(void){}
};

/* Trust Region Newton optimizer (Lin & Moré).  Borrows, does not own,
   the objective and the BLAS function table. */
class TRON
{
public:
    TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000, BlasFunctions *blas = 0);
    ~TRON();

    int tron(double *w);   // run; returns number of completed iterations
    void set_print_string(void (*i_print) (const char *buf));

private:
    int trcg(double delta, double *g, double *s, double *r);  // CG subproblem
    double norm_inf(int n, double *x);

    double eps;            // relative gradient-norm stopping tolerance
    int max_iter;
    function *fun_obj;
    BlasFunctions *blas;
    void info(const char *fmt,...);
    void (*tron_print_string)(const char *buf);
};
#endif

View File

@@ -0,0 +1,11 @@
Changes to Libsvm
This is here mainly as a checklist for incorporating new versions of libsvm.
* Add copyright to files svm.cpp and svm.h
* Add random_seed support and call to srand in fit function
* Improved random number generator (fix on windows, enhancement on other
platforms). See <https://github.com/scikit-learn/scikit-learn/pull/13511#issuecomment-481729756>
* invoke scipy blas api for svm kernel function to improve performance with speedup rate of 1.5X to 2X for dense data only. See <https://github.com/scikit-learn/scikit-learn/pull/16530>
* Expose the number of iterations run in optimization. See <https://github.com/scikit-learn/scikit-learn/pull/21408>
The changes made with respect to upstream are detailed in the heading of svm.cpp

View File

@@ -0,0 +1,9 @@
#ifndef _SVM_CYTHON_BLAS_HELPERS_H
#define _SVM_CYTHON_BLAS_HELPERS_H

/* Double-precision dot product with CBLAS argument order
   (n, x, incx, y, incy); filled in from scipy's BLAS by the Cython
   layer and used by libsvm's kernel evaluations. */
typedef double (*dot_func)(int, const double*, int, const double*, int);
typedef struct BlasFunctions{
    dot_func dot;
} BlasFunctions;
#endif

View File

@@ -0,0 +1,425 @@
#include <stdlib.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "svm.h"
#include "_svm_cython_blas_helpers.h"
#ifndef MAX
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
/*
* Some helper methods for libsvm bindings.
*
* We need to access from python some parameters stored in svm_model
* but libsvm does not expose this structure, so we define it here
* along some utilities to convert from numpy arrays.
*
* Authors: The scikit-learn developers
* SPDX-License-Identifier: BSD-3-Clause
*
*/
/*
* Convert matrix to sparse representation suitable for libsvm. x is
* expected to be an array of length nrow*ncol.
*
* Typically the matrix will be dense, so we speed up the routine for
* this case. We create a temporary array temp that collects non-zero
* elements and after we just memcpy that to the proper array.
*
* Special care must be taken with indices, since libsvm indices start
* at 1 and not at 0.
*
* Strictly speaking, the C standard does not require that structs are
* contiguous, but in practice it's a reasonable assumption.
*
*/
struct svm_node *dense_to_libsvm (double *x, Py_ssize_t *dims)
{
    /*
     * Wrap a dense (dims[0] x dims[1]) row-major double matrix as an
     * array of libsvm dense nodes.  No data is copied: each node aliases
     * one row of x, so x must outlive the returned array.  The caller
     * frees the result with free().  Returns NULL on allocation failure.
     */
    struct svm_node *node;
    Py_ssize_t len_row = dims[1];
    double *tx = x;
    /* Py_ssize_t (was int): dims[0] is Py_ssize_t and the loop index
       must not overflow when there are more than INT_MAX rows. */
    Py_ssize_t i;

    node = malloc (dims[0] * sizeof(struct svm_node));
    if (node == NULL) return NULL;

    for (i=0; i<dims[0]; ++i) {
        node[i].values = tx;
        node[i].dim = (int) len_row;
        node[i].ind = (int) i; /* only used if kernel=precomputed, but not
                                  too much overhead */
        tx += len_row;
    }
    return node;
}
/*
* Fill an svm_parameter struct.
*/
/*
 * Fill a caller-allocated svm_parameter struct from scalar settings.
 * weight_label and weight are aliased (cast), not copied, so the numpy
 * buffers behind them must stay alive while the parameters are in use.
 */
void set_parameter(struct svm_parameter *param, int svm_type, int kernel_type, int degree,
                   double gamma, double coef0, double nu, double cache_size, double C,
                   double eps, double p, int shrinking, int probability, int nr_weight,
                   char *weight_label, char *weight, int max_iter, int random_seed)
{
    param->svm_type = svm_type;
    param->kernel_type = kernel_type;
    param->degree = degree;
    param->coef0 = coef0;
    param->nu = nu;
    param->cache_size = cache_size;
    param->C = C;
    param->eps = eps;
    param->p = p;                      /* epsilon for epsilon-SVR */
    param->shrinking = shrinking;
    param->probability = probability;
    param->nr_weight = nr_weight;
    param->weight_label = (int *) weight_label;
    param->weight = (double *) weight;
    param->gamma = gamma;
    param->max_iter = max_iter;
    param->random_seed = random_seed;
}
/*
* Fill an svm_problem struct. problem->x will be malloc'd.
*/
/*
 * Fill a caller-allocated svm_problem from dense numpy buffers.
 * Y and sample_weight are aliased; problem->x is malloc'd by
 * dense_to_libsvm.
 * NOTE(review): if dense_to_libsvm runs out of memory, problem->x is
 * left NULL and no error is reported here — the caller must check
 * problem->x before use.
 */
void set_problem(struct svm_problem *problem, char *X, char *Y, char *sample_weight, Py_ssize_t *dims, int kernel_type)
{
    if (problem == NULL) return;
    problem->l = (int) dims[0]; /* number of samples */
    problem->y = (double *) Y;
    problem->x = dense_to_libsvm((double *) X, dims); /* implicit call to malloc */
    problem->W = (double *) sample_weight;
}
/*
* Create and return an instance of svm_model.
*
* The copy of model->sv_coef should be straightforward, but
* unfortunately to represent a matrix numpy and libsvm use different
* approaches, so it requires some iteration.
*
* Possible issue: on 64 bits, the number of columns that numpy can
* store is a long, but libsvm enforces this number (model->l) to be
* an int, so we might have numpy matrices that do not fit into libsvm's
* data structure.
*
*/
struct svm_model *set_model(struct svm_parameter *param, int nr_class,
                            char *SV, Py_ssize_t *SV_dims,
                            char *support, Py_ssize_t *support_dims,
                            Py_ssize_t *sv_coef_strides,
                            char *sv_coef, char *rho, char *nSV,
                            char *probA, char *probB)
{
    /*
     * Rebuild an svm_model from numpy-backed buffers (the inverse of the
     * copy_* accessors).  sv_coef rows alias the flat sv_coef buffer, so
     * it must outlive the model.  Returns NULL on allocation failure;
     * goto-based cleanup unwinds resources in reverse acquisition order.
     */
    struct svm_model *model;
    double *dsv_coef = (double *) sv_coef;
    int i, m;

    m = nr_class * (nr_class-1)/2;   /* number of binary class pairs */

    if ((model = malloc(sizeof(struct svm_model))) == NULL)
        goto model_error;
    if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL)
        goto nsv_error;
    if ((model->label = malloc(nr_class * sizeof(int))) == NULL)
        goto label_error;
    if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL)
        goto sv_coef_error;
    if ((model->rho = malloc( m * sizeof(double))) == NULL)
        goto rho_error;

    /* n_iter is only allocated in dynamic memory while training. */
    model->n_iter = NULL;

    model->nr_class = nr_class;
    model->param = *param;
    model->l = (int) support_dims[0];

    if (param->kernel_type == PRECOMPUTED) {
        if ((model->SV = malloc ((model->l) * sizeof(struct svm_node))) == NULL)
            goto SV_error;
        for (i=0; i<model->l; ++i) {
            model->SV[i].ind = ((int *) support)[i];
            model->SV[i].values = NULL;
        }
    } else {
        model->SV = dense_to_libsvm((double *) SV, SV_dims);
        /* Fix: this OOM case was previously unchecked, leaking the model
           and crashing on first use of model->SV. */
        if (model->SV == NULL)
            goto SV_error;
    }

    /*
     * regression and one-class does not use nSV, label.
     * TODO: does this provoke memory leaks (we just malloc'ed them)?
     */
    if (param->svm_type < 2) {
        memcpy(model->nSV, nSV, model->nr_class * sizeof(int));
        for(i=0; i < model->nr_class; i++)
            model->label[i] = i;
    }

    /* sv_coef rows alias the flat numpy buffer; no copy is made. */
    for (i=0; i < model->nr_class-1; i++) {
        model->sv_coef[i] = dsv_coef + i*(model->l);
    }

    /* libsvm stores the intercept negated (rho = -intercept). */
    for (i=0; i<m; ++i) {
        (model->rho)[i] = -((double *) rho)[i];
    }

    /*
     * just to avoid segfaults, these features are not wrapped but
     * svm_destroy_model will try to free them.
     */
    if (param->probability) {
        if ((model->probA = malloc(m * sizeof(double))) == NULL)
            goto probA_error;
        memcpy(model->probA, probA, m * sizeof(double));
        if ((model->probB = malloc(m * sizeof(double))) == NULL)
            goto probB_error;
        memcpy(model->probB, probB, m * sizeof(double));
    } else {
        model->probA = NULL;
        model->probB = NULL;
    }

    /* We'll free SV ourselves */
    model->free_sv = 0;
    return model;

probB_error:
    free(model->probA);
probA_error:
    free(model->SV);
SV_error:
    free(model->rho);
rho_error:
    free(model->sv_coef);
sv_coef_error:
    free(model->label);
label_error:
    free(model->nSV);
nsv_error:
    free(model);
model_error:
    return NULL;
}
/*
* Get the number of support vectors in a model.
*/
/* Number of support vectors in a model. */
Py_ssize_t get_l(struct svm_model *model)
{
    return (Py_ssize_t) model->l;
}

/*
 * Get the number of classes in a model, = 2 in regression/one class
 * svm.
 */
Py_ssize_t get_nr(struct svm_model *model)
{
    return (Py_ssize_t) model->nr_class;
}
/*
* Get the number of iterations run in optimization
*/
/* Copy per-classifier iteration counts into data, which must hold
   max(1, nr_class*(nr_class-1)/2) ints (one per binary subproblem). */
void copy_n_iter(char *data, struct svm_model *model)
{
    const int n_models = MAX(1, model->nr_class * (model->nr_class-1) / 2);
    memcpy(data, model->n_iter, n_models * sizeof(int));
}
/*
* Some helpers to convert from libsvm sparse data structures
* model->sv_coef is a double **, whereas data is just a double *,
* so we have to do some stupid copying.
*/
/* Flatten model->sv_coef (a double**, nr_class-1 rows of length l) into
   the contiguous buffer data, row by row. */
void copy_sv_coef(char *data, struct svm_model *model)
{
    int i, len = model->nr_class-1;
    double *temp = (double *) data;
    for(i=0; i<len; ++i) {
        memcpy(temp, model->sv_coef[i], sizeof(double) * model->l);
        temp += model->l;
    }
}
void copy_intercept(char *data, struct svm_model *model, Py_ssize_t *dims)
{
    /* intercept = -rho; map -0.0 to 0.0 so output looks clean. */
    double *out = (double *) data;
    Py_ssize_t i, count = dims[0];

    for (i = 0; i < count; ++i) {
        double r = model->rho[i];
        out[i] = (r != 0) ? -r : 0;
    }
}
/*
* This is a bit more complex since SV are stored as sparse
* structures, so we have to do the conversion on the fly and also
* iterate fast over data.
*/
/* Copy the dense support vectors into data (n rows of SV[0].dim doubles).
   All rows share the same dim since they come from one dense matrix.
   dims is currently unused. */
void copy_SV(char *data, struct svm_model *model, Py_ssize_t *dims)
{
    int i, n = model->l;
    double *tdata = (double *) data;
    int dim = model->SV[0].dim;
    for (i=0; i<n; ++i) {
        memcpy (tdata, model->SV[i].values, dim * sizeof(double));
        tdata += dim;
    }
}
/* Copy the indices of the support vectors (model->sv_ind) into data,
   which must hold model->l ints. */
void copy_support (char *data, struct svm_model *model)
{
    memcpy (data, model->sv_ind, (model->l) * sizeof(int));
}
/*
* copy svm_model.nSV, an array with the number of SV for each class
* will be NULL in the case of SVR, OneClass
*/
/* Copy svm_model.nSV, the per-class support-vector counts; no-op for
   SVR/OneClass models (detected via a NULL label array). */
void copy_nSV(char *data, struct svm_model *model)
{
    if (model->label == NULL) return;
    memcpy(data, model->nSV, model->nr_class * sizeof(int));
}

/* Copy the Platt-scaling parameters (dims[0] doubles each). */
void copy_probA(char *data, struct svm_model *model, Py_ssize_t * dims)
{
    memcpy(data, model->probA, dims[0] * sizeof(double));
}

void copy_probB(char *data, struct svm_model *model, Py_ssize_t * dims)
{
    memcpy(data, model->probB, dims[0] * sizeof(double));
}
/*
* Predict using model.
*
* It will return -1 if we run out of memory.
*/
/*
 * Predict labels for a dense matrix of samples, writing one double per
 * sample into dec_values.  Returns 0 on success, -1 on out-of-memory.
 */
int copy_predict(char *predict, struct svm_model *model, Py_ssize_t *predict_dims,
                 char *dec_values, BlasFunctions *blas_functions)
{
    double *t = (double *) dec_values;
    struct svm_node *predict_nodes;
    Py_ssize_t i;

    /* temporary node array aliasing the input rows */
    predict_nodes = dense_to_libsvm((double *) predict, predict_dims);

    if (predict_nodes == NULL)
        return -1;
    for(i=0; i<predict_dims[0]; ++i) {
        *t = svm_predict(model, &predict_nodes[i], blas_functions);
        ++t;
    }
    free(predict_nodes);
    return 0;
}
/*
 * Compute decision-function values for each sample; dec_values receives
 * nr_class values per sample.  Returns 0 on success, -1 on OOM.
 */
int copy_predict_values(char *predict, struct svm_model *model,
                        Py_ssize_t *predict_dims, char *dec_values, int nr_class, BlasFunctions *blas_functions)
{
    Py_ssize_t i;
    struct svm_node *predict_nodes;

    predict_nodes = dense_to_libsvm((double *) predict, predict_dims);

    if (predict_nodes == NULL)
        return -1;
    for(i=0; i<predict_dims[0]; ++i) {
        svm_predict_values(model, &predict_nodes[i],
                           ((double *) dec_values) + i*nr_class,
                           blas_functions);
    }

    free(predict_nodes);
    return 0;
}
/*
 * Compute class-membership probabilities; dec_values receives nr_class
 * probabilities per sample.  Returns 0 on success, -1 on OOM.
 */
int copy_predict_proba(char *predict, struct svm_model *model, Py_ssize_t *predict_dims,
                       char *dec_values, BlasFunctions *blas_functions)
{
    Py_ssize_t i, n, m;
    struct svm_node *predict_nodes;

    n = predict_dims[0];
    m = (Py_ssize_t) model->nr_class;

    predict_nodes = dense_to_libsvm((double *) predict, predict_dims);

    if (predict_nodes == NULL)
        return -1;
    for(i=0; i<n; ++i) {
        svm_predict_probability(model, &predict_nodes[i],
                                ((double *) dec_values) + i*m,
                                blas_functions);
    }
    free(predict_nodes);
    return 0;
}
/*
* Some free routines. Some of them are nontrivial since a lot of
* sharing happens across objects (they *must* be called in the
* correct order)
*/
/*
 * Free a model built by set_model.  Like svm_free_and_destroy_model but
 * does NOT free sv_coef[i] (those rows alias a numpy buffer) nor sv_ind
 * and n_iter (not created by set_model).  Returns 0, or -1 for NULL.
 */
int free_model(struct svm_model *model)
{
    /* like svm_free_and_destroy_model, but does not free sv_coef[i] */
    if (model == NULL) return -1;
    free(model->SV);

    /* We don't free sv_ind and n_iter, since we did not create them in
       set_model */
    /* free(model->sv_ind);
     * free(model->n_iter);
     */
    free(model->sv_coef);
    free(model->rho);
    free(model->label);
    free(model->probA);
    free(model->probB);
    free(model->nSV);
    free(model);

    return 0;
}
/* Free a parameter struct (weight arrays it aliases belong to the
   caller).  Returns 0, or -1 for NULL. */
int free_param(struct svm_parameter *param)
{
    if (param == NULL) return -1;
    free(param);
    return 0;
}
/* borrowed from original libsvm code */

/* Sink discarding all libsvm output (verbose off). */
static void print_null(const char *s) {}

/* Sink forwarding libsvm output to stdout, flushed immediately. */
static void print_string_stdout(const char *s)
{
    fputs(s,stdout);
    fflush(stdout);
}
/* provide convenience wrapper */
/* Route libsvm's diagnostic printing to stdout (non-zero flag) or
   silence it (zero flag). */
void set_verbosity(int verbosity_flag){
    if (verbosity_flag)
        svm_set_print_string_function(&print_string_stdout);
    else
        svm_set_print_string_function(&print_null);
}

View File

@@ -0,0 +1,472 @@
#include <stdlib.h>
#define PY_SSIZE_T_CLEAN
#include <Python.h>
#include "svm.h"
#include "_svm_cython_blas_helpers.h"
#ifndef MAX
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#endif
/*
* Convert scipy.sparse.csr to libsvm's sparse data structure
*/
/*
 * Convert CSR arrays (values, column indices, row pointers) into
 * libsvm's sparse row format.  Unlike the liblinear helper, each row is
 * a separate malloc'd array terminated by a sentinel node (index -1),
 * so every sparse[i] must be freed individually.  Returns NULL on OOM
 * (already-built rows are released first).
 */
struct svm_csr_node **csr_to_libsvm (double *values, int* indices, int* indptr, int n_samples)
{
    struct svm_csr_node **sparse, *temp;
    int i, j=0, k=0, n;   /* k: running position in values/indices */
    sparse = malloc (n_samples * sizeof(struct svm_csr_node *));
    if (sparse == NULL)
        return NULL;

    for (i=0; i<n_samples; ++i) {
        n = indptr[i+1] - indptr[i]; /* count elements in row i */
        temp = malloc ((n+1) * sizeof(struct svm_csr_node));
        if (temp == NULL) {
            /* unwind rows allocated so far */
            for (j=0; j<i; j++)
                free(sparse[j]);
            free(sparse);
            return NULL;
        }

        for (j=0; j<n; ++j) {
            temp[j].value = values[k];
            temp[j].index = indices[k] + 1; /* libsvm uses 1-based indexing */
            ++k;
        }
        /* set sentinel */
        temp[n].index = -1;
        sparse[i] = temp;
    }

    return sparse;
}
/*
 * Allocate and fill an svm_parameter from scalar settings (sparse-path
 * twin of the dense helper's set_parameter).  weight_label and weight
 * are aliased, not copied.  Returns NULL on allocation failure.
 */
struct svm_parameter * set_parameter(int svm_type, int kernel_type, int degree,
                                     double gamma, double coef0, double nu, double cache_size, double C,
                                     double eps, double p, int shrinking, int probability, int nr_weight,
                                     char *weight_label, char *weight, int max_iter, int random_seed)
{
    struct svm_parameter *param;
    param = malloc(sizeof(struct svm_parameter));
    if (param == NULL) return NULL;
    param->svm_type = svm_type;
    param->kernel_type = kernel_type;
    param->degree = degree;
    param->coef0 = coef0;
    param->nu = nu;
    param->cache_size = cache_size;
    param->C = C;
    param->eps = eps;
    param->p = p;                      /* epsilon for epsilon-SVR */
    param->shrinking = shrinking;
    param->probability = probability;
    param->nr_weight = nr_weight;
    param->weight_label = (int *) weight_label;
    param->weight = (double *) weight;
    param->gamma = gamma;
    param->max_iter = max_iter;
    param->random_seed = random_seed;
    return param;
}
/*
* Create and return a svm_csr_problem struct from a scipy.sparse.csr matrix. It is
* up to the user to free resulting structure.
*
* TODO: precomputed kernel.
*/
struct svm_csr_problem * csr_set_problem (char *values, Py_ssize_t *n_indices,
    char *indices, Py_ssize_t *n_indptr, char *indptr, char *Y,
    char *sample_weight, int kernel_type) {
    /*
     * Allocate an svm_csr_problem from CSR numpy buffers.  The number of
     * samples is len(indptr) - 1.  Y and sample_weight are aliased, not
     * copied.  Returns NULL on allocation failure.
     */
    struct svm_csr_problem *problem;
    problem = malloc (sizeof (struct svm_csr_problem));
    if (problem == NULL) return NULL;
    problem->l = (int) n_indptr[0] - 1;
    problem->y = (double *) Y;
    problem->x = csr_to_libsvm((double *) values, (int *) indices,
                               (int *) indptr, problem->l);
    /* should be removed once we implement weighted samples */
    problem->W = (double *) sample_weight;

    if (problem->x == NULL) {
        free(problem);
        return NULL;
    }
    return problem;
}
/*
 * Rebuild an svm_csr_model from CSR numpy buffers (sparse twin of
 * set_model).  Unlike set_model, sv_coef rows are COPIED here because
 * svm_destroy_model frees each row.  Returns NULL on allocation
 * failure; goto-based cleanup unwinds in reverse acquisition order.
 * NOTE(review): the csr_to_libsvm result below is not checked for NULL
 * — an OOM there leaves model->SV NULL; confirm callers can tolerate
 * this or add a check as in the dense helper.
 */
struct svm_csr_model *csr_set_model(struct svm_parameter *param, int nr_class,
                                    char *SV_data, Py_ssize_t *SV_indices_dims,
                                    char *SV_indices, Py_ssize_t *SV_indptr_dims,
                                    char *SV_intptr,
                                    char *sv_coef, char *rho, char *nSV,
                                    char *probA, char *probB)
{
    struct svm_csr_model *model;
    double *dsv_coef = (double *) sv_coef;
    int i, m;

    m = nr_class * (nr_class-1)/2;   /* number of binary class pairs */

    if ((model = malloc(sizeof(struct svm_csr_model))) == NULL)
        goto model_error;
    if ((model->nSV = malloc(nr_class * sizeof(int))) == NULL)
        goto nsv_error;
    if ((model->label = malloc(nr_class * sizeof(int))) == NULL)
        goto label_error;
    if ((model->sv_coef = malloc((nr_class-1)*sizeof(double *))) == NULL)
        goto sv_coef_error;
    if ((model->rho = malloc( m * sizeof(double))) == NULL)
        goto rho_error;

    // This is only allocated in dynamic memory while training.
    model->n_iter = NULL;

    /* in the case of precomputed kernels we do not use
       dense_to_precomputed because we don't want the leading 0. As
       indices start at 1 (not at 0) this will work */
    model->l = (int) SV_indptr_dims[0] - 1;
    model->SV = csr_to_libsvm((double *) SV_data, (int *) SV_indices,
                              (int *) SV_intptr, model->l);
    model->nr_class = nr_class;
    model->param = *param;

    /*
     * regression and one-class does not use nSV, label.
     */
    if (param->svm_type < 2) {
        memcpy(model->nSV, nSV, model->nr_class * sizeof(int));
        for(i=0; i < model->nr_class; i++)
            model->label[i] = i;
    }

    for (i=0; i < model->nr_class-1; i++) {
        /*
         * We cannot squash all this mallocs in a single call since
         * svm_destroy_model will free each element of the array.
         */
        if ((model->sv_coef[i] = malloc((model->l) * sizeof(double))) == NULL) {
            int j;
            for (j=0; j<i; j++)
                free(model->sv_coef[j]);
            goto sv_coef_i_error;
        }
        memcpy(model->sv_coef[i], dsv_coef, (model->l) * sizeof(double));
        dsv_coef += model->l;
    }

    /* libsvm stores the intercept negated (rho = -intercept). */
    for (i=0; i<m; ++i) {
        (model->rho)[i] = -((double *) rho)[i];
    }

    /*
     * just to avoid segfaults, these features are not wrapped but
     * svm_destroy_model will try to free them.
     */
    if (param->probability) {
        if ((model->probA = malloc(m * sizeof(double))) == NULL)
            goto probA_error;
        memcpy(model->probA, probA, m * sizeof(double));
        if ((model->probB = malloc(m * sizeof(double))) == NULL)
            goto probB_error;
        memcpy(model->probB, probB, m * sizeof(double));
    } else {
        model->probA = NULL;
        model->probB = NULL;
    }

    /* We'll free SV ourselves */
    model->free_sv = 0;
    return model;

probB_error:
    free(model->probA);
probA_error:
    for (i=0; i < model->nr_class-1; i++)
        free(model->sv_coef[i]);
sv_coef_i_error:
    free(model->rho);
rho_error:
    free(model->sv_coef);
sv_coef_error:
    free(model->label);
label_error:
    free(model->nSV);
nsv_error:
    free(model);
model_error:
    return NULL;
}
/*
* Copy support vectors into a scipy.sparse.csr matrix
*/
/*
 * Export the model's support vectors into caller-allocated CSR arrays
 * (data/indices/indptr).  Indices are converted back to 0-based.  The
 * buffers must be sized from get_nonzero_SV().  Always returns 0.
 */
int csr_copy_SV (char *data, Py_ssize_t *n_indices,
                 char *indices, Py_ssize_t *n_indptr, char *indptr,
                 struct svm_csr_model *model, int n_features)
{
    int i, j, k=0, index;   /* k: write position in data/indices */
    double *dvalues = (double *) data;
    int *iindices = (int *) indices;
    int *iindptr = (int *) indptr;
    iindptr[0] = 0;
    for (i=0; i<model->l; ++i) { /* iterate over support vectors */
        index = model->SV[i][0].index;
        for(j=0; index >=0 ; ++j) {   /* sentinel index -1 ends the row */
            iindices[k] = index - 1;  /* back to 0-based */
            dvalues[k] = model->SV[i][j].value;
            index = model->SV[i][j+1].index;
            ++k;
        }
        iindptr[i+1] = k;
    }

    return 0;
}
/* get number of nonzero coefficients in support vectors */
Py_ssize_t get_nonzero_SV (struct svm_csr_model *model) {
int i, j;
Py_ssize_t count=0;
for (i=0; i<model->l; ++i) {
j = 0;
while (model->SV[i][j].index != -1) {
++j;
++count;
}
}
return count;
}
/*
* Predict using a model, where data is expected to be encoded into a csr matrix.
*/
int csr_copy_predict (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size,
		char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model,
		char *dec_values, BlasFunctions *blas_functions) {
	/*
	 * Predict labels for CSR-encoded input, writing one double per
	 * sample into dec_values.  The number of samples is
	 * intptr_size[0] - 1 (CSR indptr has n_rows + 1 entries).
	 * Returns 0 on success, -1 if the libsvm node conversion failed.
	 */
	struct svm_csr_node **rows;
	double *out = (double *) dec_values;
	Py_ssize_t i, n_rows = intptr_size[0] - 1;

	rows = csr_to_libsvm((double *) data, (int *) index,
			(int *) intptr, n_rows);
	if (rows == NULL)
		return -1;
	for (i = 0; i < n_rows; ++i) {
		out[i] = svm_csr_predict(model, rows[i], blas_functions);
		free(rows[i]);	/* per-row node array owned by us */
	}
	free(rows);
	return 0;
}
int csr_copy_predict_values (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size,
		char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model,
		char *dec_values, int nr_class, BlasFunctions *blas_functions) {
	/*
	 * Compute decision-function values for CSR-encoded input; each
	 * sample fills nr_class consecutive doubles in dec_values.
	 * Returns 0 on success, -1 if node conversion failed.
	 */
	struct svm_csr_node **rows;
	Py_ssize_t i, n_rows = intptr_size[0] - 1;

	rows = csr_to_libsvm((double *) data, (int *) index,
			(int *) intptr, n_rows);
	if (rows == NULL)
		return -1;
	for (i = 0; i < n_rows; ++i) {
		svm_csr_predict_values(model, rows[i],
				((double *) dec_values) + i * nr_class,
				blas_functions);
		free(rows[i]);
	}
	free(rows);
	return 0;
}
int csr_copy_predict_proba (Py_ssize_t *data_size, char *data, Py_ssize_t *index_size,
		char *index, Py_ssize_t *intptr_size, char *intptr, struct svm_csr_model *model,
		char *dec_values, BlasFunctions *blas_functions) {
	/*
	 * Probability predictions for CSR-encoded input; each sample
	 * fills model->nr_class consecutive doubles in dec_values.
	 * Returns 0 on success, -1 if node conversion failed.
	 */
	struct svm_csr_node **rows;
	Py_ssize_t i, n_rows = intptr_size[0] - 1;
	int n_classes = model->nr_class;

	rows = csr_to_libsvm((double *) data, (int *) index,
			(int *) intptr, n_rows);
	if (rows == NULL)
		return -1;
	for (i = 0; i < n_rows; ++i) {
		svm_csr_predict_probability(
			model, rows[i], ((double *) dec_values) + i * n_classes,
			blas_functions);
		free(rows[i]);
	}
	free(rows);
	return 0;
}
/* Number of classes in the model (2 in regression / one-class svm). */
Py_ssize_t get_nr(struct svm_csr_model *model)
{
    return (Py_ssize_t) model->nr_class;
}
void copy_intercept(char *data, struct svm_csr_model *model, Py_ssize_t *dims)
{
	/*
	 * Copy intercept = -rho into `data` (dims[0] doubles).
	 * The sign flip is written so that a zero rho yields +0.0
	 * rather than an ugly -0.0.
	 */
	Py_ssize_t i, n = dims[0];
	double *out = (double *) data;

	for (i = 0; i < n; ++i) {
		double r = model->rho[i];
		out[i] = (r != 0) ? -r : 0;
	}
}
/* Copy the support-vector indices (model->sv_ind, model->l ints)
   into the caller-provided buffer. */
void copy_support (char *data, struct svm_csr_model *model)
{
    memcpy (data, model->sv_ind, (model->l) * sizeof(int));
}
/*
* Some helpers to convert from libsvm sparse data structures
* model->sv_coef is a double **, whereas data is just a double *,
* so we have to do some stupid copying.
*/
void copy_sv_coef(char *data, struct svm_csr_model *model)
{
	/*
	 * Flatten model->sv_coef — (nr_class - 1) rows of l doubles —
	 * into the contiguous buffer `data`, row after row.
	 */
	int row;
	double *out = (double *) data;

	for (row = 0; row < model->nr_class - 1; ++row, out += model->l)
		memcpy(out, model->sv_coef[row], sizeof(double) * model->l);
}
/*
* Get the number of iterations run in optimization
*/
void copy_n_iter(char *data, struct svm_csr_model *model)
{
	/*
	 * Copy the optimizer iteration counts: one int per binary
	 * sub-problem, i.e. nr_class * (nr_class - 1) / 2 entries,
	 * clamped to at least 1 (regression / one-class fit a single
	 * problem).
	 */
	int n_models = model->nr_class * (model->nr_class - 1) / 2;
	if (n_models < 1)
		n_models = 1;
	memcpy(data, model->n_iter, n_models * sizeof(int));
}
/*
* Get the number of support vectors in a model.
*/
/* Total number of support vectors in the model. */
Py_ssize_t get_l(struct svm_csr_model *model)
{
    return (Py_ssize_t) model->l;
}
/* Copy per-class support-vector counts (nr_class ints).
   The guard checks model->label rather than model->nSV — presumably
   the two are only allocated together for classification models
   (set_model fills both only when svm_type < 2); verify if nSV can
   ever be non-NULL while label is NULL. */
void copy_nSV(char *data, struct svm_csr_model *model)
{
    if (model->label == NULL) return;
    memcpy(data, model->nSV, model->nr_class * sizeof(int));
}
/*
* same as above with model->label
* TODO: merge in the cython layer
*/
/* Copy the class labels (nr_class ints) into `data`.
   No-op for regression / one-class models, which do not use label. */
void copy_label(char *data, struct svm_csr_model *model)
{
    if (model->label == NULL) return;
    memcpy(data, model->label, model->nr_class * sizeof(int));
}
/* Copy pairwise probability parameters probA (dims[0] doubles). */
void copy_probA(char *data, struct svm_csr_model *model, Py_ssize_t * dims)
{
    memcpy(data, model->probA, dims[0] * sizeof(double));
}
/* Copy pairwise probability parameters probB (dims[0] doubles). */
void copy_probB(char *data, struct svm_csr_model *model, Py_ssize_t * dims)
{
    memcpy(data, model->probB, dims[0] * sizeof(double));
}
/*
* Some free routines. Some of them are nontrivial since a lot of
* sharing happens across objects (they *must* be called in the
* correct order)
*/
int free_problem(struct svm_csr_problem *problem)
{
	/*
	 * Release a problem and its per-row node arrays.  problem->y
	 * and problem->W are deliberately left alone (presumably they
	 * borrow NumPy-owned buffers — confirm against the cython
	 * layer).  Returns -1 if problem is NULL, 0 otherwise.
	 */
	int row;

	if (problem == NULL)
		return -1;
	for (row = 0; row < problem->l; ++row)
		free(problem->x[row]);
	free(problem->x);
	free(problem);
	return 0;
}
/* Free the top-level arrays of a model built by set_model.
   The per-row buffers (SV[i], sv_coef[i]) are NOT freed here — when
   owned by this wrapper they must be released first via
   free_model_SV.  Returns -1 on NULL, 0 otherwise. */
int free_model(struct svm_csr_model *model)
{
    /* like svm_free_and_destroy_model, but does not free sv_coef[i] */
    /* We don't free n_iter, since we did not create them in set_model. */
    if (model == NULL) return -1;
    free(model->SV);
    free(model->sv_coef);
    free(model->rho);
    free(model->label);
    free(model->probA);
    free(model->probB);
    free(model->nSV);
    free(model);
    return 0;
}
/* Free an svm_parameter struct.  Returns -1 on NULL, 0 otherwise.
   Note: only the struct itself is freed, not weight/weight_label. */
int free_param(struct svm_parameter *param)
{
    if (param == NULL) return -1;
    free(param);
    return 0;
}
/* Free the per-row buffers owned by this wrapper: each support
   vector and each row of sv_coef.  The top-level arrays (model->SV,
   model->sv_coef) are freed by svm_destroy_model / free_model.
   Returns -1 if model is NULL (consistent with the other free_*
   helpers), 0 otherwise. */
int free_model_SV(struct svm_csr_model *model)
{
    int i;
    /* NULL guard for consistency with free_problem/free_model/free_param,
       which all return -1 instead of dereferencing NULL. */
    if (model == NULL) return -1;
    for (i=model->l-1; i>=0; --i) free(model->SV[i]);
    /* svm_destroy_model frees model->SV */
    for (i=0; i < model->nr_class-1 ; ++i) free(model->sv_coef[i]);
    /* svm_destroy_model frees model->sv_coef */
    return 0;
}
/* borrowed from original libsvm code */
static void print_null(const char *s) {} /* sink: silently discards libsvm log output */
/* Forward libsvm log output to stdout, flushing immediately so
   messages appear promptly from within training loops. */
static void print_string_stdout(const char *s)
{
	fprintf(stdout, "%s", s);
	fflush(stdout);
}
/* provide convenience wrapper */
/* Convenience wrapper: route libsvm's training log to stdout when
   verbosity_flag is nonzero, otherwise to a silent sink. */
void set_verbosity(int verbosity_flag){
	svm_set_print_string_function(
		verbosity_flag ? &print_string_stdout : &print_null);
}

View File

@@ -0,0 +1,8 @@
/* this is a hack to generate libsvm with both sparse and dense
methods in the same binary*/
#define _DENSE_REP
#include "svm.cpp"
#undef _DENSE_REP
#include "svm.cpp"

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,176 @@
#ifndef _LIBSVM_H
#define _LIBSVM_H
#define LIBSVM_VERSION 310
#ifdef __cplusplus
extern "C" {
#endif
#include "_svm_cython_blas_helpers.h"
/* One dense sample: `values` holds `dim` feature values. */
struct svm_node
{
    int dim;     /* number of entries in `values` */
    int ind; /* index. A bit redundant, but needed if using a
                precomputed kernel */
    double *values;  /* dense feature array of length `dim` */
};
/* Dense training problem: l samples with targets and per-sample weights. */
struct svm_problem
{
    int l;           /* number of training samples */
    double *y;       /* targets: class labels or regression values, length l */
    struct svm_node *x;  /* samples, dense representation (x[l]) */
    double *W; /* instance weights */
};
/* One sparse entry: 1-based feature index plus value.  Rows are
   arrays of these terminated by a node with index == -1. */
struct svm_csr_node
{
    int index;   /* 1-based feature index; -1 marks end of row */
    double value;
};
/* Sparse training problem: l rows of sentinel-terminated node arrays. */
struct svm_csr_problem
{
    int l;           /* number of training samples */
    double *y;       /* targets: class labels or regression values, length l */
    struct svm_csr_node **x; /* x[l]: each row terminated by index == -1 */
    double *W; /* instance weights */
};
/* Values for svm_parameter.svm_type.  Ordering matters: the wrapper
   code treats types < 2 (C_SVC, NU_SVC) as classification. */
enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR };	/* svm_type */
/* Values for svm_parameter.kernel_type. */
enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
/* Training and kernel hyper-parameters shared by the dense and
   sparse code paths. */
struct svm_parameter
{
    int svm_type;    /* one of C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR */
    int kernel_type; /* one of LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED */
    int degree; /* for poly */
    double gamma;   /* for poly/rbf/sigmoid */
    double coef0;   /* for poly/sigmoid */

    /* these are for training only */
    double cache_size; /* in MB */
    double eps; /* stopping criteria */
    double C;   /* for C_SVC, EPSILON_SVR and NU_SVR */
    int nr_weight;      /* for C_SVC */
    int *weight_label;  /* for C_SVC */
    double* weight;     /* for C_SVC */
    double nu;  /* for NU_SVC, ONE_CLASS, and NU_SVR */
    double p;   /* for EPSILON_SVR */
    int shrinking;  /* use the shrinking heuristics */
    int probability; /* do probability estimates */
    int max_iter;  /* ceiling on Solver runtime */
    int random_seed;  /* seed for random number generator */
};
//
// svm_model
//
/* Trained model, dense representation (support vectors stored as a
   flat svm_node array). */
struct svm_model
{
    struct svm_parameter param; /* parameter */
    int nr_class;       /* number of classes, = 2 in regression/one class svm */
    int l;          /* total #SV */
    struct svm_node *SV;        /* SVs (SV[l]) */
    double **sv_coef;   /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
    int *n_iter;        /* number of iterations run by the optimization routine to fit the model */
    int *sv_ind;            /* index of support vectors */

    double *rho;        /* constants in decision functions (rho[k*(k-1)/2]) */
    double *probA;      /* pairwise probability information */
    double *probB;

    /* for classification only */

    int *label;     /* label of each class (label[k]) */
    int *nSV;       /* number of SVs for each class (nSV[k]) */
                /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
    /* XXX */
    int free_sv;        /* 1 if svm_model is created by svm_load_model*/
                /* 0 if svm_model is created by svm_train */
};
/* Trained model, sparse representation: same layout as svm_model but
   support vectors are rows of sentinel-terminated svm_csr_node arrays. */
struct svm_csr_model
{
    struct svm_parameter param; /* parameter */
    int nr_class;       /* number of classes, = 2 in regression/one class svm */
    int l;          /* total #SV */
    struct svm_csr_node **SV;   /* SVs (SV[l]) */
    double **sv_coef;   /* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
    int *n_iter;        /* number of iterations run by the optimization routine to fit the model */
    int *sv_ind;        /* index of support vectors */

    double *rho;        /* constants in decision functions (rho[k*(k-1)/2]) */
    double *probA;      /* pairwise probability information */
    double *probB;

    /* for classification only */

    int *label;     /* label of each class (label[k]) */
    int *nSV;       /* number of SVs for each class (nSV[k]) */
                /* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
    /* XXX */
    int free_sv;        /* 1 if svm_model is created by svm_load_model*/
                /* 0 if svm_model is created by svm_train */
};
/* svm_ functions are defined by libsvm_template.cpp from generic versions in svm.cpp */
/* NOTE(review): these signatures deviate from upstream libsvm —
   `status` presumably reports solver failure conditions and
   `blas_functions` injects BLAS kernels (see
   _svm_cython_blas_helpers.h); confirm against svm.cpp. */

struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param, int *status, BlasFunctions *blas_functions);
void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target, BlasFunctions *blas_functions);

int svm_save_model(const char *model_file_name, const struct svm_model *model);
struct svm_model *svm_load_model(const char *model_file_name);

int svm_get_svm_type(const struct svm_model *model);
int svm_get_nr_class(const struct svm_model *model);
void svm_get_labels(const struct svm_model *model, int *label);
double svm_get_svr_probability(const struct svm_model *model);

double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values, BlasFunctions *blas_functions);
double svm_predict(const struct svm_model *model, const struct svm_node *x, BlasFunctions *blas_functions);
double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates, BlasFunctions *blas_functions);

void svm_free_model_content(struct svm_model *model_ptr);
void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
void svm_destroy_param(struct svm_parameter *param);

const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);

void svm_set_print_string_function(void (*print_func)(const char *));

/* sparse version */
/* svm_csr_ functions are defined by libsvm_template.cpp from generic versions in svm.cpp */

struct svm_csr_model *svm_csr_train(const struct svm_csr_problem *prob, const struct svm_parameter *param, int *status, BlasFunctions *blas_functions);
void svm_csr_cross_validation(const struct svm_csr_problem *prob, const struct svm_parameter *param, int nr_fold, double *target, BlasFunctions *blas_functions);

int svm_csr_get_svm_type(const struct svm_csr_model *model);
int svm_csr_get_nr_class(const struct svm_csr_model *model);
void svm_csr_get_labels(const struct svm_csr_model *model, int *label);
double svm_csr_get_svr_probability(const struct svm_csr_model *model);

double svm_csr_predict_values(const struct svm_csr_model *model, const struct svm_csr_node *x, double* dec_values, BlasFunctions *blas_functions);
double svm_csr_predict(const struct svm_csr_model *model, const struct svm_csr_node *x, BlasFunctions *blas_functions);
double svm_csr_predict_probability(const struct svm_csr_model *model, const struct svm_csr_node *x, double* prob_estimates, BlasFunctions *blas_functions);

void svm_csr_free_model_content(struct svm_csr_model *model_ptr);
void svm_csr_free_and_destroy_model(struct svm_csr_model **model_ptr_ptr);
void svm_csr_destroy_param(struct svm_parameter *param);

const char *svm_csr_check_parameter(const struct svm_csr_problem *prob, const struct svm_parameter *param);

/* end sparse version */
#ifdef __cplusplus
}
#endif
#endif /* _LIBSVM_H */

View File

@@ -0,0 +1,59 @@
/*
Creation, 2020:
- New random number generator using a mersenne twister + tweaked lemire
postprocessor. This fixed a convergence issue on windows targets for
libsvm and liblinear.
Sylvain Marie, Schneider Electric
See <https://github.com/scikit-learn/scikit-learn/pull/13511#issuecomment-481729756>
*/
#ifndef _NEWRAND_H
#define _NEWRAND_H
#ifdef __cplusplus
#include <random> // needed for cython to generate a .cpp file from newrand.h
extern "C" {
#endif
// Scikit-Learn-specific random number generator replacing `rand()` originally
// used in LibSVM / LibLinear, to ensure the same behaviour on windows-linux,
// with increased speed
// - (1) Init a `mt_rand` object
// NOTE(review): this defines (not just declares) a global in a header,
// with no `static`/`inline` — safe only if the header is included from
// a single translation unit per extension module; confirm usage.
std::mt19937 mt_rand(std::mt19937::default_seed);

// - (2) public `set_seed()` function that should be used instead of `srand()` to set a new seed.
// Re-seeds the shared Mersenne Twister used by bounded_rand_int.
void set_seed(unsigned custom_seed) {
    mt_rand.seed(custom_seed);
}
// - (3) New internal `bounded_rand_int` function, used instead of rand() everywhere.
// Returns a uniform integer in [0, range) using Lemire's multiply-shift
// with rejection: the high 32 bits of x * range are uniform once draws
// falling in the biased low region are rejected and redrawn.
inline uint32_t bounded_rand_int(uint32_t range) {
    // "LibSVM / LibLinear Original way" - make a 31bit positive
    // random number and use modulo to make it fit in the range
    // return abs( (int)mt_rand()) % range;

    // "Better way": tweaked Lemire post-processor
    // from http://www.pcg-random.org/posts/bounded-rands.html
    uint32_t x = mt_rand();
    uint64_t m = uint64_t(x) * uint64_t(range);
    uint32_t l = uint32_t(m);   // low word decides whether x is biased
    if (l < range) {
        // t = 2^32 mod range, computed without 64-bit division:
        // -range wraps to 2^32 - range; the subtract/branch chain
        // reduces it below range, falling back to % only rarely.
        uint32_t t = -range;
        if (t >= range) {
            t -= range;
            if (t >= range)
                t %= range;
        }
        // reject draws whose low word lands in the biased region [0, t)
        while (l < t) {
            x = mt_rand();
            m = uint64_t(x) * uint64_t(range);
            l = uint32_t(m);
        }
    }
    return m >> 32;  // high word of x * range: uniform in [0, range)
}
#ifdef __cplusplus
}
#endif
#endif /* _NEWRAND_H */