#!/usr/bin/env python
"""
Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin
All rights reserved.
"""
import sys
import os
import pickle
from .svm import libsvm, svm_node, svm_problem, svm_parameter, svm_model, toPyModel, SVM_TYPE, KERNEL_TYPE, \
    gen_svm_nodearray, print_null
from ctypes import c_int, c_double
sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path

def save_svm(svm_file_name, w, b):
    """
    Save SVM weights and bias in PICKLE format.

    :param svm_file_name: name of the file to write to
    :param w: weights to save
    :param b: bias to save
    """
    dir_name = os.path.dirname(svm_file_name)
    if dir_name and not os.path.exists(dir_name):
        os.makedirs(dir_name)
    with open(svm_file_name, "wb") as f:
        pickle.dump((w, b), f)

def read_svm(svm_file_name):
    """
    Read an SVM model in PICKLE format.

    :param svm_file_name: name of the file to read from
    :return: a tuple (w, b) with the weights and the bias
    """
    with open(svm_file_name, "rb") as f:
        w, b = pickle.load(f)
    return w, b
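
# Usage sketch (illustrative, not part of the original module): round-trip a
# hypothetical weight vector and bias through the two pickle helpers above.
#
#     >>> w, b = [0.5, -1.2, 0.3], 0.1
#     >>> save_svm("models/linear_svm.p", w, b)
#     >>> w2, b2 = read_svm("models/linear_svm.p")
#     >>> (w2, b2) == (w, b)
#     True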

def svm_read_problem(data_file_name):
    """
    svm_read_problem(data_file_name) -> (y, x)

    Read LIBSVM-format data from data_file_name and return the labels y
    and the data instances x (a list of sparse {index: value} dictionaries).

    :param data_file_name: name of the file to load from
    """
    prob_y = []
    prob_x = []
    with open(data_file_name) as f:
        for line in f:
            line = line.split(None, 1)
            # In case of an instance with all zero features
            if len(line) == 1:
                line += ['']
            label, features = line
            xi = {}
            for e in features.split():
                ind, val = e.split(":")
                xi[int(ind)] = float(val)
            prob_y += [float(label)]
            prob_x += [xi]
    return prob_y, prob_x
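
# Usage sketch (illustrative): for a hypothetical file "train.txt" containing
# the LIBSVM-format lines
#
#     +1 1:0.7 3:1.0
#     -1 2:0.4
#
# svm_read_problem returns the labels and sparse feature dictionaries:
#
#     >>> y, x = svm_read_problem("train.txt")
#     >>> y
#     [1.0, -1.0]
#     >>> x
#     [{1: 0.7, 3: 1.0}, {2: 0.4}]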

def svm_load_model(model_file_name):
    """
    svm_load_model(model_file_name) -> model

    Load a LIBSVM model from model_file_name and return it.

    :param model_file_name: file name to load from
    """
    model = libsvm.svm_load_model(model_file_name.encode())
    if not model:
        print("can't open model file %s" % model_file_name)
        return None
    model = toPyModel(model)
    return model

def svm_save_model(model_file_name, model):
    """
    svm_save_model(model_file_name, model) -> None

    Save a LIBSVM model to the file model_file_name.

    :param model_file_name: file name to write to
    :param model: model to save
    """
    libsvm.svm_save_model(model_file_name.encode(), model)
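
# Usage sketch (illustrative): persist a trained model and reload it later.
# The option string and the file name "rbf.model" are hypothetical.
#
#     >>> m = svm_train(y, x, "-t 2 -c 1")
#     >>> svm_save_model("rbf.model", m)
#     >>> m = svm_load_model("rbf.model")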

def evaluations(ty, pv):
    """
    evaluations(ty, pv) -> (ACC, MSE, SCC)

    Calculate accuracy, mean squared error and squared correlation coefficient
    using the true values (ty) and the predicted values (pv).
    """
    if len(ty) != len(pv):
        raise ValueError("len(ty) must equal len(pv)")
    total_correct = total_error = 0
    sumv = sumy = sumvv = sumyy = sumvy = 0
    for v, y in zip(pv, ty):
        if y == v:
            total_correct += 1
        total_error += (v - y) * (v - y)
        sumv += v
        sumy += y
        sumvv += v * v
        sumyy += y * y
        sumvy += v * y
    l = len(ty)
    ACC = 100.0 * total_correct / l
    MSE = total_error / l
    try:
        SCC = ((l * sumvy - sumv * sumy) * (l * sumvy - sumv * sumy)) \
              / ((l * sumvv - sumv * sumv) * (l * sumyy - sumy * sumy))
    except ZeroDivisionError:
        SCC = float('nan')
    return ACC, MSE, SCC
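
# Usage sketch (illustrative): evaluate hypothetical true and predicted labels.
#
#     >>> ACC, MSE, SCC = evaluations([1, -1, 1, 1], [1, 1, 1, -1])
#     >>> ACC              # 2 of the 4 labels match
#     50.0
#     >>> MSE              # (0 + 4 + 0 + 4) / 4
#     2.0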

def svm_train(arg1, arg2=None, arg3=None):
    """
    svm_train(y, x [, options]) -> model | ACC | MSE
    svm_train(prob [, options]) -> model | ACC | MSE
    svm_train(prob, param) -> model | ACC | MSE

    Train an SVM model from data (y, x) or from an svm_problem prob using
    'options' or an svm_parameter param.
    If '-v' is specified in 'options' (i.e., cross validation mode),
    either accuracy (ACC) or mean-squared error (MSE) is returned.

options:
- -s svm_type : set type of SVM (default 0)
- 0 -- C-SVC (multi-class classification)
- 1 -- nu-SVC (multi-class classification)
- 2 -- one-class SVM
- 3 -- epsilon-SVR (regression)
- 4 -- nu-SVR (regression)
- -t kernel_type : set type of kernel function (default 2)
- 0 -- linear: u\'\*v
- 1 -- polynomial: (gamma\*u\'\*v + coef0)^degree
- 2 -- radial basis function: exp(-gamma\*|u-v|^2)
- 3 -- sigmoid: tanh(gamma\*u\'\*v + coef0)
- 4 -- precomputed kernel (kernel values in training_set_file)
- -d degree : set degree in kernel function (default 3)
- -g gamma : set gamma in kernel function (default 1/num_features)
- -r coef0 : set coef0 in kernel function (default 0)
- -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
- -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
- -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
- -m cachesize : set cache memory size in MB (default 100)
- -e epsilon : set tolerance of termination criterion (default 0.001)
- -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
- -b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
- -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
- -v n: n-fold cross validation mode
- -q : quiet mode (no outputs)
"""
prob, param = None, None
if isinstance(arg1, (list, tuple)):
assert isinstance(arg2, (list, tuple))
y, x, options = arg1, arg2, arg3
param = svm_parameter(options)
prob = svm_problem(y, x, isKernel=(param.kernel_type == 'PRECOMPUTED'))
elif isinstance(arg1, svm_problem):
prob = arg1
if isinstance(arg2, svm_parameter):
param = arg2
else:
param = svm_parameter(arg2)
if prob is None or param is None:
raise TypeError("Wrong types for the arguments")
if param.kernel_type == 'PRECOMPUTED':
for xi in prob.x_space:
idx, val = xi[0].index, xi[0].value
if xi[0].index != 0:
raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
if val <= 0 or val > prob.n:
raise ValueError('Wrong input format: sample_serial_number out of range')
if param.gamma == 0 and prob.n > 0:
param.gamma = 1.0 / prob.n
libsvm.svm_set_print_string_function(param.print_func)
err_msg = libsvm.svm_check_parameter(prob, param)
if err_msg:
raise ValueError('Error: %s' % err_msg)
if param.cross_validation:
l, nr_fold = prob.l, param.nr_fold
target = (c_double * l)() # pytype: disable=not-callable
libsvm.svm_cross_validation(prob, param, nr_fold, target)
ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
if param.svm_type in ['EPSILON_SVR', 'NU_SVR']:
print("Cross Validation Mean squared error = %g" % MSE)
print("Cross Validation Squared correlation coefficient = %g" % SCC)
return MSE
else:
print("Cross Validation Accuracy = %g%%" % ACC)
return ACC
    else:
        m = libsvm.svm_train(prob, param)
        m = toPyModel(m)
        # Keep a reference to x_space so that the SVs pointed to by m
        # are not freed when prob is garbage-collected.
        m.x_space = prob.x_space
        return m
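
# Usage sketch (illustrative): train a C-SVC with an RBF kernel, or run 5-fold
# cross validation, on hypothetical data read with svm_read_problem. The option
# values are examples, not recommendations.
#
#     >>> y, x = svm_read_problem("train.txt")
#     >>> m = svm_train(y, x, "-t 2 -c 4 -g 0.5")    # returns a model
#     >>> acc = svm_train(y, x, "-t 2 -c 4 -v 5")    # returns CV accuracy (%)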

def svm_predict(y, x, m, options=""):
    """
    svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

    Predict data (y, x) with the SVM model m.

    options:
        - "-b" probability_estimates: whether to predict probability estimates,
          0 or 1 (default 0); for one-class SVM only 0 is supported.
        - "-q" : quiet mode (no outputs).

    The returned tuple contains
        - p_labels: a list of predicted labels
        - p_acc: a tuple including accuracy (for classification), mean-squared
          error, and squared correlation coefficient (for regression).
        - p_vals: a list of decision values or probability estimates
          (if '-b 1' is specified). If k is the number of classes, each element
          of the decision values includes the results of predicting k(k-1)/2
          binary-class SVMs. For probabilities, each element contains k values
          indicating the probability that the testing instance is in each class.

    .. note:: The order of classes here is the same as the 'model.label' field
        in the model structure.
    """
def info(s):
print(s)
predict_probability = 0
argv = options.split()
i = 0
while i < len(argv):
if argv[i] == '-b':
i += 1
predict_probability = int(argv[i])
elif argv[i] == '-q':
info = print_null
else:
raise ValueError("Wrong options")
i += 1
svm_type = m.get_svm_type()
is_prob_model = m.is_probability_model()
nr_class = m.get_nr_class()
pred_labels = []
pred_values = []
    if predict_probability:
        if not is_prob_model:
            raise ValueError("Model does not support probability estimates")
        if svm_type in ['NU_SVR', 'EPSILON_SVR']:
            info("Prob. model for test data: target value = predicted value + z,\n"
                 "z: Laplace distribution e^(-|z|/sigma)/(2sigma), sigma=%g" % m.get_svr_probability())
            nr_class = 0
        prob_estimates = (c_double * nr_class)()  # pytype: disable=not-callable
for xi in x:
xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == 'PRECOMPUTED'))
label = libsvm.svm_predict_probability(m, xi, prob_estimates)
values = prob_estimates[:nr_class]
pred_labels += [label]
pred_values += [values]
    else:
        if is_prob_model:
            info("Model supports probability estimates, but they are disabled in prediction.")
        if svm_type in ('ONE_CLASS', 'EPSILON_SVR', 'NU_SVR'):
            # these models produce a single decision value per instance
            nr_classifier = 1
        else:
            nr_classifier = nr_class * (nr_class - 1) // 2
dec_values = (c_double * nr_classifier)() # pytype: disable=not-callable
for xi in x:
xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == 'PRECOMPUTED'))
label = libsvm.svm_predict_values(m, xi, dec_values)
if nr_class == 1:
values = [1]
else:
values = dec_values[:nr_classifier]
pred_labels += [label]
pred_values += [values]
ACC, MSE, SCC = evaluations(y, pred_labels)
l = len(y)
if svm_type in ['EPSILON_SVR', 'NU_SVR']:
info("Mean squared error = %g (regression)" % MSE)
info("Squared correlation coefficient = %g (regression)" % SCC)
else:
info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
return pred_labels, (ACC, MSE, SCC), pred_values
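
# Usage sketch (illustrative): predict held-out data with a trained model; to
# request probability estimates the model must have been trained with "-b 1".
#
#     >>> y, x = svm_read_problem("test.txt")
#     >>> p_labels, p_acc, p_vals = svm_predict(y, x, m)
#     >>> p_labels, p_acc, p_vals = svm_predict(y, x, m, "-b 1")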