Source code for libsvm.svmutil

#!/usr/bin/env python

"""
Copyright (c) 2000-2014 Chih-Chung Chang and Chih-Jen Lin
All rights reserved.
"""

import sys
import os
import pickle
from .svm import libsvm, svm_node, svm_problem, svm_parameter, svm_model, toPyModel, SVM_TYPE, KERNEL_TYPE, \
    gen_svm_nodearray, print_null
from ctypes import c_int, c_double

sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path

def save_svm(svm_file_name, w, b):
    """
    Save SVM weights and bias in pickle format.

    :param svm_file_name: name of the file to write to
    :param w: weight vector to save
    :param b: bias to save
    """
    if not os.path.exists(os.path.dirname(svm_file_name)):
        os.makedirs(os.path.dirname(svm_file_name))
    with open(svm_file_name, "wb") as f:
        pickle.dump((w, b), f)

def read_svm(svm_file_name):
    """
    Read SVM weights and bias in pickle format.

    :param svm_file_name: name of the file to read from
    """
    with open(svm_file_name, "rb") as f:
        (w, b) = pickle.load(f)
    return w, b

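# Usage sketch (illustrative, not part of the upstream module): round-tripping
# weights and a bias through save_svm/read_svm. The path "model/svm.pkl" is
# only an assumed example.
#
#     w, b = [0.5, -1.2, 3.0], 0.1
#     save_svm("model/svm.pkl", w, b)
#     w2, b2 = read_svm("model/svm.pkl")
#     assert w2 == w and b2 == b
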
def svm_read_problem(data_file_name):
    """
    svm_read_problem(data_file_name) -> [y, x]

    Read LIBSVM-format data from data_file_name and return labels y
    and data instances x.

    :param data_file_name: name of the file to load from
    """
    prob_y = []
    prob_x = []
    for line in open(data_file_name):
        line = line.split(None, 1)
        # In case an instance with all zero features
        if len(line) == 1:
            line += ['']
        label, features = line
        xi = {}
        for e in features.split():
            ind, val = e.split(":")
            xi[int(ind)] = float(val)
        prob_y += [float(label)]
        prob_x += [xi]
    return prob_y, prob_x

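# Usage sketch (illustrative; "heart_scale" is an assumed LIBSVM-format data
# file, such as the one shipped with the LIBSVM distribution): labels come
# back as a list of floats and each instance as a sparse {index: value} dict.
#
#     y, x = svm_read_problem("heart_scale")
#     print(len(y), y[0], x[0])
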
def svm_load_model(model_file_name):
    """
    svm_load_model(model_file_name) -> model

    Load a LIBSVM model from model_file_name and return it.

    :param model_file_name: file name to load from
    """
    model = libsvm.svm_load_model(model_file_name.encode())
    if not model:
        print("can't open model file %s" % model_file_name)
        return None
    model = toPyModel(model)
    return model

def svm_save_model(model_file_name, model):
    """
    svm_save_model(model_file_name, model) -> None

    Save a LIBSVM model to the file model_file_name.

    :param model_file_name: file name to write to
    :param model: model to save
    """
    libsvm.svm_save_model(model_file_name.encode(), model)

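# Usage sketch (illustrative; "heart_scale" and "heart_scale.model" are
# assumed file names): a model returned by svm_train can be written to disk
# in the native LIBSVM text format and loaded back later.
#
#     y, x = svm_read_problem("heart_scale")
#     m = svm_train(y, x, "-c 4")
#     svm_save_model("heart_scale.model", m)
#     m2 = svm_load_model("heart_scale.model")
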
def evaluations(ty, pv):
    """
    evaluations(ty, pv) -> (ACC, MSE, SCC)

    Calculate accuracy, mean squared error and squared correlation
    coefficient using the true values (ty) and predicted values (pv).
    """
    if len(ty) != len(pv):
        raise ValueError("len(ty) must be equal to len(pv)")
    total_correct = total_error = 0
    sumv = sumy = sumvv = sumyy = sumvy = 0
    for v, y in zip(pv, ty):
        if y == v:
            total_correct += 1
        total_error += (v-y)*(v-y)
        sumv += v
        sumy += y
        sumvv += v*v
        sumyy += y*y
        sumvy += v*y
    l = len(ty)
    ACC = 100.0*total_correct/l
    MSE = total_error/l
    try:
        SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy))
    except ZeroDivisionError:
        SCC = float('nan')
    return ACC, MSE, SCC

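# Usage sketch (illustrative values): with four predictions, three of which
# match the true labels, accuracy is 75% and MSE is the mean of the squared
# residuals; SCC is the squared Pearson correlation between ty and pv.
#
#     ACC, MSE, SCC = evaluations([1, -1, 1, 1], [1, -1, -1, 1])
#     # ACC == 75.0, MSE == 1.0
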
def svm_train(arg1, arg2=None, arg3=None):
    """
    svm_train(y, x [, options]) -> model | ACC | MSE

    svm_train(prob [, options]) -> model | ACC | MSE

    svm_train(prob, param) -> model | ACC | MSE

    Train an SVM model from data (y, x) or an svm_problem prob using
    'options' or an svm_parameter param.

    If '-v' is specified in 'options' (i.e., cross validation), either
    accuracy (ACC) or mean-squared error (MSE) is returned.

    options:
        - -s svm_type : set type of SVM (default 0)
            - 0 -- C-SVC (multi-class classification)
            - 1 -- nu-SVC (multi-class classification)
            - 2 -- one-class SVM
            - 3 -- epsilon-SVR (regression)
            - 4 -- nu-SVR (regression)
        - -t kernel_type : set type of kernel function (default 2)
            - 0 -- linear: u'*v
            - 1 -- polynomial: (gamma*u'*v + coef0)^degree
            - 2 -- radial basis function: exp(-gamma*|u-v|^2)
            - 3 -- sigmoid: tanh(gamma*u'*v + coef0)
            - 4 -- precomputed kernel (kernel values in training_set_file)
        - -d degree : set degree in kernel function (default 3)
        - -g gamma : set gamma in kernel function (default 1/num_features)
        - -r coef0 : set coef0 in kernel function (default 0)
        - -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
        - -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
        - -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
        - -m cachesize : set cache memory size in MB (default 100)
        - -e epsilon : set tolerance of termination criterion (default 0.001)
        - -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
        - -b probability_estimates : whether to train an SVC or SVR model for probability estimates, 0 or 1 (default 0)
        - -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
        - -v n : n-fold cross validation mode
        - -q : quiet mode (no outputs)
    """
    prob, param = None, None
    if isinstance(arg1, (list, tuple)):
        assert isinstance(arg2, (list, tuple))
        y, x, options = arg1, arg2, arg3
        param = svm_parameter(options)
        prob = svm_problem(y, x, isKernel=(param.kernel_type == 'PRECOMPUTED'))
    elif isinstance(arg1, svm_problem):
        prob = arg1
        if isinstance(arg2, svm_parameter):
            param = arg2
        else:
            param = svm_parameter(arg2)
    if prob is None or param is None:
        raise TypeError("Wrong types for the arguments")

    if param.kernel_type == 'PRECOMPUTED':
        for xi in prob.x_space:
            idx, val = xi[0].index, xi[0].value
            if idx != 0:
                raise ValueError('Wrong input format: first column must be 0:sample_serial_number')
            if val <= 0 or val > prob.n:
                raise ValueError('Wrong input format: sample_serial_number out of range')

    if param.gamma == 0 and prob.n > 0:
        param.gamma = 1.0 / prob.n
    libsvm.svm_set_print_string_function(param.print_func)
    err_msg = libsvm.svm_check_parameter(prob, param)
    if err_msg:
        raise ValueError('Error: %s' % err_msg)

    if param.cross_validation:
        l, nr_fold = prob.l, param.nr_fold
        target = (c_double * l)()  # pytype: disable=not-callable
        libsvm.svm_cross_validation(prob, param, nr_fold, target)
        ACC, MSE, SCC = evaluations(prob.y[:l], target[:l])
        if param.svm_type in ['EPSILON_SVR', 'NU_SVR']:
            print("Cross Validation Mean squared error = %g" % MSE)
            print("Cross Validation Squared correlation coefficient = %g" % SCC)
            return MSE
        else:
            print("Cross Validation Accuracy = %g%%" % ACC)
            return ACC
    else:
        m = libsvm.svm_train(prob, param)
        m = toPyModel(m)

        # If prob is destroyed, data including SVs pointed to by m can remain.
        m.x_space = prob.x_space
        return m

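# Usage sketch (illustrative; "heart_scale" is an assumed LIBSVM-format data
# file): training from (y, x) with an options string, from an svm_problem
# with an svm_parameter, and with 5-fold cross validation, which returns ACC
# (or MSE for regression) instead of a model.
#
#     y, x = svm_read_problem("heart_scale")
#     m = svm_train(y, x, "-c 4 -t 2")
#     prob = svm_problem(y, x)
#     param = svm_parameter("-c 4 -b 1")
#     m = svm_train(prob, param)
#     cv_acc = svm_train(y, x, "-c 4 -v 5")
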
def svm_predict(y, x, m, options=""):
    """
    svm_predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals)

    Predict data (y, x) with the SVM model m.

    options:
        - "-b" probability_estimates : whether to predict probability estimates,
          0 or 1 (default 0); for one-class SVM only 0 is supported.
        - "-q" : quiet mode (no outputs).

    The return tuple contains
        - p_labels: a list of predicted labels
        - p_acc: a tuple including accuracy (for classification), mean-squared
          error, and squared correlation coefficient (for regression).
        - p_vals: a list of decision values or probability estimates (if '-b 1'
          is specified). If k is the number of classes, for decision values,
          each element includes results of predicting k(k-1)/2 binary-class
          SVMs. For probabilities, each element contains k values indicating
          the probability that the testing instance is in each class.

    .. note:: The order of classes here is the same as the 'model.label'
        field in the model structure.
    """
    def info(s):
        print(s)

    predict_probability = 0
    argv = options.split()
    i = 0
    while i < len(argv):
        if argv[i] == '-b':
            i += 1
            predict_probability = int(argv[i])
        elif argv[i] == '-q':
            info = print_null
        else:
            raise ValueError("Wrong options")
        i += 1

    svm_type = m.get_svm_type()
    is_prob_model = m.is_probability_model()
    nr_class = m.get_nr_class()
    pred_labels = []
    pred_values = []

    if predict_probability:
        if not is_prob_model:
            raise ValueError("Model does not support probability estimates")

        if svm_type in ['NU_SVR', 'EPSILON_SVR']:
            info("Prob. model for test data: target value = predicted value + z,\n"
                 "z: Laplace distribution e^(-|z|/sigma)/(2sigma), sigma=%g" % m.get_svr_probability())
            nr_class = 0

        prob_estimates = (c_double * nr_class)()  # pytype: disable=not-callable
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == 'PRECOMPUTED'))
            label = libsvm.svm_predict_probability(m, xi, prob_estimates)
            values = prob_estimates[:nr_class]
            pred_labels += [label]
            pred_values += [values]
    else:
        if is_prob_model:
            info("Model supports probability estimates, but disabled in prediction.")
        if svm_type in ('ONE_CLASS', 'EPSILON_SVR', 'NU_SVR'):
            nr_classifier = 1
        else:
            nr_classifier = nr_class*(nr_class-1)//2
        dec_values = (c_double * nr_classifier)()  # pytype: disable=not-callable
        for xi in x:
            xi, idx = gen_svm_nodearray(xi, isKernel=(m.param.kernel_type == 'PRECOMPUTED'))
            label = libsvm.svm_predict_values(m, xi, dec_values)
            if nr_class == 1:
                values = [1]
            else:
                values = dec_values[:nr_classifier]
            pred_labels += [label]
            pred_values += [values]

    ACC, MSE, SCC = evaluations(y, pred_labels)
    l = len(y)
    if svm_type in ['EPSILON_SVR', 'NU_SVR']:
        info("Mean squared error = %g (regression)" % MSE)
        info("Squared correlation coefficient = %g (regression)" % SCC)
    else:
        info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l))
    return pred_labels, (ACC, MSE, SCC), pred_values

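# Usage sketch (illustrative; reuses the assumed "heart_scale" data): predict
# the training set itself and unpack the labels, the (ACC, MSE, SCC) tuple and
# the raw decision values; pass "-b 1" for probability outputs when the model
# was trained with "-b 1".
#
#     y, x = svm_read_problem("heart_scale")
#     m = svm_train(y, x, "-c 4")
#     p_labels, p_acc, p_vals = svm_predict(y, x, m)
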