35 #ifndef OPENMS_ANALYSIS_SVM_SVMWRAPPER_H 
   36 #define OPENMS_ANALYSIS_SVM_SVMWRAPPER_H 
   59     std::vector<std::vector<std::pair<Int, DoubleReal> > > 
sequences;
 
   68     SVMData(std::vector<std::vector<std::pair<Int, DoubleReal> > > & seqs, std::vector<DoubleReal> & lbls) :
 
   82       std::ofstream output_file(filename.c_str());
 
   93         output_file << 
labels[i] << 
" ";
 
   98         output_file << std::endl;
 
  109       std::vector<String> parts;
 
  110       std::vector<String> temp_parts;
 
  125       TextFile text_file(filename.c_str(), 
true);
 
  126       TextFile::iterator it;
 
  128       it = text_file.begin();
 
  130       sequences.resize(text_file.size(), std::vector<std::pair<Int, DoubleReal> >());
 
  131       labels.resize(text_file.size(), 0.);
 
  132       while (counter < text_file.size() && it != text_file.end())
 
  134         it->split(
' ', parts);
 
  135         labels[counter] = parts[0].trim().toFloat();
 
  136         sequences[counter].resize(parts.size(), std::pair<Int, DoubleReal>());
 
  137         for (
Size j = 1; j < parts.size(); ++j)
 
  139           parts[j].split(
':', temp_parts);
 
  140           if (temp_parts.size() < 2)
 
  144           sequences[counter][j - 1].second = temp_parts[0].trim().toFloat();
 
  145           sequences[counter][j - 1].first = temp_parts[1].trim().toInt();
 
  213     void setParameter(SVM_parameter_type type, 
Int value);
 
  219     void setParameter(SVM_parameter_type type, 
DoubleReal value);
 
  226     Int train(
struct svm_problem * problem);
 
  243     void saveModel(std::string modelFilename) 
const;
 
  251     void loadModel(std::string modelFilename);
 
  259     void predict(
struct svm_problem * problem, std::vector<DoubleReal> & predicted_labels);
 
  267     void predict(
const SVMData & problem, std::vector<DoubleReal> & results);
 
  282     Int getIntParameter(SVM_parameter_type type);
 
  293     DoubleReal getDoubleParameter(SVM_parameter_type type);
 
  301     static void createRandomPartitions(svm_problem * problem, 
Size number, std::vector<svm_problem *> & partitions);
 
  309     static void createRandomPartitions(
const SVMData & problem,
 
  311                                        std::vector<SVMData> & problems);
 
  316     static svm_problem * mergePartitions(
const std::vector<svm_problem *> & problems, 
Size except);
 
  322     static void mergePartitions(
const std::vector<SVMData> & problems,
 
  332     void predict(
const std::vector<svm_node *> & vectors, std::vector<DoubleReal> & predicted_rts);
 
  338     static void getLabels(svm_problem * problem, std::vector<DoubleReal> & labels);
 
  344     DoubleReal performCrossValidation(svm_problem * problem_ul,
 
  346                                       const bool                                        is_labeled,
 
  347                                       const   std::map<SVM_parameter_type, DoubleReal> & start_values_map,
 
  348                                       const   std::map<SVM_parameter_type, DoubleReal> & step_sizes_map,
 
  349                                       const   std::map<SVM_parameter_type, DoubleReal> & end_values_map,
 
  350                                       Size                                                                                number_of_partitions,
 
  352                                       std::map<SVM_parameter_type, DoubleReal> & best_parameters,
 
  353                                       bool                                                                                            additive_step_sizes = 
true,
 
  355                                       String                                                                                      performances_file_name = 
"performances.txt",
 
  356                                       bool                                                                                            mcc_as_performance_measure = 
false);
 
  385     static DoubleReal kernelOligo(
const std::vector<std::pair<int, double> > & x,
 
  386                                   const std::vector<std::pair<int, double> > & y,
 
  387                                   const std::vector<double> & gauss_table,
 
  388                                   int                                                                     max_distance = -1);
 
  397     static DoubleReal kernelOligo(
const svm_node * x, 
const svm_node * y, 
const std::vector<DoubleReal> & gauss_table, 
DoubleReal sigma_square = 0, 
Size    max_distance = 50);
 
  403     void getSignificanceBorders(svm_problem * data, std::pair<DoubleReal, DoubleReal> & borders, 
DoubleReal confidence = 0.95, 
Size number_of_runs = 5, 
Size number_of_partitions = 5, 
DoubleReal step_size = 0.01, 
Size max_iterations = 1000000);
 
  409     void getSignificanceBorders(
const SVMData & data,
 
  410                                 std::pair<DoubleReal, DoubleReal> & sigmas,
 
  412                                 Size number_of_runs = 5,
 
  413                                 Size number_of_partitions = 5,
 
  415                                 Size max_iterations = 1000000);
 
  436     void getDecisionValues(svm_problem * data, std::vector<DoubleReal> & decision_values);
 
  444     void scaleData(svm_problem * data, 
Int max_scale_value = -1);
 
  446     static void calculateGaussTable(
Size border_length, 
DoubleReal sigma, std::vector<DoubleReal> & gauss_table);
 
  456     svm_problem * computeKernelMatrix(svm_problem * problem1, svm_problem * problem2);
 
  466     svm_problem * computeKernelMatrix(
const SVMData & problem1, 
const SVMData & problem2);
 
  472     void setTrainingSample(svm_problem * training_sample);
 
  478     void setTrainingSample(
SVMData & training_sample);
 
  489     void getSVCProbabilities(
struct svm_problem * problem, std::vector<DoubleReal> & probabilities, std::vector<DoubleReal> & prediction_labels);
 
  495     void setWeights(
const std::vector<Int> & weight_labels, 
const std::vector<DoubleReal> & weights);
 
  505     bool nextGrid_(
const std::vector<DoubleReal> & start_values,
 
  506                    const std::vector<DoubleReal> & step_sizes,
 
  507                    const std::vector<DoubleReal> & end_values,
 
  508                    const bool additive_step_sizes,
 
  509                    std::vector<DoubleReal> & actual_values);
 
  511     Size getNumberOfEnclosedPoints_(
DoubleReal m1, 
DoubleReal m2, 
const std::vector<std::pair<DoubleReal, DoubleReal> > & points);
 
  517     void initParameters_();
 
  525     static void printToVoid_(
const char * );
 
  543 #endif // OPENMS_ANALYSIS_SVM_SVMWRAPPER_H 
the C parameter of the svm 
Definition: SVMWrapper.h:179
static bool writable(const String &file)
Return true if the file is writable. 
bool operator==(const SVMData &rhs) const 
Definition: SVMWrapper.h:74
A more convenient string class. 
Definition: String.h:56
SVMData training_data_
Definition: SVMWrapper.h:537
svm_model * model_
Definition: SVMWrapper.h:528
svm_problem * training_problem_
Definition: SVMWrapper.h:536
Serves as a wrapper for the libsvm. 
Definition: SVMWrapper.h:163
svm_problem * training_set_
Definition: SVMWrapper.h:535
the epsilon parameter for epsilon-SVR 
Definition: SVMWrapper.h:181
SVM_kernel_type
Kernel type. 
Definition: SVMWrapper.h:189
std::vector< DoubleReal > gauss_table_
Definition: SVMWrapper.h:531
std::vector< DoubleReal > sigmas_
Definition: SVMWrapper.h:530
Size border_length_
Definition: SVMWrapper.h:534
Size kernel_type_
Definition: SVMWrapper.h:533
svm_parameter * param_
Definition: SVMWrapper.h:527
static bool exists(const String &file)
Method used to test if a file exists. 
Data structure used in SVMWrapper. 
Definition: SVMWrapper.h:57
the svm type can be NU_SVR or EPSILON_SVR 
Definition: SVMWrapper.h:176
SVMData()
Definition: SVMWrapper.h:62
Definition: SVMWrapper.h:184
the gamma parameter of the POLY, RBF and SIGMOID kernel 
Definition: SVMWrapper.h:182
Definition: SVMWrapper.h:183
std::vector< std::vector< DoubleReal > > gauss_tables_
Definition: SVMWrapper.h:532
DoubleReal sigma_
Definition: SVMWrapper.h:529
SVMData(std::vector< std::vector< std::pair< Int, DoubleReal > > > &seqs, std::vector< DoubleReal > &lbls)
Definition: SVMWrapper.h:68
static bool empty(const String &file)
Return true if the file does not exist or the file is empty. 
the degree for the polynomial kernel 
Definition: SVMWrapper.h:178
size_t Size
Size type, e.g. used for variables that hold the result of size() 
Definition: Types.h:144
std::vector< DoubleReal > labels
Definition: SVMWrapper.h:60
Base class for all classes that want to report their progress. 
Definition: ProgressLogger.h:56
std::vector< std::vector< std::pair< Int, DoubleReal > > > sequences
Definition: SVMWrapper.h:59
bool store(const String &filename) const 
Definition: SVMWrapper.h:80
int Int
Signed integer type. 
Definition: Types.h:100
static bool readable(const String &file)
Return true if the file exists and is readable. 
the nu parameter for nu-SVR 
Definition: SVMWrapper.h:180
This class provides some basic file handling methods for text files. 
Definition: TextFile.h:47
the kernel type 
Definition: SVMWrapper.h:177
bool load(const String &filename)
Definition: SVMWrapper.h:106
SVM_parameter_type
Parameters for the svm to be set from outside. 
Definition: SVMWrapper.h:174