36 #ifndef OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H 
   37 #define OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H 
   49 #include <boost/math/special_functions/fpclassify.hpp> 
   76   template <
class PeakType, 
class FeatureType>
 
  106       Base(map, features, ff),
 
  125       std::vector<String> fit_opts;
 
  126       fit_opts.push_back(
"simple");
 
  127       fit_opts.push_back(
"simplest");
 
  128       fit_opts.push_back(
"wavelet");
 
  138       this->
defaults_.
setValue(
"tolerance_stdev_bounding_box", 3.0f, 
"Bounding box has range [minimim of data, maximum of data] enlarged by tolerance_stdev_bounding_box times the standard deviation of the data", 
StringList::create(
"advanced"));
 
  141       this->
defaults_.
setValue(
"intensity_cutoff_factor", 0.05f, 
"Cutoff peaks with a predicted intensity below intensity_cutoff_factor times the maximal intensity of the model");
 
  145       this->
defaults_.
setValue(
"feature_intensity_sum", 1, 
"Determines what is reported as feature intensity.\n1: the sum of peak intensities;\n0: the maximum intensity of all peaks", 
StringList::create(
"advanced"));
 
  149       this->
defaults_.
setValue(
"min_num_peaks:final", 5, 
"Minimum number of peaks left after cutoff. If smaller, feature will be discarded.");
 
  151       this->
defaults_.
setValue(
"min_num_peaks:extended", 10, 
"Minimum number of peaks after extension. If smaller, feature will be discarded.");
 
  155       this->
defaults_.
setValue(
"rt:interpolation_step", 0.2f, 
"Step size in seconds used to interpolate model for RT.");
 
  159       this->
defaults_.
setValue(
"mz:interpolation_step", 0.03f, 
"Interpolation step size for m/z.");
 
  161       this->
defaults_.
setValue(
"mz:model_type:first", 1, 
"Numeric id of first m/z model fitted (usually indicating the charge state), 0 = no isotope pattern (fit a single gaussian).");
 
  163       this->
defaults_.
setValue(
"mz:model_type:last", 4, 
"Numeric id of last m/z model fitted (usually indicating the charge state), 0 = no isotope pattern (fit a single gaussian).");
 
  168       std::vector<String> quality_opts;
 
  169       quality_opts.push_back(
"Correlation");
 
  170       quality_opts.push_back(
"RankCorrelation");
 
  172       this->
defaults_.
setValue(
"quality:minimum", 0.65f, 
"Minimum quality of fit, features below this threshold are discarded.");
 
  177       this->
defaults_.
setValue(
"isotope_model:stdev:first", 0.04f, 
"First standard deviation to be considered for isotope model.");
 
  179       this->
defaults_.
setValue(
"isotope_model:stdev:last", 0.12f, 
"Last standard deviation to be considered for isotope model.");
 
  181       this->
defaults_.
setValue(
"isotope_model:stdev:step", 0.04f, 
"Step size for standard deviations considered for isotope model.");
 
  195       this->
defaults_.
setSectionDescription(
"isotope_model:averagines", 
"Averagines are used to approximate the number of atoms (C,H,N,O,S) which a peptide of a given mass contains.");
 
  197       this->
defaults_.
setValue(
"isotope_model:isotope:trim_right_cutoff", 0.001f, 
"Cutoff for averagine distribution, trailing isotopes below this relative intensity are not considered.", 
StringList::create(
"advanced"));
 
  231         String mess = 
String(
"Skipping feature, IndexSet size too small: ") + index_set.size();
 
  232         throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, 
"UnableToFit-IndexSet", mess.c_str());
 
  250       if (index_set.
charge != 0)
 
  259 #ifdef DEBUG_FEATUREFINDER 
  290           for (
IndexSetIter it = index_set.begin(); it != index_set.end(); ++it)
 
  293             if (model_int > model_max) model_max = model_int;
 
  299           for (
IndexSetIter it = index_set.begin(); it != index_set.end(); ++it)
 
  303               model_set.insert(*it);
 
  308 #ifdef DEBUG_FEATUREFINDER 
  309           std::cout << 
" Selected " << model_set.size() << 
" from " << index_set.size() << 
" peaks.\n";
 
  316           for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
 
  319             model_sum += model_int;
 
  326             throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, 
"UnableToFit-ZeroSum", 
"Skipping feature, model_sum zero.");
 
  370           if (intensity_choice == 1)
 
  373             for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
 
  381             for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
 
  399 #ifdef DEBUG_FEATUREFINDER 
  403             std::cout << 
"Feature " << counter_ << 
": (" << f.
getRT()   << 
"," << f.
getMZ() << 
") Qual.: "  << max_quality << std::endl;
 
  408           std::cout << 
"Feature charge: " << f.
getCharge() << std::endl;
 
  409           std::cout << 
"Feature quality in mz: " << f.
getQuality(
MZ) << std::endl;
 
  416           String fname = 
String(
"model") + counter_ + 
"_" + rt + 
"_" + mz;
 
  417           std::ofstream file(fname.c_str());
 
  418           for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
 
  429           fname = 
String(
"feature") + counter_ + 
"_" + rt + 
"_" + mz;
 
  430           std::ofstream file2(fname.c_str());
 
  431           for (
IndexSetIter it = model_set.begin(); it != model_set.end(); ++it)
 
  446           feature_collection.push_back(f);
 
  454       if (feature_collection.empty())
 
  456         String mess = 
String(
"Skipping feature, nothing in the feature collection.");
 
  457         throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, 
"UnableToFit-EmptyFeatureCollection", mess.c_str());
 
  460       QualityType best_quality = -std::numeric_limits<QualityType>::max();
 
  462       std::size_t best_idx = 0;
 
  463       for (std::size_t idx = 0; idx < feature_collection.size(); ++idx)
 
  465         if (best_quality < feature_collection[idx].getOverallQuality())
 
  467           best_quality = feature_collection[idx].getOverallQuality();
 
  472       Feature best_feature = feature_collection[best_idx];
 
  480         throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, 
"UnableToFit-Correlation", mess.c_str());
 
  486       for (
IndexSetIter it = index_set.begin(); it != index_set.end(); ++it)
 
  490           model_set.insert(*it);
 
  500         throw Exception::UnableToFit(__FILE__, __LINE__, __PRETTY_FUNCTION__, 
"UnableToFit-FinalSet", 
String(
"Skipping feature, IndexSet size after cutoff too small: ") + model_set.size());
 
  504       for (std::size_t idx = 0; idx < feature_collection.size(); ++idx)
 
  506         if (idx == best_idx) 
continue;
 
  544       QualityType max_quality_mz = -std::numeric_limits<QualityType>::max();
 
  558           if (best_model_mz) 
delete best_model_mz;
 
  559           best_model_mz = model_mz;
 
  583         std::vector<Real> real_data;
 
  584         real_data.reserve(set.size());
 
  585         std::vector<Real> model_data;
 
  586         model_data.reserve(set.size());
 
  588         for (IndexSet::const_iterator it = set.begin(); it != set.end(); ++it)
 
  601       if (boost::math::isnan(quality)) quality = -1.0;
 
  643       if (boost::math::isnan(quality)) quality = -1.0;
 
  701       if (boost::math::isnan(quality)) quality = -1.0;
 
  714         std::map<CoordinateType, CoordinateType> data_map;
 
  718           for (IndexSet::const_iterator it = index_set.begin(); it != index_set.end(); ++it)
 
  725           for (IndexSet::const_iterator it = index_set.begin(); it != index_set.end(); ++it)
 
  732         set.resize(data_map.size());
 
  733         std::map<CoordinateType, CoordinateType>::iterator it;
 
  735         for (it = data_map.begin(); it != data_map.end(); ++it, ++i)
 
  737           set[i].setPosition((*it).first);
 
  738           set[i].setIntensity((*it).second);
 
  759 #ifdef DEBUG_FEATUREFINDER 
  810 #endif // OPENMS_TRANSFORMATIONS_FEATUREFINDER_MODELFITTER_H 
ProductModel & setModel(UInt dim, BaseModel< 1 > *dist)
set model dist for dimension dim 
Definition: ProductModel.h:184
const ChargeType & getCharge() const 
Non-mutable access to charge state. 
void update(ProbabilityIterator const probability_begin, ProbabilityIterator const probability_end, CoordinateIterator const coordinate_begin)
You can call this as often as you like, using different input vectors. 
Definition: AsymmetricStatistics.h:94
Implements a module of the FeatureFinder algorithm. 
Definition: FeaFiModule.h:157
Int first_mz_model_
first mz model (0=Gaussian, 1....n = charge ) 
Definition: ModelFitter.h:776
CoordinateType tolerance_stdev_box_
tolerance used for bounding box 
Definition: ModelFitter.h:756
RealType variance2() const 
"variance to the right hand side" 
Definition: AsymmetricStatistics.h:87
DoubleReal CoordinateType
Coordinate type (of the position) 
Definition: Peak2D.h:65
Definition: ModelFitter.h:100
Real IntensityType
Intensity type. 
Definition: Peak2D.h:63
ModelFitter()
Not implemented. 
RawDataArrayType mz_input_data_
mz raw data 
Definition: ModelFitter.h:752
float Real
Real type. 
Definition: Types.h:109
Param defaults_
Container for default parameters. This member should be filled in the constructor of derived classes!...
Definition: DefaultParamHandler.h:155
Feature::ChargeType ChargeType
Isotope charge. 
Definition: ModelFitter.h:92
CoordinateType iso_stdev_stepsize_
step size 
Definition: ModelFitter.h:774
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value. 
A more convenient string class. 
Definition: String.h:56
const ModelDescription< 2 > & getModelDescription() const 
Non-mutable access to the model description. 
CoordinateType getPeakRt(const FeatureFinderDefs::IndexPair &index) const 
Returns the retention time of a peak. 
Definition: FeaFiModule.h:210
CoordinateType deltaAbsError_
Absolute error. 
Definition: ModelFitter.h:787
void setMZ(CoordinateType coordinate)
Mutable access to the m/z coordinate (index 1) 
Definition: Peak2D.h:197
QualityType getOverallQuality() const 
Non-mutable access to the overall quality. 
IntensityType getPeakIntensity(const FeatureFinderDefs::IndexPair &index) const 
Returns the intensity of a peak. 
Definition: FeaFiModule.h:190
IntensityType getIntensity(const PositionType &pos) const 
intensity equals product of intensities in each dimension 
Definition: ProductModel.h:151
Param param_
Container for current parameters. 
Definition: DefaultParamHandler.h:148
Int ChargeType
Type of charge values. 
Definition: BaseFeature.h:64
Definition: FeatureFinderDefs.h:63
ConvexHull2D & getConvexHull() const 
Returns the overall convex hull of the feature (calculated from the convex hulls of the mass traces) ...
RealType variance() const 
Returns the variance. 
Definition: BasicStatistics.h:175
Retention time dimension id (0 if used as a const int) 
Definition: Peak2D.h:76
PeakType::IntensityType IntensityType
Input intensity type. 
Definition: FeaFiModule.h:168
RealType mean() const 
Returns the mean. 
Definition: BasicStatistics.h:171
A container for features. 
Definition: FeatureMap.h:111
virtual QualityType fit1d(const RawDataArrayType &, InterpolationModel *&)
return interpolation model 
Definition: Fitter1D.h:95
unsigned int UInt
Unsigned integer type. 
Definition: Types.h:92
Feature::QualityType QualityType
Quality of a feature. 
Definition: ModelFitter.h:86
std::vector< PeakType > RawDataArrayType
Raw data container type using for the temporary storage of the input data. 
Definition: ModelFitter.h:96
CoordinateType deltaRelError_
Relative error. 
Definition: ModelFitter.h:789
void update(ProbabilityIterator probability_begin, ProbabilityIterator const probability_end)
This does the actual calculation. 
Definition: BasicStatistics.h:114
Internal class for asymmetric distributions. 
Definition: AsymmetricStatistics.h:59
ProductModel< 2 > model2D_
2D model 
Definition: ModelFitter.h:746
FeaFiModule< PeakType, FeatureType > Base
FeaFiModule. 
Definition: ModelFitter.h:94
bool encloses(DoubleReal rt, DoubleReal mz) const 
Returns if the mass trace convex hulls of the feature enclose the position specified by rt and mz...
void setSectionDescription(const String &key, const String &description)
Sets a description for an existing section. 
void setMonoIsotopicMass(CoordinateType mz)
Sets or fixed the monoisotopic m/z at a specific position. 
Definition: ModelFitter.h:215
Mass-to-charge dimension id (1 if used as a const int) 
Definition: Peak2D.h:77
void setModelDescription(const ModelDescription< 2 > &q)
Set the model description. 
CoordinateType getRT() const 
Returns the RT coordinate (index 0) 
Definition: Peak2D.h:203
IndexSet::const_iterator IndexSetIter
IndexSet iterator. 
Definition: ModelFitter.h:84
The class template is only implemented for D=2 because we use Peak2D here. 
Definition: ProductModel.h:65
Calculates some basic statistical parameters of a distribution: sum, mean, variance, and provides the normal approximation. 
Definition: BasicStatistics.h:68
Int max_iteration_
Maximum number of iterations. 
Definition: ModelFitter.h:784
static FactoryProduct * create(const String &name)
return FactoryProduct according to unique identifier name 
Definition: Factory.h:113
void setParameters(const Param ¶m)
Sets the parameters. 
FeatureFinder * ff_
Pointer to the calling FeatureFinder that is used to access the feature flags and report progress...
Definition: FeaFiModule.h:415
The purpose of this struct is to provide definitions of classes and typedefs which are used throughou...
Definition: FeatureFinderDefs.h:51
Feature::CoordinateType CoordinateType
Single coordinate. 
Definition: ModelFitter.h:88
void setIntensity(IntensityType intensity)
Non-mutable access to the data point intensity (height) 
Definition: Peak2D.h:167
QualityType quality_rt_
fit quality in RT dimension 
Definition: ModelFitter.h:793
void setMaxInt(const String &key, Int max)
Sets the maximum value for the integer or integer list parameter key. 
QualityType fitMZLoop_(const ChargedIndexSet &set, const ChargeType &charge)
main fit loop 
Definition: ModelFitter.h:540
RawDataArrayType rt_input_data_
rt raw data 
Definition: ModelFitter.h:754
Abstract class for 1D-models that are approximated using linear interpolation. 
Definition: InterpolationModel.h:55
BaseModel< D > * createModel()
Definition: ModelDescription.h:97
QualityType fitRT_(InterpolationModel *&model) const 
1d fit in RT 
Definition: ModelFitter.h:607
CoordinateType interpolation_step_rt_
interpolation step size (in retention time) 
Definition: ModelFitter.h:766
CoordinateType getPeakMz(const FeatureFinderDefs::IndexPair &index) const 
Returns the m/z of a peak. 
Definition: FeaFiModule.h:200
const DataValue & getValue(const String &key) const 
Returns a value of a parameter. 
void setRT(CoordinateType coordinate)
Mutable access to the RT coordinate (index 0) 
Definition: Peak2D.h:209
virtual void updateMembers_()
This method is used to update extra member variables at the end of the setParameters() method...
Definition: ModelFitter.h:517
const String & getName() const 
Non-mutable access to the name. 
static DoubleReal pearsonCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
Calculates the Pearson correlation coefficient for the values in [begin_a, end_a) and [begin_b...
Definition: StatisticFunctions.h:285
Math::BasicStatistics basic_stat_
statistics 
Definition: ModelFitter.h:791
CoordinateType interpolation_step_mz_
interpolation step size (in m/z) 
Definition: ModelFitter.h:764
BaseModel< 1 > * getModel(UInt dim) const 
Definition: ProductModel.h:204
const std::vector< Feature > & getSubordinates() const 
immutable access to subordinate features 
QualityType getQuality(Size index) const 
Non-mutable access to the quality in dimension c. 
ModelFitter(const MSExperiment< PeakType > *map, FeatureMap< FeatureType > *features, FeatureFinder *ff)
Constructor. 
Definition: ModelFitter.h:105
void setQuality(Size index, QualityType q)
Set the quality in dimension c. 
IsotopeCluster::IndexSet IndexSet
A set of peak indices. 
Definition: FeatureFinderDefs.h:60
void addConvexHull(const FeatureFinderDefs::IndexSet &set, Feature &feature) const 
Calculates the convex hull of a index set and adds it to the feature. 
Definition: FeaFiModule.h:393
Retention time iterator for a FeatureFinderDefs::IndexSet. 
Definition: FeaFiModule.h:132
CoordinateType isotope_stdev_
isotope stdev 
Definition: ModelFitter.h:780
CoordinateType monoisotopic_mz_
monoistopic mass 
Definition: ModelFitter.h:758
RealType variance1() const 
"variance to the left hand side" 
Definition: AsymmetricStatistics.h:81
static StringList create(const String &list, const char splitter= ',')
Returns a list that is created by splitting the given (comma-separated) string (String are not trimme...
Tests a group of data points in an LC-MS map for goodness-of-fit with a 2D averagine model...
Definition: ModelFitter.h:77
String algorithm_
algorithm 
Definition: ModelFitter.h:782
Real QualityType
Type of quality values. 
Definition: BaseFeature.h:62
DBoundingBox< 2 > getBoundingBox() const 
returns the bounding box of the feature hull points 
Exception indicating that an invalid parameter was handed over to an algorithm. 
Definition: Exception.h:348
QualityType evaluate_(const IndexSet &set) const 
evaluate 2d-model 
Definition: ModelFitter.h:576
An LC-MS feature. 
Definition: Feature.h:66
void setScale(IntensityType scale)
set the intensity scaling factor 
Definition: ProductModel.h:217
static DoubleReal rankCorrelationCoefficient(IteratorType1 begin_a, IteratorType1 end_a, IteratorType2 begin_b, IteratorType2 end_b)
calculates the rank correlation coefficient for the values in [begin_a, end_a) and [begin_b...
Definition: StatisticFunctions.h:379
Int max_isotope_
maximum isotopic rank to be considered 
Definition: ModelFitter.h:768
void setValidStrings(const String &key, const std::vector< String > &strings)
Sets the valid strings for the parameter key. 
Management and storage of parameters / INI files. 
Definition: Param.h:69
CoordinateType getMZ() const 
Returns the m/z coordinate (index 1) 
Definition: Peak2D.h:191
Definition: ModelFitter.h:101
Representation of a mass spectrometry experiment. 
Definition: MSExperiment.h:68
m/z iterator for a FeatureFinderDefs::IndexSet 
Definition: FeaFiModule.h:110
QualityType fitMZ_(InterpolationModel *&model, const ChargeType &charge) const 
1d fit in MZ 
Definition: ModelFitter.h:651
QualityType quality_mz_
fit quality in MZ dimension 
Definition: ModelFitter.h:795
Intensity iterator for a FeatureFinderDefs::IndexSet. 
Definition: FeaFiModule.h:88
void setOverallQuality(QualityType q)
Set the overall quality. 
Int last_mz_model_
last mz model 
Definition: ModelFitter.h:778
void doProjectionDim_(const Int dim, const ChargedIndexSet &index_set, RawDataArrayType &set) const 
Project the raw data into 1-dim array. 
Definition: ModelFitter.h:709
Exception used if an error occurred while fitting a model to a given dataset. 
Definition: Exception.h:662
void setMinInt(const String &key, Int min)
Sets the minimum value for the integer or integer list parameter key. 
virtual bool isContained(const PositionType &pos) const 
check if position pos is part of the model regarding the models cut-off. 
Definition: BaseModel.h:101
index set with associated charge estimate 
Definition: IsotopeCluster.h:53
Abstract base class for all 1D-dimensional model fitter. 
Definition: Fitter1D.h:59
CoordinateType iso_stdev_last_
last stdev 
Definition: ModelFitter.h:772
void setCharge(const ChargeType &ch)
Set charge state. 
#define DEBUG_FEATUREFINDER
Definition: EGHFitter1D.h:38
Int charge
charge estimate (convention: zero means "no charge estimate") 
Definition: IsotopeCluster.h:62
The main feature finder class. 
Definition: FeatureFinder.h:57
void setName(const String &name)
Mutable access to the name. 
Math::AsymmetricStatistics rt_stat_
statistics for rt 
Definition: ModelFitter.h:750
virtual ~ModelFitter()
Destructor. 
Definition: ModelFitter.h:209
const Flag & getPeakFlag(const IndexPair &index) const 
Returns a non-mutable reference to a peak flag. 
Definition: FeatureFinder.h:91
Math::BasicStatistics mz_stat_
statistics for mz 
Definition: ModelFitter.h:748
Feature::IntensityType IntensityType
Single intensity. 
Definition: ModelFitter.h:90
int Int
Signed integer type. 
Definition: Types.h:100
void setMinFloat(const String &key, DoubleReal min)
Sets the minimum value for the floating point or floating point list parameter key. 
Feature fit(const ChargedIndexSet &index_set)
Return next feature. 
Definition: ModelFitter.h:226
virtual void setCutOff(IntensityType cut_off)
set cutoff value 
Definition: BaseModel.h:135
ModelFitter & operator=(const ModelFitter &)
Not implemented. 
void defaultsToParam_()
Updates the parameters after the defaults have been set in the constructor. 
void setMaxFloat(const String &key, DoubleReal max)
Sets the maximum value for the floating point or floating point list parameter key. 
CoordinateType iso_stdev_first_
first stdev 
Definition: ModelFitter.h:770