39 #include <boost/unordered/unordered_set.hpp> 
   44 #ifndef OPENMS_COMPARISON_CLUSTERING_HIERARCHICALCLUSTERING_H 
   45 #define OPENMS_COMPARISON_CLUSTERING_HIERARCHICALCLUSTERING_H 
   66   template <
typename Po
intRef>
 
   81       public std::pair<PointCoordinate, PointCoordinate>
 
   94         return this->second - this->first;
 
  104         lit = this->first.begin(); rit = rhs.first.begin();
 
  105         for (; lit != this->first.end(); ++lit, ++rit) *lit = std::min(*lit, *rit);
 
  108         lit = this->second.begin(); rit = rhs.second.begin();
 
  109         for (; lit != this->second.end(); ++lit, ++rit) *lit = std::max(*lit, *rit);
 
  134       public boost::unordered_multimap<PointCoordinate, PointRef>
 
  173         coord(coord), bbox(bbox),
 
  174         left(left), right(right),
 
  181     typedef std::map<typename Grid::CellIndex, std::pair<typename Grid::CellContent *, bool> > 
ClusterCells;
 
  192         distance(distance), left(left), right(right)
 
  203     typedef std::priority_queue<TreeDistance, std::vector<TreeDistance>, std::greater<TreeDistance> > 
TreeDistanceQueue;
 
  211       grid(cluster_dimension)
 
  223       it->second.insert(std::make_pair(d, ref));
 
  233       std::vector<typename Grid::CellIndex> cells;
 
  234       for (
typename Grid::const_grid_iterator it = 
grid.grid_begin(); it != 
grid.grid_end(); ++it)
 
  235         cells.push_back(it->first);
 
  237       for (
typename std::vector<typename Grid::CellIndex>::const_iterator it = cells.begin(); it != cells.end(); ++it)
 
  250       return grid.insert(std::make_pair(p, Cluster(p)));
 
  276     void gridCell_(
const typename Grid::CellIndex & cur, 
ClusterCells & cells, 
bool center = 
false, 
bool ignore_missing = 
true)
 
  280         cells.insert(std::make_pair(cur, std::make_pair(&
grid.grid_at(cur), center)));
 
  282       catch (std::out_of_range &)
 
  284         if (!ignore_missing) 
throw;
 
  294       DoubleReal dist_min = std::numeric_limits<DoubleReal>::infinity();
 
  295       typename ClusterTrees::const_iterator dist_it = trees.end();
 
  298       for (
typename ClusterTrees::const_iterator it = trees.begin(); it != trees.end(); ++it)
 
  300         if (tree == *it) 
continue;
 
  310       if (dist_it != trees.end()) dists.push(TreeDistance(dist_min, tree, *dist_it));
 
  324       const BoundingBox bbox = left->bbox | right->bbox;
 
  327         return std::numeric_limits<DoubleReal>::infinity();
 
  343       if (tree->left && tree->right)
 
  350         cluster.insert(std::make_pair(tree->bbox.first, tree->ref));
 
  363       if (tree->left && tree->right)
 
  381       for (; it != ret.
end(); ++it, ++lit) *it = *lit / rhs;
 
  390       for (; it != ret.
end(); ++it, ++lit, ++rit) *it = *lit / *rit;
 
  397       for (; lit != lhs.
end(); ++lit, ++rit)
 
  399         if (*lit > *rit) 
return true;
 
  409       for (; it != p.
end(); ++it) ret += std::pow(*it, 2.);
 
  410       return std::sqrt(ret);
 
  415   template <
typename I>
 
  425       gridCells5x5_(cur, cells);
 
  427     catch (std::out_of_range &)
 
  433     for (
typename ClusterCells::iterator cell_it = cells.begin(); cell_it != cells.end(); ++cell_it)
 
  435       typename Grid::CellContent & cell_cur = *cell_it->second.first;
 
  436       const bool & cell_center = cell_it->second.second;
 
  439       typename Grid::cell_iterator cluster_tmp_it = cell_cur.begin();
 
  440       while (cluster_tmp_it != cell_cur.end())
 
  442         typename Grid::cell_iterator cluster_it = cluster_tmp_it;
 
  446         if (cluster_it->second.size() == 1)
 
  449           for (
typename Cluster::const_iterator point_it = cluster_it->second.begin(); point_it != cluster_it->second.end(); ++point_it)
 
  453             addTreeDistance_(tree, trees, dists);
 
  457           cell_cur.erase(cluster_it);
 
  463     while (!dists.empty())
 
  465       const typename TreeDistanceQueue::value_type cur_dist = dists.top();
 
  466       TreeNode * tree_left(cur_dist.left), *tree_right(cur_dist.right);
 
  470       Size count_left = trees.count(tree_left), count_right = trees.count(tree_right);
 
  471       if (count_left && count_right)
 
  473         trees.erase(tree_left);
 
  474         trees.erase(tree_right);
 
  480         const UInt & left_points = tree_left->points, & right_points = tree_right->
points;
 
  481         const PointCoordinate coord = coordScalarDiv_(left * left_points + right * right_points, left_points + right_points);
 
  485         addTreeDistance_(tree, trees, dists);
 
  490         addTreeDistance_(tree_left, trees, dists);
 
  491       else if (count_right)
 
  492         addTreeDistance_(tree_right, trees, dists);
 
  496     for (
typename ClusterTrees::iterator tree_it = trees.begin(); tree_it != trees.end(); ++tree_it)
 
  499       if ((**tree_it).center)
 
  501         Cluster & cluster = insertCluster_((**tree_it).bbox)->second;
 
  502         tree2Cluster_(*tree_it, cluster);
 
  507         tree2Points_(*tree_it);
 
  513   template <
typename I>
 
  517     gridCell_(base, cells, 
true, 
false);
 
  519     typename Grid::CellIndex cur = base;
 
  522     cur[1] -= 2; gridCell_(cur, cells);
 
  524     cur[1] += 1; gridCell_(cur, cells);
 
  526     cur[1] += 1; gridCell_(cur, cells);
 
  528     cur[1] += 1; gridCell_(cur, cells);
 
  530     cur[1] += 1; gridCell_(cur, cells);
 
  532     cur = base; cur[0] -= 1;
 
  534     cur[1] -= 2; gridCell_(cur, cells);
 
  536     cur[1] += 1; gridCell_(cur, cells, 
true);
 
  538     cur[1] += 1; gridCell_(cur, cells, 
true);
 
  540     cur[1] += 1; gridCell_(cur, cells, 
true);
 
  542     cur[1] += 1; gridCell_(cur, cells);
 
  546     cur[1] -= 2; gridCell_(cur, cells);
 
  548     cur[1] += 1; gridCell_(cur, cells, 
true);
 
  552     cur[1] += 1; gridCell_(cur, cells, 
true);
 
  554     cur[1] += 1; gridCell_(cur, cells);
 
  556     cur = base; cur[0] += 1;
 
  558     cur[1] -= 2; gridCell_(cur, cells);
 
  560     cur[1] += 1; gridCell_(cur, cells, 
true);
 
  562     cur[1] += 1; gridCell_(cur, cells, 
true);
 
  564     cur[1] += 1; gridCell_(cur, cells, 
true);
 
  566     cur[1] += 1; gridCell_(cur, cells);
 
  568     cur = base; cur[0] += 2;
 
  570     cur[1] -= 2; gridCell_(cur, cells);
 
  572     cur[1] += 1; gridCell_(cur, cells);
 
  574     cur[1] += 1; gridCell_(cur, cells);
 
  576     cur[1] += 1; gridCell_(cur, cells);
 
  578     cur[1] += 1; gridCell_(cur, cells);
 
Container for (2-dimensional coordinate, value) pairs. 
Definition: HashGrid.h:62
void tree2Points_(const TreeNode *tree)
Recursively add the points of an unfinished cluster back to the grid. All points are saved in the lea...
Definition: HierarchicalClustering.h:361
DoubleReal treeDistance_(TreeNode *left, TreeNode *right)
Returns distance of two tree nodes Returns the euclidean distance of the coordinates of the two trees...
Definition: HierarchicalClustering.h:322
BoundingBox(const PointCoordinate &p)
Definition: HierarchicalClustering.h:84
const CoordinateType * const_iterator
Definition: DPosition.h:75
DoubleReal distance
Definition: HierarchicalClustering.h:188
Grid grid
The hash grid. 
Definition: HierarchicalClustering.h:154
Generic 2-dimensional hierarchical clustering with geometric hashing. 
Definition: HierarchicalClustering.h:67
void gridCell_(const typename Grid::CellIndex &cur, ClusterCells &cells, bool center=false, bool ignore_missing=true)
Collect one cell. 
Definition: HierarchicalClustering.h:276
static PointCoordinate coordScalarDiv_(const PointCoordinate &lhs, const DoubleReal &rhs)
Definition: HierarchicalClustering.h:376
const BoundingBox bbox
Definition: HierarchicalClustering.h:162
boost::unordered_set< TreeNode * > ClusterTrees
Definition: HierarchicalClustering.h:182
BoundingBox & operator|=(const BoundingBox &rhs)
Intersection of bounding box. 
Definition: HierarchicalClustering.h:98
BoundingBox(const BoundingBox &b)
Definition: HierarchicalClustering.h:88
Set of points. Describes a cluster on the grid. A point consists of a PointCoordinate and a PointRef...
Definition: HierarchicalClustering.h:133
const PointCoordinate coord
Definition: HierarchicalClustering.h:161
Grid::cell_iterator insertPoint(const PointCoordinate &d, const PointRef &ref)
Insert new PointCoordinate into grid. 
Definition: HierarchicalClustering.h:220
const PointRef ref
Definition: HierarchicalClustering.h:166
static PointCoordinate coordElemDiv_(const PointCoordinate &lhs, const PointCoordinate &rhs)
Definition: HierarchicalClustering.h:385
ConstIterator end() const 
Non-mutable end iterator. 
Definition: DPosition.h:389
CoordinateType * iterator
Definition: DPosition.h:74
Wrapper class for two trees and the corresponding distance. 
Definition: HierarchicalClustering.h:185
TreeNode(const PointCoordinate &coord, const PointRef &ref, bool center)
Definition: HierarchicalClustering.h:168
PointCoordinate size() const 
Definition: HierarchicalClustering.h:92
Representation of a coordinate in D-dimensional space. 
Definition: DPosition.h:52
DPosition< 2, DoubleReal > PointCoordinate
Coordinate of a point to be clustered. 
Definition: HierarchicalClustering.h:74
Grid::cell_iterator insertCluster_(const P &p)
Insert new Cluster into grid. 
Definition: HierarchicalClustering.h:248
BoundingBox operator|(const BoundingBox &rhs) const 
Intersection of bounding box. 
Definition: HierarchicalClustering.h:115
static DoubleReal coordDist_(const PointCoordinate &lhs, const PointCoordinate &rhs)
Definition: HierarchicalClustering.h:404
void clusterIndex_(const typename Grid::CellIndex &p)
Perform clustering at given cell index. 
Definition: HierarchicalClustering.h:416
TreeNode * right
Definition: HierarchicalClustering.h:189
void tree2Cluster_(const TreeNode *tree, Cluster &cluster)
Recursively add the points of a finished cluster into the hash grid. All points are saved in the leaf...
Definition: HierarchicalClustering.h:341
UInt points
Definition: HierarchicalClustering.h:164
void cluster()
Perform clustering of all existing points. 
Definition: HierarchicalClustering.h:230
TreeNode(const PointCoordinate &coord, const BoundingBox &bbox, TreeNode *left, TreeNode *right)
Definition: HierarchicalClustering.h:172
HierarchicalClustering(const PointCoordinate &cluster_dimension)
Constructor. 
Definition: HierarchicalClustering.h:210
HashGrid< Cluster > Grid
The hash grid data type. 
Definition: HierarchicalClustering.h:147
std::priority_queue< TreeDistance, std::vector< TreeDistance >, std::greater< TreeDistance > > TreeDistanceQueue
Priority queue queue used to find minimum distances. 
Definition: HierarchicalClustering.h:203
void addTreeDistance_(TreeNode *tree, ClusterTrees &trees, TreeDistanceQueue &dists)
Add a new tree to the set of trees and distance queue. 
Definition: HierarchicalClustering.h:291
Tree node used for clustering. 
Definition: HierarchicalClustering.h:158
const bool center
Definition: HierarchicalClustering.h:165
size_t Size
Size type e.g. used as variable which can hold result of size() 
Definition: Types.h:144
std::map< typename Grid::CellIndex, std::pair< typename Grid::CellContent *, bool > > ClusterCells
Definition: HierarchicalClustering.h:181
TreeNode * left
Definition: HierarchicalClustering.h:189
TreeNode * left
Definition: HierarchicalClustering.h:163
bool operator>(const TreeDistance &rhs) const 
Definition: HierarchicalClustering.h:195
void gridCells5x5_(typename Grid::CellIndex cur, ClusterCells &cells)
Collect all cells used to cluster at given cell index. 
Definition: HierarchicalClustering.h:514
TreeNode * right
Definition: HierarchicalClustering.h:163
ConstIterator begin() const 
Non-mutable begin iterator. 
Definition: DPosition.h:383
Cluster(const BoundingBox &bbox)
Definition: HierarchicalClustering.h:139
TreeDistance(const DoubleReal &distance, TreeNode *left, TreeNode *right)
Definition: HierarchicalClustering.h:191
static bool coordElemGreater_(const PointCoordinate &lhs, const PointCoordinate &rhs)
Definition: HierarchicalClustering.h:394
Bounding box of cluster. 
Definition: HierarchicalClustering.h:80
BoundingBox bbox
Definition: HierarchicalClustering.h:137