IT++ Logo
mog_diag_kmeans.cpp
Go to the documentation of this file.
00001 
00030 #include <itpp/stat/mog_diag_kmeans.h>
00031 #include <iostream>
00032 
00033 namespace itpp
00034 {
00035 
00036 inline double MOG_diag_kmeans_sup::dist(const double * x, const double * y) const
00037 {
00038   double acc = 0.0;
00039   for (int d = 0;d < D;d++) { double tmp = x[d] - y[d]; acc += tmp * tmp; }
00040   return(acc);
00041 }
00042 
00043 void MOG_diag_kmeans_sup::assign_to_means()
00044 {
00045 
00046   for (int k = 0;k < K;k++) c_count[k] = 0;
00047 
00048   for (int n = 0;n < N;n++) {
00049 
00050     int k = 0;
00051     double min_dist = dist(c_means[k], c_X[n]);
00052     int k_winner = k;
00053 
00054     for (int k = 1;k < K;k++) {
00055       double tmp_dist = dist(c_means[k], c_X[n]);
00056       if (tmp_dist < min_dist) { min_dist = tmp_dist; k_winner = k; }
00057     }
00058 
00059     c_partitions[ k_winner ][ count[k_winner] ] = n;
00060     c_count[k_winner]++;
00061   }
00062 }
00063 
00064 
00065 void MOG_diag_kmeans_sup::recalculate_means()
00066 {
00067 
00068   for (int k = 0;k < K;k++) {
00069 
00070     for (int d = 0;d < D;d++)  c_tmpvec[d] = 0.0;
00071 
00072     int Nk = c_count[k];
00073 
00074     for (int n = 0;n < Nk;n++) {
00075       double * x = c_X[ c_partitions[k][n] ];
00076       for (int d = 0;d < D;d++)  c_tmpvec[d] += x[d];
00077     }
00078 
00079     if (Nk > 0) {
00080       double * c_mean = c_means[k];
00081       for (int d = 0;d < D;d++)  c_mean[d] = c_tmpvec[d] / Nk;
00082     }
00083   }
00084 
00085 }
00086 
00087 
00088 bool MOG_diag_kmeans_sup::dezombify_means()
00089 {
00090 
00091   static int counter = 0;
00092 
00093   bool zombie_mean = false;
00094 
00095   int k = 0;
00096   int max_count = count[k];
00097   int k_hog = k;
00098 
00099   for (int k = 1;k < K;k++)  if (c_count[k] > max_count) { max_count = c_count[k]; k_hog = k; }
00100 
00101   for (int k = 0;k < K;k++) {
00102     if (c_count[k] == 0) {
00103 
00104       zombie_mean = true;
00105       if (verbose) {
00106         it_warning("MOG_diag_kmeans_sup::dezombify_means(): detected zombie mean");
00107       }
00108       if (k_hog == k) {
00109         it_warning("MOG_diag_kmeans_sup::dezombify_means(): weirdness: k_hog == k");
00110         return(false);
00111       }
00112 
00113       if (counter >= c_count[k_hog])  counter = 0;
00114 
00115       double * c_mean = c_means[k];
00116       double * c_x = c_X[ c_partitions[k_hog][counter] ];
00117 
00118       for (int d = 0;d < D;d++)  c_mean[d] = 0.5 * (c_means[k_hog][d] + c_x[d]);
00119       counter++;
00120     }
00121 
00122   }
00123 
00124   if (zombie_mean)  assign_to_means();
00125 
00126   return(true);
00127 }
00128 
00129 
00130 double MOG_diag_kmeans_sup::measure_change() const
00131 {
00132 
00133   double tmp_dist = 0.0;
00134   for (int k = 0;k < K;k++)   tmp_dist += dist(c_means[k], c_means_old[k]);
00135   return(tmp_dist);
00136 }
00137 
00138 
00139 void MOG_diag_kmeans_sup::initial_means()
00140 {
00141 
00142   for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
00143 
00144   for (int n = 0;n < N;n++) {
00145     double * c_x = c_X[n];
00146     for (int d = 0;d < D;d++)  c_tmpvec[d] += c_x[d];
00147   }
00148 
00149   for (int d = 0;d < D;d++) c_tmpvec[d] /= N;
00150 
00151   int step = int(floor(double(N) / double(K)));
00152   for (int k = 0;k < K;k++) {
00153     double * c_mean = c_means[k];
00154     double * c_x = c_X[k*step];
00155 
00156     for (int d = 0;d < D;d++)  c_mean[d] = 0.5 * (c_tmpvec[d] + c_x[d]);
00157   }
00158 }
00159 
00160 
00161 void MOG_diag_kmeans_sup::iterate()
00162 {
00163 
00164   for (int k = 0;k < K;k++)  for (int d = 0;d < D;d++)  c_means_old[k][d] = c_means[k][d];
00165 
00166   for (int i = 0;i < max_iter;i++) {
00167 
00168     assign_to_means();
00169     if (!dezombify_means()) return;
00170     recalculate_means();
00171 
00172     double change = measure_change();
00173 
00174     if (verbose) std::cout << "MOG_diag_kmeans_sup::iterate(): iteration = " << i << "  change = " << change << std::endl;
00175     if (change == 0) break;
00176 
00177     for (int k = 0;k < K;k++)  for (int d = 0;d < D;d++)   c_means_old[k][d] = c_means[k][d];
00178   }
00179 
00180 }
00181 
00182 
00183 void MOG_diag_kmeans_sup::calc_means()
00184 {
00185   initial_means();
00186   iterate();
00187 }
00188 
00189 
00190 void MOG_diag_kmeans_sup::calc_covs()
00191 {
00192 
00193   for (int k = 0;k < K;k++) {
00194     int Nk = c_count[k];
00195 
00196     if (Nk >= 2) {
00197       double * c_mean = c_means[k];
00198 
00199       for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0;
00200 
00201       for (int n = 0;n < Nk;n++) {
00202         double * c_x = c_X[ c_partitions[k][n] ];
00203         for (int d = 0;d < D;d++) { double tmp = c_x[d] - c_mean[d];  c_tmpvec[d] += tmp * tmp; }
00204       }
00205 
00206       for (int d = 0;d < D;d++)  c_diag_covs[k][d] = trust * (c_tmpvec[d] / (Nk - 1.0)) + (1.0 - trust) * (1.0);
00207     }
00208     else {
00209       for (int d = 0;d < D;d++)  c_diag_covs[k][d] = 1.0;
00210     }
00211   }
00212 
00213 }
00214 
00215 
00216 void MOG_diag_kmeans_sup::calc_weights()
00217 {
00218   for (int k = 0;k < K;k++)  c_weights[k] = trust * (c_count[k] / double(N)) + (1.0 - trust) * (1.0 / K);
00219 }
00220 
00221 
00222 void MOG_diag_kmeans_sup::normalise_vectors()
00223 {
00224 
00225   for (int d = 0;d < D;d++) {
00226     double acc = 0.0;
00227     for (int n = 0;n < N;n++)  acc += c_X[n][d];
00228     c_norm_mu[d] = acc / N;
00229   }
00230 
00231   for (int d = 0;d < D;d++) {
00232     double acc = 0.0;
00233     for (int n = 0;n < N;n++) { double tmp = c_X[n][d] - c_norm_mu[d];  acc += tmp * tmp; }
00234     c_norm_sd[d] = std::sqrt(acc / (N - 1));
00235   }
00236 
00237   for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) {
00238       c_X[n][d] -= c_norm_mu[d];
00239       if (c_norm_sd[d] > 0.0)  c_X[n][d] /= c_norm_sd[d];
00240     }
00241 }
00242 
00243 
00244 void MOG_diag_kmeans_sup::unnormalise_vectors()
00245 {
00246 
00247   for (int n = 0;n < N;n++)  for (int d = 0;d < D;d++) {
00248       if (c_norm_sd[d] > 0.0)  c_X[n][d] *= c_norm_sd[d];
00249       c_X[n][d] += c_norm_mu[d];
00250     }
00251 }
00252 
00253 
00254 void MOG_diag_kmeans_sup::unnormalise_means()
00255 {
00256 
00257   for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) {
00258       if (norm_sd[d] > 0.0) c_means[k][d] *= c_norm_sd[d];
00259       c_means[k][d] += norm_mu[d];
00260     }
00261 }
00262 
00263 
00264 void MOG_diag_kmeans_sup::run(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in)
00265 {
00266 
00267   it_assert(model_in.is_valid(), "MOG_diag_kmeans_sup::run(): given model is not valid");
00268   it_assert((max_iter_in > 0), "MOG_diag_kmeans_sup::run(): 'max_iter' needs to be greater than zero");
00269   it_assert(((trust_in >= 0.0) && (trust_in <= 1.0)), "MOG_diag_kmeans_sup::run(): 'trust' must be between 0 and 1 (inclusive)");
00270 
00271   verbose = verbose_in;
00272 
00273   Array<vec> means_in = model_in.get_means();
00274   Array<vec> diag_covs_in = model_in.get_diag_covs();
00275   vec weights_in = model_in.get_weights();
00276 
00277   init(means_in, diag_covs_in, weights_in);
00278 
00279   means_in.set_size(0);
00280   diag_covs_in.set_size(0);
00281   weights_in.set_size(0);
00282 
00283   it_assert(check_size(X_in), "MOG_diag_kmeans_sup::run(): 'X' is empty or contains vectors of wrong dimensionality");
00284 
00285   N = X_in.size();
00286 
00287   if (K > N) {
00288     it_warning("MOG_diag_kmeans_sup::run(): K > N");
00289   }
00290   else {
00291     if (K > N / 10) {
00292       it_warning("MOG_diag_kmeans_sup::run(): K > N/10");
00293     }
00294   }
00295 
00296   max_iter = max_iter_in;
00297   trust = trust_in;
00298 
00299   means_old.set_size(K);
00300   for (int k = 0;k < K;k++) means_old(k).set_size(D);
00301   partitions.set_size(K);
00302   for (int k = 0;k < K;k++) partitions(k).set_size(N);
00303   count.set_size(K);
00304   tmpvec.set_size(D);
00305   norm_mu.set_size(D);
00306   norm_sd.set_size(D);
00307 
00308   c_X = enable_c_access(X_in);
00309   c_means_old = enable_c_access(means_old);
00310   c_partitions = enable_c_access(partitions);
00311   c_count = enable_c_access(count);
00312   c_tmpvec = enable_c_access(tmpvec);
00313   c_norm_mu = enable_c_access(norm_mu);
00314   c_norm_sd = enable_c_access(norm_sd);
00315 
00316   if (normalise_in)  normalise_vectors();
00317 
00318   calc_means();
00319   if (normalise_in) { unnormalise_vectors(); unnormalise_means(); }
00320   calc_covs();
00321   calc_weights();
00322 
00323   model_in.init(means, diag_covs, weights);
00324 
00325   disable_c_access(c_X);
00326   disable_c_access(c_means_old);
00327   disable_c_access(c_partitions);
00328   disable_c_access(c_count);
00329   disable_c_access(c_tmpvec);
00330   disable_c_access(c_norm_mu);
00331   disable_c_access(c_norm_sd);
00332 
00333   means_old.set_size(0);
00334   partitions.set_size(0);
00335   count.set_size(0);
00336   tmpvec.set_size(0);
00337   norm_mu.set_size(0);
00338   norm_sd.set_size(0);
00339 
00340   cleanup();
00341 
00342 }
00343 
00344 //
00345 // convenience functions
00346 
00347 void MOG_diag_kmeans(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in)
00348 {
00349   MOG_diag_kmeans_sup km;
00350   km.run(model_in, X_in, max_iter_in, trust_in, normalise_in, verbose_in);
00351 }
00352 
00353 
00354 }
00355 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines
SourceForge Logo

Generated on Sat Jul 9 2011 15:21:33 for IT++ by Doxygen 1.7.4