00001 00030 #include <itpp/stat/mog_diag_kmeans.h> 00031 #include <iostream> 00032 00033 namespace itpp 00034 { 00035 00036 inline double MOG_diag_kmeans_sup::dist(const double * x, const double * y) const 00037 { 00038 double acc = 0.0; 00039 for (int d = 0;d < D;d++) { double tmp = x[d] - y[d]; acc += tmp * tmp; } 00040 return(acc); 00041 } 00042 00043 void MOG_diag_kmeans_sup::assign_to_means() 00044 { 00045 00046 for (int k = 0;k < K;k++) c_count[k] = 0; 00047 00048 for (int n = 0;n < N;n++) { 00049 00050 int k = 0; 00051 double min_dist = dist(c_means[k], c_X[n]); 00052 int k_winner = k; 00053 00054 for (int k = 1;k < K;k++) { 00055 double tmp_dist = dist(c_means[k], c_X[n]); 00056 if (tmp_dist < min_dist) { min_dist = tmp_dist; k_winner = k; } 00057 } 00058 00059 c_partitions[ k_winner ][ count[k_winner] ] = n; 00060 c_count[k_winner]++; 00061 } 00062 } 00063 00064 00065 void MOG_diag_kmeans_sup::recalculate_means() 00066 { 00067 00068 for (int k = 0;k < K;k++) { 00069 00070 for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0; 00071 00072 int Nk = c_count[k]; 00073 00074 for (int n = 0;n < Nk;n++) { 00075 double * x = c_X[ c_partitions[k][n] ]; 00076 for (int d = 0;d < D;d++) c_tmpvec[d] += x[d]; 00077 } 00078 00079 if (Nk > 0) { 00080 double * c_mean = c_means[k]; 00081 for (int d = 0;d < D;d++) c_mean[d] = c_tmpvec[d] / Nk; 00082 } 00083 } 00084 00085 } 00086 00087 00088 bool MOG_diag_kmeans_sup::dezombify_means() 00089 { 00090 00091 static int counter = 0; 00092 00093 bool zombie_mean = false; 00094 00095 int k = 0; 00096 int max_count = count[k]; 00097 int k_hog = k; 00098 00099 for (int k = 1;k < K;k++) if (c_count[k] > max_count) { max_count = c_count[k]; k_hog = k; } 00100 00101 for (int k = 0;k < K;k++) { 00102 if (c_count[k] == 0) { 00103 00104 zombie_mean = true; 00105 if (verbose) { 00106 it_warning("MOG_diag_kmeans_sup::dezombify_means(): detected zombie mean"); 00107 } 00108 if (k_hog == k) { 00109 it_warning("MOG_diag_kmeans_sup::dezombify_means(): weirdness: k_hog == k"); 00110 return(false); 00111 } 00112 00113 if (counter >= c_count[k_hog]) counter = 0; 00114 00115 double * c_mean = c_means[k]; 00116 double * c_x = c_X[ c_partitions[k_hog][counter] ]; 00117 00118 for (int d = 0;d < D;d++) c_mean[d] = 0.5 * (c_means[k_hog][d] + c_x[d]); 00119 counter++; 00120 } 00121 00122 } 00123 00124 if (zombie_mean) assign_to_means(); 00125 00126 return(true); 00127 } 00128 00129 00130 double MOG_diag_kmeans_sup::measure_change() const 00131 { 00132 00133 double tmp_dist = 0.0; 00134 for (int k = 0;k < K;k++) tmp_dist += dist(c_means[k], c_means_old[k]); 00135 return(tmp_dist); 00136 } 00137 00138 00139 void MOG_diag_kmeans_sup::initial_means() 00140 { 00141 00142 for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0; 00143 00144 for (int n = 0;n < N;n++) { 00145 double * c_x = c_X[n]; 00146 for (int d = 0;d < D;d++) c_tmpvec[d] += c_x[d]; 00147 } 00148 00149 for (int d = 0;d < D;d++) c_tmpvec[d] /= N; 00150 00151 int step = int(floor(double(N) / double(K))); 00152 for (int k = 0;k < K;k++) { 00153 double * c_mean = c_means[k]; 00154 double * c_x = c_X[k*step]; 00155 00156 for (int d = 0;d < D;d++) c_mean[d] = 0.5 * (c_tmpvec[d] + c_x[d]); 00157 } 00158 } 00159 00160 00161 void MOG_diag_kmeans_sup::iterate() 00162 { 00163 00164 for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) c_means_old[k][d] = c_means[k][d]; 00165 00166 for (int i = 0;i < max_iter;i++) { 00167 00168 assign_to_means(); 00169 if (!dezombify_means()) return; 00170 recalculate_means(); 00171 00172 double change = measure_change(); 00173 00174 if (verbose) std::cout << "MOG_diag_kmeans_sup::iterate(): iteration = " << i << " change = " << change << std::endl; 00175 if (change == 0) break; 00176 00177 for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) c_means_old[k][d] = c_means[k][d]; 00178 } 00179 00180 } 00181 00182 00183 void MOG_diag_kmeans_sup::calc_means() 00184 { 00185 initial_means(); 00186 iterate(); 00187 } 00188 00189 00190 void MOG_diag_kmeans_sup::calc_covs() 00191 { 00192 00193 for (int k = 0;k < K;k++) { 00194 int Nk = c_count[k]; 00195 00196 if (Nk >= 2) { 00197 double * c_mean = c_means[k]; 00198 00199 for (int d = 0;d < D;d++) c_tmpvec[d] = 0.0; 00200 00201 for (int n = 0;n < Nk;n++) { 00202 double * c_x = c_X[ c_partitions[k][n] ]; 00203 for (int d = 0;d < D;d++) { double tmp = c_x[d] - c_mean[d]; c_tmpvec[d] += tmp * tmp; } 00204 } 00205 00206 for (int d = 0;d < D;d++) c_diag_covs[k][d] = trust * (c_tmpvec[d] / (Nk - 1.0)) + (1.0 - trust) * (1.0); 00207 } 00208 else { 00209 for (int d = 0;d < D;d++) c_diag_covs[k][d] = 1.0; 00210 } 00211 } 00212 00213 } 00214 00215 00216 void MOG_diag_kmeans_sup::calc_weights() 00217 { 00218 for (int k = 0;k < K;k++) c_weights[k] = trust * (c_count[k] / double(N)) + (1.0 - trust) * (1.0 / K); 00219 } 00220 00221 00222 void MOG_diag_kmeans_sup::normalise_vectors() 00223 { 00224 00225 for (int d = 0;d < D;d++) { 00226 double acc = 0.0; 00227 for (int n = 0;n < N;n++) acc += c_X[n][d]; 00228 c_norm_mu[d] = acc / N; 00229 } 00230 00231 for (int d = 0;d < D;d++) { 00232 double acc = 0.0; 00233 for (int n = 0;n < N;n++) { double tmp = c_X[n][d] - c_norm_mu[d]; acc += tmp * tmp; } 00234 c_norm_sd[d] = std::sqrt(acc / (N - 1)); 00235 } 00236 00237 for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) { 00238 c_X[n][d] -= c_norm_mu[d]; 00239 if (c_norm_sd[d] > 0.0) c_X[n][d] /= c_norm_sd[d]; 00240 } 00241 } 00242 00243 00244 void MOG_diag_kmeans_sup::unnormalise_vectors() 00245 { 00246 00247 for (int n = 0;n < N;n++) for (int d = 0;d < D;d++) { 00248 if (c_norm_sd[d] > 0.0) c_X[n][d] *= c_norm_sd[d]; 00249 c_X[n][d] += c_norm_mu[d]; 00250 } 00251 } 00252 00253 00254 void MOG_diag_kmeans_sup::unnormalise_means() 00255 { 00256 00257 for (int k = 0;k < K;k++) for (int d = 0;d < D;d++) { 00258 if (norm_sd[d] > 0.0) c_means[k][d] *= c_norm_sd[d]; 00259 c_means[k][d] += norm_mu[d]; 00260 } 00261 } 00262 00263 00264 void MOG_diag_kmeans_sup::run(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in) 00265 { 00266 00267 it_assert(model_in.is_valid(), "MOG_diag_kmeans_sup::run(): given model is not valid"); 00268 it_assert((max_iter_in > 0), "MOG_diag_kmeans_sup::run(): 'max_iter' needs to be greater than zero"); 00269 it_assert(((trust_in >= 0.0) && (trust_in <= 1.0)), "MOG_diag_kmeans_sup::run(): 'trust' must be between 0 and 1 (inclusive)"); 00270 00271 verbose = verbose_in; 00272 00273 Array<vec> means_in = model_in.get_means(); 00274 Array<vec> diag_covs_in = model_in.get_diag_covs(); 00275 vec weights_in = model_in.get_weights(); 00276 00277 init(means_in, diag_covs_in, weights_in); 00278 00279 means_in.set_size(0); 00280 diag_covs_in.set_size(0); 00281 weights_in.set_size(0); 00282 00283 it_assert(check_size(X_in), "MOG_diag_kmeans_sup::run(): 'X' is empty or contains vectors of wrong dimensionality"); 00284 00285 N = X_in.size(); 00286 00287 if (K > N) { 00288 it_warning("MOG_diag_kmeans_sup::run(): K > N"); 00289 } 00290 else { 00291 if (K > N / 10) { 00292 it_warning("MOG_diag_kmeans_sup::run(): K > N/10"); 00293 } 00294 } 00295 00296 max_iter = max_iter_in; 00297 trust = trust_in; 00298 00299 means_old.set_size(K); 00300 for (int k = 0;k < K;k++) means_old(k).set_size(D); 00301 partitions.set_size(K); 00302 for (int k = 0;k < K;k++) partitions(k).set_size(N); 00303 count.set_size(K); 00304 tmpvec.set_size(D); 00305 norm_mu.set_size(D); 00306 norm_sd.set_size(D); 00307 00308 c_X = enable_c_access(X_in); 00309 c_means_old = enable_c_access(means_old); 00310 c_partitions = enable_c_access(partitions); 00311 c_count = enable_c_access(count); 00312 c_tmpvec = enable_c_access(tmpvec); 00313 c_norm_mu = enable_c_access(norm_mu); 00314 c_norm_sd = enable_c_access(norm_sd); 00315 00316 if (normalise_in) normalise_vectors(); 00317 00318 calc_means(); 00319 if (normalise_in) { unnormalise_vectors(); unnormalise_means(); } 00320 calc_covs(); 00321 calc_weights(); 00322 00323 model_in.init(means, diag_covs, weights); 00324 00325 disable_c_access(c_X); 00326 disable_c_access(c_means_old); 00327 disable_c_access(c_partitions); 00328 disable_c_access(c_count); 00329 disable_c_access(c_tmpvec); 00330 disable_c_access(c_norm_mu); 00331 disable_c_access(c_norm_sd); 00332 00333 means_old.set_size(0); 00334 partitions.set_size(0); 00335 count.set_size(0); 00336 tmpvec.set_size(0); 00337 norm_mu.set_size(0); 00338 norm_sd.set_size(0); 00339 00340 cleanup(); 00341 00342 } 00343 00344 // 00345 // convenience functions 00346 00347 void MOG_diag_kmeans(MOG_diag &model_in, Array<vec> &X_in, int max_iter_in, double trust_in, bool normalise_in, bool verbose_in) 00348 { 00349 MOG_diag_kmeans_sup km; 00350 km.run(model_in, X_in, max_iter_in, trust_in, normalise_in, verbose_in); 00351 } 00352 00353 00354 } 00355
Generated on Sat Jul 9 2011 15:21:33 for IT++ by Doxygen 1.7.4