27 using namespace shogun;
59 REQUIRE(centers.
vlen ==
k*
dimensions,
"Vector dimension of initial cluster centers supplied does not match expectation");
76 int32_t num=lhs->get_num_vectors();
80 for (int32_t i=0; i<num; i++)
150 #ifndef DOXYGEN_SHOULD_SKIP_THIS
160 #endif // DOXYGEN_SHOULD_SKIP_THIS
166 struct thread_data *TD=(
struct thread_data*) P;
177 for (j=js; j<je; j++)
187 sum = sum +
CMath::sq(x[i*m + k] - vec[k]);
201 ASSERT(lhs && lhs->get_num_features()>0 && lhs->get_num_vectors()>0)
203 int32_t XSize=lhs->get_num_vectors();
205 int32_t i, changed=1;
213 int32_t *ClList=SG_CALLOC(int32_t, XSize);
226 memset(ClList, 0,
sizeof(int32_t)*XSize);
228 memset(weights_set, 0,
sizeof(
float64_t)*k);
231 memset(mus.matrix, 0,
sizeof(
float64_t)*XDimk);
235 for (i=0; i<XSize; i++)
241 weights_set[Cl]+=weight;
244 vec=lhs->get_feature_vector(i, vlen, vfree);
247 mus.matrix[Cl*dimensions+j] += weight*vec[j];
249 lhs->free_feature_vector(vec, i, vfree);
255 if (weights_set[i]!=0.0)
257 mus.matrix[i*dimensions+j] /= weights_set[i];
267 for(int32_t idx=0;idx<XSize;idx++)
269 for(int32_t m=0;m<
k;m++)
273 for (i=0; i<XSize; i++)
280 if (dists[i*k+j]<mini)
291 for (i=0; i<XSize; i++)
293 const int32_t Cl = ClList[i];
295 weights_set[Cl]+=weight;
297 vec=lhs->get_feature_vector(i, vlen, vfree);
300 mus.matrix[Cl*dimensions+j] += weight*vec[j];
302 lhs->free_feature_vector(vec, i, vfree);
309 if (weights_set[i]!=0.0)
313 mus.matrix[i*dimensions+j] /= weights_set[i];
325 SG_WARNING(
"kmeans clustering changed throughout %d iterations stopping...\n",
max_iter-1)
328 SG_INFO(
"Iteration[%d/%d]: Assignment of %i patterns changed.\n", iter,
max_iter, changed)
333 memset(mus.matrix, 0,
sizeof(
float64_t)*XDimk);
335 for (i=0; i<XSize; i++)
338 int32_t Cl=ClList[i];
341 vec=lhs->get_feature_vector(i, vlen, vfree);
344 mus.matrix[Cl*dimensions+j] += weight*vec[j];
346 lhs->free_feature_vector(vec, i, vfree);
352 if (weights_set[i]!=0.0)
354 mus.matrix[i*dimensions+j] /= weights_set[i];
360 for (i=0; i<XSize; i++)
364 const int32_t ClList_Pat=ClList[Pat];
368 weight=Weights.
vector[Pat];
371 for(int32_t idx_k=0;idx_k<
k;idx_k++)
375 imini=0 ; mini=dists[0];
383 if (imini!=ClList_Pat)
385 changed= changed + 1;
388 weights_set[imini]+= weight;
390 weights_set[ClList_Pat]-= weight;
392 vec=lhs->get_feature_vector(Pat, vlen, vfree);
396 mus.matrix[imini*dimensions+j]-=(vec[j]
397 -mus.matrix[imini*dimensions+j])
398 *(weight/weights_set[imini]);
401 lhs->free_feature_vector(vec, Pat, vfree);
405 if (weights_set[ClList_Pat]!=0.0)
407 vec=lhs->get_feature_vector(Pat, vlen, vfree);
411 mus.matrix[ClList_Pat*dimensions+j]-=
413 -mus.matrix[ClList_Pat
415 *(weight/weights_set[ClList_Pat]);
417 lhs->free_feature_vector(vec, Pat, vfree);
422 mus.matrix[ClList_Pat*dimensions+j]=0;
436 bool first_round=
true;
438 for (int32_t j=0; j<
k; j++)
448 mus.matrix[i*dimensions+l]
449 -mus.matrix[j*dimensions+l]);
460 if ((dist<rmin2) && (dist>=rmin1))
478 SG_FREE(weights_set);
virtual bool save(FILE *dstfile)
Class Distance, a base class for all the distances used in the Shogun toolbox.
int32_t max_iter
maximum number of iterations
int32_t dimensions
number of dimensions
SGVector< float64_t > R
radii of the clusters (size k)
virtual bool set_initial_centers(SGVector< float64_t > centers)
A generic DistanceMachine interface.
ST * get_feature_vector(int32_t num, int32_t &len, bool &dofree)
void clustknb(bool use_old_mus, float64_t *mus_start)
SGVector< float64_t > get_radiuses()
SGVector< float64_t > mus_initial
initial centers supplied
virtual void copy_feature_matrix(SGMatrix< ST > src)
virtual bool load(FILE *srcfile)
void free_feature_vector(ST *feat_vec, int32_t num, bool dofree)
void set_max_iter(int32_t iter)
CFeatures * replace_rhs(CFeatures *rhs)
virtual float64_t distance(int32_t idx_a, int32_t idx_b)
The class Features is the base class of all feature objects.
void set_distance(CDistance *d)
virtual void store_model_features()
virtual EFeatureType get_feature_type()=0
static float32_t sqrt(float32_t x)
x^0.5
virtual bool train_machine(CFeatures *data=NULL)
virtual bool init(CFeatures *lhs, CFeatures *rhs)
int32_t k
the k parameter in KMeans
void * sqdist_thread_func(void *P)
SGMatrix< float64_t > get_cluster_centers()