00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #ifndef _KMEANS_H
00022 #define _KMEANS_H
00023
00024 #include "global.h"
00025 #include "math.h"
00026
00027 struct MyKMeansInfo
00028 {
00029 public:
00030 MyKMeansInfo()
00031 : isSet(false), cost(), firstIndex()
00032 {}
00033
00034 bool isSet;
00035 MyNT cost;
00036 unsigned int firstIndex;
00037 };
00038
00039 inline ostream& operator<<(ostream &ostr, const MyKMeansInfo &info)
00040 {
00041 ostr << info.cost << " ";
00042 return(ostr);
00043 }
00044
00045 typedef pointer_to_binary_function< MyNT, MyNT, MyNT > MyDistanceFunction;
00046
00047 struct distance_Euclidean: public binary_function< MyNT, MyNT, MyNT >
00048 {
00049 MyNT operator()(MyNT __x, MyNT __y) const
00050 {
00051 return(fabs(__x - __y));
00052 }
00053 };
00054
00055 struct distance_squared_Euclidean: public binary_function< MyNT, MyNT, MyNT >
00056 {
00057 MyNT operator()(MyNT __x, MyNT __y) const
00058 {
00059 return((__x - __y)*(__x - __y));
00060 }
00061 };
00062
00063
00064
00065
00066 template < class MyPointType >
00067 class MyKMeansClusterType
00068 {
00069
00070
00071 public:
00072 void add(MyPointType &point)
00073 {
00074 _points.insert(point);
00075 }
00076
00077 void add(const vector< MyPointType > &points);
00078
00079 void remove(MyPointType &point)
00080 {
00081 _points.erase(point);
00082 }
00083
00084 void remove(const vector< MyPointType > &points);
00085
00086
00087 void mean()
00088 {
00089 _mean = 0;
00090 typename _MyPtsType::const_iterator itr;
00091 for (itr = _points.begin(); itr != _points.end(); itr++)
00092 _mean += *itr;
00093
00094
00095
00096
00097 _mean = _mean/_points.size();
00098 }
00099
00100 void variance()
00101 {
00102 _variance = 0;
00103 typename _MyPtsType::const_iterator itr;
00104 for (itr = _points.begin(); itr != _points.end(); itr++)
00105 _variance += (*itr - _mean)*(*itr - _mean);
00106
00107
00108
00109 _variance = _variance/_points.size();
00110
00111 }
00112
00113
00114 MyNT distance(MyPointType point)
00115 {
00116 return(::distance_Euclidean(mean(), point))();
00117 }
00118
00119 private:
00120 MyPointType _mean;
00121 MyPointType _variance;
00122
00123 typedef set< MyPointType > _MyPtsType;
00124 set< MyPointType > _points;
00125
00126 };
00127
00128
00129
00130
00131 template < class MyKMeansPointType >
00132 class MyKMeansAlgo
00133 {
00134
00135
00136
00137
00138
00139
00140
00141
00142 public:
00143
00144
00145 MyKMeansAlgo(unsigned int num = 0)
00146 : _points(), _numClusters(num), _clusters(_numClusters)
00147 {}
00148
00149 void setPoints(vector< MyKMeansPointType > &points)
00150 {
00151 _points = vector< MyKMeansPointType >(points.size());
00152 for (unsigned int i = 0; i < points.size(); i++)
00153 _points[i] = points[i];
00154
00155 }
00156
00157 void setNumClusters(unsigned int num)
00158 {
00159 _numClusters = num;
00160 }
00161
00162 unsigned int getNumClusters()
00163 {
00164 return(numClusters);
00165 }
00166
00167
00168 void init()
00169 {
00170
00171
00172 static MyRand< int > generator;
00173 int index;
00174 for (unsigned int i = 0; i < _points.size(); i++)
00175 {
00176 index = generator(_numClusters - 1);
00177
00178 _clusters[index].add(_points[i]);
00179 _whichCluster[i] = index;
00180 }
00181 for (unsigned int j = 0; j < _numClusters; j++)
00182 {
00183 _clusters[j].mean();
00184 _clusters[j].variance();
00185 }
00186
00187 }
00188
00189
00190 void estep()
00191 {
00192 for (unsigned int i = 0; i < _numClusters; i++)
00193 {
00194 _clusters[i].mean();
00195 _clusters[i].variance();
00196 }
00197 }
00198
00199
00200
00201 bool mstep()
00202 {
00203 unsigned int bestCluster;
00204 MyNT bestDist, dist;
00205 bool pointMoved = false;
00206 for (unsigned int i = 0; i < _points.size(); i++)
00207 {
00208 bestCluster = 0;
00209 bestDist = _clusters[bestCluster].distance(_points[i]);;
00210 for (unsigned int j = 1; j < _numClusters; j++)
00211 {
00212 dist = _clusters[j].distance(_points[i]);
00213 if (dist < bestDist)
00214 {
00215 bestCluster = j;
00216 bestDist = dist;;
00217 }
00218 }
00219
00220 if (bestCluster == _whichCluster[i])
00221 continue;
00222 pointMoved = true;
00223
00224
00225
00226 _clusters[_whichCluster[i]].remove(_points[i]);
00227 _clusters[bestCluster].add(_points[i]);
00228 _whichCluster[i] = bestCluster;
00229 }
00230 return(pointMoved);
00231 }
00232
00233
00234
00235
00236
00237
00238
00239
00240 private:
00241
00242 vector< MyKMeansPointType > _points;
00243
00244 unsigned int _numClusters;
00245
00246
00247
00248
00249
00250
00251 vector< MyKMeansClusterType< MyKMeansPointType > > _clusters;
00252
00253
00254 vector< unsigned int > _whichCluster;
00255
00256 };
00257
00258
00259
00260
00261 #endif // _KMEANS_H