00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef POINT_H
00025 #define POINT_H
00026
00027 #include <map>
00028 #include <set>
00029
00030 #include "constants.h"
00031
00032 #if (GCC_VERSION >= 40300)
00033
00034 #include <tr1/unordered_set>
00035 using namespace std::tr1;
00036 #else
00037 #include <ext/hash_set>
00038 using namespace __gnu_cxx;
00039 #endif
00040
00041 #include "myiterator.h"
00042
00043 #include "format.h"
00044 #include "global.h"
00045 #include "histogram.h"
00046 #include "info.h"
00047 #include "interval.h"
00048 #include "params.h"
00049 #include "random.h"
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061 template <class NT>
00062 class Point_d
00063 {
00064 public:
00065 Point_d(int d=0)
00066 : dimension(0), missingDims(), coords()
00067 {}
00068 Point_d (int d, const vector< NT> &c,
00069 const set< unsigned int > *missing = NULL)
00070 : dimension(d), missingDims(), coords(c)
00071 {
00072 if (missing)
00073 missingDims = *missing;
00074 }
00075 Point_d(const Point_d& other)
00076 : dimension(other.dimension), missingDims(other.missingDims), coords(other.coords)
00077 {}
00078
00079
00080 virtual ~Point_d()
00081 {}
00082
00083 unsigned int getNumDimensions() const
00084 {
00085 return(dimension);
00086 }
00087
00088 NT operator[](int i) const
00089 {
00090
00091 return(coords[i]);
00092 }
00093
00094
00095 bool isDimMissing(unsigned int i ) const
00096 {
00097
00098
00099
00100 return((0 != missingDims.size()) && (missingDims.end() != missingDims.find(i)));
00101
00102 }
00103
00106 unsigned int getNumMissingDimensions() const
00107 {
00108 return(missingDims.size());
00109 }
00110
00115 void standardise()
00116 {
00117 MyNT mean = computeMean(coords, &missingDims);
00118 MyNT variance = computeVariance(coords, mean, &missingDims);
00119 standardise(mean, sqrt(variance));
00120 }
00121
00122
00127 void standardise(MyNT m, MyNT s)
00128 {
00129 for (unsigned int i = 0; i < coords.size(); i++)
00130 if (!isDimMissing(i))
00131 coords[i] = (coords[i] - m)/s;
00132 }
00133
00134
00135
00136 void print(ostream& ostr) const
00137 {
00138 for (typename vector< NT>::const_iterator itr = coords.begin(); itr != coords.end(); itr++)
00139 ostr << "\t" << *itr;
00140 ostr << endl;
00141 }
00142
00143 friend ostream& operator<<(ostream& ostr, const Point_d &pt)
00144 {
00145 pt.print(ostr);
00146 return(ostr);
00147 }
00148
00149 friend istream& operator>>(istream& istr, Point_d &pt)
00150 {
00151 cerr << "ERROR! istream& operator>>(istream& istr, Point_d &pt) not implemented. Exiting.\n";
00152 exit(-1);
00153 return(istr);
00154 }
00155
00161 virtual void randomise()
00162 {
00163 random_shuffle(coords.begin(), coords.end());
00164 }
00165
00166
00167
00168 NT& operator[](int i)
00169 {
00170 return(coords[i]);
00171 }
00172
00173 protected:
00174
00175 void addMissingDim(unsigned int i)
00176 {
00177 missingDims.insert(i);
00178 }
00179
00180 void deleteMissingDim(unsigned int i)
00181 {
00182 missingDims.erase(i);
00183 }
00184
00185
00186 protected:
00187 unsigned int dimension;
00188
00189 set< unsigned int > missingDims;
00190 vector< NT > coords;
00191 };
00192
00193 typedef Point_d< MyNT > _MyPoint;
00194
00195 class MyPointInfo : public MyInfo
00196 {
00197 public:
00198 MyPointInfo(const MyString &id = "", const MyString &sid = "", const string &cname = "")
00199 : MyInfo(id, sid), className(cname)
00200 {}
00201 MyPointInfo(const MyPointInfo& info)
00202 : MyInfo(info), className(info.className)
00203 {}
00204 MyPointInfo(const MyInfo& info)
00205 : MyInfo(info), className()
00206 {}
00207 virtual ~MyPointInfo()
00208 {}
00209
00210
00211
00212 virtual MyPointInfo* clone() const
00213 {
00214 MyPointInfo* clone = new MyPointInfo(*this);
00215 clone->setClassName(className);
00216 return(clone);
00217 }
00218
00219 virtual void setClassName(string name)
00220 {
00221 className = name;
00222 }
00223 virtual string getClassName() const
00224 {
00225 return(className);
00226 }
00227
00228 virtual void print(ostream& ostr) const
00229 {
00230 MyInfo::print(ostr);
00231 ostr << "Class name = " << className << endl;
00232 }
00233
00234 private:
00235
00236 string className;
00237 };
00238
00239
00240
00241 typedef MyPointInfo MyDimInfo;
00242
00243
00244
00245
00246 class MyPoint : public _MyPoint
00247 {
00248 friend class MyPointSet;
00249
00250 public:
00251 MyPoint()
00252 : _MyPoint(), info(NULL), chosen(false)
00253 {}
00254
00255 MyPoint(int dim, const vector< MyNT >& coords, MyInfo *inf)
00256 : _MyPoint(dim, coords), info(NULL), chosen(false), hidden(false)
00257 {
00258 if (inf)
00259 setInfo(inf);
00260 }
00261
00262 MyPoint(int dim, const vector< MyNT >& coords,
00263
00264 MyInfo *inf,
00265 set< unsigned int > *missing = NULL)
00266 : _MyPoint(dim, coords, missing), info(NULL), chosen(false), hidden(false)
00267 {
00268 if (inf)
00269 setInfo(inf);
00270 }
00271
00272 MyPoint(const _MyPoint &pt, MyPointInfo *inf)
00273 : _MyPoint(pt), info(NULL), chosen(false), hidden(false)
00274 {
00275 if (inf)
00276 setInfo(inf);
00277 }
00278 MyPoint(const MyPoint& copy)
00279 : _MyPoint(copy), info(NULL), chosen(copy.chosen), hidden(copy.hidden)
00280 {
00281 if (copy.info)
00282 setInfo(copy.info);
00283 }
00284 const MyPoint& operator=(const MyPoint& rhs)
00285 {
00286 if (this != &rhs)
00287 {
00288 this->_MyPoint::operator=(rhs);
00289 if (rhs.info)
00290 setInfo(rhs.info);
00291 chosen = rhs.chosen;
00292 hidden = rhs.hidden;
00293 }
00294 return(*this);
00295 }
00296
00297 virtual ~MyPoint()
00298 {
00299 delete info;
00300 }
00301
00302 const MyInfo* getInfo() const
00303 {
00304 return(info);
00305 }
00306
00307 MyString getName() const
00308 {
00309 if (info)
00310 {
00311
00312 if ("" != info->getShortName())
00313 return(info->getShortName());
00314 else
00315 return(info->getName());
00316 }
00317 else
00318 return(string("NONAME"));
00319 }
00320
00321
00322
00323
00324 void setInfo(MyPointInfo* inf)
00325 {
00326 #if 0
00327 info = inf;
00328 #endif
00329 delete info;
00330
00331
00332 info = inf->clone();
00333
00334
00335
00336
00337 }
00338 void setInfo(MyInfo* inf)
00339 {
00340 delete info;
00341
00342 info = new MyPointInfo(*inf);
00343 }
00344
00345 #if 0
00346 void setInfo(const MyString &name, const MyString &shortName,
00347 const string className = "")
00348 {
00349 if (!info)
00350 info = new MyPointInfo;
00351
00352
00353 info->setName(name);
00354 info->setShortName(shortName);
00355 info->setClassName(className);
00356 }
00357 #endif
00358
00359 string getClassName() const
00360 {
00361 if (info)
00362 return(info->getClassName());
00363 else
00364 return("NONAME");
00365
00366 }
00367
00368 void setClassName(string name)
00369 {
00370 if (info)
00371 info->setClassName(name);
00372 else
00373 cerr << "Point " << getName() << " has no associated info to store class name with.\n";
00374 }
00375 bool getChosen() const
00376 {
00377 return(chosen);
00378 }
00379
00380 bool getHidden() const
00381 {
00382 return(hidden);
00383 }
00384
00389 MyNT getMaxCoord() const
00390 {
00391 if (getNumMissingDimensions() == getNumDimensions())
00392 return(0);
00393 unsigned int i = 0;
00394 MyNT max;
00395 while (isDimMissing(i))
00396 i++;
00397
00398 max = coords[i];
00399 while (i < getNumDimensions())
00400 {
00401 if (!isDimMissing(i) && (coords[i] > max))
00402 max = coords[i];
00403 i++;
00404 }
00405 return(max);
00406 }
00407
00412 MyNT getMinCoord() const
00413 {
00414 if (getNumMissingDimensions() == getNumDimensions())
00415 return(0);
00416 unsigned int i = 0;
00417 MyNT min;
00418 while (isDimMissing(i))
00419 i++;
00420
00421 min = coords[i];
00422 while (i < getNumDimensions())
00423 {
00424 if (!isDimMissing(i) && (coords[i] < min))
00425 min = coords[i];
00426 i++;
00427 }
00428 return(min);
00429 }
00430
00435 MyNT computeNorm(unsigned int p) const;
00436
00444 MyNT computeSparsityHoyer() const;
00445
00446
00447
00459 int computeDiscreteCorrelation(const MyPoint &other,
00460 bool computeSign = false) const;
00461
00491 MyNT computePearsonsCorrelation(const MyPoint &other,
00492 const MyNT *mean1 = NULL, const MyNT *stdDev1 = NULL,
00493 const MyNT *mean2 = NULL, const MyNT *stdDev2 = NULL)
00494 const;
00495
00496
00502 void computeRanks(MyPoint &rankedPoint) const;
00503
00504 private:
00505
00506
00507
00508
00509 void setChosen(bool c)
00510 {
00511 chosen = c;
00512 }
00513
00514
00515 void setHidden(bool h)
00516 {
00517 hidden = h;
00518 }
00519
00520
00521
00522 MyNT distance(const MyPoint &other)
00523 {
00524 MyNT distance = 0;
00525 for (unsigned int i = 0; i < getNumDimensions(); i++)
00526
00527 distance += coords[i]*other.coords[i];
00528 return(sqrt(distance));
00529 }
00530
00531 MyNT mean() const
00532 {
00533 MyNT _mean = 0;
00534 for (unsigned int i = 0; i < getNumDimensions(); i++)
00535 {
00536
00537 if (isDimMissing(i))
00538 continue;
00539 _mean += coords[i];
00540 }
00541 _mean /= (getNumDimensions() - getNumMissingDimensions());
00542 return(_mean);
00543 }
00544
00545 MyNT variance(MyNT mean) const
00546 {
00547 MyNT _variance = 0;
00548 for (unsigned int i = 0; i < getNumDimensions(); i++)
00549 {
00550
00551 if (isDimMissing(i))
00552 continue;
00553 _variance += (coords[i] - mean)*(coords[i] - mean);
00554 }
00555 _variance /= (getNumDimensions() - getNumMissingDimensions());
00556 return(_variance);
00557 }
00558
00559 MyNT standardDeviation(MyNT mean) const
00560 {
00561 return(sqrt(variance(mean)));
00562 }
00563
00564
00565 private:
00566 MyPointInfo* info;
00567
00568
00569
00570
00571
00572
00573
00574
00575
00576
00577 bool chosen;
00578 bool hidden;
00579 };
00580
00581
00582 typedef pair< int, MyNT > MyEntropy;
00583
00584 struct compareEntropies : public binary_function< MyEntropy, MyEntropy, bool>
00585 {
00586 bool operator()(const MyEntropy& __x, const MyEntropy& __y) const
00587 {
00588 return __x.second < __y.second; }
00589 };
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600 typedef map< string, string, ltstr > MyClassesType;
00601 typedef set< string, ltstr > MyClassNamesType;
00602
00603 const unsigned int NUM_CORRELATION_HISTOGRAM_BINS = 100;
00604 const MyNT MIN_CORRELATION = -1;
00605 const MyNT MAX_CORRELATION = 1;
00606 const MyNT CORRELATION_RANGE = MAX_CORRELATION - MIN_CORRELATION;
00607
00608
00614 class MyPointSet
00615 {
00626 friend class MyCluster;
00627
00628 typedef map< MyNT, unsigned int > MyDimCountsType;
00629 typedef MyDimCountsType::const_iterator MyDimCountsConstPtr;
00630 typedef MyDimCountsType::iterator MyDimCountsPtr;
00631
00632 public:
00633
00634 MyPointSet()
00635 : numPoints(0), numDimensions(0), numNotChosen(0), numNotHidden(0),
00636 points(),
00637 hiddenPoints(), hiddenPointsByClass(),
00638 flipped(false),
00639 namesToIndices(), classNamesExist(false),
00640 classNames(),
00641 classesToIndices(), classCounts(),
00642 dimInfo(), dimLows(0), dimHighs(0), dimMeans(0), dimSigmas(0), dimCounts(0),
00643 dimEntropies(), sortedDims(), _pointMeans(), _pointStandardDeviations()
00644 {}
00645 MyPointSet(const MyPointSet& copy)
00646 : numPoints(copy.numPoints), numDimensions(copy.numDimensions),
00647 numNotChosen(copy.numNotChosen),
00648 numNotHidden(copy.numNotHidden),
00649 points(copy.points),
00650 hiddenPoints(copy.hiddenPoints),
00651 hiddenPointsByClass(copy.hiddenPointsByClass),
00652 flipped(copy.flipped),
00653 namesToIndices(copy.namesToIndices),
00654 classNamesExist(copy.classNamesExist),
00655 classNames(copy.classNames),
00656 classesToIndices(copy.classesToIndices),
00657 classCounts(copy.classCounts),
00658 dimInfo(copy.dimInfo), dimLows(copy.dimLows), dimHighs(copy.dimHighs),
00659 dimMeans(copy.dimMeans), dimSigmas(copy.dimSigmas),
00660 dimCounts(copy.dimCounts),
00661 dimEntropies(copy.dimEntropies), sortedDims(copy.sortedDims),
00662 _pointMeans(copy._pointMeans),
00663 _pointStandardDeviations(copy._pointStandardDeviations)
00664 {}
00665 const MyPointSet& operator=(const MyPointSet& rhs)
00666 {
00667 if (this != &rhs)
00668 {
00669 numPoints = rhs.numPoints;
00670 numDimensions = rhs.numDimensions;
00671 numNotChosen = rhs.numNotChosen;
00672 numNotHidden = rhs.numNotHidden;
00673 points.clear();
00674 points = rhs.points;
00675 hiddenPoints.clear();
00676 hiddenPoints = rhs.hiddenPoints;
00677 hiddenPointsByClass.clear();
00678 hiddenPointsByClass = rhs.hiddenPointsByClass;
00679 flipped = rhs.flipped,
00680 namesToIndices.clear();
00681 namesToIndices = rhs.namesToIndices;
00682 classNamesExist = rhs.classNamesExist;
00683 classNames.clear();
00684 classNames = rhs.classNames;
00685 classesToIndices.clear();
00686 classesToIndices = rhs.classesToIndices;
00687 classCounts.clear();
00688 classCounts = rhs.classCounts;
00689
00690 dimInfo.clear();
00691 dimInfo = rhs.dimInfo;
00692 dimLows.clear();
00693 dimLows = rhs.dimLows;
00694 dimHighs.clear();
00695 dimHighs = rhs.dimHighs;
00696 dimMeans.clear();
00697 dimMeans = rhs.dimMeans;
00698 dimSigmas.clear();
00699 dimSigmas = rhs.dimSigmas;
00700
00701 dimCounts.clear();
00702 dimCounts = rhs.dimCounts;
00703
00704 dimEntropies.clear();
00705 dimEntropies = rhs.dimEntropies;
00706 sortedDims.clear();
00707 sortedDims = rhs.sortedDims;
00708 _pointMeans = rhs._pointMeans;
00709 _pointStandardDeviations = rhs._pointStandardDeviations;
00710 }
00711 return(*this);
00712 }
00713
00714 virtual ~MyPointSet()
00715 {}
00716
00717
00731 void compareTTest(string name, const MyPointSet &other, MyNT &tstat,
00732 MyNT &pvalue) const;
00733
00738 MyNT computeAverageExpressionValue(const set< string > &subset);
00739
00741 void computeCoordinateDistribution(MyHistogram &histogram) const;
00742
00744 MyNT computeMean(string point) const
00745 {
00746 MyPoint mean;
00747 computeMean(point, mean);
00748 return(::computeMean(mean.coords, &mean.missingDims));
00749 }
00750
00752 bool computeMean(string point, MyPoint &mean) const;
00753
00768 bool computeMeanAndVariance(string point, MyNT &mu, MyNT &var,
00769 unsigned int &numCoords) const;
00770
00771
00772
00775 void computeMeanAndStandardDeviation(MyNT &mean, MyNT &stddev);
00776
00777 void computeSignificantValues(MyNT pvalueThreshold,
00778 MyHistogram &histogram, MyNT &value);
00779
00781 void convertToRanks();
00782
00783
00785 void print(string outfile);
00786
00788 void print(ostream &ostr);
00789
00790
00791
00792 virtual void read(MyClusterParams& params, unsigned int whichPointSet = 0);
00793
00794 virtual void read(string infile, const MyAffyFileFormat &format = MyAffyFileFormat());
00795
00796 virtual void insert(const MyPoint& point)
00797 {
00798 points.push_back(point);
00799 numPoints++;
00800 }
00801
00802 virtual unsigned int size() const
00803 {
00804 return(points.size());
00805 }
00806
00807 virtual unsigned int getNumPoints() const
00808 {
00809 return(numPoints);
00810 }
00811
00812 virtual unsigned int getNumDimensions() const
00813 {
00814 return(numDimensions);
00815 }
00816
00817 virtual unsigned int getNumSamples() const
00818 {
00819 if (_isFlipped())
00820 return(numPoints);
00821 return(numDimensions);
00822 }
00823
00824 virtual unsigned int getNumGenes() const
00825 {
00826 if (_isFlipped())
00827 return(numDimensions);
00828 return(numPoints);
00829 }
00835 virtual bool isPoint(string name) const
00836 {
00837 return(namesToIndices.end() != namesToIndices.find(name));
00838 }
00839
00840 virtual vector< unsigned int > getPointIndex(string name) const
00841 {
00842 map< string, vector< unsigned int >, ltstr >::const_iterator itr =
00843 namesToIndices.find(name);
00844 if (namesToIndices.end() == itr)
00845 {
00846 #ifdef DEBUG
00847
00848 #endif
00849
00850
00851 return(vector<unsigned int>(1, getNumPoints()));
00852 }
00853 return(itr->second);
00854 }
00855
00856
00857 virtual const MyPoint& getSample(unsigned int i ) const;
00858
00859
00860
00861 bool doClassNamesExist() const
00862 {
00863 return(classNamesExist);
00864 }
00865
00866
00867
00868
00869
00870
00871
00872 string getClassName(int i) const;
00873
00874 string getSampleName(int i) const;
00875
00876 string getGeneName(int i) const;
00877
00878
00879 unsigned int numInClass(string className) const
00880 {
00881 map< string, int, ltstr >::const_iterator itr;
00882 itr = classCounts.find(className);
00883 return((*itr).second);
00884
00885 }
00886
00887
00888 void setClasses(string className);
00889 void setClasses(const MyClassesType& classes);
00890
00891 unsigned int readClasses(string fileName);
00892
00893 void getClasses(MyClassNamesType &names)
00894 {
00895 names = classNames;
00896 }
00897
00898
00899
00900 virtual const vector< MyDimInfo >& getDimensionInfo() const
00901 {
00902 return(dimInfo);
00903 }
00904
00905
00906 virtual MyDimInfo getDimensionInfo(int i) const
00907 {
00908 return(dimInfo[i]);
00909 }
00910
00912 virtual void getDimensionNames(set< string > &names) const
00913 {
00914 for (unsigned int i = 0; i < getNumDimensions(); i++)
00915 names.insert(dimInfo[i].getName());
00916 }
00917
00918 virtual void getDimCoords(int dim, vector< MyNT >& coords) const
00919 {
00920 coords = dimensions[dim];
00921
00922
00923 }
00924
00925 virtual MyNT getDimensionLow(int i) const
00926 {
00927 return(dimLows[i]);
00928 }
00929 virtual MyNT getDimensionHigh(int i) const
00930 {
00931 return(dimHighs[i]);
00932 }
00933
00934 virtual MyNT getDimensionMean(int i) const
00935 {
00936 return(dimMeans[i]);
00937 }
00938
00939 virtual MyNT getDimensionSigma(int i) const
00940 {
00941 return(dimSigmas[i]);
00942 }
00943
00944
00945 virtual void getDimensionValues(unsigned int i, vector< MyNT > &values)
00946 const;
00947
00948 virtual unsigned int getDimensionCount(unsigned int i, MyNT value) const
00949 {
00950 return(_getDimensionCount(dimCounts[i].find(value)));
00951 }
00952
00953
00954
00955 virtual const MyPoint& operator[](int i) const
00956 {
00957
00958 return(points[i]);
00959 }
00960
00961 virtual MyPoint& operator[](int i)
00962 {
00963
00964 return(points[i]);
00965 }
00966
00967
00968
00969 virtual void getInterval(const vector< int > &discIndices,
00970 unsigned int dim, MyNT &low, MyNT &high) const;
00971
00972
00973
00974
00975 virtual string suggestClass() const
00976 {
00977 static set< string, ltstr>::const_iterator classNameItr =
00978 classNames.begin();
00979 ++classNameItr;
00980
00981 if (classNames.end() == classNameItr)
00982 classNameItr = classNames.begin();
00983 return(*classNameItr);
00984 }
00985
00986 virtual bool generate(int& index, bool forbidChosen = true) const;
00987
00988 virtual bool generate(int num, vector< int >& indices,
00989
00990 bool forbidHidden = true,
00991
00992 string classToUse = "") const;
00993
00994 virtual int getNumNotChosen() const
00995 {
00996 return(numNotChosen);
00997 }
00998
00999 virtual int getNumNotHidden() const
01000 {
01001 return(numNotHidden);
01002 }
01003
01005 virtual void getPointNames(set< string > &names) const
01006 {
01007 for (unsigned int i = 0; i < getNumPoints(); i++)
01008 names.insert(points[i].getName());
01009 }
01010
01011
01012 virtual void flip();
01013
01021 void splitByClass(vector< MyPointSet > &splitPointSets,
01022 map< string, unsigned int > &classToIndex);
01023
01024
01027 void randomise()
01028 {
01029 for (unsigned int i = 0; i < points.size(); i++)
01030 points[i].randomise();
01031 }
01032
01033
01034
01035
01036
01037
01038
01039
01040
01041 virtual void choosePoints(const vector< int >& containedPoints);
01042
01043
01044 virtual void hidePoints(const vector< int >& containedPoints);
01045
01046 virtual void hide(const vector< int >& containedPoints,
01047 const vector< MyDimensionInterval > &intervals);
01048
01049 virtual MyNT computeEntropy(int dim, MyVerbosityLevel verbosity) const;
01050 virtual void computeEntropies(MyVerbosityLevel verbosity);
01051
01052 virtual unsigned int filter(const MyClusterParams ¶ms);
01053
01054 unsigned int filter(MyNT maxDownRegulated, MyNT minUpRegulated);
01055
01056
01087 virtual bool computeCorrelations(string point1, string point2,
01088 vector< MyNT > &correlations,
01089 string method = "Pearsons") const;
01090
01117 virtual void computeCorrelations(MyNT threshold, string id,
01118 bool absoluteValues = true,
01119 map< string, map< string, MyNT > >*pairs = NULL);
01157 virtual void printCorrelations(MyNT threshold, string outputFile,
01158 bool absoluteValues = true,
01159 map< string, map< string, MyNT > >*pairs = NULL);
01160
01170 MyHistogram printCorrelationsHistogram(string outputFile) const;
01171
01183 MyHistogram computeCorrelationsHistogram(map< string, map< string, MyNT > >*pairs = NULL)
01184 const;
01185
01192 virtual MyHistogram
01193 computeRandomisedCorrelationsHistogram(unsigned int numRandomisations, string outputFile);
01194
01195
01196
01197
01198
01199
01200
01201 void standardise()
01202 {
01203 for (unsigned int i = 0; i < points.size(); i++)
01204 points[i].standardise();
01205 }
01206
01207
01208
01209
01210
01211
01212 void standardise(MyNT m, MyNT s)
01213 {
01214 for (unsigned int i = 0; i < points.size(); i++)
01215 points[i].standardise(m, s);
01216 }
01217
01218
01219
01220
01221
01222
01223
01224
01225
01226
01227
01228
01229
01230
01231
01232 void storeRowNameTranslations(const map< string, set< string > > & rowNameAliases);
01233
01234
01235
01236
01237
01238 bool _isFlipped() const
01239 {
01240 return(flipped);
01241 }
01242
01243 private:
01244
01245
01246
01247
01248 virtual void _countNumNotChosen();
01249
01250
01251
01252
01253 virtual void _countNumNotHidden();
01254
01255
01256 void _fillIn();
01257
01258
01259 void _createNameToIndicesMap();
01260
01261
01262 MyNT _getCoord(MyDimCountsConstPtr itr) const
01263 {
01264 return((*itr).first);
01265 }
01266
01267 void _computeDimCounts();
01268
01269 unsigned int _getDimensionCount(MyDimCountsConstPtr itr) const
01270 {
01271 return((*itr).second);
01272 }
01273
01274 void _computeClassCounts();
01275
01276 private:
01277
01278 unsigned int numPoints;
01279 unsigned int numDimensions;
01280 unsigned int numNotChosen;
01281 unsigned int numNotHidden;
01282 vector< MyPoint > points;
01283
01284
01285
01286 vector< vector< MyNT > > dimensions;
01287
01288 #if GCC_VERSION >= 40300
01289 unordered_set< int > hiddenPoints;
01290
01291 map< string, unordered_set< int > > hiddenPointsByClass;
01292 #else
01293 hash_set< int > hiddenPoints;
01294
01295 map< string, hash_set< int > > hiddenPointsByClass;
01296 #endif
01297
01298 bool flipped;
01299
01300
01301
01302
01303
01304
01305
01306 map< string, vector< unsigned int >, ltstr > namesToIndices;
01307
01308 bool classNamesExist;
01309
01310
01311 set< string, ltstr> classNames;
01312
01313 map< string, vector< int >, ltstr > classesToIndices;
01314
01315
01316 map< string, int, ltstr > classCounts;
01317
01318
01319
01320
01321
01322 vector< MyDimInfo > dimInfo;
01323
01324 vector< MyNT > dimLows;
01325
01326 vector< MyNT > dimHighs;
01327
01328 vector< MyNT > dimMeans;
01329
01330 vector< MyNT > dimSigmas;
01331
01332
01333
01334 vector< MyDimCountsType > dimCounts;
01335
01336
01337 vector< MyNT > dimEntropies;
01338
01339 vector< int > sortedDims;
01340
01341
01342 vector< MyNT > _pointMeans;
01343
01344 vector< MyNT > _pointStandardDeviations;
01345
01346 };
01347
01348
01358 class MySetOfPointSets
01359 {
01360 private:
01361
01362 vector< MyPointSet > _pointSets;
01363
01364 vector< string > _classes;
01365
01366 map< string, unsigned int > _classToIndex;
01367
01368 public:
01369
01370
01371
01372 MySetOfPointSets()
01373 : _pointSets(), _classToIndex()
01374 {}
01375
01376 virtual ~MySetOfPointSets()
01377 {}
01378
01380 void insert(MyPointSet &p)
01381 {
01382 _pointSets.push_back(p);
01383
01384 _classes.push_back("");
01385 }
01386
01388 void insert(MyPointSet &p, string condition)
01389 {
01390 _pointSets.push_back(p);
01391 _classes.push_back(condition);
01392 _classToIndex[condition] = _pointSets.size() - 1;
01393 }
01394
01401 void assignClass(unsigned int index, string condition)
01402 {
01403 if (index >= _pointSets.size())
01404 {
01405 cerr << "Assinging class " << condition
01406 << "to a non-existant point set index " << index << "." << endl;
01407 return;
01408 }
01409
01410 _classToIndex.erase(_classes[index]);
01411
01412 _classes[index] = condition;
01413 _classToIndex[condition] = index;
01414 }
01415
01423 bool getPointSet(string condition, MyPointSet *pointSet)
01424 {
01425 map< string, unsigned int >::iterator citr = _classToIndex.find(condition);
01426 if (_classToIndex.end() == citr)
01427 {
01428 pointSet = NULL;
01429 return(false);
01430 }
01431 pointSet = &(_pointSets[citr->second]);
01432 return(true);
01433 }
01434
01440
01443 const MyPointSet * getPointSet(string condition) const
01444 {
01445 map< string, unsigned int >::const_iterator citr = _classToIndex.find(condition);
01446 if (_classToIndex.end() == citr)
01447 {
01448 return(NULL);
01449
01450
01451 }
01452 return(&(_pointSets[citr->second]));
01453
01454
01455 }
01456 };
01457
01458
01459 class MySetOfPointSetsIterator: public MyIterator< MyPointSet >
01460 {
01461 private:
01462 typedef vector< MyPointSet >::iterator _MySetOfPointSetsIterator;
01463
01464 _MySetOfPointSetsIterator start;
01465 _MySetOfPointSetsIterator end;
01466 _MySetOfPointSetsIterator current;
01467 vector< MyPointSet > &pointSets;
01468
01469 public:
01470
01471 MySetOfPointSetsIterator(_MySetOfPointSetsIterator _start,
01472 _MySetOfPointSetsIterator _end);
01473
01474
01475 MySetOfPointSetsIterator(_MySetOfPointSetsIterator _start,
01476 _MySetOfPointSetsIterator _end,
01477 vector< MyPointSet > &_pointSets)
01478 : start(_start), end(_end), current(_start), pointSets(_pointSets)
01479 {}
01480
01481 virtual bool hasNext()
01482 {
01483 return(current != end);
01484 }
01485
01486 MyPointSet &next()
01487 {
01488 MyPointSet &_next = *current;
01489 current++;
01490 return(_next);
01491 }
01492 };
01493
01494
01495
01496 #endif //POINT_H