00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef _PARAMS_H
00024 #define _PARAMS_H
00025
00026 #include <math.h>
00027 #include <fstream>
00028 #include <map>
00029 #include <set>
00030
00031 #include "format.h"
00032 #include "global.h"
00033
00034
00035
00036
00037
00038
00039
// Command-line option record produced by gengetopt. gengetopt declares it
// with the `struct` class-key, so the forward declaration uses the same key
// (avoids mismatched-tag diagnostics).
struct gengetopt_args_info;

/// Kinds of input data selectable through MyClusterParams::setDataType().
enum MyDataType
{
    aff,
    cog,
    mic
};

/// Verbosity levels, in increasing numeric order (verbosityZero == 0).
enum MyVerbosityLevel
{
    verbosityZero,
    verbosityOne,
    verbosityTwo,
    verbosityThree,
    verbosityFour
};

/// Lookup table from an integer level (0..MAX_VERBOSITY_LEVEL) to the
/// matching MyVerbosityLevel enumerator.
const MyVerbosityLevel MY_VERBOSITY_LEVEL_TABLE[] =
{
    verbosityZero,
    verbosityOne,
    verbosityTwo,
    verbosityThree,
    verbosityFour
};

/// Largest valid index into MY_VERBOSITY_LEVEL_TABLE. Tied to the last
/// enumerator so the two cannot drift apart (value is still 4).
const int MAX_VERBOSITY_LEVEL = verbosityFour;

/// Line-length limit (1024).
/// NOTE(review): presumably the buffer size used when reading input lines;
/// no use is visible in this header.
const int MAX_LINE_LENGTH = 1024;

// Sentinel values (-1) meaning "not set"; MyClusterParams compares its stored
// settings against them to decide whether to substitute a computed value.
const int DEFAULT_NUM_SEEDS = -1;
const int DEFAULT_SIZE_DISC = -1;
const int DEFAULT_NUM_DISCS = -1;
const int DEFAULT_MAX_COUNT = -1;

// Forward declarations of classes defined elsewhere in the project.
class MyCluster;
class MyPointSet;
00059
00060
00061
00062
00063
00064 struct MyClusterParams
00065 {
00066 friend class MyCluster;
00067
00068
00069 public:
00070 MyClusterParams();
00071
00072
00073 MyNT getMinimumSupport() const
00074 {
00075 return(minimumSupport);
00076 }
00077
00078 bool getAnneal() const
00079 {
00080 return(anneal);
00081 }
00082
00083
00084 bool getApriori() const
00085 {
00086 return(runAprioriAlgo);
00087 }
00088
00089
00090 MyNT getBeta() const
00091 {
00092 return(beta);
00093 }
00094
00095 string getClassFileName() const
00096 {
00097 return(classFile);
00098 }
00099
00100 string getCommandLine() const
00101 {
00102 return(commandLine);
00103 }
00104
00105 void setCommandLine(int argc, char **argv)
00106 {
00107 for (int i = 0; i < argc; i++)
00108 {
00109 commandLine += argv[i];
00110 commandLine += " ";
00111 }
00112 }
00113
00114
00115 string getConfigFileName() const
00116 {
00117 return(configFile);
00118 }
00119 void readConfigFile();
00120
00121
00122 bool getComputeCorrelations() const
00123 {
00124 return(computeCorrelations);
00125 }
00126 MyNT getCorrelationThreshold() const
00127 {
00128 return(correlationThreshold);
00129 }
00130
00131
00132
00133 MyDataType getDataType() const
00134 {
00135 return(dataType);
00136 }
00137 char getDelimiter() const
00138 {
00139 return(delimiter);
00140 }
00141
00142
00143
00144 MyAffyFileFormat getFileFormat(unsigned int which) const
00145 {
00146 return(fileFormats[which]);
00147 }
00148
00149
00150
00151
00152
00153
00154 bool getFlip() const
00155 {
00156 return(flipPointsAndCoords);
00157 }
00158
00159
00160 bool getUseGaussian() const
00161 {
00162 return(useGaussian);
00163 }
00164
00165
00166
00167 bool getGreedy() const
00168 {
00169 return(findClustersGreedily);
00170 }
00171
00172
00173 bool getGenerateData() const
00174 {
00175 return(generateData);
00176 }
00177
00178
00179 string getGenesFileName() const
00180 {
00181 return(genesFile);
00182 }
00183
00184
00185 string getGeneIdColumn(unsigned int whichPointSet) const
00186 {
00187 return(geneIdColumns[whichPointSet]);
00188 }
00189
00190
00191
00192 string getGeneNameColumn(unsigned int whichPointSet) const
00193 {
00194 return(geneNameColumns[whichPointSet]);
00195 }
00196
00197
00198
00199 string getInputFileName(int index = 0) const
00200 {
00201 return(inputFiles[index]);
00202
00203 }
00204
00205
00206 bool getComputeItemsets(unsigned int which = 0) const
00207 {
00208
00209 return((0 != computeItemsets[which]));
00210
00211 }
00212
00213 int getNumLimitSets() const
00214 {
00215 return(limitFiles.size());
00216 }
00217
00218
00219 string getLimitFileName(unsigned int which = 0) const
00220 {
00221 return(limitFiles[which]);
00222 }
00223
00224
00225
00226 MyNT getLowerBoundForFiltering() const
00227 {
00228 return(lowerBound);
00229 }
00230 bool getLowerBoundGiven() const
00231 {
00232 return(lowerBoundGiven);
00233 }
00234
00235
00236
00237 bool getUseLogarithm() const
00238 {
00239 return(useLogarithm);
00240 }
00241
00242
00243
00244 bool getSaveMemory() const
00245 {
00246 return(saveMemory);
00247 }
00248
00249
00250 int getMaxCount() const
00251 {
00252 return(maxCount);
00253
00254 }
00255 void setMaxCount(int m)
00256 {
00257 maxCount = m;
00258 }
00259
00260
00261 MyNT getMaximumDownRegulated() const
00262 {
00263 return(maxDownRegulatedValue);
00264 }
00265 bool maximumDownRegulatedGiven() const
00266 {
00267 return(maxDownRegulatedGiven);
00268 }
00269
00270
00271 MyNT getMinimumUpRegulated() const
00272 {
00273 return(minUpRegulatedValue);
00274 }
00275 bool minimumUpRegulatedGiven() const
00276 {
00277 return(minUpRegulatedGiven);
00278 }
00279
00280
00281
00282 bool getMinimiseClusterSize() const
00283 {
00284 return(minimiseClusterSize);
00285 }
00286
00287
00288 MyNT getMinimumHomogeneity() const
00289 {
00290 return(minimumClusterHomogeneity);
00291 }
00292 void setMinimumHomogeneity(MyNT q)
00293 {
00294 minimumClusterHomogeneity = q;
00295 }
00296
00297
00298 int getNumNames() const
00299 {
00300 return(numNames);
00301 }
00302
00303
00304 int getNumPointsToRead() const
00305 {
00306 return(numPointsToRead);
00307 }
00308
00309
00310 int getNumBestClusters() const
00311 {
00312 return(numBestClusters);
00313 }
00314
00315
00316 int getNumClustersToCompute() const
00317 {
00318 return(numClusters);
00319 }
00320
00321
00322 int getNumDiscs(unsigned int index = 0) const
00323 {
00324 return(numDiscs[index]);
00325 }
00326
00327 unsigned int getNumPointSets() const
00328 {
00329 unsigned int retval;
00330
00331 retval = numPointSets;
00332 retval = inputFiles.size();
00333 cout << "\tMyClusterParams::getNumPointSets(): Returning "
00334 << retval << endl;
00335
00336
00337 return(retval);
00338 }
00339
00340
00341 int getNumSeeds(unsigned int index = 0) const
00342 {
00343 return(numSeeds[index]);
00344 }
00345
00346
00347 string getOutputFileName() const
00348 {
00349 return(outputFile);
00350 }
00351
00352
00353 string getOutputInternalValuesFileName() const
00354 {
00355 return(outputInternalValuesFile);
00356 }
00357
00358
00359
00360
00361 bool getPartition() const
00362 {
00363 return(partitionPoints);
00364 }
00365
00366
00367 int getMaxPointCount() const
00368 {
00369 return(maxPointCount);
00370 }
00371
00372
00373 MyNT getMaxPValue() const
00374 {
00375 return(maxPValue);
00376 }
00377
00378
00379 string getRepeatFileName() const
00380 {
00381 return(repeatFile);
00382 }
00383
00384
00385 string getWidthRangeFileName() const
00386 {
00387 return(widthRangeFile);
00388 }
00389
00390
00391 string getRowNameColumn() const
00392 {
00393 return(rowNameColumn);
00394 }
00395
00396
00397
00398 bool getUseSignificantIntervals(unsigned int which = 0) const
00399 {
00400 return(useSignificantIntervals[which]);
00401 }
00402 void setUseSignificantIntervals(unsigned int which = 0, bool val = true)
00403 {
00404 useSignificantIntervals[which] = val;
00405 }
00406
00407
00408
00409 bool getComputeStatistics() const
00410 {
00411 return(computeStatistics);
00412 }
00413
00414
00415
00416
00417
00418
00419 int getSizeDisc(unsigned int index = 0) const
00420 {
00421 return(sizeDiscs[index]);
00422
00423 }
00424
00425 void setSizeDisc(int sd, unsigned int index = 0)
00426 {
00427 sizeDiscs[index] = sd;
00428
00429 }
00430
00431
00432 MyNT getSimilarityThreshold() const
00433 {
00434 return(clusterSimilarityThreshold);
00435 }
00436
00437
00438 bool getShamir() const
00439 {
00440 return(runShamirAlgo);
00441 }
00442
00443 void set(gengetopt_args_info &cl, unsigned int whichPointSet = 0,
00444
00445
00446
00447 bool update = false);
00448
00449
00450
00451 bool getUseUniform() const
00452 {
00453 return(useUniform);
00454 }
00455
00456
00457 MyNT getUpperBoundForFiltering() const
00458 {
00459 return(upperBound);
00460 }
00461 bool getUpperBoundGiven() const
00462 {
00463 return(upperBoundGiven);
00464 }
00465
00466
00467 MyVerbosityLevel getVerbosity() const
00468 {
00469 return(verbose);
00470 }
00471
00472
00473 MyNT getWidth() const
00474 {
00475 return(width);
00476 }
00477
00478
00479 string getWidthFileName() const
00480 {
00481 return(widthFile);
00482 }
00483
00484
00485 void print(ostream& ostr) const;
00486
00487
00488 void set(int numPoints, int numDimensions, unsigned int index = 0)
00489 {
00490 setNumSeeds(numPoints, numDimensions, index);
00491
00492 setSizeDisc(numPoints, numDimensions, index);
00493 setNumDiscs(numPoints, numDimensions, index);
00494
00495
00496
00497 if (DEFAULT_MAX_COUNT == maxCount)
00498 maxCount = numPoints;
00499
00500 }
00501
00502
00503 private:
00504 void setDataType(const string& type)
00505 {
00506 if (("a" == type) || ("aff" == type) || ("affy" == type) || ("affymetrix" == type))
00507 {
00508 dataType = aff;
00509 delimiter = '\t';
00510 numNames = 2;
00511 }
00512 else if (("c" == type) || ("cog" == type))
00513 {
00514 dataType = cog;
00515 delimiter = ' ';
00516 }
00517 else if (("m" == type) || ("mic" == type) || ("microarray" == type))
00518 {
00519 dataType = mic;
00520 delimiter = ' ';
00521 numNames = 1;
00522 }
00523 else
00524 cerr << "ERROR! Unknown data type " << type << ". Exiting.\n";
00525 }
00526
00527 void setNumSeeds(unsigned int numPoints,
00528 unsigned int numDimensions, unsigned int index = 0)
00529 {
00530
00531 if (numSeeds[index] > numPoints)
00532 numSeeds[index] = numPoints;
00533 }
00534
00535 void setSizeDisc(unsigned int numPoints, unsigned int numDimensions,
00536 unsigned int index = 0)
00537 {
00538 if (DEFAULT_SIZE_DISC == sizeDiscs[index])
00539
00540
00541
00542 sizeDiscs[index] = static_cast<int>(10*log(numDimensions/(1 - probDisc))/log(1/(2*beta)));
00543 if (sizeDiscs[index] > numPoints)
00544
00545
00546
00547
00548
00549 sizeDiscs[index] = numPoints/2;
00550 }
00551
00552 void setNumDiscs(unsigned int numPoints, unsigned int numDimensions,
00553 unsigned int index)
00554 {
00555 if (DEFAULT_NUM_DISCS == numDiscs[index])
00556
00557
00558
00559
00560
00561 numDiscs[index] = static_cast<int>(log(1 - probCluster)/log(1 - pow((double)minimumSupport, sizeDiscs[index]/10.0)));
00562 }
00563
00564 void processRepeatFile(ifstream& rstr, int& localArgc, char **&localArgv);
00565
00566 private:
00567
00568
00569
00570
00571 MyNT minimumSupport;
00572
00573 bool anneal;
00574
00575
00576 bool runAprioriAlgo;
00577
00578
00579 MyNT beta;
00580
00581
00582 string classFile;
00583
00584 string commandLine;
00585
00586
00587 string configFile;
00588 map< string, string > configInfo;
00589
00590
00591 bool computeCorrelations;
00592 MyNT correlationThreshold;
00593
00594
00595
00596 MyDataType dataType;
00597
00598
00599 char delimiter;
00600
00601
00602 vector< MyAffyFileFormat > fileFormats;
00603
00604
00605 bool flipPointsAndCoords;
00606
00607
00608 bool useGaussian;
00609
00610
00611 bool findClustersGreedily;
00612
00613
00614 bool generateData;
00615
00616
00617 string genesFile;
00618
00619
00620
00621 vector< string > geneIdColumns;
00622
00623
00624
00625 vector< string > geneNameColumns;
00626
00627
00628
00629 string inputFile;
00630 vector< string > inputFiles;
00631
00632
00633
00634
00635 vector< unsigned int > computeItemsets;
00636
00637
00638
00639
00640
00641
00642
00643 map< string, bool > ignoredColumns;
00644
00645
00646
00647
00648 map< string, bool > ignoredRows;
00649
00650
00651
00652 vector< string > limitFiles;
00653
00654
00655 MyNT lowerBound;
00656 bool lowerBoundGiven;
00657
00658
00659 bool useLogarithm;
00660
00661
00662 bool saveMemory;
00663
00664
00665 int maxCount;
00666
00667
00668 bool minimiseClusterSize;
00669
00670
00671 MyNT maxDownRegulatedValue;
00672 bool maxDownRegulatedGiven;
00673
00674
00675 MyNT minUpRegulatedValue;
00676 bool minUpRegulatedGiven;
00677
00678
00679 MyNT minimumClusterHomogeneity;
00680
00681
00682 int numNames;
00683
00684
00685 int numPointsToRead;
00686
00687
00688
00689
00690 int numBestClusters;
00691
00692
00693 int numClusters;
00694
00695
00696 vector< unsigned int > numDiscs;
00697
00698
00699
00700 unsigned int numPointSets;
00701
00702
00703
00704 vector< unsigned int > numSeeds;
00705
00706
00707 string outputFile;
00708
00709
00710
00711 bool outputInternalValues;
00712 string outputInternalValuesFile;
00713
00714
00715 bool partitionPoints;
00716
00717
00718
00719
00720
00721
00722 int maxPointCount;
00723
00724
00725
00726 MyNT probCluster;
00727
00728
00729 MyNT probDisc;
00730
00731
00732 MyNT probSeed;
00733
00734
00735 MyNT maxPValue;
00736
00737
00738
00739 bool repeatExperiment;
00740 string repeatFile;
00741
00742
00743
00744 string widthRangeFile;
00745
00746
00747 string rowNameColumn;
00748
00749
00750
00751
00752
00753
00754
00755
00756 vector< unsigned int > useSignificantIntervals;
00757
00758
00759 bool computeStatistics;
00760
00761
00762
00763
00764 vector< unsigned int > sizeDiscs;
00765
00766
00767 bool runShamirAlgo;
00768
00769
00770
00771 MyNT clusterSimilarityThreshold;
00772
00773
00774 MyNT upperBound;
00775 bool upperBoundGiven;
00776
00777
00778
00779 bool useUniform;
00780
00781
00782 MyVerbosityLevel verbose;
00783
00784
00785 MyNT width;
00786
00787 string widthFile;
00788
00789 };
00790
00791
00792 #endif // _PARAMS_H