00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #include<string>
00024 #include<iostream>
00025 #include<fstream>
00026 #include<vector>
00027 #include<map>
00028 #include<sstream>
00029 #include<cstdlib>
00030 #include<ctime>
00031 #include<set>
00032 #include<cmath>
00033 #include<cfloat>
00034 #include<algorithm>
00035
00036
00037
00038 #include "enrichment.h"
00039
00040 #include "ltqnorm.h"
00041
00042
00043 #include "motifDataset.h"
00044 #include "itemset.h"
00045
00046 using namespace std;
00047
00048 #ifndef XMOTIFDATASETGREG
00049 #define XMOTIFDATASETGREG
00050
00051 enum XmotifDiscriminantMethod { DISCRIMINANT_ALL_SAMPLES = 1, DISCRIMINANT_ONE_CLASS_SAMPLES,
00052
00053 DISCRIMINANT_ONE_CLASS_SAMPLES_WEIGHTED };
00054
00055 enum XmotifSampleExpansionMethod { ADD_SAMPLE_MATCH_GENE_RANGE = 1, ADD_SAMPLE_MATCH_GENE_RANGE_ONE_CLASS,
00056 ADD_SAMPLE_SEARCH, ADD_SAMPLE_SEARCH_ONE_CLASS,
00057 ADD_SAMPLE_SEARCH_ONLY_ADD, ADD_SAMPLE_SEARCH_ONLY_ADD_ONE_CLASS,
00058 ADD_SAMPLE_NONE};
00059
00060 enum XmotifGeneSelectionMethod { GENE_SELECTION_MAX = 1, GENE_SELECTION_STOUFFER,
00061
00062
00063
00064
00065
00066
00067
00068 GENE_SELECTION_TRIVIAL};
00069
00070 enum XmotifPValueMethod {PVALUE_RANK, PVALUE_TTEST};
00071
00073 struct XMotifMethods
00074 {
00075 private:
00076 XmotifDiscriminantMethod _discSelectionMethod;
00077 XmotifGeneSelectionMethod _geneSelectionMethod;
00078 XmotifSampleExpansionMethod _sampleExpansionMethod;
00079 XmotifPValueMethod _pValueMethod;
00080 public:
00082 XMotifMethods(XmotifDiscriminantMethod _dsm, XmotifGeneSelectionMethod _gsm,
00083 XmotifSampleExpansionMethod _asm, XmotifPValueMethod _pvm)
00084 : _discSelectionMethod(_dsm), _geneSelectionMethod(_gsm),
00085 _sampleExpansionMethod(_asm), _pValueMethod(_pvm)
00086 {}
00087
00089 virtual ~XMotifMethods()
00090 {}
00091
00093 XmotifDiscriminantMethod getDiscriminantSelectionMethod() const
00094 {
00095 return(_discSelectionMethod);
00096 }
00097 XmotifSampleExpansionMethod getSampleExpansionMethod() const
00098 {
00099 return(_sampleExpansionMethod);
00100 }
00101 XmotifGeneSelectionMethod getGeneSelectionMethod() const
00102 {
00103 return(_geneSelectionMethod);
00104 }
00105 XmotifPValueMethod getPValueMethod() const
00106 {
00107 return(_pValueMethod);
00108 }
00109 };
00110
00111
00112
00116 class xmotif
00117 {
00118 private:
00119 XMotifMethods methods;
00120
00121 struct _ProbeInterval
00122 {
00123 private:
00124 int probeIndex;
00125 double low;
00126 double high;
00127 double pval;
00128 double zscore;
00129
00130 public:
00131 _ProbeInterval(int i, double l, double h, double p)
00132 : probeIndex(i), low(l), high(h), pval(p)
00133 {
00134 zscore = ltqnorm(1 - p);
00135 }
00136
00137 double getHigh() const
00138 {
00139 return(high);
00140 }
00141 double getLow() const
00142 {
00143 return(low);
00144 }
00145 int getProbeIndex() const
00146 {
00147 return(probeIndex);
00148 }
00149
00150 double getPValue() const
00151 {
00152 return(pval);
00153 }
00154
00155 double getZScore() const
00156 {
00157 return(zscore);
00158 }
00159
00160
00161 };
00162
00163 struct compareProbeIntervals
00164 : public binary_function< const xmotif::_ProbeInterval&, const xmotif::_ProbeInterval&, bool>
00165 {
00166 bool operator()(const _ProbeInterval& __x, const _ProbeInterval& __y) const
00167 {
00168 return(__x.getZScore() > __y.getZScore());
00169 }
00170 };
00171
00173 double pCutoff;
00174
00175 vector<vector<int> > prevVal, nextVal, prevCountVal, nextCountVal;
00176 vector<int> numData;
00177
00178 vector<vector<vector<double> > > countTables;
00179 double countPVal(int,int,int);
00180 double countPVal2(int,int,int);
00181
00182
00183 double pValueTTest(const set<int> &discriminatingSet,
00184 const set<int> &otherSet, int probeIndex);
00185
00186 void calculateCountTables();
00187 void calculateFactTables();
00188 void calculateWeightsTables();
00189 vector<double> logFactTable;
00190 double logProbeSize;
00191
00192
00193 ItemsetRange computeMatchingProbes(const set<int> &discriminatingSet,
00194 const set<int> &otherSet);
00195
00196
00197 ItemsetRange computeMatchingProbesKS(set<int>& D);
00198
00199
00200
00201
00202
00203 ItemsetRange computeMatchingProbesCount(const set<int> &discriminatingSet,
00204 const set<int> &otherSet);
00205
00206
00207
00208
00209
00210 ItemsetRange computeMatchingProbesStouffersZ(
00211 const set<int>& discriminatingSet, const set<int>& otherSet);
00212
00213
00214
00215
00216
00217 ItemsetRange computeMatchingProbesTrivial(set<int>& D, set< int > &probes);
00218
00219 bool relaxRange;
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229 bool computeRangeInDiscriminant(int j, const set<int> &D,
00230 int &minIndex, int &maxIndex,
00231 double &minExpression, double &maxExpression,
00232 int &numPoints, int &totalCount);
00233
00234
00235 set<int> randomSubset(set<int>&,int);
00236 set<int> randomSubset(vector<int>, int);
00237 set<int> randomSubset(vector<int>);
00238 set<int> randomSubset(vector<int>, vector<double>, double);
00239 set<int> randomSubset(int,int);
00240 set<int> randomSubset(int);
00241 int randInt(int,int);
00242 double randDouble(double,double);
00243
00244
00245
00246
00247 void getDiscriminatingSet(int discriminantMethod, int &chosenClass,
00248 set<int> &discriminatingSet, set<int> &otherSet);
00249
00250 map<set<int>,bool> alreadyFoundDiscriminant;
00251 vector<vector<double> > classWeights,nextWeights;
00252 vector<double> normalize(vector<double>);
00253 vector<double> updateWeights(vector<double>,set<int>);
00254
00255 bool expandSamples(set<int> &discriminatingSet, set<int> &otherSet,
00256 ItemsetRange &, int);
00257 bool expandSamplesMatchRange(set<int> &, ItemsetRange &, int, bool);
00258 bool expandSamplesGradientDescent(set<int> &discriminatingSet,
00259 set<int> &otherSet, ItemsetRange &, int, bool);
00260 bool expandSamplesGradientDescentKeepOriginal(set<int> &, set<int> &,
00261 ItemsetRange &, int, bool);
00262 itemset buildItemset(set<int>&, ItemsetRange&, int);
00263
00265 motifDataset* myData;
00266 set<itemset> motifs;
00267
00268 bool _motifsRead;
00269
00270 public:
00277 xmotif(motifDataset* in, const XMotifMethods& methods);
00278
00281 void changePval(double);
00282
00289 void computeMotifs(int ns, void (*processFunc)(const itemset&));
00290
00291
00293 bool getMotifsRead() const
00294 {
00295 return(_motifsRead);
00296 }
00297
00301 void printMotifs(ostream* out);
00302
00304 void readMotifs(string infile);
00305
00309 set<itemset> getMotifs();
00310 };
00311
00312 #endif