Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
classifier.h
00001 /**************************************************************************
00002  * Copyright (c) 2005-2011 T. M. Murali                                   *
00003  * Copyright (c) 2008-2011 Naveed Massjouni                               *
00004  * Copyright (c) 2004 Greg Grothaus                                       *
00005  *                                                                        *
00006  * This file is part of Biorithm.                                         *
00007  *                                                                        *
00008  * Biorithm is free software: you can redistribute it and/or modify       *
00009  * it under the terms of the GNU General Public License as published by   *
00010  * the Free Software Foundation, either version 3 of the License, or      *
00011  * (at your option) any later version.                                    *
00012  *                                                                        *
00013  * Biorithm is distributed in the hope that it will be useful,            *
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00016  * GNU General Public License for more details.                           *
00017  *                                                                        *
00018  * You should have received a copy of the GNU General Public License      *
00019  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00020  *                                                                        *
00021  **************************************************************************/
00022 
00023 #include<set>
00024 #include<string>
00025 #include<iostream>
00026 #include<algorithm>
00027 #include<vector>
00028 #include<cmath>
00029 
00030 #include "motifDataset.h"
00031 #include "setTemplates.C"
00032 #include "readfile.h"
00033 #include "xmotif.h"
00034 #include "greedySetCover.h"
00035 
00036 using namespace std;
00037 
00038 #ifndef CLASSIFIERGREG
00039 #define CLASSIFIERGREG
00040 
00041 class classifier
00042 {
00043 private:
00044   static const double BIG_DBL;
00045   set<itemset> _classifierMotifs;
00046   set<itemset> _unprunedMotifs;
00047   void pruneItemsets(motifDataset*);
00048   double cosine(vector<double> a,vector<double> b);
00049   double distnce(vector<double> &a,vector<double> &b);
00050   vector<double> normalize(vector<double> in);
00051   double randDouble(double min,double max);
00052   double zprob(int dimensions,double dist);
00053   map<int,double> dimmean,dimdev;
00054   vector<string> probes,samples;
00055   void normalizeValues(vector<double>& values);
00056   double multivariatePValue(const vector<double> testData,
00057     const vector<double>& means, const vector<double>& stdDevs, int totalRows);
00058   double stouffersZ(const vector<double> testData,
00059     const vector<double>& means, const vector<double>& stdDevs, int totalRows);
00060                 
00061 public:
00062   
00063   void processItemset(const itemset &);
00064   classifier();
00065   void train(double pVal, motifDataset*,int, XMotifMethods& methods,
00066              void (*)(const itemset&) = NULL, string xmotifFile = NULL);
00067   void print(ostream*);
00068   void load(string,motifDataset*);
00069   void save(string filename);
00070   classifier(string);
00071   vector<string> classify(motifDataset*);
00072   vector<string> classifyUsingUnitHypercube(motifDataset*);
00073   vector<string> classifyUsingMultivariateNormal(motifDataset*);
00074   vector<string> classifyUsingStouffers(motifDataset*);
00075   set<itemset> getMotifs();
00076 };
00077 #endif
 All Classes Functions Variables Typedefs Friends