Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
itemset.h
00001 /**************************************************************************
00002  * Copyright (c) 2005-2011 T. M. Murali                                   *
00003  * Copyright (c) 2008-2011 Naveed Massjouni                               *
00004  * Copyright (c) 2004 Greg Grothaus                                       *
00005  *                                                                        *
00006  * This file is part of Biorithm.                                         *
00007  *                                                                        *
00008  * Biorithm is free software: you can redistribute it and/or modify       *
00009  * it under the terms of the GNU General Public License as published by   *
00010  * the Free Software Foundation, either version 3 of the License, or      *
00011  * (at your option) any later version.                                    *
00012  *                                                                        *
00013  * Biorithm is distributed in the hope that it will be useful,            *
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00016  * GNU General Public License for more details.                           *
00017  *                                                                        *
00018  * You should have received a copy of the GNU General Public License      *
00019  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00020  *                                                                        *
00021  **************************************************************************/
00022 
00023 #include<map>
00024 #include<vector>
00025 #include<string>
00026 #include<iostream>
00027 #include<sstream>
00028 
00029 #include "motifDataset.h"
00030 using namespace std;
00031 
00032 #ifndef ITEMSET
00033 #define ITEMSET
00034 
/// Plain data aggregate grouping a set of integer probe identifiers with two
/// int-keyed tables of doubles and a p-value.
/// NOTE(review): `high` and `low` are presumably per-probe upper/lower bounds
/// keyed by the entries of `probes` -- confirm against the code that fills them.
struct ItemsetRange
{
  /// Integer identifiers of the probes covered by this range.
  std::set<int> probes;

  /// Value stored per integer key (presumably the upper bound -- TODO confirm).
  std::map<int, double> high;

  /// Value stored per integer key (presumably the lower bound -- TODO confirm).
  std::map<int, double> low;

  /// P-value attached to this range. The struct declares no constructor, so
  /// this member holds no defined value until the caller assigns one.
  double pval;
};
00042 
00043 class itemset
00044 {
00045   friend class xmotif;
00046   friend class greedySetCover;
00047   friend class classifier;
00048   friend class itemsetEnrichment;
00049 
00050 private:
00051   
00052   int id; // Used to identify this itemset.
00053 
00054   //stores the indices of columns and rows
00055   //so we dont have to work with strings
00056   vector<int> columns;
00057   vector<int> rows;
00058   
00059   vector<vector<double> > rowValues;
00060   // an itemset keeps track of whether it is in the classifier or not. 
00061   // this variable is useful when processing enrichments and building
00062   // the navigation HTML file.
00063   bool _inClassifier;
00064 
00065   bool _getInClassifier() const
00066     {
00067       return(_inClassifier);
00068     }
00069   void _setInClassifier()
00070     {
00071       _inClassifier = true;
00072     }
00073   void _unsetInClassifier()
00074     {
00075       _inClassifier = false;
00076     }
00077   
00078   const bool consumes(const itemset&) const;
00079   string mClass;
00080   vector<double> _mean;
00081   vector<double> _stddev;
00082   double pOfZ(double) const;
00083   int requiredIterations;
00084   double pval;
00085 
00086 
00087   //converts a T to a string
00088   template <class T> string toa(T in) const
00089     {
00090       stringstream ss;
00091       ss<<in;
00092       string out;
00093       ss>>out;
00094       return out;
00095     }
00096   
00097   
00098   
00099 public:
00100   
00102   itemset()
00103     {
00104       _inClassifier = false;
00105           id = -1;
00106     }
00107 
00109   //  itemset(const itemset &itm);
00110   
00116   itemset(string line, const motifDataset* data);
00117 
00119   virtual ~itemset()
00120     {}
00121 
00124   void build(set<int> &D, ItemsetRange &G, motifDataset *dataset,
00125              int chosenClass);
00126 
00127   int size() const;
00128   bool operator<(const itemset &a) const;
00129   bool operator==(const itemset &a) const;
00130 
00132   string getClass() const {return mClass; };
00133 
00136   void getColumnSet(set< int > &columnSet) const;
00137   
00139   void getRowSet(set< int > &rowSet) const;
00140 
00142   void getRowNames(const motifDataset *data, set<string>& itemsetRows) const;
00143   
00144 
00145   //prints the itemset in Murali's standard format to standard out
00146   void print(const int itemsetnum,vector<string> probes,vector<string> samples, ostream*) const;        
00147   
00148 
00149   void save(ostream *out) const;
00150   void load(istream *in);
00151   
00152   vector<double> classify(motifDataset*) const;
00153   vector<double> classifyMedian(motifDataset*) const;
00154 
00155   int getId() const;
00156   void setId(int id);
00157 };
00158 
00159 #endif
 All Classes Functions Variables Typedefs Friends