Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
motifDataset.h
00001 /**************************************************************************
00002  * Copyright (c) 2005-2011 T. M. Murali                                   *
00003  * Copyright (c) 2008-2011 Naveed Massjouni                               *
00004  * Copyright (c) 2004 Greg Grothaus                                       *
00005  *                                                                        *
00006  * This file is part of Biorithm.                                         *
00007  *                                                                        *
00008  * Biorithm is free software: you can redistribute it and/or modify       *
00009  * it under the terms of the GNU General Public License as published by   *
00010  * the Free Software Foundation, either version 3 of the License, or      *
00011  * (at your option) any later version.                                    *
00012  *                                                                        *
00013  * Biorithm is distributed in the hope that it will be useful,            *
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00016  * GNU General Public License for more details.                           *
00017  *                                                                        *
00018  * You should have received a copy of the GNU General Public License      *
00019  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00020  *                                                                        *
00021  **************************************************************************/
00022 
00023 #ifndef MOTIFDATASETGREG
00024 #define MOTIFDATASETGREG
00025 
00026 #include<iostream>
00027 #include<map>
00028 #include<set>
00029 #include<string>
00030 #include<vector>
00031 
00032 #include "setTemplates.C"
00033 using namespace std;
00034 
/**
 * Descriptive record for a single probe: its identifier, a symbol and a
 * free-text description.
 */
class ProbeInfo {
public:
  /** Creates a record whose three fields are all empty strings. */
  ProbeInfo() {}

  /**
   * Creates a record from a probe ID (i), a symbol (s) and a description (d).
   *
   * An empty probe ID is treated as a fatal error: a message is written to
   * cerr and the process is terminated via exit(-1).
   */
  ProbeInfo(std::string i, std::string s, std::string d)
      : _probeId(i), _symbol(s), _description(d) {
    if (_probeId.empty()) {
      std::cerr << "Probe info cannot have an empty string as the probe ID." << std::endl;
      exit(-1);
    }
  }

  virtual ~ProbeInfo() {}

  /** Returns a copy of the stored description. */
  std::string getDescription() const { return _description; }

  /** Returns a copy of the stored symbol (may be empty). */
  std::string getSymbol() const { return _symbol; }

  /**
   * Builds a URL by appending an identifier to stub: the symbol when it is
   * non-empty, otherwise the probe ID.
   */
  std::string getURL(std::string stub) const {
    const std::string &suffix = _symbol.empty() ? _probeId : _symbol;
    return stub + suffix;
  }

private:
  // Declaration order matters: it is the order used by the constructor's
  // member-initializer list.
  std::string _probeId;
  std::string _symbol;
  std::string _description;
};
00076   
00077 
00078 
00079 class motifDataset
00080 {
00081 public:
00082   virtual vector<vector<int> >* classToExperiments() = 0;
00083   virtual vector<vector<int> >* experimentsToClass() = 0;
00084 //              virtual vector<string>* classes() const = 0;
00085 
00088   virtual string computeClassNameForSamples(const set< int > &columnSet, double frac = 1) const = 0;
00089   
00092   virtual int computeClassIndexForSamples(const set< int > &columnSet, double frac = 1) const = 0;
00093   
00095   virtual string getClassNameForSample(unsigned int index) const = 0;
00096 
00098   virtual unsigned int getClassIndexForSample(unsigned int index) const = 0;
00099   
00101   virtual string getClassName(unsigned int index) const = 0;
00102 
00104   virtual unsigned int getClassSize(unsigned int index) const = 0;
00105 
00107   virtual unsigned int getNumClasses() const = 0;  
00108   
00109   virtual vector<string>* probes() = 0;
00110   virtual vector<string>* samples() = 0;
00111   virtual double operator()(int,int) = 0;
00112   virtual double operator()(string,string) = 0;
00113 
00114   virtual bool exists(int,int) = 0;
00115   virtual bool exists(string,string) = 0;
00116 
00119   virtual int getColumnIndex(string name) const = 0;
00120 
00124   virtual void getRowAliases(string name, set< string >& aliases) const;
00125 
00129   virtual void getRowAliases(const set< string > & names, set< string >& aliases)
00130     const;
00131 
00133   virtual void getRowsForAlias(string otherName, set< string > &rows) const;
00134   
00137   virtual int getRowIndex(string name) const = 0;
00138 
00140   virtual string getRowName(unsigned int index) const = 0;
00141 
00143   virtual bool isAlias(string probe, string alias) const;
00144 
00148   virtual bool isProbe(string id) const = 0;
00149 
00157   virtual void readProbeAliases(string aliasFile) = 0;
00158 
00162   bool getProbeInfo(string probeId, ProbeInfo &info) const;
00163   
00164   
00175   virtual void readProbeInfo(string infoFile) = 0;
00176 
00177   virtual string getProbeURL() const
00178     {
00179       return(_probeURL);
00180     }
00181   
00182   virtual void setProbeURL(string url)
00183     {
00184       _probeURL = url;
00185     }
00186 
00187 
00188 protected:
00189 
00190   // probe aliases.
00191   map< string, set < string > > _probesToAliases;
00192   map< string, set < string > > _aliasesToProbes;
00193 
00194   // probe info
00195   map< string, ProbeInfo > _probesToInfo;
00196 
00197   string _probeURL;
00198   
00199   void _addAlias(string probe, string alias)
00200     {
00201       _probesToAliases[probe].insert(alias);
00202       _aliasesToProbes[alias].insert(probe);
00203     }
00204 
00205   void _addInfo(string probe, string symbol, string name)
00206     {
00207       _probesToInfo[probe] = ProbeInfo(probe, symbol, name);
00208     }
00209 
00210   
00211 };
00212 
00213 
00214 
00215 
00216 #endif
 All Classes Functions Variables Typedefs Friends