Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
itemset.h
00001 /**************************************************************************
00002  * Copyright (c) 2004-2011 T. M. Murali                                   *
00003  * Copyright (c) 2004 Greg Grothaus                                       *
00004  *                                                                        *
00005  * This file is part of Biorithm.                                         *
00006  *                                                                        *
00007  * Biorithm is free software: you can redistribute it and/or modify       *
00008  * it under the terms of the GNU General Public License as published by   *
00009  * the Free Software Foundation, either version 3 of the License, or      *
00010  * (at your option) any later version.                                    *
00011  *                                                                        *
00012  * Biorithm is distributed in the hope that it will be useful,            *
00013  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00015  * GNU General Public License for more details.                           *
00016  *                                                                        *
00017  * You should have received a copy of the GNU General Public License      *
00018  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00019  *                                                                        *
00020  **************************************************************************/
00021 
00028 #include<iostream>
00029 #include<sstream>
00030 #include<set>
00031 #include<string>
00032 #include<vector>
00033 
00034 using namespace std;
00035 
00036 #ifndef ITEMSET
00037 #define ITEMSET
00038 
/** Identifier attached to an Itemset; _makeId() produces values of the form "itemset_<n>". */
typedef std::string ItemsetId;

/**
 * A set of row indices and column indices, plus the printable names and
 * p-values associated with them.
 *
 * Apriori and AprioriWithComplement are friends and may touch the private
 * state directly; the public interface only exposes addRow(), the id, the
 * p-values and read-only queries.
 */
class Itemset
{
  friend class Apriori;
  friend class AprioriWithComplement;

private:

  ItemsetId _id;

  // Column indices, kept as integers so that no string handling is needed.
  // clearColumns() empties this vector after caching its size.
  std::vector<unsigned int> columns;

  // Cached column count, used once the columns are no longer stored
  // explicitly in `columns` (see clearColumns() and getNumColumns()).
  unsigned int _numColumns;

  // The rows and columns of the itemset as sets, for membership queries.
  // Row indices are stored only here; there is no row vector.
  std::set< unsigned int > _columnSet;
  std::set< unsigned int > _rowSet;

  // TODO: complemented rows really belong in a subclass of Itemset; they
  // were added here as a shortcut.
  std::set< unsigned int > _complementedRowSet;

  // Publicly available column and row names, filled in after processing.
  std::vector<std::string> columnStrings;
  std::vector<std::string> rowStrings;

  // The p-value of the #columns amongst all itemsets with the same #rows.
  float _columnPValue;
  // The p-value of the #rows amongst all itemsets with the same #columns.
  float _rowPValue;
  // The p-value of the size of the itemset.
  float _sizePValue;

  // Defined outside this header.
  // NOTE(review): presumably swaps the roles of rows and columns -- confirm
  // against the implementation.
  void transpose();

  // Counter behind _getNewId(); defined outside this header.
  static unsigned int _currentId;

  /** Build the id string "itemset_<index>". */
  static ItemsetId _makeId(unsigned int index)
    {
      std::stringstream sstr;
      sstr << "itemset_" << index;
      return sstr.str();
    }

  /** Advance the shared counter and return the id built from its new value. */
  static ItemsetId _getNewId()
    {
      ++_currentId;
      return _makeId(_currentId);
    }

  /**
   * Add every index in `columns` to `_columnSet`.
   *
   * The set is not cleared first, so indices already present stay there.
   */
  void _computeColumnSet()
    {
      // Range insert: same result as copy() + inserter(), but needs neither
      // <algorithm> nor <iterator>.
      _columnSet.insert(columns.begin(), columns.end());
    }

public:
  /** Create an empty itemset: no id, no rows or columns, all p-values 1. */
  Itemset()
      : _id(), columns(),
        _numColumns(0),
        _columnSet(), _rowSet(), _complementedRowSet(),
        columnStrings(), rowStrings(),
        _columnPValue(1), _rowPValue(1), _sizePValue(1)
    {}

  /** Copy constructor: member-wise copy of rhs, including its id. */
  Itemset(const Itemset &rhs)
      : _id(rhs._id), columns(rhs.columns),
        _numColumns(rhs._numColumns),
        _columnSet(rhs._columnSet), _rowSet(rhs._rowSet),
        _complementedRowSet(rhs._complementedRowSet),
        columnStrings(rhs.columnStrings), rowStrings(rhs.rowStrings),
        _columnPValue(rhs._columnPValue), _rowPValue(rhs._rowPValue),
        _sizePValue(rhs._sizePValue)
    {}

  /** Add the row with the given index; adding an index twice has no effect. */
  void addRow(unsigned int index)
    {
      _rowSet.insert(index);
    }

  /** Set the identifier of this itemset. */
  void setId(const ItemsetId &id)
    {
      _id = id;
    }

  /** @return a copy of the identifier of this itemset. */
  ItemsetId getId() const
    {
      return _id;
    }

  // Defined outside this header.
  // NOTE(review): presumably #rows times #columns -- confirm.
  unsigned int size() const;

  /**
   * Member-wise copy assignment; self-assignment is a no-op.
   *
   * @return a const reference to this itemset.
   */
  const Itemset & operator=(const Itemset &rhs)
    {
      if (this == &rhs)
        return *this;

      _id = rhs._id;
      columns = rhs.columns;
      _numColumns = rhs._numColumns;
      _columnSet = rhs._columnSet;
      _rowSet = rhs._rowSet;
      _complementedRowSet = rhs._complementedRowSet;
      columnStrings = rhs.columnStrings;
      rowStrings = rhs.rowStrings;
      _columnPValue = rhs._columnPValue;
      _rowPValue = rhs._rowPValue;
      _sizePValue = rhs._sizePValue;
      return *this;
    }

  // Ordering and equality, against a reference or a pointer. The criteria
  // are defined outside this header.
  bool operator<(const Itemset &a) const;
  bool operator<(const Itemset *a) const;

  bool operator==(const Itemset &a) const;
  bool operator==(const Itemset *a) const;

  /**
   * Drop the explicit column vector, remembering how many columns it held.
   *
   * `_columnSet` is left untouched. Note that calling this while `columns`
   * is already empty overwrites the remembered count with 0.
   */
  void clearColumns()
    {
      _numColumns = static_cast<unsigned int>(columns.size());
      columns.clear();
    }

  /**
   * @return true iff index is in the column set. Only `_columnSet` is
   * consulted, so a column that is in `columns` but has not yet been copied
   * over by _computeColumnSet() is not reported.
   */
  bool containsColumn(unsigned int index) const
    {
      return _columnSet.find(index) != _columnSet.end();
    }

  /** @return true iff index is one of the rows of this itemset. */
  bool containsRow(unsigned int index) const
    {
      return _rowSet.find(index) != _rowSet.end();
    }

  // Defined outside this header.
  // NOTE(review): presumably true iff every row of `other` / of `otherRows`
  // is also a row of this itemset -- confirm against the implementation.
  bool containsRows(const Itemset &other) const;
  bool containsRows(const std::set< unsigned int > &otherRows) const;

  /** @return a copy of the column names. */
  std::vector<std::string> getColumns() const { return columnStrings; }

  /**
   * @return the number of columns: the size of the explicit column vector
   * while it is populated, otherwise the cached count.
   *
   * The previous body returned the cached count unconditionally and left a
   * second, unreachable `return(columns.size())` behind it.
   */
  unsigned int getNumColumns() const
    {
      if (!columns.empty())
        return static_cast<unsigned int>(columns.size());
      return _numColumns;
    }

  /** @return the number of rows in this itemset. */
  unsigned int getNumRows() const
    {
      return static_cast<unsigned int>(_rowSet.size());
    }

  /** @return the number of complemented rows in this itemset. */
  unsigned int getNumComplementedRows() const
    {
      return static_cast<unsigned int>(_complementedRowSet.size());
    }

  /** @return a copy of the row names. */
  std::vector<std::string> getRows() const { return rowStrings; }

  // Output routines, defined outside this header; each writes to ostr.
  void printColumns(std::ostream &ostr) const;

  void printRows(std::ostream &ostr) const;

  void print(std::ostream &ostr, unsigned int itemsetnum) const;

  void printGraph(std::ostream &ostr, unsigned int itemsetNum) const;

  /** Set the p-value of the #columns amongst itemsets with the same #rows. */
  void setColumnPvalue(float pval)
    {
      _columnPValue = pval;
    }
  /** @return the p-value stored by setColumnPvalue() (1 by default). */
  float getColumnPvalue() const
    {
      return _columnPValue;
    }

  /** Set the p-value of the #rows amongst itemsets with the same #columns. */
  void setRowPvalue(float pval)
    {
      _rowPValue = pval;
    }
  /** @return the p-value stored by setRowPvalue() (1 by default). */
  float getRowPvalue() const
    {
      return _rowPValue;
    }

  /** Set the p-value of the size of the itemset. */
  void setSizePvalue(float pval)
    {
      _sizePValue = pval;
    }
  /** @return the p-value stored by setSizePvalue() (1 by default). */
  float getSizePvalue() const
    {
      return _sizePValue;
    }

};
00305 
00306 #endif
 All Classes Functions Variables Typedefs Friends