Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
format.h
00001 /**************************************************************************
00002  * Copyright (c) 2002-2011 T. M. Murali                                   *
00003  *                                                                        *
00004  * This file is part of Biorithm.                                         *
00005  *                                                                        *
00006  * Biorithm is free software: you can redistribute it and/or modify       *
00007  * it under the terms of the GNU General Public License as published by   *
00008  * the Free Software Foundation, either version 3 of the License, or      *
00009  * (at your option) any later version.                                    *
00010  *                                                                        *
00011  * Biorithm is distributed in the hope that it will be useful,            *
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00014  * GNU General Public License for more details.                           *
00015  *                                                                        *
00016  * You should have received a copy of the GNU General Public License      *
00017  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00018  *                                                                        *
00019  **************************************************************************/
00020 
00021 #ifndef _FORMAT_H
00022 #define _FORMAT_H
00023 
#include <map>
#include <ostream>
#include <set>
#include <string>
#include <vector>

#include "global.h"
00028 
// Describes the file format of an affy-like file; mainly the content of
// each row (which columns hold ids, names, calls and values).
//
// Standard library names are qualified with std:: so that this header does
// not depend on a using-directive being in effect where it is included.
struct MyAffyFileFormat
{
  // the character that separates items in a line (the original author
  // notes that this should really be a regex).
  char delimiter;
  // does the file have "CALL" information?
  bool hasCallInfo;
  // the number of names for a gene in each line of the file. e.g., if
  // there is an id and a name, this field = 2. the constructor sets it
  // to -1.
  int numGeneNames;
  // column indices of the gene id and the gene name. the constructor
  // defaults them to 0 and 1 respectively (not -1).
  int geneIdColumn;
  int geneNameColumn;
  // header names of the gene id and gene name columns; empty by default.
  std::string geneIdColumnName;
  std::string geneNameColumnName;

  // since this code is generalised to non-gene expression data (for
  // example, to compute itemsets), row names can be read without
  // assuming they are gene names. rowNameColumn defaults to -1.
  int rowNameColumn;
  std::string rowNameColumnName;

  // header name of the column holding the call; empty by default.
  std::string callColumnName;
  // indices of value columns; empty by default.
  std::vector< int > valueColumns;

  // columns and rows to ignore. these are the same type in MyClusterParams.
  std::map< std::string, bool > ignoredColumns;
  // indices corresponding to ignored columns, stored so that there is no
  // need to look up the name of a column to check whether it should be
  // ignored.
  std::set< unsigned int > ignoredColumnIndices;
  std::map< std::string, bool > ignoredRows;


  // print a summary of the format to ostr (defined out of line).
  void print(std::ostream& ostr) const;

  // a constructor is needed; otherwise, badly initialised values screw
  // things up. the initialisers are listed in member declaration order.
  MyAffyFileFormat()
    : delimiter('\t'),
      hasCallInfo(false),
      numGeneNames(-1),
      // defaults are 0 for geneIdColumn and 1 for geneNameColumn.
      geneIdColumn(0),
      geneNameColumn(1),
      geneIdColumnName(),
      geneNameColumnName(),
      rowNameColumn(-1),
      rowNameColumnName(),
      callColumnName(),
      valueColumns(),
      ignoredColumns(),
      ignoredColumnIndices(),
      ignoredRows()
  {}

};
00078 
00079 
00080 // print the various fields of MyAffyFileFormat.
00081 inline void MyAffyFileFormat::print(ostream& ostr) const
00082 {
00083   ostr << "File format variables:\n";
00084   ostr << "\tdelimiter = \"" << delimiter << "\"\n";
00085   if ("" != geneIdColumnName)
00086     {
00087       ostr << "\tgene id column = " << geneIdColumn << endl;
00088       ostr << "\tname of gene id column = " << geneIdColumnName << endl;
00089     }
00090   if ("" != geneNameColumnName)
00091     {
00092       ostr << "\tgene name column = " << geneNameColumn << endl;
00093       ostr << "\tname of gene name column = " << geneNameColumnName << endl;
00094     }
00095   if ("" != rowNameColumnName)
00096     {
00097       ostr << "\trow name column = " << rowNameColumn << endl;
00098       ostr << "\tname of row name column = " << rowNameColumnName << endl;
00099     }
00100   
00101 
00102   ostr << "\thas call info = " << hasCallInfo << endl;
00103   ostr << "\tname of call column = " << callColumnName << endl;
00104 //  ostr << "\tcolumns with expression values are " << valueColumns << endl;
00105 }
00106 
00107 
00108 
00109 
00110 #endif // _FORMAT_H 
 All Classes Functions Variables Typedefs Friends