Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
netman.h
00001 /**************************************************************************
00002  * Copyright (c) 2005-2011 T. M. Murali                                   *
00003  * Copyright (c) 2009-2011 Christopher L. Poirel                          *
00004  * Copyright (c) 2011 Christopher D. Lasher                               *
00005  * Copyright (c) 2010 Jacqueline Addesa                                   *
00006  * Copyright (c) 2003-2005 Deept Kumar                                    *
00007  *                                                                        *
00008  * This file is part of Biorithm.                                         *
00009  *                                                                        *
00010  * Biorithm is free software: you can redistribute it and/or modify       *
00011  * it under the terms of the GNU General Public License as published by   *
00012  * the Free Software Foundation, either version 3 of the License, or      *
00013  * (at your option) any later version.                                    *
00014  *                                                                        *
00015  * Biorithm is distributed in the hope that it will be useful,            *
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00018  * GNU General Public License for more details.                           *
00019  *                                                                        *
00020  * You should have received a copy of the GNU General Public License      *
00021  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00022  *                                                                        *
00023  **************************************************************************/
00024 
00031 #ifndef _NETMAN_H_
00032 #define _NETMAN_H_
00033 
00034 
00035 // for transform in NetMan::getCorrelationMethod().
00036 #include <algorithm>
00037 #include <set>
00038 #include <string>
00039 
00040 #include <stdio.h>
00041 #include <stdlib.h>
00042 #include <fstream>
00043 #include <math.h>
00044 
00045 using namespace std;
00046 
00047 #include "boost/multi_index_container.hpp"
00048 #include "boost/multi_index/member.hpp"
00049 #include "boost/multi_index/ordered_index.hpp"
00050 #include <boost/multi_index/composite_key.hpp>
00051 
00052 using boost::multi_index_container;
00053 using namespace boost::multi_index;
00054 
00055 
00056 
00057 // for MyAnnotations.
00058 #include "old-annotations.h"
00059 #include "enrichment.h"
00060 // for GeneOntology
00061 #include "GO.h"
00062 #include "histogram.h"
00063 
00064 #include "active-networks.h"
00065 #include "active-networks-opts.h"
00066 
/**
 * Return a copy of dir that ends in '/'.
 *
 * A '/' is appended only if dir does not already end in one. An empty
 * string is returned unchanged: there is no last character to inspect
 * (indexing dir[dir.length() - 1] would be out of range), and turning ""
 * into "/" would silently redirect relative paths to the filesystem root.
 *
 * \param[in] dir the directory name to normalise.
 * \return dir, with a trailing '/' if it is non-empty.
 */
inline std::string _normaliseDirectory(std::string dir)
{
  if (!dir.empty() && ('/' != dir[dir.length() - 1]))
    dir += '/';
  return(dir);
}
00074 
00075 
// Declaration only; the definition is not part of this header.
// nodeAliases is read-only input (const reference) and reversedAliases is
// the output argument (non-const reference).
// NOTE(review): judging by the name, reversedAliases presumably receives
// the inverse of the nodeAliases mapping -- confirm against the definition.
00076 void _reverseAliases(const map< string, set< string > > &nodeAliases,
00077                      map< string, set< string> > &reversedAliases);
00078 
00079 
00080 
00081 struct PPNodeStats
00082 {
00083 public:
00084   string _nodeId;
00085   MyNT _tstat;
00086   MyNT _pvalue;
00087   MyNT _zscore;
00088 public:
00089   PPNodeStats (string n, MyNT t, MyNT p, MyNT z)
00090       : _nodeId(n), _tstat(t), _pvalue(p), _zscore(z)
00091     {}
00092 };
00093 
// Empty tag types. They carry no data; they only name the two indices of
// SetOfPPNodeStats below (via tag< ... >).
00094 struct pa_nodeid_index {};
00095 struct pa_zscore_index {};
00096 
// Container of PPNodeStats records with two ordered indices:
//   - pa_nodeid_index: unique, keyed on PPNodeStats::_nodeId.
//   - pa_zscore_index: non-unique, keyed on PPNodeStats::_zscore.
// The order of the indices inside indexed_by is significant (the first
// one is the container's default index), so do not reorder them.
00097 typedef multi_index_container<
00098   PPNodeStats,
00099   indexed_by<
00100 
00101     // define single-column primary key.
00102     ordered_unique<
00103     tag< pa_nodeid_index >,
00104     member< PPNodeStats, string, &PPNodeStats::_nodeId > >,
00105 
00106     // there can be many nodes with the same z-score.
00107     ordered_non_unique<
00108     tag< pa_zscore_index >,
00109     member< PPNodeStats, MyNT, &PPNodeStats::_zscore > >
00110 
00111     > // matches indexed_by
00112   > SetOfPPNodeStats;
00113 
00121 class NetMan
00122 {
00123 private:
00124 
00125   gengetopt_args_info _options;
00126   string _commandLine;
00127 
00128   // -a
00129 //  vector< string > _activeNetworkFiles;
00130 
00131   // -b, --compute-betweenness
00132   bool _computeBetweennessCentralities;
00133 
00134   // -C
00135   MyNT _correlationThreshold;
00136   // currently, i set this value based on the value of
00137   // _correlationThreshold in setCorrelationThreshold().
00138   bool _useRandomisedGEData;
00139 
00140 
00141   // -E
00142   string _activeNetworkEdgesFile;
00143 
00144   // --edge-annotations-file
00145   MyAnnotations _edgeAnnotations;
00146   string _edgeAnnotationsFile;
00147 
00148 
00149   // -f, --functions-file
00150   MyAnnotations _nodeAnnotations;
00151 //  string _nodeAnnotationsFile;
00152   vector< string > _nodeAnnotationsFiles;
00153 
00154 
00155   MyGraph wiringDiagram;
00156   bool _computeDenseFunctions;
00157   map< string, MyGraph > _denseFunctionSubgraphs;
00158   MyGraph _allDenseFunctionSubgraphs;
00159   bool _useMaximumSpanningTree;
00160   bool _deleteDenseFunctionSubgraphsCompletely;
00161 
00162   map< string, MyGraph > functionSubgraphs;
00163 
00164   // store all ge data in this variable. 2008-12-12, start the
00165   // transition from vector< GEData >.
00166   GEDataSets geDataSets;
00167 public:
00170   vector< GEData > geData;
00171 private:
00172   // map storing which class is in which index.
00173   map< string, unsigned int > _classToIndex;
00174 
00175   string currentExperimentName;
00176   vector< GEData >::iterator currentGEData;
00177   vector< string > directories;
00178   vector< string > experimentNames;
00179 
00180   // -I
00181   set< string > _ignoredEdgeTypes;
00182   string _ignoredEdgeString;
00183   MyGraph _ignoredWiringDiagram;
00184 
00185   // -l
00186   bool _computeNetworkLegos;
00187 
00188 
00189   // -m, --multiple-hypotheses-correction
00190   LIBENRICHMENT_TEST_TYPE _multipleHypothesesCorrectionTest;
00191 
00192   // --min-number-conditions
00193   unsigned int _minNumberConditions;
00194 
00195   // --min-number-interactions
00196   unsigned int _minNumberInteractions;
00197 
00198   // -N
00199   string _activeNetworkNodesFile;
00200 
00201 public:
00204   // --node-alias-file
00205   map< string, set< string > > _nodeAliases;
00206   map< string, set< string > > _reversedNodeAliases;
00207 private:
00208   
00209   // --node-url
00210   string _nodeURL;
00211 
00212   // -r, --num-random-runs
00213   unsigned int _numRandomRuns;
00214   bool _runOnlyRandomRuns;
00215 
00216   // -s
00217   bool _splitActiveNetworksIntoComponents;
00218 
00219   // --translate-annotations.
00220   bool _translateAnnotations;
00221 
00222   // --www
00223   string _currentWWWDirectory;
00224 
00225   // file containing information on the class each chip in a gene
00226   // expression data set belongs to.
00227   string classFile;
00228   string functionSubgraphsFile;
00229   string geFile;
00230   string networkFile;
00231   string oFile;
00232 
00233   GeneOntology _goDAG;
00234   bool _goDAGRead;
00235   string _oboFile;
00236 
00237   // -p
00238   MyNT _pvalueThreshold;
00239 
00240   // -t
00241   MyNT _edgeWeightThreshold;
00242 
00243   // -T
00244   bool _useStudentsTDistribution;
00245 
00246   // -x
00247   bool _expandActiveNetworks;
00248 
00249 
00250   // -z
00251   bool _computeActiveNetworksTillTheBitterEnd;
00252 
00253 public:
00256   
00257   // each of these output files will contain tab-delimited data about
00258   // the ActiveNetworks in a form fit for loading into a database.
00259 
00260 
00261   // to output in 0-1 matrix format.
00262   ofstream binaryMatrixStream;
00263   // to output condition in each ActiveNetwork.
00264   ofstream conditionStream;
00265 
00266   // the edges in an ActiveNetwork or in a combined ActiveNetwork.
00267   ofstream edgeStream;
00268   // statistics on each edge in an ActiveNetwork or in a combined ActiveNetwork.
00269   ofstream edgeCentralityStream;
00270 
00271   // statistics on functional enrichment for edges.
00272   ofstream edgeEnrichmentStream;
00273   // for each edges, which functions annotating it are enriched.
00274   ofstream edgeEnrichedFunctionStream;
00275   // graph containing edge similarities of ANs.
00276   ofstream edgeSimilarityGraphStream;
00277   // the count of edge types in an ActiveNetwork or in a combined ActiveNetwork.
00278   ofstream edgeTypesCountStream;
00279   // histogram of edge weights in ANs/NLs.
00280   ofstream edgeWeightsHistogramStream;
00281 
00282 
00283   // statistics on function subgraphs.
00284   ofstream functionSubgraphsStream;
00285 
00286   // histogram info.
00287 
00288   // the histogram of expression values in a gene expression dataset.
00289   ofstream expressionValuesHistogramStream;
00290   ofstream histogramStream;
00291 
00292   // lattice edges.
00293   ofstream latticeStream;
00294   // transitively closed lattice.
00295   ofstream closedLatticeStream;
00296 
00297   // generic information.
00298   ofstream logStream;
00299   // the nodes in an ActiveNetwork or in a combined ActiveNetwork.
00300   ofstream nodeStream;
00301   // statistics on each node in an ActiveNetwork or in a combined ActiveNetwork.
00302   ofstream nodeCentralityStream;
00303 
00304   // statistics on functional enrichment for nodes.
00305   ofstream nodeEnrichmentStream;
00306   ofstream nodeEnrichmentOntologizerStream;
00307   // for each node, which functions annotating it are enriched.
00308   ofstream nodeEnrichedFunctionStream;
00309   // analysis of functional enrichment of nodes across ANs.
00310   ofstream processedNodeEnrichmentsStream;
00311   ofstream processedNodeEnrichmentsOntologizerStream;
00312   // graph containing node similarities of ANs.
00313   ofstream nodeSimilarityGraphStream;
00314   // histogram of node weights in ANs/NLs.
00315   ofstream nodeWeightsHistogramStream;
00316 
00317   // to output in tab-delimited "itm" format.
00318   ofstream itemsetStream;
00319   // statistics about ActiveNetworks in random graphs.
00320   ofstream randomStream;
00321   ofstream randomPvaluesStream;
00322   // various stats about ActiveNetworks.
00323   ofstream statsStream;
00324 
00325   // string streams to print info to before opening output files.
00326   stringstream logStringStream;
00327 //  stringstream expressionValuesHistogramStringStream;
00328 //  stringstream expressionValuesGnuplotStringStream;
00329 
00330   // the names of the output files. the first string is the type of
00331   // the file (same as the string in outputStreams. the second string
00332   // is the actual file name.
00333   map< string, string > _outputFileNames;
00334   // a convenience variable to simplify the code for opening and
00335   // closing all the above streams.
00336   map< string, ofstream* > outputStreams;
00337 
00338 private:
00339   set< string > _negativeWeightEdgeTypes;
00340 
00341   MyHistogram _expressionValuesHistogram;
00342 
00343   // am i re-processing previously computed ANs?
00344   bool _reprocessComputedActiveNetworks;
00345   // am i re-processing previously computed NLs?
00346   bool _reprocessComputedNetworkLegos;
00347 
00348   // map from treatment to set of controls. each treatment can have
00349   // >=1 control, but each t-c pair is analysed separately. this data
00350   // structure is also used in Netman::createWebPages() (and therefore
00351   // in SetOfActiveNetworks::layout() to decide which conditions to
00352   // group together when drawing function-specific networks.
00353   map< string, set< string > > _treatmentControlPairs;
00354 
00355 
00356 public:
00357 
00358   NetMan(void);
00359   ~NetMan(void);
00360 
00361   /************ START OF COMMAND LINE RELATED METHODS. *************/
00362 
00363   // -a
00364   // void addActiveNetworkFile(string optarg)
00365   //   {
00366   //     _activeNetworkFiles.push_back(optarg);
00367   //   }
00368   // unsigned int getNumActiveNetworkFiles() cons
00369   //   {
00370   //     return(_activeNetworkFiles.size());
00371   //   }
00372 
00373   // string getActiveNetworkFiles(unsigned int index) const
00374   //   {
00375   //     return(_activeNetworkFiles[index]);
00376   //   }
00377 
00378 
00379   // void getActiveNetworkFiles(vector< string > &files) const
00380   //   {
00381   //     files = _activeNetworkFiles;
00382   //   }
00383 
00384   // MORON! calling this method set() confuses the poor compilers when
00385   // they see set used as an STL data structure later.
00386 
00388   virtual void setParameters(int argc, char **argv);
00389 
00390   // -b, --compute-betweenness
00391   void setComputeBetweennessCentralities()
00392     {
00393       _computeBetweennessCentralities = true;
00394     }
00395   bool getComputeBetweennessCentralities() const
00396     {
00397       return(_computeBetweennessCentralities);
00398     }
00399 
00400 
00401 
00402   // -B
00403   void setOBOFile(char* file)
00404     {
00405       _oboFile = file;
00406     }
00407 
00408   // -c
00409   // call this method only after setting the experiment name (NetMan::expt_name).
00410   void setClassFile(string name)
00411     {
00412       classFile = name;
00413     }
00414   string getClassFile(void) const
00415     {
00416       return classFile;
00417     }
00418 
00419 
00420   // -C
00421   void setCorrelationThreshold(float arg)
00422     {
00423       _correlationThreshold = arg;//atof(arg.c_str());
00424       if (1 < _correlationThreshold)
00425         setUseRandomisedGEData();
00426     }
00427 
00428   // --compute-stability.
00429   bool getComputeNetworkLegoStability() const
00430     {
00431       return(_options.compute_stability_given);
00432     }
00433 
00434   void setUseRandomisedGEData()
00435     {
00436       _useRandomisedGEData = true;
00437     }
00438   bool getUseRandomisedGEData() const
00439     {
00440       return(_useRandomisedGEData);
00441     }
00442 
00443   // --correlation-method
00444   string getCorrelationMethod() const
00445     {
00446       string method = _options.correlation_method_arg;
00447       // see http://www.devx.com/getHelpOn/Article/9702/1954?pf=true
00448       transform(method.begin(), method.end(), method.begin(), (int(*)(int)) tolower);
00449       return(method);
00450 //      return(_options.correlation_method_arg);
00451     }
00452 
00453   MyNT getCorrelationThreshold() const
00454     {
00455       return(_correlationThreshold);
00456     }
00457 
00458 
00459   // -d
00460   void addDirectory(char* dir)
00461     {
00462       directories.push_back(dir);
00463     }
00464   unsigned int getNumDirectories() const
00465     {
00466       return(directories.size());
00467     }
00468   string getDirectory(unsigned int index) const
00469     {
00470       return(directories[index]);
00471     }
00472 
00473 
00474 
00475   // -D
00476   void setComputeDenseFunctions()
00477     {
00478       _computeDenseFunctions = true;
00479     }
00480   bool getComputeDenseFunctions() const
00481     {
00482       return(_computeDenseFunctions);
00483     }
00484   
00485   // -E
00487   void setActiveNetworksEdgesFile(string file)
00488     {
00489       _activeNetworkEdgesFile = file;
00490     }
00492   string getActiveNetworksEdgesFile() const
00493     {
00494       return(_activeNetworkEdgesFile);
00495     }
00496 
00504   bool getProcessComputedActiveNetworks() const
00505     {
00506       return(("" != getActiveNetworksEdgesFile())
00507              || ("" != getActiveNetworksNodesFile()));;
00508     }
00509 
00510   // --experiment-name
00511   string getExperimentName() const
00512     {
00513       return(_options.experiment_name_arg);
00514     }
00515 
00516   // -f
00517   void setNodeAnnotationsFile(char* name)
00518     {
00519 //      _nodeAnnotationsFile = name;
00520       _nodeAnnotationsFiles.push_back(name);
00521     }
00522 
00523   // -g
00524   void setGEfile(char* name)
00525     {
00526       geFile = name;
00527     }
00528   string getGEfile(void) const
00529     {
00530       return geFile;
00531     }
00532 
00533 
00534   // -I
00535   void addIgnoredEdgeType(string type)
00536     {
00537       // _ignoredEdgeString must start with a "-" if it is non-empty.
00538       if (0 == _ignoredEdgeTypes.size())
00539         _ignoredEdgeString = "-NO";
00540       _ignoredEdgeString += "-" + type;
00541       _ignoredEdgeTypes.insert(type);
00542     }
00543 
00544   //  am I computing ANs?
00545   bool getComputeActiveNetworks(void) const
00546     {
00547       //  I am computing ANs if ..
00548       //
00549       // i am not computing NLs
00550       return(!getComputeNetworkLegos()
00551               // and i am not computing pathway activation
00552               && !getComputePathwayPerturbation()
00553               // and i am not just processing the network.
00554               && !getProcessNetwork()
00555               // and i don't have a pre-computed AN file.
00556               && !getProcessComputedActiveNetworks());
00557     }
00558 
00559 
00560   // -l
00561   void setComputeNetworkLegos()
00562     {
00563       _computeNetworkLegos = true;
00564     }
00565 
00566   bool getComputeNetworkLegos() const
00567     {
00568       return(_computeNetworkLegos);
00569     }
00570 
00571   // -L
00572   bool setFunctionSubgraphsFile(string arg)
00573     {
00574       functionSubgraphsFile = arg;
00575     }
00576 
00577   // -m, --multiple-hypotheses-correction
00578   void setMultipleHypothesesCorrectionTest(string optarg)
00579     {
00580       _multipleHypothesesCorrectionTest =
00581         convertStringToTestType(optarg);
00582     }
00583   LIBENRICHMENT_TEST_TYPE getMultipleHypothesesCorrectionTest() const
00584     {
00585       return(_multipleHypothesesCorrectionTest);
00586     }
00587 
00588 
00589   
00590   // --min-number-conditions
00591   void setMinNumberConditions(unsigned int min)
00592     {
00593       _minNumberConditions = min;
00594     }
00595   unsigned int getMinNumberConditions() const
00596     {
00597       return(_minNumberConditions);
00598     }
00599 
00600   // --min-number-interactions
00601   void setMinNumberInteractions(unsigned int min)
00602     {
00603       _minNumberInteractions = min;
00604     }
00605   unsigned int getMinNumberInteractions() const
00606     {
00607       return(_minNumberInteractions);
00608     }
00609 
00610   // -n
00611   void setNetworkFile(char* file)
00612     {
00613       networkFile = file;
00614     }
00615 
00616   string getNetworkFile(void) const
00617     {
00618       return networkFile;
00619     }
00620 
00621   // -N
00623   void setActiveNetworksNodesFile(string file)
00624     {
00625       _activeNetworkNodesFile = file;
00626     }
00628   string getActiveNetworksNodesFile() const
00629     {
00630       return(_activeNetworkNodesFile);
00631     }
00632 
00634   bool getProcessNetwork() const
00635     {
00636       return(//(0 == getNumActiveNetworkFiles())
00637         //&&
00638              (0 == getNumDirectories())
00639              && ("" == getGEfile())
00640              && ("" != getNetworkFile())
00641              );
00642     }
00643 
00644 
00645   // --layout-constraints-file
00646   string getLayoutConstraintsFile() const
00647     {
00648       return(_options.layout_constraints_file_arg);
00649     }
00650 
00651 
00652   // -p
00653   void setPvalueThreshold(float thresh)
00654     {
00655       _pvalueThreshold = thresh;//atof(thresh.c_str());
00656     }
00657   MyNT getPvalueThreshold() const
00658     {
00659       return(_pvalueThreshold);
00660     }
00661 
00662   // --pathway-perturbation, -P
00663   bool getComputePathwayPerturbation() const
00664     {
00665       return(_options.pathway_perturbation_given);
00666     }
00667       
00668   // -r
00669   void setNumRandomRuns(unsigned int num)
00670     {
00671       _numRandomRuns = num;//atoi(arg);
00672     }
00673 
00674 
00675   void setOnlyRandomRuns()
00676     {
00677       _runOnlyRandomRuns = true;
00678     }
00679 
00680   bool getOnlyRandomRuns() const
00681     {
00682       return(_runOnlyRandomRuns);
00683     }
00684 
00685   unsigned int getNumRandomRuns() const
00686     {
00687       return(_numRandomRuns);
00688     }
00689 
00690 
00691   // --regulation-bound.
00692   MyNT getRegulationBound() const
00693     {
00694       if (_options.regulation_bound_given)
00695         return(_options.regulation_bound_arg);
00696       return(1);
00697     }
00698   MyNT getUpRegulationBound() const
00699     {
00700       return(getRegulationBound());
00701     }
00702   MyNT getDownRegulationBound() const
00703     {
00704       return(-getRegulationBound());
00705     }
00706 
00707   string getRegulationMethod() const
00708     {
00709       return(_options.regulation_method_arg);
00710     }
00711 
00712   // --sa-factor-number-iterations.
00713   unsigned int getSimulatedAnnealingFactorNumberIterations() const
00714     {
00715       return(_options.sa_factor_number_iterations_arg);
00716     }
00717 
00718 
00719   // --sparsify-network
00720   bool getSparsifyNetwork() const
00721     {
00722       return(_options.sparsify_network_given);
00723     }
00724   string getSparsificationMethod() const
00725     {
00726       return(_options.sparsify_network_arg);
00727     }
00728 
00729   // -s, --split-active-networks
00730   void setSplitActiveNetworksIntoComponents()
00731     {
00732       _splitActiveNetworksIntoComponents = true;
00733     }
00734   bool getSplitActiveNetworksIntoComponents() const
00735     {
00736       return(_splitActiveNetworksIntoComponents);
00737     }
00738 
00739   // --split-gene-expression-data
00740   bool getSplitGEData() const
00741     {
00742       return(_options.split_gene_expression_data_given);
00743     }
00744 
00745   // -t
00746   void setEdgeWeightThreshold(string optarg)
00747     {
00748       _edgeWeightThreshold = atof(optarg.c_str());
00749     }
00750   void setEdgeWeightThreshold(MyNT thresh)
00751     {
00752       _edgeWeightThreshold = thresh;
00753     }
00754   MyNT getEdgeWeightThreshold() const
00755     {
00756       return(_edgeWeightThreshold);
00757     }
00758 
00759 
00760   // -T
00761   void setUseStudentsTDistribution()
00762     {
00763       _useStudentsTDistribution = true;
00764     }
00765   bool getUseStudentsTDistribution() const
00766     {
00767       return(_useStudentsTDistribution);
00768     }
00769 
00770   // --translateAnnotations
00771   bool getTranslateAnnotations() const
00772     {
00773       return(_options.translate_annotations_given);
00774     }
00775 
00776   // -trivial-network-legos
00777   bool getComputeTrivialNetworkLegos() const
00778     {
00779       return(_options.trivial_network_legos_given);
00780     }
00781 
00782 
00783 
00784 
00785   // -x
00786   void setExpandActiveNetworks()
00787     {
00788       _expandActiveNetworks = true;
00789     }
00790   bool getExpandActiveNetworks() const
00791     {
00792       return(_expandActiveNetworks);
00793     }
00794 
00795   // -z
00796   void setComputeActiveNetworksTillTheBitterEnd()
00797     {
00798       _computeActiveNetworksTillTheBitterEnd = true;
00799     }
00800   bool getComputeActiveNetworksTillTheBitterEnd() const
00801     {
00802       return(_computeActiveNetworksTillTheBitterEnd);
00803     }
00804 
00805 
00806   // WWW-related options.
00807 
00808   // --embed
00811   bool getEmbedActiveNetworkInSuperGraph() const
00812     {
00813       return(_options.embed_given);
00814     }
00815 
00816   // --www
00817   bool getCreateWebPages() const
00818     {
00819 #ifdef CTEMPLATE
00820       return(_options.www_given);
00821 #else
00822       return(false);
00823 #endif
00824     }
00825 
00829   void createWebPages(SetOfActiveNetworks &anetSet);
00830 
00831 
00832   // --www-params-dir
00833   string getWWWParametersDir() const
00834     {
00835       return(_normaliseDirectory(_options.www_params_dir_arg));
00836     }
00837 
00838   /************ END OF COMMAND LINE RELATED METHODS. *************/
00839 
00841   void getWiringDiagram(MyGraph &wd) //const: COMPILATION ERROR WITH const.
00842     {
00843       wd = wiringDiagram;
00844     }
00845 
00847   void getWiringDiagramNodes(set< string > &nodes) const
00848     {
00849       wiringDiagram.getNodeSet(nodes);
00850     }
00851 
00853   string getEdgeAnnotationsFile(void) const
00854     {
00855       return(_options.edge_annotations_file_arg);
00856     }
00857 
00858 //  string getNodeAnnotationsFile(void) const;
00859   void getNodeAnnotationsFiles(vector< string > &files) const;
00860   string getNodeAnnotationsFile(unsigned int index) const
00861     {
00862       return(_nodeAnnotationsFiles[index]);
00863     }
00864   unsigned int getNumNodeAnnotationsFiles() const
00865     {
00866       return(_nodeAnnotationsFiles.size());
00867     }
00868 
00869   // --node-alias-file
00871   string getNodeAliasFile() const
00872     {
00873       return(_options.node_alias_file_arg);
00874     }
00875 
00876 
00877   string getFunctionSubgraphsFile() const
00878     {
00879       return(functionSubgraphsFile);
00880     }
00881 
00882   string getOBOFile(void) const
00883     {
00884       return(_oboFile);
00885     }
00886 
00887   // --treatment-control-file
00889   string getTreatmentControlFile() const
00890     {
00891       return(_options.treatment_control_file_arg);
00892     }
00893 
00894 
00896   void readEdgeAnnotations(string name);
00897 
00903   void readNodeAliases(string file);
00904 
00925   void readNodeAnnotations(string name, set< string > *geneSet = NULL);
00926 
00927   void readOBO(string name)
00928     {
00929       _goDAG.read(name);
00930       // i better do this if i want depths to be printed out.
00931       _goDAG.computeFunctionDepths();
00932       _goDAGRead = true;
00933     }
00934 
00935   bool readFunctionSubgraphs(string);
00936   bool readGEData(string);
00937   bool readNetwork(string, MyNT minimumEdgeWeight = 0);
00938 
00939   vector< SetOfPPNodeStats > treatAliases(MyGraph &pathway, const GEData &geData);
00940 
00943   bool allowNegativeEdgeWeight(set< string >& types) const
00944     {
00945       set< string >::const_iterator itr;
00946       for (itr = _negativeWeightEdgeTypes.begin(); itr != _negativeWeightEdgeTypes.end();
00947            itr++)
00948         if (types.end() != types.find(*itr))
00949           return(true);
00950       return(false);
00951     }
00952 
00968   // void combineActiveNetworks(vector< string > &activeNetworkFiles);
00969 
00970 
00971 
01001   void computeDenseFunctions(ostream *fstr,
01002                              map< string, MyGraph > &denseFunctionSubgraphs);
01003 
01008   void computeDenseFunctions(ostream *fstr = NULL)
01009     {
01010       computeDenseFunctions(fstr, _denseFunctionSubgraphs);
01011       map< string, MyGraph >::iterator gitr;
01012       for (gitr = _denseFunctionSubgraphs.begin(); gitr != _denseFunctionSubgraphs.end();
01013            gitr++)
01014 #ifdef DEBUG
01015         {
01016           cout << "Function with dense subgraph is " << gitr->first
01017                << " with " << gitr->second.numNodes() << " nodes and "
01018                << gitr->second.numEdges() << " edges." << endl;
01019           gitr->second.printEdges(cout);
01020 #endif
01021           _allDenseFunctionSubgraphs.add(gitr->second);
01022 #ifdef DEBUG
01023         }
01024       cout << "Union of all functions with dense subgraphs has "
01025            << _allDenseFunctionSubgraphs.numNodes() << " nodes and "
01026            << _allDenseFunctionSubgraphs.numEdges() << " edges." << endl;
01027       _allDenseFunctionSubgraphs.printNodes(cout);
01028       _allDenseFunctionSubgraphs.printEdges(cout);
01029 #endif
01030     }
01031 
01049   void computePathwayPerturbation();
01050 
01065   // i cannot make pathways const because MyGraph has no const node iterator.
01066   void computePathwayPerturbation(const map< string, set< string > > &treatmentControlPairs, map< string, MyGraph> &pathways);
01067 
01087   void computePathwayPerturbation(MyGraph &pathway, const GEData &treatment,
01088                                   const GEData &control, string contrast, ActiveNetwork &subgraph);
01089 
01092   void computePathwayPerturbation(MyGraph &pathway, const GEData &geData);
01093 
01094 
01095 //  static bool compareFunction(MyNode x, MyNode y);
01096 
01097 
01101   void plotExpressionValuesHistogram();
01102 
01107   void plotExpressionCorrelationsHistogram();
01108 
01112   void processNetworkLegos(SetOfActiveNetworks &anetSet);
01113 
01114 
01115   void readDirectories();
01116 
01126   void removeUnresponsiveGenes(GEData &gedata);
01127 
01142   bool computeCorrelations(MyGraph &wiring);//GEData &expression);
01143 
01144   // \param[in] randomRun, if true, the method knows that the wiring
01145   // diagram is a randomly generated network. In this case, the method
01146   // does not print out some statistics about the dense subgraphs it
01147   // finds.
01148   bool computeDenseSubgraphs(vector< ActiveNetwork > &denseSubgraphs, bool randomRun = false);
01149 
01150 
01152   void setWiringDiagramEdgeWeightsFS()
01153     {
01154       wiringDiagram.computeEdgeFSWeights();
01155     }
01156 
01158   void setWiringDiagramEdgeWeightsCD()
01159     {
01160       wiringDiagram.computeEdgeCDWeights();
01161     }
01162 
01172   void postProcess(SetOfActiveNetworks &anetSet);
01173 
01176   void process();
01177 
01185   void processComputedActiveNetworks();
01186 
01187 
01190   void processFunctionSubgraphs();
01191 
01194   void processNetwork();
01195 
01196 
01197   void randomise(MyHistogram &histogram);
01198 
01201   void separateDenseFunctions();
01202 
01203 
01204 
01205 private:
01206 
01207   // assign edge weights based on node stats.
01208   void _assignEdgeWeights(MyGraph &pathway, SetOfPPNodeStats &nodeStats,
01209                           ostream &ostr);
01210 
01211 public:
01214   void _openOutputFiles();
01215   void _closeOutputFiles();
01216 
01217 private:
01218   // check if ids are same for different datasets.
01219   void _performSanityChecks();
01220 
01221   void _processDenseFunctionSubgraphs(MyGraph &wiringDiagram, MyGraph &mst);
01222 
01223   void _readTreatmentControlPairs(string file);
01224 
01225 };
01226 
01227 #endif
 All Classes Functions Variables Typedefs Friends