00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00031 #ifndef _NETMAN_H_
00032 #define _NETMAN_H_
00033
00034
00035
00036 #include <algorithm>
00037 #include <set>
00038 #include <string>
00039
00040 #include <stdio.h>
00041 #include <stdlib.h>
00042 #include <fstream>
00043 #include <math.h>
00044
00045 using namespace std;
00046
00047 #include "boost/multi_index_container.hpp"
00048 #include "boost/multi_index/member.hpp"
00049 #include "boost/multi_index/ordered_index.hpp"
00050 #include <boost/multi_index/composite_key.hpp>
00051
00052 using boost::multi_index_container;
00053 using namespace boost::multi_index;
00054
00055
00056
00057
00058 #include "old-annotations.h"
00059 #include "enrichment.h"
00060
00061 #include "GO.h"
00062 #include "histogram.h"
00063
00064 #include "active-networks.h"
00065 #include "active-networks-opts.h"
00066
00067
/// Return @p dir with a trailing '/' appended when it does not already end
/// in one.
///
/// An empty string is returned unchanged: the original indexed
/// dir[dir.length() - 1], which is out of bounds for an empty string.
/// Returning "" (rather than "/") keeps a path built as
/// `dir + fileName` relative instead of silently pointing at the root.
///
/// @param dir directory name, with or without a trailing slash.
/// @return the directory name ending in '/', or "" if @p dir is empty.
inline std::string _normaliseDirectory(std::string dir)
{
  if (dir.empty())
    return dir;
  if ('/' != dir[dir.length() - 1])
    dir += '/';
  return dir;
}
00074
00075
/// Declared here, defined in another translation unit.
///
/// @param nodeAliases     input map (read-only) from a string to a set of strings.
/// @param reversedAliases output map filled in by the function.
///
/// NOTE(review): judging by the name, this presumably builds the inverse
/// mapping (alias -> set of node ids) -- confirm against the definition.
void _reverseAliases(
    const std::map< std::string, std::set< std::string > > &nodeAliases,
    std::map< std::string, std::set< std::string > > &reversedAliases);
00078
00079
00080
00081 struct PPNodeStats
00082 {
00083 public:
00084 string _nodeId;
00085 MyNT _tstat;
00086 MyNT _pvalue;
00087 MyNT _zscore;
00088 public:
00089 PPNodeStats (string n, MyNT t, MyNT p, MyNT z)
00090 : _nodeId(n), _tstat(t), _pvalue(p), _zscore(z)
00091 {}
00092 };
00093
00094 struct pa_nodeid_index {};
00095 struct pa_zscore_index {};
00096
00097 typedef multi_index_container<
00098 PPNodeStats,
00099 indexed_by<
00100
00101
00102 ordered_unique<
00103 tag< pa_nodeid_index >,
00104 member< PPNodeStats, string, &PPNodeStats::_nodeId > >,
00105
00106
00107 ordered_non_unique<
00108 tag< pa_zscore_index >,
00109 member< PPNodeStats, MyNT, &PPNodeStats::_zscore > >
00110
00111 >
00112 > SetOfPPNodeStats;
00113
00121 class NetMan
00122 {
00123 private:
00124
00125 gengetopt_args_info _options;
00126 string _commandLine;
00127
00128
00129
00130
00131
00132 bool _computeBetweennessCentralities;
00133
00134
00135 MyNT _correlationThreshold;
00136
00137
00138 bool _useRandomisedGEData;
00139
00140
00141
00142 string _activeNetworkEdgesFile;
00143
00144
00145 MyAnnotations _edgeAnnotations;
00146 string _edgeAnnotationsFile;
00147
00148
00149
00150 MyAnnotations _nodeAnnotations;
00151
00152 vector< string > _nodeAnnotationsFiles;
00153
00154
00155 MyGraph wiringDiagram;
00156 bool _computeDenseFunctions;
00157 map< string, MyGraph > _denseFunctionSubgraphs;
00158 MyGraph _allDenseFunctionSubgraphs;
00159 bool _useMaximumSpanningTree;
00160 bool _deleteDenseFunctionSubgraphsCompletely;
00161
00162 map< string, MyGraph > functionSubgraphs;
00163
00164
00165
00166 GEDataSets geDataSets;
00167 public:
00170 vector< GEData > geData;
00171 private:
00172
00173 map< string, unsigned int > _classToIndex;
00174
00175 string currentExperimentName;
00176 vector< GEData >::iterator currentGEData;
00177 vector< string > directories;
00178 vector< string > experimentNames;
00179
00180
00181 set< string > _ignoredEdgeTypes;
00182 string _ignoredEdgeString;
00183 MyGraph _ignoredWiringDiagram;
00184
00185
00186 bool _computeNetworkLegos;
00187
00188
00189
00190 LIBENRICHMENT_TEST_TYPE _multipleHypothesesCorrectionTest;
00191
00192
00193 unsigned int _minNumberConditions;
00194
00195
00196 unsigned int _minNumberInteractions;
00197
00198
00199 string _activeNetworkNodesFile;
00200
00201 public:
00204
00205 map< string, set< string > > _nodeAliases;
00206 map< string, set< string > > _reversedNodeAliases;
00207 private:
00208
00209
00210 string _nodeURL;
00211
00212
00213 unsigned int _numRandomRuns;
00214 bool _runOnlyRandomRuns;
00215
00216
00217 bool _splitActiveNetworksIntoComponents;
00218
00219
00220 bool _translateAnnotations;
00221
00222
00223 string _currentWWWDirectory;
00224
00225
00226
00227 string classFile;
00228 string functionSubgraphsFile;
00229 string geFile;
00230 string networkFile;
00231 string oFile;
00232
00233 GeneOntology _goDAG;
00234 bool _goDAGRead;
00235 string _oboFile;
00236
00237
00238 MyNT _pvalueThreshold;
00239
00240
00241 MyNT _edgeWeightThreshold;
00242
00243
00244 bool _useStudentsTDistribution;
00245
00246
00247 bool _expandActiveNetworks;
00248
00249
00250
00251 bool _computeActiveNetworksTillTheBitterEnd;
00252
00253 public:
00256
00257
00258
00259
00260
00261
00262 ofstream binaryMatrixStream;
00263
00264 ofstream conditionStream;
00265
00266
00267 ofstream edgeStream;
00268
00269 ofstream edgeCentralityStream;
00270
00271
00272 ofstream edgeEnrichmentStream;
00273
00274 ofstream edgeEnrichedFunctionStream;
00275
00276 ofstream edgeSimilarityGraphStream;
00277
00278 ofstream edgeTypesCountStream;
00279
00280 ofstream edgeWeightsHistogramStream;
00281
00282
00283
00284 ofstream functionSubgraphsStream;
00285
00286
00287
00288
00289 ofstream expressionValuesHistogramStream;
00290 ofstream histogramStream;
00291
00292
00293 ofstream latticeStream;
00294
00295 ofstream closedLatticeStream;
00296
00297
00298 ofstream logStream;
00299
00300 ofstream nodeStream;
00301
00302 ofstream nodeCentralityStream;
00303
00304
00305 ofstream nodeEnrichmentStream;
00306 ofstream nodeEnrichmentOntologizerStream;
00307
00308 ofstream nodeEnrichedFunctionStream;
00309
00310 ofstream processedNodeEnrichmentsStream;
00311 ofstream processedNodeEnrichmentsOntologizerStream;
00312
00313 ofstream nodeSimilarityGraphStream;
00314
00315 ofstream nodeWeightsHistogramStream;
00316
00317
00318 ofstream itemsetStream;
00319
00320 ofstream randomStream;
00321 ofstream randomPvaluesStream;
00322
00323 ofstream statsStream;
00324
00325
00326 stringstream logStringStream;
00327
00328
00329
00330
00331
00332
00333 map< string, string > _outputFileNames;
00334
00335
00336 map< string, ofstream* > outputStreams;
00337
00338 private:
00339 set< string > _negativeWeightEdgeTypes;
00340
00341 MyHistogram _expressionValuesHistogram;
00342
00343
00344 bool _reprocessComputedActiveNetworks;
00345
00346 bool _reprocessComputedNetworkLegos;
00347
00348
00349
00350
00351
00352
00353 map< string, set< string > > _treatmentControlPairs;
00354
00355
00356 public:
00357
00358 NetMan(void);
00359 ~NetMan(void);
00360
00361
00362
00363
00364
00365
00366
00367
00368
00369
00370
00371
00372
00373
00374
00375
00376
00377
00378
00379
00380
00381
00382
00383
00384
00385
00386
00388 virtual void setParameters(int argc, char **argv);
00389
00390
00391 void setComputeBetweennessCentralities()
00392 {
00393 _computeBetweennessCentralities = true;
00394 }
00395 bool getComputeBetweennessCentralities() const
00396 {
00397 return(_computeBetweennessCentralities);
00398 }
00399
00400
00401
00402
00403 void setOBOFile(char* file)
00404 {
00405 _oboFile = file;
00406 }
00407
00408
00409
00410 void setClassFile(string name)
00411 {
00412 classFile = name;
00413 }
00414 string getClassFile(void) const
00415 {
00416 return classFile;
00417 }
00418
00419
00420
00421 void setCorrelationThreshold(float arg)
00422 {
00423 _correlationThreshold = arg;
00424 if (1 < _correlationThreshold)
00425 setUseRandomisedGEData();
00426 }
00427
00428
00429 bool getComputeNetworkLegoStability() const
00430 {
00431 return(_options.compute_stability_given);
00432 }
00433
00434 void setUseRandomisedGEData()
00435 {
00436 _useRandomisedGEData = true;
00437 }
00438 bool getUseRandomisedGEData() const
00439 {
00440 return(_useRandomisedGEData);
00441 }
00442
00443
00444 string getCorrelationMethod() const
00445 {
00446 string method = _options.correlation_method_arg;
00447
00448 transform(method.begin(), method.end(), method.begin(), (int(*)(int)) tolower);
00449 return(method);
00450
00451 }
00452
00453 MyNT getCorrelationThreshold() const
00454 {
00455 return(_correlationThreshold);
00456 }
00457
00458
00459
00460 void addDirectory(char* dir)
00461 {
00462 directories.push_back(dir);
00463 }
00464 unsigned int getNumDirectories() const
00465 {
00466 return(directories.size());
00467 }
00468 string getDirectory(unsigned int index) const
00469 {
00470 return(directories[index]);
00471 }
00472
00473
00474
00475
00476 void setComputeDenseFunctions()
00477 {
00478 _computeDenseFunctions = true;
00479 }
00480 bool getComputeDenseFunctions() const
00481 {
00482 return(_computeDenseFunctions);
00483 }
00484
00485
00487 void setActiveNetworksEdgesFile(string file)
00488 {
00489 _activeNetworkEdgesFile = file;
00490 }
00492 string getActiveNetworksEdgesFile() const
00493 {
00494 return(_activeNetworkEdgesFile);
00495 }
00496
00504 bool getProcessComputedActiveNetworks() const
00505 {
00506 return(("" != getActiveNetworksEdgesFile())
00507 || ("" != getActiveNetworksNodesFile()));;
00508 }
00509
00510
00511 string getExperimentName() const
00512 {
00513 return(_options.experiment_name_arg);
00514 }
00515
00516
00517 void setNodeAnnotationsFile(char* name)
00518 {
00519
00520 _nodeAnnotationsFiles.push_back(name);
00521 }
00522
00523
00524 void setGEfile(char* name)
00525 {
00526 geFile = name;
00527 }
00528 string getGEfile(void) const
00529 {
00530 return geFile;
00531 }
00532
00533
00534
00535 void addIgnoredEdgeType(string type)
00536 {
00537
00538 if (0 == _ignoredEdgeTypes.size())
00539 _ignoredEdgeString = "-NO";
00540 _ignoredEdgeString += "-" + type;
00541 _ignoredEdgeTypes.insert(type);
00542 }
00543
00544
00545 bool getComputeActiveNetworks(void) const
00546 {
00547
00548
00549
00550 return(!getComputeNetworkLegos()
00551
00552 && !getComputePathwayPerturbation()
00553
00554 && !getProcessNetwork()
00555
00556 && !getProcessComputedActiveNetworks());
00557 }
00558
00559
00560
00561 void setComputeNetworkLegos()
00562 {
00563 _computeNetworkLegos = true;
00564 }
00565
00566 bool getComputeNetworkLegos() const
00567 {
00568 return(_computeNetworkLegos);
00569 }
00570
00571
00572 bool setFunctionSubgraphsFile(string arg)
00573 {
00574 functionSubgraphsFile = arg;
00575 }
00576
00577
00578 void setMultipleHypothesesCorrectionTest(string optarg)
00579 {
00580 _multipleHypothesesCorrectionTest =
00581 convertStringToTestType(optarg);
00582 }
00583 LIBENRICHMENT_TEST_TYPE getMultipleHypothesesCorrectionTest() const
00584 {
00585 return(_multipleHypothesesCorrectionTest);
00586 }
00587
00588
00589
00590
00591 void setMinNumberConditions(unsigned int min)
00592 {
00593 _minNumberConditions = min;
00594 }
00595 unsigned int getMinNumberConditions() const
00596 {
00597 return(_minNumberConditions);
00598 }
00599
00600
00601 void setMinNumberInteractions(unsigned int min)
00602 {
00603 _minNumberInteractions = min;
00604 }
00605 unsigned int getMinNumberInteractions() const
00606 {
00607 return(_minNumberInteractions);
00608 }
00609
00610
00611 void setNetworkFile(char* file)
00612 {
00613 networkFile = file;
00614 }
00615
00616 string getNetworkFile(void) const
00617 {
00618 return networkFile;
00619 }
00620
00621
00623 void setActiveNetworksNodesFile(string file)
00624 {
00625 _activeNetworkNodesFile = file;
00626 }
00628 string getActiveNetworksNodesFile() const
00629 {
00630 return(_activeNetworkNodesFile);
00631 }
00632
00634 bool getProcessNetwork() const
00635 {
00636 return(
00637
00638 (0 == getNumDirectories())
00639 && ("" == getGEfile())
00640 && ("" != getNetworkFile())
00641 );
00642 }
00643
00644
00645
00646 string getLayoutConstraintsFile() const
00647 {
00648 return(_options.layout_constraints_file_arg);
00649 }
00650
00651
00652
00653 void setPvalueThreshold(float thresh)
00654 {
00655 _pvalueThreshold = thresh;
00656 }
00657 MyNT getPvalueThreshold() const
00658 {
00659 return(_pvalueThreshold);
00660 }
00661
00662
00663 bool getComputePathwayPerturbation() const
00664 {
00665 return(_options.pathway_perturbation_given);
00666 }
00667
00668
00669 void setNumRandomRuns(unsigned int num)
00670 {
00671 _numRandomRuns = num;
00672 }
00673
00674
00675 void setOnlyRandomRuns()
00676 {
00677 _runOnlyRandomRuns = true;
00678 }
00679
00680 bool getOnlyRandomRuns() const
00681 {
00682 return(_runOnlyRandomRuns);
00683 }
00684
00685 unsigned int getNumRandomRuns() const
00686 {
00687 return(_numRandomRuns);
00688 }
00689
00690
00691
00692 MyNT getRegulationBound() const
00693 {
00694 if (_options.regulation_bound_given)
00695 return(_options.regulation_bound_arg);
00696 return(1);
00697 }
00698 MyNT getUpRegulationBound() const
00699 {
00700 return(getRegulationBound());
00701 }
00702 MyNT getDownRegulationBound() const
00703 {
00704 return(-getRegulationBound());
00705 }
00706
00707 string getRegulationMethod() const
00708 {
00709 return(_options.regulation_method_arg);
00710 }
00711
00712
00713 unsigned int getSimulatedAnnealingFactorNumberIterations() const
00714 {
00715 return(_options.sa_factor_number_iterations_arg);
00716 }
00717
00718
00719
00720 bool getSparsifyNetwork() const
00721 {
00722 return(_options.sparsify_network_given);
00723 }
00724 string getSparsificationMethod() const
00725 {
00726 return(_options.sparsify_network_arg);
00727 }
00728
00729
00730 void setSplitActiveNetworksIntoComponents()
00731 {
00732 _splitActiveNetworksIntoComponents = true;
00733 }
00734 bool getSplitActiveNetworksIntoComponents() const
00735 {
00736 return(_splitActiveNetworksIntoComponents);
00737 }
00738
00739
00740 bool getSplitGEData() const
00741 {
00742 return(_options.split_gene_expression_data_given);
00743 }
00744
00745
00746 void setEdgeWeightThreshold(string optarg)
00747 {
00748 _edgeWeightThreshold = atof(optarg.c_str());
00749 }
00750 void setEdgeWeightThreshold(MyNT thresh)
00751 {
00752 _edgeWeightThreshold = thresh;
00753 }
00754 MyNT getEdgeWeightThreshold() const
00755 {
00756 return(_edgeWeightThreshold);
00757 }
00758
00759
00760
00761 void setUseStudentsTDistribution()
00762 {
00763 _useStudentsTDistribution = true;
00764 }
00765 bool getUseStudentsTDistribution() const
00766 {
00767 return(_useStudentsTDistribution);
00768 }
00769
00770
00771 bool getTranslateAnnotations() const
00772 {
00773 return(_options.translate_annotations_given);
00774 }
00775
00776
00777 bool getComputeTrivialNetworkLegos() const
00778 {
00779 return(_options.trivial_network_legos_given);
00780 }
00781
00782
00783
00784
00785
00786 void setExpandActiveNetworks()
00787 {
00788 _expandActiveNetworks = true;
00789 }
00790 bool getExpandActiveNetworks() const
00791 {
00792 return(_expandActiveNetworks);
00793 }
00794
00795
00796 void setComputeActiveNetworksTillTheBitterEnd()
00797 {
00798 _computeActiveNetworksTillTheBitterEnd = true;
00799 }
00800 bool getComputeActiveNetworksTillTheBitterEnd() const
00801 {
00802 return(_computeActiveNetworksTillTheBitterEnd);
00803 }
00804
00805
00806
00807
00808
00811 bool getEmbedActiveNetworkInSuperGraph() const
00812 {
00813 return(_options.embed_given);
00814 }
00815
00816
00817 bool getCreateWebPages() const
00818 {
00819 #ifdef CTEMPLATE
00820 return(_options.www_given);
00821 #else
00822 return(false);
00823 #endif
00824 }
00825
00829 void createWebPages(SetOfActiveNetworks &anetSet);
00830
00831
00832
00833 string getWWWParametersDir() const
00834 {
00835 return(_normaliseDirectory(_options.www_params_dir_arg));
00836 }
00837
00838
00839
00841 void getWiringDiagram(MyGraph &wd)
00842 {
00843 wd = wiringDiagram;
00844 }
00845
00847 void getWiringDiagramNodes(set< string > &nodes) const
00848 {
00849 wiringDiagram.getNodeSet(nodes);
00850 }
00851
00853 string getEdgeAnnotationsFile(void) const
00854 {
00855 return(_options.edge_annotations_file_arg);
00856 }
00857
00858
00859 void getNodeAnnotationsFiles(vector< string > &files) const;
00860 string getNodeAnnotationsFile(unsigned int index) const
00861 {
00862 return(_nodeAnnotationsFiles[index]);
00863 }
00864 unsigned int getNumNodeAnnotationsFiles() const
00865 {
00866 return(_nodeAnnotationsFiles.size());
00867 }
00868
00869
00871 string getNodeAliasFile() const
00872 {
00873 return(_options.node_alias_file_arg);
00874 }
00875
00876
00877 string getFunctionSubgraphsFile() const
00878 {
00879 return(functionSubgraphsFile);
00880 }
00881
00882 string getOBOFile(void) const
00883 {
00884 return(_oboFile);
00885 }
00886
00887
00889 string getTreatmentControlFile() const
00890 {
00891 return(_options.treatment_control_file_arg);
00892 }
00893
00894
00896 void readEdgeAnnotations(string name);
00897
00903 void readNodeAliases(string file);
00904
00925 void readNodeAnnotations(string name, set< string > *geneSet = NULL);
00926
00927 void readOBO(string name)
00928 {
00929 _goDAG.read(name);
00930
00931 _goDAG.computeFunctionDepths();
00932 _goDAGRead = true;
00933 }
00934
00935 bool readFunctionSubgraphs(string);
00936 bool readGEData(string);
00937 bool readNetwork(string, MyNT minimumEdgeWeight = 0);
00938
00939 vector< SetOfPPNodeStats > treatAliases(MyGraph &pathway, const GEData &geData);
00940
00943 bool allowNegativeEdgeWeight(set< string >& types) const
00944 {
00945 set< string >::const_iterator itr;
00946 for (itr = _negativeWeightEdgeTypes.begin(); itr != _negativeWeightEdgeTypes.end();
00947 itr++)
00948 if (types.end() != types.find(*itr))
00949 return(true);
00950 return(false);
00951 }
00952
00968
00969
00970
00971
01001 void computeDenseFunctions(ostream *fstr,
01002 map< string, MyGraph > &denseFunctionSubgraphs);
01003
01008 void computeDenseFunctions(ostream *fstr = NULL)
01009 {
01010 computeDenseFunctions(fstr, _denseFunctionSubgraphs);
01011 map< string, MyGraph >::iterator gitr;
01012 for (gitr = _denseFunctionSubgraphs.begin(); gitr != _denseFunctionSubgraphs.end();
01013 gitr++)
01014 #ifdef DEBUG
01015 {
01016 cout << "Function with dense subgraph is " << gitr->first
01017 << " with " << gitr->second.numNodes() << " nodes and "
01018 << gitr->second.numEdges() << " edges." << endl;
01019 gitr->second.printEdges(cout);
01020 #endif
01021 _allDenseFunctionSubgraphs.add(gitr->second);
01022 #ifdef DEBUG
01023 }
01024 cout << "Union of all functions with dense subgraphs has "
01025 << _allDenseFunctionSubgraphs.numNodes() << " nodes and "
01026 << _allDenseFunctionSubgraphs.numEdges() << " edges." << endl;
01027 _allDenseFunctionSubgraphs.printNodes(cout);
01028 _allDenseFunctionSubgraphs.printEdges(cout);
01029 #endif
01030 }
01031
01049 void computePathwayPerturbation();
01050
01065
01066 void computePathwayPerturbation(const map< string, set< string > > &treatmentControlPairs, map< string, MyGraph> &pathways);
01067
01087 void computePathwayPerturbation(MyGraph &pathway, const GEData &treatment,
01088 const GEData &control, string contrast, ActiveNetwork &subgraph);
01089
01092 void computePathwayPerturbation(MyGraph &pathway, const GEData &geData);
01093
01094
01095
01096
01097
01101 void plotExpressionValuesHistogram();
01102
01107 void plotExpressionCorrelationsHistogram();
01108
01112 void processNetworkLegos(SetOfActiveNetworks &anetSet);
01113
01114
01115 void readDirectories();
01116
01126 void removeUnresponsiveGenes(GEData &gedata);
01127
01142 bool computeCorrelations(MyGraph &wiring);
01143
01144
01145
01146
01147
01148 bool computeDenseSubgraphs(vector< ActiveNetwork > &denseSubgraphs, bool randomRun = false);
01149
01150
01152 void setWiringDiagramEdgeWeightsFS()
01153 {
01154 wiringDiagram.computeEdgeFSWeights();
01155 }
01156
01158 void setWiringDiagramEdgeWeightsCD()
01159 {
01160 wiringDiagram.computeEdgeCDWeights();
01161 }
01162
01172 void postProcess(SetOfActiveNetworks &anetSet);
01173
01176 void process();
01177
01185 void processComputedActiveNetworks();
01186
01187
01190 void processFunctionSubgraphs();
01191
01194 void processNetwork();
01195
01196
01197 void randomise(MyHistogram &histogram);
01198
01201 void separateDenseFunctions();
01202
01203
01204
01205 private:
01206
01207
01208 void _assignEdgeWeights(MyGraph &pathway, SetOfPPNodeStats &nodeStats,
01209 ostream &ostr);
01210
01211 public:
01214 void _openOutputFiles();
01215 void _closeOutputFiles();
01216
01217 private:
01218
01219 void _performSanityChecks();
01220
01221 void _processDenseFunctionSubgraphs(MyGraph &wiringDiagram, MyGraph &mst);
01222
01223 void _readTreatmentControlPairs(string file);
01224
01225 };
01226
01227 #endif