Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
active-networks.h
00001 /**************************************************************************
00002  * Copyright (c) 2005-2011 T. M. Murali                                   *
00003  * Copyright (c) 2009-2011 Christopher L. Poirel                          *
00004  * Copyright (c) 2011 Christopher D. Lasher                               *
00005  * Copyright (c) 2010 Jacqueline Addesa                                   *
00006  * Copyright (c) 2003-2005 Deept Kumar                                    *
00007  *                                                                        *
00008  * This file is part of Biorithm.                                         *
00009  *                                                                        *
00010  * Biorithm is free software: you can redistribute it and/or modify       *
00011  * it under the terms of the GNU General Public License as published by   *
00012  * the Free Software Foundation, either version 3 of the License, or      *
00013  * (at your option) any later version.                                    *
00014  *                                                                        *
00015  * Biorithm is distributed in the hope that it will be useful,            *
00016  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00017  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00018  * GNU General Public License for more details.                           *
00019  *                                                                        *
00020  * You should have received a copy of the GNU General Public License      *
00021  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00022  *                                                                        *
00023  **************************************************************************/
00024 
00032 #ifndef _ACTIVE_NETWORKS_H
00033 #define _ACTIVE_NETWORKS_H
00034 
00035 #ifdef HAVE_CONFIG_H
00036 #    include "config.h"
00037 #endif
00038 
00039 #ifdef CTEMPLATE
00040 
00041 #include "boost/multi_index_container.hpp"
00042 #include "boost/multi_index/member.hpp"
00043 #include <boost/multi_index/mem_fun.hpp>
00044 #include "boost/multi_index/ordered_index.hpp"
00045 #include <boost/multi_index/composite_key.hpp>
00046 
00047 using boost::multi_index_container;
00048 using namespace boost::multi_index;
00049 
00050 #include "google/template.h"
00051 using google::Template;
00052 using google::TemplateDictionary;
00053 using google::STRIP_WHITESPACE;
00054 #endif
00055 
00056 
00057 #include "old-annotations.h"
00058 #include "enrichment.h"
00059 #include "itemset.h"
00060 #include "graph.h"
00061 #include "enrichment-algorithm.h"
00062 
00063 // for MyPointSet (to store GE data).
00064 #include "point.h"
00065 
00066 typedef MyPointSet GEData;
00067 typedef MySetOfPointSets GEDataSets;
00068 
00069 //#include "NetMan.h"
00070 
/**
 * Replace every entry x of binaryVector with 1 - x, in place.
 *
 * For entries that are 0 or 1 this flips the bit. The subtraction is
 * done in unsigned arithmetic, so any other value wraps around instead
 * of becoming negative.
 *
 * @param binaryVector vector to complement; an empty vector is left
 *                     untouched.
 */
inline void _computeComplement2(vector< unsigned int > &binaryVector)
{
  vector< unsigned int >::iterator entry = binaryVector.begin();
  while (entry != binaryVector.end())
    {
      *entry = 1 - *entry;
      ++entry;
    }
}
00076 
00077 
00085 class ActiveNetwork : public MyGraph
00086 {
00087 private:
00088   vector< string > _conditions;
00089   // the records for edge functions that are enriched
00090   vector< EnrichmentRecord< string, string > > _edgeEnrichmentRecords;
00091   // convenience variable that stores for each edge, which functions
00092   // annotating that nodes are enriched in the ActiveNetwork. this
00093   // data is useful in _print_greg_nodes().
00094   map< string , vector< string > > _enrichedFunctionsByEdge;
00095 
00096   // the most specific enriched functions (for GO functions).
00097   set< string > _mostSpecificEnrichedFunctions;
00098 
00099   // the records for node functions that are enriched
00100   vector< EnrichmentRecord< string, string > > _nodeEnrichmentRecords;
00101   // convenience variable that stores for each node, which functions
00102   // annotating that nodes are enriched in the ActiveNetwork. this
00103   // data is useful in _print_greg_nodes().
00104   map< MyNodeId , vector< string > > _enrichedFunctionsByNode;
00105 
00106   // the first string is the type of
00107 //  map< MyFunctionCategoryType, vector< EnrichmentRecord< MyNodeId, string > > enrichments;
00108 
00109   MyNT _pvalue;
00110 
00111   // file containing layout image.
00112   string _wwwLayoutFile;
00113   // file containing client-side image map for layout image.
00114   string _wwwMapFile;
00115 
00116 public:
00117   ActiveNetwork()
00118       : _conditions(), _edgeEnrichmentRecords(), _enrichedFunctionsByEdge(),
00119         _mostSpecificEnrichedFunctions(),
00120         _nodeEnrichmentRecords(), _enrichedFunctionsByNode(), _pvalue(-1),
00121         _wwwLayoutFile(), _wwwMapFile()
00122     {}
00123 
00124   ActiveNetwork(const MyGraph &graph)
00125       : MyGraph(graph), _conditions(), _edgeEnrichmentRecords(), _enrichedFunctionsByEdge(),
00126         _mostSpecificEnrichedFunctions(),
00127         _nodeEnrichmentRecords(), _enrichedFunctionsByNode(), _pvalue(-1),
00128         _wwwLayoutFile(), _wwwMapFile()
00129     {}
00130 
00131   ActiveNetwork(const Itemset &itemset, unsigned int index);
00132 
00133   void addCondition(string cond)
00134     {
00135       _conditions.push_back(cond);
00136     }
00137   void getConditions(vector< string > &conds) const
00138     {
00139       conds = _conditions;
00140     }
00141   unsigned int numConditions() const
00142     {
00143 
00144       return(_conditions.size());
00145     }
00146 
00148   void setPvalue(MyNT val)
00149     {
00150       _pvalue = val;
00151     }
00152 
00153   MyNT getPvalue() const
00154     {
00155       return(_pvalue);
00156     }
00157 
00158 
00163   void clearEdgeEnrichments();
00164 
00180   void computeEdgeEnrichments(const MyAnnotations& annotations);
00181 
00192   void computeEdgeEnrichments(map< string, Enrichment< string, string > >&annotations);
00193 
00194 
00199   void clearNodeEnrichments();
00200 
00201 
00213   void computeNodeEnrichments(map< string, Enrichment< string, string > >&annotations);
00214 
00223   void computeNodeWeights(const GEData &gedata);
00224 
00225 
00226 #ifdef CTEMPLATE
00227 
00228 
00229 
00230 
00231 
00232 
00233 
00234 
00235 
00236 
00237 
00238 
00239 
00240 
00241 
00242 
00243 
00244 
00245 
00246 
00247 
00248 
00249 
00250   void layout(string layoutDirectory, string wwwParamsDirectory,
00251               string nodeUrl = "",
00252               const map< string, set< string > > *nodeAliases = NULL,
00253               MyGraph *superGraph = NULL,
00254               const map< string, set< string > > *layoutConstraints = NULL
00255               );
00256 #endif
00257 
00258 
00306 #ifdef CTEMPLATE
00307   void layoutNodeAnnotation(string layoutDirectory, string wwwParamsDirectory,
00308                             const MyAnnotations &annotations,
00309                             string nodeAnnotationToLayout,
00310                             string nodeUrl = "",
00311                             const map< string, set< string > > *nodeAliases = NULL,
00312                             MyGraph *superGraph = NULL,
00313                             const set< ActiveNetwork* > *otherActiveNetworks = NULL,
00314                             // const set< string > *nodeAnnotationsToLayout
00315                             // const GEData *geData = NULL
00316                             const GEDataSets *geDataSets = NULL,
00317                             TemplateDictionary *efDict = NULL
00318                             );
00319 #endif
00320 
00324   void printConditions(ostream &ostr) const;
00325 
00326 
00330   void printEdgesItemset(ostream &itemsetStream, string id);
00331 
00332 
00333 
00342   void printEdgeEnrichments(//const MyAnnotations& annotations,
00343     ostream &fstr);
00344 
00353   void printEdgeEnrichedFunctions(ostream &ostr);
00354 
00355 
00356 
00367   void printNodeEnrichments(//const MyAnnotations& annotations,
00368     ostream &fstr, const GeneOntology *goDag = NULL);
00369 
00370 
00373   void printItemset(ostream &ostr) const;
00374 
00375 
00384   void printNodeEnrichedFunctions(ostream &ostr);
00385 
00403   void printStatistics(ostream &ostr);
00404 
00419   void printStatisticsComponents(ostream &ostr);
00420 
00421 
00422 #ifdef CTEMPLATE
00423 
00444   string printWWW(string wwwDirectory, string wwwParamsDirectory, string nodeURL,
00445                 const map< string, set< string > > *nodeAliases);
00446 #endif
00447 
00448 
00479   void expand(const GEData &expression, MyNT correlationThreshold, bool trivialExpand = true);
00480 
00488   void expand(ActiveNetwork &expanded, const GEData &expression, MyNT correlationThreshold)
00489     {
00490       expanded = *this;
00491       expanded.expand(expression, correlationThreshold);
00492     }
00493 
00511   void read(string anetString);
00512 
00513 
00515   void setEnrichedFunctionsByEdge(map< string, vector< string > > &efeMap)
00516     {
00517       _enrichedFunctionsByEdge = efeMap;
00518     }
00519 
00521   void setPerNodeEnrichedFunctions(map< MyNodeId, vector< string > > &efnMap)
00522     {
00523       _enrichedFunctionsByNode = efnMap;
00524     }
00525 
00526 
00528   void setNodeEnrichedFunctions(vector< EnrichmentRecord< string, string > > &efMap)
00529     {
00530       _nodeEnrichmentRecords = efMap;
00531     }
00532 
00533 
00539   void updateNodeEnrichmentsMap(map< string, map< string, MyNT > > &functionsToANs,
00540                                 const GeneOntology *goDag = NULL);
00541 
00542 };
00543 
00549 struct NetworkLegoInActiveNetwork
00550 {
00551 public:
00552   // these need to be public for .
00553   string _nlId;
00554   string _anId;
00555   MyNT _weight;
00556 
00557 public:
00558   NetworkLegoInActiveNetwork(string nl, string an, MyNT w)
00559       : _nlId(nl), _anId(an), _weight(w)
00560     {}
00561 
00567   bool operator<(const NetworkLegoInActiveNetwork &other) const
00568     {
00569       if (_nlId != other._nlId)
00570         return(_nlId < other._nlId);
00571       else if (_anId != other._anId)
00572         return(_anId < other._anId);
00573       else
00574         return(_weight < other._weight);
00575     }
00576 
00577 
00578 };
00579 
00580 struct nlid_anid_index {};
00581 struct nlid_index {};
00582 struct anid_index {};
00583 
00590 typedef multi_index_container<
00591   NetworkLegoInActiveNetwork,
00592   indexed_by<
00593 
00594     // define multi-column primary key. There can be only one instance
00595     // of any (network lego, active network) pair.
00596     ordered_unique<
00597     tag< nlid_anid_index >,
00598     composite_key<
00599       NetworkLegoInActiveNetwork,
00600       member< NetworkLegoInActiveNetwork, string, &NetworkLegoInActiveNetwork::_nlId>,
00601       member< NetworkLegoInActiveNetwork, string, &NetworkLegoInActiveNetwork::_anId>
00602       >
00603     >,
00604 
00605     // sort by less<string> on _nlId
00606     ordered_non_unique< tag< nlid_index >,
00607                         member< NetworkLegoInActiveNetwork, string,
00608                                 &NetworkLegoInActiveNetwork::_nlId > >,
00609 
00610     // sort on _anId
00611     ordered_non_unique< tag< anid_index >,
00612                         member< NetworkLegoInActiveNetwork, string,
00613                                 &NetworkLegoInActiveNetwork::_anId > >
00614     > // matches indexed_by
00615   > NetworkLegosInActiveNetworksMapping;
00616 
00617 
00618 
00619 // forward declaration.
00620 class SetOfNetworkLegos;
00621 
00622 
00630 class SetOfActiveNetworks
00631 {
00632   // WHAT AN UGLY HACK! without this,
00633   // SetOfNetworkLegos::computeStability() does not compile eventhough
00634   // SetOfNetworkLegos is a sub-class of SetOfActiveNetworks.
00635   friend class SetOfNetworkLegos;
00636 
00637 protected:
00638 
00639   vector< ActiveNetwork > _anets;
00640   // for each active network id, store which index of _anets contains
00641   // that active network.
00642   map< string, unsigned int > _anetIdToIndex;
00643   ActiveNetwork _combinedAnet;
00644 
00645   map< MyEdge, MyNT > _edgeCentralities;
00646   map< MyNodeId, MyNT > _nodeCentralities;
00647   map< MyNodeId, vector< MyGraph * > > _nodeCounts;
00648 
00649 public:
00650 
00651   SetOfActiveNetworks()
00652       : _anets(), _anetIdToIndex(), _combinedAnet(),
00653         _edgeCentralities(), _nodeCentralities(),
00654         _nodeCounts()
00655     {}
00656 
00657   virtual ~SetOfActiveNetworks()
00658     {}
00659 
00663   void add(ActiveNetwork &anet)
00664     {
00665 //      _anets.insert(anet);
00666       _anets.push_back(anet);
00667       // update _anetIdToIndex.
00668       _anetIdToIndex[anet.getName()] = _anets.size() - 1;
00669     }
00670 
00673   void add(vector< ActiveNetwork >& nets)
00674     {
00675       unsigned int size = _anets.size();
00676       _anets.insert(_anets.end(), nets.begin(), nets.end());
00677       // update _anetIdToIndex.
00678       for (unsigned int i = size; i < _anets.size(); i++)
00679         _anetIdToIndex[_anets[i].getName()] = i;
00680     }
00681 
00689   void add(Itemset &itemset, unsigned int index)
00690     {
00691       _anets.push_back(ActiveNetwork(itemset, index));
00692       // update _anetIdToIndex.
00693       _anetIdToIndex[_anets.back().getName()] = _anets.size() - 1;
00694     }
00695 
00698   void add(vector< Itemset > &itemsets)
00699     {
00700       unsigned int index = 1;
00701       vector< Itemset >::iterator iitr;
00702       for (iitr = itemsets.begin(); iitr != itemsets.end(); iitr++)
00703         {
00704           _anets.push_back(ActiveNetwork(*iitr, index++));
00705           // update _anetIdToIndex.
00706           _anetIdToIndex[_anets.back().getName()] = _anets.size() - 1;
00707         }
00708     }
00709 
00712   void addCondition(string condition)
00713     {
00714       vector< ActiveNetwork >::iterator aitr;
00715       for (aitr = _anets.begin(); aitr != _anets.end(); aitr++)
00716         aitr->addCondition(condition);
00717     }
00727 
00728   virtual void compareNetworkLegosToActiveNetworks()
00729     {
00730       cerr << "SetOfActiveNetworks::compareNetworkLegosToActiveNetworks() should never be invoked. Hit the maintainer on the head until he/she fixes this awful bug." << endl;
00731       exit(-1);
00732     }
00733 
00764   void computeBinaryMatrix(vector< vector< unsigned int > > &binaryMatrix,
00765                            map< unsigned int, string > &rowNames,
00766                            map< unsigned int, string > &columnNames,
00767                            bool addComplements = false,
00768                            set< unsigned int > *complementRows = NULL);
00769 
00772   void computeEdgeBetweenessCentrality();
00773 
00778   void clearEdgeEnrichments();
00779 
00787   void computeEdgeEnrichments(const MyAnnotations &annotations);
00788 
00794   void computeEdgeSimilarityGraph(MyGraph &graph) const;
00795 
00796 
00801   void clearNodeEnrichments();
00802 
00803 
00809   void computeNodeWeights(const GEData &gedata);
00810 
00818   MyNT computeMostSimilarEdges(const MyGraph & other, unsigned int &bestIndex) const;
00819 
00827   MyNT computeMostSimilarNodes(const MyGraph & other, unsigned int &bestIndex) const;
00828 
00829 
00832 //  void computeNetworkLegos();
00833 
00834 
00837   void computeNodeBetweenessCentrality();
00838 
00841   void computeNodeCounts();
00842 
00848   void computeNodeSimilarityGraph(MyGraph &graph) const;
00849 
00855   void computePvalues(const MyHistogram &histogram);
00856 
00857 
00871   void computeSNetwork();
00872 
00873 
00880   void computeUnion(string name, ActiveNetwork &unionActiveNetwork)
00881     {
00882       computeUnion(name);
00883       unionActiveNetwork = _combinedAnet;
00884     }
00890   void computeUnion(string name);
00891 
00894   void expand(const GEData &expression, string id,  MyNT correlationThreshold);
00895 
00896 
00902   vector< ActiveNetwork > getActiveNetworks()
00903     {
00904       return _anets;
00905     }
00906 
00907 
00916   void getUnion(ActiveNetwork &unionActiveNetwork)
00917     {
00918       unionActiveNetwork = _combinedAnet;
00919     }
00920 
00922   unsigned int getNumActiveNetworks() const
00923     {
00924       return(_anets.size());
00925     }
00926 
00927 #ifdef CTEMPLATE
00928 
00929 
00930 
00931 
00932 
00933 
00934 
00935 
00936 
00937 
00938 
00939 
00940 
00941 
00942 
00943 
00944 
00945 
00946 
00947 
00948 
00949 
00950 
00951 
00952 
00953 
00954 
00955 
00956 
00957   void layout(string directory, string wwwParamsDir,
00958               const MyAnnotations &annotations,
00959               const GeneOntology &go,
00960               string nodeUrl = "",
00961               const map< string, set< string > > *nodeAliases = NULL,
00962               MyGraph *superGraph = NULL,
00963               const map< string, set< string > > *layoutConstraints = NULL,
00964               const GEDataSets *geDataSets = NULL,
00965               const map< string, set< string > > *conditionGroups = NULL
00966               // const vector< GEData > *geData = NULL,
00967               // const map< string, unsigned int > *classToIndex = NULL
00968               );
00969 #endif
00970 
00971 
00972   void printConditions(ostream &conditionStream);
00973   void printEdges(ostream &edgeStream);
00974   void printEdgeBetweenessCentrality(ostream &edgeCentralityStream);
00975   void printEdgesItemset(ostream &itemsetStream, string id);
00976 
00977 
00978   void printEdgeTypeCounts(ostream &edgeTypesCountStream);
00979 
00980   void printEdgeEnrichments(ostream &edgeEnrichmentStream,
00981                             ostream &edgeEnrichedFunctionStream);
00982 
00989   void printEdgeWeightsHistograms(ostream &edgeWeightsHistogramStream, string fileName);
00990 
00991 
00993   void printNodeEnrichments(ostream &nodeEnrichmentStream,
00994                             ostream &nodeEnrichedFunctionStream,
00995                             GeneOntology *go);
00996 
00998   void processNodeEnrichments(ostream &processNodeEnrichmentsStream, const GeneOntology *go);
00999 
01000 
01001   void printNodes(ostream &nodeStream);
01002   void printNodeBetweenessCentrality(ostream &nodeCentralityStream);
01003 
01010   void printNodeWeightsHistograms(ostream &nodeWeightsHistogramStream, string fileName);
01011 
01012   void printStatistics(ostream &statStream);
01013 
01014 #ifdef CTEMPLATE
01015 
01016   void printWWW(string directory, string wwwParamsDir,
01017                 string nodeUrl = "",
01018                 const map< string, set< string > > *nodeAliases = NULL);
01019 #endif
01020 
01026   void read(const vector< string > &activeNetworkFiles);
01027 
01034   void read(string activeNetworkFile);
01035 
01043   void readEdges(string edgesFile);
01044 
01054   virtual void readMappingFromNetworkLegosToActiveNetworks(string nlsToRNsFile)
01055     {
01056       cerr << "SetOfActiveNetworks::readMappingFromNetworkLegosToActiveNetworks() should never be invoked. Ask the maintainer to fix this awful bug." << endl;
01057       exit(-1);
01058     }
01059 
01060 
01065   void readNodeEnrichedFunctions(string nodeEnrichedFunctionsFile);
01066 
01071   void readNodeEnrichments(string nodeEnrichmentsFile);
01072 
01086   void readNodes(string nodesFile, MyGraph &superGraph);
01087 };
01088 
01089 
01090 
01091 #endif // _ACTIVE_NETWORKS_H
 All Classes Functions Variables Typedefs Friends