Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
graph.h
00001 /**************************************************************************
00002  * Copyright (c) 2002-2011 T. M. Murali                                   *
00003  * Copyright (c) 2007-2011 Naveed Massjouni                               *
00004  * Copyright (c) 2009-2011 Christopher L. Poirel                          *
00005  * Copyright (c) 2010-2011 Ahsanur Rahman                                 *
00006  * Copyright (c) 2011 David Badger                                        *
00007  * Copyright (c) 2005 Shivaram Narayanan                                  *
00008  *                                                                        *
00009  * This file is part of Biorithm.                                         *
00010  *                                                                        *
00011  * Biorithm is free software: you can redistribute it and/or modify       *
00012  * it under the terms of the GNU General Public License as published by   *
00013  * the Free Software Foundation, either version 3 of the License, or      *
00014  * (at your option) any later version.                                    *
00015  *                                                                        *
00016  * Biorithm is distributed in the hope that it will be useful,            *
00017  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00018  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00019  * GNU General Public License for more details.                           *
00020  *                                                                        *
00021  * You should have received a copy of the GNU General Public License      *
00022  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00023  *                                                                        *
00024  **************************************************************************/
00025 
00033 #ifndef _GRAPH_H
00034 #define _GRAPH_H
00035 
00036 #include <algorithm>
00037 #include <iostream>
00038 #include <list>
00039 #include <map>
00040 #include <queue>
00041 #include <set>
00042 #include <string>
00043 #include <stack>
00044 #include <vector>
00045 
00046 #include <math.h>
00047 
00048 // for converting integer to string
00049 #include <sstream>
00050 
00051 #ifdef HAVE_GLPK
00052 extern "C"
00053 {
00054 #include <glpk.h>
00055 }
00056 #endif
00057 
00058 #include "graph-global.h"
00059 #include "histogram.h"
00060 #include "myiterator.h"
00061 #include "setTemplates.C"
00062 //#include "generic.h"
00063 //using namespace myNamespace;
00064 //#include "reporter.h"
00065 
00066 using namespace std;
00067 
00068 // this string is a separator between node ids and types in the string
00069 // representing an edge. this string is used in MyEdge::getEdge() and
00070 // in _getEdge() in active-networks.C
00071 //
00072 // "_" is a bad idea for a separator because
00073 // sometimes node names may have "_" in them. in that case, parsing
00074 // the node names and types from edge string becomes problematic.
00075 const string EDGE_STRING_SEPARATOR = "~@@~";
00076 
00077 // a small value to use in MyNode::updateTotalEdgeWeight(). 
00078 const MyNT EDGE_WEIGHT_EPSILON = 1E-05;
00079 
00081 struct negativeLogTransform : public unary_function< MyNT, MyNT > 
00082 {
00083   MyNT operator()(const MyNT __x) const
00084     {
00085       return(-log(__x));
00086     }
00087   
00088 };
00089 
00091 struct exponentNegativeTransform : public unary_function< MyNT, MyNT > 
00092 {
00093   MyNT operator()(const MyNT __x) const
00094     {
00095       return(exp(- __x));
00096     }
00097 };
00098 
00100 struct oneMinusTransform : public unary_function< MyNT, MyNT > 
00101 {
00102   MyNT operator()(const MyNT __x) const
00103     {
00104       return(1 - __x);
00105     }
00106 };
00107 
00109 struct oneMinusExponentNegativeTransform : public unary_function< MyNT, MyNT > 
00110 {
00111   MyNT operator()(const MyNT __x) const
00112     {
00113       return(1 - exp(- __x));
00114     }
00115 };
00116 
00117 
00118 
00119 // typedef int MyNodeId;
00120 typedef string MyNodeId;
00121 typedef vector< string > MyNodeIdList;
00122 typedef set< string > MyNodeIdSet;
00123 
00124 // forward declaration
00125 class MyGraph;
00126 class MyEdge;
00127 
00128 // i cannot use MyInfo since that class is defined in libexpression.
00129 class MyGraphInfo
00130 {
00131 public:
00132   // murali, sep 12, 2007. incredibly, i did not have a constructor
00133   // (or a destructor) for this class. weight got assigned weird
00134   // values sometimes.
00135   MyGraphInfo()
00136       :  _evidence(),  _hidden(false), _weight(0), _types()
00137     {}
00138   virtual ~MyGraphInfo()
00139     {}
00140 
00141   void clear()
00142     {
00143       _evidence.clear();
00144     }
00145   void setWeight(MyNT w)
00146     {
00147       _weight = w;
00148     }
00149   MyNT getWeight() const
00150     {
00151       return(_weight);
00152     }
00153 
00154   // functions related to edge types.
00155   bool hasType(string type)
00156     {
00157       return(_types.end() != _types.find(type));
00158     }
00159   
00160   void addType(string type)
00161     {
00162       _types.insert(type);
00163     }
00164   void addTypes(const set< string > &types)
00165     {
00166       _types = types;
00167     }
00168   void getTypes(set< string > &types) const
00169     {
00170       types = _types;
00171     }
00172 private:
00173   // source of evidence for this interaction.
00174   vector< string > _evidence;
00175   // is this edge hidden or not.
00176   bool _hidden;
00177   // weight of the edge.
00178   MyNT _weight;
00179   // edge types. 
00180   set< string > _types;
00181 };
00182 
00183 // function objects for combining weights.
00184 struct combineAND : public binary_function< MyNT, MyNT, MyNT> 
00185 {
00186   MyNT operator()(const MyNT __x, const MyNT __y) const
00187     {
00188       return(__x*__y);
00189     }
00190 };
00191 
00192 
00193 // assumes but does not check that bith arguments are between 0 and 1.
00194 struct combineOR : public binary_function< MyNT, MyNT, MyNT> 
00195 {
00196   MyNT operator()(const MyNT __x, const MyNT __y) const
00197     {
00198       return(1 - (1 -__x)*(1 - __y));
00199     }
00200 };
00201 
00202 typedef MyGraphInfo MyNodeInfo;
00203 
00204 class MyNode
00205 {
00206 public:
00207   // this typedef is just to make sure i define MyEdgeList exactly as
00208   // in MyGraph. i do not create any variables of this type in MyNode.
00209   typedef list< MyEdge > MyEdgeList;
00210   typedef MyEdgeList::iterator MyEdgePtr;
00211   typedef MyEdgeList::const_iterator MyConstEdgePtr;
00212   
00213   // nested class
00214   template< typename Object, typename ObjectPtr > class EdgeObjectIterator;
00215   
00216   typedef  EdgeObjectIterator< MyEdgePtr, map< MyNodeId, MyEdgePtr >::iterator > MyEdgeIterator;
00217   typedef  EdgeObjectIterator< MyConstEdgePtr, map< MyNodeId, MyConstEdgePtr >::iterator > MyConstEdgeIterator;
00218 
00219 //  class MyEdgeIterator;
00220 
00221   typedef MyEdgeIterator EdgeIterator;
00222   typedef MyConstEdgeIterator ConstEdgeIterator;
00223   
00224   
00225 private:
00226   MyNodeId _id;
00227   MyNodeInfo _nodeInfo;
00228   
00229   int _index;
00230   // the set of functions associated with a node.
00231   vector< string > _functions;
00232   bool _hidden;
00233 
00234   // weight of the node.
00235   MyNT _weight;
00236 
00237   // _edges is a (hash)map< MyNodeId, iterator >, where the iterator
00238   // points to the global list of edges in the graph. iterator is like
00239   // "position" in goodrich+tamassia. the list of edges in MyGraph is
00240   // a list< MyEdge > so that iterators do not get invalidated by edge
00241   // deletion.
00242 //   set< MyNodeId > _edges;
00243 //   MyEdgeList _edgeList;
00244   map< MyNodeId, MyEdgePtr > _edges;
00245 
00246 
00247   
00248 //  MyNodeEdgeWeights _edgeWeights;
00249   MyNT _totalEdgeWeight;
00250   // edge weights may be negative. this variable stores the sum of the
00251   // absolute values of these weights.
00252   MyNT _totalAbsoluteEdgeWeight;
00253   unsigned int _numHiddenEdges;
00254   
00255   map< string, bool > _status;
00256 
00257 
00258   // the graph the node belongs to.
00259 //  const MyGraph *_graph;
00260 
00261 
00262 private:
00263   // private methods.
00264 
00265   
00266 public:
00267   
00268 template< typename Object, typename ObjectPtr >
00269 class EdgeObjectIterator: public MyIterator< Object >
00270 {
00271 private:
00272   ObjectPtr start, end, current;
00273 
00274 public:
00275   EdgeObjectIterator()
00276   {}
00277   EdgeObjectIterator(ObjectPtr _start, ObjectPtr _end)
00278   {
00279     start = _start;
00280     current = _start;
00281     end = _end;
00282   }
00283     
00284   virtual bool hasNext()
00285   {
00286     return(current != end);
00287   }
00288   Object& next()
00289   {
00290     Object& _next = (*current).second;
00291     current++;
00292     return(_next);
00293   }
00294 };
00295 
00296 public:
00297   
00298   MyNode()
00299       : _id(), _nodeInfo(),_index(), _functions(), _hidden(false), _weight(0),
00300         _edges(), _totalEdgeWeight(0), _totalAbsoluteEdgeWeight(0),
00301       _numHiddenEdges(0),
00302     _status()
00303     {}
00304 
00305   MyNode(MyNodeId name)
00306       : _id(name), _nodeInfo(),_index(), _functions(), _hidden(false),  _weight(0),
00307         _edges(), _totalEdgeWeight(0), _totalAbsoluteEdgeWeight(0),
00308         _numHiddenEdges(0),
00309         _status()
00310     {}
00311 
00312   MyNode(const MyNode &copy)
00313       : _id(copy._id), _nodeInfo(copy._nodeInfo),
00314         _index(copy._index), _functions(copy._functions),
00315         _hidden(copy._hidden), _weight(copy._weight),  
00316         _edges(copy._edges), _totalEdgeWeight(copy._totalEdgeWeight),
00317         _totalAbsoluteEdgeWeight(copy._totalAbsoluteEdgeWeight),
00318         _numHiddenEdges(copy._numHiddenEdges), _status(copy._status)
00319     {}
00320 
00321   // copy certain aspects of copy into *this.
00322   void copy(const MyNode &copy);
00323   
00324   bool operator==(const MyNode &rhs)
00325     {
00326       return(_id == rhs._id);
00327     }
00328   
00329   const MyNode &operator=(const MyNode &rhs)
00330     {
00331 #ifdef DEBUG
00332 //       cout << "\tIn MyNode::operator= for node "
00333 //            << rhs.getId() << "!" << endl;
00334 #endif
00335       if (this != &rhs)
00336         {
00337           clear();
00338           _id = rhs._id;
00339           _nodeInfo = rhs._nodeInfo;
00340           _index = rhs._index;
00341           _functions = rhs._functions;
00342           _hidden = rhs._hidden;
00343           _weight = rhs._weight; 
00344 //          _edges.clear();
00345           _edges = rhs._edges; 
00346           _numHiddenEdges = rhs._numHiddenEdges;
00347 //          _status.clear();
00348           _status = rhs._status;
00349           
00350         }
00351       return(*this);
00352     }
00353   
00354   virtual ~MyNode()
00355     {}
00356 
00357 // free all the memory used by the node.
00358   virtual void clear()
00359   {
00360     _nodeInfo.clear();
00361     _functions.clear();
00362     _edges.clear();
00363     _status.clear();
00364   }
00365 
00366   // functions related to node types.
00367   bool hasType(string type)
00368     {
00369       return(_nodeInfo.hasType(type));
00370     }
00371   
00372   void addType(string type)
00373     {
00374       _nodeInfo.addType(type);
00375     }
00376   void addTypes(set< string > types)
00377     {
00378       _nodeInfo.addTypes(types);
00379     }
00380   void getTypes(set< string > &types) const
00381     {
00382       _nodeInfo.getTypes(types);
00383     }
00384   set< string > getTypes() const
00385     {
00386       set< string > types;
00387       _nodeInfo.getTypes(types);
00388       return(types);
00389     }
00390 
00391   MyNodeId getId() const
00392     {
00393       return(_id);
00394     }
00395 
00396   void setStatus(string statusType)
00397     {
00398       _status[statusType] = true;
00399     }
00400   void clearStatus(string statusType)
00401     {
00402       _status[statusType] = false;
00403     }
00404   bool getStatus(string statusType) //const
00405     {
00406 #ifdef DEBUG
00407       // check if statusType exists.
00408       if (_status.end() == _status.find(statusType))
00409         {
00410           cerr << "\tnode " << getId() << " has no status flag called "
00411                << statusType << endl;
00412           // set the statusType to false since that is the default.
00413           _status[statusType] = false;
00414         }
00415       
00416 #endif      
00417       return(_status[statusType]);
00418     }
00419   
00420   bool addFunction(string fn)
00421     {
00422       // add a function only if i have not seen it before.
00423       if (_functions.end() == find(_functions.begin(), _functions.end(), fn))
00424         _functions.push_back(fn);
00425       return(true);
00426     }
00427   
00428   void functions(vector <string> &fns) const
00429     {
00430       fns = _functions;
00431     }
00432 
00433   // functions to get and set hidden status.
00434   bool isHidden() const
00435     {
00436       return(_hidden);
00437     }
00438   
00439   bool isVisible() const
00440     {
00441       return(!_hidden);
00442     }
00443   
00444   void hide()
00445     {
00446       _hidden = true;
00447     }
00448 
00449   void unhide()
00450     {
00451       _hidden = false;
00452     }
00453 
00454   MyNT getWeight() const
00455   {
00456     return(_weight);
00457   }
00458   void setWeight(MyNT wt)
00459   {
00460     _weight = wt;
00461   }
00462 
00463   virtual MyEdgeIterator incidentEdges()
00464   {
00465     MyEdgeIterator itr(_edges.begin(), _edges.end());
00466     return(itr);
00467   }
00468 
00469 //   virtual MyConstEdgeIterator incidentEdges() const
00470 //     {
00471 //       MyConstEdgeIterator itr(_edges.begin(), _edges.end());
00472 //       return(itr);
00473 //     }
00474 
00476   unsigned int degree() const
00477     {
00478       return(_edges.size() - numHiddenEdges());
00479     }
00480 
00483   MyNT getWeightedDegree() const
00484     {
00485       return(_totalAbsoluteEdgeWeight);
00486 //      return(_totalEdgeWeight);
00487     }
00488 
00489   // murali, sep 12, 2007.
00490   // Subtract oldWeight and add newWeight to the total edge weight of the node.
00491   //
00492   // TODO: It is ugly to make this function public. How do I make it private?
00493   void updateTotalEdgeWeight(MyNT oldWeight, MyNT newWeight)
00494     {
00495 #ifdef DEBUG
00496       MyNT _old, _oldAbs;
00497       _old = _totalEdgeWeight;
00498       _oldAbs = _totalAbsoluteEdgeWeight;
00499 #endif      
00500       _totalEdgeWeight += -oldWeight + newWeight;
00501       _totalAbsoluteEdgeWeight += -fabs(oldWeight) + fabs(newWeight);
00502 #ifdef DEBUG
00503       if (_totalAbsoluteEdgeWeight < 0 - EDGE_WEIGHT_EPSILON)
00504         cerr << "MyEdge::updateTotalEdgeWeight(): total absolute edge weight = "
00505              << _totalAbsoluteEdgeWeight << " is < " << 0 - EDGE_WEIGHT_EPSILON
00506              << ", old weight = " << oldWeight
00507              << ", new weight = " << newWeight << "." << endl;
00508 #endif      
00509     }
00510 
00511   unsigned int numHiddenEdges() const
00512     {
00513       return(_numHiddenEdges);
00514     }
00515   
00516   // is otherNode adjacent to *this? true only if otherNode is in
00517   // adjacency list and both nodes are visible. but i can't check if
00518   // the other node is visible so i am going to drop that check.
00519   //
00520   // TODO: BUGBUGBUG!
00521   bool isAdjacent(MyNodeId otherNode) const
00522     {
00523       return((_edges.end() != _edges.find(otherNode)) && isVisible());
00524     }
00525   
00526   bool isAdjacent(const MyNode &otherNode) const
00527     {
00528       return((_edges.end() != _edges.find(otherNode.getId())) && isVisible());
00529     }
00530 
00531 //   bool getEdge(MyNodeId otherNode, MyEdge &edge);
00532 //   bool getEdgePtr(MyNodeId otherNode, MyEdgePtr &ptr);
00533 
00537   MyEdge *getEdge(MyNodeId otherNode) const;
00538   bool getEdgePtr(MyNodeId otherNode, MyEdgePtr &ptr);
00539       
00540   
00541   // no default value for weight since i want only MyGraph::addEdge()
00542   // to contain a default value.
00543   bool addEdge(MyNodeId otherNode, MyEdgePtr edgePtr);
00544   MyEdgePtr deleteEdge(MyNodeId otherNode);
00545 
00546 
00547   // functions to get and set hidden status of an edge.
00548 
00549   // hide an edge. currently, this function only increments
00550   // _numHiddenEdges for *this if *this is not already hidden. a
00551   // complete implementation would store which edge is hidden.
00552   void hideEdge(MyNodeId other)
00553     {
00554       if (isVisible())
00555         _numHiddenEdges++;
00556     }
00557   // make an edge visible. if the node is visible, decrement
00558   // _numHiddenEdges.
00559   void unhideEdge(MyNodeId other)
00560     {
00561       if (isVisible())
00562         _numHiddenEdges--;
00563     }
00564 
00572   unsigned int computeCommonNeighbours(const MyNode &other, set< MyNodeId > &common);
00573   
00574 private:
00575   int _get_index() const
00576     {
00577       return(_index);
00578     }
00579   void _set_index(int i)
00580     {
00581       _index = i;
00582     }
00583 
00584 };
00585 
00586 
00587 typedef MyGraphInfo MyEdgeInfo;
00588 
00589 class MyEdge
00590 {
00591 private:
00592   // can't have these as references. if i do, inserting a MyEdge into
00593   // an STL vector causes problems.
00594   MyNode *node1;
00595   MyNode *node2;
00596   MyEdgeInfo _edgeInfo;
00597   map< string, bool > _status;
00598   
00599 public:
00600   // i have a MyNode() constructor. is there a harm in a MyEdge() constructor?
00601   MyEdge()
00602       : node1(), node2()
00603     {}
00604   MyEdge(MyNode &n1, MyNode &n2)
00605       : node1(&n1), node2(&n2), _edgeInfo(), _status()
00606     {}
00607 
00608   MyEdge(const MyEdge &other)
00609       : node1(other.node1), node2(other.node2), _edgeInfo(other._edgeInfo),
00610         _status(other._status)
00611     {}
00612   const MyEdge& operator=(const MyEdge& rhs)
00613     {
00614       if (this != &rhs)
00615         {
00616           node1 = rhs.node1;
00617           node2 = rhs.node2;
00618           _edgeInfo.clear();
00619           _edgeInfo = rhs._edgeInfo;
00620           _status.clear();
00621           _status = rhs._status;
00622         }
00623       return(*this);
00624     }
00625 
00626   //
00627   // Defined operator '<' for MyEdge for map of edges. 
00628   //
00629   bool operator<(const MyEdge& rhs) const
00630     {
00631       MyNode *n1 = rhs.node1;
00632       MyNode *n2 = (*this).node1;
00633       MyNode *n3 = rhs.node2;
00634       MyNode *n4 = (*this).node2;
00635       return (((*n1).getId() > (*n2).getId())||(((*n1).getId()==(*n2).getId())&&((*n3).getId() > (*n4).getId())));
00636     } 
00637   // return true iff edge is incident on node.
00638   bool incident(string node) const
00639     {
00640       return((node1->getId() == node) || (node2->getId() == node));
00641     }
00642   
00643   // return true iff edge connects n1 and n2.
00644   bool incident(string n1, string n2)
00645     {
00646       return(incident(n1) && incident(n2));
00647     }
00648 
00649   // return the "head" (node1) of the edge.
00650   MyNode &head() const
00651     {
00652       return(*node1);
00653     }
00654   
00655   // return the "tail" (node2) of the edge.
00656   MyNode &tail() const
00657     {
00658       return(*node2);
00659     }
00660   
00661   // return the node incident on *this that is opposite to "node."
00662   MyNode *opposite(const MyNode &node) const
00663     {
00664       // BUG? when i copy a graph, i copy a node's _edgeList but the
00665       // node1 and node2 pointers in each MyEdge in the _edgeList
00666       // still point to the original graph. when i call opposite for
00667       // an edge in the copy, node points to a node in the copy but
00668       // node1 and node2 to a node in the original!
00669       
00670       if (node.getId() == node1->getId())
00671         return(node2);
00672       if (node.getId() == node2->getId())
00673         return(node1);
00674       cerr << "MyEdge::opposite(): passed node " << node.getId()
00675            << " but this edge connects nodes " << node1->getId()
00676            << " and " << node2->getId() << ". Exiting." << endl;
00677       exit(-1);
00678     }
00679   void setStatus(string statusType)
00680     {
00681       _status[statusType] = true;
00682     }
00683   void clearStatus(string statusType)
00684     {
00685       _status[statusType] = false;
00686     }
00687   bool getStatus(string statusType) //const
00688     {
00689       return(_status[statusType]);
00690     }
00691 
00701   void setWeight(MyNT w)
00702     {
00703       // murali, sep 12, 2007
00704       MyNT oldWeight = getWeight();
00705       _edgeInfo.setWeight(w);
00706       // update total edge weights of incident nodes.
00707       head().updateTotalEdgeWeight(oldWeight, w);
00708       tail().updateTotalEdgeWeight(oldWeight, w);
00709     }
00710 
00711   MyNT getWeight() const
00712     {
00713       return(_edgeInfo.getWeight());
00714     }
00715 
00716   // functions related to edge types.
00717   bool hasType(string type)
00718     {
00719       return(_edgeInfo.hasType(type));
00720     }
00721   
00722   void addType(string type)
00723     {
00724       _edgeInfo.addType(type);
00725     }
00726   void addTypes(const set< string > &types)
00727     {
00728       _edgeInfo.addTypes(types);
00729     }
00730   void getTypes(set< string > &types) const
00731     {
00732       _edgeInfo.getTypes(types);
00733     }
00734   set< string > getTypes() const
00735     {
00736       set< string > types;
00737       _edgeInfo.getTypes(types);
00738       return(types);
00739     }
00740 
00748   void getEdgeStrings(vector< string > &edgeStrings) const;
00749 
00756    string getEdgeString() const
00757     {
00758       string headId = head().getId();
00759       string tailId = tail().getId();
00760       return(getEdgeString(headId, tailId));
00761     }
00762 
00763 
00771   static string getEdgeString(string headId, string tailId) 
00772     {
00773       string temp;
00774       if (headId > tailId)
00775         {
00776           temp = headId;
00777           headId = tailId;
00778           tailId = temp;
00779         }
00780       return(headId + string("_____") + tailId);
00781     }
00782   
00783     
00789 //   pair< string, string > getEdgeString() const
00790 //     {
00791 //       string head = head().getId();
00792 //       string tail = tail().getId();
00793 //       if (head > tail)
00794 //         {
00795 //           temp = head;
00796 //           head = tail;
00797 //           tail = temp;
00798 //         }
00799 //       return(pair<string, string>(head, tail));
00800 //     }
00801   
00802 };
00803 
00804 
00805 
00806 
00807 
00808 // when nodes and edges are hidden, the function operates only on the
00809 // visible nodes and edges.
00810 
00813 class MyGraph
00814 {
00815 
00816 public:
00817 
00818   typedef map< MyNodeId, MyNode > MyNodeList;
00819   typedef MyNodeList::const_iterator MyConstNodePtr;
00820   typedef MyNodeList::iterator MyNodePtr;
00821 
00822   typedef list< MyEdge > MyEdgeList;
00823   typedef MyEdgeList::iterator MyEdgePtr;
00824   typedef MyEdgeList::const_iterator MyConstEdgePtr;
00825 
00826 
00827   // nested classes.
00828   class MyNodeIterator;
00829   // why am i declaring these classes here? i want the definition of
00830   // the nested class to be private but i want instances of these
00831   // templates (e.g., MyConstNodeIterator) to be public. i also want
00832   // all public typedefs to be in one place in the file.
00833   template< typename Object, typename ObjectPtr > class NodeObjectIterator;
00834   
00835   typedef  MyGraph::NodeObjectIterator< const MyNode, MyConstNodePtr > MyConstNodeIterator;
00836 
00837   template< typename Object, typename ObjectPtr > class EdgeObjectIterator;
00838   
00839   typedef  MyGraph::EdgeObjectIterator< MyEdge, MyEdgePtr > MyEdgeIterator;
00840   typedef  MyGraph::EdgeObjectIterator< const MyEdge, MyConstEdgePtr > MyConstEdgeIterator;
00841 
00842 
00843   
00844   typedef MyNode Node;
00845   typedef MyEdge Edge;
00846   typedef MyNodeIterator NodeIterator;
00847   typedef MyEdgeIterator EdgeIterator;
00848   typedef Node::EdgeIterator IncidentEdgeIterator;
00849   typedef Node::ConstEdgeIterator ConstIncidentEdgeIterator;
00850   
00851 
00852   
00853 
00854   
00855 public:
00856 
00857 
00858   // next() will return a MyNode.
00859   class MyNodeIterator: public MyIterator<MyNode>
00860   {
00861   private:
00862     MyGraph::MyNodePtr start;
00863     MyGraph::MyNodePtr end;
00864     MyGraph::MyNodePtr current;
00865     
00866   public:
00867     MyNodeIterator()
00868       {}
00869     MyNodeIterator(MyGraph::MyNodePtr _start, MyGraph::MyNodePtr _end)
00870       {
00871         start = _start;
00872         current = _start;
00873         end = _end;
00874       }
00875     
00876     virtual bool hasNext()
00877       {
00878         return(current != end);
00879       }
00880     MyNode& next()
00881       {
00882         MyNode& _next = (*current).second;
00883         current++;
00884         return(_next);
00885       }
00886   };
00887   
00888   template< typename Object, typename ObjectPtr >
00889   class NodeObjectIterator: public MyIterator< Object >
00890   {
00891   private:
00892     ObjectPtr start, end, current;
00893     
00894   public:
00895     NodeObjectIterator()
00896       {}
00897     NodeObjectIterator(ObjectPtr _start, ObjectPtr _end)
00898       {
00899         start = _start;
00900         current = _start;
00901         end = _end;
00902       }
00903     
00904     virtual bool hasNext()
00905       {
00906         return(current != end);
00907       }
00908     Object& next()
00909       {
00910         Object& _next = (*current).second;
00911         current++;
00912         return(_next);
00913       }
00914   };
00915 
00916   template< typename Object, typename ObjectPtr >
00917   class EdgeObjectIterator: public MyIterator< Object >
00918   {
00919   private:
00920     ObjectPtr start, end, current;
00921     
00922   public:
00923     EdgeObjectIterator()
00924       {}
00925     EdgeObjectIterator(ObjectPtr _start, ObjectPtr _end)
00926       {
00927         start = _start;
00928         current = _start;
00929         end = _end;
00930   }
00931     
00932     virtual bool hasNext()
00933       {
00934         return(current != end);
00935       }
00936     Object& next()
00937       {
00938         Object& _next = *current;
00939         current++;
00940         return(_next);
00941       }
00942   };
00943 
00944 private:
00945 //  static int globalGraphId;
00946 
00947   string _name;
00948   
00949   // i store nodes by name. another possibility is to store a vector
00950   // of MyNodes and a map of strings to ints to iterators but
00951   // maintaining that second map is a nightmare.
00952   MyNodeList _nodes;
00953   MyNodePtr _internalNodePtr;
00954   
00955 //  vector< MyNode > nodes;
00956 
00957   MyEdgeList _edgeList;
00958 
00959   // various graph quantities.
00960   int _numEdges;
00961 
00962   int _numHiddenNodes;
00963   int _numHiddenEdges;
00964 
00965   // the sum of the squares of the degrees of the nodes.
00966   unsigned int _totalSquaredNodeDegrees;
00967   // the sum of the squares of the weighted degrees of the nodes.
00968   MyNT _totalSquaredNodeWeightedDegrees;
00969   
00970   MyNT _totalEdgeWeight;
00971   MyNT _totalAbsoluteEdgeWeight;
00972   
00973   int _id;
00974 
00975   map< string, string > _edgeTypeGroups;
00976 
00977   // private methods
00978   void _computeDenseSubgraphs(ostream &logStream, vector< MyGraph >&denseSubgraphs,
00979                               bool computeDenseSubgraphsTillTheBitterEnd,
00980                               bool splitDenseSubgraphsIntoComponents,
00981                               bool suppressDetails);
00982 
00984   void _updateUponEdgeInsertion(MyNodeId node1, MyNodeId node2, MyNT edgeWeight);
00985   
00987   void _updateUponEdgeDeletion(MyNodeId node1, MyNodeId node2, MyNT edgeWeight,                                // when I invoke this method from
00988                                 // MyGraph::deleteNode, node1's degree
00989                                 // is not decremented by 1 before i
00990                                 // invoke this method. therefore, i
00991                                 // pass the decrement in this
00992                                 // variable.
00993                                int correctionForGrossHack = 0);
00994   
00995 
00996 public:
00997 
00998   MyGraph()
00999       : _name(), _nodes(), _internalNodePtr(), _edgeList(),
01000         _numEdges(0), _numHiddenNodes(0), _numHiddenEdges(0),
01001         _totalSquaredNodeDegrees(0), _totalSquaredNodeWeightedDegrees(0),
01002         _totalEdgeWeight(0), _totalAbsoluteEdgeWeight(0), _id(-1)
01003     {}
01004   // ignore edges with weight < minEdgeWeight.
01005   MyGraph(string infile, string type, MyNT minEdgeWeight = 0)
01006       : _name(), _nodes(), _internalNodePtr(), _edgeList(),
01007         _numEdges(0), _numHiddenNodes(0), _numHiddenEdges(0),
01008         _totalSquaredNodeDegrees(0), _totalSquaredNodeWeightedDegrees(0),
01009         _totalEdgeWeight(0), _totalAbsoluteEdgeWeight(0), _id(-1)
01010     {
01011       if ("" == infile)
01012         return;
01013 
01014       read(infile, "", minEdgeWeight, cout);
01015       _checkDegree();
01016     }
01017   
01018   MyGraph(const MyGraph &copy);
01019 
01020   void setName(string name)
01021     {
01022       _name = name;
01023     }
01024   string getName() const
01025     {
01026       return(_name);
01027     }
01028 
01030   virtual MyGraph::MyNodeIterator nodes()
01031   {
01032     MyNodeIterator itr(_nodes.begin(), _nodes.end());
01033     return(itr);
01034   }
01035 
01037   virtual MyGraph::MyConstNodeIterator nodes() const
01038   {
01039     MyConstNodeIterator itr(_nodes.begin(), _nodes.end());
01040     return(itr);
01041   }
01042 
01044   virtual MyGraph::MyEdgeIterator edges()
01045   {
01046     MyEdgeIterator itr(_edgeList.begin(), _edgeList.end());
01047     return(itr);
01048   }
01049 
01051   virtual MyGraph::MyConstEdgeIterator edges() const
01052   {
01053     MyConstEdgeIterator itr(_edgeList.begin(), _edgeList.end());
01054     return(itr);
01055   }
01056 
01057   virtual MyNode::MyEdgeIterator incidentEdges(MyNodeId nodeId)
01058   {
01059     MyNode &node = _getNode(nodeId);
01060     return(node.incidentEdges());
01061   }
01062   
01063   virtual MyNode::MyEdgeIterator incidentEdges(MyNode &node)
01064   {
01065     return(node.incidentEdges());
01066   }
01067 
01083   void computeBinaryVector(const map< string, unsigned int > &edgeToColumn,
01084                            vector< unsigned int > &binaryVector) const;
01085 
01093   void computeEdgeTypeCounts(map< string, unsigned int > &counts) const;
01094   
01104   unsigned int computeCommonNeighbours(MyNode &node1, MyNode &node2, set< MyNodeId > &common);
01105 
01114   MyNT computeSegregation(MyNodeIdList &nodes)
01115   {
01116           MyNT internalSum(0), externalSum(0);
01117           for (MyNodeIdList::iterator nitr = nodes.begin(); nitr != nodes.end(); nitr++)
01118           {
01119                   if (!isNode(*nitr))
01120                           continue;
01121                   MyNode::MyEdgeIterator eitr = incidentEdges(*nitr);
01122                   while (eitr.hasNext())
01123                   {
01124                           MyEdge &edge = *(eitr.next());
01125                           MyNodeId otherId = edge.opposite(*nitr)->getId();
01126                           if (nodes.end() != find(nodes.begin(), nodes.end(), otherId))
01127                                   internalSum += edge.getWeight();
01128                           else
01129                                   externalSum += edge.getWeight();
01130                   }
01131           }
01132 
01133 #ifdef FALSE
01134           cout << "\t" << internalSum/2 << "\t" << externalSum << "\t" << internalSum/(internalSum + 2*externalSum);
01135 #endif
01136           // 2*externalSum corrects for the double-counting of internal edges, which happens
01137           // because we are looping over the input nodes rather than the edges between them.
01138           return (internalSum/(internalSum + 2*externalSum));
01139   }
01140 
01147   void printEdgeTypeCounts(ostream &ostr) const;
01148 
01149 
01156   virtual void construct(const map< string, map< string, MyNT > > &allEdgeWeights);
01157 
01174   virtual void read(string fileName, string type="", MyNT minEdgeWeight=0, ostream &logStream=cout);
01175 
01176   
01196   virtual void read2(string fileName, string type="", MyNT minEdgeWeight=0, ostream &logstream=cout);
01197 
01208   virtual void readNodeTypes(string file);
01209 
01222   virtual void print(ostream &fstr=cout, string name="");
01223 
01236   virtual void printNodes(ostream &fstr=cout, string name="");
01237 
01251   virtual void printEdges(ostream &fstr=cout, string name="") const;
01252 
01253   // types is a vector of "experiment" types.
01254   virtual void printEdgesItemset(ostream &fstr, vector< string > types); //const;
01255   
01256   virtual void readEdgeWeights(string weightsFile);
01257 
01258 
01270   virtual void transformEdgeWeightsString(string transform);
01271 
01276   template < typename _Transform >
01277   void transformEdgeWeights(_Transform function);//unary_function< MyNT, MyNT> transform);
01278 
01279   // clear up space used.
01280   virtual void clear();
01281   
01282   virtual ~MyGraph()
01283     {
01284       // TODO: BUG/DESIGN. do i need this clear operation?
01285       clear();
01286     }
01287 
01288 
01294   // add other's nodes and edges to *this. making other a const
01295   // reference causes a strange compilation error.
01296   virtual void add(MyGraph &other);
01297 
01308   virtual void addInduced(MyGraph &other);
01309 
01318   void copyEdgeWeights(MyGraph &other);
01319   
01320 
01328   // add other's nodes and edges to *this. making other a const
01329   // reference causes a strange compilation error.
01330   virtual void subtract(MyGraph &other);
01331 
01332 
01333   int getId() const
01334     {
01335       return(_id);
01336     }
01337 
01338   MyGraph &operator=(const MyGraph &rhs);
01339 
01344   bool operator<(const MyGraph &rhs) const;
01345 
01346   //ahsanur: commenting out...
01347   // *this < other if #nodes is less or both have equal #nodes and
01348   // *this has fewer edges.
01349 /*  bool operator<(const MyGraph& other) const
01350     {
01351       return((numNodes() < other.numNodes()) ||
01352              ((numNodes() == other.numNodes()) &&
01353               (numEdges() < other.numEdges())));
01354     }
01355 
01356   bool operator>(const MyGraph& other) const
01357     {
01358       return(other < *this);
01359     }
01360 */
01361 
01370   void getEdgeSet(set< string > &edgeSet) const
01371     {
01372       MyConstEdgeIterator eitr = edges();
01373       while (eitr.hasNext())
01374         {
01375           edgeSet.insert(eitr.next().getEdgeString());
01376         }      
01377     }
01378 
01385   void getNodeSet(set< MyNodeId > &nodeSet) const
01386     {
01387       MyConstNodeIterator nitr = nodes();
01388       while (nitr.hasNext())
01389         nodeSet.insert(nitr.next().getId());
01390     }
01391   
01398   void getNodeVec(vector< MyNodeId > &nodeVec) const
01399       {
01400         MyConstNodeIterator nitr = nodes();
01401         while (nitr.hasNext())
01402           nodeVec.push_back(nitr.next().getId());
01403       }
01404 
01406   unsigned int numNodes() const
01407     {
01408       return(_nodes.size() - numHiddenNodes());
01409     }
01410 
01412   unsigned int numHiddenNodes() const
01413     {
01414       return(_numHiddenNodes);
01415     }
01416   
01418   unsigned int numEdges() const
01419     {
01420       return(_numEdges - numHiddenEdges());
01421     }
01422 
01424   unsigned int numHiddenEdges() const
01425     {
01426       return(_numHiddenEdges);
01427     }
01428 
01434   bool isConnected();
01435   
01438   MyNT computeDensity() const;
01439 
01445   MyNT computeWeightedDensity();
01446 
01461   MyNT computeStouffersZScore() const;
01462 
01475   MyNT computeStouffersZScoreSlowly() const;
01476   
01478   MyNT getAverageNodeWeight();
01479   
01481   MyNT getTotalNodeWeight() const;
01482 
01484   MyNT getTotalSquaredNodeDegrees() const;
01485   
01487   MyNT getTotalAbsoluteNodeWeight() const;
01488 
01490   MyNT getTotalEdgeWeight() const ;
01491 
01493   MyNT getTotalAbsoluteEdgeWeight() const;
01494   
01495   
01496 
01497   
01507 
01508   MyNT computeCompleteness() const
01509     {
01510       if (numEdges() == 0)
01511         return 0;
01512       return(numEdges()*2.0/(numNodes()*(numNodes() - 1)));
01513     }
01514 
01525   MyNT computeExpansionNodeBased(MyGraph &superGraph);
01526   
01527   
01529   int degree(MyNodeId nid) const
01530     {
01531       MyConstNodePtr itr = _nodes.find(nid);
01532       if (_nodes.end() == itr)
01533         {
01534           cerr << "MyGraph::degree(): node " << nid << " is not in the graph." << endl;
01535           return(-1);
01536         }
01537       return((*itr).second.degree());
01538     }
01539 
01541   MyNT weightedDegree(MyNodeId nid) const
01542     {
01543       MyConstNodePtr itr = _nodes.find(nid);
01544       if (_nodes.end() == itr)
01545         {
01546           cerr << "MyGraph::weightedDegree(): node " << nid << " is not in the graph." << endl;
01547           return(-1);
01548         }
01549       return((*itr).second.getWeightedDegree());
01550     }
01551 
01557   void degrees(map< unsigned int, unsigned int > &distribution) const;
01558 
01559   // compute the power of a graph, i.e., connect each node to every
01560   // node that is <= distance edges away.
01561   void power(unsigned int distance);
01562 
01563 
01565   bool isNode(const MyNode &node) const
01566     {
01567       return(isNode(node.getId()));
01568     }
01569 
01572   bool isNode(MyNodeId id) const
01573     {
01574       return(_nodes.end() != _nodes.find(id));
01575     }
01576 
01580   bool isVisibleNode(MyNodeId id) const
01581     {
01582       MyConstNodePtr np = _nodes.find(id);
01583       return((_nodes.end() != np) && (!_getNode(np).isHidden()));
01584     }
01585   
01589   bool isHiddenNode(MyNodeId id) const
01590     {
01591       MyConstNodePtr np = _nodes.find(id);
01592       return((_nodes.end() != np) && (_getNode(np).isHidden()));
01593     }
01594 
01596   void addNodes(set<MyNodeId> ids)
01597   {
01598           for(set<MyNodeId>::iterator it = ids.begin(); it != ids.end(); it ++)
01599                   addNode(*it);
01600   }
01601 
01602   // add an empty node.
01603   MyNode *addNode(MyNodeId id)
01604     {
01605  //     _nodes[id] = MyNode(id, g);
01606       //
01607       // see p 108, item 24 of effective STL. the insert call is more
01608       // efficient plus in this case it doesn't force me to create the
01609       // default constructor for _nodes[id], which i disallow because
01610       // of the initialisation issue with MyNode::_graph.
01611       //
01612       // remember to pass this to the node so that the node knows
01613       // which graph it belongs to.
01614       MyNodeList::iterator itr
01615         = _nodes.insert(MyNodeList::value_type(id, MyNode(id))).first;
01616       // itr is a pair<MyNodeId, MyNode>
01617       return(&(*itr).second);
01618 //      return(true);
01619     }
01620 
01621   // add an empty node.
01622   MyNode *addNode(const MyNode &node)
01623     {
01624       MyNode *addedNode = addNode(node.getId());
01625       // add other aspects of node to addedNode.
01626       addedNode->copy(node);
01627       return(addedNode);
01628     }
01629 
01630   // return the functions of _nodes[id] in fns.
01631   void nodeFunctions(MyNodeId id, vector< string > &fns) const
01632     {
01633       if (!isNode(id))
01634         {
01635           cerr << "MyGraph::nodeFunctions(): Node with id = " << id
01636                << " is not in the graph." << endl;
01637           return;
01638         }
01639       _getNode(id).functions(fns);
01640     }
01641   
01642   // add function to a node.
01643   bool addNodeFunction(MyNodeId id, string function)
01644     {
01645       if (_nodes.end() == _nodes.find(id))
01646         cerr << "Adding function " << function << " to a non-existing node " << id << endl;
01647       _nodes[id].addFunction(function);
01648       return(true);
01649     }
01650 
01659   bool deleteNode(MyNodeId id, set< MyNodeId > *neighbours = NULL);
01660 
01669   void deleteNodeSet(const set< MyNodeId > ids);
01670   
01677   MyEdge *getEdge(MyNodeId node1, MyNodeId node2) const
01678     {
01679       return(_getNode(node1).getEdge(node2));
01680     }
01681   
01682   bool getEdge(MyNodeId node1, MyNodeId node2, MyEdgePtr &ptr) 
01683     {
01684       return(_getNode(node1).getEdgePtr(node2, ptr));
01685     }
01686 
01693   bool isValidEdge(const MyEdgePtr &eptr) const
01694     {
01695       return(_edgeList.end() != eptr);
01696     }
01697   
01701   /*bool isEdge(const MyEdge &edge) const
01702     {
01703       MyNodeId headId = edge.head().getId();
01704       MyNodeId tailId = edge.tail().getId();
01705       return(isEdge(headId, tailId));
01706     }*/
01707   
01708 
01709   /* \brief Check if an edge connecting node1 and node2 is in the
01710      invocant.
01711      
01712    * \note This function checks for adjacency between two nodes if
01713      they both exist in this graph.
01714 
01715    */
01716   bool isEdge(MyNodeId node1, MyNodeId node2) const
01717     {
01718         map< MyNodeId, MyNode >::const_iterator nitr1 = _nodes.find(node1), nitr2 = _nodes.find(node2);
01719         if ((nitr1 != _nodes.end())&&(nitr2 != _nodes.end()))
01720         return(nitr1->second.isAdjacent(node2) && nitr2->second.isAdjacent(node1));
01721       return(0);
01722       //return(_getNode(node1).isAdjacent(node2) && _getNode(node2).isAdjacent(node1));
01723     }
01724 
01725   // this function is faster than the one with ids as parameters since
01726   // i don't need to access the _nodes map.
01727 
01736   bool isEdge(const MyNode &node1, const MyNode &node2) const
01737     {
01738       return(node1.isAdjacent(node2) && node2.isAdjacent(node1));
01739     }
01740   
01741   MyEdgePtr addEdge(const MyEdge &edge);
01742 
01748   MyEdgePtr addEdge(MyNodeId nodeId1, MyNodeId nodeId2, MyNT weight = 1,
01749                     set< string > *types = NULL, bool keepGraphSimple = true);
01750 
01758   bool deleteEdge(MyNodeId node1, MyNodeId node2);
01759 
01767   bool isEdgeValid(MyEdgePtr eptr)
01768     {
01769       return(_edgeList.end() != eptr);
01770     }
01771   
01772   /* Set the weight of the edge connecting node1 and node2.
01773 
01774      \param[in] node1 identifier of the first node.
01775 
01776      \param[in] node2 identifier of the second node.
01777 
01778      \param[in] weight the weight of the edge.
01779      
01780      This method performs no action if the edge connecting the nodes
01781      does not exist.
01782    */
01783   void setEdgeWeight(MyNodeId node1, MyNodeId node2, MyNT weight);
01784   
01785   /* Set the weight of the edge.
01786 
01787      \param[in] edge the edge whose weight to set.
01788 
01789      \param[in] weight the weight of the edge.
01790      
01791      This method performs no action if the edge connecting the nodes
01792      does not exist.
01793    */
01794   void setEdgeWeight(MyEdge *edge, MyNT weight);
01795 
01796   // murali, sep 12, 2007. check if the total edge weight of each node
01797   // is correct. this method is useful to invoke after changing edge
01798   // weights. this method should really be protected but how else will
01799   // i invoke it from GAIN algorithms?
01800   bool _checkNodeTotalEdgeWeight();
01801 
01806   void computeEdgeTypeOverlaps() const;
01807   
01810   void computeEdgeTypeSubgraphs(ostream &logStream,
01811                                 map< string, MyGraph>&edgeTypeSubgraphs, bool useShared = 1);
01812 
01814   //  from largest to smallest in terms of the number of nodes.
01815   //
01816   //  \param[in] comps The resulting components will be placed here
01817   void components(vector< MyGraph > *comps = NULL); //const;
01818 
01819 
01834   MyNT computeClosestNode(const MyNode &rootNode, MyNodeIdSet &nodeSet,
01835                           MyNodeId &closest);
01836 
01837 
01851   void computeHistogramNodeWeights(MyHistogram &hist, bool cumulative = false,
01852                                    bool reverse = false);
01853   
01864   void computeHistogramEdgeWeights(MyHistogram &hist, bool cumulative = false,
01865                                    bool reverse = false);
01866   
01867   
01871   void computeMinimumSpanningTreePrim(MyGraph &mst);
01872 
01876   void computeMaximumSpanningTreePrim(MyGraph &mst);
01877 
01878   // generic version of Prim's to compute either minimum or maximum
01879   // spanning tree based on the compare method.
01880   template< typename Compare > // StrictWeakOrdering
01881   void computeMSTPrim(MyGraph &mst);//, Compare compare);
01882   
01883   // computeNumCommonEdges() and computeNumCommonNodes() are helpers
01884   // for computeSimilarityEdges() and computeSimilarityNodes(),
01885   // respectively, since they easily allow asymmetric similarity to be
01886   // computed.
01887   
01889   unsigned int computeNumCommonEdges(const MyGraph &other) const;
01890 
01898   MyNT computeNumCommonEdgesWeighted(const MyGraph &other) const;
01899   
01901   unsigned int computeNumCommonNodes(const MyGraph &other) const;
01902   
01910   MyNT computeNumCommonNodesWeighted(const MyGraph &other) const;
01911   
01912 
01913   //  different versions of compute shortest path DAG. NOTE: these
01914   //  methods and printAllPairsShortestPathDistances() cannot be const
01915   //  since the underlying Dijkstra's algorithm modifies the graph,
01916   //  specifically node's visitation status.
01917   
01929   void computeShortestPathDAG(const MyNode &node, map<MyNodeId,MyNT> &distance,
01930                               map<MyNodeId, unsigned int> &count);
01931 
01937   void computeShortestPathDAG(const MyNodeId nodeid, map<MyNodeId,MyNT> &distance,
01938                               map<MyNodeId, unsigned int> &count);
01939 
01940   void computeShortestPathDAG(const MyNode &node, map<MyNodeId,MyNT> &distance,
01941                               map<MyNodeId, unsigned int> &count,
01942                               stack<MyNodeId> &descendents,
01943                               map< MyNodeId,vector<MyNodeId> > &predecessors);
01944 
01945   void computeShortestPathDAG(const MyNode &node, map<MyNodeId,MyNT> &distance,
01946                               map<MyNodeId, unsigned int> &count,
01947                               stack<MyNodeId> &descendents,
01948                               map< MyNodeId,vector<MyNodeId> > &predecessors,
01949                               map< MyNodeId,vector<MyNodeId> > &successors);
01950 
01959   void printAllPairsShortestPathDistances(ostream &ostr);
01960   
01963   void printAllPairsShortestPathDistances(string outfile);
01964 
01984   MyNT computeSimilarityEdges(const MyGraph &other, bool jacquard = true, bool asymmetric = false) const;
01985 
01990   MyNT computeSimilarityEdgesWeighted(const MyGraph &other, bool jacquard = true, bool asymmetric = false) const;
01991   
01999   MyNT computeMostSimilarGraphEdges(const vector< MyGraph >& others, unsigned int &bestIndex) const;
02000   
02023   MyNT computeSimilarityNodes(const MyGraph &other, bool jacquard = true, bool asymmetric = false) const;
02024 
02029   MyNT computeSimilarityNodesWeighted(const MyGraph &other, bool jacquard = true, bool asymmetric = false) const;
02030   
02038   MyNT computeMostSimilarGraphNodes(const vector< MyGraph >& others, unsigned int &bestIndex) const;
02039   
02040 //   //Compute betweeness of each node(old algorithm, O(n^2)
02041 //   //invocations of Dijkstra's algorithm).
02042 //   void computeBetweeness(ostream &fstr);
02043 
02044   //Compute betweeness of each node (fast algorithm by Brandes).
02045   void computeNodeBetweenessCentrality(ostream *fstr, map< MyNodeId, MyNT> *bets);
02046   //Compute betweeness of each edge (fast algorithm by Newman).
02052   void computeEdgeBetweenessCentrality(ostream *fstr, map< MyEdge, MyNT> *bets);
02053  
02054   // functions to create random graphs.
02055   
02059   void computeEdgeFSWeights();
02060 
02064   void computeEdgeCDWeights();
02065 
02066   // create a random graph with the same degree distribution. 
02067   void randomise(MyGraph &random, map<MyNodeId,bool> *vertexcover = NULL);
02068 
02079   void randomiseVectorOfEdges(MyGraph& random, unsigned int numIterations);
02080 
02081 
02096   void randomiseErdosRenyiSubgraph(MyGraph& random, const vector<MyNodeId> &subgraphNodes, MyNT prob = 0.5);
02097 
02098 
02114   void randomiseErdosRenyiSubgraph(MyGraph& random, const set<MyNodeId> &subgraphNodes, MyNT prob = 0.5);
02115 /*
02118   void randomiseErdosRenyiSubgraph(MyGraph& random, const set<MyNodeId> &subgraphNodes, MyNT prob,
02119                   MultiRandSeqGenerator &multiRandSeq, NaturalNumber randIndex);
02120 */
02124   void randomiseErdosRenyiSubgraph(const set<MyNodeId> &subgraphNodes, MyNT prob);
02125 
02141   static void createErdosRenyiNPGraph(MyGraph& graph, unsigned long numNodes, MyNT prob = 0.5);
02142 
02143 
02147   unsigned int deleteDisconnectedNodes();
02148 
02156   void createSubgraph(const vector< MyNodeId >& deletedNodes,
02157                        MyGraph& subgraph) const;
02158 
02164   void deleteSubgraphNodes(MyGraph& subgraph);
02165 
02173   void deleteSubgraphEdges(MyGraph& subgraph);
02174   
02175   // hide all the nodes in subgraph and the edges incident on these nodes. 
02176 //  void hideSubgraphNodes(MyGraph& subgraph);
02177   // hide only the edges induced by the nodes in subgraph.
02178 //  void hideSubgraphEdges(MyGraph& subgraph);
02179 
02190   void computeInducedSubgraph(const set< MyNodeId > &subgraphNodes,
02191                               MyGraph &subgraph) const;
02192 
02200   bool isInduced(const MyGraph &subgraph);
02201   
02202 
02203   // dense subgraph functions.
02204   //
02205   // find 1 core of given degree.
02206   void core(unsigned int k, MyGraph &subgraph, bool hide, ostream *fstr = NULL) const;
02207   // find all cores.
02208   void find_cores(vector< MyGraph > &cores) const;
02209 
02210   
02211 #ifdef HAVE_GLPK
02212   // exact algorithm using Charikar's LP formulation.
02213   void computeDensestSubgraph(MyGraph &subgraph) const;
02214 #endif // HAVE_GLPK
02215 
02216   // factor of 2 approximation using greedy algorithm.
02217   void computeDensestSubgraphApprox(MyGraph &subgraph, bool optimiseStouffersZscore = false,
02218                                     ostream *fstr = NULL) const;
02219 
02220 
02243   void computeDenseSubgraphs(ostream &logStream, vector< MyGraph >&denseSubgraphs,
02244                              bool computeDenseSubgraphsTillTheBitterEnd = false,
02245                              bool splitDenseSubgraphsIntoComponents = false,
02246                              bool suppressDetails = false);
02247 
02266   void computeMostPerturbedSubgraph(MyGraph &subgraph,
02267                                     unsigned int numIterationsFactor,
02268                                     ostream &logStream,
02269                                     bool runOnCopy = true);
02270 
02271   // find all cliques in the graph.
02272   void findCliques(vector< MyGraph > &cliques, ostream &ostr) const;
02273 
02274 
02285   virtual void sparsify(string method);
02286 
02287   // return the node with id = id. ALWAYS CALL IT WHEN YOU ARE SURE
02288   // THE NODE EXISTS.
02289   const MyNode& _getNode(MyNodeId id) const
02290     {
02291 #ifdef DEBUG
02292       if ("" == id)
02293         cerr << "MyGraph::_getNode(): node id is a null string." << endl;
02294       if (_nodes.end() == _nodes.find(id))
02295         cerr << "MyGraph::_getNode(): node with id \"" << id
02296              << "\" is not the in graph!" << endl;
02297 #endif      
02298       return((*_nodes.find(id)).second);
02299     }
02300   // i need this non-const version for hiding the node, for instance.
02301   MyNode& _getNode(MyNodeId id) 
02302     {
02303 #ifdef DEBUG
02304       if ("" == id)
02305         cerr << "MyGraph::_getNode(): node id is a null string." << endl;
02306 #endif      
02307       return((*_nodes.find(id)).second);
02308     }
02309 
02310 //    MyEdgePtr _getEdge(MyNodeId id1, MyNodeId id2)
02311 //    {
02312 //      MyNode::MyEdgeIterator inEdges = incidentEdges(_getNode(id1));
02313 //      while (inEdges.hasNext()) 
02314 //        {
02315 //                      // try all the edges incident on *node.
02316 //                      Edge &e = inEdges.next();
02317 //                      Node *w = e.opposite(_getNode(id1));
02318 //                      if((*w).getId() == id2)
02319 //                      {
02320 //                              return e;
02321 //                      }
02322 //              }
02323 //    }
02324 
02325         // compute the subgraph induced by each edge type
02326   //void computeEdgeTypeSubgraphs(ostream &logStream, map< string, MyGraph >&edgeTypeSubgraphs);
02327   // generate a weight for every edge from its corresponding type weights
02328   void assignEdgeWeightsFromEdgeTypeWeights(const map< string, MyNT > &edgeTypeWeights);
02329   
02335   MyGraph computeIntersection(const vector< MyGraph > &others);
02336 
02337   // read edge type group mappings from a file
02338         void readEdgeTypeGroups(string fileName);
02339         // group types according to edgeTypeGroups and put them in workingTypes
02340         void translateTypes(const set< string > &types, set< string > &workingTypes, bool useShared = 1);
02341 
02342 
02352   void computeSeparator(const set< MyNodeId > &nodes, set< MyNodeId > &separator);
02353   
02358   MyNT computeSeparatorRatio(MyGraph subgraph);
02359 
02360   // functions used in MyGraph::_computeDensestSubgraphApprox() and MyGraph::core(). also in treedex, so made public now (Apr 4, 2007)
02361   void _createDegreeLists(unsigned int &minDegree,
02362                           vector< list< MyNodeId > > &degreeLists,
02363                           map< MyNodeId, list< MyNodeId >::iterator > &nodePositions) const;
02364   void _decrementDegrees(vector< list< MyNodeId > > &degreeLists,
02365                          map< MyNodeId, list< MyNodeId >::iterator > &nodePositions,
02366                          const set< MyNodeId > &neighbours) const;
02367 
02368   
02369 protected:
02370   // protected functions so that subclasses can access them.
02371 
02372   void _copyNodes(const MyGraph &copy);
02373 
02374   // const argument gives weird compilation error with MyConstEdgeIterator
02375   void _copyEdges(const MyGraph &copy);
02376 
02377   
02378   // check that the graph is "valid."
02379   // const gives weird compilation error with MyConstEdgeIterator.
02380   bool _check() const;
02381 // check if a node exists.
02382 //bool _checkNode(
02383 
02384   
02385   // check the average degree of the graph.
02386   bool _checkDegree() const;
02387 
02388   
02389 
02390 
02391   
02392   // aaaghhhh! i have to write one version for both the const and
02393   // non-const versions of MyNodePtr. it seems to make sense for the
02394   // non-const versions *not* to return a const class.
02395   MyNodeId _getNodeId(MyNodePtr &nptr) 
02396     {
02397       return((*nptr).first);
02398     }
02399   MyNodeId _getNodeId(MyConstNodePtr &nptr) const
02400     {
02401       return((*nptr).first);
02402     }
02403   MyNode& _getNode(MyNodePtr &nptr) 
02404     {
02405       return((*nptr).second);
02406     }
02407   const MyNode& _getNode(MyConstNodePtr &nptr) const
02408     {
02409       return((*nptr).second);
02410     }
02411   
02412 
02413   // function to do the real work of finding a k-core by deleting all
02414   // the nodes with degree > k.
02415   void _core(unsigned int k, bool hide, ostream *fstr);
02416   void _find_cores(vector< MyGraph > &cores);
02417 
02418   bool _tryAddingNode(MyNodeId nid,
02419                       const set< MyNodeId >& currentClique);
02420   
02421   void _findCliques(vector< MyGraph > &cliques, ostream &ostr);
02422   
02423   // this function does all the real work by working on a copy of the
02424   // original graph. i have to give it a different name (starting with
02425   // an underscore here) otherwise i can call
02426   // MyGraph::computeDensestSubgraphApprox() only on a const MyGraph.
02427   void _computeDensestSubgraphApprox(MyGraph &subgraph, bool hide = false,
02428                                      ostream *fstr = NULL);
02429 void _computeDensestWeightedSubgraphApprox(MyGraph &subgraph,
02430                                            bool optimiseStouffersZscore = false,
02431                                            bool hide = false,
02432                                            ostream *fstr = NULL);
02433 
02434 
02435   
02436 #ifdef HAVE_GLPK
02437 void _computeDensestSubgraph(MyGraph &subgraph, bool hide);
02438 
02439   void _get_subgraph_from_lp(MyGraph &subgraph, LPX *lp);
02440 #endif // HAVE_GLPK
02441 
02442 };
02443 
02444 typedef MyEdge*  MyEdgePQNode;
02445 
02446 
02447 
02448 template< typename Compare >
02449 void MyGraph::computeMSTPrim(MyGraph &mst)//, Compare comp)
02450 {
02451   typedef priority_queue< MyEdgePQNode, vector< MyEdgePQNode >, Compare > MyEdgePQ;
02452   MyEdgePQ epq;//(comp);
02453   set< string > edgeTypes;
02454 
02455 //   // find the components. run the algorithm for each component.
02456 //   vector< MyGraph > comps;
02457 //   vector< MyGraph >::iterator compItr;
02458 //   components(&comps);
02459 //   for (compItr = comps.begin(); compItr != comps.end(); compItr++)
02460 //     {
02461       
02462 //  MyNodeIterator nitr = compItr->nodes(); 
02463   MyNodeIterator nitr = nodes(); 
02464   MyNode::MyEdgeIterator eitr;
02465   // loop over all the nodes so that i process all components.
02466   while (nitr.hasNext())
02467     {
02468       // making this variable a reference has ghastly consequences in
02469       // the when i assign the head or tail of a popped edge to
02470       // nodeNotInMST at the end of the main PQ loop below: *head* and
02471       // *tail* become the same!
02472       MyNode *nodeNotInMST = &nitr.next();
02473       string id = nodeNotInMST->getId();
02474       if (!mst.isNode(id))
02475         mst.addNode(id);
02476       else
02477         // node is already in the MST, which means i have processed
02478         // its connected component.
02479         continue;
02480       // start constructing the MST for a brand-new component.
02481       while (true)
02482         {
02483           // reset id at the beginning of the loop.
02484           id = nodeNotInMST->getId();
02485           // add the edges incident on nodeNotInMST to the PQ.
02486           eitr = nodeNotInMST->incidentEdges();
02487           while (eitr.hasNext())
02488             {
02489               MyEdge &edge = *(eitr.next());
02490               // get neighbour.
02491               MyNode &neighbour = *(edge.opposite(id));
02492 #ifdef DEBUG
02493               cout << "MyGraph::computeMSTPrim(): processing edge between nodes "
02494                    << edge.head().getId() << " and " << edge.tail().getId() << endl;
02495 #endif 
02496               // ignore edge connecting nodes in the MST.
02497               if (mst.isNode(neighbour.getId()))
02498                 continue;
02499               // add edge to PQ.
02500               epq.push(&edge);
02501 #ifdef DEBUG
02502               cout << "MyGraph::computeMSTPrim(): adding to the PQ edge between nodes "
02503                    << edge.head().getId() << " and " << edge.tail().getId() << endl;
02504 #endif 
02505             }
02506           if (epq.empty())
02507             break;
02508           // get the edge with smallest weight in the PQ.
02509           MyEdge *edge = epq.top();
02510           epq.pop();
02511           
02512           // ignore the edge if both endpoints are in tree.
02513           MyNode &head = edge->head();
02514           MyNode &tail = edge->tail();
02515 #ifdef DEBUG
02516           cout << "MyGraph::computeMSTPrim(): popped edge between nodes "
02517                << head.getId() << " and " << tail.getId()
02518                << " with weight " << edge->getWeight() << endl;
02519 #endif 
02520           if ((mst.isNode(head.getId())) && (mst.isNode(tail.getId())))
02521             continue;
02522 #ifdef DEBUG
02523           // at least one endpoint must be in the tree.
02524           if (!(mst.isNode(head.getId())) && !(mst.isNode(tail.getId())))
02525             cerr << "MyGraph::computeMSTPrim(): trying to add an edge to the MST "
02526                  << "but neither of its endpoints " << head.getId()
02527                  << " and " << tail.getId()
02528                  << " are in the MST!" << endl;
02529 #endif
02530           // find the node not in the tree. that is the new nodeNotInMST. 
02531           // add the node and the edge just found.
02532           if (mst.isNode(head.getId()))
02533             nodeNotInMST = &tail;
02534           else
02535             nodeNotInMST = &head;
02536           mst.addNode(nodeNotInMST->getId());
02537           // add the edge.
02538           MyEdgePtr newEdge = mst.addEdge(head.getId(), tail.getId(), edge->getWeight());
02539           // add the edge types too.
02540           edgeTypes.clear();
02541           edge->getTypes(edgeTypes);
02542           newEdge->addTypes(edgeTypes);
02543           
02544           // epq modified at the beginning of the loop.
02545         }
02546     } // end of loop over all nodes (in a component)
02547 //    } // end of loop over all components.
02548   
02549 }
02550 
02551 
02552 template < typename _Transform >
02553 void MyGraph::transformEdgeWeights(_Transform transformFunction)
02554 {
02555   MyEdgeIterator itr = edges();
02556   MyNT weight;
02557   while (itr.hasNext())
02558     {
02559       MyEdge &edge = itr.next();
02560       weight = edge.getWeight();
02561       edge.setWeight(transformFunction(weight));
02562     }
02563 }
02564 
02565 
02566 #endif // _GRAPH_H 
02567 
 All Classes Functions Variables Typedefs Friends