Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
gain-algorithm.h
00001 /**************************************************************************
00002  * Copyright (c) 2001-2011 T. M. Murali                                   *
00003  * Copyright (c) 2011 Phillip Whisenhunt                                  *
00004  * Copyright (c) 2011 David Badger                                        *
00005  * Copyright (c) 2010 Jacqueline Addesa                                   *
00006  *                                                                        *
00007  * This file is part of Biorithm.                                         *
00008  *                                                                        *
00009  * Biorithm is free software: you can redistribute it and/or modify       *
00010  * it under the terms of the GNU General Public License as published by   *
00011  * the Free Software Foundation, either version 3 of the License, or      *
00012  * (at your option) any later version.                                    *
00013  *                                                                        *
00014  * Biorithm is distributed in the hope that it will be useful,            *
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00017  * GNU General Public License for more details.                           *
00018  *                                                                        *
00019  * You should have received a copy of the GNU General Public License      *
00020  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00021  *                                                                        *
00022  **************************************************************************/
00023 
00040 // Purpose: define classes of algorithms for assigning functions.
00041 
00042 
00043 #ifndef _GAIN_ALGORITHM_H
00044 #define _GAIN_ALGORITHM_H
00045 
00046 #include "boost/lexical_cast.hpp"
00047 
00048 #include "gain.h"
00049 #include "gain-state.h"
00050 #include "gain-traverse.h"
00051 #include "bfs.h"
00052 #include "GO.h"
00053 #include "mcode.h"
00054 #include "reporter.h"
00055 
00056 
00057 #ifdef HAVE_LIBGSL
00058 #include <gsl/gsl_linalg.h>
00059 #include <gsl/gsl_matrix.h>
00060 #include <gsl/gsl_permutation.h>
00061 #include <gsl/gsl_vector.h>
00062 #endif //HAVE_LIBGSL
00063 
00142 // ultra abstract class to resolve issues in main()
00143 // arising from pointers to original abstract class, which is now
00144 // templated
00145 
00152 class MyReallyAbstractGainAlgo
00153 {
00154 public:
00158   virtual void initialiseAlgorithm()
00159     {}
00160 
00162   virtual string getName() = 0;
00163 
00165   virtual void pipeline(const BioFunction &function) = 0;
00166 
00169   virtual void printStatistics(ostream &statsStream)
00170     {}
00171 
00172   // virtual void tieUpLooseEnds() = 0;
00173 
00174 };
00175 
00176 //class MyAbstractGainAlgo
00177 template< typename StateType > class MyAbstractGainAlgo : public MyReallyAbstractGainAlgo
00178 {
00179 protected:
00180   MyAnnotations &_annotations;
00181   // useful to store this to avoid recomputing FLNs. each algorithm
00182   // must decide when to store the function.
00183   BioFunction _currentFunction;
00184 //  static string _name;
00185   // this one is a pointer since i may get it or not in the constructor.
00186   GeneOntology *_go;
00187   MyGainGraph & _graph;
00188   MyGainParams & _params;
00189 
00190   Reporter & _reporter;
00191 
00192 
00193   // for each possible initial state type, store the list of nodes in that initial state type.
00194   map< StateType, MyNodeIdList > _nodesByInitialStateType;
00195 
00196   map< StateType, MyNodeIdList > _nodesByRealStateType;
00197 
00198   // for each node, store information on its state (including partial
00199   // or complete history of modification of node state by the
00200   // algorithm.
00201   map< MyNodeId, MyGainStateInfo< MyGainState< StateType > > > _states;
00202 
00203   map< MyNodeId, StateType > _realStateTypes;
00204   map< MyNodeId, StateType > _initialStateTypes;
00205 
00206   // set of nodes in the cut.
00207   MyNodeIdSet _cutNodes;
00208 
00209 protected:
00210 //  virtual static void _setName() = 0;
00211 
00212 
00213 
00214 public:
00215   MyAbstractGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
00216       : _annotations(a), _graph(g), _params(p), _reporter(r), _go(go),
00217         _nodesByInitialStateType(), _nodesByRealStateType(), _states(), _realStateTypes(), _initialStateTypes()
00218 
00219     {
00220       //_reporter.setExperimentName(p.getExperimentName());
00221     }
00222 
00223   virtual ~MyAbstractGainAlgo()
00224     {}
00225 
00233   virtual void computeCrossValidationResults(const BioFunction &function,
00234                                              const MyNodeIdList &cvNodes) = 0;
00235 
00255   virtual void computeCut(MyNodeIdList &nodesToAnnotate,
00256                           MyNodeIdSet &cutNodes)
00257     {}
00258 
00265   /*virtual void computePredictionConfidences(const BioFunction &function,
00266                                             MyNodeIdList &nodesToAnnotate,
00267                                             MyNodeIdList &predictedNodes) = 0;
00268 */
00269   virtual void computePredictions(const BioFunction &function,
00270                                   const MyNodeIdList &nodesToAnnotate,
00271                                   MyNodeIdList &predictedNodes) = 0;
00272 
00273 
00285   // not const MyNode& because of problems with MyNode::ConstIncidentEdgeIterator
00286   virtual MyGainState< StateType > computeState(MyNode &node) = 0;
00287 
00303   virtual void constructCrossValidationLists(vector<MyNodeIdList> &cvLists) = 0;
00304 
00305   // this method is not const since it may involve collapsing node states.
00306   virtual bool hasConverged(const MyNodeIdList &nodesToAnnotate)  = 0;
00307 
00308   virtual void crossValidate(const BioFunction &function);
00309 
00317   virtual void maskNodeStates(const MyNodeIdList &nodes) = 0;
00318 
00326   //virtual void unmaskNodeStates(const MyNodeIdList &nodes) = 0;
00327 
00328 
00340   virtual void evaluatePredictions(MyAnnotations &newAnnotations);
00341 
00342 
00343 
00346 //  static
00347   virtual string getName() = 0;
00348 
00349 
00350   virtual void initialiseNodeStates(const BioFunction &function,
00351                                     MyNodeIdList &nodesToAnnotate) = 0;
00352 
00355   virtual void pipeline(const BioFunction &function);
00356 
00357 
00358   virtual void predict(const BioFunction &function);
00359 
00360   virtual void run(const MyNodeIdList &nodesToAnnotate) = 0;
00361 
00375   virtual void setCrossValidationList(MyNodeIdList &nodesToAnnotate,
00376                                       const MyNodeIdList &cvList);
00377 
00380 //  virtual void visualise(BioFunction &currentFunction, MyNodeIdList &nodesToAnnotate);
00381   virtual void visualiseCrossValidation(const BioFunction &currentFunction);
00382 
00390   virtual void visualiseCut(const BioFunction &currentFunction,
00391                             MyNodeIdList &predictedNodes);
00392   virtual void visualisePredictions(const BioFunction &currentFunction,
00393                                     MyNodeIdList &nodesToAnnotate);
00394 
00395   virtual void initialiseEdgeWeights(const BioFunction &function)
00396     {
00397 #ifdef DEBUG
00398 //                _graph._checkNodeTotalEdgeWeight();
00399 #endif
00400       if (_params.weightEdgeTypesCutoff)
00401         initialiseEdgeWeightsCutoff(function);
00402       else if (_params.weightEdgeTypesDepth)
00403         initialiseEdgeWeightsDepth(function);
00404       // murali, sep 12, 2007.
00405 #ifdef DEBUG
00406 //                _graph._checkNodeTotalEdgeWeight();
00407 #endif
00408     }
00409   virtual void initialiseEdgeWeightsCutoff(const BioFunction &function) {};
00410   virtual void initialiseEdgeWeightsDepth(const BioFunction &function) {};
00411 
00412   virtual void removeUnpredictableNodes(const MyNodeIdSet &unpredictableNodes, MyNodeIdList &oldNodes)
00413     {
00414       MyNodeIdList newNodes;
00415       for (MyNodeIdList::iterator nitr = oldNodes.begin(); nitr != oldNodes.end(); nitr++)
00416         if (unpredictableNodes.end() == unpredictableNodes.find(*nitr))
00417           newNodes.push_back(*nitr);
00418       oldNodes = newNodes;
00419     }
00420 };
00421 
00422 
00423 // template< typename StateType >
00424 // void MyAbstractGainAlgo< StateType >::comparePredictions(MyReallyAbstractGainAlgo &other)
00425 // {
00426 //   comparePredictions(dynamic_cast< MyAbstractGainAlgo >(other));
00427 // }
00428 
00429 // template< typename StateType >
00430 // void MyAbstractGainAlgo< StateType >::comparePredictions(MyAbstractGainAlgo &other)
00431 // {
00432 //   typename map< MyNodeId, MyGainStateInfo< MyGainState< StateType > > >::iterator
00433 //     sitr, oitr;
00434 //   _params.logStream << "Comparing predictions for algorithms "
00435 //                     << getName() << " and " << other.getName()
00436 //                     << " for function " << _currentFunction << endl;
00437 //   // compare initial and final states.
00438 //   for (sitr = _states.begin(); sitr != _states.end(); sitr++)
00439 //     {
00440 //       oitr = other._states.find(sitr->first);
00441 //       if (other._states.end() == oitr)
00442 //         _params.logStream << other.getName() << " does not have a state for node " << sitr->first << endl;
00443 //       if (sitr->second.getRealState() != oitr->second.getRealState())
00444 //         _params.logStream << " Node " << sitr->first << "has getReal state "
00445 //                           << sitr->second.getRealState()
00446 //                           << " for " << getName() << " but "
00447 //                           << oitr->second.getRealState()
00448 //                           << " for " << other.getName() << endl;
00449 //       if (sitr->second.getInitialState() != oitr->second.getInitialState())
00450 //         _params.logStream << " Node " << sitr->first << "has getInitial state "
00451 //                           << sitr->second.getInitialState()
00452 //                           << " for " << getName() << " but "
00453 //                           << oitr->second.getInitialState()
00454 //                           << " for " << other.getName() << endl;
00455 //       if (sitr->second.getCurrentState() != oitr->second.getCurrentState())
00456 //         _params.logStream << " Node " << sitr->first << "has getCurrent state "
00457 //                           << sitr->second.getCurrentState()
00458 //                           << " for " << getName() << " but "
00459 //                           << oitr->second.getCurrentState()
00460 //                           << " for " << other.getName() << endl;
00461 //     }
00462 // }
00463 
00464 
00465 template< typename StateType >
00466 void MyAbstractGainAlgo< StateType >::crossValidate(const BioFunction &function)
00467 {
00468         // BUG: even if this is done inside the loop it still has to be done here or --only-cv breaks
00469   MyNodeIdList hypotheticalNodes;
00470   initialiseNodeStates(function, hypotheticalNodes);
00471 
00472   // each algorithm needs to decide how to build its cross validation
00473   // lists. for example, MyOneVersusNoneGainAlgo does not have any
00474   // proteins in state NOT_ANNOTATED_STATE.
00475   // each MyNodeIdList is a list of nodes i should erase. for loocv,
00476   // each MyNodeIdList will contain a single node.
00477   vector< MyNodeIdList > cvLists;
00478   constructCrossValidationLists(cvLists);
00479 
00480   vector< MyNodeIdList >::iterator itr;
00481   for (itr = cvLists.begin(); itr != cvLists.end(); itr++)
00482     {
00483       // Need to reinit each time until unmasking works.
00484       // When it does, just set nodesToAnnotate = hypotheticalNodes.
00485       // Then masking and setCVList will take care of business without needing to reinit.
00486       MyNodeIdList nodesToAnnotate, dummyNodesToAnnotate;
00487       initialiseNodeStates(function, nodesToAnnotate);
00488 
00489       // "forget" the existing annotations of the nodes in *itr and add them to nodesToAnnotate.
00490       maskNodeStates(*itr);
00491 
00492       MyNodeIdSet unpredictableNodes;
00493       MyGainGraph reducedGraph;
00494       if (!_params.getDoNotReduce())
00495         {
00496           _graph.reduce2(_params.logStream, _initialStateTypes, _params, reducedGraph, unpredictableNodes);
00497 #ifdef DEBUG
00498           _params.logStream << "\n\t" << getName() << ": after reducing the FLN for cross validation, found " << unpredictableNodes.size() << " unpredictable nodes" << flush;
00499 #endif // DEBUG
00500           // remove unpredictableNodes from nodesToAnnotate AND the current cvList INDIVIDUALLY.
00501           // computeCrossValidationResults needs the modified cvList so that we don't evaulate unpredictable nodes.
00502           removeUnpredictableNodes(unpredictableNodes, nodesToAnnotate);
00503           removeUnpredictableNodes(unpredictableNodes, *itr);
00504         }
00505 
00506       setCrossValidationList(nodesToAnnotate, *itr);
00507 
00508       initialiseEdgeWeights(function);
00509 
00510       // run.
00511       run(nodesToAnnotate);
00512       computeCrossValidationResults(function, *itr);
00513 
00514       //visualiseCVTemp(function, *itr);
00515 
00516       // now "remember" the existing annotations of the nodes in *itr.
00517       //unmaskNodeStates(*itr);
00518     }
00519 }
00520 
00521 
00522 template< typename StateType >
00523 void MyAbstractGainAlgo< StateType >::evaluatePredictions(MyAnnotations &newAnnotations)
00524 {
00525   _reporter.evaluatePredictions(getName(), _annotations, newAnnotations, *_go);
00526 }
00527 
00530 template< typename StateType >
00531 void MyAbstractGainAlgo< StateType >::pipeline(const BioFunction &function)
00532 {
00533   // predict. visualise, if necessary, inside predict.
00534   if (!_params.onlyCrossVal)
00535     {
00536       predict(function);
00537       _reporter.printPredictions(_params.predictionsStream, _params.getNumPredictionsToPrintPerFunction());
00538     }
00539 
00540   // cross validate visualise, if necessary, inside crossValidate.
00541   if (!_params.onlyPredictions)
00542     {
00543       crossValidate(function);
00544       // tmm, 2008-03-25. no need to print these results, since we are
00545       // invoking Reporter::printCurveDataFromCV() in gain.C
00546 //      _reporter.printCVResults(_params.cvStream);
00547       //_reporter.printDetailedCVResults(_params.detailedCVStream);
00548     }
00549 
00550 // //   // compute probabilities of predictions based solely on distribution
00551 // //   // of scores for nodes in states 0 and nodes in other states.
00552 // //   computePredictionProbabilities(function);
00553 
00554   // randomise and/or statistical significance
00555 
00556   // report.
00557   /*if (!_params.onlyCrossVal)
00558     // send function so that i print info only for this function.
00559     //_reporter.printPredictions(_params.predictionsStream, &function);
00560     //_reporter.flushPredictions(_params.predictionsStream);
00561   if (!_params.onlyPredictions)
00562   {
00563     // send function so that i print info only for this function.
00564     //_reporter.printCVResults(_params.cvStream, &function);
00565     //_reporter.flushCVResults(_params.cvStream);
00566     //_reporter.flushDetailedCVResults(_params.detailedCVStream);
00567   }*/
00568 }
00569 
00570 template< typename StateType >
00571 void MyAbstractGainAlgo< StateType >::predict(const BioFunction &function)
00572 {
00573    MyNodeIdList nodesToAnnotate;
00574    initialiseNodeStates(function, nodesToAnnotate);
00575 
00576    MyNodeIdSet unpredictableNodes;
00577    MyGainGraph reducedGraph;
00578    if (!_params.getDoNotReduce())
00579      {
00580        _graph.reduce2(_params.logStream, _initialStateTypes, _params, reducedGraph, unpredictableNodes);
00581 #ifdef DEBUG
00582        _params.logStream << "\n\t" << getName() << ": after reducing the FLN for prediction, found " << unpredictableNodes.size() << " unpredictable nodes and " << flush;
00583 #endif // DEBUG
00584        // remove unpredictableNodes from nodesToAnnotate.
00585        removeUnpredictableNodes(unpredictableNodes, nodesToAnnotate);
00586        _params.logStream << nodesToAnnotate.size() << " predictable nodes." << endl;
00587      }
00588 
00589    initialiseEdgeWeights(function);
00590 
00591    run(nodesToAnnotate);
00592    MyNodeIdList predictedNodes;
00593    // also stores predictions.
00594    computePredictions(function, nodesToAnnotate, predictedNodes);
00595 //   computePredictionConfidences(function, nodesToAnnotate, predictedNodes);
00596 
00597    if (_params.visualisePredictions)
00598     visualisePredictions(function, nodesToAnnotate);
00599    if (_params.visualiseCut)
00600      visualiseCut(function, predictedNodes);
00601 }
00602 
00603 template< typename StateType >
00604 void MyAbstractGainAlgo< StateType >::setCrossValidationList(
00605   MyNodeIdList &nodesToAnnotate, const MyNodeIdList &cvList)
00606 {
00607   // add the nodes in cvList to nodesToAnnotate.
00608   nodesToAnnotate.insert(nodesToAnnotate.end(), cvList.begin(), cvList.end());
00609 }
00610 
00611 // template< typename StateType >
00612 // void MyAbstractGainAlgo< StateType >::visualise(BioFunction &currentFunction,
00613 //                                    MyNodeIdList &nodesToAnnotate)
00614 // {
00615 //   if (!_params.onlyCrossVal)
00616 //     visualisePredictions(currentFunction, nodesToAnnotate);
00617 // }
00618 
00619 template< typename StateType >
00620 void MyAbstractGainAlgo< StateType >::visualiseCrossValidation(
00621   const BioFunction &currentFunction)
00622 {
00623   cerr << "MyAbstractGainAlgo< StateType >::visualiseCrossValidation not implemented."
00624        << endl;
00625 }
00626 
00627 template< typename StateType >
00628 void MyAbstractGainAlgo< StateType >::visualiseCut(
00629   const BioFunction &currentFunction, MyNodeIdList &nodesToAnnotate)
00630 {
00631   cerr << "MyAbstractGainAlgo< StateType >::visualiseCut not implemented."
00632        << endl;
00633 }
00634 
00635 template< typename StateType >
00636 void MyAbstractGainAlgo< StateType >::visualisePredictions(
00637   const BioFunction &currentFunction, MyNodeIdList &nodesToAnnotate)
00638 {
00639   cerr << "MyAbstractGainAlgo< StateType >::visualisePredictions not implemented."
00640        << endl;
00641 }
00642 
00643 
00653 //class MyOneVersusAllGainAlgo : public MyAbstractGainAlgo
00654 class MyOneVersusAllGainAlgo : public MyAbstractGainAlgo< MyGainTriStateType >
00655 {
00656 protected:
00657   // can i allow approximate convergence? this algorithm will but not
00658   // MyOneVersusAllHopfieldGainAlgo will.
00659   bool _allowApproximateConvergence;
00660   map< BioFunction, MyNT > _finalEnergies;
00661   map< BioFunction, MyNT > _initialEnergies;
00662 
00663   // for each possible initial state, store the list of nodes in that initial state.
00664 //  map< MyGainTriStateType, MyNodeIdList > _nodesByInitialStateType;
00665 
00666   map< BioFunction, unsigned int > _numIterations;
00667   map< BioFunction, unsigned int > _numOperations;
00668 
00669 
00670   unsigned int _numTruePathViolatingAnnotations;
00671   unsigned int _numTruePathViolatingPredictions;
00672 
00673   // map from function ids to genes for annotations that violate the true path rule.
00674   map< string, set < string > > _truePathViolatingAnnotations;
00675   // map from function ids to genes for predictions that violate the true path rule.
00676   map< string, set < string > > _truePathViolatingPredictions;
00677 
00678 
00679 public:
00680   MyOneVersusAllGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r,
00681                          GeneOntology *go = NULL)
00682       : MyAbstractGainAlgo< MyGainTriStateType >(g, a, p, r, go),
00683         _allowApproximateConvergence(true),
00684         _finalEnergies(), _initialEnergies(),
00685         _numIterations(),
00686         _numTruePathViolatingAnnotations(0), _numTruePathViolatingPredictions(0),
00687         _truePathViolatingAnnotations(), _truePathViolatingPredictions()
00688     {}
00689   virtual ~MyOneVersusAllGainAlgo()
00690     {}
00691 
00692 
00693   virtual void computeCrossValidationResults(const BioFunction &function,
00694                                              const MyNodeIdList &cvNodes);
00695 
00696   virtual void computeCut(MyNodeIdList &nodesToAnnotate,
00697                           MyNodeIdSet &cutNodes);
00698 
00711   virtual MyNT computeEnergy(const MyNodeIdList &nodesToAnnotate);
00712 
00713 
00714   virtual void computePredictionConfidences(const BioFunction &function,
00715                                             MyNodeIdList &nodesToAnnotate,
00716                                             MyNodeIdList &predictedNodes);
00717 
00718   virtual void computePredictions(const BioFunction &function,
00719                                   const MyNodeIdList &nodesToAnnotate,
00720                                   MyNodeIdList &predictedNodes);
00721 //   template< typename StateType >
00722 //   MyGainState< StateType > computeState(MyNode &node);
00723   virtual MyGainTriState computeState(MyNode &node);
00724 
00725   virtual void constructCrossValidationLists(vector<MyNodeIdList> &cvLists);
00726 
00731   virtual bool hasConverged(const MyNodeIdList &nodesToAnnotate);
00732 
00733   virtual void maskNodeStates(const MyNodeIdList &nodes);
00734   //virtual void unmaskNodeStates(const MyNodeIdList &nodes);
00735 
00738 //  static
00739   virtual string getName()
00740     {
00741       return("OneVersusAllLinearSystem");
00742     }
00743 
00752   virtual void initialiseNodeStates(const BioFunction &function,
00753                                     MyNodeIdList &nodesToAnnotate);
00754 
00761   virtual void run(const MyNodeIdList &nodesToAnnotate);
00762 
00763 
00764 //  virtual void visualise(BioFunction &currentFunction, MyNodeIdList &nodesToAnnotate);
00765   virtual void visualisePredictions(const BioFunction &currentFunction,
00766                                     MyNodeIdList &nodesToAnnotate);
00767   virtual void visualiseCut(const BioFunction &currentFunction,
00768                             MyNodeIdList &predictedNodes);
00769 
00770   // these two methods cannot be const since they involve calls to
00771   // MyGainState::collapse().
00772 //   virtual void printNodesSpring(MyGraph &propagationGraph,
00773 //                                 string file, string currentNode);
00774 //   virtual void printEdgesSpring(MyGraph &propagationGraph, string file);
00775 
00780   virtual void printStatistics(ostream &statsStream);
00781 
00782         virtual void initialiseEdgeWeightsCutoff(const BioFunction &function);
00783         virtual void initialiseEdgeWeightsDepth(const BioFunction &function);
00784 
00785 };
00786 
00787 
00788 
00789 
00834 class MyOneVersusNoneGainAlgo : public MyOneVersusAllGainAlgo
00835 {
00836 protected:
00837 
00838 public:
00839   MyOneVersusNoneGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
00840       : MyOneVersusAllGainAlgo(g, a, p, r, go)
00841     {}
00842 
00843   virtual ~MyOneVersusNoneGainAlgo()
00844     {}
00845 
00846   virtual void initialiseNodeStates(const BioFunction &function,
00847                             MyNodeIdList &nodesToAnnotate);
00848 
00849   virtual void setCrossValidationList(MyNodeIdList &nodesToAnnotate, const MyNodeIdList &cvList);
00850 
00853 //  static
00854   virtual string getName()
00855     {
00856       return("OneVersusNoneLinearSystem");
00857     }
00858 };
00859 
00860 
00861 
00862 #if 0
00863 
00877 class MyOneVersusNoneGainAlgo : public MyAbstractGainAlgo< MyGainBiStateType >
00878 {
00879 protected:
00880 
00881 public:
00882   MyOneVersusNoneGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r)
00883       : MyAbstractGainAlgo< MyGainBiStateType >(g, a, p, r)
00884     {}
00885 
00886   virtual ~MyOneVersusNoneGainAlgo()
00887     {}
00888 
00889   virtual void computeCrossValidationResults(const BioFunction &function,
00890                                              const MyNodeIdList &cvNodes);
00891 
00892   virtual void computePredictionConfidences(const BioFunction &function,
00893                                             MyNodeIdList &nodesToAnnotate,
00894                                             MyNodeIdList &predictedNodes);
00895 
00896   virtual void computePredictions(const BioFunction &function,
00897                                   const MyNodeIdList &nodesToAnnotate,
00898                                   MyNodeIdList &predictedNodes);
00899 //   template< typename StateType >
00900 //   MyGainState< StateType > computeState(MyNode &node);
00901   virtual MyGainBiState computeState(MyNode &node);
00902 
00903   virtual void constructCrossValidationLists(vector<MyNodeIdList> &cvLists);
00904 
00905   virtual bool hasConverged(const MyNodeIdList &nodesToAnnotate);
00906 
00907   virtual void maskNodeStates(const MyNodeIdList &nodes);
00908   //virtual void unmaskNodeStates(const MyNodeIdList &nodes);
00909 
00912 //  static
00913   virtual string getName()
00914     {
00915       return("OneVersusNoneLinearSystem");
00916     }
00917 
00933   virtual void initialiseNodeStates(const BioFunction &function,
00934                                     MyNodeIdList &nodesToAnnotate);
00935 
00942   virtual void run(const MyNodeIdList &nodesToAnnotate);
00943 
00944 };
00945 
00946 
00966 class MyOneVersusNoneFunctionalFlowGainAlgo : public MyOneVersusNoneGainAlgo
00967 {
00968 protected:
00969   // a map from a node to the size of the reservoir at that node. the
00970   // _states variable will take care of the total inflow into the
00971   // node.
00972   map< MyNodeId, MyNT > _currentNodeReservoirs, _previousNodeReservoirs;
00973   // a map from a node to the total flow into the node. this variable
00974   // is just a helper (for debugging, to make sure that _states stores
00975   // the right info).
00976   map< MyNodeId, MyNT > _totalInflow;
00977 
00978 public:
00979   MyOneVersusNoneFunctionalFlowGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r)
00980       : MyOneVersusNoneGainAlgo(g, a, p, r), _currentNodeReservoirs(), _previousNodeReservoirs(), _totalInflow()
00981     {}
00982 
00983   virtual ~MyOneVersusNoneFunctionalFlowGainAlgo()
00984     {}
00985 
00999   virtual void computePredictions(const BioFunction &function,
01000                                   const MyNodeIdList &nodesToAnnotate,
01001                                   MyNodeIdList &predictedNodes);
01002   virtual MyGainBiState computeState(MyNode &node);
01003 
01004 
01007 //  static
01008   virtual string getName()
01009     {
01010       return("OneVersusNoneFunctionalFlow");
01011     }
01012 
01013   virtual void run(const MyNodeIdList &nodesToAnnotate);
01014 };
01015 #endif
01016 
01017 
01032 #if(0)
01033 class MyOneVersusNoneHeavisideGainAlgo : public MyOneVersusNoneGainAlgo
01034 {
01035 private:
01036     // this map stores the computed node thresholds. this map also keeps
01037   // track of which nodes i have already processed, so that i can
01038   // avoid processing them again. (a node may have multiple entries in
01039   // the pq, since it can get inserted whenever one of its neighbours
01040   // changes to a 1.)
01041   map< MyNodeId, MyNT > _nodeThresholds;
01042 
01043 public:
01044   MyOneVersusNoneHeavisideGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r)
01045       : MyOneVersusNoneGainAlgo(g, a, p, r)
01046     {}
01047 
01048   virtual ~MyOneVersusNoneHeavisideGainAlgo()
01049     {}
01050 
01063   virtual void computePredictions(const BioFunction &function,
01064                                   const MyNodeIdList &nodesToAnnotate,
01065                                   MyNodeIdList &predictedNodes);
01066 
01071   virtual MyGainBiState computeState(MyNode &node);
01072 
01075 //  static
01076   virtual string getName()
01077     {
01078       return("OneVersusNoneHeaviside");
01079     }
01080 
01081 
01082   virtual void run(const MyNodeIdList &nodesToAnnotate);
01083 };
01084 #endif
01085 
01093 #if(0)
01094 class MyOneVersusNoneShortestPathGainAlgo : public MyOneVersusNoneGainAlgo
01095 {
01096 private:
01097     // this map stores the computed node thresholds. this map also keeps
01098   // track of which nodes i have already processed, so that i can
01099   // avoid processing them again. (a node may have multiple entries in
01100   // the pq, since it can get inserted whenever one of its neighbours
01101   // changes to a 1.)
01102   map< MyNodeId, MyNT > _nodeThresholds;
01103 
01104   // the graph where the edge weight in the length of the shortest
01105   // path in _graph. this graph only needs to have edges between nodes
01106   // in state HYPOTHETICAL_STATE and nodes in state ANNOTATED_STATE.
01107   MyGraph _shortestPathGraph;
01108   // a subgraph of _shortestPathGraph that only connects nodes in +1
01109   // state. this graph is useful for cross validation.
01110   MyGraph _shortestPathGraphPositives;
01111 
01112 
01113 public:
01114   MyOneVersusNoneShortestPathGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r)
01115       : MyOneVersusNoneGainAlgo(g, a, p, r), _nodeThresholds(), _shortestPathGraph(),
01116         _shortestPathGraphPositives()
01117     {}
01118 
01119   virtual ~MyOneVersusNoneShortestPathGainAlgo()
01120     {}
01121 
01122 //   virtual void computePredictions(const BioFunction &function,
01123 //                                   const MyNodeIdList &nodesToAnnotate,
01124 //                                   MyNodeIdList &predictedNodes);
01125 //   template< typename StateType >
01126 //   MyGainState< StateType > computeState(const MyNode &node);
01127 
01128 //  virtual bool hasConverged(const MyNodeIdList &nodesToAnnotate);
01129 
01132 //  static
01133   virtual
01134   string getName()
01135     {
01136       return("OneVersusNoneShortestPath");
01137     }
01138 
01139   virtual void computePredictions(const BioFunction &function,  const MyNodeIdList &nodesToAnnotate,
01140                                   MyNodeIdList &predictedNodes);
01141 
01142 //   template< typename StateType >
01143 //   MyGainState< StateType > computeState(MyNode &node);
01144   MyGainBiState computeState(MyNode &node);
01145 
01149   virtual void initialiseNodeStates(const BioFunction &function,
01150                                     MyNodeIdList &nodesToAnnotate);
01151 
01152   virtual void run(const MyNodeIdList &nodesToAnnotate);
01153 
01154   virtual void setCrossValidationList(MyNodeIdList &nodesToAnnotate,
01155                                       const MyNodeIdList &cvList);
01156 };
01157 
01158 #endif
01159 
01160 
01161 
01168 class MyOneVersusAllLocalGainAlgo : public MyOneVersusAllGainAlgo
01169 {
01170 protected:
01171 
01172 public:
01173   MyOneVersusAllLocalGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r,
01174                               GeneOntology *go = NULL)
01175       : MyOneVersusAllGainAlgo(g, a, p, r, go)
01176     {
01177       // there is no sense of convergence here.
01178       _allowApproximateConvergence = true;
01179     }
01180 
01181   virtual ~MyOneVersusAllLocalGainAlgo()
01182     {}
01183 
01184   MyGainTriState computeState(MyNode &node);
01185 
01188 //  static
01189   virtual string getName()
01190     {
01191       return("OneVersusAllLocal");
01192     }
01193 
01194   // i need to implement this method just so that i can call the
01195   // computeState method for this class and not for the parent class.
01196   virtual void run(const MyNodeIdList &nodesToAnnotate);
01197 
01198 };
01199 
01200 
01218 class MyOneVersusAllHopfieldGainAlgo : public MyOneVersusAllGainAlgo
01219 {
01220 protected:
01221 
01222 public:
01223   MyOneVersusAllHopfieldGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r,
01224                                  GeneOntology *go = NULL)
01225       : MyOneVersusAllGainAlgo(g, a, p, r, go)
01226     {
01227       _allowApproximateConvergence = false;
01228     }
01229 
01230   virtual ~MyOneVersusAllHopfieldGainAlgo()
01231     {}
01232 
01233 //   virtual void computeCrossValidationResults(const BioFunction &function,
01234 //                                              const MyNodeIdList &cvNodes);
01235 
01236 //   virtual void computePredictionProbabilities(const BioFunction &function);
01237 
01238 //   virtual void computePredictions(const BioFunction &function,
01239 //                                   const MyNodeIdList &nodesToAnnotate,
01240 //                                   MyNodeIdList &predictedNodes);
01241 //  template< typename StateType >
01242 //  MyGainState< StateType > computeState(MyNode &node);
01243   MyGainTriState computeState(MyNode &node);
01244 
01249 //  virtual bool hasConverged(const MyNodeIdList &nodesToAnnotate);
01250 
01251 //  virtual void maskNodeStates(const MyNodeIdList &nodes);
01252 //  virtual void unmaskNodeStates(const MyNodeIdList &nodes);
01253 
01256 //  static
01257   virtual string getName()
01258     {
01259       return("OneVersusAllHopfield");
01260     }
01261 
01262   // i need to implement this method just so that i can call the
01263   // computeState method for this class and not for the parent class.
01264   virtual void run(const MyNodeIdList &nodesToAnnotate);
01265 
01266 };
01267 
01268 
// Empty placeholder type; it has no members.
struct MyGainMinCutInfo
{
};
01277 
01278 
01279 class MyOneVersusAllMincutGainAlgo : public MyOneVersusAllGainAlgo
01280 {
01281 protected:
01282   // set states of nodesToAnnotate based on sinkSideNodes.
01283   void _setStates(const MyNodeIdList &nodesToAnnotate, const set< MyNodeId > &sinkSideNodes);
01284 
01285 public:
01286 
01287   virtual ~MyOneVersusAllMincutGainAlgo()
01288     {}
01289 
01290 //  static
01291   virtual string getName()
01292     {
01293       return("OneVersusAllMincut");
01294     }
01295 
01296   // i need to implement this method just so that i can call the
01297   // computeState method for this class and not for the parent class.
01298   virtual void run(const MyNodeIdList &nodesToAnnotate);
01299 
01300 };
01301 
01302 
01317 class MyOneVersusAllSemiHierarchicalHopfieldGainAlgo : public  MyOneVersusAllHopfieldGainAlgo
01318 {
01319 protected:
01320   // history of results.
01321 //  map< BioFunction, map< MyNodeId, MyGainTriStateInfo > > _allStateWeights;
01322   map< BioFunction, map< MyNodeId, MyGainTriStateType > > _allInitialStates;
01323   map< BioFunction, map< MyNodeId, MyGainTriStateType > > _allFinalStates;
01324   // permutation of nodes.
01325   vector< MyNodeId > _globalNodePermutation;
01326   // mapping from node id to position in the permutation.
01327   map< MyNodeId, unsigned int > _globalNodePermutationMap;
01328 
01329 
01330   // for each function, count the number of its children that the
01331   // algorithm has processed. _storeResults() uses this variable to
01332   // reclaim memory
01333   map< GOFunction *, unsigned int > _numProcessedChildren;
01334 
01335   // counts of numbers stored in the next two maps.
01336   unsigned int _numStatesFixedAtStart;
01337   // map from function ids to genes storing those function-gene pairs
01338   // for which i am able to fix the state while initialising node
01339   // states.
01340   map< string, set < string > > _statesFixedAtStart;
01341 
01342 public:
01343   MyOneVersusAllSemiHierarchicalHopfieldGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r,
01344                                                  GeneOntology *go)
01345       : MyOneVersusAllHopfieldGainAlgo(g, a, p, r, go),
01346 //        _allStateWeights(),
01347         _allFinalStates(), _allInitialStates(),
01348         _globalNodePermutation(), _globalNodePermutationMap(),
01349         _numProcessedChildren(), _numStatesFixedAtStart(0),
01350         _statesFixedAtStart()
01351     {}
01352 
01353   virtual ~MyOneVersusAllSemiHierarchicalHopfieldGainAlgo()
01354     {}
01355 
01356 //   virtual void computeCrossValidationResults(const BioFunction &function,
01357 //                                              const MyNodeIdList &cvNodes);
01358 
01359 //   virtual void computePredictionProbabilities(const BioFunction &function);
01360 
01361   // the only reason for implementing this method in this class is to
01362   // have some place to store the results for the current function in
01363   // _allStateWeights.
01364   virtual void computePredictions(const BioFunction &function,
01365                                   const MyNodeIdList &nodesToAnnotate,
01366                                   MyNodeIdList &predictedNodes);
01367 
01376   virtual void initialiseNodeStates(const BioFunction &function,
01377                                     MyNodeIdList &nodesToAnnotate);
01378 
01381 //  static
01382   virtual string getName()
01383     {
01384       return("OneVersusAllSemiHierarchicalHopfield");
01385     }
01386   virtual void initialiseAlgorithm()
01387     {
01388       MyGraph::NodeIterator nitr = _graph.nodes();
01389       // i have the graph. create a random permutation of the nodes.
01390       while (nitr.hasNext())
01391         _globalNodePermutation.push_back(nitr.next().getId());
01392       random_shuffle(_globalNodePermutation.begin(), _globalNodePermutation.end());
01393       unsigned int index = 0;
01394       // compute map from node id to index in the permutation.
01395       for (vector< MyNodeId >::iterator gnpItr = _globalNodePermutation.begin();
01396            gnpItr != _globalNodePermutation.end(); gnpItr++)
01397         _globalNodePermutationMap[*gnpItr] = index++;
01398     }
01399 
01400   virtual void printStatistics(ostream &statsStream);
01401 
01402 //   virtual void run(const MyNodeIdList &nodesToAnnotate);
01403 
01404 protected:
01405     
01406   // store the current value of _states as results
01407   void _storeResults(const BioFunction &function);
01408 };
01409 
01410 
01440 class MyOneVersusAllHierarchicalHopfieldGainAlgo : public  MyOneVersusAllSemiHierarchicalHopfieldGainAlgo
01441 {
01442 protected:
01443 
01444 public:
01445   MyOneVersusAllHierarchicalHopfieldGainAlgo(MyGainGraph &g, MyAnnotations &a,
01446                                              MyGainParams &p, Reporter &r, GeneOntology *go)
01447       : MyOneVersusAllSemiHierarchicalHopfieldGainAlgo(g, a, p, r, go)
01448     {
01449     }
01450 
01451   virtual ~MyOneVersusAllHierarchicalHopfieldGainAlgo()
01452     {}
01453 
01454 //   virtual void computeCrossValidationResults(const BioFunction &function,
01455 //                                              const MyNodeIdList &cvNodes);
01456 
01457 //   virtual void computePredictionProbabilities(const BioFunction &function);
01458 
01459 //   virtual void computePredictions(const BioFunction &function,
01460 //                                   const MyNodeIdList &nodesToAnnotate,
01461 //                                   MyNodeIdList &predictedNodes);
01462 
01463 //   virtual void initialiseNodeStates(const BioFunction &function,
01464 //                                     MyNodeIdList &nodesToAnnotate);
01465 
01468 //  static
01469   virtual string getName()
01470     {
01471       return("OneVersusAllHierarchicalHopfield");
01472     }
01473 
01474 
01475 //   virtual void run(const MyNodeIdList &nodesToAnnotate);
01476 
01477 };
01478 
01479 
01480 typedef map< MyNodeId, unsigned int> MyFeatureNumberMap;
01481 typedef map< unsigned int, MyNodeId > MyFeatureNameMap;
01482 typedef map< MyNodeId, double > MyVectorWeightMap;
01483 typedef map< unsigned int, double > MySeparatorCoefficientMap;
01484 typedef enum {SUPERVISED, SEMI_SUPERVISED} MyLearnMode;
01485 
01486 class MyOneVersusAllAbstractSVMGainAlgo : public MyOneVersusAllGainAlgo
01487 {
01488         protected:
01489                 string trainBinaryFilename;
01490                 string trainArgs;
01491                 string trainInputFilename;
01492                 string modelFilename;
01493                 string testBinaryFilename;
01494                 string testArgs;
01495                 string testInputFilename;
01496                 string testOutputFilename;
01497 
01498                 MyFeatureNumberMap _featureNameToNumber;
01499                 MyFeatureNameMap _featureNumberToName;
01500                 MyVectorWeightMap _vectorWeights;
01501                 MySeparatorCoefficientMap _separatorCoefficients;
01502                 double _threshold;
01503                 MyLearnMode _learnMode;
01504                 bool _onboardPrediction;
01505 
01506                 virtual void _train (const MyNodeIdList &nodesToAnnotate);
01507                 virtual void _predict (const MyNodeIdList &nodesToAnnotate);
01508 
01509                 virtual void _createTrainInputFile (const MyNodeIdList &nodesToAnnotate);
01510 
01511                 virtual void _createTestInputFile (const MyNodeIdList &nodesToAnnotate);
01512 
01513                 virtual void _readTestOutputFile (const MyNodeIdList &nodesToAnnotate);
01514 
01515                 virtual void _createSVMOutputLine(ostream &ostr, MyGainTriStateType state, MyNode &node, bool includeHypothetical);
01516 
01517                 virtual void _addComment (ostream &ostr, MyNode &node) = 0;
01518 
01519                 virtual void _readModelFile () = 0;
01520 
01521                 virtual double _calculateSVMResult (MyNode &node);
01522 
01523                 virtual void _clearMaps();
01524 
01525         public:
01526                 MyOneVersusAllAbstractSVMGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL);
01527 
01528                 virtual void run(const MyNodeIdList &nodesToAnnotate);
01529 
01530                 virtual void visualiseCut(const BioFunction &currentFunction, MyNodeIdList &predictedNodes);
01531 
01532                 virtual void visualiseCVTemp(const BioFunction &currentFunction, MyNodeIdList &nodesToAnnotate);
01533 
01534                 virtual void setCrossValidationList(MyNodeIdList &nodesToAnnotate, const MyNodeIdList &cvList);
01535 
01536         private:
01537 
01538 };
01539 
01540 
01541 
01542 
01543 
01544 class MyOneVersusAllLibSVMGainAlgo : public MyOneVersusAllAbstractSVMGainAlgo
01545 {
01546         protected:
01547                 virtual void _addComment (ostream &ostr, MyNode &node) {}
01548 
01549                 virtual void _readModelFile () {_onboardPrediction = 0;}
01550 
01551         public:
01552                 MyOneVersusAllLibSVMGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01553                 : MyOneVersusAllAbstractSVMGainAlgo(g, a, p, r, go)
01554                 {
01555                         trainBinaryFilename = _params.getLibSVMDirectory() + "svm-train";
01556                         if ("" == (trainArgs =  _params.getLibSVMTrainOptions())) trainArgs = "-t 0 -c 674";
01557                         trainInputFilename = _params.getOutputDirectory() + "LIBSVM_TRAIN";
01558                         modelFilename = _params.getOutputDirectory() + "LIBSVM_MODEL";
01559                         testBinaryFilename = _params.getLibSVMDirectory() + "svm-predict";
01560                         testArgs = _params.getLibSVMTestOptions();
01561                         testInputFilename = _params.getOutputDirectory() + "LIBSVM_TEST";
01562                         testOutputFilename = _params.getOutputDirectory() + "LIBSVM_OUT";
01563                 }
01564 
01565                 virtual string getName()
01566                 {
01567                         return("OneVersusAllLibSVM");
01568                 }
01569 
01570         private:
01571 };
01572 
01573 
01574 class MyOneVersusAllSVMLightGainAlgo : public MyOneVersusAllAbstractSVMGainAlgo
01575 {
01576         protected:
01577                 virtual void _addComment (ostream &ostr, MyNode &node)
01578                 {
01579                         ostr << "# " << node.getId();
01580                 }
01581 
01582                 virtual void _readModelFile ();
01583 
01584         public:
01585                 MyOneVersusAllSVMLightGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01586                 : MyOneVersusAllAbstractSVMGainAlgo(g, a, p, r, go)
01587                 {
01588                         trainBinaryFilename = _params.getSVMLightDirectory() + "svm_learn";
01589                         if ("" == (trainArgs = _params.getSVMLightTrainOptions())) trainArgs = "";//"-b 0 -c 1.5";
01590                         trainInputFilename = _params.getOutputDirectory() + "SVMLIGHT_TRAIN";
01591                         modelFilename = _params.getOutputDirectory() + "SVMLIGHT_MODEL";
01592                         testBinaryFilename = _params.getSVMLightDirectory() + "svm_classify";
01593                         testArgs = _params.getSVMLightTestOptions();
01594                         testInputFilename = _params.getOutputDirectory() + "SVMLIGHT_TEST";
01595                         testOutputFilename = _params.getOutputDirectory() + "SVMLIGHT_OUT";
01596                 }
01597 
01598                 virtual string getName()
01599                 {
01600                         return("OneVersusAllSVMLight");
01601                 }
01602 
01603         private:
01604 
01605 };
01606 
01607 class MyOneVersusAllSVMLightTransductiveGainAlgo : public MyOneVersusAllSVMLightGainAlgo
01608 {
01609         protected:
01610         public:
01611                 MyOneVersusAllSVMLightTransductiveGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01612                 : MyOneVersusAllSVMLightGainAlgo(g, a, p, r, go)
01613                 {
01614                         _learnMode = SEMI_SUPERVISED;
01615                         trainArgs = _params.getSVMLightTrainOptions();
01616                 }
01617 
01618                 virtual string getName()
01619                 {
01620                         return("OneVersusAllSVMLightTransductive");
01621                 }
01622 
01623         private:
01624 
01625 };
01626 
01630 class MyOneVersusAllSinkSourceGainAlgo : public MyOneVersusAllGainAlgo
01631 {
01632 protected:
01633   const MyNT _ANN_RES, _HYP_RES, _NOT_RES;
01634   // map from node id to evidence code to weight, used to compute the
01635   // weight of the evidence codes that say that a node has a
01636   // particular function. I use this variable in MyOneVersusAllSinkSourceGainAlgo::initialiseNodeStates().
01637   //
01638   // tmm: 2010-02-16. I don't this need this any more, since I am just
01639   // getting the final weight directly from MyAnnotations. However,
01640   // there is no one unclear use of this variable in
01641   // MyOneVersusAllSinkSourceGainAlgo::run(), which I need to
01642   // understand before I can abolish this variable forever.
01643   map< string, map< string, MyNT > > _helperWeights;
01644 
01645   // edge weight for artificial sink
01646   MyNT _lambda;
01647   // edge weight for artificial source
01648   MyNT _mu;
01649 
01650   virtual void _applyArtificialSink(const MyNode &node, MyNT &num, MyNT &den)
01651   {
01652           // apply artificial sink to any node initially in not-annotated state
01653           bool isNotAnnotated(NOT_ANNOTATED_STATE == _initialStateTypes[node.getId()]);
01654           num += MyNT(isNotAnnotated)*_lambda*_NOT_RES;
01655           den += MyNT(isNotAnnotated)*_lambda;
01656   }
01657 
01658   virtual void _applyArtificialSource(const MyNode &node, MyNT &num, MyNT &den)
01659   {
01660           // apply artificial source to any node initially in annotated state
01661           bool isAnnotated(ANNOTATED_STATE == _initialStateTypes[node.getId()]);
01662           num += MyNT(isAnnotated)*_mu*_ANN_RES;
01663           den += MyNT(isAnnotated)*_mu;
01664   }
01665 
01666 public:
01670   MyOneVersusAllSinkSourceGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01671       : MyOneVersusAllGainAlgo(g, a, p, r, go), _ANN_RES(1), _HYP_RES(0), _NOT_RES(0), _helperWeights(), _lambda(0), _mu(0)
01672     {}
01673 
01677   virtual ~MyOneVersusAllSinkSourceGainAlgo()
01678     {}
01679 
01680   virtual void maskNodeStates(const MyNodeIdList &nodes);
01681 
01682   virtual void initialiseNodeStates(const BioFunction &function,
01683                                     MyNodeIdList &nodesToAnnotate);
01684 
01685 
01686   virtual void computePredictions(const BioFunction &function,
01687                                   const MyNodeIdList &nodesToAnnotate,
01688                                   MyNodeIdList &predictedNodes);
01689 
01690   virtual void computeCrossValidationResults(const BioFunction &function,
01691                                              const MyNodeIdList &cvNodes);
01692 
01693   virtual MyGainTriState computeState(MyNode &node);
01694 
01695   virtual bool hasConverged(const MyNodeIdList& nodes);
01696 
01699 //  static
01700   virtual string getName()
01701     {
01702       return("OneVersusAllSinkSource");
01703     }
01704 
01705   virtual void run(const MyNodeIdList &nodesToAnnotate);
01706 
01707 };
01708 
01709 #ifdef HAVE_LIBGSL
01710 class MyOneVersusAllSinkSourceDirectGainAlgo : public MyOneVersusAllSinkSourceGainAlgo
01711 {
01712 protected:
01713         gsl_matrix *A;
01714         gsl_permutation *P;
01715         gsl_vector *x;
01716         int *signum;
01717         map< MyNodeId, size_t > nodeIndices;
01718 public:
01719   MyOneVersusAllSinkSourceDirectGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01720       : MyOneVersusAllSinkSourceGainAlgo(g, a, p, r, go), A(), P(), x(), signum(new(int)), nodeIndices()
01721     {}
01722 
01723   virtual ~MyOneVersusAllSinkSourceDirectGainAlgo()
01724     {
01725                         delete(signum);
01726     }
01727 
01730 //  static
01731   virtual string getName()
01732     {
01733       return("OneVersusAllSinkSourceDirect");
01734     }
01735 
01736   virtual void run(const MyNodeIdList &nodesToAnnotate);
01737 };
01738 #endif //HAVE_LIBGSL
01739 
01740 
01741 
01742 
01759 class MyOneVersusNoneSinkSourceGainAlgo : public MyOneVersusAllSinkSourceGainAlgo
01760 {
01761 protected:
01762 
01763   virtual void _applyArtificialSink(const MyNode &node, MyNT &num, MyNT &den)
01764   {
01765           // apply artificial sink to any node initially in hypothetical state
01766           bool isHypothetical(HYPOTHETICAL_STATE == _initialStateTypes[node.getId()]);
01767           num += MyNT(isHypothetical)*_lambda*_NOT_RES;
01768           den += MyNT(isHypothetical)*_lambda;
01769   }
01770 
01771 public:
01777   MyOneVersusNoneSinkSourceGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01778       : MyOneVersusAllSinkSourceGainAlgo(g, a, p, r, go)
01779     {
01780       _lambda = p.getOneVersusNoneSinkSourceArtificialEdgeWeight();
01781     }
01782 
01789   virtual ~MyOneVersusNoneSinkSourceGainAlgo()
01790     {}
01791 
01792 
01795   virtual string getName()
01796     {
01797       return("OneVersusNoneSinkSource");
01798     }
01799 
01800   virtual void initialiseNodeStates(const BioFunction &function,
01801                                     MyNodeIdList &nodesToAnnotate);
01802 
01803   virtual void setCrossValidationList(
01804     MyNodeIdList &nodesToAnnotate, const MyNodeIdList &cvList);
01805 
01806 };
01807 
01808 
01809 class MyOneVersusNoneFunctionalFlowGainAlgo : public MyOneVersusNoneGainAlgo
01810 {
01811 protected:
01812   // a map from a node to the size of the reservoir at that node. the
01813   // _states variable will take care of the total inflow into the
01814   // node.
01815   map< MyNodeId, MyNT > _currentNodeReservoirs, _previousNodeReservoirs;
01816   // a map from a node to the total flow into the node. this variable
01817   // is just a helper (for debugging, to make sure that _states stores
01818   // the right info).
01819   map< MyNodeId, MyNT > _totalInflow;
01820 
01821 public:
01822   MyOneVersusNoneFunctionalFlowGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01823       : MyOneVersusNoneGainAlgo(g, a, p, r, go), _currentNodeReservoirs(), _previousNodeReservoirs()
01824     {}
01825 
01826   virtual ~MyOneVersusNoneFunctionalFlowGainAlgo()
01827     {}
01828 
01829   virtual void computePredictions(const BioFunction &function,
01830                                   const MyNodeIdList &nodesToAnnotate,
01831                                   MyNodeIdList &predictedNodes);
01832 
01833   virtual MyGainTriState computeState(MyNode &node);
01834 
01835 
01838 //  static
01839   virtual string getName()
01840     {
01841       return(string("OneVersusAllFunctionalFlow") + "-nr-" + boost::lexical_cast< string >(_params.getNumRounds()));
01842     }
01843 
01844   virtual void run(const MyNodeIdList &nodesToAnnotate);
01845 };
01846 
01847 #if(0)
01848 class MyOneVersusAllHeavisideGainAlgo : public MyOneVersusAllToNoneGainAlgo
01849 {
01850 private:
01851     // this map stores the computed node thresholds. this map also keeps
01852   // track of which nodes i have already processed, so that i can
01853   // avoid processing them again. (a node may have multiple entries in
01854   // the pq, since it can get inserted whenever one of its neighbours
01855   // changes to a 1.)
01856   map< MyNodeId, MyNT > _nodeThresholds;
01857 
01858 public:
01859   MyOneVersusAllHeavisideGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01860       : MyOneVersusAllToNoneGainAlgo(g, a, p, r, go)
01861     {}
01862 
01863   virtual ~MyOneVersusAllHeavisideGainAlgo()
01864     {}
01865 
01866   virtual void computePredictions(const BioFunction &function,
01867                                   const MyNodeIdList &nodesToAnnotate,
01868                                   MyNodeIdList &predictedNodes);
01869 //   template< typename StateType >
01870 //   MyGainState< StateType > computeState(MyNode &node);
01871   virtual MyGainTriState computeState(MyNode &node);
01872 
01873 //  virtual bool hasConverged(const MyNodeIdList &nodesToAnnotate);
01874 
01877 //  static
01878   virtual string getName()
01879     {
01880       return("OneVersusAllHeaviside");
01881     }
01882 
01883   virtual void run(const MyNodeIdList &nodesToAnnotate);
01884 };
01885 #endif
01886 
01900 class MyOneVersusNoneLocalGainAlgo : public MyOneVersusNoneGainAlgo
01901 {
01902 protected:
01903 
01904 public:
01905   MyOneVersusNoneLocalGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01906       : MyOneVersusNoneGainAlgo(g, a, p, r, go)
01907     {}
01908 
01909   virtual ~MyOneVersusNoneLocalGainAlgo()
01910     {}
01911 
01912   virtual MyGainTriState computeState(MyNode &node);
01913 
01914 
01917 //  static
01918   virtual string getName()
01919     {
01920       return("OneVersusNoneLocal");
01921     }
01924   virtual void run(const MyNodeIdList &nodesToAnnotate);
01925 };
01926 
01927 
01945 class MyOneVersusAllGeneManiaGainAlgo : public MyOneVersusAllGainAlgo
01946 {
01947  protected:
01948   const MyNT _ANN_RES, _NOT_RES;
01949   //when 1, geneMania algorithm will run exactly like SinkSource
01950   //to ensure the same output is obtained
01951   //when 0, geneMania will run as outlined in the paper
01952   const unsigned int sinkSourceTest;
01953  public:
01954   MyOneVersusAllGeneManiaGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
01955       : MyOneVersusAllGainAlgo(g, a, p, r, go), _ANN_RES(1), _NOT_RES(-1), sinkSourceTest(0)
01956     {}
01957 
01958   //bias for unknown nodes, (pos - neg)/total, called k in (Mostafavi, et al, 2008)
01959   MyNT hypotheticalNodeBiasValue;
01960 
01961   virtual ~MyOneVersusAllGeneManiaGainAlgo()
01962     {}
01963 
01964   virtual void maskNodeStates(const MyNodeIdList &nodes);
01965 
01966   virtual void initialiseNodeStates(const BioFunction &function,
01967                                     MyNodeIdList &nodesToAnnotate);
01968 
01969 
01970   virtual void computePredictions(const BioFunction &function,
01971                                   const MyNodeIdList &nodesToAnnotate,
01972                                   MyNodeIdList &predictedNodes);
01973 
01974   virtual void computeCrossValidationResults(const BioFunction &function,
01975                                              const MyNodeIdList &cvNodes);
01976 
01977   virtual MyGainTriState computeState(MyNode &node);
01978 
01979   virtual MyGainTriState computeStateImplicit(MyNode &node);
01980 
01981   virtual bool hasConverged(const MyNodeIdList& nodes);
01982 
01983 
01984   virtual void setCrossValidationList(MyNodeIdList &nodesToAnnotate, const MyNodeIdList &cvList);
01985 
01986   virtual void redoBias();
01987 
01988 
01991 //  static
01992   virtual string getName()
01993     {
01994       return("OneVersusAllGeneMania");
01995     }
01996 
01997   virtual void run(const MyNodeIdList &nodesToAnnotate);
01998 
01999 };
02000 
02020 #if(0)
02021 class MyOneVersusNoneGeneManiaGainAlgo : public MyOneVersusAllGeneManiaGainAlgo
02022 {
02023 protected:
02024   const MyNT _ANN_RES, _NOT_RES;
02025   //when 1, geneMania algorithm will run exactly like SinkSource
02026   //to ensure the same output is obtained
02027   //when 0, geneMania will run as outlined in the paper
02028   const unsigned int sinkSourceTest;
02029   MyNT artificialEdgeWeight;
02030 
02031 
02032 public:
02038   MyOneVersusNoneGeneManiaGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
02039       : MyOneVersusAllGeneManiaGainAlgo(g, a, p, r, go), _ANN_RES(1), _NOT_RES(0), sinkSourceTest(0)
02040     {
02041         artificialEdgeWeight = p.getOneVersusNoneSinkSourceArtificialEdgeWeight();
02042     }
02043 
02044 
02047   virtual string getName()
02048     {
02049       return("OneVersusNoneGeneMania");
02050     }
02051 
02052   virtual void initialiseNodeStates(const BioFunction &function,
02053                                     MyNodeIdList &nodesToAnnotate);
02054 
02055   virtual void maskNodeStates(const MyNodeIdList &nodes);
02056 
02057   virtual MyGainTriState computeState(MyNode &node);
02058 
02059   virtual void computeCrossValidationResults(const BioFunction &function,
02060                                                             const MyNodeIdList &cvNodes);
02061 
02062 };
02063 #endif
02064 
02065 #if 0
02066 class MyOneVersusNoneAnnotationCountGainAlgo : public MyOneVersusNoneGainAlgo
02067 {
02068 protected:
02069 
02070 public:
02071         MyOneVersusNoneAnnotationCountGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
02072       : MyOneVersusNoneGainAlgo(g, a, p, r, go)
02073     {}
02074 
02075   virtual ~MyOneVersusNoneAnnotationCountGainAlgo()
02076     {}
02077 
02078   virtual MyGainTriState computeState(MyNode &node)
02079   {
02080           MyNT score(0);
02081           int count(0);
02082           set< string > functions;
02083           _annotations.getAnnotationsForGene("", node.getId(), functions);
02084           for (set< string >::iterator fitr = functions.begin(); fitr != functions.end(); fitr++)
02085           {
02086                   if (_go->getFunctionById(*fitr)->getMinimumDepth() < 1)
02087                           continue;
02088                   unsigned int in(_annotations.numAnnotatedGenes(_annotations.getFunctionType(*fitr), *fitr));
02089                   unsigned int total(_graph.numNodes());
02090                   score += 1.0/(in);//*(total - in));
02091                   count++;
02092           }
02093           return(MyGainTriState(ANNOTATED_STATE, count));
02094   }
02095 
02096   virtual bool hasConverged(const MyNodeIdList &nodesToAnnotate)
02097   {
02098           return(true);
02099   }
02100 
02101   virtual string getName()
02102     {
02103       return("MyOneVersusNoneAnnotationCountGainAlgo");
02104     }
02105 };
02106 
02107 
02108 class MyOneVersusNoneNodeDegreeGainAlgo : public MyOneVersusNoneGainAlgo
02109 {
02110 protected:
02111 
02112 public:
02113         MyOneVersusNoneNodeDegreeGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
02114       : MyOneVersusNoneGainAlgo(g, a, p, r, go)
02115     {}
02116 
02117   virtual ~MyOneVersusNoneNodeDegreeGainAlgo()
02118     {}
02119 
02120   virtual MyGainTriState computeState(MyNode &node)
02121   {
02122           return(MyGainTriState(ANNOTATED_STATE, node.getWeightedDegree()));
02123   }
02124 
02125   virtual bool hasConverged(const MyNodeIdList &nodesToAnnotate)
02126   {
02127           return(true);
02128   }
02129 
02130   virtual string getName()
02131     {
02132       return("MyOneVersusNoneNodeDegreeGainAlgo");
02133     }
02134 };
02135 
02136 class MyOneVersusNoneRandomScoreGainAlgo : public MyOneVersusNoneGainAlgo
02137 {
02138 protected:
02139 
02140 public:
02141         MyOneVersusNoneRandomScoreGainAlgo(MyGainGraph &g, MyAnnotations &a, MyGainParams &p, Reporter &r, GeneOntology *go = NULL)
02142       : MyOneVersusNoneGainAlgo(g, a, p, r, go)
02143     {}
02144 
02145   virtual ~MyOneVersusNoneRandomScoreGainAlgo()
02146     {}
02147 
02148   virtual MyGainTriState computeState(MyNode &node)
02149   {
02150           return(MyGainTriState(ANNOTATED_STATE, 1.0*rand()/RAND_MAX));
02151   }
02152 
02153   virtual bool hasConverged(const MyNodeIdList &nodesToAnnotate)
02154   {
02155           return(true);
02156   }
02157 
02158   virtual string getName()
02159     {
02160       return("MyOneVersusNoneRandomScoreGainAlgo");
02161     }
02162 };
02163 #endif
02164 
02165 #endif //_GAIN_ALGORITHM_H
 All Classes Functions Variables Typedefs Friends