00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00032 #ifndef _ANNOTATIONS_H
00033 #define _ANNOTATIONS_H
00034
00035 #include <iostream>
00036 #include <fstream>
00037 #include <iterator>
00038 #include <map>
00039 #include <set>
00040 #include <string>
00041
00042 using namespace std;
00043
00044 #include "boost/multi_index_container.hpp"
00045 #include "boost/multi_index/member.hpp"
00046 #include "boost/multi_index/mem_fun.hpp"
00047 #include "boost/multi_index/ordered_index.hpp"
00048 #include <boost/multi_index/composite_key.hpp>
00049
00050 using boost::multi_index_container;
00051 using namespace boost::multi_index;
00052
00053
00054 #include "boost/algorithm/string/classification.hpp"
00055
00056 #include "boost/algorithm/string.hpp"
00057
00058 #include "enrichment.h"
00059 #include "myiterator.h"
00060
00061 #include "biofunction.h"
00062 #include "GO.h"
00063
00064
00065 typedef double MyNT;
00066
00068
00069
00070
00071
00072
00073
00079 enum GenGOParametersOptimisationType { GENGO_PARAMETER_OPTIMISATION_NONE = 0,
00080 GENGO_PARAMETER_OPTIMISATION_LOOP,
00081 GENGO_PARAMETER_OPTIMISATION_LEARN,
00082
00083 GENGO_PARAMETER_OPTIMISATION_LOOP_AND_LEARN,
00084 };
00085
00086
00087
00088
00089 enum MyGainAnnotationType { NOT_ANNOTATED_STATE = -1, HYPOTHETICAL_STATE,
00090 ANNOTATED_STATE};
00091
00093 inline MyGainAnnotationType makeAnnotationType(string c)
00094 {
00095 if ("-1" == c)
00096 return(NOT_ANNOTATED_STATE);
00097 if ("0" == c)
00098 return(HYPOTHETICAL_STATE);
00099 if ("1" == c)
00100 return(ANNOTATED_STATE);
00101 cerr << "Unknown annotation state" << c << endl;
00102 exit(-1);
00103 }
00104
00105 extern map< string, string > GO_UNKNOWN_FUNCTIONS;
00106
00107
00108
00109
00110
00111
00112
00113
00114
00118 struct GOEvidenceCodes
00119 {
00120 private:
00121 map< string, MyNT >_weights;
00122 public:
00123
00125 GOEvidenceCodes()
00126 {
00127
00128
00129
00130
00131
00132
00133
00134
00135
00136
00137
00138
00139
00140
00141
00142
00143
00144
00145
00146
00147 _weights["IDA"] = 0.9;
00148 _weights["IMP"] = 0.9;
00149 _weights["IGI"] = 0.9;
00150 _weights["IC"] = 0.8;
00151 _weights["IPI"] = 0.6;
00152 _weights["IEP"] = 0.6;
00153 _weights["ISS"] = 0.6;
00154 _weights["RCA"] = 0.6;
00155 _weights["TAS"] = 0.6;
00156 _weights["NAS"] = 0.3;
00157 _weights["IEA"] = 0.2;
00158 }
00159
00165 MyNT getWeight(string code) const
00166 {
00167 static set< string > _unknownCodes;
00168 map< string, MyNT >::const_iterator witr = _weights.find(code);
00169 if (_weights.end() == witr)
00170 {
00171 if (_unknownCodes.end() == _unknownCodes.find(code))
00172 {
00173 cerr << "GOEvidenceCodes::getWeight(): unknown evidence code "
00174 << code << ", returning weight of 0" << endl;
00175 _unknownCodes.insert(code);
00176 }
00177 return(0);
00178 }
00179
00180 return(witr->second);
00181 }
00182
00186 bool hasWeight(string code) const
00187 {
00188 map< string, MyNT >::const_iterator witr = _weights.find(code);
00189 return(_weights.end() != witr);
00190 }
00191
00192
00198 bool isExperimental(string code) const
00199 {
00200 if ("TAS" == code)
00201 return(true);
00202 if ("IDA" == code)
00203 return(true);
00204 if ("IMP" == code)
00205 return(true);
00206 if ("IGI" == code)
00207 return(true);
00208 if ("IPI" == code)
00209 return(true);
00210 if ("IEP" == code)
00211 return(true);
00212 if ("ISS" == code)
00213 return(false);
00214 if ("IC" == code)
00215 return(true);
00216 if ("NAS" == code)
00217 return(true);
00218 if ("RCA" == code)
00219 return(false);
00220 if ("IEA" == code)
00221 return(false);
00222 }
00223
00229 void checkEvidenceCodeWeights(const map< string, unsigned int > &ecUsageCounts, ostream &logStream);
00230
00231
00242 void readEvidenceCodeWeights(string ecwFile);
00243 };
00244
00245
00246 template < class container >
00247 void readFunctions(string file, container &functions)
00248 {
00249 cout << "Reading items from " << file << endl;
00250 ifstream ifstr(file.c_str());
00251 if (!ifstr)
00252 {
00253 cerr << "Cannot open file " << file << endl;
00254 exit(-1);
00255 }
00256 string func, line;
00257 vector< string > items;
00258 while (!ifstr.eof())
00259 {
00260 getline(ifstr, line);
00261
00262 if ('#' == line[0])
00263 continue;
00264 items.clear();
00265 boost::split(items, line, boost::is_any_of("\t"));
00266 if (items.size() == 0)
00267 continue;
00268 func = items[0];
00269 if (func=="")
00270 continue;
00271 functions.insert(func);
00272 }
00273 ifstr.close();
00274 cout << "Read " << functions.size() << " items." << endl;
00275 }
00276
00277
00281 struct annotation_gene_function_evidencecode_index {};
00282 struct annotation_gene_function_index {};
00283 struct annotation_gene_functiontype_index {};
00284 struct annotation_gene_index {};
00285 struct annotation_function_index {};
00286
00287
00288 struct annotation_function_gene_index {};
00293
00294
00295
00296
00297
00298
00299 struct FunctionalAnnotation
00300 {
00301 public:
00302
00303 string _gene;
00304 BioFunction _function;
00305 string _evidenceCode;
00306 MyGainAnnotationType _annotationStatus;
00307
00308 bool _mostSpecificAnnotation;
00309
00310 public:
00311 FunctionalAnnotation(string g, string f, string c, string e,
00312 MyGainAnnotationType status, bool msa = false)
00313 : _gene(g), _function(c, f), _evidenceCode(e),
00314 _annotationStatus(status),_mostSpecificAnnotation(msa)
00315 {}
00316 FunctionalAnnotation(string g, const BioFunction &f, string e,
00317 MyGainAnnotationType status, bool msa = false)
00318 : _gene(g), _function(f), _evidenceCode(e), _annotationStatus(status),
00319 _mostSpecificAnnotation(msa)
00320 {}
00321
00323 string getFunctionType() const
00324 {
00325 return(_function.getCategory());
00326 }
00327
00328 };
00329
00330
00331
00332
00333
00334
00335
00336
00337
00338
00339
00340
00341
00342
00343
00344
00345
00355 typedef multi_index_container<
00356
00357 FunctionalAnnotation,
00358
00359 indexed_by<
00360
00361
00362
00363
00364
00365
00366
00367
00368 ordered_unique<
00369
00370 tag< annotation_gene_function_evidencecode_index >,
00371
00372 composite_key<
00373
00374 FunctionalAnnotation,
00375
00376
00377
00378
00379 member< FunctionalAnnotation, string, &FunctionalAnnotation::_gene>,
00380 member< FunctionalAnnotation, BioFunction, &FunctionalAnnotation::_function >,
00381 member< FunctionalAnnotation, string, &FunctionalAnnotation::_evidenceCode >
00382 >
00383
00384
00385
00386
00387
00388 >,
00389
00390
00391
00392
00393 ordered_non_unique<
00394 tag< annotation_gene_function_index >,
00395 composite_key<
00396 FunctionalAnnotation,
00397 member< FunctionalAnnotation, string, &FunctionalAnnotation::_gene >,
00398 member< FunctionalAnnotation, BioFunction, &FunctionalAnnotation::_function >
00399 >,
00400 composite_key_compare<
00401 std::less<std::string>,
00402 std::less< BioFunction >
00403 >
00404 >,
00405
00406
00407
00408
00409
00410 ordered_non_unique<
00411 tag< annotation_function_gene_index >,
00412 composite_key<
00413 FunctionalAnnotation,
00414 member< FunctionalAnnotation, BioFunction, &FunctionalAnnotation::_function >,
00415 member< FunctionalAnnotation, string, &FunctionalAnnotation::_gene >
00416 >,
00417 composite_key_compare<
00418 std::less< BioFunction >,
00419 std::less<std::string>
00420 >
00421 >,
00422
00423
00424
00425
00426
00427
00428
00429
00430 ordered_non_unique<
00431 tag< annotation_gene_functiontype_index >,
00432 composite_key<
00433 FunctionalAnnotation,
00434 member< FunctionalAnnotation, string, &FunctionalAnnotation::_gene >,
00435
00436 const_mem_fun< FunctionalAnnotation, string, &FunctionalAnnotation::getFunctionType >
00437 >
00438 >,
00439
00440
00441 ordered_non_unique< tag< annotation_gene_index >,
00442 member< FunctionalAnnotation, std::string,
00443 &FunctionalAnnotation::_gene > >,
00444
00445
00446 ordered_non_unique< tag< annotation_function_index >,
00447 member< FunctionalAnnotation, BioFunction,
00448 &FunctionalAnnotation::_function > >
00449
00450 >
00451 > SetOfFunctionalAnnotations;
00452
00500 class MyAnnotations
00501 {
00502
00503 private:
00504
00505 typedef set< string > BioFunctionSet;
00506 typedef map< string, BioFunctionSet > MyGenesToBioFunctionTypes;
00507 typedef map< string, MyGenesToBioFunctionTypes > MyGenesToFunctions;
00508
00509
00510
00511 typedef set< string > MyGeneSet;
00512 typedef map< string, MyGeneSet > BioFunctionToGenes;
00513 typedef map< string, BioFunctionToGenes > BioFunctionTypesToGenes;
00514
00515 typedef BioFunctionToGenes::iterator _BioFunctionsIterator;
00516 typedef BioFunctionToGenes::const_iterator _BioFunctionsConstIterator;
00517 typedef BioFunctionTypesToGenes::iterator _NewBioFunctionsIterator;
00518
00519
00520 typedef SetOfFunctionalAnnotations::index< annotation_function_index >::type FunctionIndex;
00521 typedef SetOfFunctionalAnnotations::index< annotation_function_gene_index >::type FunctionGeneIndex;
00522 typedef SetOfFunctionalAnnotations::index< annotation_gene_index >::type GeneIndex;
00523
00524
00525 SetOfFunctionalAnnotations _allAnnotations;
00526
00527 SetOfFunctionalAnnotations _allNonAnnotations;
00528
00529
00530 GOEvidenceCodes _evidenceCodes;
00531
00532 map< string, unsigned int > _evidenceCodeCountsInAllAnnotations;
00533
00534
00535
00536 map< string, set< string > > functionTypeAnnotatedGenes;
00537
00538
00539
00540 map< string, set< string > > functionTypeNonAnnotatedGenes;
00541
00542
00543 BioFunctionTypesToGenes functionsAnnotatedGenes;
00544
00545
00546 BioFunctionTypesToGenes functionsNonAnnotatedGenes;
00547
00548
00549 MyGenesToFunctions geneAnnotatedFunctions;
00550 MyGenesToFunctions geneNonAnnotatedFunctions;
00551
00552
00553 map< string, unsigned int > numFunctionAnnotatedGenes;
00554
00555
00556 map< string, unsigned int > numFunctionNonAnnotatedGenes;
00557
00558 map< string, MyNT > functionFrequencies;
00559
00560
00561 GeneOntology *goDAG;
00562 bool _truePathRuleAppliedUpward;
00563 bool _truePathRuleAppliedDownward;
00564 bool _truePathRuleAppliedSideways;
00565
00566 map< string, set< string > > _functionsWithOverlappingAnnoations;
00567
00568 bool _overlappingFunctionsFlag;
00569
00570 public:
00571
00572 class NewBioFunctionsIterator;
00573 friend class NewBioFunctionsIterator;
00574
00581 class BioFunctionsIterator: public MyIterator< string >
00582 {
00583 private:
00584 _BioFunctionsIterator start;
00585 _BioFunctionsIterator end;
00586 _BioFunctionsIterator current;
00587
00588 public:
00589 BioFunctionsIterator(const _BioFunctionsIterator _start,
00590 const _BioFunctionsIterator _end)
00591 : start(_start), end(_end), current(_start)
00592 {}
00593
00594 virtual bool hasNext()
00595 {
00596 return(current != end);
00597 }
00598
00599 string &next()
00600 {
00601
00602
00603
00604 string &_next(const_cast< string & >((*current).first));
00605 current++;
00606 return(_next);
00607 }
00608 };
00609
00616 class BioFunctionsConstIterator: public MyIterator< string >
00617 {
00618 private:
00619 _BioFunctionsConstIterator start;
00620 _BioFunctionsConstIterator end;
00621 _BioFunctionsConstIterator current;
00622
00623 public:
00624 BioFunctionsConstIterator(_BioFunctionsConstIterator _start,
00625 _BioFunctionsConstIterator _end)
00626 : start(_start), end(_end), current(_start)
00627 {}
00628
00629 virtual bool hasNext()
00630 {
00631 return(current != end);
00632 }
00633
00634 string &next()
00635 {
00636
00637
00638
00639 string &_next(const_cast< string & >((*current).first));
00640
00641 current++;
00642 return(_next);
00643 }
00644 };
00645
00654 class BioFunctionsIteratorMultiIndex: public MyIterator< BioFunction >
00655 {
00656 private:
00657
00658 const SetOfFunctionalAnnotations & _allAnnotations;
00659
00660 const FunctionIndex &_functionIndex;
00661 FunctionIndex::iterator _startItr;
00662 FunctionIndex::iterator _endItr;
00663
00664 FunctionIndex::iterator _currentItr;
00665
00666 BioFunction _currentFunction;
00667
00668
00669 public:
00671 BioFunctionsIteratorMultiIndex(const SetOfFunctionalAnnotations &_anns)
00672 : _allAnnotations(_anns), _functionIndex(_allAnnotations.get< annotation_function_index >()),
00673 _startItr(_allAnnotations.get< annotation_function_index >().begin()),
00674 _endItr(_allAnnotations.get< annotation_function_index >().end()), _currentItr(_startItr),
00675 _currentFunction()
00676 {
00677
00678 if (_startItr != _endItr)
00679 _currentFunction = _currentItr->_function;
00680 }
00682 virtual bool hasNext()
00683 {
00684 return(_currentItr != _endItr);
00685 }
00687 BioFunction &next()
00688 {
00689 _currentFunction = _currentItr->_function;
00690 _moveCurrentItr();
00691 return(_currentFunction);
00692 }
00693 private:
00694
00695 void _moveCurrentItr()
00696 {
00697 _currentItr = _functionIndex.upper_bound(_currentFunction);
00698 }
00699
00700 };
00701
00702
00703 public:
00704 MyAnnotations()
00705 : functionTypeAnnotatedGenes(), functionTypeNonAnnotatedGenes(),
00706 functionsAnnotatedGenes(), functionsNonAnnotatedGenes(),
00707 geneAnnotatedFunctions(),
00708 geneNonAnnotatedFunctions(),
00709 numFunctionAnnotatedGenes(),
00710 numFunctionNonAnnotatedGenes(),
00711 _evidenceCodes(), _evidenceCodeCountsInAllAnnotations(),
00712 functionFrequencies(),
00713 goDAG(NULL), _truePathRuleAppliedUpward(false), _truePathRuleAppliedDownward(false),
00714 _truePathRuleAppliedSideways(false), _overlappingFunctionsFlag(false)
00715 {
00716
00717 _initialise();
00718 }
00719
00720 virtual ~MyAnnotations()
00721 {}
00722
00723 void setOverlappingFunctionsFlag()
00724 {
00725 _overlappingFunctionsFlag = true;
00726 }
00727
00729 void addAnnotation(string gene, const BioFunction &function,
00730 string evidenceCode,
00731 MyGainAnnotationType status = ANNOTATED_STATE,
00732 bool msa = false);
00733
00735 void addAnnotation(string gene, string function, string funcType,
00736 string evidenceCode,
00737 MyGainAnnotationType status = ANNOTATED_STATE,
00738 bool msa = false);
00739
00740
00741
00748
00786 void applyTruePathRule(GeneOntology &go, bool applyUpward = true, bool applyDownward = true, bool applySideways = true);
00787
00788
00810 bool checkTruePathRule(GeneOntology &go, ostream *ostr = NULL,
00811 bool *annotationsOK = NULL, bool *unknownOK = NULL);
00812
00813
00836 void computeDifference(MyAnnotations &other, MyAnnotations &difference,
00837 bool restrictToCommonGenes = false);
00838
00844 void computeEnrichments(const set< string > &geneSet,
00845 const GeneOntology *go,
00846 vector<EnrichmentRecord< string, string> > &rejalts);
00847
00863 void computeEnrichmentsGenGO(const set< string > &geneSet,
00864 vector<EnrichmentRecord< string, string> > &rejalts,
00865 const GeneOntology *go = NULL,
00866 GenGOParametersOptimisationType optType = GENGO_PARAMETER_OPTIMISATION_NONE
00867 ) const;
00868
00878 void computeEnrichmentsGenGO(const set< string > &geneSet,
00879 string functionType,
00880 vector<EnrichmentRecord< string, string> > &rejalts,
00881 const GeneOntology *go = NULL,
00882 GenGOParametersOptimisationType optType = GENGO_PARAMETER_OPTIMISATION_NONE
00883 ) const;
00884
00888 MyNT computeFunctionFrequency(string funcType, string func);
00889
00890
00891 void computeStatistics();
00892
00893
00894 void computeGeneCounts(map< BioFunction, unsigned int > &geneCounts);
00895
00896 void copyAnnotationsForFunctions(const set< BioFunction > &functions, MyAnnotations &annotations);
00897
00898
00904 virtual MyAnnotations::BioFunctionsIterator functions(string type)
00905 {
00906 MyAnnotations::BioFunctionsIterator itr(functionsAnnotatedGenes[type].begin(),
00907 functionsAnnotatedGenes[type].end());
00908 return(itr);
00909 }
00910
00913 virtual MyAnnotations::BioFunctionsIteratorMultiIndex functionsMultiIndex() const
00914 {
00915
00916 MyAnnotations::BioFunctionsIteratorMultiIndex itr(_allAnnotations);
00917 return(itr);
00918 }
00919
00926 set< string > functionTypes() const
00927 {
00928 set< string > types;
00929 BioFunctionTypesToGenes::const_iterator itr;
00930 for (itr = functionsAnnotatedGenes.begin(); itr != functionsAnnotatedGenes.end(); itr++)
00931 types.insert((*itr).first);
00932 return(types);
00933 }
00934
00949 MyNT getAnnotatedFunctionWeight(string gene, string func, string type);
00950
00962 MyNT getAnnotatedFunctionWeightProbabilisticOr(string gene, string func, string type);
00963
00976 map< string, MyNT > getAnnotatedFunctionWeights(string gene, string func, string type);
00977
00995
00996 map< string, MyNT > getAnnotatedWeights(string gene, string type);
00997
01004 void getAnnotatedGenes(string functype, set< string >& annotatedGenes);
01005
01018 void getAnnotations(string type, map< string, set< string > > &annotations) const;
01019
01033 void getAnnotationsByGene(string type, map< string, set< string > > &annotations) const;
01034
01047 virtual void getAnnotations(string type, string function,
01048 set< string > &annotations) const;
01049
01058 void getAnnotations(const GOFunction * function, set< string > &annotations) const;
01059
01069 void getAnnotations(const set< GOFunction *> &functions,
01070 set< string > &annotations) const;
01071
01089 virtual void getAnnotationsForGene(string type, string gene,
01090 set< string > &annotations) const;
01091
01092
01099 void getEvidenceCodeCounts(map< string, unsigned int > &ecCounts) const
01100 {
01101 ecCounts = _evidenceCodeCountsInAllAnnotations;
01102 }
01103
01117 void getEvidenceCodeCounts(string funcType, string funcId, map< string, unsigned int > &ecCounts) const;
01118
01119
01120
01122 MyNT getFunctionFrequency(string function) const
01123 {
01124 map< string, MyNT >::const_iterator itr;
01125 itr = functionFrequencies.find(function);
01126 if (itr == functionFrequencies.end())
01127 return(0);
01128 return((*itr).second);
01129 }
01130
01140 string getFunctionType(string functionName) const;
01141
01142
01146 bool hasExperimentallyAnnotatedFunction(string gene, string category) const
01147 {
01148
01149 const GeneIndex &gi = _allAnnotations.get< annotation_gene_index >();
01150 GeneIndex::const_iterator start = gi.lower_bound(gene), end = gi.upper_bound(gene);
01151 for (GeneIndex::const_iterator gitr = start; gitr != end; gitr++)
01152 {
01153 if (_evidenceCodes.isExperimental(gitr->_evidenceCode) && gitr->_function.getCategory() == category)
01154 return (true);
01155 }
01156 return (false);
01157 }
01158
01159
01161 bool hasAnnotatedFunction(string gene) const
01162 {
01163 const GeneIndex &gi = _allAnnotations.get< annotation_gene_index >();
01164 return (gi.end() != gi.find(gene));
01165
01166
01167 }
01168
01170 bool hasAnnotatedFunction(string gene, string category) const
01171 {
01172 return(hasAnnotatedFunction(gene) &&
01173
01174 (geneAnnotatedFunctions.find(gene)->second.end() !=
01175 geneAnnotatedFunctions.find(gene)->second.find(category)));
01176 }
01177
01198
01199 bool isNotKnownToBeAnnotatedWithFunction(string gene, string func, string type)
01200 {
01201 if (!_truePathRuleAppliedDownward)
01202
01203 return(isNotKnownToBeAnnotatedWithFunctionSansTruePathRule(gene, func, type, *goDAG));
01204
01205 if(_overlappingFunctionsFlag)
01206 {
01207 if (haveOverlappingFunction(gene, func, type))
01208 return true;
01209 }
01210
01211
01212
01213
01214 MyGenesToFunctions::const_iterator gfItr;
01215 MyGenesToBioFunctionTypes::const_iterator gbftItr;
01216
01217 if (geneAnnotatedFunctions.end() ==
01218 (gfItr = geneAnnotatedFunctions.find(gene)))
01219 return(true);
01220
01221 if (gfItr->second.end() == (gbftItr = gfItr->second.find(type)))
01222 return(true);
01223
01224 if (gbftItr->second.end() !=
01225 gbftItr->second.find(GO_UNKNOWN_FUNCTIONS[type]))
01226 return(true);
01227
01228
01229
01230 if ((geneNonAnnotatedFunctions[gene][type].end() !=
01231 geneNonAnnotatedFunctions[gene][type].find(func))
01232
01233
01234
01235
01236
01237
01238 )
01239 return(true);
01240
01241
01242 if (geneAnnotatedFunctions.end() ==
01243 geneAnnotatedFunctions.find(gene))
01244 return(true);
01245
01246 if (geneAnnotatedFunctions[gene].end() ==
01247 geneAnnotatedFunctions[gene].find(type))
01248 return(true);
01249
01250 if (geneAnnotatedFunctions[gene][type].end() !=
01251 geneAnnotatedFunctions[gene][type].find(GO_UNKNOWN_FUNCTIONS[type]))
01252 return(true);
01253
01254
01255
01256 if ((geneNonAnnotatedFunctions[gene][type].end() !=
01257 geneNonAnnotatedFunctions[gene][type].find(func))
01258
01259
01260
01261
01262
01263
01264 )
01265 return(true);
01266
01267
01268 return(
01269
01270 (geneAnnotatedFunctions.end() ==
01271 geneAnnotatedFunctions.find(gene)) ||
01272
01273 (geneAnnotatedFunctions[gene].end() ==
01274 geneAnnotatedFunctions[gene].find(type)) ||
01275
01276 (geneAnnotatedFunctions[gene][type].end() !=
01277 geneAnnotatedFunctions[gene][type].find(GO_UNKNOWN_FUNCTIONS[type])) ||
01278
01279
01280
01281 ((geneNonAnnotatedFunctions[gene][type].end() !=
01282 geneNonAnnotatedFunctions[gene][type].find(func))
01283
01284
01285
01286
01287
01288
01289 )
01290 );
01291 }
01292
01318 bool isNotKnownToBeAnnotatedWithFunctionSansTruePathRule(string gene, string func, string type,
01319 GeneOntology &goDAG);
01320
01321
01324 bool hasAnnotatedGene(string func, string type) const
01325 {
01326 BioFunctionTypesToGenes::const_iterator itr = functionsAnnotatedGenes.find(type);
01327 if (functionsAnnotatedGenes.end() == itr)
01328
01329 return(false);
01330 BioFunctionToGenes::const_iterator gitr = itr->second.find(func);
01331 if (itr->second.end() == gitr)
01332
01333 return(false);
01334
01335 if (gitr->second.empty())
01336 return(false);
01337 return(true);
01338 }
01339
01341 bool hasAnnotatedGene(const BioFunction &function) const
01342 {
01343 return(hasAnnotatedGene(function.getId(), function.getCategory()));
01344 }
01345
01346
01347
01348 bool haveSameFunction(string gene1, string gene2, const BioFunction &function);
01349
01350
01351 bool haveSameFunction(string gene1, string gene2,
01352 const vector< BioFunction >& functionVector);
01353 template < class InputIterator >
01354 bool haveSameFunction(string gene1, string gene2,
01355 InputIterator first, InputIterator last);
01356
01357 bool haveSameFunction(string gene1, string gene2, string type);
01358
01359 bool haveSameFunction(string gene1, string gene2);
01360
01361
01362 bool haveOverlappingFunction(string gene1, string function, string type);
01363
01364
01365
01381
01382
01383
01384
01385 bool isAnnotatedWithFunction(string gene, string func, string type, bool beforeTruePathRule = false);
01386
01387
01391 bool isAnnotatedWithFunctionExperimentally(string gene, string func, string type);
01392
01399 bool isUnknownFunction(string func, string type) const
01400 {
01401 map< string, string >::const_iterator itr = GO_UNKNOWN_FUNCTIONS.find(type);
01402 return((GO_UNKNOWN_FUNCTIONS.end() != itr)
01403 && (func == itr->second));
01404 }
01405
01410
01411
01415 void keepAnnotationsForGenes(set< string > &geneSet);
01416
01417
01419 unsigned int numAnnotations(string category = "");
01420
01424
01425 unsigned int numUnknownAnnotations(string category = "");
01426
01433 unsigned int numAnnotatedGenes(string funcType, string function)
01434 {
01435 return(functionsAnnotatedGenes[funcType][function].size());
01436
01437 }
01438
01440 unsigned int numAnnotatedGenes(string funcType = "")
01441 {
01442 if ("" != funcType)
01443 return(functionTypeAnnotatedGenes[funcType].size());
01444 set< string > funcTypes = functionTypes();
01445 set< string >::iterator ftItr;
01446 unsigned int sum = 0;
01447 for (ftItr = funcTypes.begin(); ftItr != funcTypes.end(); ftItr++)
01448 sum += functionTypeAnnotatedGenes[*ftItr].size();
01449 return(sum);
01450 }
01451
01453 unsigned int numAnnotatingFunctions(string gene, string funcType)
01454 {
01455 return(geneAnnotatedFunctions[gene][funcType].size());
01456 }
01457
01465 unsigned int numAnnotatingFunctions(string funcType = "")
01466 {
01467 if ("" != funcType)
01468 return(functionsAnnotatedGenes[funcType].size());
01469 set< string > funcTypes = functionTypes();
01470 set< string >::iterator ftItr;
01471 unsigned int sum = 0;
01472 for (ftItr = funcTypes.begin(); ftItr != funcTypes.end(); ftItr++)
01473 sum += numAnnotatingFunctions(*ftItr);
01474 return(sum);
01475 }
01476
01478 unsigned int numNonAnnotatedGenes(string funcType)
01479 {
01480 #ifdef DEBUG
01481 cout << "function type is " << funcType
01482 << ", unknown function is " << GO_UNKNOWN_FUNCTIONS[funcType]
01483 << ", #non-annotated genes is "
01484 << functionsNonAnnotatedGenes[funcType][GO_UNKNOWN_FUNCTIONS[funcType]].size()
01485 << " or " << functionTypeNonAnnotatedGenes[funcType].size()
01486 << endl;
01487
01488 #endif
01489 return(functionTypeNonAnnotatedGenes[funcType].size());
01490 }
01491
01495 unsigned int numTotalNonAnnotatedGenes(string function, string funcType)
01496 {
01497 return(numNonAnnotatedGenes(funcType) +
01498 numNonAnnotatedGenes(funcType, function));
01499 }
01500
01501
01502 unsigned int numNonAnnotatedGenes(string funcType, string function)
01503 {
01504 return(functionsNonAnnotatedGenes[funcType][function].size());
01505 }
01506
01508
01509 unsigned int numUnknownAnnotatingFunctions(string gene, string funcType)
01510 {
01511 return(geneNonAnnotatedFunctions[gene][funcType].size());
01512 }
01513
01514
01515
01516
01517
01518
01519 unsigned int numNegativeGenes(string funcType, string function)
01520 {
01521 return(numAnnotatedGenes(funcType) -
01522 numAnnotatedGenes(funcType, function) -
01523 numNonAnnotatedGenes(funcType, function)
01524 );
01525 }
01526
01528 void print(string fileName);
01529
01534 void printWithName(string fileName);
01535
01538 void printNumAnnotationsPerFunction(ostream &numFunStream);
01539
01561 void read(string fileName, string dataType = "unweighted",
01562 const map< string, set< string > > *nodeAliases = NULL,
01563 bool mostSpecificAnnotations = false,
01564 const set< string > *evidenceCodesToIgnore = NULL);
01565
01566
01581 void read(string fileName, unsigned int keySize);
01582
01583
01602 void readGO(string fileName, unsigned int altSymbolCol=0);
01603
01610 void readEvidenceCodeWeights(string ecwFile)
01611 {
01612 _evidenceCodes.readEvidenceCodeWeights(ecwFile);
01613 _evidenceCodes.checkEvidenceCodeWeights(_evidenceCodeCountsInAllAnnotations, cerr);
01614 }
01615
01630 void subtract(MyAnnotations &other, MyAnnotations &difference);
01631
01632
01635 void storeGODAG(GeneOntology &go)
01636 {
01637 goDAG = &go;
01638 }
01639
01640 void readOverlappingFunctions(string fileName);
01641
01642 private:
01643
01644
01645 void _cleanUp();
01646
01653 unsigned int _copyAnnotatedGenes(const BioFunction &source, const BioFunction &target);
01654
01675 unsigned int _copyAnnotatedGenesToNonAnnotated(const BioFunction &source, const BioFunction &target,
01676 bool force = false,
01677 MyAnnotations *original = NULL);
01678
01679
01680
01687 unsigned int _copyNonAnnotatedGenes(const BioFunction &source, const BioFunction &target);
01688
01689
01690
01691 bool _isAnnotatedWithFunctionMap(string gene, string func, string type);
01692
01693
01694
01695 bool _isAnnotatedWithFunctionMultiIndex(string gene, string func, string type, bool beforeTruePathRule = false);
01696
01697 map< string, vector< string > > _functionsWithOverlappingAnnotations;
01698
01699 private:
01700
01701 static void _initialise();
01702 void _compareFunctionTypes(string funcType1, string funcType2);
01703 bool _check();
01704
01705 };
01706
01707
01708
01709
01710
01711 #endif // _ANNOTATIONS_H