Biorithm  1.1
 All Classes Functions Variables Typedefs Friends
histogram.h
00001 /**************************************************************************
00002  * Copyright (c) 2004-2011 T. M. Murali                                   *
00003  *                                                                        *
00004  * This file is part of Biorithm.                                         *
00005  *                                                                        *
00006  * Biorithm is free software: you can redistribute it and/or modify       *
00007  * it under the terms of the GNU General Public License as published by   *
00008  * the Free Software Foundation, either version 3 of the License, or      *
00009  * (at your option) any later version.                                    *
00010  *                                                                        *
00011  * Biorithm is distributed in the hope that it will be useful,            *
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of         *
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the          *
00014  * GNU General Public License for more details.                           *
00015  *                                                                        *
00016  * You should have received a copy of the GNU General Public License      *
00017  * along with Biorithm.  If not, see <http://www.gnu.org/licenses/>.      *
00018  *                                                                        *
00019  **************************************************************************/
00020 
00029 #ifndef _HISTOGRAM_H
00030 #define _HISTOGRAM_H
00031 
00032 #include <vector>
00033 
00034 //#include "global.h"
00035 #include "enrichment.h"
00036 
00037 using namespace std;
00038 
00046 inline MyNT computeMean(const vector< MyNT > &coords, set< unsigned int > *missingIndices = NULL)
00047 {
00048   MyNT numCoords = coords.size(), sum = 0;
00049   for (unsigned int i = 0; i < numCoords; i++)
00050     if (!missingIndices || (missingIndices->end() == missingIndices->find(i)))
00051       sum += coords[i];
00052   if (missingIndices)
00053     numCoords -= missingIndices->size();
00054   return(sum/numCoords);
00055 }
00056 
00068 inline MyNT computeVariance(const vector< MyNT > &coords, MyNT mean, set< unsigned int > *missingIndices = NULL)
00069 {
00070   MyNT numCoords = coords.size(), sum = 0;
00071   for (unsigned int i = 0; i < numCoords; i++)
00072     if (!missingIndices || (missingIndices->end() == missingIndices->find(i)))
00073       sum += (coords[i] - mean)*(coords[i] - mean);
00074   if (missingIndices)
00075     numCoords -= missingIndices->size();
00076   return(sum/numCoords);
00077 }
00078 
// The three functions below are only declared in this header; their
// definitions are not visible here. Each takes two MyNT arguments (t, nu)
// and returns a MyNT.
// NOTE(review): going by the names, t is a Student's t statistic and nu the
// degrees of freedom -- confirm against the definitions.

// Presumably the cumulative distribution function evaluated at t -- TODO confirm.
00082 MyNT studentsTDistributionFunction(MyNT t, MyNT nu);
00083 
// Presumably a one-sided tail probability for t -- TODO confirm which tail.
00089 MyNT studentsTDistributionFunctionTail(MyNT t, MyNT nu);
00090 
// Presumably the two-sided tail probability for t -- TODO confirm.
00098 MyNT studentsTDistributionFunctionTwoSidedTail(MyNT t, MyNT nu);
00099 
00100 
00101 
00102 
00114 class MyHistogram
00115 {
00116 private:
00117   unsigned int _numBins;
00118   // each bin can store a fraction, so the type is MyNT
00119   vector< MyNT > _bins;
00120   MyNT _low;
00121   MyNT _high;
00122   // type is MyNT for the same reason as _bins.
00123   MyNT _numTotalValues;
00124 //  unsigned int _numTotalValues;
00125 public:
00126 
00142   MyHistogram(unsigned int numBins = 10, MyNT low = 0, MyNT high = 1)
00143       : _numBins(numBins), _bins(numBins, 0), _low(low), _high(high),
00144         _numTotalValues(0)
00145     {}
00146 
00149   virtual ~MyHistogram()
00150     {}
00151 
00156   virtual MyNT computeMean();
00157   
00165   virtual MyNT computeStandardDeviation(MyNT mean);
00166 
00174   virtual MyNT computeVariance(MyNT mean);
00175 
00176   
00179   virtual unsigned int getNumBins() const
00180     {
00181       return(_numBins);
00182     }
00183   
00185   virtual MyNT getBin(MyNT coord) const
00186     {
00187       // first find the index of the bin. then that bin's lower value.
00188       return(_binToValue(_valueToBin(coord)));
00189     }
00190 
00192   virtual void setBin(MyNT coord, MyNT value)
00193     {
00194 //       cout << "coord is " << coord << ", value is " << value
00195 //            << ", bin is " << _valueToBin(coord) << endl;
00196       _numTotalValues -= _bins[_valueToBin(coord)];
00197       _numTotalValues += value;
00198       _bins[_valueToBin(coord)] = value;
00199     }
00200   
00201   
00203   virtual MyNT getValue(MyNT coord) const
00204     {
00205       return(_bins[_valueToBin(coord)]);
00206     }
00207   
00208 
00211   MyNT getMin() const
00212     {
00213       return(_low);
00214     }
00215   
00218   MyNT getMax() const
00219     {
00220       return(_high);
00221     }
00222   
00225   MyNT getRange() const
00226     {
00227       return(getMax() - getMin());
00228     }
00229   
00232   virtual void insert(MyNT value);
00233 
00242   virtual void insert(const vector< MyNT > &values);
00243   
00244   // insert a vector of unsigned ints into the histogram
00245   // TODO eventually template MyHistogram so that this isn't necessary
00246   virtual void insert(const vector< unsigned int > &values);
00247 
00248   virtual void abs();
00249   virtual void accumulate(bool reverse = false);
00250 
00258   void constructTDistributionHistogram(unsigned int dof);
00259 
00261   virtual MyNT convertToPvalues(MyNT pvalueThreshold = 0.01, LIBENRICHMENT_TEST_TYPE testType = LIBENRICHMENT_NONE,
00262                                 MyHistogram *reference = NULL);
00263   
00265   virtual void convertToPvalues(MyNT pvalueThreshold, const set< LIBENRICHMENT_TEST_TYPE > &testTypes,
00266                                 map< LIBENRICHMENT_TEST_TYPE, MyNT > &thresholds,
00267                                 map< LIBENRICHMENT_TEST_TYPE, MyHistogram > &histograms,
00268                                 MyHistogram *reference = NULL);
00269   
00272   virtual const MyHistogram& operator=(const MyHistogram &other);
00273 
00276   virtual MyHistogram operator+(const MyHistogram &other);
00277 
00280   virtual MyHistogram operator-(MyNT weight);
00281 
00284   virtual MyHistogram operator-();
00285 
00288   virtual const MyHistogram &operator+=(const MyHistogram &other);
00289 
00291   virtual MyHistogram operator*(MyNT weight);
00292 
00296   virtual void operator*=(MyNT weight);
00297 
00304   virtual MyHistogram operator/(MyHistogram &other);
00305   
00312   virtual const MyHistogram &operator/=(MyHistogram &other);
00313 
00320   virtual void normalise();
00321   
00324   virtual MyNT getNumTotalValues() const
00325     {
00326       return(_numTotalValues);
00327     }
00328 
00346   virtual void print(ostream &ostr, string extra = "", vector< MyHistogram > *others = NULL);
00347 
00366   virtual void print(string file, string extra = "", vector< MyHistogram > *others = NULL);
00367 
00375   virtual void read(string infile);
00376   
00377   // compute the trapezoidal area under the curve generated by this histogram and other
00378   // this histogram must be monotonically increasing and other must have corresponding bins
00379   virtual double computeTrapezoidalAUC(const MyHistogram &other) const
00380   {
00381                 double area = 0;
00382           for (unsigned int i = 1; i < getNumBins(); i++)
00383                         area += 0.5*(_bins[i] - _bins[i - 1])*(other._bins[i] + other._bins[i - 1]);
00384                 return (area);
00385         }
00386   
00387   
00388 private:
00390   MyNT _binToValue(unsigned int i) const
00391     {
00392       return((i*1.0/getNumBins())*getRange() + getMin());
00393     }
00395   unsigned int _valueToBin(MyNT value) const
00396     {
00397       if (value < getMin())
00398         {
00399 //          cerr << "\tInserting value " << value << " into histogram whose minimum value is " << getMin() << ". Inserting " << getMin() << "." << endl;
00400           value = getMin();
00401         }
00402       if (value > getMax())
00403         {
00404 //          cerr << "\tInserting value " << value << " into histogram whose maximum value is " << getMax() << ". Inserting " << getMax() << "." << endl;
00405           value = getMax();
00406         }
00407 //       cout << "\tvalue is " << value
00408 //            << ", bin is " << getNumBins()*(value - getMin())/getRange()
00409 //            << ", int(bin) is " << int(getNumBins()*(value - getMin())/getRange())
00410 //            << ", static cast to unsigned int is "
00411 //            << static_cast<unsigned int>(getNumBins()*(value - getMin())/getRange())
00412 //            << endl;
00413       return(static_cast<unsigned int>(
00414                // adding 0.5 to perform rounding. in some cases, i
00415                // compute the previous bin, otherwise.
00416                0.5
00417                + getNumBins()*(value - getMin())/getRange()));
00418     }
00419 };
00420 
00421 
00422 
00423 #endif // _HISTOGRAM_H 
 All Classes Functions Variables Typedefs Friends