documents/yat-0.12.x/ROC_8h_source.html

 #ifndef _theplu_yat_statistics_roc_

 #define _theplu_yat_statistics_roc_


 // $Id: ROC.h 3023 2013-04-06 02:35:36Z peter $


 /*

   Copyright (C) 2004 Peter Johansson

   Copyright (C) 2005, 2006, 2007, 2008 Jari Häkkinen, Peter Johansson

   Copyright (C) 2011, 2012, 2013 Peter Johansson


   This file is part of the yat library, http://dev.thep.lu.se/yat


   The yat library is free software; you can redistribute it and/or

   modify it under the terms of the GNU General Public License as

   published by the Free Software Foundation; either version 3 of the

   License, or (at your option) any later version.


   The yat library is distributed in the hope that it will be useful,

   but WITHOUT ANY WARRANTY; without even the implied warranty of

   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

   General Public License for more details.


   You should have received a copy of the GNU General Public License

   along with yat. If not, see <http://www.gnu.org/licenses/>.

 */


 #include "Averager.h"

 #include "yat/utility/deprecate.h"

 #include "yat/utility/stl_utility.h"

 #include "yat/utility/yat_assert.h"


 #include <gsl/gsl_randist.h>


 #include <algorithm>

 #include <map>

 #include <utility>

 #include <vector>


 namespace theplu {

 namespace yat {

 namespace statistics {


   /// Receiver Operating Characteristic.
   ///
   /// Data points are entered with add() as (value, target, weight)
   /// triplets; area() reports the Area Under Curve (AUC) and the
   /// p_* members report p-values.
   class ROC

   {


   public:

     /// Default constructor.
     ROC(void);


     /// Add a data value.
     ///
     /// \param value the data value
     /// \param target class label of the data point
     /// (NOTE(review): presumably true marks a positive sample, as the
     /// count functions treat a true flag as positive - confirm against
     /// the implementation file)
     /// \param weight weight of the data point, 1.0 unless specified
     void add(double value, bool target, double weight=1.0);


     /// \return Area Under Curve, AUC.
     double area(void) const;


     /// Threshold for p_value calculation.
     ///
     /// Returned by non-const reference, so the threshold can be
     /// modified through the return value.
     unsigned int& minimum_size(void);


     /// Threshold for p_value calculation (read-only access).
     const unsigned int& minimum_size(void) const;


     /// \return number of samples
     double n(void) const;


     /// \return number of negative samples
     double n_neg(void) const;


     /// \return number of positive samples
     double n_pos(void) const;


     /// NOTE(review): presumably the one-sided p-value for the left
     /// tail, mirroring p_right() - confirm against the implementation.
     double p_left(void) const;


     /// \return One-sided P-value.
     double p_right(void) const;


     /// Marked with YAT_DEPRECATE.
     /// NOTE(review): the replacement is presumably p_right() or
     /// p_left() - confirm against the implementation.
     double p_value_one_sided(void) const YAT_DEPRECATE;


     /// \return Two-sided p-value.
     double p_value(void) const;


     /// Takes the same arguments as add().
     /// NOTE(review): presumably removes a data point previously
     /// entered with add() - the definition is not in this header.
     void remove(double value, bool target, double weight=1.0);


     /// Set everything to zero.
     void reset(void);


   private:

     // Storage of the data points. The mapped pair is read by the
     // count functions as (true if positive, weight); the key is
     // presumably the data value (TODO confirm in add()).
     typedef std::multimap<double, std::pair<bool, double> > Map;

     // Work copy used by the count functions: each mapped pair of Map
     // tagged with a flag that is true while the element is in use by
     // the enumeration.
     typedef std::vector<std::pair<bool, Map::mapped_type> > Vector;


     // struct used in count functions
     //
     // small_pos / small_neg start out as the summed weight of each
     // class; as the count functions consume an entry its weight is
     // moved to tied_pos / tied_neg, which are set back to zero
     // whenever a group of equal values is complete.

     struct Weights

     {

       Weights(void);

       double small_pos;

       double small_neg;

       double tied_pos;

       double tied_neg;

     };


     // NOTE(review): defined outside this header; presumably the
     // approximate p-value used when the exact method is not - confirm.
     double get_p_approx(double) const;


     // NOTE(review): defined outside this header; presumably true when
     // some data point has a weight other than 1 - confirm.
     bool is_weighted(void) const;


     // NOTE(review): defined outside this header; semantics cannot be
     // told from this file.
     size_t nof_points(const Averager& a) const;


     /*

       Calculate probability to get an area equal to or smaller than \a

       area given the distribution of weights and ties in multimap_

      */

     double p_left_weighted(double area) const;


     /*

       Calculate probability to get an area equal to or greater than \a

       area given the distribution of weights and ties in multimap_

      */

     double p_right_weighted(double area) const;


     /*

       Count number of combinations (of N!) that gives weight sum equal

       or larger than \a threshold.


       Range [first, last) is used to check for ties. If, e.g., *first

       and *(first+1) are equal implies that the two largest values are

       equal.

      */

     template <typename Iterator>

     double count(Iterator first, Iterator last, double threshold) const;


     /*

       Loop over all elements in \a weights that are not flagged as in

       use and call the seven-argument count() for each of them.

      */

     template <typename Iterator>

     double count(Vector& weights, Iterator iter, Iterator last,

                  double threshold, double sum, const Weights& weight) const;


     /*

       Count number of combinations in which sum>=threshold given

       classes and weights in \a weight. Range [iter, last) is used to

       handle ties.

      */

     template <typename Iterator>

     double count(Vector& weights, Iterator iter, Iterator last,

                  double threshold, double sum, Weights weight,

                  const std::pair<bool, double>& entry) const;


     /*

       Calculates probability to get \a block number of pairs correctly

       sorted when having \a pos positive samples and \a neg negative

       samples given the distribution of ties as in [first, last).

      */

     template<typename ForwardIterator>

     double p_exact_with_ties(ForwardIterator first, ForwardIterator last,

                              double block, unsigned int pos,

                              unsigned int neg) const;


     // NOTE(review): defined outside this header; presumably the exact
     // counterpart of p_right() for a given area - confirm.
     double p_exact_right(double area) const;


     // NOTE(review): defined outside this header; presumably the exact
     // counterpart of p_left() for a given area - confirm.
     double p_exact_left(double area) const;


     // NOTE(review): defined outside this header; presumably compares
     // the sample size with minimum_size_ - confirm.
     bool use_exact_method(void) const;


     // mutable, so const member functions may assign it
     // (NOTE(review): presumably a cache for area() - confirm)
     mutable double area_;

     bool has_ties_;

     // value exposed by minimum_size() (presumably - TODO confirm)
     unsigned int minimum_size_;

     // sum_x() of these two is used by count() as the summed weight of
     // the negative and positive class, respectively
     Averager neg_weights_;

     Averager pos_weights_;

     // the data points; see typedef Map
     Map multimap_;

   };


   /*
     Probability related to getting \a block correctly sorted pairs when
     having \a pos positive and \a neg negative samples, given the
     distribution of ties in [begin, end).

     Returns 1.0 when \a block is not positive and 0.0 when \a block
     exceeds pos*neg, i.e. the value behaves as the probability of
     getting at least \a block correctly sorted pairs.

     Entries in [begin, end) whose member `first` compares equal form a
     tie group. The leading tie group (n entries) is split into pos1
     positives and neg1 = n - pos1 negatives in every possible way. Each
     split is weighted with the hypergeometric probability of drawing
     pos1 positives when n samples are drawn from pos positives and neg
     negatives, and multiplied by the probability obtained recursively
     for the rest of the range with the remaining pos2 = pos - pos1
     positives, neg2 = neg - neg1 negatives, and \a block reduced by
     pos2*neg1 + 0.5*pos1*neg1 (pairs within a tie group count half).

     The range is expected to hold pos+neg entries.
    */
   template<typename ForwardIterator>
   double
   ROC::p_exact_with_ties(ForwardIterator begin, ForwardIterator end,
                          double block, unsigned int pos,unsigned int neg) const
   {
     if (block <= 0)
       return 1.0;
     // the product is calculated in double; calculated in unsigned int it
     // silently wraps around when pos*neg exceeds the range of the type
     if (block > static_cast<double>(pos)*neg)
       return 0.0;

     // advance iter past the leading tie group and count its size
     ForwardIterator iter(begin);
     unsigned int n=0;
     while (iter!=end && iter->first == begin->first) {
       ++iter;
       ++n;
     }
     // reaching this point with an empty range means pos and neg do not
     // match the range; the loop below would then call this function with
     // unchanged arguments and never terminate
     YAT_ASSERT(n>0);

     double result = 0;
     /*
       pos1  neg1  |  n
       pos2  neg2  |
       ----  ----   ----
       pos   neg
      */

     // ensure pos1 and neg2 are non-negative
     unsigned int pos1 = n - std::min(n, neg);
     // ensure pos2 and neg1 are non-negative
     unsigned int max = std::min(n, pos);
     YAT_ASSERT(pos1<=max);
     for ( ; pos1<=max; ++pos1) {
       unsigned int neg1 = n-pos1;
       YAT_ASSERT(neg1<=n);
       unsigned int pos2 = pos-pos1;
       YAT_ASSERT(pos2<=pos);
       unsigned int neg2 = neg-neg1;
       YAT_ASSERT(neg2<=neg);
       // pairs settled by this split; calculated in double to avoid
       // unsigned wrap-around in pos2*neg1
       const double settled = static_cast<double>(pos2)*neg1;
       result += gsl_ran_hypergeometric_pdf(pos1, pos, neg, n)
         * p_exact_with_ties(iter, end,
                             block - settled - 0.5*pos1*neg1,
                             pos2, neg2);
     }
     return result;
   }


   template <typename Iterator>

   double ROC::count(Iterator first, Iterator last, double threshold) const

   {

     Vector vec;

     vec.reserve(multimap_.size());

     // copy values from multimap_ to v

     for (Map::const_iterator i = multimap_.begin(); i!=multimap_.end(); ++i)

       vec.push_back(std::make_pair(false, i->second));


     ROC::Weights w;

     w.small_pos = pos_weights_.sum_x();

     w.small_neg = neg_weights_.sum_x();

     return count(vec, first, last, threshold*w.small_pos*w.small_neg, 0, w);

   }


   /*
     Try every element of \a v that is not flagged as in use as the
     next entry of the enumeration.

     Each such element is flagged, handed to the seven-argument
     overload together with the unchanged \a iter, \a last,
     \a threshold, \a sum and \a w, and then unflagged again. The
     return value is the average of the values obtained, i.e. their sum
     divided by the number of elements tried.

     At least one element of \a v must be available (asserted).
    */
   template <typename Iterator>
   double ROC::count(ROC::Vector& v, Iterator iter, Iterator last,
                     double threshold, double sum, const Weights& w) const
   {
     int tried = 0;
     double total = 0.0;
     for (ROC::Vector::iterator candidate=v.begin();
          candidate!=v.end(); ++candidate) {
       if (!candidate->first) {
         // reserve the element while the enumeration continues with it
         candidate->first = true;
         total += count(v, iter, last, threshold, sum, w, candidate->second);
         candidate->first = false;
         ++tried;
       }
     }
     YAT_ASSERT(tried);
     return total/tried;
   }


   /*
     Place \a entry, a (true if positive, weight) pair, at position
     \a iter and report how the enumeration proceeds from there.

     \a w is taken by value; the weight of \a entry is moved from the
     not-yet-consumed weight of its class (small_*) to the current tie
     group (tied_*). When \a iter is the last position of a group of
     equal values (the next position is \a last or holds a different
     value), the contribution of the group is added to \a sum and the
     tie group is emptied.

     Returns 0.0 when not even the largest attainable sum reaches
     \a threshold, 1.0 when the second bound calculated below already
     reaches it, and otherwise continues with the next position through
     the six-argument overload (0.0 when there is no next position).
     Comparisons allow for a tolerance of 10e-10.

     \a iter must not equal \a last (asserted).
    */
   template <typename Iterator>
   double ROC::count(Vector& weights, Iterator iter, Iterator last,
                     double threshold, double sum, Weights w,
                     const std::pair<bool, double>& entry) const
   {
     // tolerance in comparisons against threshold
     const double tiny = 10e-10;

     YAT_ASSERT(iter!=last);
     Iterator next(iter);
     ++next;

     // move the weight of entry into the tie group of its class
     double& tied = entry.first ? w.tied_pos : w.tied_neg;
     double& unused = entry.first ? w.small_pos : w.small_neg;
     tied += entry.second;
     unused -= entry.second;

     // iter is the last position within a range of equal values
     const bool group_complete = (next==last) || (*next!=*iter);
     if (group_complete) {
       sum += 0.5*w.tied_pos*w.tied_neg + w.tied_pos * w.small_neg;
       w.tied_pos=0;
       w.tied_neg=0;
     }

     // max sum happens if all pos values belong to current equal range
     // and none of the remaining neg values
     const double max_sum = sum + 0.5*(w.tied_pos+w.small_pos)*w.tied_neg +
       (w.tied_pos+w.small_pos)*w.small_neg;
     if (max_sum<threshold-tiny)
       return 0.0;

     // NOTE(review): presumably the smallest attainable sum, in which the
     // current tie group absorbs all remaining neg weight - confirm
     if (sum + 0.5*w.tied_pos*(w.tied_neg+w.small_neg) >= threshold-tiny)
       return 1.0;

     // undecided: continue with the next position, if there is one
     if (next==last)
       return 0.0;
     return count(weights, next, last, threshold, sum, w);
   }


 }}} // of namespace statistics, yat, and theplu

 #endif

theplu::yat::statistics::ROC::area
double area(void) const
Area Under Curve, AUC.

theplu::yat::statistics::ROC::ROC
ROC(void)
Default constructor.

theplu::yat::statistics::ROC::minimum_size
unsigned int & minimum_size(void)
threshold for p_value calculation

theplu::yat::statistics::Averager
Class to calculate simple (first and second moments) averages.
Definition: Averager.h:45

theplu::yat::statistics::ROC::n_neg
double n_neg(void) const
number of negative samples

theplu::yat::statistics::ROC::p_value
double p_value(void) const
Two-sided p-value.

theplu::yat::statistics::ROC::add
void add(double value, bool target, double weight=1.0)
Add a data value.

theplu::yat::statistics::ROC::p_right
double p_right(void) const
One-sided P-value.

stl_utility.h

theplu::yat::utility::max
T max(const T &a, const T &b, const T &c)
Definition: stl_utility.h:638

theplu::yat::statistics::ROC::p_value_one_sided
double p_value_one_sided(void) const

theplu::yat::statistics::ROC::n_pos
double n_pos(void) const
number of positive samples

theplu::yat::statistics::ROC::p_left
double p_left(void) const

deprecate.h

theplu::yat::statistics::ROC::n
double n(void) const
number of samples

theplu::yat::statistics::ROC
Receiver Operating Characteristic.
Definition: ROC.h:52

theplu::yat::statistics::averager_base::sum_x
double sum_x(void) const
Definition: averager_base.h:128

theplu::yat::statistics::ROC::reset
void reset(void)
Set everything to zero.