1 #ifndef _theplu_yat_statistics_roc_
2 #define _theplu_yat_statistics_roc_
29 #include "yat/utility/yat_assert.h"
31 #include <gsl/gsl_randist.h>
40 namespace statistics {
/// Declaration of add().
///
/// \param value  the numeric value of the data point
/// \param target boolean class label of the data point
/// \param weight weight of the data point; defaults to 1.0
///
/// NOTE(review): presumably inserts the point into the ROC's internal
/// containers -- confirm against the implementation file.
73 void add(
double value,
bool target,
double weight=1.0);
/// Const accessor returning a double.
/// NOTE(review): presumably the total (weighted) number of data points --
/// confirm against the implementation.
113 double n(
void)
const;
/// Const accessor returning a double.
/// NOTE(review): presumably the (weighted) number of negative-class
/// points -- confirm against the implementation.
120 double n_neg(
void)
const;
/// Const accessor returning a double.
/// NOTE(review): presumably the (weighted) number of positive-class
/// points -- confirm against the implementation.
127 double n_pos(
void)
const;
/// Declaration of remove(); its parameter list mirrors add().
///
/// \param value  the numeric value of the data point
/// \param target boolean class label of the data point
/// \param weight weight of the data point; defaults to 1.0
///
/// NOTE(review): presumably undoes a previous add() with the same
/// arguments -- confirm against the implementation file.
204 void remove(
double value,
bool target,
double weight=1.0);
/// Map: multimap keyed on a double, each entry carrying a (bool, double)
/// pair. NOTE(review): presumably key = value, pair = (target, weight) as
/// passed to add() -- confirm.
212 typedef std::multimap<double, std::pair<bool, double> > Map;
/// Vector: sequence of (bool flag, Map::mapped_type) pairs, i.e. each
/// element is a flag plus one (bool, double) entry of the Map.
213 typedef std::vector<std::pair<bool, Map::mapped_type> > Vector;
/// Const member taking one (unnamed) double and returning a double.
/// NOTE(review): by its name, an approximate p-value computation --
/// confirm what the argument represents in the implementation.
226 double get_p_approx(
double)
const;
/// Const predicate returning bool.
/// NOTE(review): presumably true when any stored weight differs from the
/// default -- confirm against the implementation.
231 bool is_weighted(
void)
const;
/// Const member returning a size_t computed from the given Averager.
/// \param a Averager to inspect (taken by const reference)
/// NOTE(review): presumably the number of data points recorded in \a a --
/// confirm against the implementation.
236 size_t nof_points(
const Averager& a)
const;
/// Const member mapping an area (double) to a double.
/// \param area area value the result is computed for
/// NOTE(review): by its name, the left-sided p-value for weighted data --
/// confirm against the implementation.
242 double p_left_weighted(
double area)
const;
/// Const member mapping an area (double) to a double.
/// \param area area value the result is computed for
/// NOTE(review): by its name, the right-sided p-value for weighted data --
/// confirm against the implementation.
248 double p_right_weighted(
double area)
const;
/// Entry-point overload of count(): takes an iterator range and a
/// threshold and returns a double.
///
/// \param first     start of the iterator range
/// \param last      end of the iterator range
/// \param threshold threshold; the definition scales it by the total
///                  positive and negative weight before delegating to the
///                  Vector overload
258 template <
typename Iterator>
259 double count(Iterator first, Iterator last,
double threshold)
const;
/// Overload of count() working on a Vector of flagged entries.
///
/// \param weights   Vector of (flag, (bool, double)) entries; taken by
///                  non-const reference
/// \param iter      current position in the iterator range
/// \param last      end of the iterator range
/// \param threshold value the accumulated sum is compared against in the
///                  per-entry overload
/// \param sum       sum accumulated so far
/// \param weight    current Weights bookkeeping (const reference)
264 template <
typename Iterator>
265 double count(Vector& weights, Iterator iter, Iterator last,
266 double threshold,
double sum,
const Weights& weight)
const;
/// Per-entry overload of count(): same arguments as the Vector overload
/// plus the single (bool, double) entry being considered at \a iter.
///
/// \param weights   Vector of flagged entries (non-const reference)
/// \param iter      current position in the iterator range
/// \param last      end of the iterator range
/// \param threshold value the accumulated sum is compared against
/// \param sum       sum accumulated so far
/// \param weight    Weights bookkeeping, taken by value so the callee
///                  works on its own copy
/// \param entry     the (bool, double) entry assigned at this step
273 template <
typename Iterator>
274 double count(Vector& weights, Iterator iter, Iterator last,
275 double threshold,
double sum, Weights weight,
276 const std::pair<bool, double>& entry)
const;
/// Recursive helper; the definition weights recursive calls by a
/// hypergeometric probability over each group of tied entries.
///
/// \param first start of the range; elements are accessed via ->first
/// \param last  end of the range
/// \param block running quantity that is reduced on every recursion step
/// \param pos   number of positives still to distribute
/// \param neg   number of negatives still to distribute
///
/// NOTE(review): by its name this yields an exact p-value in the presence
/// of ties -- confirm the precise definition of \a block.
283 template<
typename ForwardIterator>
284 double p_exact_with_ties(ForwardIterator first, ForwardIterator last,
285 double block,
unsigned int pos,
286 unsigned int neg)
const;
/// Const member mapping an area (double) to a double.
/// \param area area value the result is computed for
/// NOTE(review): by its name, the exact right-sided p-value -- confirm
/// against the implementation.
291 double p_exact_right(
double area)
const;
/// Const member mapping an area (double) to a double.
/// \param area area value the result is computed for
/// NOTE(review): by its name, the exact left-sided p-value -- confirm
/// against the implementation.
296 double p_exact_left(
double area)
const;
/// Const predicate returning bool.
/// NOTE(review): presumably decides between the exact and the approximate
/// p-value computation -- confirm the criterion in the implementation.
298 bool use_exact_method(
void)
const;
/// Unsigned size threshold stored per object.
/// NOTE(review): presumably the sample-size limit consulted by
/// use_exact_method() -- confirm where it is read.
302 unsigned int minimum_size_;
/// Recursive computation over groups of tied entries.
///
/// The leading run of entries whose ->first equals begin->first is
/// treated as one tied group. For every feasible number of positives in
/// that group (pos1) the hypergeometric probability of that split is
/// multiplied by the recursive result for the remaining range, and the
/// products are accumulated in result.
///
/// \param begin start of the range
/// \param end   end of the range
/// \param block running quantity, reduced for each recursive call
/// \param pos   number of positives still to distribute
/// \param neg   number of negatives still to distribute
///
/// NOTE(review): n is presumably the size of the tied group counted by
/// the while loop -- confirm.
308 template<
typename ForwardIterator>
310 ROC::p_exact_with_ties(ForwardIterator begin, ForwardIterator end,
311 double block,
unsigned int pos,
unsigned int neg)
const
318 ForwardIterator iter(begin);
// Scan forward while entries tie with the first one.
320 while (iter!=end && iter->first == begin->first) {
// Fewest positives the group can hold: at most neg of its n members can
// be negative, so at least n - min(n, neg) must be positive.
333 unsigned int pos1 = n - std::min(n, neg);
// Most positives the group can hold: bounded by both n and pos.
335 unsigned int max = std::min(n, pos);
336 YAT_ASSERT(pos1<=max);
337 for ( ; pos1<=
max; ++pos1) {
// Members of the group that are not positive are negative.
338 unsigned int neg1 = n-pos1;
// pos2/neg2: positives and negatives left after this group.
340 unsigned int pos2 = pos-pos1;
341 YAT_ASSERT(pos2<=pos);
342 unsigned int neg2 = neg-neg1;
343 YAT_ASSERT(neg2<=neg);
// Probability of pos1 positives among n draws from pos positives and neg
// negatives, times the recursive result for the rest of the range. The
// static_casts are no-ops: pos and neg are already unsigned int.
344 result += gsl_ran_hypergeometric_pdf(pos1, static_cast<unsigned int>(pos),
345 static_cast<unsigned int>(neg), n)
346 * p_exact_with_ties(iter, end,
// Block passed on: reduced by pos2*neg1 and by half of pos1*neg1.
347 block - pos2*neg1 - 0.5*pos1*neg1,
/// Entry-point overload of count().
///
/// Copies every entry of multimap_ into vec, pairing its mapped
/// (bool, double) value with a flag initialised to false. It then seeds
/// w.small_pos and w.small_neg with the weight sums of pos_weights_ and
/// neg_weights_, and delegates to the Vector overload with a starting sum
/// of 0.
///
/// \param first     start of the iterator range
/// \param last      end of the iterator range
/// \param threshold threshold; multiplied by w.small_pos*w.small_neg
///                  before delegation
354 template <
typename Iterator>
355 double ROC::count(Iterator first, Iterator last,
double threshold)
const
// Reserve once; exactly multimap_.size() entries are pushed below.
358 vec.reserve(multimap_.size());
360 for (Map::const_iterator i = multimap_.begin(); i!=multimap_.end(); ++i)
361 vec.push_back(std::make_pair(
false, i->second));
// Initially all positive and negative weight counts as "small".
364 w.small_pos = pos_weights_.
sum_x();
365 w.small_neg = neg_weights_.
sum_x();
366 return count(vec, first, last, threshold*w.small_pos*w.small_neg, 0, w);
/// Vector overload of count().
///
/// Iterates over the entries of v, accumulates the result of the
/// per-entry overload called with i->second, and returns the accumulated
/// result divided by nof_elements.
///
/// \param v         Vector of flagged entries (non-const reference)
/// \param iter      current position in the iterator range
/// \param last      end of the iterator range
/// \param threshold value the sum is compared against downstream
/// \param sum       sum accumulated so far
/// \param w         current Weights bookkeeping
///
/// NOTE(review): presumably only entries whose flag is still unset are
/// visited and counted in nof_elements -- confirm.
371 template <
typename Iterator>
372 double ROC::count(ROC::Vector& v, Iterator iter, Iterator last,
373 double threshold,
double sum,
const Weights& w)
const
377 int nof_elements = 0;
378 for (ROC::Vector::iterator i=v.begin(); i!=v.end(); ++i) {
382 result += count(v, iter, last, threshold, sum, w, i->second);
// Guards the division below against a zero denominator.
386 YAT_ASSERT(nof_elements);
387 return result/nof_elements;
/// Per-entry overload of count().
///
/// Moves the weight of entry (entry.second) from the "small" bucket to
/// the "tied" bucket of the positive or negative side. When the current
/// tie group ends, the group's contribution is added to sum. Two
/// comparisons against threshold follow, and unless they decide the
/// outcome the call recurses through the Vector overload at the next
/// position.
///
/// \param weights   Vector of flagged entries (non-const reference)
/// \param iter      current position in the iterator range
/// \param last      end of the iterator range
/// \param threshold value the accumulated sum is compared against
/// \param sum       sum accumulated so far
/// \param w         Weights bookkeeping; taken by value, so changes made
///                  here do not affect the caller's copy
/// \param entry     the (bool, double) entry assigned at this step
///
/// NOTE(review): presumably entry.first selects between the positive and
/// negative updates below -- confirm.
391 template <
typename Iterator>
392 double ROC::count(Vector& weights, Iterator iter, Iterator last,
393 double threshold,
double sum, Weights w,
394 const std::pair<bool, double>& entry)
const
// Tolerance used in the threshold comparisons below.
// NOTE(review): 10e-10 equals 1e-9 -- confirm this is the intended value.
396 double tiny = 10e-10;
399 YAT_ASSERT(next!=last);
// Positive side: weight moves from the small bucket to the tied bucket.
404 w.tied_pos += entry.second;
405 w.small_pos -= entry.second;
// Negative side: same bookkeeping.
408 w.tied_neg += entry.second;
409 w.small_neg -= entry.second;
// End of range, or the next element differs: the tie group is complete.
// Tied positive/negative pairs contribute half weight; tied positives
// against the remaining small negatives contribute full weight.
413 if (next==last || *next!=*iter) {
414 sum += 0.5*w.tied_pos*w.tied_neg + w.tied_pos * w.small_neg;
// Sum obtained if all remaining small positive weight is treated like
// the tied positive weight; by its name, an upper bound on the final sum.
421 double max_sum = sum + 0.5*(w.tied_pos+w.small_pos)*w.tied_neg +
422 (w.tied_pos+w.small_pos)*w.small_neg;
// NOTE(review): presumably an early exit when the threshold is
// unreachable -- confirm.
424 if (max_sum<threshold-tiny)
// NOTE(review): presumably an early exit when the threshold is already
// met -- confirm.
426 if (sum + 0.5*w.tied_pos*(w.tied_neg+w.small_neg) >= threshold-tiny)
// Otherwise continue with the next position in the range.
430 return count(weights, next, last, threshold, sum, w);