blob: c0dee39f5598dbfd8911908cf6da986b8aded413 [file] [log] [blame]
/* cdf/hypergeometric.c
*
* Copyright (C) 2004 Jason H. Stover.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or (at
* your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
*/
/*
* Computes the cumulative distribution function for a hypergeometric
* random variable. A hypergeometric random variable X is the number
* of elements of type 1 in a sample of size t, drawn from a population
* of size n1 + n2, in which n1 are of type 1 and n2 are of type 2.
*
* This algorithm computes Pr( X <= k ) by summing the terms from
* the mass function, Pr( X = k ).
*
* References:
*
* T. Wu. An accurate computation of the hypergeometric distribution
* function. ACM Transactions on Mathematical Software. Volume 19, number 1,
* March 1993.
* This algorithm is not used, since it requires factoring the
* numerator and denominator, then cancelling. It is more accurate
* than the algorithm used here, but the cancellation requires more
* time than the algorithm used here.
*
* W. Feller. An Introduction to Probability Theory and Its Applications,
* third edition. 1968. Chapter 2, section 6.
*/
#include <config.h>
#include <math.h>
#include <gsl/gsl_math.h>
#include <gsl/gsl_errno.h>
#include <gsl/gsl_cdf.h>
#include <gsl/gsl_randist.h>
#include "error.h"
static double
lower_tail (const unsigned int k, const unsigned int n1,
const unsigned int n2, const unsigned int t)
{
double relerr;
int i = k;
double s, P;
s = gsl_ran_hypergeometric_pdf (i, n1, n2, t);
P = s;
while (i > 0)
{
double factor =
(i / (n1 - i + 1.0)) * ((n2 + i - t) / (t - i + 1.0));
s *= factor;
P += s;
relerr = s / P;
if (relerr < GSL_DBL_EPSILON)
break;
i--;
}
return P;
}
static double
upper_tail (const unsigned int k, const unsigned int n1,
const unsigned int n2, const unsigned int t)
{
double relerr;
unsigned int i = k + 1;
double s, Q;
s = gsl_ran_hypergeometric_pdf (i, n1, n2, t);
Q = s;
while (i < t)
{
double factor =
((n1 - i) / (i + 1.0)) * ((t - i) / (n2 + i + 1.0 - t));
s *= factor;
Q += s;
relerr = s / Q;
if (relerr < GSL_DBL_EPSILON)
break;
i++;
}
return Q;
}
/*
* Pr (X <= k)
*/
double
gsl_cdf_hypergeometric_P (const unsigned int k,
const unsigned int n1,
const unsigned int n2, const unsigned int t)
{
double P;
if (t > (n1 + n2))
{
CDF_ERROR ("t larger than population size", GSL_EDOM);
}
else if (k >= n1 || k >= t)
{
P = 1.0;
}
else if (k < 0.0)
{
P = 0.0;
}
else
{
double midpoint = (int) (t * n1 / (n1 + n2));
if (k >= midpoint)
{
P = 1 - upper_tail (k, n1, n2, t);
}
else
{
P = lower_tail (k, n1, n2, t);
}
}
return P;
}
/*
* Pr (X > k)
*/
double
gsl_cdf_hypergeometric_Q (const unsigned int k,
const unsigned int n1,
const unsigned int n2, const unsigned int t)
{
double Q;
if (t > (n1 + n2))
{
CDF_ERROR ("t larger than population size", GSL_EDOM);
}
else if (k >= n1 || k >= t)
{
Q = 0.0;
}
else if (k < 0.0)
{
Q = 1.0;
}
else
{
double midpoint = (int) (t * n1 / (n1 + n2));
if (k < midpoint)
{
Q = 1 - lower_tail (k, n1, n2, t);
}
else
{
Q = upper_tail (k, n1, n2, t);
}
}
return Q;
}