001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.distribution;
019
020 import java.io.Serializable;
021
022 import org.apache.commons.math.MathRuntimeException;
023 import org.apache.commons.math.util.MathUtils;
024
025 /**
026 * The default implementation of {@link HypergeometricDistribution}.
027 *
028 * @version $Revision: 920852 $ $Date: 2010-03-09 07:53:44 -0500 (Tue, 09 Mar 2010) $
029 */
030 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution
031 implements HypergeometricDistribution, Serializable {
032
033 /** Serializable version identifier */
034 private static final long serialVersionUID = -436928820673516179L;
035
036 /** The number of successes in the population. */
037 private int numberOfSuccesses;
038
039 /** The population size. */
040 private int populationSize;
041
042 /** The sample size. */
043 private int sampleSize;
044
045 /**
046 * Construct a new hypergeometric distribution with the given the population
047 * size, the number of successes in the population, and the sample size.
048 *
049 * @param populationSize the population size.
050 * @param numberOfSuccesses number of successes in the population.
051 * @param sampleSize the sample size.
052 */
053 public HypergeometricDistributionImpl(int populationSize,
054 int numberOfSuccesses, int sampleSize) {
055 super();
056 if (numberOfSuccesses > populationSize) {
057 throw MathRuntimeException
058 .createIllegalArgumentException(
059 "number of successes ({0}) must be less than or equal to population size ({1})",
060 numberOfSuccesses, populationSize);
061 }
062 if (sampleSize > populationSize) {
063 throw MathRuntimeException
064 .createIllegalArgumentException(
065 "sample size ({0}) must be less than or equal to population size ({1})",
066 sampleSize, populationSize);
067 }
068
069 setPopulationSizeInternal(populationSize);
070 setSampleSizeInternal(sampleSize);
071 setNumberOfSuccessesInternal(numberOfSuccesses);
072 }
073
074 /**
075 * For this distribution, X, this method returns P(X ≤ x).
076 *
077 * @param x the value at which the PDF is evaluated.
078 * @return PDF for this distribution.
079 */
080 @Override
081 public double cumulativeProbability(int x) {
082 double ret;
083
084 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
085 if (x < domain[0]) {
086 ret = 0.0;
087 } else if (x >= domain[1]) {
088 ret = 1.0;
089 } else {
090 ret = innerCumulativeProbability(domain[0], x, 1, populationSize,
091 numberOfSuccesses, sampleSize);
092 }
093
094 return ret;
095 }
096
097 /**
098 * Return the domain for the given hypergeometric distribution parameters.
099 *
100 * @param n the population size.
101 * @param m number of successes in the population.
102 * @param k the sample size.
103 * @return a two element array containing the lower and upper bounds of the
104 * hypergeometric distribution.
105 */
106 private int[] getDomain(int n, int m, int k) {
107 return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) };
108 }
109
110 /**
111 * Access the domain value lower bound, based on <code>p</code>, used to
112 * bracket a PDF root.
113 *
114 * @param p the desired probability for the critical value
115 * @return domain value lower bound, i.e. P(X < <i>lower bound</i>) <
116 * <code>p</code>
117 */
118 @Override
119 protected int getDomainLowerBound(double p) {
120 return getLowerDomain(populationSize, numberOfSuccesses, sampleSize);
121 }
122
123 /**
124 * Access the domain value upper bound, based on <code>p</code>, used to
125 * bracket a PDF root.
126 *
127 * @param p the desired probability for the critical value
128 * @return domain value upper bound, i.e. P(X < <i>upper bound</i>) >
129 * <code>p</code>
130 */
131 @Override
132 protected int getDomainUpperBound(double p) {
133 return getUpperDomain(sampleSize, numberOfSuccesses);
134 }
135
136 /**
137 * Return the lowest domain value for the given hypergeometric distribution
138 * parameters.
139 *
140 * @param n the population size.
141 * @param m number of successes in the population.
142 * @param k the sample size.
143 * @return the lowest domain value of the hypergeometric distribution.
144 */
145 private int getLowerDomain(int n, int m, int k) {
146 return Math.max(0, m - (n - k));
147 }
148
149 /**
150 * Access the number of successes.
151 *
152 * @return the number of successes.
153 */
154 public int getNumberOfSuccesses() {
155 return numberOfSuccesses;
156 }
157
158 /**
159 * Access the population size.
160 *
161 * @return the population size.
162 */
163 public int getPopulationSize() {
164 return populationSize;
165 }
166
167 /**
168 * Access the sample size.
169 *
170 * @return the sample size.
171 */
172 public int getSampleSize() {
173 return sampleSize;
174 }
175
176 /**
177 * Return the highest domain value for the given hypergeometric distribution
178 * parameters.
179 *
180 * @param m number of successes in the population.
181 * @param k the sample size.
182 * @return the highest domain value of the hypergeometric distribution.
183 */
184 private int getUpperDomain(int m, int k) {
185 return Math.min(k, m);
186 }
187
188 /**
189 * For this distribution, X, this method returns P(X = x).
190 *
191 * @param x the value at which the PMF is evaluated.
192 * @return PMF for this distribution.
193 */
194 public double probability(int x) {
195 double ret;
196
197 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
198 if (x < domain[0] || x > domain[1]) {
199 ret = 0.0;
200 } else {
201 double p = (double) sampleSize / (double) populationSize;
202 double q = (double) (populationSize - sampleSize) / (double) populationSize;
203 double p1 = SaddlePointExpansion.logBinomialProbability(x,
204 numberOfSuccesses, p, q);
205 double p2 =
206 SaddlePointExpansion.logBinomialProbability(sampleSize - x,
207 populationSize - numberOfSuccesses, p, q);
208 double p3 =
209 SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q);
210 ret = Math.exp(p1 + p2 - p3);
211 }
212
213 return ret;
214 }
215
216 /**
217 * For the distribution, X, defined by the given hypergeometric distribution
218 * parameters, this method returns P(X = x).
219 *
220 * @param n the population size.
221 * @param m number of successes in the population.
222 * @param k the sample size.
223 * @param x the value at which the PMF is evaluated.
224 * @return PMF for the distribution.
225 */
226 private double probability(int n, int m, int k, int x) {
227 return Math.exp(MathUtils.binomialCoefficientLog(m, x) +
228 MathUtils.binomialCoefficientLog(n - m, k - x) -
229 MathUtils.binomialCoefficientLog(n, k));
230 }
231
232 /**
233 * Modify the number of successes.
234 *
235 * @param num the new number of successes.
236 * @throws IllegalArgumentException if <code>num</code> is negative.
237 * @deprecated as of 2.1 (class will become immutable in 3.0)
238 */
239 @Deprecated
240 public void setNumberOfSuccesses(int num) {
241 setNumberOfSuccessesInternal(num);
242 }
243 /**
244 * Modify the number of successes.
245 *
246 * @param num the new number of successes.
247 * @throws IllegalArgumentException if <code>num</code> is negative.
248 */
249 private void setNumberOfSuccessesInternal(int num) {
250 if (num < 0) {
251 throw MathRuntimeException.createIllegalArgumentException(
252 "number of successes must be non-negative ({0})", num);
253 }
254 numberOfSuccesses = num;
255 }
256
257 /**
258 * Modify the population size.
259 *
260 * @param size the new population size.
261 * @throws IllegalArgumentException if <code>size</code> is not positive.
262 * @deprecated as of 2.1 (class will become immutable in 3.0)
263 */
264 @Deprecated
265 public void setPopulationSize(int size) {
266 setPopulationSizeInternal(size);
267 }
268 /**
269 * Modify the population size.
270 *
271 * @param size the new population size.
272 * @throws IllegalArgumentException if <code>size</code> is not positive.
273 */
274 private void setPopulationSizeInternal(int size) {
275 if (size <= 0) {
276 throw MathRuntimeException.createIllegalArgumentException(
277 "population size must be positive ({0})", size);
278 }
279 populationSize = size;
280 }
281
282 /**
283 * Modify the sample size.
284 *
285 * @param size the new sample size.
286 * @throws IllegalArgumentException if <code>size</code> is negative.
287 * @deprecated as of 2.1 (class will become immutable in 3.0)
288 */
289 @Deprecated
290 public void setSampleSize(int size) {
291 setSampleSizeInternal(size);
292 }
293 /**
294 * Modify the sample size.
295 *
296 * @param size the new sample size.
297 * @throws IllegalArgumentException if <code>size</code> is negative.
298 */
299 private void setSampleSizeInternal(int size) {
300 if (size < 0) {
301 throw MathRuntimeException.createIllegalArgumentException(
302 "sample size must be positive ({0})", size);
303 }
304 sampleSize = size;
305 }
306
307 /**
308 * For this distribution, X, this method returns P(X ≥ x).
309 *
310 * @param x the value at which the CDF is evaluated.
311 * @return upper tail CDF for this distribution.
312 * @since 1.1
313 */
314 public double upperCumulativeProbability(int x) {
315 double ret;
316
317 final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize);
318 if (x < domain[0]) {
319 ret = 1.0;
320 } else if (x > domain[1]) {
321 ret = 0.0;
322 } else {
323 ret = innerCumulativeProbability(domain[1], x, -1, populationSize, numberOfSuccesses, sampleSize);
324 }
325
326 return ret;
327 }
328
329 /**
330 * For this distribution, X, this method returns P(x0 ≤ X ≤ x1). This
331 * probability is computed by summing the point probabilities for the values
332 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx.
333 *
334 * @param x0 the inclusive, lower bound
335 * @param x1 the inclusive, upper bound
336 * @param dx the direction of summation. 1 indicates summing from x0 to x1.
337 * 0 indicates summing from x1 to x0.
338 * @param n the population size.
339 * @param m number of successes in the population.
340 * @param k the sample size.
341 * @return P(x0 ≤ X ≤ x1).
342 */
343 private double innerCumulativeProbability(int x0, int x1, int dx, int n,
344 int m, int k) {
345 double ret = probability(n, m, k, x0);
346 while (x0 != x1) {
347 x0 += dx;
348 ret += probability(n, m, k, x0);
349 }
350 return ret;
351 }
352 }