001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.inference;
018
019 import org.apache.commons.math.MathException;
020 import org.apache.commons.math.MathRuntimeException;
021 import org.apache.commons.math.distribution.ChiSquaredDistribution;
022 import org.apache.commons.math.distribution.ChiSquaredDistributionImpl;
023
024 /**
025 * Implements Chi-Square test statistics defined in the
026 * {@link UnknownDistributionChiSquareTest} interface.
027 *
028 * @version $Revision: 811833 $ $Date: 2009-09-06 12:27:50 -0400 (Sun, 06 Sep 2009) $
029 */
030 public class ChiSquareTestImpl implements UnknownDistributionChiSquareTest {
031
032 /** Distribution used to compute inference statistics. */
033 private ChiSquaredDistribution distribution;
034
035 /**
036 * Construct a ChiSquareTestImpl
037 */
038 public ChiSquareTestImpl() {
039 this(new ChiSquaredDistributionImpl(1.0));
040 }
041
042 /**
043 * Create a test instance using the given distribution for computing
044 * inference statistics.
045 * @param x distribution used to compute inference statistics.
046 * @since 1.2
047 */
048 public ChiSquareTestImpl(ChiSquaredDistribution x) {
049 super();
050 setDistribution(x);
051 }
052 /**
053 * {@inheritDoc}
054 * <p><strong>Note: </strong>This implementation rescales the
055 * <code>expected</code> array if necessary to ensure that the sum of the
056 * expected and observed counts are equal.</p>
057 *
058 * @param observed array of observed frequency counts
059 * @param expected array of expected frequency counts
060 * @return chi-square test statistic
061 * @throws IllegalArgumentException if preconditions are not met
062 * or length is less than 2
063 */
064 public double chiSquare(double[] expected, long[] observed)
065 throws IllegalArgumentException {
066 if (expected.length < 2) {
067 throw MathRuntimeException.createIllegalArgumentException(
068 "expected array length = {0}, must be at least 2",
069 expected.length);
070 }
071 if (expected.length != observed.length) {
072 throw MathRuntimeException.createIllegalArgumentException(
073 "dimension mismatch {0} != {1}", expected.length, observed.length);
074 }
075 checkPositive(expected);
076 checkNonNegative(observed);
077 double sumExpected = 0d;
078 double sumObserved = 0d;
079 for (int i = 0; i < observed.length; i++) {
080 sumExpected += expected[i];
081 sumObserved += observed[i];
082 }
083 double ratio = 1.0d;
084 boolean rescale = false;
085 if (Math.abs(sumExpected - sumObserved) > 10E-6) {
086 ratio = sumObserved / sumExpected;
087 rescale = true;
088 }
089 double sumSq = 0.0d;
090 for (int i = 0; i < observed.length; i++) {
091 if (rescale) {
092 final double dev = observed[i] - ratio * expected[i];
093 sumSq += dev * dev / (ratio * expected[i]);
094 } else {
095 final double dev = observed[i] - expected[i];
096 sumSq += dev * dev / expected[i];
097 }
098 }
099 return sumSq;
100 }
101
102 /**
103 * {@inheritDoc}
104 * <p><strong>Note: </strong>This implementation rescales the
105 * <code>expected</code> array if necessary to ensure that the sum of the
106 * expected and observed counts are equal.</p>
107 *
108 * @param observed array of observed frequency counts
109 * @param expected array of expected frequency counts
110 * @return p-value
111 * @throws IllegalArgumentException if preconditions are not met
112 * @throws MathException if an error occurs computing the p-value
113 */
114 public double chiSquareTest(double[] expected, long[] observed)
115 throws IllegalArgumentException, MathException {
116 distribution.setDegreesOfFreedom(expected.length - 1.0);
117 return 1.0 - distribution.cumulativeProbability(
118 chiSquare(expected, observed));
119 }
120
121 /**
122 * {@inheritDoc}
123 * <p><strong>Note: </strong>This implementation rescales the
124 * <code>expected</code> array if necessary to ensure that the sum of the
125 * expected and observed counts are equal.</p>
126 *
127 * @param observed array of observed frequency counts
128 * @param expected array of expected frequency counts
129 * @param alpha significance level of the test
130 * @return true iff null hypothesis can be rejected with confidence
131 * 1 - alpha
132 * @throws IllegalArgumentException if preconditions are not met
133 * @throws MathException if an error occurs performing the test
134 */
135 public boolean chiSquareTest(double[] expected, long[] observed,
136 double alpha) throws IllegalArgumentException, MathException {
137 if ((alpha <= 0) || (alpha > 0.5)) {
138 throw MathRuntimeException.createIllegalArgumentException(
139 "out of bounds significance level {0}, must be between {1} and {2}",
140 alpha, 0, 0.5);
141 }
142 return chiSquareTest(expected, observed) < alpha;
143 }
144
145 /**
146 * @param counts array representation of 2-way table
147 * @return chi-square test statistic
148 * @throws IllegalArgumentException if preconditions are not met
149 */
150 public double chiSquare(long[][] counts) throws IllegalArgumentException {
151
152 checkArray(counts);
153 int nRows = counts.length;
154 int nCols = counts[0].length;
155
156 // compute row, column and total sums
157 double[] rowSum = new double[nRows];
158 double[] colSum = new double[nCols];
159 double total = 0.0d;
160 for (int row = 0; row < nRows; row++) {
161 for (int col = 0; col < nCols; col++) {
162 rowSum[row] += counts[row][col];
163 colSum[col] += counts[row][col];
164 total += counts[row][col];
165 }
166 }
167
168 // compute expected counts and chi-square
169 double sumSq = 0.0d;
170 double expected = 0.0d;
171 for (int row = 0; row < nRows; row++) {
172 for (int col = 0; col < nCols; col++) {
173 expected = (rowSum[row] * colSum[col]) / total;
174 sumSq += ((counts[row][col] - expected) *
175 (counts[row][col] - expected)) / expected;
176 }
177 }
178 return sumSq;
179 }
180
181 /**
182 * @param counts array representation of 2-way table
183 * @return p-value
184 * @throws IllegalArgumentException if preconditions are not met
185 * @throws MathException if an error occurs computing the p-value
186 */
187 public double chiSquareTest(long[][] counts)
188 throws IllegalArgumentException, MathException {
189 checkArray(counts);
190 double df = ((double) counts.length -1) * ((double) counts[0].length - 1);
191 distribution.setDegreesOfFreedom(df);
192 return 1 - distribution.cumulativeProbability(chiSquare(counts));
193 }
194
195 /**
196 * @param counts array representation of 2-way table
197 * @param alpha significance level of the test
198 * @return true iff null hypothesis can be rejected with confidence
199 * 1 - alpha
200 * @throws IllegalArgumentException if preconditions are not met
201 * @throws MathException if an error occurs performing the test
202 */
203 public boolean chiSquareTest(long[][] counts, double alpha)
204 throws IllegalArgumentException, MathException {
205 if ((alpha <= 0) || (alpha > 0.5)) {
206 throw MathRuntimeException.createIllegalArgumentException(
207 "out of bounds significance level {0}, must be between {1} and {2}",
208 alpha, 0.0, 0.5);
209 }
210 return chiSquareTest(counts) < alpha;
211 }
212
213 /**
214 * @param observed1 array of observed frequency counts of the first data set
215 * @param observed2 array of observed frequency counts of the second data set
216 * @return chi-square test statistic
217 * @throws IllegalArgumentException if preconditions are not met
218 * @since 1.2
219 */
220 public double chiSquareDataSetsComparison(long[] observed1, long[] observed2)
221 throws IllegalArgumentException {
222
223 // Make sure lengths are same
224 if (observed1.length < 2) {
225 throw MathRuntimeException.createIllegalArgumentException(
226 "observed array length = {0}, must be at least 2",
227 observed1.length);
228 }
229 if (observed1.length != observed2.length) {
230 throw MathRuntimeException.createIllegalArgumentException(
231 "dimension mismatch {0} != {1}",
232 observed1.length, observed2.length);
233 }
234
235 // Ensure non-negative counts
236 checkNonNegative(observed1);
237 checkNonNegative(observed2);
238
239 // Compute and compare count sums
240 long countSum1 = 0;
241 long countSum2 = 0;
242 boolean unequalCounts = false;
243 double weight = 0.0;
244 for (int i = 0; i < observed1.length; i++) {
245 countSum1 += observed1[i];
246 countSum2 += observed2[i];
247 }
248 // Ensure neither sample is uniformly 0
249 if (countSum1 == 0) {
250 throw MathRuntimeException.createIllegalArgumentException(
251 "observed counts are all 0 in first observed array");
252 }
253 if (countSum2 == 0) {
254 throw MathRuntimeException.createIllegalArgumentException(
255 "observed counts are all 0 in second observed array");
256 }
257 // Compare and compute weight only if different
258 unequalCounts = countSum1 != countSum2;
259 if (unequalCounts) {
260 weight = Math.sqrt((double) countSum1 / (double) countSum2);
261 }
262 // Compute ChiSquare statistic
263 double sumSq = 0.0d;
264 double dev = 0.0d;
265 double obs1 = 0.0d;
266 double obs2 = 0.0d;
267 for (int i = 0; i < observed1.length; i++) {
268 if (observed1[i] == 0 && observed2[i] == 0) {
269 throw MathRuntimeException.createIllegalArgumentException(
270 "observed counts are both zero for entry {0}", i);
271 } else {
272 obs1 = observed1[i];
273 obs2 = observed2[i];
274 if (unequalCounts) { // apply weights
275 dev = obs1/weight - obs2 * weight;
276 } else {
277 dev = obs1 - obs2;
278 }
279 sumSq += (dev * dev) / (obs1 + obs2);
280 }
281 }
282 return sumSq;
283 }
284
285 /**
286 * @param observed1 array of observed frequency counts of the first data set
287 * @param observed2 array of observed frequency counts of the second data set
288 * @return p-value
289 * @throws IllegalArgumentException if preconditions are not met
290 * @throws MathException if an error occurs computing the p-value
291 * @since 1.2
292 */
293 public double chiSquareTestDataSetsComparison(long[] observed1, long[] observed2)
294 throws IllegalArgumentException, MathException {
295 distribution.setDegreesOfFreedom((double) observed1.length - 1);
296 return 1 - distribution.cumulativeProbability(
297 chiSquareDataSetsComparison(observed1, observed2));
298 }
299
300 /**
301 * @param observed1 array of observed frequency counts of the first data set
302 * @param observed2 array of observed frequency counts of the second data set
303 * @param alpha significance level of the test
304 * @return true iff null hypothesis can be rejected with confidence
305 * 1 - alpha
306 * @throws IllegalArgumentException if preconditions are not met
307 * @throws MathException if an error occurs performing the test
308 * @since 1.2
309 */
310 public boolean chiSquareTestDataSetsComparison(long[] observed1, long[] observed2,
311 double alpha) throws IllegalArgumentException, MathException {
312 if ((alpha <= 0) || (alpha > 0.5)) {
313 throw MathRuntimeException.createIllegalArgumentException(
314 "out of bounds significance level {0}, must be between {1} and {2}",
315 alpha, 0.0, 0.5);
316 }
317 return chiSquareTestDataSetsComparison(observed1, observed2) < alpha;
318 }
319
320 /**
321 * Checks to make sure that the input long[][] array is rectangular,
322 * has at least 2 rows and 2 columns, and has all non-negative entries,
323 * throwing IllegalArgumentException if any of these checks fail.
324 *
325 * @param in input 2-way table to check
326 * @throws IllegalArgumentException if the array is not valid
327 */
328 private void checkArray(long[][] in) throws IllegalArgumentException {
329
330 if (in.length < 2) {
331 throw MathRuntimeException.createIllegalArgumentException(
332 "invalid row dimension: {0} (must be at least 2)",
333 in.length);
334 }
335
336 if (in[0].length < 2) {
337 throw MathRuntimeException.createIllegalArgumentException(
338 "invalid column dimension: {0} (must be at least 2)",
339 in[0].length);
340 }
341
342 checkRectangular(in);
343 checkNonNegative(in);
344
345 }
346
347 //--------------------- Private array methods -- should find a utility home for these
348
349 /**
350 * Throws IllegalArgumentException if the input array is not rectangular.
351 *
352 * @param in array to be tested
353 * @throws NullPointerException if input array is null
354 * @throws IllegalArgumentException if input array is not rectangular
355 */
356 private void checkRectangular(long[][] in) {
357 for (int i = 1; i < in.length; i++) {
358 if (in[i].length != in[0].length) {
359 throw MathRuntimeException.createIllegalArgumentException(
360 "some rows have length {0} while others have length {1}",
361 in[i].length, in[0].length);
362 }
363 }
364 }
365
366 /**
367 * Check all entries of the input array are > 0.
368 *
369 * @param in array to be tested
370 * @exception IllegalArgumentException if one entry is not positive
371 */
372 private void checkPositive(double[] in) throws IllegalArgumentException {
373 for (int i = 0; i < in.length; i++) {
374 if (in[i] <= 0) {
375 throw MathRuntimeException.createIllegalArgumentException(
376 "element {0} is not positive: {1}",
377 i, in[i]);
378 }
379 }
380 }
381
382 /**
383 * Check all entries of the input array are >= 0.
384 *
385 * @param in array to be tested
386 * @exception IllegalArgumentException if one entry is negative
387 */
388 private void checkNonNegative(long[] in) throws IllegalArgumentException {
389 for (int i = 0; i < in.length; i++) {
390 if (in[i] < 0) {
391 throw MathRuntimeException.createIllegalArgumentException(
392 "element {0} is negative: {1}",
393 i, in[i]);
394 }
395 }
396 }
397
398 /**
399 * Check all entries of the input array are >= 0.
400 *
401 * @param in array to be tested
402 * @exception IllegalArgumentException if one entry is negative
403 */
404 private void checkNonNegative(long[][] in) throws IllegalArgumentException {
405 for (int i = 0; i < in.length; i ++) {
406 for (int j = 0; j < in[i].length; j++) {
407 if (in[i][j] < 0) {
408 throw MathRuntimeException.createIllegalArgumentException(
409 "element ({0}, {1}) is negative: {2}",
410 i, j, in[i][j]);
411 }
412 }
413 }
414 }
415
416 /**
417 * Modify the distribution used to compute inference statistics.
418 *
419 * @param value
420 * the new distribution
421 * @since 1.2
422 */
423 public void setDistribution(ChiSquaredDistribution value) {
424 distribution = value;
425 }
426 }