/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
017 package org.apache.commons.math.stat;
018
019 import org.apache.commons.math.MathRuntimeException;
020 import org.apache.commons.math.stat.descriptive.UnivariateStatistic;
021 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
022 import org.apache.commons.math.stat.descriptive.moment.Mean;
023 import org.apache.commons.math.stat.descriptive.moment.Variance;
024 import org.apache.commons.math.stat.descriptive.rank.Max;
025 import org.apache.commons.math.stat.descriptive.rank.Min;
026 import org.apache.commons.math.stat.descriptive.rank.Percentile;
027 import org.apache.commons.math.stat.descriptive.summary.Product;
028 import org.apache.commons.math.stat.descriptive.summary.Sum;
029 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
030 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
031
/**
 * StatUtils provides static methods for computing statistics based on data
 * stored in double[] arrays.
 *
 * @version $Revision: 811685 $ $Date: 2009-09-05 13:36:48 -0400 (Sat, 05 Sep 2009) $
 */
038 public final class StatUtils {
039
040 /** sum */
041 private static final UnivariateStatistic SUM = new Sum();
042
043 /** sumSq */
044 private static final UnivariateStatistic SUM_OF_SQUARES = new SumOfSquares();
045
046 /** prod */
047 private static final UnivariateStatistic PRODUCT = new Product();
048
049 /** sumLog */
050 private static final UnivariateStatistic SUM_OF_LOGS = new SumOfLogs();
051
052 /** min */
053 private static final UnivariateStatistic MIN = new Min();
054
055 /** max */
056 private static final UnivariateStatistic MAX = new Max();
057
058 /** mean */
059 private static final UnivariateStatistic MEAN = new Mean();
060
061 /** variance */
062 private static final Variance VARIANCE = new Variance();
063
064 /** percentile */
065 private static final Percentile PERCENTILE = new Percentile();
066
067 /** geometric mean */
068 private static final GeometricMean GEOMETRIC_MEAN = new GeometricMean();
069
070 /**
071 * Private Constructor
072 */
073 private StatUtils() {
074 }
075
076 /**
077 * Returns the sum of the values in the input array, or
078 * <code>Double.NaN</code> if the array is empty.
079 * <p>
080 * Throws <code>IllegalArgumentException</code> if the input array
081 * is null.</p>
082 *
083 * @param values array of values to sum
084 * @return the sum of the values or <code>Double.NaN</code> if the array
085 * is empty
086 * @throws IllegalArgumentException if the array is null
087 */
088 public static double sum(final double[] values) {
089 return SUM.evaluate(values);
090 }
091
092 /**
093 * Returns the sum of the entries in the specified portion of
094 * the input array, or <code>Double.NaN</code> if the designated subarray
095 * is empty.
096 * <p>
097 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
098 *
099 * @param values the input array
100 * @param begin index of the first array element to include
101 * @param length the number of elements to include
102 * @return the sum of the values or Double.NaN if length = 0
103 * @throws IllegalArgumentException if the array is null or the array index
104 * parameters are not valid
105 */
106 public static double sum(final double[] values, final int begin,
107 final int length) {
108 return SUM.evaluate(values, begin, length);
109 }
110
111 /**
112 * Returns the sum of the squares of the entries in the input array, or
113 * <code>Double.NaN</code> if the array is empty.
114 * <p>
115 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
116 *
117 * @param values input array
118 * @return the sum of the squared values or <code>Double.NaN</code> if the
119 * array is empty
120 * @throws IllegalArgumentException if the array is null
121 */
122 public static double sumSq(final double[] values) {
123 return SUM_OF_SQUARES.evaluate(values);
124 }
125
126 /**
127 * Returns the sum of the squares of the entries in the specified portion of
128 * the input array, or <code>Double.NaN</code> if the designated subarray
129 * is empty.
130 * <p>
131 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
132 *
133 * @param values the input array
134 * @param begin index of the first array element to include
135 * @param length the number of elements to include
136 * @return the sum of the squares of the values or Double.NaN if length = 0
137 * @throws IllegalArgumentException if the array is null or the array index
138 * parameters are not valid
139 */
140 public static double sumSq(final double[] values, final int begin,
141 final int length) {
142 return SUM_OF_SQUARES.evaluate(values, begin, length);
143 }
144
145 /**
146 * Returns the product of the entries in the input array, or
147 * <code>Double.NaN</code> if the array is empty.
148 * <p>
149 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
150 *
151 * @param values the input array
152 * @return the product of the values or Double.NaN if the array is empty
153 * @throws IllegalArgumentException if the array is null
154 */
155 public static double product(final double[] values) {
156 return PRODUCT.evaluate(values);
157 }
158
159 /**
160 * Returns the product of the entries in the specified portion of
161 * the input array, or <code>Double.NaN</code> if the designated subarray
162 * is empty.
163 * <p>
164 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
165 *
166 * @param values the input array
167 * @param begin index of the first array element to include
168 * @param length the number of elements to include
169 * @return the product of the values or Double.NaN if length = 0
170 * @throws IllegalArgumentException if the array is null or the array index
171 * parameters are not valid
172 */
173 public static double product(final double[] values, final int begin,
174 final int length) {
175 return PRODUCT.evaluate(values, begin, length);
176 }
177
178 /**
179 * Returns the sum of the natural logs of the entries in the input array, or
180 * <code>Double.NaN</code> if the array is empty.
181 * <p>
182 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
183 * <p>
184 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
185 * </p>
186 *
187 * @param values the input array
188 * @return the sum of the natural logs of the values or Double.NaN if
189 * the array is empty
190 * @throws IllegalArgumentException if the array is null
191 */
192 public static double sumLog(final double[] values) {
193 return SUM_OF_LOGS.evaluate(values);
194 }
195
196 /**
197 * Returns the sum of the natural logs of the entries in the specified portion of
198 * the input array, or <code>Double.NaN</code> if the designated subarray
199 * is empty.
200 * <p>
201 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
202 * <p>
203 * See {@link org.apache.commons.math.stat.descriptive.summary.SumOfLogs}.
204 * </p>
205 *
206 * @param values the input array
207 * @param begin index of the first array element to include
208 * @param length the number of elements to include
209 * @return the sum of the natural logs of the values or Double.NaN if
210 * length = 0
211 * @throws IllegalArgumentException if the array is null or the array index
212 * parameters are not valid
213 */
214 public static double sumLog(final double[] values, final int begin,
215 final int length) {
216 return SUM_OF_LOGS.evaluate(values, begin, length);
217 }
218
219 /**
220 * Returns the arithmetic mean of the entries in the input array, or
221 * <code>Double.NaN</code> if the array is empty.
222 * <p>
223 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
224 * <p>
225 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
226 * details on the computing algorithm.</p>
227 *
228 * @param values the input array
229 * @return the mean of the values or Double.NaN if the array is empty
230 * @throws IllegalArgumentException if the array is null
231 */
232 public static double mean(final double[] values) {
233 return MEAN.evaluate(values);
234 }
235
236 /**
237 * Returns the arithmetic mean of the entries in the specified portion of
238 * the input array, or <code>Double.NaN</code> if the designated subarray
239 * is empty.
240 * <p>
241 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
242 * <p>
243 * See {@link org.apache.commons.math.stat.descriptive.moment.Mean} for
244 * details on the computing algorithm.</p>
245 *
246 * @param values the input array
247 * @param begin index of the first array element to include
248 * @param length the number of elements to include
249 * @return the mean of the values or Double.NaN if length = 0
250 * @throws IllegalArgumentException if the array is null or the array index
251 * parameters are not valid
252 */
253 public static double mean(final double[] values, final int begin,
254 final int length) {
255 return MEAN.evaluate(values, begin, length);
256 }
257
258 /**
259 * Returns the geometric mean of the entries in the input array, or
260 * <code>Double.NaN</code> if the array is empty.
261 * <p>
262 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
263 * <p>
264 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
265 * for details on the computing algorithm.</p>
266 *
267 * @param values the input array
268 * @return the geometric mean of the values or Double.NaN if the array is empty
269 * @throws IllegalArgumentException if the array is null
270 */
271 public static double geometricMean(final double[] values) {
272 return GEOMETRIC_MEAN.evaluate(values);
273 }
274
275 /**
276 * Returns the geometric mean of the entries in the specified portion of
277 * the input array, or <code>Double.NaN</code> if the designated subarray
278 * is empty.
279 * <p>
280 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
281 * <p>
282 * See {@link org.apache.commons.math.stat.descriptive.moment.GeometricMean}
283 * for details on the computing algorithm.</p>
284 *
285 * @param values the input array
286 * @param begin index of the first array element to include
287 * @param length the number of elements to include
288 * @return the geometric mean of the values or Double.NaN if length = 0
289 * @throws IllegalArgumentException if the array is null or the array index
290 * parameters are not valid
291 */
292 public static double geometricMean(final double[] values, final int begin,
293 final int length) {
294 return GEOMETRIC_MEAN.evaluate(values, begin, length);
295 }
296
297
298 /**
299 * Returns the variance of the entries in the input array, or
300 * <code>Double.NaN</code> if the array is empty.
301 * <p>
302 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
303 * details on the computing algorithm.</p>
304 * <p>
305 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
306 * <p>
307 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
308 *
309 * @param values the input array
310 * @return the variance of the values or Double.NaN if the array is empty
311 * @throws IllegalArgumentException if the array is null
312 */
313 public static double variance(final double[] values) {
314 return VARIANCE.evaluate(values);
315 }
316
317 /**
318 * Returns the variance of the entries in the specified portion of
319 * the input array, or <code>Double.NaN</code> if the designated subarray
320 * is empty.
321 * <p>
322 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
323 * details on the computing algorithm.</p>
324 * <p>
325 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
326 * <p>
327 * Throws <code>IllegalArgumentException</code> if the array is null or the
328 * array index parameters are not valid.</p>
329 *
330 * @param values the input array
331 * @param begin index of the first array element to include
332 * @param length the number of elements to include
333 * @return the variance of the values or Double.NaN if length = 0
334 * @throws IllegalArgumentException if the array is null or the array index
335 * parameters are not valid
336 */
337 public static double variance(final double[] values, final int begin,
338 final int length) {
339 return VARIANCE.evaluate(values, begin, length);
340 }
341
342 /**
343 * Returns the variance of the entries in the specified portion of
344 * the input array, using the precomputed mean value. Returns
345 * <code>Double.NaN</code> if the designated subarray is empty.
346 * <p>
347 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
348 * details on the computing algorithm.</p>
349 * <p>
350 * The formula used assumes that the supplied mean value is the arithmetic
351 * mean of the sample data, not a known population parameter. This method
352 * is supplied only to save computation when the mean has already been
353 * computed.</p>
354 * <p>
355 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
356 * <p>
357 * Throws <code>IllegalArgumentException</code> if the array is null or the
358 * array index parameters are not valid.</p>
359 *
360 * @param values the input array
361 * @param mean the precomputed mean value
362 * @param begin index of the first array element to include
363 * @param length the number of elements to include
364 * @return the variance of the values or Double.NaN if length = 0
365 * @throws IllegalArgumentException if the array is null or the array index
366 * parameters are not valid
367 */
368 public static double variance(final double[] values, final double mean,
369 final int begin, final int length) {
370 return VARIANCE.evaluate(values, mean, begin, length);
371 }
372
373 /**
374 * Returns the variance of the entries in the input array, using the
375 * precomputed mean value. Returns <code>Double.NaN</code> if the array
376 * is empty.
377 * <p>
378 * See {@link org.apache.commons.math.stat.descriptive.moment.Variance} for
379 * details on the computing algorithm.</p>
380 * <p>
381 * The formula used assumes that the supplied mean value is the arithmetic
382 * mean of the sample data, not a known population parameter. This method
383 * is supplied only to save computation when the mean has already been
384 * computed.</p>
385 * <p>
386 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
387 * <p>
388 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
389 *
390 * @param values the input array
391 * @param mean the precomputed mean value
392 * @return the variance of the values or Double.NaN if the array is empty
393 * @throws IllegalArgumentException if the array is null
394 */
395 public static double variance(final double[] values, final double mean) {
396 return VARIANCE.evaluate(values, mean);
397 }
398
399 /**
400 * Returns the maximum of the entries in the input array, or
401 * <code>Double.NaN</code> if the array is empty.
402 * <p>
403 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
404 * <p>
405 * <ul>
406 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
407 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
408 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
409 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
410 * </ul></p>
411 *
412 * @param values the input array
413 * @return the maximum of the values or Double.NaN if the array is empty
414 * @throws IllegalArgumentException if the array is null
415 */
416 public static double max(final double[] values) {
417 return MAX.evaluate(values);
418 }
419
420 /**
421 * Returns the maximum of the entries in the specified portion of
422 * the input array, or <code>Double.NaN</code> if the designated subarray
423 * is empty.
424 * <p>
425 * Throws <code>IllegalArgumentException</code> if the array is null or
426 * the array index parameters are not valid.</p>
427 * <p>
428 * <ul>
429 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
430 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
431 * <li>If any of the values equals <code>Double.POSITIVE_INFINITY</code>,
432 * the result is <code>Double.POSITIVE_INFINITY.</code></li>
433 * </ul></p>
434 *
435 * @param values the input array
436 * @param begin index of the first array element to include
437 * @param length the number of elements to include
438 * @return the maximum of the values or Double.NaN if length = 0
439 * @throws IllegalArgumentException if the array is null or the array index
440 * parameters are not valid
441 */
442 public static double max(final double[] values, final int begin,
443 final int length) {
444 return MAX.evaluate(values, begin, length);
445 }
446
447 /**
448 * Returns the minimum of the entries in the input array, or
449 * <code>Double.NaN</code> if the array is empty.
450 * <p>
451 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
452 * <p>
453 * <ul>
454 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
455 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
456 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
457 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
458 * </ul> </p>
459 *
460 * @param values the input array
461 * @return the minimum of the values or Double.NaN if the array is empty
462 * @throws IllegalArgumentException if the array is null
463 */
464 public static double min(final double[] values) {
465 return MIN.evaluate(values);
466 }
467
468 /**
469 * Returns the minimum of the entries in the specified portion of
470 * the input array, or <code>Double.NaN</code> if the designated subarray
471 * is empty.
472 * <p>
473 * Throws <code>IllegalArgumentException</code> if the array is null or
474 * the array index parameters are not valid.</p>
475 * <p>
476 * <ul>
477 * <li>The result is <code>NaN</code> iff all values are <code>NaN</code>
478 * (i.e. <code>NaN</code> values have no impact on the value of the statistic).</li>
479 * <li>If any of the values equals <code>Double.NEGATIVE_INFINITY</code>,
480 * the result is <code>Double.NEGATIVE_INFINITY.</code></li>
481 * </ul></p>
482 *
483 * @param values the input array
484 * @param begin index of the first array element to include
485 * @param length the number of elements to include
486 * @return the minimum of the values or Double.NaN if length = 0
487 * @throws IllegalArgumentException if the array is null or the array index
488 * parameters are not valid
489 */
490 public static double min(final double[] values, final int begin,
491 final int length) {
492 return MIN.evaluate(values, begin, length);
493 }
494
495 /**
496 * Returns an estimate of the <code>p</code>th percentile of the values
497 * in the <code>values</code> array.
498 * <p>
499 * <ul>
500 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
501 * <code>0</code></li></p>
502 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
503 * if <code>values</code> has length <code>1</code></li>
504 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
505 * is null or p is not a valid quantile value (p must be greater than 0
506 * and less than or equal to 100)</li>
507 * </ul></p>
508 * <p>
509 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
510 * a description of the percentile estimation algorithm used.</p>
511 *
512 * @param values input array of values
513 * @param p the percentile value to compute
514 * @return the percentile value or Double.NaN if the array is empty
515 * @throws IllegalArgumentException if <code>values</code> is null
516 * or p is invalid
517 */
518 public static double percentile(final double[] values, final double p) {
519 return PERCENTILE.evaluate(values,p);
520 }
521
522 /**
523 * Returns an estimate of the <code>p</code>th percentile of the values
524 * in the <code>values</code> array, starting with the element in (0-based)
525 * position <code>begin</code> in the array and including <code>length</code>
526 * values.
527 * <p>
528 * <ul>
529 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
530 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
531 * if <code>length = 1 </code></li>
532 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
533 * is null , <code>begin</code> or <code>length</code> is invalid, or
534 * <code>p</code> is not a valid quantile value (p must be greater than 0
535 * and less than or equal to 100)</li>
536 * </ul></p>
537 * <p>
538 * See {@link org.apache.commons.math.stat.descriptive.rank.Percentile} for
539 * a description of the percentile estimation algorithm used.</p>
540 *
541 * @param values array of input values
542 * @param p the percentile to compute
543 * @param begin the first (0-based) element to include in the computation
544 * @param length the number of array elements to include
545 * @return the percentile value
546 * @throws IllegalArgumentException if the parameters are not valid or the
547 * input array is null
548 */
549 public static double percentile(final double[] values, final int begin,
550 final int length, final double p) {
551 return PERCENTILE.evaluate(values, begin, length, p);
552 }
553
554 /**
555 * Returns the sum of the (signed) differences between corresponding elements of the
556 * input arrays -- i.e., sum(sample1[i] - sample2[i]).
557 *
558 * @param sample1 the first array
559 * @param sample2 the second array
560 * @return sum of paired differences
561 * @throws IllegalArgumentException if the arrays do not have the same
562 * (positive) length
563 */
564 public static double sumDifference(final double[] sample1, final double[] sample2)
565 throws IllegalArgumentException {
566 int n = sample1.length;
567 if ((n != sample2.length) || (n < 1)) {
568 throw MathRuntimeException.createIllegalArgumentException(
569 "input arrays must have the same positive length ({0} and {1})",
570 n, sample2.length);
571 }
572 double result = 0;
573 for (int i = 0; i < n; i++) {
574 result += sample1[i] - sample2[i];
575 }
576 return result;
577 }
578
579 /**
580 * Returns the mean of the (signed) differences between corresponding elements of the
581 * input arrays -- i.e., sum(sample1[i] - sample2[i]) / sample1.length.
582 *
583 * @param sample1 the first array
584 * @param sample2 the second array
585 * @return mean of paired differences
586 * @throws IllegalArgumentException if the arrays do not have the same
587 * (positive) length
588 */
589 public static double meanDifference(final double[] sample1, final double[] sample2)
590 throws IllegalArgumentException {
591 return sumDifference(sample1, sample2) / sample1.length;
592 }
593
594 /**
595 * Returns the variance of the (signed) differences between corresponding elements of the
596 * input arrays -- i.e., var(sample1[i] - sample2[i]).
597 *
598 * @param sample1 the first array
599 * @param sample2 the second array
600 * @param meanDifference the mean difference between corresponding entries
601 * @see #meanDifference(double[],double[])
602 * @return variance of paired differences
603 * @throws IllegalArgumentException if the arrays do not have the same
604 * length or their common length is less than 2.
605 */
606 public static double varianceDifference(final double[] sample1, final double[] sample2,
607 double meanDifference) throws IllegalArgumentException {
608 double sum1 = 0d;
609 double sum2 = 0d;
610 double diff = 0d;
611 int n = sample1.length;
612 if (n < 2 || n != sample2.length) {
613 throw MathRuntimeException.createIllegalArgumentException(
614 "input arrays must have the same length and at least two elements ({0} and {1})",
615 n, sample2.length);
616 }
617 for (int i = 0; i < n; i++) {
618 diff = sample1[i] - sample2[i];
619 sum1 += (diff - meanDifference) *(diff - meanDifference);
620 sum2 += diff - meanDifference;
621 }
622 return (sum1 - (sum2 * sum2 / n)) / (n - 1);
623 }
624
625 }