001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.descriptive;
018
019 import java.io.Serializable;
020 import java.lang.reflect.InvocationTargetException;
021 import java.util.Arrays;
022
023 import org.apache.commons.math.MathRuntimeException;
024 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
025 import org.apache.commons.math.stat.descriptive.moment.Kurtosis;
026 import org.apache.commons.math.stat.descriptive.moment.Mean;
027 import org.apache.commons.math.stat.descriptive.moment.Skewness;
028 import org.apache.commons.math.stat.descriptive.moment.Variance;
029 import org.apache.commons.math.stat.descriptive.rank.Max;
030 import org.apache.commons.math.stat.descriptive.rank.Min;
031 import org.apache.commons.math.stat.descriptive.rank.Percentile;
032 import org.apache.commons.math.stat.descriptive.summary.Sum;
033 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
034 import org.apache.commons.math.util.ResizableDoubleArray;
035
036
037 /**
038 * Maintains a dataset of values of a single variable and computes descriptive
039 * statistics based on stored data. The {@link #getWindowSize() windowSize}
040 * property sets a limit on the number of values that can be stored in the
041 * dataset. The default value, INFINITE_WINDOW, puts no limit on the size of
042 * the dataset. This value should be used with caution, as the backing store
043 * will grow without bound in this case. For very large datasets,
044 * {@link SummaryStatistics}, which does not store the dataset, should be used
045 * instead of this class. If <code>windowSize</code> is not INFINITE_WINDOW and
046 * more values are added than can be stored in the dataset, new values are
047 * added in a "rolling" manner, with new values replacing the "oldest" values
048 * in the dataset.
049 *
050 * <p>Note: this class is not threadsafe. Use
051 * {@link SynchronizedDescriptiveStatistics} if concurrent access from multiple
052 * threads is required.</p>
053 *
054 * @version $Revision: 885278 $ $Date: 2009-11-29 16:47:51 -0500 (Sun, 29 Nov 2009) $
055 */
056 public class DescriptiveStatistics implements StatisticalSummary, Serializable {
057
058 /**
059 * Represents an infinite window size. When the {@link #getWindowSize()}
060 * returns this value, there is no limit to the number of data values
061 * that can be stored in the dataset.
062 */
063 public static final int INFINITE_WINDOW = -1;
064
065 /** Serialization UID */
066 private static final long serialVersionUID = 4133067267405273064L;
067
068 /** Name of the setQuantile method. */
069 private static final String SET_QUANTILE_METHOD_NAME = "setQuantile";
070
071 /** Message for unsupported setQuantile. */
072 private static final String UNSUPPORTED_METHOD_MESSAGE =
073 "percentile implementation {0} does not support {1}";
074
075 /** Message for illegal accesson setquantile. */
076 private static final String ILLEGAL_ACCESS_MESSAGE =
077 "cannot access {0} method in percentile implementation {1}";
078
079 /** hold the window size **/
080 protected int windowSize = INFINITE_WINDOW;
081
082 /**
083 * Stored data values
084 */
085 protected ResizableDoubleArray eDA = new ResizableDoubleArray();
086
087 /** Mean statistic implementation - can be reset by setter. */
088 private UnivariateStatistic meanImpl = new Mean();
089
090 /** Geometric mean statistic implementation - can be reset by setter. */
091 private UnivariateStatistic geometricMeanImpl = new GeometricMean();
092
093 /** Kurtosis statistic implementation - can be reset by setter. */
094 private UnivariateStatistic kurtosisImpl = new Kurtosis();
095
096 /** Maximum statistic implementation - can be reset by setter. */
097 private UnivariateStatistic maxImpl = new Max();
098
099 /** Minimum statistic implementation - can be reset by setter. */
100 private UnivariateStatistic minImpl = new Min();
101
102 /** Percentile statistic implementation - can be reset by setter. */
103 private UnivariateStatistic percentileImpl = new Percentile();
104
105 /** Skewness statistic implementation - can be reset by setter. */
106 private UnivariateStatistic skewnessImpl = new Skewness();
107
108 /** Variance statistic implementation - can be reset by setter. */
109 private UnivariateStatistic varianceImpl = new Variance();
110
111 /** Sum of squares statistic implementation - can be reset by setter. */
112 private UnivariateStatistic sumsqImpl = new SumOfSquares();
113
114 /** Sum statistic implementation - can be reset by setter. */
115 private UnivariateStatistic sumImpl = new Sum();
116
117 /**
118 * Construct a DescriptiveStatistics instance with an infinite window
119 */
120 public DescriptiveStatistics() {
121 }
122
123 /**
124 * Construct a DescriptiveStatistics instance with the specified window
125 *
126 * @param window the window size.
127 */
128 public DescriptiveStatistics(int window) {
129 setWindowSize(window);
130 }
131
132 /**
133 * Copy constructor. Construct a new DescriptiveStatistics instance that
134 * is a copy of original.
135 *
136 * @param original DescriptiveStatistics instance to copy
137 */
138 public DescriptiveStatistics(DescriptiveStatistics original) {
139 copy(original, this);
140 }
141
142 /**
143 * Adds the value to the dataset. If the dataset is at the maximum size
144 * (i.e., the number of stored elements equals the currently configured
145 * windowSize), the first (oldest) element in the dataset is discarded
146 * to make room for the new value.
147 *
148 * @param v the value to be added
149 */
150 public void addValue(double v) {
151 if (windowSize != INFINITE_WINDOW) {
152 if (getN() == windowSize) {
153 eDA.addElementRolling(v);
154 } else if (getN() < windowSize) {
155 eDA.addElement(v);
156 }
157 } else {
158 eDA.addElement(v);
159 }
160 }
161
162 /**
163 * Removes the most recent value from the dataset.
164 */
165 public void removeMostRecentValue() {
166 eDA.discardMostRecentElements(1);
167 }
168
169 /**
170 * Replaces the most recently stored value with the given value.
171 * There must be at least one element stored to call this method.
172 *
173 * @param v the value to replace the most recent stored value
174 * @return replaced value
175 */
176 public double replaceMostRecentValue(double v) {
177 return eDA.substituteMostRecentElement(v);
178 }
179
180 /**
181 * Returns the <a href="http://www.xycoon.com/arithmetic_mean.htm">
182 * arithmetic mean </a> of the available values
183 * @return The mean or Double.NaN if no values have been added.
184 */
185 public double getMean() {
186 return apply(meanImpl);
187 }
188
189 /**
190 * Returns the <a href="http://www.xycoon.com/geometric_mean.htm">
191 * geometric mean </a> of the available values
192 * @return The geometricMean, Double.NaN if no values have been added,
193 * or if the product of the available values is less than or equal to 0.
194 */
195 public double getGeometricMean() {
196 return apply(geometricMeanImpl);
197 }
198
199 /**
200 * Returns the variance of the available values.
201 * @return The variance, Double.NaN if no values have been added
202 * or 0.0 for a single value set.
203 */
204 public double getVariance() {
205 return apply(varianceImpl);
206 }
207
208 /**
209 * Returns the standard deviation of the available values.
210 * @return The standard deviation, Double.NaN if no values have been added
211 * or 0.0 for a single value set.
212 */
213 public double getStandardDeviation() {
214 double stdDev = Double.NaN;
215 if (getN() > 0) {
216 if (getN() > 1) {
217 stdDev = Math.sqrt(getVariance());
218 } else {
219 stdDev = 0.0;
220 }
221 }
222 return stdDev;
223 }
224
225 /**
226 * Returns the skewness of the available values. Skewness is a
227 * measure of the asymmetry of a given distribution.
228 * @return The skewness, Double.NaN if no values have been added
229 * or 0.0 for a value set <=2.
230 */
231 public double getSkewness() {
232 return apply(skewnessImpl);
233 }
234
235 /**
236 * Returns the Kurtosis of the available values. Kurtosis is a
237 * measure of the "peakedness" of a distribution
238 * @return The kurtosis, Double.NaN if no values have been added, or 0.0
239 * for a value set <=3.
240 */
241 public double getKurtosis() {
242 return apply(kurtosisImpl);
243 }
244
245 /**
246 * Returns the maximum of the available values
247 * @return The max or Double.NaN if no values have been added.
248 */
249 public double getMax() {
250 return apply(maxImpl);
251 }
252
253 /**
254 * Returns the minimum of the available values
255 * @return The min or Double.NaN if no values have been added.
256 */
257 public double getMin() {
258 return apply(minImpl);
259 }
260
261 /**
262 * Returns the number of available values
263 * @return The number of available values
264 */
265 public long getN() {
266 return eDA.getNumElements();
267 }
268
269 /**
270 * Returns the sum of the values that have been added to Univariate.
271 * @return The sum or Double.NaN if no values have been added
272 */
273 public double getSum() {
274 return apply(sumImpl);
275 }
276
277 /**
278 * Returns the sum of the squares of the available values.
279 * @return The sum of the squares or Double.NaN if no
280 * values have been added.
281 */
282 public double getSumsq() {
283 return apply(sumsqImpl);
284 }
285
286 /**
287 * Resets all statistics and storage
288 */
289 public void clear() {
290 eDA.clear();
291 }
292
293
294 /**
295 * Returns the maximum number of values that can be stored in the
296 * dataset, or INFINITE_WINDOW (-1) if there is no limit.
297 *
298 * @return The current window size or -1 if its Infinite.
299 */
300 public int getWindowSize() {
301 return windowSize;
302 }
303
304 /**
305 * WindowSize controls the number of values which contribute
306 * to the reported statistics. For example, if
307 * windowSize is set to 3 and the values {1,2,3,4,5}
308 * have been added <strong> in that order</strong>
309 * then the <i>available values</i> are {3,4,5} and all
310 * reported statistics will be based on these values
311 * @param windowSize sets the size of the window.
312 */
313 public void setWindowSize(int windowSize) {
314 if (windowSize < 1) {
315 if (windowSize != INFINITE_WINDOW) {
316 throw MathRuntimeException.createIllegalArgumentException(
317 "window size must be positive ({0})", windowSize);
318 }
319 }
320
321 this.windowSize = windowSize;
322
323 // We need to check to see if we need to discard elements
324 // from the front of the array. If the windowSize is less than
325 // the current number of elements.
326 if (windowSize != INFINITE_WINDOW && windowSize < eDA.getNumElements()) {
327 eDA.discardFrontElements(eDA.getNumElements() - windowSize);
328 }
329 }
330
331 /**
332 * Returns the current set of values in an array of double primitives.
333 * The order of addition is preserved. The returned array is a fresh
334 * copy of the underlying data -- i.e., it is not a reference to the
335 * stored data.
336 *
337 * @return returns the current set of numbers in the order in which they
338 * were added to this set
339 */
340 public double[] getValues() {
341 return eDA.getElements();
342 }
343
344 /**
345 * Returns the current set of values in an array of double primitives,
346 * sorted in ascending order. The returned array is a fresh
347 * copy of the underlying data -- i.e., it is not a reference to the
348 * stored data.
349 * @return returns the current set of
350 * numbers sorted in ascending order
351 */
352 public double[] getSortedValues() {
353 double[] sort = getValues();
354 Arrays.sort(sort);
355 return sort;
356 }
357
358 /**
359 * Returns the element at the specified index
360 * @param index The Index of the element
361 * @return return the element at the specified index
362 */
363 public double getElement(int index) {
364 return eDA.getElement(index);
365 }
366
367 /**
368 * Returns an estimate for the pth percentile of the stored values.
369 * <p>
370 * The implementation provided here follows the first estimation procedure presented
371 * <a href="http://www.itl.nist.gov/div898/handbook/prc/section2/prc252.htm">here.</a>
372 * </p><p>
373 * <strong>Preconditions</strong>:<ul>
374 * <li><code>0 < p ≤ 100</code> (otherwise an
375 * <code>IllegalArgumentException</code> is thrown)</li>
376 * <li>at least one value must be stored (returns <code>Double.NaN
377 * </code> otherwise)</li>
378 * </ul></p>
379 *
380 * @param p the requested percentile (scaled from 0 - 100)
381 * @return An estimate for the pth percentile of the stored data
382 * @throws IllegalStateException if percentile implementation has been
383 * overridden and the supplied implementation does not support setQuantile
384 * values
385 */
386 public double getPercentile(double p) {
387 if (percentileImpl instanceof Percentile) {
388 ((Percentile) percentileImpl).setQuantile(p);
389 } else {
390 try {
391 percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME,
392 new Class[] {Double.TYPE}).invoke(percentileImpl,
393 new Object[] {Double.valueOf(p)});
394 } catch (NoSuchMethodException e1) { // Setter guard should prevent
395 throw MathRuntimeException.createIllegalArgumentException(
396 UNSUPPORTED_METHOD_MESSAGE,
397 percentileImpl.getClass().getName(), SET_QUANTILE_METHOD_NAME);
398 } catch (IllegalAccessException e2) {
399 throw MathRuntimeException.createIllegalArgumentException(
400 ILLEGAL_ACCESS_MESSAGE,
401 SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
402 } catch (InvocationTargetException e3) {
403 throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
404 }
405 }
406 return apply(percentileImpl);
407 }
408
409 /**
410 * Generates a text report displaying univariate statistics from values
411 * that have been added. Each statistic is displayed on a separate
412 * line.
413 *
414 * @return String with line feeds displaying statistics
415 */
416 @Override
417 public String toString() {
418 StringBuffer outBuffer = new StringBuffer();
419 String endl = "\n";
420 outBuffer.append("DescriptiveStatistics:").append(endl);
421 outBuffer.append("n: ").append(getN()).append(endl);
422 outBuffer.append("min: ").append(getMin()).append(endl);
423 outBuffer.append("max: ").append(getMax()).append(endl);
424 outBuffer.append("mean: ").append(getMean()).append(endl);
425 outBuffer.append("std dev: ").append(getStandardDeviation())
426 .append(endl);
427 outBuffer.append("median: ").append(getPercentile(50)).append(endl);
428 outBuffer.append("skewness: ").append(getSkewness()).append(endl);
429 outBuffer.append("kurtosis: ").append(getKurtosis()).append(endl);
430 return outBuffer.toString();
431 }
432
433 /**
434 * Apply the given statistic to the data associated with this set of statistics.
435 * @param stat the statistic to apply
436 * @return the computed value of the statistic.
437 */
438 public double apply(UnivariateStatistic stat) {
439 return stat.evaluate(eDA.getInternalValues(), eDA.start(), eDA.getNumElements());
440 }
441
442 // Implementation getters and setter
443
444 /**
445 * Returns the currently configured mean implementation.
446 *
447 * @return the UnivariateStatistic implementing the mean
448 * @since 1.2
449 */
450 public synchronized UnivariateStatistic getMeanImpl() {
451 return meanImpl;
452 }
453
454 /**
455 * <p>Sets the implementation for the mean.</p>
456 *
457 * @param meanImpl the UnivariateStatistic instance to use
458 * for computing the mean
459 * @since 1.2
460 */
461 public synchronized void setMeanImpl(UnivariateStatistic meanImpl) {
462 this.meanImpl = meanImpl;
463 }
464
465 /**
466 * Returns the currently configured geometric mean implementation.
467 *
468 * @return the UnivariateStatistic implementing the geometric mean
469 * @since 1.2
470 */
471 public synchronized UnivariateStatistic getGeometricMeanImpl() {
472 return geometricMeanImpl;
473 }
474
475 /**
476 * <p>Sets the implementation for the gemoetric mean.</p>
477 *
478 * @param geometricMeanImpl the UnivariateStatistic instance to use
479 * for computing the geometric mean
480 * @since 1.2
481 */
482 public synchronized void setGeometricMeanImpl(
483 UnivariateStatistic geometricMeanImpl) {
484 this.geometricMeanImpl = geometricMeanImpl;
485 }
486
487 /**
488 * Returns the currently configured kurtosis implementation.
489 *
490 * @return the UnivariateStatistic implementing the kurtosis
491 * @since 1.2
492 */
493 public synchronized UnivariateStatistic getKurtosisImpl() {
494 return kurtosisImpl;
495 }
496
497 /**
498 * <p>Sets the implementation for the kurtosis.</p>
499 *
500 * @param kurtosisImpl the UnivariateStatistic instance to use
501 * for computing the kurtosis
502 * @since 1.2
503 */
504 public synchronized void setKurtosisImpl(UnivariateStatistic kurtosisImpl) {
505 this.kurtosisImpl = kurtosisImpl;
506 }
507
508 /**
509 * Returns the currently configured maximum implementation.
510 *
511 * @return the UnivariateStatistic implementing the maximum
512 * @since 1.2
513 */
514 public synchronized UnivariateStatistic getMaxImpl() {
515 return maxImpl;
516 }
517
518 /**
519 * <p>Sets the implementation for the maximum.</p>
520 *
521 * @param maxImpl the UnivariateStatistic instance to use
522 * for computing the maximum
523 * @since 1.2
524 */
525 public synchronized void setMaxImpl(UnivariateStatistic maxImpl) {
526 this.maxImpl = maxImpl;
527 }
528
529 /**
530 * Returns the currently configured minimum implementation.
531 *
532 * @return the UnivariateStatistic implementing the minimum
533 * @since 1.2
534 */
535 public synchronized UnivariateStatistic getMinImpl() {
536 return minImpl;
537 }
538
539 /**
540 * <p>Sets the implementation for the minimum.</p>
541 *
542 * @param minImpl the UnivariateStatistic instance to use
543 * for computing the minimum
544 * @since 1.2
545 */
546 public synchronized void setMinImpl(UnivariateStatistic minImpl) {
547 this.minImpl = minImpl;
548 }
549
550 /**
551 * Returns the currently configured percentile implementation.
552 *
553 * @return the UnivariateStatistic implementing the percentile
554 * @since 1.2
555 */
556 public synchronized UnivariateStatistic getPercentileImpl() {
557 return percentileImpl;
558 }
559
560 /**
561 * Sets the implementation to be used by {@link #getPercentile(double)}.
562 * The supplied <code>UnivariateStatistic</code> must provide a
563 * <code>setQuantile(double)</code> method; otherwise
564 * <code>IllegalArgumentException</code> is thrown.
565 *
566 * @param percentileImpl the percentileImpl to set
567 * @throws IllegalArgumentException if the supplied implementation does not
568 * provide a <code>setQuantile</code> method
569 * @since 1.2
570 */
571 public synchronized void setPercentileImpl(
572 UnivariateStatistic percentileImpl) {
573 try {
574 percentileImpl.getClass().getMethod(SET_QUANTILE_METHOD_NAME,
575 new Class[] {Double.TYPE}).invoke(percentileImpl,
576 new Object[] {Double.valueOf(50.0d)});
577 } catch (NoSuchMethodException e1) {
578 throw MathRuntimeException.createIllegalArgumentException(
579 "percentile implementation {0} does not support setQuantile",
580 percentileImpl.getClass().getName());
581 } catch (IllegalAccessException e2) {
582 throw MathRuntimeException.createIllegalArgumentException(
583 ILLEGAL_ACCESS_MESSAGE,
584 SET_QUANTILE_METHOD_NAME, percentileImpl.getClass().getName());
585 } catch (InvocationTargetException e3) {
586 throw MathRuntimeException.createIllegalArgumentException(e3.getCause());
587 }
588 this.percentileImpl = percentileImpl;
589 }
590
591 /**
592 * Returns the currently configured skewness implementation.
593 *
594 * @return the UnivariateStatistic implementing the skewness
595 * @since 1.2
596 */
597 public synchronized UnivariateStatistic getSkewnessImpl() {
598 return skewnessImpl;
599 }
600
601 /**
602 * <p>Sets the implementation for the skewness.</p>
603 *
604 * @param skewnessImpl the UnivariateStatistic instance to use
605 * for computing the skewness
606 * @since 1.2
607 */
608 public synchronized void setSkewnessImpl(
609 UnivariateStatistic skewnessImpl) {
610 this.skewnessImpl = skewnessImpl;
611 }
612
613 /**
614 * Returns the currently configured variance implementation.
615 *
616 * @return the UnivariateStatistic implementing the variance
617 * @since 1.2
618 */
619 public synchronized UnivariateStatistic getVarianceImpl() {
620 return varianceImpl;
621 }
622
623 /**
624 * <p>Sets the implementation for the variance.</p>
625 *
626 * @param varianceImpl the UnivariateStatistic instance to use
627 * for computing the variance
628 * @since 1.2
629 */
630 public synchronized void setVarianceImpl(
631 UnivariateStatistic varianceImpl) {
632 this.varianceImpl = varianceImpl;
633 }
634
635 /**
636 * Returns the currently configured sum of squares implementation.
637 *
638 * @return the UnivariateStatistic implementing the sum of squares
639 * @since 1.2
640 */
641 public synchronized UnivariateStatistic getSumsqImpl() {
642 return sumsqImpl;
643 }
644
645 /**
646 * <p>Sets the implementation for the sum of squares.</p>
647 *
648 * @param sumsqImpl the UnivariateStatistic instance to use
649 * for computing the sum of squares
650 * @since 1.2
651 */
652 public synchronized void setSumsqImpl(UnivariateStatistic sumsqImpl) {
653 this.sumsqImpl = sumsqImpl;
654 }
655
656 /**
657 * Returns the currently configured sum implementation.
658 *
659 * @return the UnivariateStatistic implementing the sum
660 * @since 1.2
661 */
662 public synchronized UnivariateStatistic getSumImpl() {
663 return sumImpl;
664 }
665
666 /**
667 * <p>Sets the implementation for the sum.</p>
668 *
669 * @param sumImpl the UnivariateStatistic instance to use
670 * for computing the sum
671 * @since 1.2
672 */
673 public synchronized void setSumImpl(UnivariateStatistic sumImpl) {
674 this.sumImpl = sumImpl;
675 }
676
677 /**
678 * Returns a copy of this DescriptiveStatistics instance with the same internal state.
679 *
680 * @return a copy of this
681 */
682 public DescriptiveStatistics copy() {
683 DescriptiveStatistics result = new DescriptiveStatistics();
684 copy(this, result);
685 return result;
686 }
687
688 /**
689 * Copies source to dest.
690 * <p>Neither source nor dest can be null.</p>
691 *
692 * @param source DescriptiveStatistics to copy
693 * @param dest DescriptiveStatistics to copy to
694 * @throws NullPointerException if either source or dest is null
695 */
696 public static void copy(DescriptiveStatistics source, DescriptiveStatistics dest) {
697 // Copy data and window size
698 dest.eDA = source.eDA.copy();
699 dest.windowSize = source.windowSize;
700
701 // Copy implementations
702 dest.maxImpl = source.maxImpl.copy();
703 dest.meanImpl = source.meanImpl.copy();
704 dest.minImpl = source.minImpl.copy();
705 dest.sumImpl = source.sumImpl.copy();
706 dest.varianceImpl = source.varianceImpl.copy();
707 dest.sumsqImpl = source.sumsqImpl.copy();
708 dest.geometricMeanImpl = source.geometricMeanImpl.copy();
709 dest.kurtosisImpl = source.kurtosisImpl;
710 dest.skewnessImpl = source.skewnessImpl;
711 dest.percentileImpl = source.percentileImpl;
712 }
713 }