001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.descriptive;
018
019 import java.io.Serializable;
020
021 import org.apache.commons.math.MathRuntimeException;
022 import org.apache.commons.math.stat.descriptive.moment.GeometricMean;
023 import org.apache.commons.math.stat.descriptive.moment.Mean;
024 import org.apache.commons.math.stat.descriptive.moment.SecondMoment;
025 import org.apache.commons.math.stat.descriptive.moment.Variance;
026 import org.apache.commons.math.stat.descriptive.rank.Max;
027 import org.apache.commons.math.stat.descriptive.rank.Min;
028 import org.apache.commons.math.stat.descriptive.summary.Sum;
029 import org.apache.commons.math.stat.descriptive.summary.SumOfLogs;
030 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
031 import org.apache.commons.math.util.MathUtils;
032
033 /**
034 * <p>
035 * Computes summary statistics for a stream of data values added using the
036 * {@link #addValue(double) addValue} method. The data values are not stored in
037 * memory, so this class can be used to compute statistics for very large data
038 * streams.
039 * </p>
040 * <p>
041 * The {@link StorelessUnivariateStatistic} instances used to maintain summary
042 * state and compute statistics are configurable via setters. For example, the
043 * default implementation for the variance can be overridden by calling
044 * {@link #setVarianceImpl(StorelessUnivariateStatistic)}. Actual parameters to
045 * these methods must implement the {@link StorelessUnivariateStatistic}
046 * interface and configuration must be completed before <code>addValue</code>
047 * is called. No configuration is necessary to use the default, commons-math
048 * provided implementations.
049 * </p>
050 * <p>
051 * Note: This class is not thread-safe. Use
052 * {@link SynchronizedSummaryStatistics} if concurrent access from multiple
053 * threads is required.
054 * </p>
055 * @version $Revision: 811833 $ $Date: 2009-09-06 12:27:50 -0400 (Sun, 06 Sep 2009) $
056 */
057 public class SummaryStatistics implements StatisticalSummary, Serializable {
058
059 /** Serialization UID */
060 private static final long serialVersionUID = -2021321786743555871L;
061
062 /** count of values that have been added */
063 protected long n = 0;
064
065 /** SecondMoment is used to compute the mean and variance */
066 protected SecondMoment secondMoment = new SecondMoment();
067
068 /** sum of values that have been added */
069 protected Sum sum = new Sum();
070
071 /** sum of the square of each value that has been added */
072 protected SumOfSquares sumsq = new SumOfSquares();
073
074 /** min of values that have been added */
075 protected Min min = new Min();
076
077 /** max of values that have been added */
078 protected Max max = new Max();
079
080 /** sumLog of values that have been added */
081 protected SumOfLogs sumLog = new SumOfLogs();
082
083 /** geoMean of values that have been added */
084 protected GeometricMean geoMean = new GeometricMean(sumLog);
085
086 /** mean of values that have been added */
087 protected Mean mean = new Mean();
088
089 /** variance of values that have been added */
090 protected Variance variance = new Variance();
091
092 /** Sum statistic implementation - can be reset by setter. */
093 private StorelessUnivariateStatistic sumImpl = sum;
094
095 /** Sum of squares statistic implementation - can be reset by setter. */
096 private StorelessUnivariateStatistic sumsqImpl = sumsq;
097
098 /** Minimum statistic implementation - can be reset by setter. */
099 private StorelessUnivariateStatistic minImpl = min;
100
101 /** Maximum statistic implementation - can be reset by setter. */
102 private StorelessUnivariateStatistic maxImpl = max;
103
104 /** Sum of log statistic implementation - can be reset by setter. */
105 private StorelessUnivariateStatistic sumLogImpl = sumLog;
106
107 /** Geometric mean statistic implementation - can be reset by setter. */
108 private StorelessUnivariateStatistic geoMeanImpl = geoMean;
109
110 /** Mean statistic implementation - can be reset by setter. */
111 private StorelessUnivariateStatistic meanImpl = mean;
112
113 /** Variance statistic implementation - can be reset by setter. */
114 private StorelessUnivariateStatistic varianceImpl = variance;
115
116 /**
117 * Construct a SummaryStatistics instance
118 */
119 public SummaryStatistics() {
120 }
121
122 /**
123 * A copy constructor. Creates a deep-copy of the {@code original}.
124 *
125 * @param original the {@code SummaryStatistics} instance to copy
126 */
127 public SummaryStatistics(SummaryStatistics original) {
128 copy(original, this);
129 }
130
131 /**
132 * Return a {@link StatisticalSummaryValues} instance reporting current
133 * statistics.
134 * @return Current values of statistics
135 */
136 public StatisticalSummary getSummary() {
137 return new StatisticalSummaryValues(getMean(), getVariance(), getN(),
138 getMax(), getMin(), getSum());
139 }
140
141 /**
142 * Add a value to the data
143 * @param value the value to add
144 */
145 public void addValue(double value) {
146 sumImpl.increment(value);
147 sumsqImpl.increment(value);
148 minImpl.increment(value);
149 maxImpl.increment(value);
150 sumLogImpl.increment(value);
151 secondMoment.increment(value);
152 // If mean, variance or geomean have been overridden,
153 // need to increment these
154 if (!(meanImpl instanceof Mean)) {
155 meanImpl.increment(value);
156 }
157 if (!(varianceImpl instanceof Variance)) {
158 varianceImpl.increment(value);
159 }
160 if (!(geoMeanImpl instanceof GeometricMean)) {
161 geoMeanImpl.increment(value);
162 }
163 n++;
164 }
165
166 /**
167 * Returns the number of available values
168 * @return The number of available values
169 */
170 public long getN() {
171 return n;
172 }
173
174 /**
175 * Returns the sum of the values that have been added
176 * @return The sum or <code>Double.NaN</code> if no values have been added
177 */
178 public double getSum() {
179 return sumImpl.getResult();
180 }
181
182 /**
183 * Returns the sum of the squares of the values that have been added.
184 * <p>
185 * Double.NaN is returned if no values have been added.
186 * </p>
187 * @return The sum of squares
188 */
189 public double getSumsq() {
190 return sumsqImpl.getResult();
191 }
192
193 /**
194 * Returns the mean of the values that have been added.
195 * <p>
196 * Double.NaN is returned if no values have been added.
197 * </p>
198 * @return the mean
199 */
200 public double getMean() {
201 if (mean == meanImpl) {
202 return new Mean(secondMoment).getResult();
203 } else {
204 return meanImpl.getResult();
205 }
206 }
207
208 /**
209 * Returns the standard deviation of the values that have been added.
210 * <p>
211 * Double.NaN is returned if no values have been added.
212 * </p>
213 * @return the standard deviation
214 */
215 public double getStandardDeviation() {
216 double stdDev = Double.NaN;
217 if (getN() > 0) {
218 if (getN() > 1) {
219 stdDev = Math.sqrt(getVariance());
220 } else {
221 stdDev = 0.0;
222 }
223 }
224 return stdDev;
225 }
226
227 /**
228 * Returns the variance of the values that have been added.
229 * <p>
230 * Double.NaN is returned if no values have been added.
231 * </p>
232 * @return the variance
233 */
234 public double getVariance() {
235 if (varianceImpl == variance) {
236 return new Variance(secondMoment).getResult();
237 } else {
238 return varianceImpl.getResult();
239 }
240 }
241
242 /**
243 * Returns the maximum of the values that have been added.
244 * <p>
245 * Double.NaN is returned if no values have been added.
246 * </p>
247 * @return the maximum
248 */
249 public double getMax() {
250 return maxImpl.getResult();
251 }
252
253 /**
254 * Returns the minimum of the values that have been added.
255 * <p>
256 * Double.NaN is returned if no values have been added.
257 * </p>
258 * @return the minimum
259 */
260 public double getMin() {
261 return minImpl.getResult();
262 }
263
264 /**
265 * Returns the geometric mean of the values that have been added.
266 * <p>
267 * Double.NaN is returned if no values have been added.
268 * </p>
269 * @return the geometric mean
270 */
271 public double getGeometricMean() {
272 return geoMeanImpl.getResult();
273 }
274
275 /**
276 * Returns the sum of the logs of the values that have been added.
277 * <p>
278 * Double.NaN is returned if no values have been added.
279 * </p>
280 * @return the sum of logs
281 * @since 1.2
282 */
283 public double getSumOfLogs() {
284 return sumLogImpl.getResult();
285 }
286
287 /**
288 * Returns a statistic related to the Second Central Moment. Specifically,
289 * what is returned is the sum of squared deviations from the sample mean
290 * among the values that have been added.
291 * <p>
292 * Returns <code>Double.NaN</code> if no data values have been added and
293 * returns <code>0</code> if there is just one value in the data set.</p>
294 * <p>
295 * @return second central moment statistic
296 * @since 2.0
297 */
298 public double getSecondMoment() {
299 return secondMoment.getResult();
300 }
301
302 /**
303 * Generates a text report displaying summary statistics from values that
304 * have been added.
305 * @return String with line feeds displaying statistics
306 * @since 1.2
307 */
308 @Override
309 public String toString() {
310 StringBuffer outBuffer = new StringBuffer();
311 String endl = "\n";
312 outBuffer.append("SummaryStatistics:").append(endl);
313 outBuffer.append("n: ").append(getN()).append(endl);
314 outBuffer.append("min: ").append(getMin()).append(endl);
315 outBuffer.append("max: ").append(getMax()).append(endl);
316 outBuffer.append("mean: ").append(getMean()).append(endl);
317 outBuffer.append("geometric mean: ").append(getGeometricMean())
318 .append(endl);
319 outBuffer.append("variance: ").append(getVariance()).append(endl);
320 outBuffer.append("sum of squares: ").append(getSumsq()).append(endl);
321 outBuffer.append("standard deviation: ").append(getStandardDeviation())
322 .append(endl);
323 outBuffer.append("sum of logs: ").append(getSumOfLogs()).append(endl);
324 return outBuffer.toString();
325 }
326
327 /**
328 * Resets all statistics and storage
329 */
330 public void clear() {
331 this.n = 0;
332 minImpl.clear();
333 maxImpl.clear();
334 sumImpl.clear();
335 sumLogImpl.clear();
336 sumsqImpl.clear();
337 geoMeanImpl.clear();
338 secondMoment.clear();
339 if (meanImpl != mean) {
340 meanImpl.clear();
341 }
342 if (varianceImpl != variance) {
343 varianceImpl.clear();
344 }
345 }
346
347 /**
348 * Returns true iff <code>object</code> is a
349 * <code>SummaryStatistics</code> instance and all statistics have the
350 * same values as this.
351 * @param object the object to test equality against.
352 * @return true if object equals this
353 */
354 @Override
355 public boolean equals(Object object) {
356 if (object == this) {
357 return true;
358 }
359 if (object instanceof SummaryStatistics == false) {
360 return false;
361 }
362 SummaryStatistics stat = (SummaryStatistics)object;
363 return MathUtils.equals(stat.getGeometricMean(), getGeometricMean()) &&
364 MathUtils.equals(stat.getMax(), getMax()) &&
365 MathUtils.equals(stat.getMean(), getMean()) &&
366 MathUtils.equals(stat.getMin(), getMin()) &&
367 MathUtils.equals(stat.getN(), getN()) &&
368 MathUtils.equals(stat.getSum(), getSum()) &&
369 MathUtils.equals(stat.getSumsq(), getSumsq()) &&
370 MathUtils.equals(stat.getVariance(), getVariance());
371 }
372
373 /**
374 * Returns hash code based on values of statistics
375 * @return hash code
376 */
377 @Override
378 public int hashCode() {
379 int result = 31 + MathUtils.hash(getGeometricMean());
380 result = result * 31 + MathUtils.hash(getGeometricMean());
381 result = result * 31 + MathUtils.hash(getMax());
382 result = result * 31 + MathUtils.hash(getMean());
383 result = result * 31 + MathUtils.hash(getMin());
384 result = result * 31 + MathUtils.hash(getN());
385 result = result * 31 + MathUtils.hash(getSum());
386 result = result * 31 + MathUtils.hash(getSumsq());
387 result = result * 31 + MathUtils.hash(getVariance());
388 return result;
389 }
390
391 // Getters and setters for statistics implementations
392 /**
393 * Returns the currently configured Sum implementation
394 * @return the StorelessUnivariateStatistic implementing the sum
395 * @since 1.2
396 */
397 public StorelessUnivariateStatistic getSumImpl() {
398 return sumImpl;
399 }
400
401 /**
402 * <p>
403 * Sets the implementation for the Sum.
404 * </p>
405 * <p>
406 * This method must be activated before any data has been added - i.e.,
407 * before {@link #addValue(double) addValue} has been used to add data;
408 * otherwise an IllegalStateException will be thrown.
409 * </p>
410 * @param sumImpl the StorelessUnivariateStatistic instance to use for
411 * computing the Sum
412 * @throws IllegalStateException if data has already been added (i.e if n >
413 * 0)
414 * @since 1.2
415 */
416 public void setSumImpl(StorelessUnivariateStatistic sumImpl) {
417 checkEmpty();
418 this.sumImpl = sumImpl;
419 }
420
421 /**
422 * Returns the currently configured sum of squares implementation
423 * @return the StorelessUnivariateStatistic implementing the sum of squares
424 * @since 1.2
425 */
426 public StorelessUnivariateStatistic getSumsqImpl() {
427 return sumsqImpl;
428 }
429
430 /**
431 * <p>
432 * Sets the implementation for the sum of squares.
433 * </p>
434 * <p>
435 * This method must be activated before any data has been added - i.e.,
436 * before {@link #addValue(double) addValue} has been used to add data;
437 * otherwise an IllegalStateException will be thrown.
438 * </p>
439 * @param sumsqImpl the StorelessUnivariateStatistic instance to use for
440 * computing the sum of squares
441 * @throws IllegalStateException if data has already been added (i.e if n >
442 * 0)
443 * @since 1.2
444 */
445 public void setSumsqImpl(StorelessUnivariateStatistic sumsqImpl) {
446 checkEmpty();
447 this.sumsqImpl = sumsqImpl;
448 }
449
450 /**
451 * Returns the currently configured minimum implementation
452 * @return the StorelessUnivariateStatistic implementing the minimum
453 * @since 1.2
454 */
455 public StorelessUnivariateStatistic getMinImpl() {
456 return minImpl;
457 }
458
459 /**
460 * <p>
461 * Sets the implementation for the minimum.
462 * </p>
463 * <p>
464 * This method must be activated before any data has been added - i.e.,
465 * before {@link #addValue(double) addValue} has been used to add data;
466 * otherwise an IllegalStateException will be thrown.
467 * </p>
468 * @param minImpl the StorelessUnivariateStatistic instance to use for
469 * computing the minimum
470 * @throws IllegalStateException if data has already been added (i.e if n >
471 * 0)
472 * @since 1.2
473 */
474 public void setMinImpl(StorelessUnivariateStatistic minImpl) {
475 checkEmpty();
476 this.minImpl = minImpl;
477 }
478
479 /**
480 * Returns the currently configured maximum implementation
481 * @return the StorelessUnivariateStatistic implementing the maximum
482 * @since 1.2
483 */
484 public StorelessUnivariateStatistic getMaxImpl() {
485 return maxImpl;
486 }
487
488 /**
489 * <p>
490 * Sets the implementation for the maximum.
491 * </p>
492 * <p>
493 * This method must be activated before any data has been added - i.e.,
494 * before {@link #addValue(double) addValue} has been used to add data;
495 * otherwise an IllegalStateException will be thrown.
496 * </p>
497 * @param maxImpl the StorelessUnivariateStatistic instance to use for
498 * computing the maximum
499 * @throws IllegalStateException if data has already been added (i.e if n >
500 * 0)
501 * @since 1.2
502 */
503 public void setMaxImpl(StorelessUnivariateStatistic maxImpl) {
504 checkEmpty();
505 this.maxImpl = maxImpl;
506 }
507
508 /**
509 * Returns the currently configured sum of logs implementation
510 * @return the StorelessUnivariateStatistic implementing the log sum
511 * @since 1.2
512 */
513 public StorelessUnivariateStatistic getSumLogImpl() {
514 return sumLogImpl;
515 }
516
517 /**
518 * <p>
519 * Sets the implementation for the sum of logs.
520 * </p>
521 * <p>
522 * This method must be activated before any data has been added - i.e.,
523 * before {@link #addValue(double) addValue} has been used to add data;
524 * otherwise an IllegalStateException will be thrown.
525 * </p>
526 * @param sumLogImpl the StorelessUnivariateStatistic instance to use for
527 * computing the log sum
528 * @throws IllegalStateException if data has already been added (i.e if n >
529 * 0)
530 * @since 1.2
531 */
532 public void setSumLogImpl(StorelessUnivariateStatistic sumLogImpl) {
533 checkEmpty();
534 this.sumLogImpl = sumLogImpl;
535 geoMean.setSumLogImpl(sumLogImpl);
536 }
537
538 /**
539 * Returns the currently configured geometric mean implementation
540 * @return the StorelessUnivariateStatistic implementing the geometric mean
541 * @since 1.2
542 */
543 public StorelessUnivariateStatistic getGeoMeanImpl() {
544 return geoMeanImpl;
545 }
546
547 /**
548 * <p>
549 * Sets the implementation for the geometric mean.
550 * </p>
551 * <p>
552 * This method must be activated before any data has been added - i.e.,
553 * before {@link #addValue(double) addValue} has been used to add data;
554 * otherwise an IllegalStateException will be thrown.
555 * </p>
556 * @param geoMeanImpl the StorelessUnivariateStatistic instance to use for
557 * computing the geometric mean
558 * @throws IllegalStateException if data has already been added (i.e if n >
559 * 0)
560 * @since 1.2
561 */
562 public void setGeoMeanImpl(StorelessUnivariateStatistic geoMeanImpl) {
563 checkEmpty();
564 this.geoMeanImpl = geoMeanImpl;
565 }
566
567 /**
568 * Returns the currently configured mean implementation
569 * @return the StorelessUnivariateStatistic implementing the mean
570 * @since 1.2
571 */
572 public StorelessUnivariateStatistic getMeanImpl() {
573 return meanImpl;
574 }
575
576 /**
577 * <p>
578 * Sets the implementation for the mean.
579 * </p>
580 * <p>
581 * This method must be activated before any data has been added - i.e.,
582 * before {@link #addValue(double) addValue} has been used to add data;
583 * otherwise an IllegalStateException will be thrown.
584 * </p>
585 * @param meanImpl the StorelessUnivariateStatistic instance to use for
586 * computing the mean
587 * @throws IllegalStateException if data has already been added (i.e if n >
588 * 0)
589 * @since 1.2
590 */
591 public void setMeanImpl(StorelessUnivariateStatistic meanImpl) {
592 checkEmpty();
593 this.meanImpl = meanImpl;
594 }
595
596 /**
597 * Returns the currently configured variance implementation
598 * @return the StorelessUnivariateStatistic implementing the variance
599 * @since 1.2
600 */
601 public StorelessUnivariateStatistic getVarianceImpl() {
602 return varianceImpl;
603 }
604
605 /**
606 * <p>
607 * Sets the implementation for the variance.
608 * </p>
609 * <p>
610 * This method must be activated before any data has been added - i.e.,
611 * before {@link #addValue(double) addValue} has been used to add data;
612 * otherwise an IllegalStateException will be thrown.
613 * </p>
614 * @param varianceImpl the StorelessUnivariateStatistic instance to use for
615 * computing the variance
616 * @throws IllegalStateException if data has already been added (i.e if n >
617 * 0)
618 * @since 1.2
619 */
620 public void setVarianceImpl(StorelessUnivariateStatistic varianceImpl) {
621 checkEmpty();
622 this.varianceImpl = varianceImpl;
623 }
624
625 /**
626 * Throws IllegalStateException if n > 0.
627 */
628 private void checkEmpty() {
629 if (n > 0) {
630 throw MathRuntimeException.createIllegalStateException(
631 "{0} values have been added before statistic is configured",
632 n);
633 }
634 }
635
636 /**
637 * Returns a copy of this SummaryStatistics instance with the same internal state.
638 *
639 * @return a copy of this
640 */
641 public SummaryStatistics copy() {
642 SummaryStatistics result = new SummaryStatistics();
643 copy(this, result);
644 return result;
645 }
646
647 /**
648 * Copies source to dest.
649 * <p>Neither source nor dest can be null.</p>
650 *
651 * @param source SummaryStatistics to copy
652 * @param dest SummaryStatistics to copy to
653 * @throws NullPointerException if either source or dest is null
654 */
655 public static void copy(SummaryStatistics source, SummaryStatistics dest) {
656 dest.maxImpl = source.maxImpl.copy();
657 dest.meanImpl = source.meanImpl.copy();
658 dest.minImpl = source.minImpl.copy();
659 dest.sumImpl = source.sumImpl.copy();
660 dest.varianceImpl = source.varianceImpl.copy();
661 dest.sumLogImpl = source.sumLogImpl.copy();
662 dest.sumsqImpl = source.sumsqImpl.copy();
663 if (source.getGeoMeanImpl() instanceof GeometricMean) {
664 // Keep geoMeanImpl, sumLogImpl in synch
665 dest.geoMeanImpl = new GeometricMean((SumOfLogs) dest.sumLogImpl);
666 } else {
667 dest.geoMeanImpl = source.geoMeanImpl.copy();
668 }
669 SecondMoment.copy(source.secondMoment, dest.secondMoment);
670 dest.n = source.n;
671
672 // Make sure that if stat == statImpl in source, same
673 // holds in dest; otherwise copy stat
674 if (source.geoMean == source.geoMeanImpl) {
675 dest.geoMean = (GeometricMean) dest.geoMeanImpl;
676 } else {
677 GeometricMean.copy(source.geoMean, dest.geoMean);
678 }
679 if (source.max == source.maxImpl) {
680 dest.max = (Max) dest.maxImpl;
681 } else {
682 Max.copy(source.max, dest.max);
683 }
684 if (source.mean == source.meanImpl) {
685 dest.mean = (Mean) dest.meanImpl;
686 } else {
687 Mean.copy(source.mean, dest.mean);
688 }
689 if (source.min == source.minImpl) {
690 dest.min = (Min) dest.minImpl;
691 } else {
692 Min.copy(source.min, dest.min);
693 }
694 if (source.sum == source.sumImpl) {
695 dest.sum = (Sum) dest.sumImpl;
696 } else {
697 Sum.copy(source.sum, dest.sum);
698 }
699 if (source.variance == source.varianceImpl) {
700 dest.variance = (Variance) dest.varianceImpl;
701 } else {
702 Variance.copy(source.variance, dest.variance);
703 }
704 if (source.sumLog == source.sumLogImpl) {
705 dest.sumLog = (SumOfLogs) dest.sumLogImpl;
706 } else {
707 SumOfLogs.copy(source.sumLog, dest.sumLog);
708 }
709 if (source.sumsq == source.sumsqImpl) {
710 dest.sumsq = (SumOfSquares) dest.sumsqImpl;
711 } else {
712 SumOfSquares.copy(source.sumsq, dest.sumsq);
713 }
714 }
715 }