001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.descriptive.moment;
018
019 import java.io.Serializable;
020
021 import org.apache.commons.math.MathRuntimeException;
022 import org.apache.commons.math.stat.descriptive.WeightedEvaluation;
023 import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic;
024
025 /**
026 * Computes the variance of the available values. By default, the unbiased
027 * "sample variance" definitional formula is used:
028 * <p>
029 * variance = sum((x_i - mean)^2) / (n - 1) </p>
030 * <p>
031 * where mean is the {@link Mean} and <code>n</code> is the number
032 * of sample observations.</p>
033 * <p>
034 * The definitional formula does not have good numerical properties, so
035 * this implementation does not compute the statistic using the definitional
036 * formula. <ul>
037 * <li> The <code>getResult</code> method computes the variance using
038 * updating formulas based on West's algorithm, as described in
039 * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and
040 * J. G. Lewis 1979, <i>Communications of the ACM</i>,
041 * vol. 22 no. 9, pp. 526-531.</a></li>
042 * <li> The <code>evaluate</code> methods leverage the fact that they have the
043 * full array of values in memory to execute a two-pass algorithm.
044 * Specifically, these methods use the "corrected two-pass algorithm" from
045 * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>,
046 * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul>
047 * Note that adding values using <code>increment</code> or
048 * <code>incrementAll</code> and then executing <code>getResult</code> will
049 * sometimes give a different, less accurate, result than executing
050 * <code>evaluate</code> with the full array of values. The former approach
051 * should only be used when the full array of values is not available.</p>
052 * <p>
053 * The "population variance" ( sum((x_i - mean)^2) / n ) can also
054 * be computed using this statistic. The <code>isBiasCorrected</code>
055 * property determines whether the "population" or "sample" value is
056 * returned by the <code>evaluate</code> and <code>getResult</code> methods.
057 * To compute population variances, set this property to <code>false.</code>
058 * </p>
059 * <p>
060 * <strong>Note that this implementation is not synchronized.</strong> If
061 * multiple threads access an instance of this class concurrently, and at least
062 * one of the threads invokes the <code>increment()</code> or
063 * <code>clear()</code> method, it must be synchronized externally.</p>
064 *
065 * @version $Revision: 908626 $ $Date: 2010-02-10 13:44:42 -0500 (Wed, 10 Feb 2010) $
066 */
067 public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable, WeightedEvaluation {
068
069 /** Serializable version identifier */
070 private static final long serialVersionUID = -9111962718267217978L;
071
072 /** SecondMoment is used in incremental calculation of Variance*/
073 protected SecondMoment moment = null;
074
075 /**
076 * Boolean test to determine if this Variance should also increment
077 * the second moment, this evaluates to false when this Variance is
078 * constructed with an external SecondMoment as a parameter.
079 */
080 protected boolean incMoment = true;
081
082 /**
083 * Determines whether or not bias correction is applied when computing the
084 * value of the statisic. True means that bias is corrected. See
085 * {@link Variance} for details on the formula.
086 */
087 private boolean isBiasCorrected = true;
088
089 /**
090 * Constructs a Variance with default (true) <code>isBiasCorrected</code>
091 * property.
092 */
093 public Variance() {
094 moment = new SecondMoment();
095 }
096
097 /**
098 * Constructs a Variance based on an external second moment.
099 *
100 * @param m2 the SecondMoment (Third or Fourth moments work
101 * here as well.)
102 */
103 public Variance(final SecondMoment m2) {
104 incMoment = false;
105 this.moment = m2;
106 }
107
108 /**
109 * Constructs a Variance with the specified <code>isBiasCorrected</code>
110 * property
111 *
112 * @param isBiasCorrected setting for bias correction - true means
113 * bias will be corrected and is equivalent to using the argumentless
114 * constructor
115 */
116 public Variance(boolean isBiasCorrected) {
117 moment = new SecondMoment();
118 this.isBiasCorrected = isBiasCorrected;
119 }
120
121 /**
122 * Constructs a Variance with the specified <code>isBiasCorrected</code>
123 * property and the supplied external second moment.
124 *
125 * @param isBiasCorrected setting for bias correction - true means
126 * bias will be corrected
127 * @param m2 the SecondMoment (Third or Fourth moments work
128 * here as well.)
129 */
130 public Variance(boolean isBiasCorrected, SecondMoment m2) {
131 incMoment = false;
132 this.moment = m2;
133 this.isBiasCorrected = isBiasCorrected;
134 }
135
136 /**
137 * Copy constructor, creates a new {@code Variance} identical
138 * to the {@code original}
139 *
140 * @param original the {@code Variance} instance to copy
141 */
142 public Variance(Variance original) {
143 copy(original, this);
144 }
145
146 /**
147 * {@inheritDoc}
148 * <p>If all values are available, it is more accurate to use
149 * {@link #evaluate(double[])} rather than adding values one at a time
150 * using this method and then executing {@link #getResult}, since
151 * <code>evaluate</code> leverages the fact that is has the full
152 * list of values together to execute a two-pass algorithm.
153 * See {@link Variance}.</p>
154 */
155 @Override
156 public void increment(final double d) {
157 if (incMoment) {
158 moment.increment(d);
159 }
160 }
161
162 /**
163 * {@inheritDoc}
164 */
165 @Override
166 public double getResult() {
167 if (moment.n == 0) {
168 return Double.NaN;
169 } else if (moment.n == 1) {
170 return 0d;
171 } else {
172 if (isBiasCorrected) {
173 return moment.m2 / (moment.n - 1d);
174 } else {
175 return moment.m2 / (moment.n);
176 }
177 }
178 }
179
180 /**
181 * {@inheritDoc}
182 */
183 public long getN() {
184 return moment.getN();
185 }
186
187 /**
188 * {@inheritDoc}
189 */
190 @Override
191 public void clear() {
192 if (incMoment) {
193 moment.clear();
194 }
195 }
196
197 /**
198 * Returns the variance of the entries in the input array, or
199 * <code>Double.NaN</code> if the array is empty.
200 * <p>
201 * See {@link Variance} for details on the computing algorithm.</p>
202 * <p>
203 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
204 * <p>
205 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
206 * <p>
207 * Does not change the internal state of the statistic.</p>
208 *
209 * @param values the input array
210 * @return the variance of the values or Double.NaN if length = 0
211 * @throws IllegalArgumentException if the array is null
212 */
213 @Override
214 public double evaluate(final double[] values) {
215 if (values == null) {
216 throw MathRuntimeException.createIllegalArgumentException("input values array is null");
217 }
218 return evaluate(values, 0, values.length);
219 }
220
221 /**
222 * Returns the variance of the entries in the specified portion of
223 * the input array, or <code>Double.NaN</code> if the designated subarray
224 * is empty.
225 * <p>
226 * See {@link Variance} for details on the computing algorithm.</p>
227 * <p>
228 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
229 * <p>
230 * Does not change the internal state of the statistic.</p>
231 * <p>
232 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
233 *
234 * @param values the input array
235 * @param begin index of the first array element to include
236 * @param length the number of elements to include
237 * @return the variance of the values or Double.NaN if length = 0
238 * @throws IllegalArgumentException if the array is null or the array index
239 * parameters are not valid
240 */
241 @Override
242 public double evaluate(final double[] values, final int begin, final int length) {
243
244 double var = Double.NaN;
245
246 if (test(values, begin, length)) {
247 clear();
248 if (length == 1) {
249 var = 0.0;
250 } else if (length > 1) {
251 Mean mean = new Mean();
252 double m = mean.evaluate(values, begin, length);
253 var = evaluate(values, m, begin, length);
254 }
255 }
256 return var;
257 }
258
259 /**
260 * <p>Returns the weighted variance of the entries in the specified portion of
261 * the input array, or <code>Double.NaN</code> if the designated subarray
262 * is empty.</p>
263 * <p>
264 * Uses the formula <pre>
265 * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1)
266 * </pre>
267 * where weightedMean is the weighted mean</p>
268 * <p>
269 * This formula will not return the same result as the unweighted variance when all
270 * weights are equal, unless all weights are equal to 1. The formula assumes that
271 * weights are to be treated as "expansion values," as will be the case if for example
272 * the weights represent frequency counts. To normalize weights so that the denominator
273 * in the variance computation equals the length of the input vector minus one, use <pre>
274 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code>
275 * </pre>
276 * <p>
277 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
278 * <p>
279 * Throws <code>IllegalArgumentException</code> if any of the following are true:
280 * <ul><li>the values array is null</li>
281 * <li>the weights array is null</li>
282 * <li>the weights array does not have the same length as the values array</li>
283 * <li>the weights array contains one or more infinite values</li>
284 * <li>the weights array contains one or more NaN values</li>
285 * <li>the weights array contains negative values</li>
286 * <li>the start and length arguments do not determine a valid array</li>
287 * </ul></p>
288 * <p>
289 * Does not change the internal state of the statistic.</p>
290 * <p>
291 * Throws <code>IllegalArgumentException</code> if either array is null.</p>
292 *
293 * @param values the input array
294 * @param weights the weights array
295 * @param begin index of the first array element to include
296 * @param length the number of elements to include
297 * @return the weighted variance of the values or Double.NaN if length = 0
298 * @throws IllegalArgumentException if the parameters are not valid
299 * @since 2.1
300 */
301 public double evaluate(final double[] values, final double[] weights,
302 final int begin, final int length) {
303
304 double var = Double.NaN;
305
306 if (test(values, weights,begin, length)) {
307 clear();
308 if (length == 1) {
309 var = 0.0;
310 } else if (length > 1) {
311 Mean mean = new Mean();
312 double m = mean.evaluate(values, weights, begin, length);
313 var = evaluate(values, weights, m, begin, length);
314 }
315 }
316 return var;
317 }
318
319 /**
320 * <p>
321 * Returns the weighted variance of the entries in the the input array.</p>
322 * <p>
323 * Uses the formula <pre>
324 * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1)
325 * </pre>
326 * where weightedMean is the weighted mean</p>
327 * <p>
328 * This formula will not return the same result as the unweighted variance when all
329 * weights are equal, unless all weights are equal to 1. The formula assumes that
330 * weights are to be treated as "expansion values," as will be the case if for example
331 * the weights represent frequency counts. To normalize weights so that the denominator
332 * in the variance computation equals the length of the input vector minus one, use <pre>
333 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code>
334 * </pre>
335 * <p>
336 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
337 * <p>
338 * Throws <code>IllegalArgumentException</code> if any of the following are true:
339 * <ul><li>the values array is null</li>
340 * <li>the weights array is null</li>
341 * <li>the weights array does not have the same length as the values array</li>
342 * <li>the weights array contains one or more infinite values</li>
343 * <li>the weights array contains one or more NaN values</li>
344 * <li>the weights array contains negative values</li>
345 * </ul></p>
346 * <p>
347 * Does not change the internal state of the statistic.</p>
348 * <p>
349 * Throws <code>IllegalArgumentException</code> if either array is null.</p>
350 *
351 * @param values the input array
352 * @param weights the weights array
353 * @return the weighted variance of the values
354 * @throws IllegalArgumentException if the parameters are not valid
355 * @since 2.1
356 */
357 public double evaluate(final double[] values, final double[] weights) {
358 return evaluate(values, weights, 0, values.length);
359 }
360
361 /**
362 * Returns the variance of the entries in the specified portion of
363 * the input array, using the precomputed mean value. Returns
364 * <code>Double.NaN</code> if the designated subarray is empty.
365 * <p>
366 * See {@link Variance} for details on the computing algorithm.</p>
367 * <p>
368 * The formula used assumes that the supplied mean value is the arithmetic
369 * mean of the sample data, not a known population parameter. This method
370 * is supplied only to save computation when the mean has already been
371 * computed.</p>
372 * <p>
373 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
374 * <p>
375 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
376 * <p>
377 * Does not change the internal state of the statistic.</p>
378 *
379 * @param values the input array
380 * @param mean the precomputed mean value
381 * @param begin index of the first array element to include
382 * @param length the number of elements to include
383 * @return the variance of the values or Double.NaN if length = 0
384 * @throws IllegalArgumentException if the array is null or the array index
385 * parameters are not valid
386 */
387 public double evaluate(final double[] values, final double mean,
388 final int begin, final int length) {
389
390 double var = Double.NaN;
391
392 if (test(values, begin, length)) {
393 if (length == 1) {
394 var = 0.0;
395 } else if (length > 1) {
396 double accum = 0.0;
397 double dev = 0.0;
398 double accum2 = 0.0;
399 for (int i = begin; i < begin + length; i++) {
400 dev = values[i] - mean;
401 accum += dev * dev;
402 accum2 += dev;
403 }
404 double len = length;
405 if (isBiasCorrected) {
406 var = (accum - (accum2 * accum2 / len)) / (len - 1.0);
407 } else {
408 var = (accum - (accum2 * accum2 / len)) / len;
409 }
410 }
411 }
412 return var;
413 }
414
415 /**
416 * Returns the variance of the entries in the input array, using the
417 * precomputed mean value. Returns <code>Double.NaN</code> if the array
418 * is empty.
419 * <p>
420 * See {@link Variance} for details on the computing algorithm.</p>
421 * <p>
422 * If <code>isBiasCorrected</code> is <code>true</code> the formula used
423 * assumes that the supplied mean value is the arithmetic mean of the
424 * sample data, not a known population parameter. If the mean is a known
425 * population parameter, or if the "population" version of the variance is
426 * desired, set <code>isBiasCorrected</code> to <code>false</code> before
427 * invoking this method.</p>
428 * <p>
429 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
430 * <p>
431 * Throws <code>IllegalArgumentException</code> if the array is null.</p>
432 * <p>
433 * Does not change the internal state of the statistic.</p>
434 *
435 * @param values the input array
436 * @param mean the precomputed mean value
437 * @return the variance of the values or Double.NaN if the array is empty
438 * @throws IllegalArgumentException if the array is null
439 */
440 public double evaluate(final double[] values, final double mean) {
441 return evaluate(values, mean, 0, values.length);
442 }
443
444 /**
445 * Returns the weighted variance of the entries in the specified portion of
446 * the input array, using the precomputed weighted mean value. Returns
447 * <code>Double.NaN</code> if the designated subarray is empty.
448 * <p>
449 * Uses the formula <pre>
450 * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1)
451 * </pre></p>
452 * <p>
453 * The formula used assumes that the supplied mean value is the weighted arithmetic
454 * mean of the sample data, not a known population parameter. This method
455 * is supplied only to save computation when the mean has already been
456 * computed.</p>
457 * <p>
458 * This formula will not return the same result as the unweighted variance when all
459 * weights are equal, unless all weights are equal to 1. The formula assumes that
460 * weights are to be treated as "expansion values," as will be the case if for example
461 * the weights represent frequency counts. To normalize weights so that the denominator
462 * in the variance computation equals the length of the input vector minus one, use <pre>
463 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code>
464 * </pre>
465 * <p>
466 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
467 * <p>
468 * Throws <code>IllegalArgumentException</code> if any of the following are true:
469 * <ul><li>the values array is null</li>
470 * <li>the weights array is null</li>
471 * <li>the weights array does not have the same length as the values array</li>
472 * <li>the weights array contains one or more infinite values</li>
473 * <li>the weights array contains one or more NaN values</li>
474 * <li>the weights array contains negative values</li>
475 * <li>the start and length arguments do not determine a valid array</li>
476 * </ul></p>
477 * <p>
478 * Does not change the internal state of the statistic.</p>
479 *
480 * @param values the input array
481 * @param weights the weights array
482 * @param mean the precomputed weighted mean value
483 * @param begin index of the first array element to include
484 * @param length the number of elements to include
485 * @return the variance of the values or Double.NaN if length = 0
486 * @throws IllegalArgumentException if the parameters are not valid
487 * @since 2.1
488 */
489 public double evaluate(final double[] values, final double[] weights,
490 final double mean, final int begin, final int length) {
491
492 double var = Double.NaN;
493
494 if (test(values, weights, begin, length)) {
495 if (length == 1) {
496 var = 0.0;
497 } else if (length > 1) {
498 double accum = 0.0;
499 double dev = 0.0;
500 double accum2 = 0.0;
501 for (int i = begin; i < begin + length; i++) {
502 dev = values[i] - mean;
503 accum += weights[i] * (dev * dev);
504 accum2 += weights[i] * dev;
505 }
506
507 double sumWts = 0;
508 for (int i = 0; i < weights.length; i++) {
509 sumWts += weights[i];
510 }
511
512 if (isBiasCorrected) {
513 var = (accum - (accum2 * accum2 / sumWts)) / (sumWts - 1.0);
514 } else {
515 var = (accum - (accum2 * accum2 / sumWts)) / sumWts;
516 }
517 }
518 }
519 return var;
520 }
521
522 /**
523 * <p>Returns the weighted variance of the values in the input array, using
524 * the precomputed weighted mean value.</p>
525 * <p>
526 * Uses the formula <pre>
527 * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1)
528 * </pre></p>
529 * <p>
530 * The formula used assumes that the supplied mean value is the weighted arithmetic
531 * mean of the sample data, not a known population parameter. This method
532 * is supplied only to save computation when the mean has already been
533 * computed.</p>
534 * <p>
535 * This formula will not return the same result as the unweighted variance when all
536 * weights are equal, unless all weights are equal to 1. The formula assumes that
537 * weights are to be treated as "expansion values," as will be the case if for example
538 * the weights represent frequency counts. To normalize weights so that the denominator
539 * in the variance computation equals the length of the input vector minus one, use <pre>
540 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code>
541 * </pre>
542 * <p>
543 * Returns 0 for a single-value (i.e. length = 1) sample.</p>
544 * <p>
545 * Throws <code>IllegalArgumentException</code> if any of the following are true:
546 * <ul><li>the values array is null</li>
547 * <li>the weights array is null</li>
548 * <li>the weights array does not have the same length as the values array</li>
549 * <li>the weights array contains one or more infinite values</li>
550 * <li>the weights array contains one or more NaN values</li>
551 * <li>the weights array contains negative values</li>
552 * </ul></p>
553 * <p>
554 * Does not change the internal state of the statistic.</p>
555 *
556 * @param values the input array
557 * @param weights the weights array
558 * @param mean the precomputed weighted mean value
559 * @return the variance of the values or Double.NaN if length = 0
560 * @throws IllegalArgumentException if the parameters are not valid
561 * @since 2.1
562 */
563 public double evaluate(final double[] values, final double[] weights, final double mean) {
564 return evaluate(values, weights, mean, 0, values.length);
565 }
566
567 /**
568 * @return Returns the isBiasCorrected.
569 */
570 public boolean isBiasCorrected() {
571 return isBiasCorrected;
572 }
573
574 /**
575 * @param biasCorrected The isBiasCorrected to set.
576 */
577 public void setBiasCorrected(boolean biasCorrected) {
578 this.isBiasCorrected = biasCorrected;
579 }
580
581 /**
582 * {@inheritDoc}
583 */
584 @Override
585 public Variance copy() {
586 Variance result = new Variance();
587 copy(this, result);
588 return result;
589 }
590
591
592 /**
593 * Copies source to dest.
594 * <p>Neither source nor dest can be null.</p>
595 *
596 * @param source Variance to copy
597 * @param dest Variance to copy to
598 * @throws NullPointerException if either source or dest is null
599 */
600 public static void copy(Variance source, Variance dest) {
601 dest.moment = source.moment.copy();
602 dest.isBiasCorrected = source.isBiasCorrected;
603 dest.incMoment = source.incMoment;
604 }
605
606 }