001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.inference;
018
019 import java.util.Collection;
020
021 import org.apache.commons.math.MathException;
022 import org.apache.commons.math.MathRuntimeException;
023 import org.apache.commons.math.distribution.FDistribution;
024 import org.apache.commons.math.distribution.FDistributionImpl;
025 import org.apache.commons.math.stat.descriptive.summary.Sum;
026 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
027
028
029 /**
030 * Implements one-way ANOVA statistics defined in the {@link OneWayAnova}
031 * interface.
032 *
033 * <p>Uses the
034 * {@link org.apache.commons.math.distribution.FDistribution
035 * commons-math F Distribution implementation} to estimate exact p-values.</p>
036 *
037 * <p>This implementation is based on a description at
038 * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
039 * <pre>
040 * Abbreviations: bg = between groups,
041 * wg = within groups,
042 * ss = sum squared deviations
043 * </pre>
044 *
045 * @since 1.2
046 * @version $Revision: 825917 $ $Date: 2009-10-16 10:47:27 -0400 (Fri, 16 Oct 2009) $
047 */
048 public class OneWayAnovaImpl implements OneWayAnova {
049
050 /**
051 * Default constructor.
052 */
053 public OneWayAnovaImpl() {
054 }
055
056 /**
057 * {@inheritDoc}<p>
058 * This implementation computes the F statistic using the definitional
059 * formula<pre>
060 * F = msbg/mswg</pre>
061 * where<pre>
062 * msbg = between group mean square
063 * mswg = within group mean square</pre>
064 * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
065 * here</a></p>
066 */
067 public double anovaFValue(Collection<double[]> categoryData)
068 throws IllegalArgumentException, MathException {
069 AnovaStats a = anovaStats(categoryData);
070 return a.F;
071 }
072
073 /**
074 * {@inheritDoc}<p>
075 * This implementation uses the
076 * {@link org.apache.commons.math.distribution.FDistribution
077 * commons-math F Distribution implementation} to estimate the exact
078 * p-value, using the formula<pre>
079 * p = 1 - cumulativeProbability(F)</pre>
080 * where <code>F</code> is the F value and <code>cumulativeProbability</code>
081 * is the commons-math implementation of the F distribution.</p>
082 */
083 public double anovaPValue(Collection<double[]> categoryData)
084 throws IllegalArgumentException, MathException {
085 AnovaStats a = anovaStats(categoryData);
086 FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
087 return 1.0 - fdist.cumulativeProbability(a.F);
088 }
089
090 /**
091 * {@inheritDoc}<p>
092 * This implementation uses the
093 * {@link org.apache.commons.math.distribution.FDistribution
094 * commons-math F Distribution implementation} to estimate the exact
095 * p-value, using the formula<pre>
096 * p = 1 - cumulativeProbability(F)</pre>
097 * where <code>F</code> is the F value and <code>cumulativeProbability</code>
098 * is the commons-math implementation of the F distribution.</p>
099 * <p>True is returned iff the estimated p-value is less than alpha.</p>
100 */
101 public boolean anovaTest(Collection<double[]> categoryData, double alpha)
102 throws IllegalArgumentException, MathException {
103 if ((alpha <= 0) || (alpha > 0.5)) {
104 throw MathRuntimeException.createIllegalArgumentException(
105 "out of bounds significance level {0}, must be between {1} and {2}",
106 alpha, 0, 0.5);
107 }
108 return anovaPValue(categoryData) < alpha;
109 }
110
111
112 /**
113 * This method actually does the calculations (except P-value).
114 *
115 * @param categoryData <code>Collection</code> of <code>double[]</code>
116 * arrays each containing data for one category
117 * @return computed AnovaStats
118 * @throws IllegalArgumentException if categoryData does not meet
119 * preconditions specified in the interface definition
120 * @throws MathException if an error occurs computing the Anova stats
121 */
122 private AnovaStats anovaStats(Collection<double[]> categoryData)
123 throws IllegalArgumentException, MathException {
124
125 // check if we have enough categories
126 if (categoryData.size() < 2) {
127 throw MathRuntimeException.createIllegalArgumentException(
128 "two or more categories required, got {0}",
129 categoryData.size());
130 }
131
132 // check if each category has enough data and all is double[]
133 for (double[] array : categoryData) {
134 if (array.length <= 1) {
135 throw MathRuntimeException.createIllegalArgumentException(
136 "two or more values required in each category, one has {0}",
137 array.length);
138 }
139 }
140
141 int dfwg = 0;
142 double sswg = 0;
143 Sum totsum = new Sum();
144 SumOfSquares totsumsq = new SumOfSquares();
145 int totnum = 0;
146
147 for (double[] data : categoryData) {
148
149 Sum sum = new Sum();
150 SumOfSquares sumsq = new SumOfSquares();
151 int num = 0;
152
153 for (int i = 0; i < data.length; i++) {
154 double val = data[i];
155
156 // within category
157 num++;
158 sum.increment(val);
159 sumsq.increment(val);
160
161 // for all categories
162 totnum++;
163 totsum.increment(val);
164 totsumsq.increment(val);
165 }
166 dfwg += num - 1;
167 double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
168 sswg += ss;
169 }
170 double sst = totsumsq.getResult() - totsum.getResult() *
171 totsum.getResult()/totnum;
172 double ssbg = sst - sswg;
173 int dfbg = categoryData.size() - 1;
174 double msbg = ssbg/dfbg;
175 double mswg = sswg/dfwg;
176 double F = msbg/mswg;
177
178 return new AnovaStats(dfbg, dfwg, F);
179 }
180
181 /**
182 Convenience class to pass dfbg,dfwg,F values around within AnovaImpl.
183 No get/set methods provided.
184 */
185 private static class AnovaStats {
186
187 /** Degrees of freedom in numerator (between groups). */
188 private int dfbg;
189
190 /** Degrees of freedom in denominator (within groups). */
191 private int dfwg;
192
193 /** Statistic. */
194 private double F;
195
196 /**
197 * Constructor
198 * @param dfbg degrees of freedom in numerator (between groups)
199 * @param dfwg degrees of freedom in denominator (within groups)
200 * @param F statistic
201 */
202 private AnovaStats(int dfbg, int dfwg, double F) {
203 this.dfbg = dfbg;
204 this.dfwg = dfwg;
205 this.F = F;
206 }
207 }
208
209 }