001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.correlation;
018
019 import org.apache.commons.math.MathRuntimeException;
020 import org.apache.commons.math.linear.RealMatrix;
021 import org.apache.commons.math.linear.BlockRealMatrix;
022 import org.apache.commons.math.stat.descriptive.moment.Mean;
023 import org.apache.commons.math.stat.descriptive.moment.Variance;
024
025 /**
026 * Computes covariances for pairs of arrays or columns of a matrix.
027 *
028 * <p>The constructors that take <code>RealMatrix</code> or
029 * <code>double[][]</code> arguments generate covariance matrices. The
030 * columns of the input matrices are assumed to represent variable values.</p>
031 *
032 * <p>The constructor argument <code>biasCorrected</code> determines whether or
033 * not computed covariances are bias-corrected.</p>
034 *
035 * <p>Unbiased covariances are given by the formula</p>
036 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
037 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
038 * is the mean of the <code>Y</code> values.
039 *
040 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>
041 *
042 * @version $Revision: 811685 $ $Date: 2009-09-05 13:36:48 -0400 (Sat, 05 Sep 2009) $
043 * @since 2.0
044 */
045 public class Covariance {
046
047 /** covariance matrix */
048 private final RealMatrix covarianceMatrix;
049
050 /**
051 * Create an empty covariance matrix.
052 */
053 /** Number of observations (length of covariate vectors) */
054 private final int n;
055
056 /**
057 * Create a Covariance with no data
058 */
059 public Covariance() {
060 super();
061 covarianceMatrix = null;
062 n = 0;
063 }
064
065 /**
066 * Create a Covariance matrix from a rectangular array
067 * whose columns represent covariates.
068 *
069 * <p>The <code>biasCorrected</code> parameter determines whether or not
070 * covariance estimates are bias-corrected.</p>
071 *
072 * <p>The input array must be rectangular with at least two columns
073 * and two rows.</p>
074 *
075 * @param data rectangular array with columns representing covariates
076 * @param biasCorrected true means covariances are bias-corrected
077 * @throws IllegalArgumentException if the input data array is not
078 * rectangular with at least two rows and two columns.
079 */
080 public Covariance(double[][] data, boolean biasCorrected) {
081 this(new BlockRealMatrix(data), biasCorrected);
082 }
083
084 /**
085 * Create a Covariance matrix from a rectangular array
086 * whose columns represent covariates.
087 *
088 * <p>The input array must be rectangular with at least two columns
089 * and two rows</p>
090 *
091 * @param data rectangular array with columns representing covariates
092 * @throws IllegalArgumentException if the input data array is not
093 * rectangular with at least two rows and two columns.
094 */
095 public Covariance(double[][] data) {
096 this(data, true);
097 }
098
099 /**
100 * Create a covariance matrix from a matrix whose columns
101 * represent covariates.
102 *
103 * <p>The <code>biasCorrected</code> parameter determines whether or not
104 * covariance estimates are bias-corrected.</p>
105 *
106 * <p>The matrix must have at least two columns and two rows</p>
107 *
108 * @param matrix matrix with columns representing covariates
109 * @param biasCorrected true means covariances are bias-corrected
110 * @throws IllegalArgumentException if the input matrix does not have
111 * at least two rows and two columns
112 */
113 public Covariance(RealMatrix matrix, boolean biasCorrected) {
114 checkSufficientData(matrix);
115 n = matrix.getRowDimension();
116 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
117 }
118
119 /**
120 * Create a covariance matrix from a matrix whose columns
121 * represent covariates.
122 *
123 * <p>The matrix must have at least two columns and two rows</p>
124 *
125 * @param matrix matrix with columns representing covariates
126 * @throws IllegalArgumentException if the input matrix does not have
127 * at least two rows and two columns
128 */
129 public Covariance(RealMatrix matrix) {
130 this(matrix, true);
131 }
132
133 /**
134 * Returns the covariance matrix
135 *
136 * @return covariance matrix
137 */
138 public RealMatrix getCovarianceMatrix() {
139 return covarianceMatrix;
140 }
141
142 /**
143 * Returns the number of observations (length of covariate vectors)
144 *
145 * @return number of observations
146 */
147
148 public int getN() {
149 return n;
150 }
151
152 /**
153 * Compute a covariance matrix from a matrix whose columns represent
154 * covariates.
155 * @param matrix input matrix (must have at least two columns and two rows)
156 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
157 * @return covariance matrix
158 */
159 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
160 int dimension = matrix.getColumnDimension();
161 Variance variance = new Variance(biasCorrected);
162 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
163 for (int i = 0; i < dimension; i++) {
164 for (int j = 0; j < i; j++) {
165 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
166 outMatrix.setEntry(i, j, cov);
167 outMatrix.setEntry(j, i, cov);
168 }
169 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
170 }
171 return outMatrix;
172 }
173
174 /**
175 * Create a covariance matrix from a matrix whose columns represent
176 * covariates. Covariances are computed using the bias-corrected formula.
177 * @param matrix input matrix (must have at least two columns and two rows)
178 * @return covariance matrix
179 * @see #Covariance
180 */
181 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) {
182 return computeCovarianceMatrix(matrix, true);
183 }
184
185 /**
186 * Compute a covariance matrix from a rectangular array whose columns represent
187 * covariates.
188 * @param data input array (must have at least two columns and two rows)
189 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
190 * @return covariance matrix
191 */
192 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) {
193 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
194 }
195
196 /**
197 * Create a covariance matrix from a rectangual array whose columns represent
198 * covariates. Covariances are computed using the bias-corrected formula.
199 * @param data input array (must have at least two columns and two rows)
200 * @return covariance matrix
201 * @see #Covariance
202 */
203 protected RealMatrix computeCovarianceMatrix(double[][] data) {
204 return computeCovarianceMatrix(data, true);
205 }
206
207 /**
208 * Computes the covariance between the two arrays.
209 *
210 * <p>Array lengths must match and the common length must be at least 2.</p>
211 *
212 * @param xArray first data array
213 * @param yArray second data array
214 * @param biasCorrected if true, returned value will be bias-corrected
215 * @return returns the covariance for the two arrays
216 * @throws IllegalArgumentException if the arrays lengths do not match or
217 * there is insufficient data
218 */
219 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
220 throws IllegalArgumentException {
221 Mean mean = new Mean();
222 double result = 0d;
223 int length = xArray.length;
224 if(length == yArray.length && length > 1) {
225 double xMean = mean.evaluate(xArray);
226 double yMean = mean.evaluate(yArray);
227 for (int i = 0; i < length; i++) {
228 double xDev = xArray[i] - xMean;
229 double yDev = yArray[i] - yMean;
230 result += (xDev * yDev - result) / (i + 1);
231 }
232 }
233 else {
234 throw MathRuntimeException.createIllegalArgumentException(
235 "arrays must have the same length and both must have at " +
236 "least two elements. xArray has size {0}, yArray has {1} elements",
237 length, yArray.length);
238 }
239 return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
240 }
241
242 /**
243 * Computes the covariance between the two arrays, using the bias-corrected
244 * formula.
245 *
246 * <p>Array lengths must match and the common length must be at least 2.</p>
247 *
248 * @param xArray first data array
249 * @param yArray second data array
250 * @return returns the covariance for the two arrays
251 * @throws IllegalArgumentException if the arrays lengths do not match or
252 * there is insufficient data
253 */
254 public double covariance(final double[] xArray, final double[] yArray)
255 throws IllegalArgumentException {
256 return covariance(xArray, yArray, true);
257 }
258
259 /**
260 * Throws IllegalArgumentException of the matrix does not have at least
261 * two columns and two rows
262 * @param matrix matrix to check
263 */
264 private void checkSufficientData(final RealMatrix matrix) {
265 int nRows = matrix.getRowDimension();
266 int nCols = matrix.getColumnDimension();
267 if (nRows < 2 || nCols < 2) {
268 throw MathRuntimeException.createIllegalArgumentException(
269 "insufficient data: only {0} rows and {1} columns.",
270 nRows, nCols);
271 }
272 }
273 }