001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.correlation;
018
019 import org.apache.commons.math.MathRuntimeException;
020 import org.apache.commons.math.exception.util.LocalizedFormats;
021 import org.apache.commons.math.linear.RealMatrix;
022 import org.apache.commons.math.linear.BlockRealMatrix;
023 import org.apache.commons.math.stat.descriptive.moment.Mean;
024 import org.apache.commons.math.stat.descriptive.moment.Variance;
025
026 /**
027 * Computes covariances for pairs of arrays or columns of a matrix.
028 *
029 * <p>The constructors that take <code>RealMatrix</code> or
030 * <code>double[][]</code> arguments generate covariance matrices. The
031 * columns of the input matrices are assumed to represent variable values.</p>
032 *
033 * <p>The constructor argument <code>biasCorrected</code> determines whether or
034 * not computed covariances are bias-corrected.</p>
035 *
036 * <p>Unbiased covariances are given by the formula</p>
037 * <code>cov(X, Y) = Σ[(x<sub>i</sub> - E(X))(y<sub>i</sub> - E(Y))] / (n - 1)</code>
038 * where <code>E(X)</code> is the mean of <code>X</code> and <code>E(Y)</code>
039 * is the mean of the <code>Y</code> values.
040 *
041 * <p>Non-bias-corrected estimates use <code>n</code> in place of <code>n - 1</code>.</p>
042 *
043 * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 août 2010) $
044 * @since 2.0
045 */
046 public class Covariance {
047
048 /** covariance matrix */
049 private final RealMatrix covarianceMatrix;
050
051 /**
052 * Create an empty covariance matrix.
053 */
054 /** Number of observations (length of covariate vectors) */
055 private final int n;
056
057 /**
058 * Create a Covariance with no data
059 */
060 public Covariance() {
061 super();
062 covarianceMatrix = null;
063 n = 0;
064 }
065
066 /**
067 * Create a Covariance matrix from a rectangular array
068 * whose columns represent covariates.
069 *
070 * <p>The <code>biasCorrected</code> parameter determines whether or not
071 * covariance estimates are bias-corrected.</p>
072 *
073 * <p>The input array must be rectangular with at least two columns
074 * and two rows.</p>
075 *
076 * @param data rectangular array with columns representing covariates
077 * @param biasCorrected true means covariances are bias-corrected
078 * @throws IllegalArgumentException if the input data array is not
079 * rectangular with at least two rows and two columns.
080 */
081 public Covariance(double[][] data, boolean biasCorrected) {
082 this(new BlockRealMatrix(data), biasCorrected);
083 }
084
085 /**
086 * Create a Covariance matrix from a rectangular array
087 * whose columns represent covariates.
088 *
089 * <p>The input array must be rectangular with at least two columns
090 * and two rows</p>
091 *
092 * @param data rectangular array with columns representing covariates
093 * @throws IllegalArgumentException if the input data array is not
094 * rectangular with at least two rows and two columns.
095 */
096 public Covariance(double[][] data) {
097 this(data, true);
098 }
099
100 /**
101 * Create a covariance matrix from a matrix whose columns
102 * represent covariates.
103 *
104 * <p>The <code>biasCorrected</code> parameter determines whether or not
105 * covariance estimates are bias-corrected.</p>
106 *
107 * <p>The matrix must have at least two columns and two rows</p>
108 *
109 * @param matrix matrix with columns representing covariates
110 * @param biasCorrected true means covariances are bias-corrected
111 * @throws IllegalArgumentException if the input matrix does not have
112 * at least two rows and two columns
113 */
114 public Covariance(RealMatrix matrix, boolean biasCorrected) {
115 checkSufficientData(matrix);
116 n = matrix.getRowDimension();
117 covarianceMatrix = computeCovarianceMatrix(matrix, biasCorrected);
118 }
119
120 /**
121 * Create a covariance matrix from a matrix whose columns
122 * represent covariates.
123 *
124 * <p>The matrix must have at least two columns and two rows</p>
125 *
126 * @param matrix matrix with columns representing covariates
127 * @throws IllegalArgumentException if the input matrix does not have
128 * at least two rows and two columns
129 */
130 public Covariance(RealMatrix matrix) {
131 this(matrix, true);
132 }
133
134 /**
135 * Returns the covariance matrix
136 *
137 * @return covariance matrix
138 */
139 public RealMatrix getCovarianceMatrix() {
140 return covarianceMatrix;
141 }
142
143 /**
144 * Returns the number of observations (length of covariate vectors)
145 *
146 * @return number of observations
147 */
148
149 public int getN() {
150 return n;
151 }
152
153 /**
154 * Compute a covariance matrix from a matrix whose columns represent
155 * covariates.
156 * @param matrix input matrix (must have at least two columns and two rows)
157 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
158 * @return covariance matrix
159 */
160 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix, boolean biasCorrected) {
161 int dimension = matrix.getColumnDimension();
162 Variance variance = new Variance(biasCorrected);
163 RealMatrix outMatrix = new BlockRealMatrix(dimension, dimension);
164 for (int i = 0; i < dimension; i++) {
165 for (int j = 0; j < i; j++) {
166 double cov = covariance(matrix.getColumn(i), matrix.getColumn(j), biasCorrected);
167 outMatrix.setEntry(i, j, cov);
168 outMatrix.setEntry(j, i, cov);
169 }
170 outMatrix.setEntry(i, i, variance.evaluate(matrix.getColumn(i)));
171 }
172 return outMatrix;
173 }
174
175 /**
176 * Create a covariance matrix from a matrix whose columns represent
177 * covariates. Covariances are computed using the bias-corrected formula.
178 * @param matrix input matrix (must have at least two columns and two rows)
179 * @return covariance matrix
180 * @see #Covariance
181 */
182 protected RealMatrix computeCovarianceMatrix(RealMatrix matrix) {
183 return computeCovarianceMatrix(matrix, true);
184 }
185
186 /**
187 * Compute a covariance matrix from a rectangular array whose columns represent
188 * covariates.
189 * @param data input array (must have at least two columns and two rows)
190 * @param biasCorrected determines whether or not covariance estimates are bias-corrected
191 * @return covariance matrix
192 */
193 protected RealMatrix computeCovarianceMatrix(double[][] data, boolean biasCorrected) {
194 return computeCovarianceMatrix(new BlockRealMatrix(data), biasCorrected);
195 }
196
197 /**
198 * Create a covariance matrix from a rectangual array whose columns represent
199 * covariates. Covariances are computed using the bias-corrected formula.
200 * @param data input array (must have at least two columns and two rows)
201 * @return covariance matrix
202 * @see #Covariance
203 */
204 protected RealMatrix computeCovarianceMatrix(double[][] data) {
205 return computeCovarianceMatrix(data, true);
206 }
207
208 /**
209 * Computes the covariance between the two arrays.
210 *
211 * <p>Array lengths must match and the common length must be at least 2.</p>
212 *
213 * @param xArray first data array
214 * @param yArray second data array
215 * @param biasCorrected if true, returned value will be bias-corrected
216 * @return returns the covariance for the two arrays
217 * @throws IllegalArgumentException if the arrays lengths do not match or
218 * there is insufficient data
219 */
220 public double covariance(final double[] xArray, final double[] yArray, boolean biasCorrected)
221 throws IllegalArgumentException {
222 Mean mean = new Mean();
223 double result = 0d;
224 int length = xArray.length;
225 if (length != yArray.length) {
226 throw MathRuntimeException.createIllegalArgumentException(
227 LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, length, yArray.length);
228 } else if (length < 2) {
229 throw MathRuntimeException.createIllegalArgumentException(
230 LocalizedFormats.INSUFFICIENT_DIMENSION, length, 2);
231 } else {
232 double xMean = mean.evaluate(xArray);
233 double yMean = mean.evaluate(yArray);
234 for (int i = 0; i < length; i++) {
235 double xDev = xArray[i] - xMean;
236 double yDev = yArray[i] - yMean;
237 result += (xDev * yDev - result) / (i + 1);
238 }
239 }
240 return biasCorrected ? result * ((double) length / (double)(length - 1)) : result;
241 }
242
243 /**
244 * Computes the covariance between the two arrays, using the bias-corrected
245 * formula.
246 *
247 * <p>Array lengths must match and the common length must be at least 2.</p>
248 *
249 * @param xArray first data array
250 * @param yArray second data array
251 * @return returns the covariance for the two arrays
252 * @throws IllegalArgumentException if the arrays lengths do not match or
253 * there is insufficient data
254 */
255 public double covariance(final double[] xArray, final double[] yArray)
256 throws IllegalArgumentException {
257 return covariance(xArray, yArray, true);
258 }
259
260 /**
261 * Throws IllegalArgumentException of the matrix does not have at least
262 * two columns and two rows
263 * @param matrix matrix to check
264 */
265 private void checkSufficientData(final RealMatrix matrix) {
266 int nRows = matrix.getRowDimension();
267 int nCols = matrix.getColumnDimension();
268 if (nRows < 2 || nCols < 2) {
269 throw MathRuntimeException.createIllegalArgumentException(
270 LocalizedFormats.INSUFFICIENT_ROWS_AND_COLUMNS,
271 nRows, nCols);
272 }
273 }
274 }