001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.inference;
018
019 import java.util.Collection;
020
021 import org.apache.commons.math.MathException;
022 import org.apache.commons.math.MathRuntimeException;
023 import org.apache.commons.math.distribution.FDistribution;
024 import org.apache.commons.math.distribution.FDistributionImpl;
025 import org.apache.commons.math.exception.util.LocalizedFormats;
026 import org.apache.commons.math.stat.descriptive.summary.Sum;
027 import org.apache.commons.math.stat.descriptive.summary.SumOfSquares;
028
029
030 /**
031 * Implements one-way ANOVA statistics defined in the {@link OneWayAnovaImpl}
032 * interface.
033 *
034 * <p>Uses the
035 * {@link org.apache.commons.math.distribution.FDistribution
036 * commons-math F Distribution implementation} to estimate exact p-values.</p>
037 *
038 * <p>This implementation is based on a description at
039 * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
040 * <pre>
041 * Abbreviations: bg = between groups,
042 * wg = within groups,
043 * ss = sum squared deviations
044 * </pre>
045 *
046 * @since 1.2
047 * @version $Revision: 983921 $ $Date: 2010-08-10 12:46:06 +0200 (mar. 10 ao??t 2010) $
048 */
049 public class OneWayAnovaImpl implements OneWayAnova {
050
051 /**
052 * Default constructor.
053 */
054 public OneWayAnovaImpl() {
055 }
056
057 /**
058 * {@inheritDoc}<p>
059 * This implementation computes the F statistic using the definitional
060 * formula<pre>
061 * F = msbg/mswg</pre>
062 * where<pre>
063 * msbg = between group mean square
064 * mswg = within group mean square</pre>
065 * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
066 * here</a></p>
067 */
068 public double anovaFValue(Collection<double[]> categoryData)
069 throws IllegalArgumentException, MathException {
070 AnovaStats a = anovaStats(categoryData);
071 return a.F;
072 }
073
074 /**
075 * {@inheritDoc}<p>
076 * This implementation uses the
077 * {@link org.apache.commons.math.distribution.FDistribution
078 * commons-math F Distribution implementation} to estimate the exact
079 * p-value, using the formula<pre>
080 * p = 1 - cumulativeProbability(F)</pre>
081 * where <code>F</code> is the F value and <code>cumulativeProbability</code>
082 * is the commons-math implementation of the F distribution.</p>
083 */
084 public double anovaPValue(Collection<double[]> categoryData)
085 throws IllegalArgumentException, MathException {
086 AnovaStats a = anovaStats(categoryData);
087 FDistribution fdist = new FDistributionImpl(a.dfbg, a.dfwg);
088 return 1.0 - fdist.cumulativeProbability(a.F);
089 }
090
091 /**
092 * {@inheritDoc}<p>
093 * This implementation uses the
094 * {@link org.apache.commons.math.distribution.FDistribution
095 * commons-math F Distribution implementation} to estimate the exact
096 * p-value, using the formula<pre>
097 * p = 1 - cumulativeProbability(F)</pre>
098 * where <code>F</code> is the F value and <code>cumulativeProbability</code>
099 * is the commons-math implementation of the F distribution.</p>
100 * <p>True is returned iff the estimated p-value is less than alpha.</p>
101 */
102 public boolean anovaTest(Collection<double[]> categoryData, double alpha)
103 throws IllegalArgumentException, MathException {
104 if ((alpha <= 0) || (alpha > 0.5)) {
105 throw MathRuntimeException.createIllegalArgumentException(
106 LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
107 alpha, 0, 0.5);
108 }
109 return anovaPValue(categoryData) < alpha;
110 }
111
112
113 /**
114 * This method actually does the calculations (except P-value).
115 *
116 * @param categoryData <code>Collection</code> of <code>double[]</code>
117 * arrays each containing data for one category
118 * @return computed AnovaStats
119 * @throws IllegalArgumentException if categoryData does not meet
120 * preconditions specified in the interface definition
121 * @throws MathException if an error occurs computing the Anova stats
122 */
123 private AnovaStats anovaStats(Collection<double[]> categoryData)
124 throws IllegalArgumentException, MathException {
125
126 // check if we have enough categories
127 if (categoryData.size() < 2) {
128 throw MathRuntimeException.createIllegalArgumentException(
129 LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
130 categoryData.size());
131 }
132
133 // check if each category has enough data and all is double[]
134 for (double[] array : categoryData) {
135 if (array.length <= 1) {
136 throw MathRuntimeException.createIllegalArgumentException(
137 LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
138 array.length);
139 }
140 }
141
142 int dfwg = 0;
143 double sswg = 0;
144 Sum totsum = new Sum();
145 SumOfSquares totsumsq = new SumOfSquares();
146 int totnum = 0;
147
148 for (double[] data : categoryData) {
149
150 Sum sum = new Sum();
151 SumOfSquares sumsq = new SumOfSquares();
152 int num = 0;
153
154 for (int i = 0; i < data.length; i++) {
155 double val = data[i];
156
157 // within category
158 num++;
159 sum.increment(val);
160 sumsq.increment(val);
161
162 // for all categories
163 totnum++;
164 totsum.increment(val);
165 totsumsq.increment(val);
166 }
167 dfwg += num - 1;
168 double ss = sumsq.getResult() - sum.getResult() * sum.getResult() / num;
169 sswg += ss;
170 }
171 double sst = totsumsq.getResult() - totsum.getResult() *
172 totsum.getResult()/totnum;
173 double ssbg = sst - sswg;
174 int dfbg = categoryData.size() - 1;
175 double msbg = ssbg/dfbg;
176 double mswg = sswg/dfwg;
177 double F = msbg/mswg;
178
179 return new AnovaStats(dfbg, dfwg, F);
180 }
181
182 /**
183 Convenience class to pass dfbg,dfwg,F values around within AnovaImpl.
184 No get/set methods provided.
185 */
186 private static class AnovaStats {
187
188 /** Degrees of freedom in numerator (between groups). */
189 private int dfbg;
190
191 /** Degrees of freedom in denominator (within groups). */
192 private int dfwg;
193
194 /** Statistic. */
195 private double F;
196
197 /**
198 * Constructor
199 * @param dfbg degrees of freedom in numerator (between groups)
200 * @param dfwg degrees of freedom in denominator (within groups)
201 * @param F statistic
202 */
203 private AnovaStats(int dfbg, int dfwg, double F) {
204 this.dfbg = dfbg;
205 this.dfwg = dfwg;
206 this.F = F;
207 }
208 }
209
210 }