001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017 package org.apache.commons.math.stat.descriptive.rank;
018
019 import java.io.Serializable;
020 import java.util.Arrays;
021
022 import org.apache.commons.math.MathRuntimeException;
023 import org.apache.commons.math.stat.descriptive.AbstractUnivariateStatistic;
024
025 /**
026 * Provides percentile computation.
027 * <p>
028 * There are several commonly used methods for estimating percentiles (a.k.a.
029 * quantiles) based on sample data. For large samples, the different methods
030 * agree closely, but when sample sizes are small, different methods will give
031 * significantly different results. The algorithm implemented here works as follows:
032 * <ol>
033 * <li>Let <code>n</code> be the length of the (sorted) array and
034 * <code>0 < p <= 100</code> be the desired percentile.</li>
035 * <li>If <code> n = 1 </code> return the unique array element (regardless of
036 * the value of <code>p</code>); otherwise </li>
037 * <li>Compute the estimated percentile position
038 * <code> pos = p * (n + 1) / 100</code> and the difference, <code>d</code>
039 * between <code>pos</code> and <code>floor(pos)</code> (i.e. the fractional
040 * part of <code>pos</code>). If <code>pos >= n</code> return the largest
041 * element in the array; otherwise</li>
042 * <li>Let <code>lower</code> be the element in position
043 * <code>floor(pos)</code> in the array and let <code>upper</code> be the
044 * next element in the array. Return <code>lower + d * (upper - lower)</code>
045 * </li>
046 * </ol></p>
047 * <p>
048 * To compute percentiles, the data must be (totally) ordered. Input arrays
049 * are copied and then sorted using {@link java.util.Arrays#sort(double[])}.
050 * The ordering used by <code>Arrays.sort(double[])</code> is the one determined
051 * by {@link java.lang.Double#compareTo(Double)}. This ordering makes
052 * <code>Double.NaN</code> larger than any other value (including
053 * <code>Double.POSITIVE_INFINITY</code>). Therefore, for example, the median
054 * (50th percentile) of
055 * <code>{0, 1, 2, 3, 4, Double.NaN}</code> evaluates to <code>2.5.</code></p>
056 * <p>
057 * Since percentile estimation usually involves interpolation between array
058 * elements, arrays containing <code>NaN</code> or infinite values will often
059 * result in <code>NaN<code> or infinite values returned.</p>
060 * <p>
061 * <strong>Note that this implementation is not synchronized.</strong> If
062 * multiple threads access an instance of this class concurrently, and at least
063 * one of the threads invokes the <code>increment()</code> or
064 * <code>clear()</code> method, it must be synchronized externally.</p>
065 *
066 * @version $Revision: 811685 $ $Date: 2009-09-05 13:36:48 -0400 (Sat, 05 Sep 2009) $
067 */
068 public class Percentile extends AbstractUnivariateStatistic implements Serializable {
069
070 /** Serializable version identifier */
071 private static final long serialVersionUID = -8091216485095130416L;
072
073 /** Determines what percentile is computed when evaluate() is activated
074 * with no quantile argument */
075 private double quantile = 0.0;
076
077 /**
078 * Constructs a Percentile with a default quantile
079 * value of 50.0.
080 */
081 public Percentile() {
082 this(50.0);
083 }
084
085 /**
086 * Constructs a Percentile with the specific quantile value.
087 * @param p the quantile
088 * @throws IllegalArgumentException if p is not greater than 0 and less
089 * than or equal to 100
090 */
091 public Percentile(final double p) {
092 setQuantile(p);
093 }
094
095 /**
096 * Copy constructor, creates a new {@code Percentile} identical
097 * to the {@code original}
098 *
099 * @param original the {@code Percentile} instance to copy
100 */
101 public Percentile(Percentile original) {
102 copy(original, this);
103 }
104
105 /**
106 * Returns an estimate of the <code>p</code>th percentile of the values
107 * in the <code>values</code> array.
108 * <p>
109 * Calls to this method do not modify the internal <code>quantile</code>
110 * state of this statistic.</p>
111 * <p>
112 * <ul>
113 * <li>Returns <code>Double.NaN</code> if <code>values</code> has length
114 * <code>0</code></li>
115 * <li>Returns (for any value of <code>p</code>) <code>values[0]</code>
116 * if <code>values</code> has length <code>1</code></li>
117 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
118 * is null or p is not a valid quantile value (p must be greater than 0
119 * and less than or equal to 100) </li>
120 * </ul></p>
121 * <p>
122 * See {@link Percentile} for a description of the percentile estimation
123 * algorithm used.</p>
124 *
125 * @param values input array of values
126 * @param p the percentile value to compute
127 * @return the percentile value or Double.NaN if the array is empty
128 * @throws IllegalArgumentException if <code>values</code> is null
129 * or p is invalid
130 */
131 public double evaluate(final double[] values, final double p) {
132 test(values, 0, 0);
133 return evaluate(values, 0, values.length, p);
134 }
135
136 /**
137 * Returns an estimate of the <code>quantile</code>th percentile of the
138 * designated values in the <code>values</code> array. The quantile
139 * estimated is determined by the <code>quantile</code> property.
140 * <p>
141 * <ul>
142 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
143 * <li>Returns (for any value of <code>quantile</code>)
144 * <code>values[begin]</code> if <code>length = 1 </code></li>
145 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
146 * is null, or <code>start</code> or <code>length</code>
147 * is invalid</li>
148 * </ul></p>
149 * <p>
150 * See {@link Percentile} for a description of the percentile estimation
151 * algorithm used.</p>
152 *
153 * @param values the input array
154 * @param start index of the first array element to include
155 * @param length the number of elements to include
156 * @return the percentile value
157 * @throws IllegalArgumentException if the parameters are not valid
158 *
159 */
160 @Override
161 public double evaluate( final double[] values, final int start, final int length) {
162 return evaluate(values, start, length, quantile);
163 }
164
165 /**
166 * Returns an estimate of the <code>p</code>th percentile of the values
167 * in the <code>values</code> array, starting with the element in (0-based)
168 * position <code>begin</code> in the array and including <code>length</code>
169 * values.
170 * <p>
171 * Calls to this method do not modify the internal <code>quantile</code>
172 * state of this statistic.</p>
173 * <p>
174 * <ul>
175 * <li>Returns <code>Double.NaN</code> if <code>length = 0</code></li>
176 * <li>Returns (for any value of <code>p</code>) <code>values[begin]</code>
177 * if <code>length = 1 </code></li>
178 * <li>Throws <code>IllegalArgumentException</code> if <code>values</code>
179 * is null , <code>begin</code> or <code>length</code> is invalid, or
180 * <code>p</code> is not a valid quantile value (p must be greater than 0
181 * and less than or equal to 100)</li>
182 * </ul></p>
183 * <p>
184 * See {@link Percentile} for a description of the percentile estimation
185 * algorithm used.</p>
186 *
187 * @param values array of input values
188 * @param p the percentile to compute
189 * @param begin the first (0-based) element to include in the computation
190 * @param length the number of array elements to include
191 * @return the percentile value
192 * @throws IllegalArgumentException if the parameters are not valid or the
193 * input array is null
194 */
195 public double evaluate(final double[] values, final int begin,
196 final int length, final double p) {
197
198 test(values, begin, length);
199
200 if ((p > 100) || (p <= 0)) {
201 throw MathRuntimeException.createIllegalArgumentException(
202 "out of bounds quantile value: {0}, must be in (0, 100]", p);
203 }
204 if (length == 0) {
205 return Double.NaN;
206 }
207 if (length == 1) {
208 return values[begin]; // always return single value for n = 1
209 }
210 double n = length;
211 double pos = p * (n + 1) / 100;
212 double fpos = Math.floor(pos);
213 int intPos = (int) fpos;
214 double dif = pos - fpos;
215 double[] sorted = new double[length];
216 System.arraycopy(values, begin, sorted, 0, length);
217 Arrays.sort(sorted);
218
219 if (pos < 1) {
220 return sorted[0];
221 }
222 if (pos >= n) {
223 return sorted[length - 1];
224 }
225 double lower = sorted[intPos - 1];
226 double upper = sorted[intPos];
227 return lower + dif * (upper - lower);
228 }
229
230 /**
231 * Returns the value of the quantile field (determines what percentile is
232 * computed when evaluate() is called with no quantile argument).
233 *
234 * @return quantile
235 */
236 public double getQuantile() {
237 return quantile;
238 }
239
240 /**
241 * Sets the value of the quantile field (determines what percentile is
242 * computed when evaluate() is called with no quantile argument).
243 *
244 * @param p a value between 0 < p <= 100
245 * @throws IllegalArgumentException if p is not greater than 0 and less
246 * than or equal to 100
247 */
248 public void setQuantile(final double p) {
249 if (p <= 0 || p > 100) {
250 throw MathRuntimeException.createIllegalArgumentException(
251 "out of bounds quantile value: {0}, must be in (0, 100]", p);
252 }
253 quantile = p;
254 }
255
256 /**
257 * {@inheritDoc}
258 */
259 @Override
260 public Percentile copy() {
261 Percentile result = new Percentile();
262 copy(this, result);
263 return result;
264 }
265
266 /**
267 * Copies source to dest.
268 * <p>Neither source nor dest can be null.</p>
269 *
270 * @param source Percentile to copy
271 * @param dest Percentile to copy to
272 * @throws NullPointerException if either source or dest is null
273 */
274 public static void copy(Percentile source, Percentile dest) {
275 dest.quantile = source.quantile;
276 }
277
278 }