001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018 package org.apache.commons.math.stat.ranking;
019
020 import java.util.ArrayList;
021 import java.util.Arrays;
022 import java.util.Iterator;
023 import java.util.List;
024
025 import org.apache.commons.math.MathRuntimeException;
026 import org.apache.commons.math.random.RandomData;
027 import org.apache.commons.math.random.RandomDataImpl;
028 import org.apache.commons.math.random.RandomGenerator;
029
030
031 /**
032 * <p> Ranking based on the natural ordering on doubles.</p>
033 * <p>NaNs are treated according to the configured {@link NaNStrategy} and ties
034 * are handled using the selected {@link TiesStrategy}.
035 * Configuration settings are supplied in optional constructor arguments.
036 * Defaults are {@link NaNStrategy#MAXIMAL} and {@link TiesStrategy#AVERAGE},
037 * respectively. When using {@link TiesStrategy#RANDOM}, a
038 * {@link RandomGenerator} may be supplied as a constructor argument.</p>
039 * <p>Examples:
040 * <table border="1" cellpadding="3">
041 * <tr><th colspan="3">
042 * Input data: (20, 17, 30, 42.3, 17, 50, Double.NaN, Double.NEGATIVE_INFINITY, 17)
043 * </th></tr>
044 * <tr><th>NaNStrategy</th><th>TiesStrategy</th>
045 * <th><code>rank(data)</code></th>
046 * <tr>
047 * <td>default (NaNs maximal)</td>
048 * <td>default (ties averaged)</td>
049 * <td>(5, 3, 6, 7, 3, 8, 9, 1, 3)</td></tr>
050 * <tr>
051 * <td>default (NaNs maximal)</td>
052 * <td>MINIMUM</td>
053 * <td>(5, 2, 6, 7, 2, 8, 9, 1, 2)</td></tr>
054 * <tr>
055 * <td>MINIMAL</td>
056 * <td>default (ties averaged)</td>
057 * <td>(6, 4, 7, 8, 4, 9, 1.5, 1.5, 4)</td></tr>
058 * <tr>
059 * <td>REMOVED</td>
060 * <td>SEQUENTIAL</td>
061 * <td>(5, 2, 6, 7, 3, 8, 1, 4)</td></tr>
062 * <tr>
063 * <td>MINIMAL</td>
064 * <td>MAXIMUM</td>
065 * <td>(6, 5, 7, 8, 5, 9, 2, 2, 5)</td></tr></table></p>
066 *
067 * @since 2.0
068 * @version $Revision: 811827 $ $Date: 2009-09-06 11:32:50 -0400 (Sun, 06 Sep 2009) $
069 */
070 public class NaturalRanking implements RankingAlgorithm {
071
072 /** default NaN strategy */
073 public static final NaNStrategy DEFAULT_NAN_STRATEGY = NaNStrategy.MAXIMAL;
074
075 /** default ties strategy */
076 public static final TiesStrategy DEFAULT_TIES_STRATEGY = TiesStrategy.AVERAGE;
077
078 /** NaN strategy - defaults to NaNs maximal */
079 private final NaNStrategy nanStrategy;
080
081 /** Ties strategy - defaults to ties averaged */
082 private final TiesStrategy tiesStrategy;
083
084 /** Source of random data - used only when ties strategy is RANDOM */
085 private final RandomData randomData;
086
087 /**
088 * Create a NaturalRanking with default strategies for handling ties and NaNs.
089 */
090 public NaturalRanking() {
091 super();
092 tiesStrategy = DEFAULT_TIES_STRATEGY;
093 nanStrategy = DEFAULT_NAN_STRATEGY;
094 randomData = null;
095 }
096
097 /**
098 * Create a NaturalRanking with the given TiesStrategy.
099 *
100 * @param tiesStrategy the TiesStrategy to use
101 */
102 public NaturalRanking(TiesStrategy tiesStrategy) {
103 super();
104 this.tiesStrategy = tiesStrategy;
105 nanStrategy = DEFAULT_NAN_STRATEGY;
106 randomData = new RandomDataImpl();
107 }
108
109 /**
110 * Create a NaturalRanking with the given NaNStrategy.
111 *
112 * @param nanStrategy the NaNStrategy to use
113 */
114 public NaturalRanking(NaNStrategy nanStrategy) {
115 super();
116 this.nanStrategy = nanStrategy;
117 tiesStrategy = DEFAULT_TIES_STRATEGY;
118 randomData = null;
119 }
120
121 /**
122 * Create a NaturalRanking with the given NaNStrategy and TiesStrategy.
123 *
124 * @param nanStrategy NaNStrategy to use
125 * @param tiesStrategy TiesStrategy to use
126 */
127 public NaturalRanking(NaNStrategy nanStrategy, TiesStrategy tiesStrategy) {
128 super();
129 this.nanStrategy = nanStrategy;
130 this.tiesStrategy = tiesStrategy;
131 randomData = new RandomDataImpl();
132 }
133
134 /**
135 * Create a NaturalRanking with TiesStrategy.RANDOM and the given
136 * RandomGenerator as the source of random data.
137 *
138 * @param randomGenerator source of random data
139 */
140 public NaturalRanking(RandomGenerator randomGenerator) {
141 super();
142 this.tiesStrategy = TiesStrategy.RANDOM;
143 nanStrategy = DEFAULT_NAN_STRATEGY;
144 randomData = new RandomDataImpl(randomGenerator);
145 }
146
147
148 /**
149 * Create a NaturalRanking with the given NaNStrategy, TiesStrategy.RANDOM
150 * and the given source of random data.
151 *
152 * @param nanStrategy NaNStrategy to use
153 * @param randomGenerator source of random data
154 */
155 public NaturalRanking(NaNStrategy nanStrategy,
156 RandomGenerator randomGenerator) {
157 super();
158 this.nanStrategy = nanStrategy;
159 this.tiesStrategy = TiesStrategy.RANDOM;
160 randomData = new RandomDataImpl(randomGenerator);
161 }
162
163 /**
164 * Return the NaNStrategy
165 *
166 * @return returns the NaNStrategy
167 */
168 public NaNStrategy getNanStrategy() {
169 return nanStrategy;
170 }
171
172 /**
173 * Return the TiesStrategy
174 *
175 * @return the TiesStrategy
176 */
177 public TiesStrategy getTiesStrategy() {
178 return tiesStrategy;
179 }
180
181 /**
182 * Rank <code>data</code> using the natural ordering on Doubles, with
183 * NaN values handled according to <code>nanStrategy</code> and ties
184 * resolved using <code>tiesStrategy.</code>
185 *
186 * @param data array to be ranked
187 * @return array of ranks
188 */
189 public double[] rank(double[] data) {
190
191 // Array recording initial positions of data to be ranked
192 IntDoublePair[] ranks = new IntDoublePair[data.length];
193 for (int i = 0; i < data.length; i++) {
194 ranks[i] = new IntDoublePair(data[i], i);
195 }
196
197 // Recode, remove or record positions of NaNs
198 List<Integer> nanPositions = null;
199 switch (nanStrategy) {
200 case MAXIMAL: // Replace NaNs with +INFs
201 recodeNaNs(ranks, Double.POSITIVE_INFINITY);
202 break;
203 case MINIMAL: // Replace NaNs with -INFs
204 recodeNaNs(ranks, Double.NEGATIVE_INFINITY);
205 break;
206 case REMOVED: // Drop NaNs from data
207 ranks = removeNaNs(ranks);
208 break;
209 case FIXED: // Record positions of NaNs
210 nanPositions = getNanPositions(ranks);
211 break;
212 default: // this should not happen unless NaNStrategy enum is changed
213 throw MathRuntimeException.createInternalError(null);
214 }
215
216 // Sort the IntDoublePairs
217 Arrays.sort(ranks);
218
219 // Walk the sorted array, filling output array using sorted positions,
220 // resolving ties as we go
221 double[] out = new double[ranks.length];
222 int pos = 1; // position in sorted array
223 out[ranks[0].getPosition()] = pos;
224 List<Integer> tiesTrace = new ArrayList<Integer>();
225 tiesTrace.add(ranks[0].getPosition());
226 for (int i = 1; i < ranks.length; i++) {
227 if (Double.compare(ranks[i].getValue(), ranks[i - 1].getValue()) > 0) {
228 // tie sequence has ended (or had length 1)
229 pos = i + 1;
230 if (tiesTrace.size() > 1) { // if seq is nontrivial, resolve
231 resolveTie(out, tiesTrace);
232 }
233 tiesTrace = new ArrayList<Integer>();
234 tiesTrace.add(ranks[i].getPosition());
235 } else {
236 // tie sequence continues
237 tiesTrace.add(ranks[i].getPosition());
238 }
239 out[ranks[i].getPosition()] = pos;
240 }
241 if (tiesTrace.size() > 1) { // handle tie sequence at end
242 resolveTie(out, tiesTrace);
243 }
244 if (nanStrategy == NaNStrategy.FIXED) {
245 restoreNaNs(out, nanPositions);
246 }
247 return out;
248 }
249
250 /**
251 * Returns an array that is a copy of the input array with IntDoublePairs
252 * having NaN values removed.
253 *
254 * @param ranks input array
255 * @return array with NaN-valued entries removed
256 */
257 private IntDoublePair[] removeNaNs(IntDoublePair[] ranks) {
258 if (!containsNaNs(ranks)) {
259 return ranks;
260 }
261 IntDoublePair[] outRanks = new IntDoublePair[ranks.length];
262 int j = 0;
263 for (int i = 0; i < ranks.length; i++) {
264 if (Double.isNaN(ranks[i].getValue())) {
265 // drop, but adjust original ranks of later elements
266 for (int k = i + 1; k < ranks.length; k++) {
267 ranks[k] = new IntDoublePair(
268 ranks[k].getValue(), ranks[k].getPosition() - 1);
269 }
270 } else {
271 outRanks[j] = new IntDoublePair(
272 ranks[i].getValue(), ranks[i].getPosition());
273 j++;
274 }
275 }
276 IntDoublePair[] returnRanks = new IntDoublePair[j];
277 System.arraycopy(outRanks, 0, returnRanks, 0, j);
278 return returnRanks;
279 }
280
281 /**
282 * Recodes NaN values to the given value.
283 *
284 * @param ranks array to recode
285 * @param value the value to replace NaNs with
286 */
287 private void recodeNaNs(IntDoublePair[] ranks, double value) {
288 for (int i = 0; i < ranks.length; i++) {
289 if (Double.isNaN(ranks[i].getValue())) {
290 ranks[i] = new IntDoublePair(
291 value, ranks[i].getPosition());
292 }
293 }
294 }
295
296 /**
297 * Checks for presence of NaNs in <code>ranks.</code>
298 *
299 * @param ranks array to be searched for NaNs
300 * @return true iff ranks contains one or more NaNs
301 */
302 private boolean containsNaNs(IntDoublePair[] ranks) {
303 for (int i = 0; i < ranks.length; i++) {
304 if (Double.isNaN(ranks[i].getValue())) {
305 return true;
306 }
307 }
308 return false;
309 }
310
311 /**
312 * Resolve a sequence of ties, using the configured {@link TiesStrategy}.
313 * The input <code>ranks</code> array is expected to take the same value
314 * for all indices in <code>tiesTrace</code>. The common value is recoded
315 * according to the tiesStrategy. For example, if ranks = <5,8,2,6,2,7,1,2>,
316 * tiesTrace = <2,4,7> and tiesStrategy is MINIMUM, ranks will be unchanged.
317 * The same array and trace with tiesStrategy AVERAGE will come out
318 * <5,8,3,6,3,7,1,3>.
319 *
320 * @param ranks array of ranks
321 * @param tiesTrace list of indices where <code>ranks</code> is constant
322 * -- that is, for any i and j in TiesTrace, <code> ranks[i] == ranks[j]
323 * </code>
324 */
325 private void resolveTie(double[] ranks, List<Integer> tiesTrace) {
326
327 // constant value of ranks over tiesTrace
328 final double c = ranks[tiesTrace.get(0)];
329
330 // length of sequence of tied ranks
331 final int length = tiesTrace.size();
332
333 switch (tiesStrategy) {
334 case AVERAGE: // Replace ranks with average
335 fill(ranks, tiesTrace, (2 * c + length - 1) / 2d);
336 break;
337 case MAXIMUM: // Replace ranks with maximum values
338 fill(ranks, tiesTrace, c + length - 1);
339 break;
340 case MINIMUM: // Replace ties with minimum
341 fill(ranks, tiesTrace, c);
342 break;
343 case RANDOM: // Fill with random integral values in [c, c + length - 1]
344 Iterator<Integer> iterator = tiesTrace.iterator();
345 long f = Math.round(c);
346 while (iterator.hasNext()) {
347 ranks[iterator.next()] =
348 randomData.nextLong(f, f + length - 1);
349 }
350 break;
351 case SEQUENTIAL: // Fill sequentially from c to c + length - 1
352 // walk and fill
353 iterator = tiesTrace.iterator();
354 f = Math.round(c);
355 int i = 0;
356 while (iterator.hasNext()) {
357 ranks[iterator.next()] = f + i++;
358 }
359 break;
360 default: // this should not happen unless TiesStrategy enum is changed
361 throw MathRuntimeException.createInternalError(null);
362 }
363 }
364
365 /**
366 * Sets<code>data[i] = value</code> for each i in <code>tiesTrace.</code>
367 *
368 * @param data array to modify
369 * @param tiesTrace list of index values to set
370 * @param value value to set
371 */
372 private void fill(double[] data, List<Integer> tiesTrace, double value) {
373 Iterator<Integer> iterator = tiesTrace.iterator();
374 while (iterator.hasNext()) {
375 data[iterator.next()] = value;
376 }
377 }
378
379 /**
380 * Set <code>ranks[i] = Double.NaN</code> for each i in <code>nanPositions.</code>
381 *
382 * @param ranks array to modify
383 * @param nanPositions list of index values to set to <code>Double.NaN</code>
384 */
385 private void restoreNaNs(double[] ranks, List<Integer> nanPositions) {
386 if (nanPositions.size() == 0) {
387 return;
388 }
389 Iterator<Integer> iterator = nanPositions.iterator();
390 while (iterator.hasNext()) {
391 ranks[iterator.next().intValue()] = Double.NaN;
392 }
393
394 }
395
396 /**
397 * Returns a list of indexes where <code>ranks</code> is <code>NaN.</code>
398 *
399 * @param ranks array to search for <code>NaNs</code>
400 * @return list of indexes i such that <code>ranks[i] = NaN</code>
401 */
402 private List<Integer> getNanPositions(IntDoublePair[] ranks) {
403 ArrayList<Integer> out = new ArrayList<Integer>();
404 for (int i = 0; i < ranks.length; i++) {
405 if (Double.isNaN(ranks[i].getValue())) {
406 out.add(Integer.valueOf(i));
407 }
408 }
409 return out;
410 }
411
412 /**
413 * Represents the position of a double value in an ordering.
414 * Comparable interface is implemented so Arrays.sort can be used
415 * to sort an array of IntDoublePairs by value. Note that the
416 * implicitly defined natural ordering is NOT consistent with equals.
417 */
418 private static class IntDoublePair implements Comparable<IntDoublePair> {
419
420 /** Value of the pair */
421 private final double value;
422
423 /** Original position of the pair */
424 private final int position;
425
426 /**
427 * Construct an IntDoublePair with the given value and position.
428 * @param value the value of the pair
429 * @param position the original position
430 */
431 public IntDoublePair(double value, int position) {
432 this.value = value;
433 this.position = position;
434 }
435
436 /**
437 * Compare this IntDoublePair to another pair.
438 * Only the <strong>values</strong> are compared.
439 *
440 * @param other the other pair to compare this to
441 * @return result of <code>Double.compare(value, other.value)</code>
442 */
443 public int compareTo(IntDoublePair other) {
444 return Double.compare(value, other.value);
445 }
446
447 /**
448 * Returns the value of the pair.
449 * @return value
450 */
451 public double getValue() {
452 return value;
453 }
454
455 /**
456 * Returns the original position of the pair.
457 * @return position
458 */
459 public int getPosition() {
460 return position;
461 }
462 }
463 }