001/*
002 * Copyright (C) 2014 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static java.lang.Double.NEGATIVE_INFINITY;
019import static java.lang.Double.NaN;
020import static java.lang.Double.POSITIVE_INFINITY;
021import static java.util.Arrays.sort;
022import static java.util.Collections.unmodifiableMap;
023
024import com.google.common.annotations.Beta;
025import com.google.common.annotations.GwtIncompatible;
026import com.google.common.primitives.Doubles;
027import com.google.common.primitives.Ints;
028import java.math.RoundingMode;
029import java.util.Collection;
030import java.util.HashMap;
031import java.util.Map;
032
033/**
034 * Provides a fluent API for calculating
035 * <a href="http://en.wikipedia.org/wiki/Quantile">quantiles</a>.
036 *
037 * <h3>Examples</h3>
038 *
039 * <p>To compute the median:
040 * <pre>   {@code
041 *
042 *   double myMedian = median().compute(myDataset);}</pre>
043 *
044 * where {@link #median()} has been statically imported.
045 *
046 * <p>To compute the 99th percentile:
047 * <pre>   {@code
048 *
049 *   double myPercentile99 = percentiles().index(99).compute(myDataset);}</pre>
050 *
051 * where {@link #percentiles()} has been statically imported.
052 *
053 * <p>To compute median and the 90th and 99th percentiles:
054 * <pre>   {@code
055 *
056 *   Map<Integer, Double> myPercentiles =
057 *       percentiles().indexes(50, 90, 99).compute(myDataset);}</pre>
058 *
059 * where {@link #percentiles()} has been statically imported: {@code myPercentiles} maps the keys
060 * 50, 90, and 99, to their corresponding quantile values.
061 *
062 * <p>To compute quartiles, use {@link #quartiles()} instead of {@link #percentiles()}. To compute
063 * arbitrary q-quantiles, use {@link #scale scale(q)}.
064 *
065 * <p>These examples all take a copy of your dataset. If you have a double array, you are okay with
066 * it being arbitrarily reordered, and you want to avoid that copy, you can use
067 * {@code computeInPlace} instead of {@code compute}.
068 *
069 * <h3>Definition and notes on interpolation</h3>
070 *
071 * <p>The definition of the kth q-quantile of N values is as follows: define x = k * (N - 1) / q; if
072 * x is an integer, the result is the value which would appear at index x in the sorted dataset
073 * (unless there are {@link Double#NaN NaN} values, see below); otherwise, the result is the average
074 * of the values which would appear at the indexes floor(x) and ceil(x) weighted by (1-frac(x)) and
075 * frac(x) respectively. This is the same definition as used by Excel and by S, it is the Type 7
076 * definition in
077 * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">R</a>, and it is
078 * described by
079 * <a href="http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population">
080 * wikipedia</a> as providing "Linear interpolation of the modes for the order statistics for the
081 * uniform distribution on [0,1]."
082 *
083 * <h3>Handling of non-finite values</h3>
084 *
085 * <p>If any values in the input are {@link Double#NaN NaN} then all values returned are
086 * {@link Double#NaN NaN}. (This is the one occasion when the behaviour is not the same as you'd get
087 * from sorting with {@link java.util.Arrays#sort(double[]) Arrays.sort(double[])} or
088 * {@link java.util.Collections#sort(java.util.List) Collections.sort(List&lt;Double&gt;)} and
089 * selecting the required value(s). Those methods would sort {@link Double#NaN NaN} as if it is
090 * greater than any other value and place them at the end of the dataset, even after
091 * {@link Double#POSITIVE_INFINITY POSITIVE_INFINITY}.)
092 *
093 * <p>Otherwise, {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and
094 * {@link Double#POSITIVE_INFINITY POSITIVE_INFINITY} sort to the beginning and the end of the
095 * dataset, as you would expect.
096 *
097 * <p>If required to do a weighted average between an infinity and a finite value, or between an
098 * infinite value and itself, the infinite value is returned. If required to do a weighted average
099 * between {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and {@link Double#POSITIVE_INFINITY
100 * POSITIVE_INFINITY}, {@link Double#NaN NaN} is returned (note that this will only happen if the
101 * dataset contains no finite values).
102 *
103 * <h3>Performance</h3>
104 *
105 * <p>The average time complexity of the computation is O(N) in the size of the dataset. There is a
106 * worst case time complexity of O(N^2). You are extremely unlikely to hit this quadratic case on
107 * randomly ordered data (the probability decreases faster than exponentially in N), but if you are
108 * passing in unsanitized user data then a malicious user could force it. A light shuffle of the
109 * data using an unpredictable seed should normally be enough to thwart this attack.
110 *
111 * <p>The time taken to compute multiple quantiles on the same dataset using {@link Scale#indexes
112 * indexes} is generally less than the total time taken to compute each of them separately, and
113 * sometimes much less. For example, on a large enough dataset, computing the 90th and 99th
114 * percentiles together takes about 55% as long as computing them separately.
115 *
116 * <p>When calling {@link ScaleAndIndex#compute} (in {@linkplain ScaleAndIndexes#compute either
117 * form}), the memory requirement is 8*N bytes for the copy of the dataset plus an overhead which is
118 * independent of N (but depends on the quantiles being computed). When calling
119 * {@link ScaleAndIndex#computeInPlace computeInPlace} (in
120 * {@linkplain ScaleAndIndexes#computeInPlace either form}), only the overhead is required. The
121 * number of object allocations is independent of N in both cases.
122 *
123 * @author Pete Gillin
124 * @since 20.0
125 */
126@Beta
127@GwtIncompatible
128public final class Quantiles {
129
130  /**
131   * Specifies the computation of a median (i.e. the 1st 2-quantile).
132   */
133  public static ScaleAndIndex median() {
134    return scale(2).index(1);
135  }
136
137  /**
138   * Specifies the computation of quartiles (i.e. 4-quantiles).
139   */
140  public static Scale quartiles() {
141    return scale(4);
142  }
143
144  /**
145   * Specifies the computation of percentiles (i.e. 100-quantiles).
146   */
147  public static Scale percentiles() {
148    return scale(100);
149  }
150
151  /**
152   * Specifies the computation of q-quantiles.
153   *
154   * @param scale the scale for the quantiles to be calculated, i.e. the q of the q-quantiles, which
155   *     must be positive
156   */
157  public static Scale scale(int scale) {
158    return new Scale(scale);
159  }
160
161  /**
162   * Describes the point in a fluent API chain where only the scale (i.e. the q in q-quantiles) has
163   * been specified.
164   */
165  public static final class Scale {
166
167    private final int scale;
168
169    private Scale(int scale) {
170      checkArgument(scale > 0, "Quantile scale must be positive");
171      this.scale = scale;
172    }
173
174    /**
175     * Specifies a single quantile index to be calculated, i.e. the k in the kth q-quantile.
176     *
177     * @param index the quantile index, which must be in the inclusive range [0, q] for q-quantiles
178     */
179    public ScaleAndIndex index(int index) {
180      return new ScaleAndIndex(scale, index);
181    }
182
183    /**
184     * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
185     * q-quantile.
186     *
187     * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
188     *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
189     *     set will be snapshotted when this method is called
190     */
191    public ScaleAndIndexes indexes(int... indexes) {
192      return new ScaleAndIndexes(scale, indexes.clone());
193    }
194
195    /**
196     * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
197     * q-quantile.
198     *
199     * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
200     *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
201     *     set will be snapshotted when this method is called
202     */
203    public ScaleAndIndexes indexes(Collection<Integer> indexes) {
204      return new ScaleAndIndexes(scale, Ints.toArray(indexes));
205    }
206  }
207
208  /**
209   * Describes the point in a fluent API chain where the scale and a single quantile index (i.e. the
210   * q and the k in the kth q-quantile) have been specified.
211   */
212  public static final class ScaleAndIndex {
213
214    private final int scale;
215    private final int index;
216
217    private ScaleAndIndex(int scale, int index) {
218      checkIndex(index, scale);
219      this.scale = scale;
220      this.index = index;
221    }
222
223    /**
224     * Computes the quantile value of the given dataset.
225     *
226     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
227     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
228     *     this call (it is copied instead)
229     * @return the quantile value
230     */
231    public double compute(Collection<? extends Number> dataset) {
232      return computeInPlace(Doubles.toArray(dataset));
233    }
234
235    /**
236     * Computes the quantile value of the given dataset.
237     *
238     * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
239     *     be mutated by this call (it is copied instead)
240     * @return the quantile value
241     */
242    public double compute(double... dataset) {
243      return computeInPlace(dataset.clone());
244    }
245
246    /**
247     * Computes the quantile value of the given dataset.
248     *
249     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
250     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
251     *     this call (it is copied instead)
252     * @return the quantile value
253     */
254    public double compute(long... dataset) {
255      return computeInPlace(longsToDoubles(dataset));
256    }
257
258    /**
259     * Computes the quantile value of the given dataset.
260     *
261     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
262     *     cast to doubles, and which will not be mutated by this call (it is copied instead)
263     * @return the quantile value
264     */
265    public double compute(int... dataset) {
266      return computeInPlace(intsToDoubles(dataset));
267    }
268
269    /**
270     * Computes the quantile value of the given dataset, performing the computation in-place.
271     *
272     * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
273     *     be arbitrarily reordered by this method call
274     * @return the quantile value
275     */
276    public double computeInPlace(double... dataset) {
277      checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
278      if (containsNaN(dataset)) {
279        return NaN;
280      }
281
282      // Calculate the quotient and remainder in the integer division x = k * (N-1) / q, i.e.
283      // index * (dataset.length - 1) / scale. If there is no remainder, we can just find the value
284      // whose index in the sorted dataset equals the quotient; if there is a remainder, we
285      // interpolate between that and the next value.
286
287      // Since index and (dataset.length - 1) are non-negative ints, their product can be expressed
288      // as a long, without risk of overflow:
289      long numerator = (long) index * (dataset.length - 1);
290      // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
291      // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to get
292      // a rounded ratio and a remainder which can be expressed as ints, without risk of overflow:
293      int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
294      int remainder = (int) (numerator - (long) quotient * scale);
295      selectInPlace(quotient, dataset, 0, dataset.length - 1);
296      if (remainder == 0) {
297        return dataset[quotient];
298      } else {
299        selectInPlace(quotient + 1, dataset, quotient + 1, dataset.length - 1);
300        return interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale);
301      }
302    }
303  }
304
305  /**
306   * Describes the point in a fluent API chain where the scale and a multiple quantile indexes (i.e.
307   * the q and a set of values for the k in the kth q-quantile) have been specified.
308   */
309  public static final class ScaleAndIndexes {
310
311    private final int scale;
312    private final int[] indexes;
313
314    private ScaleAndIndexes(int scale, int[] indexes) {
315      for (int index : indexes) {
316        checkIndex(index, scale);
317      }
318      this.scale = scale;
319      this.indexes = indexes;
320    }
321
322    /**
323     * Computes the quantile values of the given dataset.
324     *
325     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
326     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
327     *     this call (it is copied instead)
328     * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
329     *     the values the corresponding quantile values
330     */
331    public Map<Integer, Double> compute(Collection<? extends Number> dataset) {
332      return computeInPlace(Doubles.toArray(dataset));
333    }
334
335    /**
336     * Computes the quantile values of the given dataset.
337     *
338     * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
339     *     be mutated by this call (it is copied instead)
340     * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
341     *     the values the corresponding quantile values
342     */
343    public Map<Integer, Double> compute(double... dataset) {
344      return computeInPlace(dataset.clone());
345    }
346
347    /**
348     * Computes the quantile values of the given dataset.
349     *
350     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
351     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
352     *     this call (it is copied instead)
353     * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
354     *     the values the corresponding quantile values
355     */
356    public Map<Integer, Double> compute(long... dataset) {
357      return computeInPlace(longsToDoubles(dataset));
358    }
359
360    /**
361     * Computes the quantile values of the given dataset.
362     *
363     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
364     *     cast to doubles, and which will not be mutated by this call (it is copied instead)
365     * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
366     *     the values the corresponding quantile values
367     */
368    public Map<Integer, Double> compute(int... dataset) {
369      return computeInPlace(intsToDoubles(dataset));
370    }
371
372    /**
373     * Computes the quantile values of the given dataset, performing the computation in-place.
374     *
375     * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
376     *     be arbitrarily reordered by this method call
377     * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and
378     *     the values the corresponding quantile values
379     */
380    public Map<Integer, Double> computeInPlace(double... dataset) {
381      checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
382      if (containsNaN(dataset)) {
383        Map<Integer, Double> nanMap = new HashMap<Integer, Double>();
384        for (int index : indexes) {
385          nanMap.put(index, NaN);
386        }
387        return unmodifiableMap(nanMap);
388      }
389
390      // Calculate the quotients and remainders in the integer division x = k * (N - 1) / q, i.e.
391      // index * (dataset.length - 1) / scale for each index in indexes. For each, if there is no
392      // remainder, we can just select the value whose index in the sorted dataset equals the
393      // quotient; if there is a remainder, we interpolate between that and the next value.
394
395      int[] quotients = new int[indexes.length];
396      int[] remainders = new int[indexes.length];
397      // The indexes to select. In the worst case, we'll need one each side of each quantile.
398      int[] requiredSelections = new int[indexes.length * 2];
399      int requiredSelectionsCount = 0;
400      for (int i = 0; i < indexes.length; i++) {
401        // Since index and (dataset.length - 1) are non-negative ints, their product can be
402        // expressed as a long, without risk of overflow:
403        long numerator = (long) indexes[i] * (dataset.length - 1);
404        // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
405        // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to
406        // get a rounded ratio and a remainder which can be expressed as ints, without risk of
407        // overflow:
408        int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
409        int remainder = (int) (numerator - (long) quotient * scale);
410        quotients[i] = quotient;
411        remainders[i] = remainder;
412        requiredSelections[requiredSelectionsCount] = quotient;
413        requiredSelectionsCount++;
414        if (remainder != 0) {
415          requiredSelections[requiredSelectionsCount] = quotient + 1;
416          requiredSelectionsCount++;
417        }
418      }
419      sort(requiredSelections, 0, requiredSelectionsCount);
420      selectAllInPlace(
421          requiredSelections, 0, requiredSelectionsCount - 1, dataset, 0, dataset.length - 1);
422      Map<Integer, Double> ret = new HashMap<Integer, Double>();
423      for (int i = 0; i < indexes.length; i++) {
424        int quotient = quotients[i];
425        int remainder = remainders[i];
426        if (remainder == 0) {
427          ret.put(indexes[i], dataset[quotient]);
428        } else {
429          ret.put(
430              indexes[i], interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale));
431        }
432      }
433      return unmodifiableMap(ret);
434    }
435  }
436
437  /**
438   * Returns whether any of the values in {@code dataset} are {@code NaN}.
439   */
440  private static boolean containsNaN(double... dataset) {
441    for (double value : dataset) {
442      if (Double.isNaN(value)) {
443        return true;
444      }
445    }
446    return false;
447  }
448
449  /**
450   * Returns a value a fraction {@code (remainder / scale)} of the way between {@code lower} and
451   * {@code upper}. Assumes that {@code lower <= upper}. Correctly handles infinities (but not
452   * {@code NaN}).
453   */
454  private static double interpolate(double lower, double upper, double remainder, double scale) {
455    if (lower == NEGATIVE_INFINITY) {
456      if (upper == POSITIVE_INFINITY) {
457        // Return NaN when lower == NEGATIVE_INFINITY and upper == POSITIVE_INFINITY:
458        return NaN;
459      }
460      // Return NEGATIVE_INFINITY when NEGATIVE_INFINITY == lower <= upper < POSITIVE_INFINITY:
461      return NEGATIVE_INFINITY;
462    }
463    if (upper == POSITIVE_INFINITY) {
464      // Return POSITIVE_INFINITY when NEGATIVE_INFINITY < lower <= upper == POSITIVE_INFINITY:
465      return POSITIVE_INFINITY;
466    }
467    return lower + (upper - lower) * remainder / scale;
468  }
469
470  private static void checkIndex(int index, int scale) {
471    if (index < 0 || index > scale) {
472      throw new IllegalArgumentException(
473          "Quantile indexes must be between 0 and the scale, which is " + scale);
474    }
475  }
476
477  private static double[] longsToDoubles(long[] longs) {
478    int len = longs.length;
479    double[] doubles = new double[len];
480    for (int i = 0; i < len; i++) {
481      doubles[i] = longs[i];
482    }
483    return doubles;
484  }
485
486  private static double[] intsToDoubles(int[] ints) {
487    int len = ints.length;
488    double[] doubles = new double[len];
489    for (int i = 0; i < len; i++) {
490      doubles[i] = ints[i];
491    }
492    return doubles;
493  }
494
495  /**
496   * Performs an in-place selection to find the element which would appear at a given index in a
497   * dataset if it were sorted. The following preconditions should hold:
498   * <ul>
499   * <li>{@code required}, {@code from}, and {@code to} should all be indexes into {@code array};
500   * <li>{@code required} should be in the range [{@code from}, {@code to}];
501   * <li>all the values with indexes in the range [0, {@code from}) should be less than or equal to
502   * all the values with indexes in the range [{@code from}, {@code to}];
503   * <li>all the values with indexes in the range ({@code to}, {@code array.length - 1}] should be
504   * greater than or equal to all the values with indexes in the range [{@code from}, {@code to}].
505   * </ul>
506   * This method will reorder the values with indexes in the range [{@code from}, {@code to}] such
507   * that all the values with indexes in the range [{@code from}, {@code required}) are less than or
508   * equal to the value with index {@code required}, and all the values with indexes in the range
509   * ({@code required}, {@code to}] are greater than or equal to that value. Therefore, the value at
510   * {@code required} is the value which would appear at that index in the sorted dataset.
511   */
512  private static void selectInPlace(int required, double[] array, int from, int to) {
513    // If we are looking for the least element in the range, we can just do a linear search for it.
514    // (We will hit this whenever we are doing quantile interpolation: our first selection finds
515    // the lower value, our second one finds the upper value by looking for the next least element.)
516    if (required == from) {
517      int min = from;
518      for (int index = from + 1; index <= to; index++) {
519        if (array[min] > array[index]) {
520          min = index;
521        }
522      }
523      if (min != from) {
524        swap(array, min, from);
525      }
526      return;
527    }
528
529    // Let's play quickselect! We'll repeatedly partition the range [from, to] containing the
530    // required element, as long as it has more than one element.
531    while (to > from) {
532      int partitionPoint = partition(array, from, to);
533      if (partitionPoint >= required) {
534        to = partitionPoint - 1;
535      }
536      if (partitionPoint <= required) {
537        from = partitionPoint + 1;
538      }
539    }
540  }
541
542  /**
543   * Performs a partition operation on the slice of {@code array} with elements in the range
544   * [{@code from}, {@code to}]. Uses the median of {@code from}, {@code to}, and the midpoint
545   * between them as a pivot. Returns the index which the slice is partitioned around, i.e. if it
546   * returns {@code ret} then we know that the values with indexes in [{@code from}, {@code ret})
547   * are less than or equal to the value at {@code ret} and the values with indexes in ({@code ret},
548   * {@code to}] are greater than or equal to that.
549   */
550  private static int partition(double[] array, int from, int to) {
551    // Select a pivot, and move it to the start of the slice i.e. to index from.
552    movePivotToStartOfSlice(array, from, to);
553    double pivot = array[from];
554
555    // Move all elements with indexes in (from, to] which are greater than the pivot to the end of
556    // the array. Keep track of where those elements begin.
557    int partitionPoint = to;
558    for (int i = to; i > from; i--) {
559      if (array[i] > pivot) {
560        swap(array, partitionPoint, i);
561        partitionPoint--;
562      }
563    }
564
565    // We now know that all elements with indexes in (from, partitionPoint] are less than or equal
566    // to the pivot at from, and all elements with indexes in (partitionPoint, to] are greater than
567    // it. We swap the pivot into partitionPoint and we know the array is partitioned around that.
568    swap(array, from, partitionPoint);
569    return partitionPoint;
570  }
571
572  /**
573   * Selects the pivot to use, namely the median of the values at {@code from}, {@code to}, and
574   * halfway between the two (rounded down), from {@code array}, and ensure (by swapping elements if
575   * necessary) that that pivot value appears at the start of the slice i.e. at {@code from}.
576   * Expects that {@code from} is strictly less than {@code to}.
577   */
578  private static void movePivotToStartOfSlice(double[] array, int from, int to) {
579    int mid = (from + to) >>> 1;
580    // We want to make a swap such that either array[to] <= array[from] <= array[mid], or
581    // array[mid] <= array[from] <= array[to]. We know that from < to, so we know mid < to
582    // (although it's possible that mid == from, if to == from + 1). Note that the postcondition
583    // would be impossible to fulfil if mid == to unless we also have array[from] == array[to].
584    boolean toLessThanMid = (array[to] < array[mid]);
585    boolean midLessThanFrom = (array[mid] < array[from]);
586    boolean toLessThanFrom = (array[to] < array[from]);
587    if (toLessThanMid == midLessThanFrom) {
588      // Either array[to] < array[mid] < array[from] or array[from] <= array[mid] <= array[to].
589      swap(array, mid, from);
590    } else if (toLessThanMid != toLessThanFrom) {
591      // Either array[from] <= array[to] < array[mid] or array[mid] <= array[to] < array[from].
592      swap(array, from, to);
593    }
594    // The postcondition now holds. So the median, our chosen pivot, is at from.
595  }
596
597  /**
598   * Performs an in-place selection, like {@link #selectInPlace}, to select all the indexes
599   * {@code allRequired[i]} for {@code i} in the range [{@code requiredFrom}, {@code requiredTo}].
600   * These indexes must be sorted in the array and must all be in the range [{@code from},
601   * {@code to}].
602   */
603  private static void selectAllInPlace(
604      int[] allRequired, int requiredFrom, int requiredTo, double[] array, int from, int to) {
605    // Choose the first selection to do...
606    int requiredChosen = chooseNextSelection(allRequired, requiredFrom, requiredTo, from, to);
607    int required = allRequired[requiredChosen];
608
609    // ...do the first selection...
610    selectInPlace(required, array, from, to);
611
612    // ...then recursively perform the selections in the range below...
613    int requiredBelow = requiredChosen - 1;
614    while (requiredBelow >= requiredFrom && allRequired[requiredBelow] == required) {
615      requiredBelow--; // skip duplicates of required in the range below
616    }
617    if (requiredBelow >= requiredFrom) {
618      selectAllInPlace(allRequired, requiredFrom, requiredBelow, array, from, required - 1);
619    }
620
621    // ...and then recursively perform the selections in the range above.
622    int requiredAbove = requiredChosen + 1;
623    while (requiredAbove <= requiredTo && allRequired[requiredAbove] == required) {
624      requiredAbove++; // skip duplicates of required in the range above
625    }
626    if (requiredAbove <= requiredTo) {
627      selectAllInPlace(allRequired, requiredAbove, requiredTo, array, required + 1, to);
628    }
629  }
630
631  /**
632   * Chooses the next selection to do from the required selections. It is required that the array
633   * {@code allRequired} is sorted and that {@code allRequired[i]} are in the range [{@code from},
634   * {@code to}] for all {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. The
635   * value returned by this method is the {@code i} in that range such that {@code allRequired[i]}
636   * is as close as possible to the center of the range [{@code from}, {@code to}]. Choosing the
637   * value closest to the center of the range first is the most efficient strategy because it
638   * minimizes the size of the subranges from which the remaining selections must be done.
639   */
640  private static int chooseNextSelection(
641      int[] allRequired, int requiredFrom, int requiredTo, int from, int to) {
642    if (requiredFrom == requiredTo) {
643      return requiredFrom; // only one thing to choose, so choose it
644    }
645
646    // Find the center and round down. The true center is either centerFloor or halfway between
647    // centerFloor and centerFloor + 1.
648    int centerFloor = (from + to) >>> 1;
649
650    // Do a binary search until we're down to the range of two which encloses centerFloor (unless
651    // all values are lower or higher than centerFloor, in which case we find the two highest or
652    // lowest respectively). If centerFloor is in allRequired, we will definitely find it. If not,
653    // but centerFloor + 1 is, we'll definitely find that. The closest value to the true (unrounded)
654    // center will be at either low or high.
655    int low = requiredFrom;
656    int high = requiredTo;
657    while (high > low + 1) {
658      int mid = (low + high) >>> 1;
659      if (allRequired[mid] > centerFloor) {
660        high = mid;
661      } else if (allRequired[mid] < centerFloor) {
662        low = mid;
663      } else {
664        return mid; // allRequired[mid] = centerFloor, so we can't get closer than that
665      }
666    }
667
668    // Now pick the closest of the two candidates. Note that there is no rounding here.
669    if (from + to - allRequired[low] - allRequired[high] > 0) {
670      return high;
671    } else {
672      return low;
673    }
674  }
675
676  /**
677   * Swaps the values at {@code i} and {@code j} in {@code array}.
678   */
679  private static void swap(double[] array, int i, int j) {
680    double temp = array[i];
681    array[i] = array[j];
682    array[j] = temp;
683  }
684}