001/*
002 * Copyright (C) 2014 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static java.lang.Double.NEGATIVE_INFINITY;
019import static java.lang.Double.NaN;
020import static java.lang.Double.POSITIVE_INFINITY;
021import static java.util.Arrays.sort;
022import static java.util.Collections.unmodifiableMap;
023
024import com.google.common.annotations.GwtIncompatible;
025import com.google.common.annotations.J2ktIncompatible;
026import com.google.common.primitives.Doubles;
027import com.google.common.primitives.Ints;
028import java.math.RoundingMode;
029import java.util.Collection;
030import java.util.LinkedHashMap;
031import java.util.Map;
032
033/**
034 * Provides a fluent API for calculating <a
035 * href="http://en.wikipedia.org/wiki/Quantile">quantiles</a>.
036 *
037 * <h3>Examples</h3>
038 *
039 * <p>To compute the median:
040 *
041 * <pre>{@code
042 * double myMedian = median().compute(myDataset);
043 * }</pre>
044 *
045 * where {@link #median()} has been statically imported.
046 *
047 * <p>To compute the 99th percentile:
048 *
049 * <pre>{@code
050 * double myPercentile99 = percentiles().index(99).compute(myDataset);
051 * }</pre>
052 *
053 * where {@link #percentiles()} has been statically imported.
054 *
055 * <p>To compute median and the 90th and 99th percentiles:
056 *
057 * <pre>{@code
058 * Map<Integer, Double> myPercentiles =
059 *     percentiles().indexes(50, 90, 99).compute(myDataset);
060 * }</pre>
061 *
062 * where {@link #percentiles()} has been statically imported: {@code myPercentiles} maps the keys
063 * 50, 90, and 99, to their corresponding quantile values.
064 *
065 * <p>To compute quartiles, use {@link #quartiles()} instead of {@link #percentiles()}. To compute
066 * arbitrary q-quantiles, use {@link #scale scale(q)}.
067 *
068 * <p>These examples all take a copy of your dataset. If you have a double array, you are okay with
069 * it being arbitrarily reordered, and you want to avoid that copy, you can use {@code
070 * computeInPlace} instead of {@code compute}.
071 *
072 * <h3>Definition and notes on interpolation</h3>
073 *
074 * <p>The definition of the kth q-quantile of N values is as follows: define x = k * (N - 1) / q; if
075 * x is an integer, the result is the value which would appear at index x in the sorted dataset
076 * (unless there are {@link Double#NaN NaN} values, see below); otherwise, the result is the average
077 * of the values which would appear at the indexes floor(x) and ceil(x) weighted by (1-frac(x)) and
078 * frac(x) respectively. This is the same definition as used by Excel and by S, it is the Type 7
079 * definition in <a
080 * href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">R</a>, and it is
081 * described by <a
082 * href="http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population">
083 * wikipedia</a> as providing "Linear interpolation of the modes for the order statistics for the
084 * uniform distribution on [0,1]."
085 *
086 * <h3>Handling of non-finite values</h3>
087 *
088 * <p>If any values in the input are {@link Double#NaN NaN} then all values returned are {@link
089 * Double#NaN NaN}. (This is the one occasion when the behaviour is not the same as you'd get from
090 * sorting with {@link java.util.Arrays#sort(double[]) Arrays.sort(double[])} or {@link
091 * java.util.Collections#sort(java.util.List) Collections.sort(List&lt;Double&gt;)} and selecting
092 * the required value(s). Those methods would sort {@link Double#NaN NaN} as if it is greater than
093 * any other value and place them at the end of the dataset, even after {@link
094 * Double#POSITIVE_INFINITY POSITIVE_INFINITY}.)
095 *
096 * <p>Otherwise, {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and {@link
097 * Double#POSITIVE_INFINITY POSITIVE_INFINITY} sort to the beginning and the end of the dataset, as
098 * you would expect.
099 *
100 * <p>If required to do a weighted average between an infinity and a finite value, or between an
101 * infinite value and itself, the infinite value is returned. If required to do a weighted average
102 * between {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and {@link Double#POSITIVE_INFINITY
103 * POSITIVE_INFINITY}, {@link Double#NaN NaN} is returned (note that this will only happen if the
104 * dataset contains no finite values).
105 *
106 * <h3>Performance</h3>
107 *
108 * <p>The average time complexity of the computation is O(N) in the size of the dataset. There is a
109 * worst case time complexity of O(N^2). You are extremely unlikely to hit this quadratic case on
110 * randomly ordered data (the probability decreases faster than exponentially in N), but if you are
111 * passing in unsanitized user data then a malicious user could force it. A light shuffle of the
112 * data using an unpredictable seed should normally be enough to thwart this attack.
113 *
114 * <p>The time taken to compute multiple quantiles on the same dataset using {@link Scale#indexes
115 * indexes} is generally less than the total time taken to compute each of them separately, and
116 * sometimes much less. For example, on a large enough dataset, computing the 90th and 99th
117 * percentiles together takes about 55% as long as computing them separately.
118 *
119 * <p>When calling {@link ScaleAndIndex#compute} (in {@linkplain ScaleAndIndexes#compute either
120 * form}), the memory requirement is 8*N bytes for the copy of the dataset plus an overhead which is
121 * independent of N (but depends on the quantiles being computed). When calling {@link
122 * ScaleAndIndex#computeInPlace computeInPlace} (in {@linkplain ScaleAndIndexes#computeInPlace
123 * either form}), only the overhead is required. The number of object allocations is independent of
124 * N in both cases.
125 *
126 * @author Pete Gillin
127 * @since 20.0
128 */
129@J2ktIncompatible
130@GwtIncompatible
131public final class Quantiles {
132  /**
133   * Constructor for a type that is not meant to be instantiated.
134   *
135   * @deprecated Use the static factory methods of the class. There is no reason to create an
136   *     instance of {@link Quantiles}.
137   */
138  @Deprecated
139  public Quantiles() {}
140
141  /** Specifies the computation of a median (i.e. the 1st 2-quantile). */
142  public static ScaleAndIndex median() {
143    return scale(2).index(1);
144  }
145
146  /** Specifies the computation of quartiles (i.e. 4-quantiles). */
147  public static Scale quartiles() {
148    return scale(4);
149  }
150
151  /** Specifies the computation of percentiles (i.e. 100-quantiles). */
152  public static Scale percentiles() {
153    return scale(100);
154  }
155
156  /**
157   * Specifies the computation of q-quantiles.
158   *
159   * @param scale the scale for the quantiles to be calculated, i.e. the q of the q-quantiles, which
160   *     must be positive
161   */
162  public static Scale scale(int scale) {
163    return new Scale(scale);
164  }
165
166  /**
167   * Describes the point in a fluent API chain where only the scale (i.e. the q in q-quantiles) has
168   * been specified.
169   *
170   * @since 20.0
171   */
172  public static final class Scale {
173
174    private final int scale;
175
176    private Scale(int scale) {
177      checkArgument(scale > 0, "Quantile scale must be positive");
178      this.scale = scale;
179    }
180
181    /**
182     * Specifies a single quantile index to be calculated, i.e. the k in the kth q-quantile.
183     *
184     * @param index the quantile index, which must be in the inclusive range [0, q] for q-quantiles
185     */
186    public ScaleAndIndex index(int index) {
187      return new ScaleAndIndex(scale, index);
188    }
189
190    /**
191     * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
192     * q-quantile.
193     *
194     * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
195     *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
196     *     set will be snapshotted when this method is called
197     * @throws IllegalArgumentException if {@code indexes} is empty
198     */
199    public ScaleAndIndexes indexes(int... indexes) {
200      return new ScaleAndIndexes(scale, indexes.clone());
201    }
202
203    /**
204     * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
205     * q-quantile.
206     *
207     * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
208     *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
209     *     set will be snapshotted when this method is called
210     * @throws IllegalArgumentException if {@code indexes} is empty
211     */
212    public ScaleAndIndexes indexes(Collection<Integer> indexes) {
213      return new ScaleAndIndexes(scale, Ints.toArray(indexes));
214    }
215  }
216
217  /**
218   * Describes the point in a fluent API chain where the scale and a single quantile index (i.e. the
219   * q and the k in the kth q-quantile) have been specified.
220   *
221   * @since 20.0
222   */
223  public static final class ScaleAndIndex {
224
225    private final int scale;
226    private final int index;
227
228    private ScaleAndIndex(int scale, int index) {
229      checkIndex(index, scale);
230      this.scale = scale;
231      this.index = index;
232    }
233
234    /**
235     * Computes the quantile value of the given dataset.
236     *
237     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
238     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
239     *     this call (it is copied instead)
240     * @return the quantile value
241     */
242    public double compute(Collection<? extends Number> dataset) {
243      return computeInPlace(Doubles.toArray(dataset));
244    }
245
246    /**
247     * Computes the quantile value of the given dataset.
248     *
249     * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
250     *     be mutated by this call (it is copied instead)
251     * @return the quantile value
252     */
253    public double compute(double... dataset) {
254      return computeInPlace(dataset.clone());
255    }
256
257    /**
258     * Computes the quantile value of the given dataset.
259     *
260     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
261     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
262     *     this call (it is copied instead)
263     * @return the quantile value
264     */
265    public double compute(long... dataset) {
266      return computeInPlace(longsToDoubles(dataset));
267    }
268
269    /**
270     * Computes the quantile value of the given dataset.
271     *
272     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
273     *     cast to doubles, and which will not be mutated by this call (it is copied instead)
274     * @return the quantile value
275     */
276    public double compute(int... dataset) {
277      return computeInPlace(intsToDoubles(dataset));
278    }
279
280    /**
281     * Computes the quantile value of the given dataset, performing the computation in-place.
282     *
283     * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
284     *     be arbitrarily reordered by this method call
285     * @return the quantile value
286     */
287    public double computeInPlace(double... dataset) {
288      checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
289      if (containsNaN(dataset)) {
290        return NaN;
291      }
292
293      // Calculate the quotient and remainder in the integer division x = k * (N-1) / q, i.e.
294      // index * (dataset.length - 1) / scale. If there is no remainder, we can just find the value
295      // whose index in the sorted dataset equals the quotient; if there is a remainder, we
296      // interpolate between that and the next value.
297
298      // Since index and (dataset.length - 1) are non-negative ints, their product can be expressed
299      // as a long, without risk of overflow:
300      long numerator = (long) index * (dataset.length - 1);
301      // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
302      // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to get
303      // a rounded ratio and a remainder which can be expressed as ints, without risk of overflow:
304      int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
305      int remainder = (int) (numerator - (long) quotient * scale);
306      selectInPlace(quotient, dataset, 0, dataset.length - 1);
307      if (remainder == 0) {
308        return dataset[quotient];
309      } else {
310        selectInPlace(quotient + 1, dataset, quotient + 1, dataset.length - 1);
311        return interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale);
312      }
313    }
314  }
315
316  /**
317   * Describes the point in a fluent API chain where the scale and a multiple quantile indexes (i.e.
318   * the q and a set of values for the k in the kth q-quantile) have been specified.
319   *
320   * @since 20.0
321   */
322  public static final class ScaleAndIndexes {
323
324    private final int scale;
325    private final int[] indexes;
326
327    private ScaleAndIndexes(int scale, int[] indexes) {
328      for (int index : indexes) {
329        checkIndex(index, scale);
330      }
331      checkArgument(indexes.length > 0, "Indexes must be a non empty array");
332      this.scale = scale;
333      this.indexes = indexes;
334    }
335
336    /**
337     * Computes the quantile values of the given dataset.
338     *
339     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
340     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
341     *     this call (it is copied instead)
342     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
343     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
344     *     map are ordered by quantile index in the same order they were passed to the {@code
345     *     indexes} method.
346     */
347    public Map<Integer, Double> compute(Collection<? extends Number> dataset) {
348      return computeInPlace(Doubles.toArray(dataset));
349    }
350
351    /**
352     * Computes the quantile values of the given dataset.
353     *
354     * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
355     *     be mutated by this call (it is copied instead)
356     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
357     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
358     *     map are ordered by quantile index in the same order they were passed to the {@code
359     *     indexes} method.
360     */
361    public Map<Integer, Double> compute(double... dataset) {
362      return computeInPlace(dataset.clone());
363    }
364
365    /**
366     * Computes the quantile values of the given dataset.
367     *
368     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
369     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
370     *     this call (it is copied instead)
371     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
372     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
373     *     map are ordered by quantile index in the same order they were passed to the {@code
374     *     indexes} method.
375     */
376    public Map<Integer, Double> compute(long... dataset) {
377      return computeInPlace(longsToDoubles(dataset));
378    }
379
380    /**
381     * Computes the quantile values of the given dataset.
382     *
383     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
384     *     cast to doubles, and which will not be mutated by this call (it is copied instead)
385     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
386     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
387     *     map are ordered by quantile index in the same order they were passed to the {@code
388     *     indexes} method.
389     */
390    public Map<Integer, Double> compute(int... dataset) {
391      return computeInPlace(intsToDoubles(dataset));
392    }
393
394    /**
395     * Computes the quantile values of the given dataset, performing the computation in-place.
396     *
397     * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
398     *     be arbitrarily reordered by this method call
399     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
400     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
401     *     map are ordered by quantile index in the same order that the indexes were passed to the
402     *     {@code indexes} method.
403     */
404    public Map<Integer, Double> computeInPlace(double... dataset) {
405      checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
406      if (containsNaN(dataset)) {
407        Map<Integer, Double> nanMap = new LinkedHashMap<>();
408        for (int index : indexes) {
409          nanMap.put(index, NaN);
410        }
411        return unmodifiableMap(nanMap);
412      }
413
414      // Calculate the quotients and remainders in the integer division x = k * (N - 1) / q, i.e.
415      // index * (dataset.length - 1) / scale for each index in indexes. For each, if there is no
416      // remainder, we can just select the value whose index in the sorted dataset equals the
417      // quotient; if there is a remainder, we interpolate between that and the next value.
418
419      int[] quotients = new int[indexes.length];
420      int[] remainders = new int[indexes.length];
421      // The indexes to select. In the worst case, we'll need one each side of each quantile.
422      int[] requiredSelections = new int[indexes.length * 2];
423      int requiredSelectionsCount = 0;
424      for (int i = 0; i < indexes.length; i++) {
425        // Since index and (dataset.length - 1) are non-negative ints, their product can be
426        // expressed as a long, without risk of overflow:
427        long numerator = (long) indexes[i] * (dataset.length - 1);
428        // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
429        // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to
430        // get a rounded ratio and a remainder which can be expressed as ints, without risk of
431        // overflow:
432        int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
433        int remainder = (int) (numerator - (long) quotient * scale);
434        quotients[i] = quotient;
435        remainders[i] = remainder;
436        requiredSelections[requiredSelectionsCount] = quotient;
437        requiredSelectionsCount++;
438        if (remainder != 0) {
439          requiredSelections[requiredSelectionsCount] = quotient + 1;
440          requiredSelectionsCount++;
441        }
442      }
443      sort(requiredSelections, 0, requiredSelectionsCount);
444      selectAllInPlace(
445          requiredSelections, 0, requiredSelectionsCount - 1, dataset, 0, dataset.length - 1);
446      Map<Integer, Double> ret = new LinkedHashMap<>();
447      for (int i = 0; i < indexes.length; i++) {
448        int quotient = quotients[i];
449        int remainder = remainders[i];
450        if (remainder == 0) {
451          ret.put(indexes[i], dataset[quotient]);
452        } else {
453          ret.put(
454              indexes[i], interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale));
455        }
456      }
457      return unmodifiableMap(ret);
458    }
459  }
460
461  /** Returns whether any of the values in {@code dataset} are {@code NaN}. */
462  private static boolean containsNaN(double... dataset) {
463    for (double value : dataset) {
464      if (Double.isNaN(value)) {
465        return true;
466      }
467    }
468    return false;
469  }
470
471  /**
472   * Returns a value a fraction {@code (remainder / scale)} of the way between {@code lower} and
473   * {@code upper}. Assumes that {@code lower <= upper}. Correctly handles infinities (but not
474   * {@code NaN}).
475   */
476  private static double interpolate(double lower, double upper, double remainder, double scale) {
477    if (lower == NEGATIVE_INFINITY) {
478      if (upper == POSITIVE_INFINITY) {
479        // Return NaN when lower == NEGATIVE_INFINITY and upper == POSITIVE_INFINITY:
480        return NaN;
481      }
482      // Return NEGATIVE_INFINITY when NEGATIVE_INFINITY == lower <= upper < POSITIVE_INFINITY:
483      return NEGATIVE_INFINITY;
484    }
485    if (upper == POSITIVE_INFINITY) {
486      // Return POSITIVE_INFINITY when NEGATIVE_INFINITY < lower <= upper == POSITIVE_INFINITY:
487      return POSITIVE_INFINITY;
488    }
489    return lower + (upper - lower) * remainder / scale;
490  }
491
492  private static void checkIndex(int index, int scale) {
493    if (index < 0 || index > scale) {
494      throw new IllegalArgumentException(
495          "Quantile indexes must be between 0 and the scale, which is " + scale);
496    }
497  }
498
499  private static double[] longsToDoubles(long[] longs) {
500    int len = longs.length;
501    double[] doubles = new double[len];
502    for (int i = 0; i < len; i++) {
503      doubles[i] = longs[i];
504    }
505    return doubles;
506  }
507
508  private static double[] intsToDoubles(int[] ints) {
509    int len = ints.length;
510    double[] doubles = new double[len];
511    for (int i = 0; i < len; i++) {
512      doubles[i] = ints[i];
513    }
514    return doubles;
515  }
516
517  /**
518   * Performs an in-place selection to find the element which would appear at a given index in a
519   * dataset if it were sorted. The following preconditions should hold:
520   *
521   * <ul>
522   *   <li>{@code required}, {@code from}, and {@code to} should all be indexes into {@code array};
523   *   <li>{@code required} should be in the range [{@code from}, {@code to}];
524   *   <li>all the values with indexes in the range [0, {@code from}) should be less than or equal
525   *       to all the values with indexes in the range [{@code from}, {@code to}];
526   *   <li>all the values with indexes in the range ({@code to}, {@code array.length - 1}] should be
527   *       greater than or equal to all the values with indexes in the range [{@code from}, {@code
528   *       to}].
529   * </ul>
530   *
531   * This method will reorder the values with indexes in the range [{@code from}, {@code to}] such
532   * that all the values with indexes in the range [{@code from}, {@code required}) are less than or
533   * equal to the value with index {@code required}, and all the values with indexes in the range
534   * ({@code required}, {@code to}] are greater than or equal to that value. Therefore, the value at
535   * {@code required} is the value which would appear at that index in the sorted dataset.
536   */
537  private static void selectInPlace(int required, double[] array, int from, int to) {
538    // If we are looking for the least element in the range, we can just do a linear search for it.
539    // (We will hit this whenever we are doing quantile interpolation: our first selection finds
540    // the lower value, our second one finds the upper value by looking for the next least element.)
541    if (required == from) {
542      int min = from;
543      for (int index = from + 1; index <= to; index++) {
544        if (array[min] > array[index]) {
545          min = index;
546        }
547      }
548      if (min != from) {
549        swap(array, min, from);
550      }
551      return;
552    }
553
554    // Let's play quickselect! We'll repeatedly partition the range [from, to] containing the
555    // required element, as long as it has more than one element.
556    while (to > from) {
557      int partitionPoint = partition(array, from, to);
558      if (partitionPoint >= required) {
559        to = partitionPoint - 1;
560      }
561      if (partitionPoint <= required) {
562        from = partitionPoint + 1;
563      }
564    }
565  }
566
567  /**
568   * Performs a partition operation on the slice of {@code array} with elements in the range [{@code
569   * from}, {@code to}]. Uses the median of {@code from}, {@code to}, and the midpoint between them
570   * as a pivot. Returns the index which the slice is partitioned around, i.e. if it returns {@code
571   * ret} then we know that the values with indexes in [{@code from}, {@code ret}) are less than or
572   * equal to the value at {@code ret} and the values with indexes in ({@code ret}, {@code to}] are
573   * greater than or equal to that.
574   */
575  private static int partition(double[] array, int from, int to) {
576    // Select a pivot, and move it to the start of the slice i.e. to index from.
577    movePivotToStartOfSlice(array, from, to);
578    double pivot = array[from];
579
580    // Move all elements with indexes in (from, to] which are greater than the pivot to the end of
581    // the array. Keep track of where those elements begin.
582    int partitionPoint = to;
583    for (int i = to; i > from; i--) {
584      if (array[i] > pivot) {
585        swap(array, partitionPoint, i);
586        partitionPoint--;
587      }
588    }
589
590    // We now know that all elements with indexes in (from, partitionPoint] are less than or equal
591    // to the pivot at from, and all elements with indexes in (partitionPoint, to] are greater than
592    // it. We swap the pivot into partitionPoint and we know the array is partitioned around that.
593    swap(array, from, partitionPoint);
594    return partitionPoint;
595  }
596
597  /**
598   * Selects the pivot to use, namely the median of the values at {@code from}, {@code to}, and
599   * halfway between the two (rounded down), from {@code array}, and ensure (by swapping elements if
600   * necessary) that that pivot value appears at the start of the slice i.e. at {@code from}.
601   * Expects that {@code from} is strictly less than {@code to}.
602   */
603  private static void movePivotToStartOfSlice(double[] array, int from, int to) {
604    int mid = (from + to) >>> 1;
605    // We want to make a swap such that either array[to] <= array[from] <= array[mid], or
606    // array[mid] <= array[from] <= array[to]. We know that from < to, so we know mid < to
607    // (although it's possible that mid == from, if to == from + 1). Note that the postcondition
608    // would be impossible to fulfil if mid == to unless we also have array[from] == array[to].
609    boolean toLessThanMid = (array[to] < array[mid]);
610    boolean midLessThanFrom = (array[mid] < array[from]);
611    boolean toLessThanFrom = (array[to] < array[from]);
612    if (toLessThanMid == midLessThanFrom) {
613      // Either array[to] < array[mid] < array[from] or array[from] <= array[mid] <= array[to].
614      swap(array, mid, from);
615    } else if (toLessThanMid != toLessThanFrom) {
616      // Either array[from] <= array[to] < array[mid] or array[mid] <= array[to] < array[from].
617      swap(array, from, to);
618    }
619    // The postcondition now holds. So the median, our chosen pivot, is at from.
620  }
621
622  /**
623   * Performs an in-place selection, like {@link #selectInPlace}, to select all the indexes {@code
624   * allRequired[i]} for {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. These
625   * indexes must be sorted in the array and must all be in the range [{@code from}, {@code to}].
626   */
627  private static void selectAllInPlace(
628      int[] allRequired, int requiredFrom, int requiredTo, double[] array, int from, int to) {
629    // Choose the first selection to do...
630    int requiredChosen = chooseNextSelection(allRequired, requiredFrom, requiredTo, from, to);
631    int required = allRequired[requiredChosen];
632
633    // ...do the first selection...
634    selectInPlace(required, array, from, to);
635
636    // ...then recursively perform the selections in the range below...
637    int requiredBelow = requiredChosen - 1;
638    while (requiredBelow >= requiredFrom && allRequired[requiredBelow] == required) {
639      requiredBelow--; // skip duplicates of required in the range below
640    }
641    if (requiredBelow >= requiredFrom) {
642      selectAllInPlace(allRequired, requiredFrom, requiredBelow, array, from, required - 1);
643    }
644
645    // ...and then recursively perform the selections in the range above.
646    int requiredAbove = requiredChosen + 1;
647    while (requiredAbove <= requiredTo && allRequired[requiredAbove] == required) {
648      requiredAbove++; // skip duplicates of required in the range above
649    }
650    if (requiredAbove <= requiredTo) {
651      selectAllInPlace(allRequired, requiredAbove, requiredTo, array, required + 1, to);
652    }
653  }
654
655  /**
656   * Chooses the next selection to do from the required selections. It is required that the array
657   * {@code allRequired} is sorted and that {@code allRequired[i]} are in the range [{@code from},
658   * {@code to}] for all {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. The
659   * value returned by this method is the {@code i} in that range such that {@code allRequired[i]}
660   * is as close as possible to the center of the range [{@code from}, {@code to}]. Choosing the
661   * value closest to the center of the range first is the most efficient strategy because it
662   * minimizes the size of the subranges from which the remaining selections must be done.
663   */
664  private static int chooseNextSelection(
665      int[] allRequired, int requiredFrom, int requiredTo, int from, int to) {
666    if (requiredFrom == requiredTo) {
667      return requiredFrom; // only one thing to choose, so choose it
668    }
669
670    // Find the center and round down. The true center is either centerFloor or halfway between
671    // centerFloor and centerFloor + 1.
672    int centerFloor = (from + to) >>> 1;
673
674    // Do a binary search until we're down to the range of two which encloses centerFloor (unless
675    // all values are lower or higher than centerFloor, in which case we find the two highest or
676    // lowest respectively). If centerFloor is in allRequired, we will definitely find it. If not,
677    // but centerFloor + 1 is, we'll definitely find that. The closest value to the true (unrounded)
678    // center will be at either low or high.
679    int low = requiredFrom;
680    int high = requiredTo;
681    while (high > low + 1) {
682      int mid = (low + high) >>> 1;
683      if (allRequired[mid] > centerFloor) {
684        high = mid;
685      } else if (allRequired[mid] < centerFloor) {
686        low = mid;
687      } else {
688        return mid; // allRequired[mid] = centerFloor, so we can't get closer than that
689      }
690    }
691
692    // Now pick the closest of the two candidates. Note that there is no rounding here.
693    if (from + to - allRequired[low] - allRequired[high] > 0) {
694      return high;
695    } else {
696      return low;
697    }
698  }
699
700  /** Swaps the values at {@code i} and {@code j} in {@code array}. */
701  private static void swap(double[] array, int i, int j) {
702    double temp = array[i];
703    array[i] = array[j];
704    array[j] = temp;
705  }
706}