001/*
002 * Copyright (C) 2014 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static java.lang.Double.NEGATIVE_INFINITY;
019import static java.lang.Double.NaN;
020import static java.lang.Double.POSITIVE_INFINITY;
021import static java.util.Arrays.sort;
022import static java.util.Collections.unmodifiableMap;
023
024import com.google.common.annotations.GwtIncompatible;
025import com.google.common.annotations.J2ktIncompatible;
026import com.google.common.primitives.Doubles;
027import com.google.common.primitives.Ints;
028import java.math.RoundingMode;
029import java.util.Collection;
030import java.util.LinkedHashMap;
031import java.util.Map;
032
033/**
034 * Provides a fluent API for calculating <a
035 * href="http://en.wikipedia.org/wiki/Quantile">quantiles</a>.
036 *
037 * <h3>Examples</h3>
038 *
039 * <p>To compute the median:
040 *
041 * <pre>{@code
042 * double myMedian = median().compute(myDataset);
043 * }</pre>
044 *
045 * where {@link #median()} has been statically imported.
046 *
047 * <p>To compute the 99th percentile:
048 *
049 * <pre>{@code
050 * double myPercentile99 = percentiles().index(99).compute(myDataset);
051 * }</pre>
052 *
053 * where {@link #percentiles()} has been statically imported.
054 *
055 * <p>To compute median and the 90th and 99th percentiles:
056 *
057 * <pre>{@code
058 * Map<Integer, Double> myPercentiles =
059 *     percentiles().indexes(50, 90, 99).compute(myDataset);
060 * }</pre>
061 *
062 * where {@link #percentiles()} has been statically imported: {@code myPercentiles} maps the keys
063 * 50, 90, and 99, to their corresponding quantile values.
064 *
065 * <p>To compute quartiles, use {@link #quartiles()} instead of {@link #percentiles()}. To compute
066 * arbitrary q-quantiles, use {@link #scale scale(q)}.
067 *
068 * <p>These examples all take a copy of your dataset. If you have a double array, you are okay with
069 * it being arbitrarily reordered, and you want to avoid that copy, you can use {@code
070 * computeInPlace} instead of {@code compute}.
071 *
072 * <h3>Definition and notes on interpolation</h3>
073 *
074 * <p>The definition of the kth q-quantile of N values is as follows: define x = k * (N - 1) / q; if
075 * x is an integer, the result is the value which would appear at index x in the sorted dataset
076 * (unless there are {@link Double#NaN NaN} values, see below); otherwise, the result is the average
077 * of the values which would appear at the indexes floor(x) and ceil(x) weighted by (1-frac(x)) and
078 * frac(x) respectively. This is the same definition as used by Excel and by S, it is the Type 7
079 * definition in <a
080 * href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">R</a>, and it is
081 * described by <a
082 * href="http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population">
083 * wikipedia</a> as providing "Linear interpolation of the modes for the order statistics for the
084 * uniform distribution on [0,1]."
085 *
086 * <h3>Handling of non-finite values</h3>
087 *
088 * <p>If any values in the input are {@link Double#NaN NaN} then all values returned are {@link
089 * Double#NaN NaN}. (This is the one occasion when the behaviour is not the same as you'd get from
090 * sorting with {@link java.util.Arrays#sort(double[]) Arrays.sort(double[])} or {@link
091 * java.util.Collections#sort(java.util.List) Collections.sort(List&lt;Double&gt;)} and selecting
092 * the required value(s). Those methods would sort {@link Double#NaN NaN} as if it is greater than
093 * any other value and place them at the end of the dataset, even after {@link
094 * Double#POSITIVE_INFINITY POSITIVE_INFINITY}.)
095 *
096 * <p>Otherwise, {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and {@link
097 * Double#POSITIVE_INFINITY POSITIVE_INFINITY} sort to the beginning and the end of the dataset, as
098 * you would expect.
099 *
100 * <p>If required to do a weighted average between an infinity and a finite value, or between an
101 * infinite value and itself, the infinite value is returned. If required to do a weighted average
102 * between {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and {@link Double#POSITIVE_INFINITY
103 * POSITIVE_INFINITY}, {@link Double#NaN NaN} is returned (note that this will only happen if the
104 * dataset contains no finite values).
105 *
106 * <h3>Performance</h3>
107 *
108 * <p>The average time complexity of the computation is O(N) in the size of the dataset. There is a
109 * worst case time complexity of O(N^2). You are extremely unlikely to hit this quadratic case on
110 * randomly ordered data (the probability decreases faster than exponentially in N), but if you are
111 * passing in unsanitized user data then a malicious user could force it. A light shuffle of the
112 * data using an unpredictable seed should normally be enough to thwart this attack.
113 *
114 * <p>The time taken to compute multiple quantiles on the same dataset using {@link Scale#indexes
115 * indexes} is generally less than the total time taken to compute each of them separately, and
116 * sometimes much less. For example, on a large enough dataset, computing the 90th and 99th
117 * percentiles together takes about 55% as long as computing them separately.
118 *
119 * <p>When calling {@link ScaleAndIndex#compute} (in {@linkplain ScaleAndIndexes#compute either
120 * form}), the memory requirement is 8*N bytes for the copy of the dataset plus an overhead which is
121 * independent of N (but depends on the quantiles being computed). When calling {@link
122 * ScaleAndIndex#computeInPlace computeInPlace} (in {@linkplain ScaleAndIndexes#computeInPlace
123 * either form}), only the overhead is required. The number of object allocations is independent of
124 * N in both cases.
125 *
126 * @author Pete Gillin
127 * @since 20.0
128 */
129@J2ktIncompatible
130@GwtIncompatible
131@ElementTypesAreNonnullByDefault
132public final class Quantiles {
133  /**
134   * Constructor for a type that is not meant to be instantiated.
135   *
136   * @deprecated Use the static factory methods of the class. There is no reason to create an
137   *     instance of {@link Quantiles}.
138   */
139  @Deprecated
140  public Quantiles() {}
141
142  /** Specifies the computation of a median (i.e. the 1st 2-quantile). */
143  public static ScaleAndIndex median() {
144    return scale(2).index(1);
145  }
146
147  /** Specifies the computation of quartiles (i.e. 4-quantiles). */
148  public static Scale quartiles() {
149    return scale(4);
150  }
151
152  /** Specifies the computation of percentiles (i.e. 100-quantiles). */
153  public static Scale percentiles() {
154    return scale(100);
155  }
156
157  /**
158   * Specifies the computation of q-quantiles.
159   *
160   * @param scale the scale for the quantiles to be calculated, i.e. the q of the q-quantiles, which
161   *     must be positive
162   */
163  public static Scale scale(int scale) {
164    return new Scale(scale);
165  }
166
167  /**
168   * Describes the point in a fluent API chain where only the scale (i.e. the q in q-quantiles) has
169   * been specified.
170   *
171   * @since 20.0
172   */
173  public static final class Scale {
174
175    private final int scale;
176
177    private Scale(int scale) {
178      checkArgument(scale > 0, "Quantile scale must be positive");
179      this.scale = scale;
180    }
181
182    /**
183     * Specifies a single quantile index to be calculated, i.e. the k in the kth q-quantile.
184     *
185     * @param index the quantile index, which must be in the inclusive range [0, q] for q-quantiles
186     */
187    public ScaleAndIndex index(int index) {
188      return new ScaleAndIndex(scale, index);
189    }
190
191    /**
192     * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
193     * q-quantile.
194     *
195     * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
196     *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
197     *     set will be snapshotted when this method is called
198     * @throws IllegalArgumentException if {@code indexes} is empty
199     */
200    public ScaleAndIndexes indexes(int... indexes) {
201      return new ScaleAndIndexes(scale, indexes.clone());
202    }
203
204    /**
205     * Specifies multiple quantile indexes to be calculated, each index being the k in the kth
206     * q-quantile.
207     *
208     * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for
209     *     q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the
210     *     set will be snapshotted when this method is called
211     * @throws IllegalArgumentException if {@code indexes} is empty
212     */
213    public ScaleAndIndexes indexes(Collection<Integer> indexes) {
214      return new ScaleAndIndexes(scale, Ints.toArray(indexes));
215    }
216  }
217
218  /**
219   * Describes the point in a fluent API chain where the scale and a single quantile index (i.e. the
220   * q and the k in the kth q-quantile) have been specified.
221   *
222   * @since 20.0
223   */
224  public static final class ScaleAndIndex {
225
226    private final int scale;
227    private final int index;
228
229    private ScaleAndIndex(int scale, int index) {
230      checkIndex(index, scale);
231      this.scale = scale;
232      this.index = index;
233    }
234
235    /**
236     * Computes the quantile value of the given dataset.
237     *
238     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
239     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
240     *     this call (it is copied instead)
241     * @return the quantile value
242     */
243    public double compute(Collection<? extends Number> dataset) {
244      return computeInPlace(Doubles.toArray(dataset));
245    }
246
247    /**
248     * Computes the quantile value of the given dataset.
249     *
250     * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
251     *     be mutated by this call (it is copied instead)
252     * @return the quantile value
253     */
254    public double compute(double... dataset) {
255      return computeInPlace(dataset.clone());
256    }
257
258    /**
259     * Computes the quantile value of the given dataset.
260     *
261     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
262     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
263     *     this call (it is copied instead)
264     * @return the quantile value
265     */
266    public double compute(long... dataset) {
267      return computeInPlace(longsToDoubles(dataset));
268    }
269
270    /**
271     * Computes the quantile value of the given dataset.
272     *
273     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
274     *     cast to doubles, and which will not be mutated by this call (it is copied instead)
275     * @return the quantile value
276     */
277    public double compute(int... dataset) {
278      return computeInPlace(intsToDoubles(dataset));
279    }
280
281    /**
282     * Computes the quantile value of the given dataset, performing the computation in-place.
283     *
284     * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
285     *     be arbitrarily reordered by this method call
286     * @return the quantile value
287     */
288    public double computeInPlace(double... dataset) {
289      checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
290      if (containsNaN(dataset)) {
291        return NaN;
292      }
293
294      // Calculate the quotient and remainder in the integer division x = k * (N-1) / q, i.e.
295      // index * (dataset.length - 1) / scale. If there is no remainder, we can just find the value
296      // whose index in the sorted dataset equals the quotient; if there is a remainder, we
297      // interpolate between that and the next value.
298
299      // Since index and (dataset.length - 1) are non-negative ints, their product can be expressed
300      // as a long, without risk of overflow:
301      long numerator = (long) index * (dataset.length - 1);
302      // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
303      // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to get
304      // a rounded ratio and a remainder which can be expressed as ints, without risk of overflow:
305      int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
306      int remainder = (int) (numerator - (long) quotient * scale);
307      selectInPlace(quotient, dataset, 0, dataset.length - 1);
308      if (remainder == 0) {
309        return dataset[quotient];
310      } else {
311        selectInPlace(quotient + 1, dataset, quotient + 1, dataset.length - 1);
312        return interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale);
313      }
314    }
315  }
316
317  /**
318   * Describes the point in a fluent API chain where the scale and a multiple quantile indexes (i.e.
319   * the q and a set of values for the k in the kth q-quantile) have been specified.
320   *
321   * @since 20.0
322   */
323  public static final class ScaleAndIndexes {
324
325    private final int scale;
326    private final int[] indexes;
327
328    private ScaleAndIndexes(int scale, int[] indexes) {
329      for (int index : indexes) {
330        checkIndex(index, scale);
331      }
332      checkArgument(indexes.length > 0, "Indexes must be a non empty array");
333      this.scale = scale;
334      this.indexes = indexes;
335    }
336
337    /**
338     * Computes the quantile values of the given dataset.
339     *
340     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
341     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
342     *     this call (it is copied instead)
343     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
344     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
345     *     map are ordered by quantile index in the same order they were passed to the {@code
346     *     indexes} method.
347     */
348    public Map<Integer, Double> compute(Collection<? extends Number> dataset) {
349      return computeInPlace(Doubles.toArray(dataset));
350    }
351
352    /**
353     * Computes the quantile values of the given dataset.
354     *
355     * @param dataset the dataset to do the calculation on, which must be non-empty, which will not
356     *     be mutated by this call (it is copied instead)
357     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
358     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
359     *     map are ordered by quantile index in the same order they were passed to the {@code
360     *     indexes} method.
361     */
362    public Map<Integer, Double> compute(double... dataset) {
363      return computeInPlace(dataset.clone());
364    }
365
366    /**
367     * Computes the quantile values of the given dataset.
368     *
369     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
370     *     cast to doubles (with any associated lost of precision), and which will not be mutated by
371     *     this call (it is copied instead)
372     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
373     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
374     *     map are ordered by quantile index in the same order they were passed to the {@code
375     *     indexes} method.
376     */
377    public Map<Integer, Double> compute(long... dataset) {
378      return computeInPlace(longsToDoubles(dataset));
379    }
380
381    /**
382     * Computes the quantile values of the given dataset.
383     *
384     * @param dataset the dataset to do the calculation on, which must be non-empty, which will be
385     *     cast to doubles, and which will not be mutated by this call (it is copied instead)
386     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
387     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
388     *     map are ordered by quantile index in the same order they were passed to the {@code
389     *     indexes} method.
390     */
391    public Map<Integer, Double> compute(int... dataset) {
392      return computeInPlace(intsToDoubles(dataset));
393    }
394
395    /**
396     * Computes the quantile values of the given dataset, performing the computation in-place.
397     *
398     * @param dataset the dataset to do the calculation on, which must be non-empty, and which will
399     *     be arbitrarily reordered by this method call
400     * @return an unmodifiable, ordered map of results: the keys will be the specified quantile
401     *     indexes, and the values the corresponding quantile values. When iterating, entries in the
402     *     map are ordered by quantile index in the same order that the indexes were passed to the
403     *     {@code indexes} method.
404     */
405    public Map<Integer, Double> computeInPlace(double... dataset) {
406      checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset");
407      if (containsNaN(dataset)) {
408        Map<Integer, Double> nanMap = new LinkedHashMap<>();
409        for (int index : indexes) {
410          nanMap.put(index, NaN);
411        }
412        return unmodifiableMap(nanMap);
413      }
414
415      // Calculate the quotients and remainders in the integer division x = k * (N - 1) / q, i.e.
416      // index * (dataset.length - 1) / scale for each index in indexes. For each, if there is no
417      // remainder, we can just select the value whose index in the sorted dataset equals the
418      // quotient; if there is a remainder, we interpolate between that and the next value.
419
420      int[] quotients = new int[indexes.length];
421      int[] remainders = new int[indexes.length];
422      // The indexes to select. In the worst case, we'll need one each side of each quantile.
423      int[] requiredSelections = new int[indexes.length * 2];
424      int requiredSelectionsCount = 0;
425      for (int i = 0; i < indexes.length; i++) {
426        // Since index and (dataset.length - 1) are non-negative ints, their product can be
427        // expressed as a long, without risk of overflow:
428        long numerator = (long) indexes[i] * (dataset.length - 1);
429        // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a
430        // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to
431        // get a rounded ratio and a remainder which can be expressed as ints, without risk of
432        // overflow:
433        int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN);
434        int remainder = (int) (numerator - (long) quotient * scale);
435        quotients[i] = quotient;
436        remainders[i] = remainder;
437        requiredSelections[requiredSelectionsCount] = quotient;
438        requiredSelectionsCount++;
439        if (remainder != 0) {
440          requiredSelections[requiredSelectionsCount] = quotient + 1;
441          requiredSelectionsCount++;
442        }
443      }
444      sort(requiredSelections, 0, requiredSelectionsCount);
445      selectAllInPlace(
446          requiredSelections, 0, requiredSelectionsCount - 1, dataset, 0, dataset.length - 1);
447      Map<Integer, Double> ret = new LinkedHashMap<>();
448      for (int i = 0; i < indexes.length; i++) {
449        int quotient = quotients[i];
450        int remainder = remainders[i];
451        if (remainder == 0) {
452          ret.put(indexes[i], dataset[quotient]);
453        } else {
454          ret.put(
455              indexes[i], interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale));
456        }
457      }
458      return unmodifiableMap(ret);
459    }
460  }
461
462  /** Returns whether any of the values in {@code dataset} are {@code NaN}. */
463  private static boolean containsNaN(double... dataset) {
464    for (double value : dataset) {
465      if (Double.isNaN(value)) {
466        return true;
467      }
468    }
469    return false;
470  }
471
472  /**
473   * Returns a value a fraction {@code (remainder / scale)} of the way between {@code lower} and
474   * {@code upper}. Assumes that {@code lower <= upper}. Correctly handles infinities (but not
475   * {@code NaN}).
476   */
477  private static double interpolate(double lower, double upper, double remainder, double scale) {
478    if (lower == NEGATIVE_INFINITY) {
479      if (upper == POSITIVE_INFINITY) {
480        // Return NaN when lower == NEGATIVE_INFINITY and upper == POSITIVE_INFINITY:
481        return NaN;
482      }
483      // Return NEGATIVE_INFINITY when NEGATIVE_INFINITY == lower <= upper < POSITIVE_INFINITY:
484      return NEGATIVE_INFINITY;
485    }
486    if (upper == POSITIVE_INFINITY) {
487      // Return POSITIVE_INFINITY when NEGATIVE_INFINITY < lower <= upper == POSITIVE_INFINITY:
488      return POSITIVE_INFINITY;
489    }
490    return lower + (upper - lower) * remainder / scale;
491  }
492
493  private static void checkIndex(int index, int scale) {
494    if (index < 0 || index > scale) {
495      throw new IllegalArgumentException(
496          "Quantile indexes must be between 0 and the scale, which is " + scale);
497    }
498  }
499
500  private static double[] longsToDoubles(long[] longs) {
501    int len = longs.length;
502    double[] doubles = new double[len];
503    for (int i = 0; i < len; i++) {
504      doubles[i] = longs[i];
505    }
506    return doubles;
507  }
508
509  private static double[] intsToDoubles(int[] ints) {
510    int len = ints.length;
511    double[] doubles = new double[len];
512    for (int i = 0; i < len; i++) {
513      doubles[i] = ints[i];
514    }
515    return doubles;
516  }
517
518  /**
519   * Performs an in-place selection to find the element which would appear at a given index in a
520   * dataset if it were sorted. The following preconditions should hold:
521   *
522   * <ul>
523   *   <li>{@code required}, {@code from}, and {@code to} should all be indexes into {@code array};
524   *   <li>{@code required} should be in the range [{@code from}, {@code to}];
525   *   <li>all the values with indexes in the range [0, {@code from}) should be less than or equal
526   *       to all the values with indexes in the range [{@code from}, {@code to}];
527   *   <li>all the values with indexes in the range ({@code to}, {@code array.length - 1}] should be
528   *       greater than or equal to all the values with indexes in the range [{@code from}, {@code
529   *       to}].
530   * </ul>
531   *
532   * This method will reorder the values with indexes in the range [{@code from}, {@code to}] such
533   * that all the values with indexes in the range [{@code from}, {@code required}) are less than or
534   * equal to the value with index {@code required}, and all the values with indexes in the range
535   * ({@code required}, {@code to}] are greater than or equal to that value. Therefore, the value at
536   * {@code required} is the value which would appear at that index in the sorted dataset.
537   */
538  private static void selectInPlace(int required, double[] array, int from, int to) {
539    // If we are looking for the least element in the range, we can just do a linear search for it.
540    // (We will hit this whenever we are doing quantile interpolation: our first selection finds
541    // the lower value, our second one finds the upper value by looking for the next least element.)
542    if (required == from) {
543      int min = from;
544      for (int index = from + 1; index <= to; index++) {
545        if (array[min] > array[index]) {
546          min = index;
547        }
548      }
549      if (min != from) {
550        swap(array, min, from);
551      }
552      return;
553    }
554
555    // Let's play quickselect! We'll repeatedly partition the range [from, to] containing the
556    // required element, as long as it has more than one element.
557    while (to > from) {
558      int partitionPoint = partition(array, from, to);
559      if (partitionPoint >= required) {
560        to = partitionPoint - 1;
561      }
562      if (partitionPoint <= required) {
563        from = partitionPoint + 1;
564      }
565    }
566  }
567
568  /**
569   * Performs a partition operation on the slice of {@code array} with elements in the range [{@code
570   * from}, {@code to}]. Uses the median of {@code from}, {@code to}, and the midpoint between them
571   * as a pivot. Returns the index which the slice is partitioned around, i.e. if it returns {@code
572   * ret} then we know that the values with indexes in [{@code from}, {@code ret}) are less than or
573   * equal to the value at {@code ret} and the values with indexes in ({@code ret}, {@code to}] are
574   * greater than or equal to that.
575   */
576  private static int partition(double[] array, int from, int to) {
577    // Select a pivot, and move it to the start of the slice i.e. to index from.
578    movePivotToStartOfSlice(array, from, to);
579    double pivot = array[from];
580
581    // Move all elements with indexes in (from, to] which are greater than the pivot to the end of
582    // the array. Keep track of where those elements begin.
583    int partitionPoint = to;
584    for (int i = to; i > from; i--) {
585      if (array[i] > pivot) {
586        swap(array, partitionPoint, i);
587        partitionPoint--;
588      }
589    }
590
591    // We now know that all elements with indexes in (from, partitionPoint] are less than or equal
592    // to the pivot at from, and all elements with indexes in (partitionPoint, to] are greater than
593    // it. We swap the pivot into partitionPoint and we know the array is partitioned around that.
594    swap(array, from, partitionPoint);
595    return partitionPoint;
596  }
597
598  /**
599   * Selects the pivot to use, namely the median of the values at {@code from}, {@code to}, and
600   * halfway between the two (rounded down), from {@code array}, and ensure (by swapping elements if
601   * necessary) that that pivot value appears at the start of the slice i.e. at {@code from}.
602   * Expects that {@code from} is strictly less than {@code to}.
603   */
604  private static void movePivotToStartOfSlice(double[] array, int from, int to) {
605    int mid = (from + to) >>> 1;
606    // We want to make a swap such that either array[to] <= array[from] <= array[mid], or
607    // array[mid] <= array[from] <= array[to]. We know that from < to, so we know mid < to
608    // (although it's possible that mid == from, if to == from + 1). Note that the postcondition
609    // would be impossible to fulfil if mid == to unless we also have array[from] == array[to].
610    boolean toLessThanMid = (array[to] < array[mid]);
611    boolean midLessThanFrom = (array[mid] < array[from]);
612    boolean toLessThanFrom = (array[to] < array[from]);
613    if (toLessThanMid == midLessThanFrom) {
614      // Either array[to] < array[mid] < array[from] or array[from] <= array[mid] <= array[to].
615      swap(array, mid, from);
616    } else if (toLessThanMid != toLessThanFrom) {
617      // Either array[from] <= array[to] < array[mid] or array[mid] <= array[to] < array[from].
618      swap(array, from, to);
619    }
620    // The postcondition now holds. So the median, our chosen pivot, is at from.
621  }
622
623  /**
624   * Performs an in-place selection, like {@link #selectInPlace}, to select all the indexes {@code
625   * allRequired[i]} for {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. These
626   * indexes must be sorted in the array and must all be in the range [{@code from}, {@code to}].
627   */
628  private static void selectAllInPlace(
629      int[] allRequired, int requiredFrom, int requiredTo, double[] array, int from, int to) {
630    // Choose the first selection to do...
631    int requiredChosen = chooseNextSelection(allRequired, requiredFrom, requiredTo, from, to);
632    int required = allRequired[requiredChosen];
633
634    // ...do the first selection...
635    selectInPlace(required, array, from, to);
636
637    // ...then recursively perform the selections in the range below...
638    int requiredBelow = requiredChosen - 1;
639    while (requiredBelow >= requiredFrom && allRequired[requiredBelow] == required) {
640      requiredBelow--; // skip duplicates of required in the range below
641    }
642    if (requiredBelow >= requiredFrom) {
643      selectAllInPlace(allRequired, requiredFrom, requiredBelow, array, from, required - 1);
644    }
645
646    // ...and then recursively perform the selections in the range above.
647    int requiredAbove = requiredChosen + 1;
648    while (requiredAbove <= requiredTo && allRequired[requiredAbove] == required) {
649      requiredAbove++; // skip duplicates of required in the range above
650    }
651    if (requiredAbove <= requiredTo) {
652      selectAllInPlace(allRequired, requiredAbove, requiredTo, array, required + 1, to);
653    }
654  }
655
656  /**
657   * Chooses the next selection to do from the required selections. It is required that the array
658   * {@code allRequired} is sorted and that {@code allRequired[i]} are in the range [{@code from},
659   * {@code to}] for all {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. The
660   * value returned by this method is the {@code i} in that range such that {@code allRequired[i]}
661   * is as close as possible to the center of the range [{@code from}, {@code to}]. Choosing the
662   * value closest to the center of the range first is the most efficient strategy because it
663   * minimizes the size of the subranges from which the remaining selections must be done.
664   */
665  private static int chooseNextSelection(
666      int[] allRequired, int requiredFrom, int requiredTo, int from, int to) {
667    if (requiredFrom == requiredTo) {
668      return requiredFrom; // only one thing to choose, so choose it
669    }
670
671    // Find the center and round down. The true center is either centerFloor or halfway between
672    // centerFloor and centerFloor + 1.
673    int centerFloor = (from + to) >>> 1;
674
675    // Do a binary search until we're down to the range of two which encloses centerFloor (unless
676    // all values are lower or higher than centerFloor, in which case we find the two highest or
677    // lowest respectively). If centerFloor is in allRequired, we will definitely find it. If not,
678    // but centerFloor + 1 is, we'll definitely find that. The closest value to the true (unrounded)
679    // center will be at either low or high.
680    int low = requiredFrom;
681    int high = requiredTo;
682    while (high > low + 1) {
683      int mid = (low + high) >>> 1;
684      if (allRequired[mid] > centerFloor) {
685        high = mid;
686      } else if (allRequired[mid] < centerFloor) {
687        low = mid;
688      } else {
689        return mid; // allRequired[mid] = centerFloor, so we can't get closer than that
690      }
691    }
692
693    // Now pick the closest of the two candidates. Note that there is no rounding here.
694    if (from + to - allRequired[low] - allRequired[high] > 0) {
695      return high;
696    } else {
697      return low;
698    }
699  }
700
701  /** Swaps the values at {@code i} and {@code j} in {@code array}. */
702  private static void swap(double[] array, int i, int j) {
703    double temp = array[i];
704    array[i] = array[j];
705    array[j] = temp;
706  }
707}