001/* 002 * Copyright (C) 2014 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static java.lang.Double.NEGATIVE_INFINITY; 019import static java.lang.Double.NaN; 020import static java.lang.Double.POSITIVE_INFINITY; 021import static java.util.Arrays.sort; 022import static java.util.Collections.unmodifiableMap; 023 024import com.google.common.annotations.Beta; 025import com.google.common.annotations.GwtIncompatible; 026import com.google.common.primitives.Doubles; 027import com.google.common.primitives.Ints; 028import java.math.RoundingMode; 029import java.util.Collection; 030import java.util.HashMap; 031import java.util.Map; 032 033/** 034 * Provides a fluent API for calculating 035 * <a href="http://en.wikipedia.org/wiki/Quantile">quantiles</a>. 036 * 037 * <h3>Examples</h3> 038 * 039 * <p>To compute the median: 040 * <pre> {@code 041 * 042 * double myMedian = median().compute(myDataset);}</pre> 043 * 044 * where {@link #median()} has been statically imported. 045 * 046 * <p>To compute the 99th percentile: 047 * <pre> {@code 048 * 049 * double myPercentile99 = percentiles().index(99).compute(myDataset);}</pre> 050 * 051 * where {@link #percentiles()} has been statically imported. 052 * 053 * <p>To compute median and the 90th and 99th percentiles: 054 * <pre> {@code 055 * 056 * Map<Integer, Double> myPercentiles = 057 * percentiles().indexes(50, 90, 99).compute(myDataset);}</pre> 058 * 059 * where {@link #percentiles()} has been statically imported: {@code myPercentiles} maps the keys 060 * 50, 90, and 99, to their corresponding quantile values. 061 * 062 * <p>To compute quartiles, use {@link #quartiles()} instead of {@link #percentiles()}. To compute 063 * arbitrary q-quantiles, use {@link #scale scale(q)}. 064 * 065 * <p>These examples all take a copy of your dataset. If you have a double array, you are okay with 066 * it being arbitrarily reordered, and you want to avoid that copy, you can use 067 * {@code computeInPlace} instead of {@code compute}. 068 * 069 * <h3>Definition and notes on interpolation</h3> 070 * 071 * <p>The definition of the kth q-quantile of N values is as follows: define x = k * (N - 1) / q; if 072 * x is an integer, the result is the value which would appear at index x in the sorted dataset 073 * (unless there are {@link Double#NaN NaN} values, see below); otherwise, the result is the average 074 * of the values which would appear at the indexes floor(x) and ceil(x) weighted by (1-frac(x)) and 075 * frac(x) respectively. This is the same definition as used by Excel and by S, it is the Type 7 076 * definition in 077 * <a href="http://stat.ethz.ch/R-manual/R-devel/library/stats/html/quantile.html">R</a>, and it is 078 * described by 079 * <a href="http://en.wikipedia.org/wiki/Quantile#Estimating_the_quantiles_of_a_population"> 080 * wikipedia</a> as providing "Linear interpolation of the modes for the order statistics for the 081 * uniform distribution on [0,1]." 082 * 083 * <h3>Handling of non-finite values</h3> 084 * 085 * <p>If any values in the input are {@link Double#NaN NaN} then all values returned are 086 * {@link Double#NaN NaN}. (This is the one occasion when the behaviour is not the same as you'd get 087 * from sorting with {@link java.util.Arrays#sort(double[]) Arrays.sort(double[])} or 088 * {@link java.util.Collections#sort(java.util.List) Collections.sort(List<Double>)} and 089 * selecting the required value(s). Those methods would sort {@link Double#NaN NaN} as if it is 090 * greater than any other value and place them at the end of the dataset, even after 091 * {@link Double#POSITIVE_INFINITY POSITIVE_INFINITY}.) 092 * 093 * <p>Otherwise, {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and 094 * {@link Double#POSITIVE_INFINITY POSITIVE_INFINITY} sort to the beginning and the end of the 095 * dataset, as you would expect. 096 * 097 * <p>If required to do a weighted average between an infinity and a finite value, or between an 098 * infinite value and itself, the infinite value is returned. If required to do a weighted average 099 * between {@link Double#NEGATIVE_INFINITY NEGATIVE_INFINITY} and {@link Double#POSITIVE_INFINITY 100 * POSITIVE_INFINITY}, {@link Double#NaN NaN} is returned (note that this will only happen if the 101 * dataset contains no finite values). 102 * 103 * <h3>Performance</h3> 104 * 105 * <p>The average time complexity of the computation is O(N) in the size of the dataset. There is a 106 * worst case time complexity of O(N^2). You are extremely unlikely to hit this quadratic case on 107 * randomly ordered data (the probability decreases faster than exponentially in N), but if you are 108 * passing in unsanitized user data then a malicious user could force it. A light shuffle of the 109 * data using an unpredictable seed should normally be enough to thwart this attack. 110 * 111 * <p>The time taken to compute multiple quantiles on the same dataset using {@link Scale#indexes 112 * indexes} is generally less than the total time taken to compute each of them separately, and 113 * sometimes much less. For example, on a large enough dataset, computing the 90th and 99th 114 * percentiles together takes about 55% as long as computing them separately. 115 * 116 * <p>When calling {@link ScaleAndIndex#compute} (in {@linkplain ScaleAndIndexes#compute either 117 * form}), the memory requirement is 8*N bytes for the copy of the dataset plus an overhead which is 118 * independent of N (but depends on the quantiles being computed). When calling 119 * {@link ScaleAndIndex#computeInPlace computeInPlace} (in 120 * {@linkplain ScaleAndIndexes#computeInPlace either form}), only the overhead is required. The 121 * number of object allocations is independent of N in both cases. 122 * 123 * @author Pete Gillin 124 * @since 20.0 125 */ 126@Beta 127@GwtIncompatible 128public final class Quantiles { 129 130 /** 131 * Specifies the computation of a median (i.e. the 1st 2-quantile). 132 */ 133 public static ScaleAndIndex median() { 134 return scale(2).index(1); 135 } 136 137 /** 138 * Specifies the computation of quartiles (i.e. 4-quantiles). 139 */ 140 public static Scale quartiles() { 141 return scale(4); 142 } 143 144 /** 145 * Specifies the computation of percentiles (i.e. 100-quantiles). 146 */ 147 public static Scale percentiles() { 148 return scale(100); 149 } 150 151 /** 152 * Specifies the computation of q-quantiles. 153 * 154 * @param scale the scale for the quantiles to be calculated, i.e. the q of the q-quantiles, which 155 * must be positive 156 */ 157 public static Scale scale(int scale) { 158 return new Scale(scale); 159 } 160 161 /** 162 * Describes the point in a fluent API chain where only the scale (i.e. the q in q-quantiles) has 163 * been specified. 164 * 165 * @since 20.0 166 */ 167 public static final class Scale { 168 169 private final int scale; 170 171 private Scale(int scale) { 172 checkArgument(scale > 0, "Quantile scale must be positive"); 173 this.scale = scale; 174 } 175 176 /** 177 * Specifies a single quantile index to be calculated, i.e. the k in the kth q-quantile. 178 * 179 * @param index the quantile index, which must be in the inclusive range [0, q] for q-quantiles 180 */ 181 public ScaleAndIndex index(int index) { 182 return new ScaleAndIndex(scale, index); 183 } 184 185 /** 186 * Specifies multiple quantile indexes to be calculated, each index being the k in the kth 187 * q-quantile. 188 * 189 * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for 190 * q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the 191 * set will be snapshotted when this method is called 192 */ 193 public ScaleAndIndexes indexes(int... indexes) { 194 return new ScaleAndIndexes(scale, indexes.clone()); 195 } 196 197 /** 198 * Specifies multiple quantile indexes to be calculated, each index being the k in the kth 199 * q-quantile. 200 * 201 * @param indexes the quantile indexes, each of which must be in the inclusive range [0, q] for 202 * q-quantiles; the order of the indexes is unimportant, duplicates will be ignored, and the 203 * set will be snapshotted when this method is called 204 */ 205 public ScaleAndIndexes indexes(Collection<Integer> indexes) { 206 return new ScaleAndIndexes(scale, Ints.toArray(indexes)); 207 } 208 } 209 210 /** 211 * Describes the point in a fluent API chain where the scale and a single quantile index (i.e. the 212 * q and the k in the kth q-quantile) have been specified. 213 * 214 * @since 20.0 215 */ 216 public static final class ScaleAndIndex { 217 218 private final int scale; 219 private final int index; 220 221 private ScaleAndIndex(int scale, int index) { 222 checkIndex(index, scale); 223 this.scale = scale; 224 this.index = index; 225 } 226 227 /** 228 * Computes the quantile value of the given dataset. 229 * 230 * @param dataset the dataset to do the calculation on, which must be non-empty, which will be 231 * cast to doubles (with any associated lost of precision), and which will not be mutated by 232 * this call (it is copied instead) 233 * @return the quantile value 234 */ 235 public double compute(Collection<? extends Number> dataset) { 236 return computeInPlace(Doubles.toArray(dataset)); 237 } 238 239 /** 240 * Computes the quantile value of the given dataset. 241 * 242 * @param dataset the dataset to do the calculation on, which must be non-empty, which will not 243 * be mutated by this call (it is copied instead) 244 * @return the quantile value 245 */ 246 public double compute(double... dataset) { 247 return computeInPlace(dataset.clone()); 248 } 249 250 /** 251 * Computes the quantile value of the given dataset. 252 * 253 * @param dataset the dataset to do the calculation on, which must be non-empty, which will be 254 * cast to doubles (with any associated lost of precision), and which will not be mutated by 255 * this call (it is copied instead) 256 * @return the quantile value 257 */ 258 public double compute(long... dataset) { 259 return computeInPlace(longsToDoubles(dataset)); 260 } 261 262 /** 263 * Computes the quantile value of the given dataset. 264 * 265 * @param dataset the dataset to do the calculation on, which must be non-empty, which will be 266 * cast to doubles, and which will not be mutated by this call (it is copied instead) 267 * @return the quantile value 268 */ 269 public double compute(int... dataset) { 270 return computeInPlace(intsToDoubles(dataset)); 271 } 272 273 /** 274 * Computes the quantile value of the given dataset, performing the computation in-place. 275 * 276 * @param dataset the dataset to do the calculation on, which must be non-empty, and which will 277 * be arbitrarily reordered by this method call 278 * @return the quantile value 279 */ 280 public double computeInPlace(double... dataset) { 281 checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset"); 282 if (containsNaN(dataset)) { 283 return NaN; 284 } 285 286 // Calculate the quotient and remainder in the integer division x = k * (N-1) / q, i.e. 287 // index * (dataset.length - 1) / scale. If there is no remainder, we can just find the value 288 // whose index in the sorted dataset equals the quotient; if there is a remainder, we 289 // interpolate between that and the next value. 290 291 // Since index and (dataset.length - 1) are non-negative ints, their product can be expressed 292 // as a long, without risk of overflow: 293 long numerator = (long) index * (dataset.length - 1); 294 // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a 295 // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to get 296 // a rounded ratio and a remainder which can be expressed as ints, without risk of overflow: 297 int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN); 298 int remainder = (int) (numerator - (long) quotient * scale); 299 selectInPlace(quotient, dataset, 0, dataset.length - 1); 300 if (remainder == 0) { 301 return dataset[quotient]; 302 } else { 303 selectInPlace(quotient + 1, dataset, quotient + 1, dataset.length - 1); 304 return interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale); 305 } 306 } 307 } 308 309 /** 310 * Describes the point in a fluent API chain where the scale and a multiple quantile indexes (i.e. 311 * the q and a set of values for the k in the kth q-quantile) have been specified. 312 * 313 * @since 20.0 314 */ 315 public static final class ScaleAndIndexes { 316 317 private final int scale; 318 private final int[] indexes; 319 320 private ScaleAndIndexes(int scale, int[] indexes) { 321 for (int index : indexes) { 322 checkIndex(index, scale); 323 } 324 this.scale = scale; 325 this.indexes = indexes; 326 } 327 328 /** 329 * Computes the quantile values of the given dataset. 330 * 331 * @param dataset the dataset to do the calculation on, which must be non-empty, which will be 332 * cast to doubles (with any associated lost of precision), and which will not be mutated by 333 * this call (it is copied instead) 334 * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and 335 * the values the corresponding quantile values 336 */ 337 public Map<Integer, Double> compute(Collection<? extends Number> dataset) { 338 return computeInPlace(Doubles.toArray(dataset)); 339 } 340 341 /** 342 * Computes the quantile values of the given dataset. 343 * 344 * @param dataset the dataset to do the calculation on, which must be non-empty, which will not 345 * be mutated by this call (it is copied instead) 346 * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and 347 * the values the corresponding quantile values 348 */ 349 public Map<Integer, Double> compute(double... dataset) { 350 return computeInPlace(dataset.clone()); 351 } 352 353 /** 354 * Computes the quantile values of the given dataset. 355 * 356 * @param dataset the dataset to do the calculation on, which must be non-empty, which will be 357 * cast to doubles (with any associated lost of precision), and which will not be mutated by 358 * this call (it is copied instead) 359 * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and 360 * the values the corresponding quantile values 361 */ 362 public Map<Integer, Double> compute(long... dataset) { 363 return computeInPlace(longsToDoubles(dataset)); 364 } 365 366 /** 367 * Computes the quantile values of the given dataset. 368 * 369 * @param dataset the dataset to do the calculation on, which must be non-empty, which will be 370 * cast to doubles, and which will not be mutated by this call (it is copied instead) 371 * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and 372 * the values the corresponding quantile values 373 */ 374 public Map<Integer, Double> compute(int... dataset) { 375 return computeInPlace(intsToDoubles(dataset)); 376 } 377 378 /** 379 * Computes the quantile values of the given dataset, performing the computation in-place. 380 * 381 * @param dataset the dataset to do the calculation on, which must be non-empty, and which will 382 * be arbitrarily reordered by this method call 383 * @return an unmodifiable map of results: the keys will be the specified quantile indexes, and 384 * the values the corresponding quantile values 385 */ 386 public Map<Integer, Double> computeInPlace(double... dataset) { 387 checkArgument(dataset.length > 0, "Cannot calculate quantiles of an empty dataset"); 388 if (containsNaN(dataset)) { 389 Map<Integer, Double> nanMap = new HashMap<>(); 390 for (int index : indexes) { 391 nanMap.put(index, NaN); 392 } 393 return unmodifiableMap(nanMap); 394 } 395 396 // Calculate the quotients and remainders in the integer division x = k * (N - 1) / q, i.e. 397 // index * (dataset.length - 1) / scale for each index in indexes. For each, if there is no 398 // remainder, we can just select the value whose index in the sorted dataset equals the 399 // quotient; if there is a remainder, we interpolate between that and the next value. 400 401 int[] quotients = new int[indexes.length]; 402 int[] remainders = new int[indexes.length]; 403 // The indexes to select. In the worst case, we'll need one each side of each quantile. 404 int[] requiredSelections = new int[indexes.length * 2]; 405 int requiredSelectionsCount = 0; 406 for (int i = 0; i < indexes.length; i++) { 407 // Since index and (dataset.length - 1) are non-negative ints, their product can be 408 // expressed as a long, without risk of overflow: 409 long numerator = (long) indexes[i] * (dataset.length - 1); 410 // Since scale is a positive int, index is in [0, scale], and (dataset.length - 1) is a 411 // non-negative int, we can do long-arithmetic on index * (dataset.length - 1) / scale to 412 // get a rounded ratio and a remainder which can be expressed as ints, without risk of 413 // overflow: 414 int quotient = (int) LongMath.divide(numerator, scale, RoundingMode.DOWN); 415 int remainder = (int) (numerator - (long) quotient * scale); 416 quotients[i] = quotient; 417 remainders[i] = remainder; 418 requiredSelections[requiredSelectionsCount] = quotient; 419 requiredSelectionsCount++; 420 if (remainder != 0) { 421 requiredSelections[requiredSelectionsCount] = quotient + 1; 422 requiredSelectionsCount++; 423 } 424 } 425 sort(requiredSelections, 0, requiredSelectionsCount); 426 selectAllInPlace( 427 requiredSelections, 0, requiredSelectionsCount - 1, dataset, 0, dataset.length - 1); 428 Map<Integer, Double> ret = new HashMap<>(); 429 for (int i = 0; i < indexes.length; i++) { 430 int quotient = quotients[i]; 431 int remainder = remainders[i]; 432 if (remainder == 0) { 433 ret.put(indexes[i], dataset[quotient]); 434 } else { 435 ret.put( 436 indexes[i], interpolate(dataset[quotient], dataset[quotient + 1], remainder, scale)); 437 } 438 } 439 return unmodifiableMap(ret); 440 } 441 } 442 443 /** 444 * Returns whether any of the values in {@code dataset} are {@code NaN}. 445 */ 446 private static boolean containsNaN(double... dataset) { 447 for (double value : dataset) { 448 if (Double.isNaN(value)) { 449 return true; 450 } 451 } 452 return false; 453 } 454 455 /** 456 * Returns a value a fraction {@code (remainder / scale)} of the way between {@code lower} and 457 * {@code upper}. Assumes that {@code lower <= upper}. Correctly handles infinities (but not 458 * {@code NaN}). 459 */ 460 private static double interpolate(double lower, double upper, double remainder, double scale) { 461 if (lower == NEGATIVE_INFINITY) { 462 if (upper == POSITIVE_INFINITY) { 463 // Return NaN when lower == NEGATIVE_INFINITY and upper == POSITIVE_INFINITY: 464 return NaN; 465 } 466 // Return NEGATIVE_INFINITY when NEGATIVE_INFINITY == lower <= upper < POSITIVE_INFINITY: 467 return NEGATIVE_INFINITY; 468 } 469 if (upper == POSITIVE_INFINITY) { 470 // Return POSITIVE_INFINITY when NEGATIVE_INFINITY < lower <= upper == POSITIVE_INFINITY: 471 return POSITIVE_INFINITY; 472 } 473 return lower + (upper - lower) * remainder / scale; 474 } 475 476 private static void checkIndex(int index, int scale) { 477 if (index < 0 || index > scale) { 478 throw new IllegalArgumentException( 479 "Quantile indexes must be between 0 and the scale, which is " + scale); 480 } 481 } 482 483 private static double[] longsToDoubles(long[] longs) { 484 int len = longs.length; 485 double[] doubles = new double[len]; 486 for (int i = 0; i < len; i++) { 487 doubles[i] = longs[i]; 488 } 489 return doubles; 490 } 491 492 private static double[] intsToDoubles(int[] ints) { 493 int len = ints.length; 494 double[] doubles = new double[len]; 495 for (int i = 0; i < len; i++) { 496 doubles[i] = ints[i]; 497 } 498 return doubles; 499 } 500 501 /** 502 * Performs an in-place selection to find the element which would appear at a given index in a 503 * dataset if it were sorted. The following preconditions should hold: 504 * <ul> 505 * <li>{@code required}, {@code from}, and {@code to} should all be indexes into {@code array}; 506 * <li>{@code required} should be in the range [{@code from}, {@code to}]; 507 * <li>all the values with indexes in the range [0, {@code from}) should be less than or equal to 508 * all the values with indexes in the range [{@code from}, {@code to}]; 509 * <li>all the values with indexes in the range ({@code to}, {@code array.length - 1}] should be 510 * greater than or equal to all the values with indexes in the range [{@code from}, {@code to}]. 511 * </ul> 512 * This method will reorder the values with indexes in the range [{@code from}, {@code to}] such 513 * that all the values with indexes in the range [{@code from}, {@code required}) are less than or 514 * equal to the value with index {@code required}, and all the values with indexes in the range 515 * ({@code required}, {@code to}] are greater than or equal to that value. Therefore, the value at 516 * {@code required} is the value which would appear at that index in the sorted dataset. 517 */ 518 private static void selectInPlace(int required, double[] array, int from, int to) { 519 // If we are looking for the least element in the range, we can just do a linear search for it. 520 // (We will hit this whenever we are doing quantile interpolation: our first selection finds 521 // the lower value, our second one finds the upper value by looking for the next least element.) 522 if (required == from) { 523 int min = from; 524 for (int index = from + 1; index <= to; index++) { 525 if (array[min] > array[index]) { 526 min = index; 527 } 528 } 529 if (min != from) { 530 swap(array, min, from); 531 } 532 return; 533 } 534 535 // Let's play quickselect! We'll repeatedly partition the range [from, to] containing the 536 // required element, as long as it has more than one element. 537 while (to > from) { 538 int partitionPoint = partition(array, from, to); 539 if (partitionPoint >= required) { 540 to = partitionPoint - 1; 541 } 542 if (partitionPoint <= required) { 543 from = partitionPoint + 1; 544 } 545 } 546 } 547 548 /** 549 * Performs a partition operation on the slice of {@code array} with elements in the range 550 * [{@code from}, {@code to}]. Uses the median of {@code from}, {@code to}, and the midpoint 551 * between them as a pivot. Returns the index which the slice is partitioned around, i.e. if it 552 * returns {@code ret} then we know that the values with indexes in [{@code from}, {@code ret}) 553 * are less than or equal to the value at {@code ret} and the values with indexes in ({@code ret}, 554 * {@code to}] are greater than or equal to that. 555 */ 556 private static int partition(double[] array, int from, int to) { 557 // Select a pivot, and move it to the start of the slice i.e. to index from. 558 movePivotToStartOfSlice(array, from, to); 559 double pivot = array[from]; 560 561 // Move all elements with indexes in (from, to] which are greater than the pivot to the end of 562 // the array. Keep track of where those elements begin. 563 int partitionPoint = to; 564 for (int i = to; i > from; i--) { 565 if (array[i] > pivot) { 566 swap(array, partitionPoint, i); 567 partitionPoint--; 568 } 569 } 570 571 // We now know that all elements with indexes in (from, partitionPoint] are less than or equal 572 // to the pivot at from, and all elements with indexes in (partitionPoint, to] are greater than 573 // it. We swap the pivot into partitionPoint and we know the array is partitioned around that. 574 swap(array, from, partitionPoint); 575 return partitionPoint; 576 } 577 578 /** 579 * Selects the pivot to use, namely the median of the values at {@code from}, {@code to}, and 580 * halfway between the two (rounded down), from {@code array}, and ensure (by swapping elements if 581 * necessary) that that pivot value appears at the start of the slice i.e. at {@code from}. 582 * Expects that {@code from} is strictly less than {@code to}. 583 */ 584 private static void movePivotToStartOfSlice(double[] array, int from, int to) { 585 int mid = (from + to) >>> 1; 586 // We want to make a swap such that either array[to] <= array[from] <= array[mid], or 587 // array[mid] <= array[from] <= array[to]. We know that from < to, so we know mid < to 588 // (although it's possible that mid == from, if to == from + 1). Note that the postcondition 589 // would be impossible to fulfil if mid == to unless we also have array[from] == array[to]. 590 boolean toLessThanMid = (array[to] < array[mid]); 591 boolean midLessThanFrom = (array[mid] < array[from]); 592 boolean toLessThanFrom = (array[to] < array[from]); 593 if (toLessThanMid == midLessThanFrom) { 594 // Either array[to] < array[mid] < array[from] or array[from] <= array[mid] <= array[to]. 595 swap(array, mid, from); 596 } else if (toLessThanMid != toLessThanFrom) { 597 // Either array[from] <= array[to] < array[mid] or array[mid] <= array[to] < array[from]. 598 swap(array, from, to); 599 } 600 // The postcondition now holds. So the median, our chosen pivot, is at from. 601 } 602 603 /** 604 * Performs an in-place selection, like {@link #selectInPlace}, to select all the indexes 605 * {@code allRequired[i]} for {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. 606 * These indexes must be sorted in the array and must all be in the range [{@code from}, 607 * {@code to}]. 608 */ 609 private static void selectAllInPlace( 610 int[] allRequired, int requiredFrom, int requiredTo, double[] array, int from, int to) { 611 // Choose the first selection to do... 612 int requiredChosen = chooseNextSelection(allRequired, requiredFrom, requiredTo, from, to); 613 int required = allRequired[requiredChosen]; 614 615 // ...do the first selection... 616 selectInPlace(required, array, from, to); 617 618 // ...then recursively perform the selections in the range below... 619 int requiredBelow = requiredChosen - 1; 620 while (requiredBelow >= requiredFrom && allRequired[requiredBelow] == required) { 621 requiredBelow--; // skip duplicates of required in the range below 622 } 623 if (requiredBelow >= requiredFrom) { 624 selectAllInPlace(allRequired, requiredFrom, requiredBelow, array, from, required - 1); 625 } 626 627 // ...and then recursively perform the selections in the range above. 628 int requiredAbove = requiredChosen + 1; 629 while (requiredAbove <= requiredTo && allRequired[requiredAbove] == required) { 630 requiredAbove++; // skip duplicates of required in the range above 631 } 632 if (requiredAbove <= requiredTo) { 633 selectAllInPlace(allRequired, requiredAbove, requiredTo, array, required + 1, to); 634 } 635 } 636 637 /** 638 * Chooses the next selection to do from the required selections. It is required that the array 639 * {@code allRequired} is sorted and that {@code allRequired[i]} are in the range [{@code from}, 640 * {@code to}] for all {@code i} in the range [{@code requiredFrom}, {@code requiredTo}]. The 641 * value returned by this method is the {@code i} in that range such that {@code allRequired[i]} 642 * is as close as possible to the center of the range [{@code from}, {@code to}]. Choosing the 643 * value closest to the center of the range first is the most efficient strategy because it 644 * minimizes the size of the subranges from which the remaining selections must be done. 645 */ 646 private static int chooseNextSelection( 647 int[] allRequired, int requiredFrom, int requiredTo, int from, int to) { 648 if (requiredFrom == requiredTo) { 649 return requiredFrom; // only one thing to choose, so choose it 650 } 651 652 // Find the center and round down. The true center is either centerFloor or halfway between 653 // centerFloor and centerFloor + 1. 654 int centerFloor = (from + to) >>> 1; 655 656 // Do a binary search until we're down to the range of two which encloses centerFloor (unless 657 // all values are lower or higher than centerFloor, in which case we find the two highest or 658 // lowest respectively). If centerFloor is in allRequired, we will definitely find it. If not, 659 // but centerFloor + 1 is, we'll definitely find that. The closest value to the true (unrounded) 660 // center will be at either low or high. 661 int low = requiredFrom; 662 int high = requiredTo; 663 while (high > low + 1) { 664 int mid = (low + high) >>> 1; 665 if (allRequired[mid] > centerFloor) { 666 high = mid; 667 } else if (allRequired[mid] < centerFloor) { 668 low = mid; 669 } else { 670 return mid; // allRequired[mid] = centerFloor, so we can't get closer than that 671 } 672 } 673 674 // Now pick the closest of the two candidates. Note that there is no rounding here. 675 if (from + to - allRequired[low] - allRequired[high] > 0) { 676 return high; 677 } else { 678 return low; 679 } 680 } 681 682 /** 683 * Swaps the values at {@code i} and {@code j} in {@code array}. 684 */ 685 private static void swap(double[] array, int i, int j) { 686 double temp = array[i]; 687 array[i] = array[j]; 688 array[j] = temp; 689 } 690}