001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkState; 020import static com.google.common.math.DoubleUtils.ensureNonNegative; 021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite; 022import static com.google.common.primitives.Doubles.isFinite; 023import static java.lang.Double.NaN; 024import static java.lang.Double.doubleToLongBits; 025import static java.lang.Double.isNaN; 026 027import com.google.common.annotations.Beta; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.MoreObjects; 030import com.google.common.base.Objects; 031import java.io.Serializable; 032import java.nio.ByteBuffer; 033import java.nio.ByteOrder; 034import java.util.Iterator; 035import javax.annotation.CheckForNull; 036 037/** 038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several 039 * forms of variance -- that were computed from a single set of zero or more floating-point values. 040 * 041 * <p>There are two ways to obtain a {@code Stats} instance: 042 * 043 * <ul> 044 * <li>If all the values you want to summarize are already known, use the appropriate {@code 045 * Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of 046 * {@code Number}, and primitive varargs are supported. 047 * <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance, 048 * feed values to it as you get them, then call {@link StatsAccumulator#snapshot}. 049 * </ul> 050 * 051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to 052 * calculate <i>only</i> the mean. 053 * 054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use 055 * built-in JDK libraries instead of this class. 056 * 057 * @author Pete Gillin 058 * @author Kevin Bourrillion 059 * @since 20.0 060 */ 061@Beta 062@GwtIncompatible 063@ElementTypesAreNonnullByDefault 064public final class Stats implements Serializable { 065 066 private final long count; 067 private final double mean; 068 private final double sumOfSquaresOfDeltas; 069 private final double min; 070 private final double max; 071 072 /** 073 * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}. 074 * 075 * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the 076 * following constraints. This is the callers responsibility and is not enforced here. 077 * 078 * <ul> 079 * <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to 080 * get multiplied by 0 to calculate the sum), and the other parameters may have any values 081 * (they will not be used). 082 * <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or {@link 083 * Double#NaN}. 084 * </ul> 085 */ 086 Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) { 087 this.count = count; 088 this.mean = mean; 089 this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas; 090 this.min = min; 091 this.max = max; 092 } 093 094 /** 095 * Returns statistics over a dataset containing the given values. 096 * 097 * @param values a series of values, which will be converted to {@code double} values (this may 098 * cause loss of precision) 099 */ 100 public static Stats of(Iterable<? extends Number> values) { 101 StatsAccumulator accumulator = new StatsAccumulator(); 102 accumulator.addAll(values); 103 return accumulator.snapshot(); 104 } 105 106 /** 107 * Returns statistics over a dataset containing the given values. The iterator will be completely 108 * consumed by this method. 109 * 110 * @param values a series of values, which will be converted to {@code double} values (this may 111 * cause loss of precision) 112 */ 113 public static Stats of(Iterator<? extends Number> values) { 114 StatsAccumulator accumulator = new StatsAccumulator(); 115 accumulator.addAll(values); 116 return accumulator.snapshot(); 117 } 118 119 /** 120 * Returns statistics over a dataset containing the given values. 121 * 122 * @param values a series of values 123 */ 124 public static Stats of(double... values) { 125 StatsAccumulator acummulator = new StatsAccumulator(); 126 acummulator.addAll(values); 127 return acummulator.snapshot(); 128 } 129 130 /** 131 * Returns statistics over a dataset containing the given values. 132 * 133 * @param values a series of values 134 */ 135 public static Stats of(int... values) { 136 StatsAccumulator acummulator = new StatsAccumulator(); 137 acummulator.addAll(values); 138 return acummulator.snapshot(); 139 } 140 141 /** 142 * Returns statistics over a dataset containing the given values. 143 * 144 * @param values a series of values, which will be converted to {@code double} values (this may 145 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 146 */ 147 public static Stats of(long... values) { 148 StatsAccumulator acummulator = new StatsAccumulator(); 149 acummulator.addAll(values); 150 return acummulator.snapshot(); 151 } 152 153 /** Returns the number of values. */ 154 public long count() { 155 return count; 156 } 157 158 /** 159 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 160 * values. The count must be non-zero. 161 * 162 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 163 * the arithmetic mean of the population. 164 * 165 * <h3>Non-finite values</h3> 166 * 167 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 168 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 169 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 170 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 171 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 172 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 173 * 174 * <p>If you only want to calculate the mean, use {@link #meanOf} instead of creating a {@link 175 * Stats} instance. 176 * 177 * @throws IllegalStateException if the dataset is empty 178 */ 179 public double mean() { 180 checkState(count != 0); 181 return mean; 182 } 183 184 /** 185 * Returns the sum of the values. 186 * 187 * <h3>Non-finite values</h3> 188 * 189 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 190 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 191 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 192 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 193 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 194 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 195 */ 196 public double sum() { 197 return mean * count; 198 } 199 200 /** 201 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 202 * variance</a> of the values. The count must be non-zero. 203 * 204 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 205 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 206 * due to numerical errors. However, it is guaranteed never to return a negative result. 207 * 208 * <h3>Non-finite values</h3> 209 * 210 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 211 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 212 * 213 * @throws IllegalStateException if the dataset is empty 214 */ 215 public double populationVariance() { 216 checkState(count > 0); 217 if (isNaN(sumOfSquaresOfDeltas)) { 218 return NaN; 219 } 220 if (count == 1) { 221 return 0.0; 222 } 223 return ensureNonNegative(sumOfSquaresOfDeltas) / count(); 224 } 225 226 /** 227 * Returns the <a 228 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 229 * population standard deviation</a> of the values. The count must be non-zero. 230 * 231 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 232 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 233 * due to numerical errors. However, it is guaranteed never to return a negative result. 234 * 235 * <h3>Non-finite values</h3> 236 * 237 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 238 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 239 * 240 * @throws IllegalStateException if the dataset is empty 241 */ 242 public double populationStandardDeviation() { 243 return Math.sqrt(populationVariance()); 244 } 245 246 /** 247 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 248 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 249 * unbiased estimator of the population variance of the population. The count must be greater than 250 * one. 251 * 252 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 253 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 254 * 255 * <h3>Non-finite values</h3> 256 * 257 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 258 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 259 * 260 * @throws IllegalStateException if the dataset is empty or contains a single value 261 */ 262 public double sampleVariance() { 263 checkState(count > 1); 264 if (isNaN(sumOfSquaresOfDeltas)) { 265 return NaN; 266 } 267 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 268 } 269 270 /** 271 * Returns the <a 272 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 273 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 274 * population, this is an estimator of the population standard deviation of the population which 275 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 276 * the distribution). The count must be greater than one. 277 * 278 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 279 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 280 * 281 * <h3>Non-finite values</h3> 282 * 283 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 284 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 285 * 286 * @throws IllegalStateException if the dataset is empty or contains a single value 287 */ 288 public double sampleStandardDeviation() { 289 return Math.sqrt(sampleVariance()); 290 } 291 292 /** 293 * Returns the lowest value in the dataset. The count must be non-zero. 294 * 295 * <h3>Non-finite values</h3> 296 * 297 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 298 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 299 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 300 * only then the result is the lowest finite value. If it contains {@link 301 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 302 * 303 * @throws IllegalStateException if the dataset is empty 304 */ 305 public double min() { 306 checkState(count != 0); 307 return min; 308 } 309 310 /** 311 * Returns the highest value in the dataset. The count must be non-zero. 312 * 313 * <h3>Non-finite values</h3> 314 * 315 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 316 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 317 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 318 * only then the result is the highest finite value. If it contains {@link 319 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 320 * 321 * @throws IllegalStateException if the dataset is empty 322 */ 323 public double max() { 324 checkState(count != 0); 325 return max; 326 } 327 328 /** 329 * {@inheritDoc} 330 * 331 * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating 332 * point values. Two instances are guaranteed to be considered equal if one is copied from the 333 * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were 334 * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any 335 * values in between the two calls, or if one is obtained from the other after round-tripping 336 * through java serialization. However, floating point rounding errors mean that it may be false 337 * for some instances where the statistics are mathematically equal, including instances 338 * constructed from the same values in a different order... or (in the general case) even in the 339 * same order. (It is guaranteed to return true for instances constructed from the same values in 340 * the same order if {@code strictfp} is in effect, or if the system architecture guarantees 341 * {@code strictfp}-like semantics.) 342 */ 343 @Override 344 public boolean equals(@CheckForNull Object obj) { 345 if (obj == null) { 346 return false; 347 } 348 if (getClass() != obj.getClass()) { 349 return false; 350 } 351 Stats other = (Stats) obj; 352 return count == other.count 353 && doubleToLongBits(mean) == doubleToLongBits(other.mean) 354 && doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas) 355 && doubleToLongBits(min) == doubleToLongBits(other.min) 356 && doubleToLongBits(max) == doubleToLongBits(other.max); 357 } 358 359 /** 360 * {@inheritDoc} 361 * 362 * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics, 363 * including the floating point values. See the note on {@link #equals} for details. 364 */ 365 @Override 366 public int hashCode() { 367 return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max); 368 } 369 370 @Override 371 public String toString() { 372 if (count() > 0) { 373 return MoreObjects.toStringHelper(this) 374 .add("count", count) 375 .add("mean", mean) 376 .add("populationStandardDeviation", populationStandardDeviation()) 377 .add("min", min) 378 .add("max", max) 379 .toString(); 380 } else { 381 return MoreObjects.toStringHelper(this).add("count", count).toString(); 382 } 383 } 384 385 double sumOfSquaresOfDeltas() { 386 return sumOfSquaresOfDeltas; 387 } 388 389 /** 390 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 391 * values. The count must be non-zero. 392 * 393 * <p>The definition of the mean is the same as {@link Stats#mean}. 394 * 395 * @param values a series of values, which will be converted to {@code double} values (this may 396 * cause loss of precision) 397 * @throws IllegalArgumentException if the dataset is empty 398 */ 399 public static double meanOf(Iterable<? extends Number> values) { 400 return meanOf(values.iterator()); 401 } 402 403 /** 404 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 405 * values. The count must be non-zero. 406 * 407 * <p>The definition of the mean is the same as {@link Stats#mean}. 408 * 409 * @param values a series of values, which will be converted to {@code double} values (this may 410 * cause loss of precision) 411 * @throws IllegalArgumentException if the dataset is empty 412 */ 413 public static double meanOf(Iterator<? extends Number> values) { 414 checkArgument(values.hasNext()); 415 long count = 1; 416 double mean = values.next().doubleValue(); 417 while (values.hasNext()) { 418 double value = values.next().doubleValue(); 419 count++; 420 if (isFinite(value) && isFinite(mean)) { 421 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 422 mean += (value - mean) / count; 423 } else { 424 mean = calculateNewMeanNonFinite(mean, value); 425 } 426 } 427 return mean; 428 } 429 430 /** 431 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 432 * values. The count must be non-zero. 433 * 434 * <p>The definition of the mean is the same as {@link Stats#mean}. 435 * 436 * @param values a series of values 437 * @throws IllegalArgumentException if the dataset is empty 438 */ 439 public static double meanOf(double... values) { 440 checkArgument(values.length > 0); 441 double mean = values[0]; 442 for (int index = 1; index < values.length; index++) { 443 double value = values[index]; 444 if (isFinite(value) && isFinite(mean)) { 445 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 446 mean += (value - mean) / (index + 1); 447 } else { 448 mean = calculateNewMeanNonFinite(mean, value); 449 } 450 } 451 return mean; 452 } 453 454 /** 455 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 456 * values. The count must be non-zero. 457 * 458 * <p>The definition of the mean is the same as {@link Stats#mean}. 459 * 460 * @param values a series of values 461 * @throws IllegalArgumentException if the dataset is empty 462 */ 463 public static double meanOf(int... values) { 464 checkArgument(values.length > 0); 465 double mean = values[0]; 466 for (int index = 1; index < values.length; index++) { 467 double value = values[index]; 468 if (isFinite(value) && isFinite(mean)) { 469 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 470 mean += (value - mean) / (index + 1); 471 } else { 472 mean = calculateNewMeanNonFinite(mean, value); 473 } 474 } 475 return mean; 476 } 477 478 /** 479 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 480 * values. The count must be non-zero. 481 * 482 * <p>The definition of the mean is the same as {@link Stats#mean}. 483 * 484 * @param values a series of values, which will be converted to {@code double} values (this may 485 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 486 * @throws IllegalArgumentException if the dataset is empty 487 */ 488 public static double meanOf(long... values) { 489 checkArgument(values.length > 0); 490 double mean = values[0]; 491 for (int index = 1; index < values.length; index++) { 492 double value = values[index]; 493 if (isFinite(value) && isFinite(mean)) { 494 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 495 mean += (value - mean) / (index + 1); 496 } else { 497 mean = calculateNewMeanNonFinite(mean, value); 498 } 499 } 500 return mean; 501 } 502 503 // Serialization helpers 504 505 /** The size of byte array representation in bytes. */ 506 static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE; 507 508 /** 509 * Gets a byte array representation of this instance. 510 * 511 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 512 * versions. 513 */ 514 public byte[] toByteArray() { 515 ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN); 516 writeTo(buff); 517 return buff.array(); 518 } 519 520 /** 521 * Writes to the given {@link ByteBuffer} a byte representation of this instance. 522 * 523 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 524 * versions. 525 * 526 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 527 * {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance 528 * is written. In the process increases the position of {@link ByteBuffer} by BYTES. 529 */ 530 void writeTo(ByteBuffer buffer) { 531 checkNotNull(buffer); 532 checkArgument( 533 buffer.remaining() >= BYTES, 534 "Expected at least Stats.BYTES = %s remaining , got %s", 535 BYTES, 536 buffer.remaining()); 537 buffer 538 .putLong(count) 539 .putDouble(mean) 540 .putDouble(sumOfSquaresOfDeltas) 541 .putDouble(min) 542 .putDouble(max); 543 } 544 545 /** 546 * Creates a Stats instance from the given byte representation which was obtained by {@link 547 * #toByteArray}. 548 * 549 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 550 * versions. 551 */ 552 public static Stats fromByteArray(byte[] byteArray) { 553 checkNotNull(byteArray); 554 checkArgument( 555 byteArray.length == BYTES, 556 "Expected Stats.BYTES = %s remaining , got %s", 557 BYTES, 558 byteArray.length); 559 return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN)); 560 } 561 562 /** 563 * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}. 564 * 565 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 566 * versions. 567 * 568 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 569 * {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this 570 * instance is read. In the process increases the position of {@link ByteBuffer} by BYTES. 571 */ 572 static Stats readFrom(ByteBuffer buffer) { 573 checkNotNull(buffer); 574 checkArgument( 575 buffer.remaining() >= BYTES, 576 "Expected at least Stats.BYTES = %s remaining , got %s", 577 BYTES, 578 buffer.remaining()); 579 return new Stats( 580 buffer.getLong(), 581 buffer.getDouble(), 582 buffer.getDouble(), 583 buffer.getDouble(), 584 buffer.getDouble()); 585 } 586 587 private static final long serialVersionUID = 0; 588}