001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkState; 020import static com.google.common.math.DoubleUtils.ensureNonNegative; 021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite; 022import static com.google.common.primitives.Doubles.isFinite; 023import static java.lang.Double.NaN; 024import static java.lang.Double.doubleToLongBits; 025import static java.lang.Double.isNaN; 026 027import com.google.common.annotations.Beta; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.MoreObjects; 030import com.google.common.base.Objects; 031import java.io.Serializable; 032import java.nio.ByteBuffer; 033import java.nio.ByteOrder; 034import java.util.Iterator; 035import org.checkerframework.checker.nullness.compatqual.NullableDecl; 036 037/** 038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several 039 * forms of variance -- that were computed from a single set of zero or more floating-point values. 040 * 041 * <p>There are two ways to obtain a {@code Stats} instance: 042 * 043 * <ul> 044 * <li>If all the values you want to summarize are already known, use the appropriate {@code 045 * Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of 046 * {@code Number}, and primitive varargs are supported. 047 * <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance, 048 * feed values to it as you get them, then call {@link StatsAccumulator#snapshot}. 049 * </ul> 050 * 051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to 052 * calculate <i>only</i> the mean. 053 * 054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use 055 * built-in JDK libraries instead of this class. 056 * 057 * @author Pete Gillin 058 * @author Kevin Bourrillion 059 * @since 20.0 060 */ 061@Beta 062@GwtIncompatible 063public final class Stats implements Serializable { 064 065 private final long count; 066 private final double mean; 067 private final double sumOfSquaresOfDeltas; 068 private final double min; 069 private final double max; 070 071 /** 072 * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}. 073 * 074 * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the 075 * following constraints. This is the callers responsibility and is not enforced here. 076 * 077 * <ul> 078 * <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to 079 * get multiplied by 0 to calculate the sum), and the other parameters may have any values 080 * (they will not be used). 081 * <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or {@link 082 * Double#NaN}. 083 * </ul> 084 */ 085 Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) { 086 this.count = count; 087 this.mean = mean; 088 this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas; 089 this.min = min; 090 this.max = max; 091 } 092 093 /** 094 * Returns statistics over a dataset containing the given values. 095 * 096 * @param values a series of values, which will be converted to {@code double} values (this may 097 * cause loss of precision) 098 */ 099 public static Stats of(Iterable<? extends Number> values) { 100 StatsAccumulator accumulator = new StatsAccumulator(); 101 accumulator.addAll(values); 102 return accumulator.snapshot(); 103 } 104 105 /** 106 * Returns statistics over a dataset containing the given values. The iterator will be completely 107 * consumed by this method. 108 * 109 * @param values a series of values, which will be converted to {@code double} values (this may 110 * cause loss of precision) 111 */ 112 public static Stats of(Iterator<? extends Number> values) { 113 StatsAccumulator accumulator = new StatsAccumulator(); 114 accumulator.addAll(values); 115 return accumulator.snapshot(); 116 } 117 118 /** 119 * Returns statistics over a dataset containing the given values. 120 * 121 * @param values a series of values 122 */ 123 public static Stats of(double... values) { 124 StatsAccumulator acummulator = new StatsAccumulator(); 125 acummulator.addAll(values); 126 return acummulator.snapshot(); 127 } 128 129 /** 130 * Returns statistics over a dataset containing the given values. 131 * 132 * @param values a series of values 133 */ 134 public static Stats of(int... values) { 135 StatsAccumulator acummulator = new StatsAccumulator(); 136 acummulator.addAll(values); 137 return acummulator.snapshot(); 138 } 139 140 /** 141 * Returns statistics over a dataset containing the given values. 142 * 143 * @param values a series of values, which will be converted to {@code double} values (this may 144 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 145 */ 146 public static Stats of(long... values) { 147 StatsAccumulator acummulator = new StatsAccumulator(); 148 acummulator.addAll(values); 149 return acummulator.snapshot(); 150 } 151 152 /** Returns the number of values. */ 153 public long count() { 154 return count; 155 } 156 157 /** 158 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 159 * values. The count must be non-zero. 160 * 161 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 162 * the arithmetic mean of the population. 163 * 164 * <h3>Non-finite values</h3> 165 * 166 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 167 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 168 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 169 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 170 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 171 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 172 * 173 * <p>If you only want to calculate the mean, use {@link #meanOf} instead of creating a {@link 174 * Stats} instance. 175 * 176 * @throws IllegalStateException if the dataset is empty 177 */ 178 public double mean() { 179 checkState(count != 0); 180 return mean; 181 } 182 183 /** 184 * Returns the sum of the values. 185 * 186 * <h3>Non-finite values</h3> 187 * 188 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 189 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 190 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 191 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 192 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 193 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 194 */ 195 public double sum() { 196 return mean * count; 197 } 198 199 /** 200 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 201 * variance</a> of the values. The count must be non-zero. 202 * 203 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 204 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 205 * due to numerical errors. However, it is guaranteed never to return a negative result. 206 * 207 * <h3>Non-finite values</h3> 208 * 209 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 210 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 211 * 212 * @throws IllegalStateException if the dataset is empty 213 */ 214 public double populationVariance() { 215 checkState(count > 0); 216 if (isNaN(sumOfSquaresOfDeltas)) { 217 return NaN; 218 } 219 if (count == 1) { 220 return 0.0; 221 } 222 return ensureNonNegative(sumOfSquaresOfDeltas) / count(); 223 } 224 225 /** 226 * Returns the <a 227 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 228 * population standard deviation</a> of the values. The count must be non-zero. 229 * 230 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 231 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 232 * due to numerical errors. However, it is guaranteed never to return a negative result. 233 * 234 * <h3>Non-finite values</h3> 235 * 236 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 237 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 238 * 239 * @throws IllegalStateException if the dataset is empty 240 */ 241 public double populationStandardDeviation() { 242 return Math.sqrt(populationVariance()); 243 } 244 245 /** 246 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 247 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 248 * unbiased estimator of the population variance of the population. The count must be greater than 249 * one. 250 * 251 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 252 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 253 * 254 * <h3>Non-finite values</h3> 255 * 256 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 257 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 258 * 259 * @throws IllegalStateException if the dataset is empty or contains a single value 260 */ 261 public double sampleVariance() { 262 checkState(count > 1); 263 if (isNaN(sumOfSquaresOfDeltas)) { 264 return NaN; 265 } 266 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 267 } 268 269 /** 270 * Returns the <a 271 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 272 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 273 * population, this is an estimator of the population standard deviation of the population which 274 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 275 * the distribution). The count must be greater than one. 276 * 277 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 278 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 279 * 280 * <h3>Non-finite values</h3> 281 * 282 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 283 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 284 * 285 * @throws IllegalStateException if the dataset is empty or contains a single value 286 */ 287 public double sampleStandardDeviation() { 288 return Math.sqrt(sampleVariance()); 289 } 290 291 /** 292 * Returns the lowest value in the dataset. The count must be non-zero. 293 * 294 * <h3>Non-finite values</h3> 295 * 296 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 297 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 298 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 299 * only then the result is the lowest finite value. If it contains {@link 300 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 301 * 302 * @throws IllegalStateException if the dataset is empty 303 */ 304 public double min() { 305 checkState(count != 0); 306 return min; 307 } 308 309 /** 310 * Returns the highest value in the dataset. The count must be non-zero. 311 * 312 * <h3>Non-finite values</h3> 313 * 314 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 315 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 316 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 317 * only then the result is the highest finite value. If it contains {@link 318 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 319 * 320 * @throws IllegalStateException if the dataset is empty 321 */ 322 public double max() { 323 checkState(count != 0); 324 return max; 325 } 326 327 /** 328 * {@inheritDoc} 329 * 330 * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating 331 * point values. Two instances are guaranteed to be considered equal if one is copied from the 332 * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were 333 * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any 334 * values in between the two calls, or if one is obtained from the other after round-tripping 335 * through java serialization. However, floating point rounding errors mean that it may be false 336 * for some instances where the statistics are mathematically equal, including instances 337 * constructed from the same values in a different order... or (in the general case) even in the 338 * same order. (It is guaranteed to return true for instances constructed from the same values in 339 * the same order if {@code strictfp} is in effect, or if the system architecture guarantees 340 * {@code strictfp}-like semantics.) 341 */ 342 @Override 343 public boolean equals(@NullableDecl Object obj) { 344 if (obj == null) { 345 return false; 346 } 347 if (getClass() != obj.getClass()) { 348 return false; 349 } 350 Stats other = (Stats) obj; 351 return (count == other.count) 352 && (doubleToLongBits(mean) == doubleToLongBits(other.mean)) 353 && (doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas)) 354 && (doubleToLongBits(min) == doubleToLongBits(other.min)) 355 && (doubleToLongBits(max) == doubleToLongBits(other.max)); 356 } 357 358 /** 359 * {@inheritDoc} 360 * 361 * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics, 362 * including the floating point values. See the note on {@link #equals} for details. 363 */ 364 @Override 365 public int hashCode() { 366 return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max); 367 } 368 369 @Override 370 public String toString() { 371 if (count() > 0) { 372 return MoreObjects.toStringHelper(this) 373 .add("count", count) 374 .add("mean", mean) 375 .add("populationStandardDeviation", populationStandardDeviation()) 376 .add("min", min) 377 .add("max", max) 378 .toString(); 379 } else { 380 return MoreObjects.toStringHelper(this).add("count", count).toString(); 381 } 382 } 383 384 double sumOfSquaresOfDeltas() { 385 return sumOfSquaresOfDeltas; 386 } 387 388 /** 389 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 390 * values. The count must be non-zero. 391 * 392 * <p>The definition of the mean is the same as {@link Stats#mean}. 393 * 394 * @param values a series of values, which will be converted to {@code double} values (this may 395 * cause loss of precision) 396 * @throws IllegalArgumentException if the dataset is empty 397 */ 398 public static double meanOf(Iterable<? extends Number> values) { 399 return meanOf(values.iterator()); 400 } 401 402 /** 403 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 404 * values. The count must be non-zero. 405 * 406 * <p>The definition of the mean is the same as {@link Stats#mean}. 407 * 408 * @param values a series of values, which will be converted to {@code double} values (this may 409 * cause loss of precision) 410 * @throws IllegalArgumentException if the dataset is empty 411 */ 412 public static double meanOf(Iterator<? extends Number> values) { 413 checkArgument(values.hasNext()); 414 long count = 1; 415 double mean = values.next().doubleValue(); 416 while (values.hasNext()) { 417 double value = values.next().doubleValue(); 418 count++; 419 if (isFinite(value) && isFinite(mean)) { 420 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 421 mean += (value - mean) / count; 422 } else { 423 mean = calculateNewMeanNonFinite(mean, value); 424 } 425 } 426 return mean; 427 } 428 429 /** 430 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 431 * values. The count must be non-zero. 432 * 433 * <p>The definition of the mean is the same as {@link Stats#mean}. 434 * 435 * @param values a series of values 436 * @throws IllegalArgumentException if the dataset is empty 437 */ 438 public static double meanOf(double... values) { 439 checkArgument(values.length > 0); 440 double mean = values[0]; 441 for (int index = 1; index < values.length; index++) { 442 double value = values[index]; 443 if (isFinite(value) && isFinite(mean)) { 444 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 445 mean += (value - mean) / (index + 1); 446 } else { 447 mean = calculateNewMeanNonFinite(mean, value); 448 } 449 } 450 return mean; 451 } 452 453 /** 454 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 455 * values. The count must be non-zero. 456 * 457 * <p>The definition of the mean is the same as {@link Stats#mean}. 458 * 459 * @param values a series of values 460 * @throws IllegalArgumentException if the dataset is empty 461 */ 462 public static double meanOf(int... values) { 463 checkArgument(values.length > 0); 464 double mean = values[0]; 465 for (int index = 1; index < values.length; index++) { 466 double value = values[index]; 467 if (isFinite(value) && isFinite(mean)) { 468 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 469 mean += (value - mean) / (index + 1); 470 } else { 471 mean = calculateNewMeanNonFinite(mean, value); 472 } 473 } 474 return mean; 475 } 476 477 /** 478 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 479 * values. The count must be non-zero. 480 * 481 * <p>The definition of the mean is the same as {@link Stats#mean}. 482 * 483 * @param values a series of values, which will be converted to {@code double} values (this may 484 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 485 * @throws IllegalArgumentException if the dataset is empty 486 */ 487 public static double meanOf(long... values) { 488 checkArgument(values.length > 0); 489 double mean = values[0]; 490 for (int index = 1; index < values.length; index++) { 491 double value = values[index]; 492 if (isFinite(value) && isFinite(mean)) { 493 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 494 mean += (value - mean) / (index + 1); 495 } else { 496 mean = calculateNewMeanNonFinite(mean, value); 497 } 498 } 499 return mean; 500 } 501 502 // Serialization helpers 503 504 /** The size of byte array representation in bytes. */ 505 static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE; 506 507 /** 508 * Gets a byte array representation of this instance. 509 * 510 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 511 * versions. 512 */ 513 public byte[] toByteArray() { 514 ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN); 515 writeTo(buff); 516 return buff.array(); 517 } 518 519 /** 520 * Writes to the given {@link ByteBuffer} a byte representation of this instance. 521 * 522 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 523 * versions. 524 * 525 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 526 * {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance 527 * is written. In the process increases the position of {@link ByteBuffer} by BYTES. 528 */ 529 void writeTo(ByteBuffer buffer) { 530 checkNotNull(buffer); 531 checkArgument( 532 buffer.remaining() >= BYTES, 533 "Expected at least Stats.BYTES = %s remaining , got %s", 534 BYTES, 535 buffer.remaining()); 536 buffer 537 .putLong(count) 538 .putDouble(mean) 539 .putDouble(sumOfSquaresOfDeltas) 540 .putDouble(min) 541 .putDouble(max); 542 } 543 544 /** 545 * Creates a Stats instance from the given byte representation which was obtained by {@link 546 * #toByteArray}. 547 * 548 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 549 * versions. 550 */ 551 public static Stats fromByteArray(byte[] byteArray) { 552 checkNotNull(byteArray); 553 checkArgument( 554 byteArray.length == BYTES, 555 "Expected Stats.BYTES = %s remaining , got %s", 556 BYTES, 557 byteArray.length); 558 return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN)); 559 } 560 561 /** 562 * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}. 563 * 564 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 565 * versions. 566 * 567 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 568 * {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this 569 * instance is read. In the process increases the position of {@link ByteBuffer} by BYTES. 570 */ 571 static Stats readFrom(ByteBuffer buffer) { 572 checkNotNull(buffer); 573 checkArgument( 574 buffer.remaining() >= BYTES, 575 "Expected at least Stats.BYTES = %s remaining , got %s", 576 BYTES, 577 buffer.remaining()); 578 return new Stats( 579 buffer.getLong(), 580 buffer.getDouble(), 581 buffer.getDouble(), 582 buffer.getDouble(), 583 buffer.getDouble()); 584 } 585 586 private static final long serialVersionUID = 0; 587}