001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkState; 020import static com.google.common.math.DoubleUtils.ensureNonNegative; 021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite; 022import static com.google.common.primitives.Doubles.isFinite; 023import static java.lang.Double.NaN; 024import static java.lang.Double.doubleToLongBits; 025import static java.lang.Double.isNaN; 026 027import com.google.common.annotations.Beta; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.MoreObjects; 030import com.google.common.base.Objects; 031import java.io.Serializable; 032import java.nio.ByteBuffer; 033import java.nio.ByteOrder; 034import java.util.Iterator; 035import org.checkerframework.checker.nullness.qual.Nullable; 036 037/** 038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several 039 * forms of variance -- that were computed from a single set of zero or more floating-point values. 040 * 041 * <p>There are two ways to obtain a {@code Stats} instance: 042 * 043 * <ul> 044 * <li>If all the values you want to summarize are already known, use the appropriate {@code 045 * Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of 046 * {@code Number}, and primitive varargs are supported. 047 * <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance, 048 * feed values to it as you get them, then call {@link StatsAccumulator#snapshot}. 049 * </ul> 050 * 051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to 052 * calculate <i>only</i> the mean. 053 * 054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use 055 * built-in JDK libraries instead of this class. 056 * 057 * @author Pete Gillin 058 * @author Kevin Bourrillion 059 * @since 20.0 060 */ 061@Beta 062@GwtIncompatible 063public final class Stats implements Serializable { 064 065 private final long count; 066 private final double mean; 067 private final double sumOfSquaresOfDeltas; 068 private final double min; 069 private final double max; 070 071 /** 072 * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}. 073 * 074 * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the 075 * following constraints. This is the callers responsibility and is not enforced here. 076 * 077 * <ul> 078 * <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to 079 * get multiplied by 0 to calculate the sum), and the other parameters may have any values 080 * (they will not be used). 081 * <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or {@link 082 * Double#NaN}. 083 * </ul> 084 */ 085 Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) { 086 this.count = count; 087 this.mean = mean; 088 this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas; 089 this.min = min; 090 this.max = max; 091 } 092 093 /** 094 * Returns statistics over a dataset containing the given values. 095 * 096 * @param values a series of values, which will be converted to {@code double} values (this may 097 * cause loss of precision) 098 */ 099 public static Stats of(Iterable<? extends Number> values) { 100 StatsAccumulator accumulator = new StatsAccumulator(); 101 accumulator.addAll(values); 102 return accumulator.snapshot(); 103 } 104 105 /** 106 * Returns statistics over a dataset containing the given values. 107 * 108 * @param values a series of values, which will be converted to {@code double} values (this may 109 * cause loss of precision) 110 */ 111 public static Stats of(Iterator<? extends Number> values) { 112 StatsAccumulator accumulator = new StatsAccumulator(); 113 accumulator.addAll(values); 114 return accumulator.snapshot(); 115 } 116 117 /** 118 * Returns statistics over a dataset containing the given values. 119 * 120 * @param values a series of values 121 */ 122 public static Stats of(double... values) { 123 StatsAccumulator acummulator = new StatsAccumulator(); 124 acummulator.addAll(values); 125 return acummulator.snapshot(); 126 } 127 128 /** 129 * Returns statistics over a dataset containing the given values. 130 * 131 * @param values a series of values 132 */ 133 public static Stats of(int... values) { 134 StatsAccumulator acummulator = new StatsAccumulator(); 135 acummulator.addAll(values); 136 return acummulator.snapshot(); 137 } 138 139 /** 140 * Returns statistics over a dataset containing the given values. 141 * 142 * @param values a series of values, which will be converted to {@code double} values (this may 143 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 144 */ 145 public static Stats of(long... values) { 146 StatsAccumulator acummulator = new StatsAccumulator(); 147 acummulator.addAll(values); 148 return acummulator.snapshot(); 149 } 150 151 /** Returns the number of values. */ 152 public long count() { 153 return count; 154 } 155 156 /** 157 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 158 * values. The count must be non-zero. 159 * 160 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 161 * the arithmetic mean of the population. 162 * 163 * <h3>Non-finite values</h3> 164 * 165 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 166 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 167 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 168 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 169 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 170 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 171 * 172 * <p>If you only want to calculate the mean, use {#meanOf} instead of creating a {@link Stats} 173 * instance. 174 * 175 * @throws IllegalStateException if the dataset is empty 176 */ 177 public double mean() { 178 checkState(count != 0); 179 return mean; 180 } 181 182 /** 183 * Returns the sum of the values. 184 * 185 * <h3>Non-finite values</h3> 186 * 187 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 188 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 189 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 190 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 191 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 192 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 193 */ 194 public double sum() { 195 return mean * count; 196 } 197 198 /** 199 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 200 * variance</a> of the values. The count must be non-zero. 201 * 202 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 203 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 204 * due to numerical errors. However, it is guaranteed never to return a negative result. 205 * 206 * <h3>Non-finite values</h3> 207 * 208 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 209 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 210 * 211 * @throws IllegalStateException if the dataset is empty 212 */ 213 public double populationVariance() { 214 checkState(count > 0); 215 if (isNaN(sumOfSquaresOfDeltas)) { 216 return NaN; 217 } 218 if (count == 1) { 219 return 0.0; 220 } 221 return ensureNonNegative(sumOfSquaresOfDeltas) / count(); 222 } 223 224 /** 225 * Returns the <a 226 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 227 * population standard deviation</a> of the values. The count must be non-zero. 228 * 229 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 230 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 231 * due to numerical errors. However, it is guaranteed never to return a negative result. 232 * 233 * <h3>Non-finite values</h3> 234 * 235 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 236 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 237 * 238 * @throws IllegalStateException if the dataset is empty 239 */ 240 public double populationStandardDeviation() { 241 return Math.sqrt(populationVariance()); 242 } 243 244 /** 245 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 246 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 247 * unbiased estimator of the population variance of the population. The count must be greater than 248 * one. 249 * 250 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 251 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 252 * 253 * <h3>Non-finite values</h3> 254 * 255 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 256 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 257 * 258 * @throws IllegalStateException if the dataset is empty or contains a single value 259 */ 260 public double sampleVariance() { 261 checkState(count > 1); 262 if (isNaN(sumOfSquaresOfDeltas)) { 263 return NaN; 264 } 265 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 266 } 267 268 /** 269 * Returns the <a 270 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 271 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 272 * population, this is an estimator of the population standard deviation of the population which 273 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 274 * the distribution). The count must be greater than one. 275 * 276 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 277 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 278 * 279 * <h3>Non-finite values</h3> 280 * 281 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 282 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 283 * 284 * @throws IllegalStateException if the dataset is empty or contains a single value 285 */ 286 public double sampleStandardDeviation() { 287 return Math.sqrt(sampleVariance()); 288 } 289 290 /** 291 * Returns the lowest value in the dataset. The count must be non-zero. 292 * 293 * <h3>Non-finite values</h3> 294 * 295 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 296 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 297 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 298 * only then the result is the lowest finite value. If it contains {@link 299 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 300 * 301 * @throws IllegalStateException if the dataset is empty 302 */ 303 public double min() { 304 checkState(count != 0); 305 return min; 306 } 307 308 /** 309 * Returns the highest value in the dataset. The count must be non-zero. 310 * 311 * <h3>Non-finite values</h3> 312 * 313 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 314 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 315 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 316 * only then the result is the highest finite value. If it contains {@link 317 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 318 * 319 * @throws IllegalStateException if the dataset is empty 320 */ 321 public double max() { 322 checkState(count != 0); 323 return max; 324 } 325 326 /** 327 * {@inheritDoc} 328 * 329 * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating 330 * point values. Two instances are guaranteed to be considered equal if one is copied from the 331 * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were 332 * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any 333 * values in between the two calls, or if one is obtained from the other after round-tripping 334 * through java serialization. However, floating point rounding errors mean that it may be false 335 * for some instances where the statistics are mathematically equal, including instances 336 * constructed from the same values in a different order... or (in the general case) even in the 337 * same order. (It is guaranteed to return true for instances constructed from the same values in 338 * the same order if {@code strictfp} is in effect, or if the system architecture guarantees 339 * {@code strictfp}-like semantics.) 340 */ 341 @Override 342 public boolean equals(@Nullable Object obj) { 343 if (obj == null) { 344 return false; 345 } 346 if (getClass() != obj.getClass()) { 347 return false; 348 } 349 Stats other = (Stats) obj; 350 return (count == other.count) 351 && (doubleToLongBits(mean) == doubleToLongBits(other.mean)) 352 && (doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas)) 353 && (doubleToLongBits(min) == doubleToLongBits(other.min)) 354 && (doubleToLongBits(max) == doubleToLongBits(other.max)); 355 } 356 357 /** 358 * {@inheritDoc} 359 * 360 * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics, 361 * including the floating point values. See the note on {@link #equals} for details. 362 */ 363 @Override 364 public int hashCode() { 365 return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max); 366 } 367 368 @Override 369 public String toString() { 370 if (count() > 0) { 371 return MoreObjects.toStringHelper(this) 372 .add("count", count) 373 .add("mean", mean) 374 .add("populationStandardDeviation", populationStandardDeviation()) 375 .add("min", min) 376 .add("max", max) 377 .toString(); 378 } else { 379 return MoreObjects.toStringHelper(this).add("count", count).toString(); 380 } 381 } 382 383 double sumOfSquaresOfDeltas() { 384 return sumOfSquaresOfDeltas; 385 } 386 387 /** 388 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 389 * values. The count must be non-zero. 390 * 391 * <p>The definition of the mean is the same as {@link Stats#mean}. 392 * 393 * @param values a series of values, which will be converted to {@code double} values (this may 394 * cause loss of precision) 395 * @throws IllegalArgumentException if the dataset is empty 396 */ 397 public static double meanOf(Iterable<? extends Number> values) { 398 return meanOf(values.iterator()); 399 } 400 401 /** 402 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 403 * values. The count must be non-zero. 404 * 405 * <p>The definition of the mean is the same as {@link Stats#mean}. 406 * 407 * @param values a series of values, which will be converted to {@code double} values (this may 408 * cause loss of precision) 409 * @throws IllegalArgumentException if the dataset is empty 410 */ 411 public static double meanOf(Iterator<? extends Number> values) { 412 checkArgument(values.hasNext()); 413 long count = 1; 414 double mean = values.next().doubleValue(); 415 while (values.hasNext()) { 416 double value = values.next().doubleValue(); 417 count++; 418 if (isFinite(value) && isFinite(mean)) { 419 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 420 mean += (value - mean) / count; 421 } else { 422 mean = calculateNewMeanNonFinite(mean, value); 423 } 424 } 425 return mean; 426 } 427 428 /** 429 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 430 * values. The count must be non-zero. 431 * 432 * <p>The definition of the mean is the same as {@link Stats#mean}. 433 * 434 * @param values a series of values 435 * @throws IllegalArgumentException if the dataset is empty 436 */ 437 public static double meanOf(double... values) { 438 checkArgument(values.length > 0); 439 double mean = values[0]; 440 for (int index = 1; index < values.length; index++) { 441 double value = values[index]; 442 if (isFinite(value) && isFinite(mean)) { 443 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 444 mean += (value - mean) / (index + 1); 445 } else { 446 mean = calculateNewMeanNonFinite(mean, value); 447 } 448 } 449 return mean; 450 } 451 452 /** 453 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 454 * values. The count must be non-zero. 455 * 456 * <p>The definition of the mean is the same as {@link Stats#mean}. 457 * 458 * @param values a series of values 459 * @throws IllegalArgumentException if the dataset is empty 460 */ 461 public static double meanOf(int... values) { 462 checkArgument(values.length > 0); 463 double mean = values[0]; 464 for (int index = 1; index < values.length; index++) { 465 double value = values[index]; 466 if (isFinite(value) && isFinite(mean)) { 467 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 468 mean += (value - mean) / (index + 1); 469 } else { 470 mean = calculateNewMeanNonFinite(mean, value); 471 } 472 } 473 return mean; 474 } 475 476 /** 477 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 478 * values. The count must be non-zero. 479 * 480 * <p>The definition of the mean is the same as {@link Stats#mean}. 481 * 482 * @param values a series of values, which will be converted to {@code double} values (this may 483 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 484 * @throws IllegalArgumentException if the dataset is empty 485 */ 486 public static double meanOf(long... values) { 487 checkArgument(values.length > 0); 488 double mean = values[0]; 489 for (int index = 1; index < values.length; index++) { 490 double value = values[index]; 491 if (isFinite(value) && isFinite(mean)) { 492 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 493 mean += (value - mean) / (index + 1); 494 } else { 495 mean = calculateNewMeanNonFinite(mean, value); 496 } 497 } 498 return mean; 499 } 500 501 // Serialization helpers 502 503 /** The size of byte array representation in bytes. */ 504 static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE; 505 506 /** 507 * Gets a byte array representation of this instance. 508 * 509 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 510 * versions. 511 */ 512 public byte[] toByteArray() { 513 ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN); 514 writeTo(buff); 515 return buff.array(); 516 } 517 518 /** 519 * Writes to the given {@link ByteBuffer} a byte representation of this instance. 520 * 521 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 522 * versions. 523 * 524 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 525 * {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance 526 * is written. In the process increases the position of {@link ByteBuffer} by BYTES. 527 */ 528 void writeTo(ByteBuffer buffer) { 529 checkNotNull(buffer); 530 checkArgument( 531 buffer.remaining() >= BYTES, 532 "Expected at least Stats.BYTES = %s remaining , got %s", 533 BYTES, 534 buffer.remaining()); 535 buffer 536 .putLong(count) 537 .putDouble(mean) 538 .putDouble(sumOfSquaresOfDeltas) 539 .putDouble(min) 540 .putDouble(max); 541 } 542 543 /** 544 * Creates a Stats instance from the given byte representation which was obtained by {@link 545 * #toByteArray}. 546 * 547 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 548 * versions. 549 */ 550 public static Stats fromByteArray(byte[] byteArray) { 551 checkNotNull(byteArray); 552 checkArgument( 553 byteArray.length == BYTES, 554 "Expected Stats.BYTES = %s remaining , got %s", 555 BYTES, 556 byteArray.length); 557 return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN)); 558 } 559 560 /** 561 * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}. 562 * 563 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 564 * versions. 565 * 566 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 567 * {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this 568 * instance is read. In the process increases the position of {@link ByteBuffer} by BYTES. 569 */ 570 static Stats readFrom(ByteBuffer buffer) { 571 checkNotNull(buffer); 572 checkArgument( 573 buffer.remaining() >= BYTES, 574 "Expected at least Stats.BYTES = %s remaining , got %s", 575 BYTES, 576 buffer.remaining()); 577 return new Stats( 578 buffer.getLong(), 579 buffer.getDouble(), 580 buffer.getDouble(), 581 buffer.getDouble(), 582 buffer.getDouble()); 583 } 584 585 private static final long serialVersionUID = 0; 586}