001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkState; 020import static com.google.common.math.DoubleUtils.ensureNonNegative; 021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite; 022import static com.google.common.primitives.Doubles.isFinite; 023import static java.lang.Double.NaN; 024import static java.lang.Double.doubleToLongBits; 025import static java.lang.Double.isNaN; 026 027import com.google.common.annotations.Beta; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.MoreObjects; 030import com.google.common.base.Objects; 031import java.io.Serializable; 032import java.nio.ByteBuffer; 033import java.nio.ByteOrder; 034import java.util.Iterator; 035import javax.annotation.Nullable; 036 037/** 038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several 039 * forms of variance -- that were computed from a single set of zero or more floating-point values. 040 * 041 * <p>There are two ways to obtain a {@code Stats} instance: 042 * 043 * <ul> 044 * <li>If all the values you want to summarize are already known, use the appropriate {@code 045 * Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of 046 * {@code Number}, and primitive varargs are supported. 047 * <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance, feed 048 * values to it as you get them, then call {@link StatsAccumulator#snapshot}. 049 * </ul> 050 * 051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to 052 * calculate <i>only</i> the mean. 053 * 054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use 055 * built-in JDK libraries instead of this class. 056 * 057 * @author Pete Gillin 058 * @author Kevin Bourrillion 059 * @since 20.0 060 */ 061@Beta 062@GwtIncompatible 063public final class Stats implements Serializable { 064 065 private final long count; 066 private final double mean; 067 private final double sumOfSquaresOfDeltas; 068 private final double min; 069 private final double max; 070 071 /** 072 * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}. 073 * 074 * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the 075 * following constraints. This is the callers responsibility and is not enforced here. 076 * <ul> 077 * <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to 078 * get multiplied by 0 to calculate the sum), and the other parameters may have any values (they 079 * will not be used). 080 * <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or 081 * {@link Double#NaN}. 082 * </ul> 083 */ 084 Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) { 085 this.count = count; 086 this.mean = mean; 087 this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas; 088 this.min = min; 089 this.max = max; 090 } 091 092 /** 093 * Returns statistics over a dataset containing the given values. 094 * 095 * @param values a series of values, which will be converted to {@code double} values (this may 096 * cause loss of precision) 097 */ 098 public static Stats of(Iterable<? extends Number> values) { 099 StatsAccumulator accumulator = new StatsAccumulator(); 100 accumulator.addAll(values); 101 return accumulator.snapshot(); 102 } 103 104 /** 105 * Returns statistics over a dataset containing the given values. 106 * 107 * @param values a series of values, which will be converted to {@code double} values (this may 108 * cause loss of precision) 109 */ 110 public static Stats of(Iterator<? extends Number> values) { 111 StatsAccumulator accumulator = new StatsAccumulator(); 112 accumulator.addAll(values); 113 return accumulator.snapshot(); 114 } 115 116 /** 117 * Returns statistics over a dataset containing the given values. 118 * 119 * @param values a series of values 120 */ 121 public static Stats of(double... values) { 122 StatsAccumulator acummulator = new StatsAccumulator(); 123 acummulator.addAll(values); 124 return acummulator.snapshot(); 125 } 126 127 /** 128 * Returns statistics over a dataset containing the given values. 129 * 130 * @param values a series of values 131 */ 132 public static Stats of(int... values) { 133 StatsAccumulator acummulator = new StatsAccumulator(); 134 acummulator.addAll(values); 135 return acummulator.snapshot(); 136 } 137 138 /** 139 * Returns statistics over a dataset containing the given values. 140 * 141 * @param values a series of values, which will be converted to {@code double} values (this may 142 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 143 */ 144 public static Stats of(long... values) { 145 StatsAccumulator acummulator = new StatsAccumulator(); 146 acummulator.addAll(values); 147 return acummulator.snapshot(); 148 } 149 150 /** 151 * Returns the number of values. 152 */ 153 public long count() { 154 return count; 155 } 156 157 /** 158 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 159 * values. The count must be non-zero. 160 * 161 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 162 * the arithmetic mean of the population. 163 * 164 * <h3>Non-finite values</h3> 165 * 166 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 167 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 168 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 169 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 170 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or 171 * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 172 * 173 * <p>If you only want to calculate the mean, use {#meanOf} instead of creating a {@link Stats} 174 * instance. 175 * 176 * @throws IllegalStateException if the dataset is empty 177 */ 178 public double mean() { 179 checkState(count != 0); 180 return mean; 181 } 182 183 /** 184 * Returns the sum of the values. 185 * 186 * <h3>Non-finite values</h3> 187 * 188 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 189 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 190 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 191 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 192 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or 193 * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 194 */ 195 public double sum() { 196 return mean * count; 197 } 198 199 /** 200 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 201 * variance</a> of the values. The count must be non-zero. 202 * 203 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. 204 * It is not guaranteed to return zero when the dataset consists of the same value multiple times, 205 * due to numerical errors. However, it is guaranteed never to return a negative result. 206 * 207 * <h3>Non-finite values</h3> 208 * 209 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 210 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 211 * 212 * @throws IllegalStateException if the dataset is empty 213 */ 214 public double populationVariance() { 215 checkState(count > 0); 216 if (isNaN(sumOfSquaresOfDeltas)) { 217 return NaN; 218 } 219 if (count == 1) { 220 return 0.0; 221 } 222 return ensureNonNegative(sumOfSquaresOfDeltas) / count(); 223 } 224 225 /** 226 * Returns the 227 * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 228 * population standard deviation</a> of the values. The count must be non-zero. 229 * 230 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. 231 * It is not guaranteed to return zero when the dataset consists of the same value multiple times, 232 * due to numerical errors. However, it is guaranteed never to return a negative result. 233 * 234 * <h3>Non-finite values</h3> 235 * 236 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 237 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 238 * 239 * @throws IllegalStateException if the dataset is empty 240 */ 241 public double populationStandardDeviation() { 242 return Math.sqrt(populationVariance()); 243 } 244 245 /** 246 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 247 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 248 * unbiased estimator of the population variance of the population. The count must be greater than 249 * one. 250 * 251 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 252 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 253 * 254 * <h3>Non-finite values</h3> 255 * 256 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 257 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 258 * 259 * @throws IllegalStateException if the dataset is empty or contains a single value 260 */ 261 public double sampleVariance() { 262 checkState(count > 1); 263 if (isNaN(sumOfSquaresOfDeltas)) { 264 return NaN; 265 } 266 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 267 } 268 269 /** 270 * Returns the 271 * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 272 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 273 * population, this is an estimator of the population standard deviation of the population which 274 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 275 * the distribution). The count must be greater than one. 276 * 277 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 278 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 279 * 280 * <h3>Non-finite values</h3> 281 * 282 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 283 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 284 * 285 * @throws IllegalStateException if the dataset is empty or contains a single value 286 */ 287 public double sampleStandardDeviation() { 288 return Math.sqrt(sampleVariance()); 289 } 290 291 /** 292 * Returns the lowest value in the dataset. The count must be non-zero. 293 * 294 * <h3>Non-finite values</h3> 295 * 296 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 297 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is 298 * {@link Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite 299 * values only then the result is the lowest finite value. If it contains 300 * {@link Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 301 * 302 * @throws IllegalStateException if the dataset is empty 303 */ 304 public double min() { 305 checkState(count != 0); 306 return min; 307 } 308 309 /** 310 * Returns the highest value in the dataset. The count must be non-zero. 311 * 312 * <h3>Non-finite values</h3> 313 * 314 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 315 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is 316 * {@link Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite 317 * values only then the result is the highest finite value. If it contains 318 * {@link Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 319 * 320 * @throws IllegalStateException if the dataset is empty 321 */ 322 public double max() { 323 checkState(count != 0); 324 return max; 325 } 326 327 /** 328 * {@inheritDoc} 329 * 330 * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating 331 * point values. Two instances are guaranteed to be considered equal if one is copied from the 332 * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were 333 * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any 334 * values in between the two calls, or if one is obtained from the other after round-tripping 335 * through java serialization. However, floating point rounding errors mean that it may be false 336 * for some instances where the statistics are mathematically equal, including instances 337 * constructed from the same values in a different order... or (in the general case) even in the 338 * same order. (It is guaranteed to return true for instances constructed from the same values in 339 * the same order if {@code strictfp} is in effect, or if the system architecture guarantees 340 * {@code strictfp}-like semantics.) 341 */ 342 @Override 343 public boolean equals(@Nullable Object obj) { 344 if (obj == null) { 345 return false; 346 } 347 if (getClass() != obj.getClass()) { 348 return false; 349 } 350 Stats other = (Stats) obj; 351 return (count == other.count) 352 && (doubleToLongBits(mean) == doubleToLongBits(other.mean)) 353 && (doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas)) 354 && (doubleToLongBits(min) == doubleToLongBits(other.min)) 355 && (doubleToLongBits(max) == doubleToLongBits(other.max)); 356 } 357 358 /** 359 * {@inheritDoc} 360 * 361 * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics, 362 * including the floating point values. See the note on {@link #equals} for details. 363 */ 364 @Override 365 public int hashCode() { 366 return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max); 367 } 368 369 @Override 370 public String toString() { 371 if (count() > 0) { 372 return MoreObjects.toStringHelper(this) 373 .add("count", count) 374 .add("mean", mean) 375 .add("populationStandardDeviation", populationStandardDeviation()) 376 .add("min", min) 377 .add("max", max) 378 .toString(); 379 } else { 380 return MoreObjects.toStringHelper(this).add("count", count).toString(); 381 } 382 } 383 384 double sumOfSquaresOfDeltas() { 385 return sumOfSquaresOfDeltas; 386 } 387 388 /** 389 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 390 * values. The count must be non-zero. 391 * 392 * <p>The definition of the mean is the same as {@link Stats#mean}. 393 * 394 * @param values a series of values, which will be converted to {@code double} values (this may 395 * cause loss of precision) 396 * @throws IllegalArgumentException if the dataset is empty 397 */ 398 public static double meanOf(Iterable<? extends Number> values) { 399 return meanOf(values.iterator()); 400 } 401 402 /** 403 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 404 * values. The count must be non-zero. 405 * 406 * <p>The definition of the mean is the same as {@link Stats#mean}. 407 * 408 * @param values a series of values, which will be converted to {@code double} values (this may 409 * cause loss of precision) 410 * @throws IllegalArgumentException if the dataset is empty 411 */ 412 public static double meanOf(Iterator<? extends Number> values) { 413 checkArgument(values.hasNext()); 414 long count = 1; 415 double mean = values.next().doubleValue(); 416 while (values.hasNext()) { 417 double value = values.next().doubleValue(); 418 count++; 419 if (isFinite(value) && isFinite(mean)) { 420 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 421 mean += (value - mean) / count; 422 } else { 423 mean = calculateNewMeanNonFinite(mean, value); 424 } 425 } 426 return mean; 427 } 428 429 /** 430 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 431 * values. The count must be non-zero. 432 * 433 * <p>The definition of the mean is the same as {@link Stats#mean}. 434 * 435 * @param values a series of values 436 * @throws IllegalArgumentException if the dataset is empty 437 */ 438 public static double meanOf(double... values) { 439 checkArgument(values.length > 0); 440 double mean = values[0]; 441 for (int index = 1; index < values.length; index++) { 442 double value = values[index]; 443 if (isFinite(value) && isFinite(mean)) { 444 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 445 mean += (value - mean) / (index + 1); 446 } else { 447 mean = calculateNewMeanNonFinite(mean, value); 448 } 449 } 450 return mean; 451 } 452 453 /** 454 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 455 * values. The count must be non-zero. 456 * 457 * <p>The definition of the mean is the same as {@link Stats#mean}. 458 * 459 * @param values a series of values 460 * @throws IllegalArgumentException if the dataset is empty 461 */ 462 public static double meanOf(int... values) { 463 checkArgument(values.length > 0); 464 double mean = values[0]; 465 for (int index = 1; index < values.length; index++) { 466 double value = values[index]; 467 if (isFinite(value) && isFinite(mean)) { 468 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 469 mean += (value - mean) / (index + 1); 470 } else { 471 mean = calculateNewMeanNonFinite(mean, value); 472 } 473 } 474 return mean; 475 } 476 477 /** 478 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 479 * values. The count must be non-zero. 480 * 481 * <p>The definition of the mean is the same as {@link Stats#mean}. 482 * 483 * @param values a series of values, which will be converted to {@code double} values (this may 484 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 485 * @throws IllegalArgumentException if the dataset is empty 486 */ 487 public static double meanOf(long... values) { 488 checkArgument(values.length > 0); 489 double mean = values[0]; 490 for (int index = 1; index < values.length; index++) { 491 double value = values[index]; 492 if (isFinite(value) && isFinite(mean)) { 493 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 494 mean += (value - mean) / (index + 1); 495 } else { 496 mean = calculateNewMeanNonFinite(mean, value); 497 } 498 } 499 return mean; 500 } 501 502 // Serialization helpers 503 504 /** 505 * The size of byte array representation in bytes. 506 */ 507 static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE; 508 509 /** 510 * Gets a byte array representation of this instance. 511 * 512 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 513 * versions. 514 */ 515 public byte[] toByteArray() { 516 ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN); 517 writeTo(buff); 518 return buff.array(); 519 } 520 521 /** 522 * Writes to the given {@link ByteBuffer} a byte representation of this instance. 523 * 524 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 525 * versions. 526 * 527 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 528 * {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance 529 * is written. In the process increases the position of {@link ByteBuffer} by BYTES. 530 */ 531 void writeTo(ByteBuffer buffer) { 532 checkNotNull(buffer); 533 checkArgument( 534 buffer.remaining() >= BYTES, 535 "Expected at least Stats.BYTES = %s remaining , got %s", 536 BYTES, 537 buffer.remaining()); 538 buffer 539 .putLong(count) 540 .putDouble(mean) 541 .putDouble(sumOfSquaresOfDeltas) 542 .putDouble(min) 543 .putDouble(max); 544 } 545 546 /** 547 * Creates a Stats instance from the given byte representation which was obtained by 548 * {@link #toByteArray}. 549 * 550 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 551 * versions. 552 */ 553 public static Stats fromByteArray(byte[] byteArray) { 554 checkNotNull(byteArray); 555 checkArgument( 556 byteArray.length == BYTES, 557 "Expected Stats.BYTES = %s remaining , got %s", 558 BYTES, 559 byteArray.length); 560 return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN)); 561 } 562 563 /** 564 * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}. 565 * 566 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 567 * versions. 568 * 569 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 570 * {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this 571 * instance is read. In the process increases the position of {@link ByteBuffer} by BYTES. 572 */ 573 static Stats readFrom(ByteBuffer buffer) { 574 checkNotNull(buffer); 575 checkArgument( 576 buffer.remaining() >= BYTES, 577 "Expected at least Stats.BYTES = %s remaining , got %s", 578 BYTES, 579 buffer.remaining()); 580 return new Stats( 581 buffer.getLong(), 582 buffer.getDouble(), 583 buffer.getDouble(), 584 buffer.getDouble(), 585 buffer.getDouble()); 586 } 587 588 private static final long serialVersionUID = 0; 589}