001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkState; 020import static com.google.common.math.DoubleUtils.ensureNonNegative; 021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite; 022import static com.google.common.primitives.Doubles.isFinite; 023import static java.lang.Double.NaN; 024import static java.lang.Double.doubleToLongBits; 025import static java.lang.Double.isNaN; 026 027import com.google.common.annotations.Beta; 028import com.google.common.annotations.GwtIncompatible; 029import com.google.common.base.MoreObjects; 030import com.google.common.base.Objects; 031import java.io.Serializable; 032import java.nio.ByteBuffer; 033import java.nio.ByteOrder; 034import java.util.Iterator; 035import javax.annotation.Nullable; 036 037/** 038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several 039 * forms of variance -- that were computed from a single set of zero or more floating-point values. 040 * 041 * <p>There are two ways to obtain a {@code Stats} instance: 042 * 043 * <ul> 044 * <li>If all the values you want to summarize are already known, use the appropriate {@code 045 * Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of 046 * {@code Number}, and primitive varargs are supported. 047 * <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance, feed 048 * values to it as you get them, then call {@link StatsAccumulator#snapshot}. 049 * </ul> 050 * 051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to 052 * calculate <i>only</i> the mean. 053 * 054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use 055 * built-in JDK libraries instead of this class. 056 * 057 * @author Pete Gillin 058 * @author Kevin Bourrillion 059 * @since 20.0 060 */ 061@Beta 062@GwtIncompatible 063public final class Stats implements Serializable { 064 065 private final long count; 066 private final double mean; 067 private final double sumOfSquaresOfDeltas; 068 private final double min; 069 private final double max; 070 071 /** 072 * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}. 073 * 074 * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the 075 * following constraints. This is the callers responsibility and is not enforced here. 076 * <ul> 077 * <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to 078 * get multiplied by 0 to calculate the sum), and the other parameters may have any values (they 079 * will not be used). 080 * <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or 081 * {@link Double#NaN}. 082 * </ul> 083 */ 084 Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) { 085 this.count = count; 086 this.mean = mean; 087 this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas; 088 this.min = min; 089 this.max = max; 090 } 091 092 /** 093 * Returns statistics over a dataset containing the given values. 094 * 095 * @param values a series of values, which will be converted to {@code double} values (this may 096 * cause loss of precision) 097 */ 098 public static Stats of(Iterable<? extends Number> values) { 099 StatsAccumulator accumulator = new StatsAccumulator(); 100 accumulator.addAll(values); 101 return accumulator.snapshot(); 102 } 103 104 /** 105 * Returns statistics over a dataset containing the given values. 106 * 107 * @param values a series of values, which will be converted to {@code double} values (this may 108 * cause loss of precision) 109 */ 110 public static Stats of(Iterator<? extends Number> values) { 111 StatsAccumulator accumulator = new StatsAccumulator(); 112 accumulator.addAll(values); 113 return accumulator.snapshot(); 114 } 115 116 /** 117 * Returns statistics over a dataset containing the given values. 118 * 119 * @param values a series of values 120 */ 121 public static Stats of(double... values) { 122 StatsAccumulator acummulator = new StatsAccumulator(); 123 acummulator.addAll(values); 124 return acummulator.snapshot(); 125 } 126 127 /** 128 * Returns statistics over a dataset containing the given values. 129 * 130 * @param values a series of values 131 */ 132 public static Stats of(int... values) { 133 StatsAccumulator acummulator = new StatsAccumulator(); 134 acummulator.addAll(values); 135 return acummulator.snapshot(); 136 } 137 138 /** 139 * Returns statistics over a dataset containing the given values. 140 * 141 * @param values a series of values, which will be converted to {@code double} values (this may 142 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 143 */ 144 public static Stats of(long... values) { 145 StatsAccumulator acummulator = new StatsAccumulator(); 146 acummulator.addAll(values); 147 return acummulator.snapshot(); 148 } 149 150 /** 151 * Returns the number of values. 152 */ 153 public long count() { 154 return count; 155 } 156 157 /** 158 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 159 * values. The count must be non-zero. 160 * 161 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 162 * the arithmetic mean of the population. 163 * 164 * <h3>Non-finite values</h3> 165 * 166 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 167 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 168 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 169 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 170 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or 171 * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 172 * 173 * <p>If you only want to calculate the mean, use {#meanOf} instead of creating a {@link Stats} 174 * instance. 175 * 176 * @throws IllegalStateException if the dataset is empty 177 */ 178 public double mean() { 179 checkState(count != 0); 180 return mean; 181 } 182 183 /** 184 * Returns the sum of the values. 185 * 186 * <h3>Non-finite values</h3> 187 * 188 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 189 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 190 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 191 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 192 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or 193 * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 194 */ 195 public double sum() { 196 return mean * count; 197 } 198 199 /** 200 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 201 * variance</a> of the values. The count must be non-zero. 202 * 203 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. 204 * It is not guaranteed to return zero when the dataset consists of the same value multiple times, 205 * due to numerical errors. However, it is guaranteed never to return a negative result. 206 * 207 * <h3>Non-finite values</h3> 208 * 209 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 210 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 211 * 212 * @throws IllegalStateException if the dataset is empty 213 */ 214 public double populationVariance() { 215 checkState(count > 0); 216 if (isNaN(sumOfSquaresOfDeltas)) { 217 return NaN; 218 } 219 if (count == 1) { 220 return 0.0; 221 } 222 return ensureNonNegative(sumOfSquaresOfDeltas) / count(); 223 } 224 225 /** 226 * Returns the 227 * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 228 * population standard deviation</a> of the values. The count must be non-zero. 229 * 230 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. 231 * It is not guaranteed to return zero when the dataset consists of the same value multiple times, 232 * due to numerical errors. However, it is guaranteed never to return a negative result. 233 * 234 * <h3>Non-finite values</h3> 235 * 236 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 237 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 238 * 239 * @throws IllegalStateException if the dataset is empty 240 */ 241 public double populationStandardDeviation() { 242 return Math.sqrt(populationVariance()); 243 } 244 245 /** 246 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbaised sample 247 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 248 * unbiased estimator of the population variance of the population. The count must be greater than 249 * one. 250 * 251 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 252 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 253 * 254 * <h3>Non-finite values</h3> 255 * 256 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 257 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 258 * 259 * @throws IllegalStateException if the dataset is empty or contains a single value 260 */ 261 public double sampleVariance() { 262 checkState(count > 1); 263 if (isNaN(sumOfSquaresOfDeltas)) { 264 return NaN; 265 } 266 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 267 } 268 269 /** 270 * Returns the 271 * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 272 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 273 * population, this is an estimator of the population standard deviation of the population which 274 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 275 * the distribution). The count must be greater than one. 276 * 277 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 278 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 279 * 280 * <h3>Non-finite values</h3> 281 * 282 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 283 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 284 * 285 * @throws IllegalStateException if the dataset is empty or contains a single value 286 */ 287 public double sampleStandardDeviation() { 288 return Math.sqrt(sampleVariance()); 289 } 290 291 /** 292 * Returns the lowest value in the dataset. The count must be non-zero. 293 * 294 * <h3>Non-finite values</h3> 295 * 296 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 297 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is 298 * {@link Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite 299 * values only then the result is the lowest finite value. If it contains 300 * {@link Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 301 * 302 * @throws IllegalStateException if the dataset is empty 303 */ 304 public double min() { 305 checkState(count != 0); 306 return min; 307 } 308 309 /** 310 * Returns the highest value in the dataset. The count must be non-zero. 311 * 312 * <h3>Non-finite values</h3> 313 * 314 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 315 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is 316 * {@link Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite 317 * values only then the result is the highest finite value. If it contains 318 * {@link Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 319 * 320 * @throws IllegalStateException if the dataset is empty 321 */ 322 public double max() { 323 checkState(count != 0); 324 return max; 325 } 326 327 /** 328 * {@inheritDoc} 329 * 330 * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating 331 * point values. It is definitely true for instances constructed from exactly the same values in 332 * the same order. It is also true for an instance round-tripped through java serialization. 333 * However, floating point rounding errors mean that it may be false for some instances where the 334 * statistics are mathematically equal, including the same values in a different order. 335 */ 336 @Override 337 public boolean equals(@Nullable Object obj) { 338 if (obj == null) { 339 return false; 340 } 341 if (getClass() != obj.getClass()) { 342 return false; 343 } 344 Stats other = (Stats) obj; 345 return (count == other.count) 346 && (doubleToLongBits(mean) == doubleToLongBits(other.mean)) 347 && (doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas)) 348 && (doubleToLongBits(min) == doubleToLongBits(other.min)) 349 && (doubleToLongBits(max) == doubleToLongBits(other.max)); 350 } 351 352 /** 353 * {@inheritDoc} 354 * 355 * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics, 356 * including the floating point values. See the note on {@link #equals} for details. 357 */ 358 @Override 359 public int hashCode() { 360 return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max); 361 } 362 363 @Override 364 public String toString() { 365 if (count() > 0) { 366 return MoreObjects.toStringHelper(this) 367 .add("count", count) 368 .add("mean", mean) 369 .add("populationStandardDeviation", populationStandardDeviation()) 370 .add("min", min) 371 .add("max", max) 372 .toString(); 373 } else { 374 return MoreObjects.toStringHelper(this).add("count", count).toString(); 375 } 376 } 377 378 double sumOfSquaresOfDeltas() { 379 return sumOfSquaresOfDeltas; 380 } 381 382 /** 383 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 384 * values. The count must be non-zero. 385 * 386 * <p>The definition of the mean is the same as {@link Stats#mean}. 387 * 388 * @param values a series of values, which will be converted to {@code double} values (this may 389 * cause loss of precision) 390 * @throws IllegalArgumentException if the dataset is empty 391 */ 392 public static double meanOf(Iterable<? extends Number> values) { 393 return meanOf(values.iterator()); 394 } 395 396 /** 397 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 398 * values. The count must be non-zero. 399 * 400 * <p>The definition of the mean is the same as {@link Stats#mean}. 401 * 402 * @param values a series of values, which will be converted to {@code double} values (this may 403 * cause loss of precision) 404 * @throws IllegalArgumentException if the dataset is empty 405 */ 406 public static double meanOf(Iterator<? extends Number> values) { 407 checkArgument(values.hasNext()); 408 long count = 1; 409 double mean = values.next().doubleValue(); 410 while (values.hasNext()) { 411 double value = values.next().doubleValue(); 412 count++; 413 if (isFinite(value) && isFinite(mean)) { 414 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 415 mean += (value - mean) / count; 416 } else { 417 mean = calculateNewMeanNonFinite(mean, value); 418 } 419 } 420 return mean; 421 } 422 423 /** 424 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 425 * values. The count must be non-zero. 426 * 427 * <p>The definition of the mean is the same as {@link Stats#mean}. 428 * 429 * @param values a series of values 430 * @throws IllegalArgumentException if the dataset is empty 431 */ 432 public static double meanOf(double... values) { 433 checkArgument(values.length > 0); 434 double mean = values[0]; 435 for (int index = 1; index < values.length; index++) { 436 double value = values[index]; 437 if (isFinite(value) && isFinite(mean)) { 438 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 439 mean += (value - mean) / (index + 1); 440 } else { 441 mean = calculateNewMeanNonFinite(mean, value); 442 } 443 } 444 return mean; 445 } 446 447 /** 448 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 449 * values. The count must be non-zero. 450 * 451 * <p>The definition of the mean is the same as {@link Stats#mean}. 452 * 453 * @param values a series of values 454 * @throws IllegalArgumentException if the dataset is empty 455 */ 456 public static double meanOf(int... values) { 457 checkArgument(values.length > 0); 458 double mean = values[0]; 459 for (int index = 1; index < values.length; index++) { 460 double value = values[index]; 461 if (isFinite(value) && isFinite(mean)) { 462 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 463 mean += (value - mean) / (index + 1); 464 } else { 465 mean = calculateNewMeanNonFinite(mean, value); 466 } 467 } 468 return mean; 469 } 470 471 /** 472 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 473 * values. The count must be non-zero. 474 * 475 * <p>The definition of the mean is the same as {@link Stats#mean}. 476 * 477 * @param values a series of values, which will be converted to {@code double} values (this may 478 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 479 * @throws IllegalArgumentException if the dataset is empty 480 */ 481 public static double meanOf(long... values) { 482 checkArgument(values.length > 0); 483 double mean = values[0]; 484 for (int index = 1; index < values.length; index++) { 485 double value = values[index]; 486 if (isFinite(value) && isFinite(mean)) { 487 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) 488 mean += (value - mean) / (index + 1); 489 } else { 490 mean = calculateNewMeanNonFinite(mean, value); 491 } 492 } 493 return mean; 494 } 495 496 // Serialization helpers 497 498 /** 499 * The size of byte array representaion in bytes. 500 */ 501 static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE; 502 503 /** 504 * Gets a byte array representation of this instance. 505 * 506 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 507 * versions. 508 */ 509 public byte[] toByteArray() { 510 ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN); 511 writeTo(buff); 512 return buff.array(); 513 } 514 515 /** 516 * Writes to the given {@link ByteBuffer} a byte representation of this instance. 517 * 518 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 519 * versions. 520 * 521 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 522 * {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance 523 * is written. In the process increases the position of {@link ByteBuffer} by BYTES. 524 */ 525 void writeTo(ByteBuffer buffer) { 526 checkNotNull(buffer); 527 checkArgument( 528 buffer.remaining() >= BYTES, 529 "Expected at least Stats.BYTES = %s remaining , got %s", 530 BYTES, 531 buffer.remaining()); 532 buffer 533 .putLong(count) 534 .putDouble(mean) 535 .putDouble(sumOfSquaresOfDeltas) 536 .putDouble(min) 537 .putDouble(max); 538 } 539 540 /** 541 * Creates a Stats instance from the given byte representation which was obtained by 542 * {@link #toByteArray}. 543 * 544 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 545 * versions. 546 */ 547 public static Stats fromByteArray(byte[] byteArray) { 548 checkNotNull(byteArray); 549 checkArgument( 550 byteArray.length == BYTES, 551 "Expected Stats.BYTES = %s remaining , got %s", 552 BYTES, 553 byteArray.length); 554 return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN)); 555 } 556 557 /** 558 * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}. 559 * 560 * <p><b>Note:</b> No guarantees are made regarding stability of the representation between 561 * versions. 562 * 563 * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as 564 * {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this 565 * instance is read. In the process increases the position of {@link ByteBuffer} by BYTES. 566 */ 567 static Stats readFrom(ByteBuffer buffer) { 568 checkNotNull(buffer); 569 checkArgument( 570 buffer.remaining() >= BYTES, 571 "Expected at least Stats.BYTES = %s remaining , got %s", 572 BYTES, 573 buffer.remaining()); 574 return new Stats( 575 buffer.getLong(), 576 buffer.getDouble(), 577 buffer.getDouble(), 578 buffer.getDouble(), 579 buffer.getDouble()); 580 } 581 582 private static final long serialVersionUID = 0; 583}