001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020import static com.google.common.math.DoubleUtils.ensureNonNegative;
021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite;
022import static com.google.common.primitives.Doubles.isFinite;
023import static java.lang.Double.NaN;
024import static java.lang.Double.doubleToLongBits;
025import static java.lang.Double.isNaN;
026
027import com.google.common.annotations.Beta;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.MoreObjects;
030import com.google.common.base.Objects;
031import java.io.Serializable;
032import java.nio.ByteBuffer;
033import java.nio.ByteOrder;
034import java.util.Iterator;
035import org.checkerframework.checker.nullness.qual.Nullable;
036
037/**
038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several
039 * forms of variance -- that were computed from a single set of zero or more floating-point values.
040 *
041 * <p>There are two ways to obtain a {@code Stats} instance:
042 *
043 * <ul>
044 *   <li>If all the values you want to summarize are already known, use the appropriate {@code
045 *       Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of
046 *       {@code Number}, and primitive varargs are supported.
047 *   <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance,
048 *       feed values to it as you get them, then call {@link StatsAccumulator#snapshot}.
049 * </ul>
050 *
051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to
052 * calculate <i>only</i> the mean.
053 *
054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use
055 * built-in JDK libraries instead of this class.
056 *
057 * @author Pete Gillin
058 * @author Kevin Bourrillion
059 * @since 20.0
060 */
061@Beta
062@GwtIncompatible
063public final class Stats implements Serializable {
064
065  private final long count;
066  private final double mean;
067  private final double sumOfSquaresOfDeltas;
068  private final double min;
069  private final double max;
070
071  /**
072   * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}.
073   *
074   * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the
075   * following constraints. This is the callers responsibility and is not enforced here.
076   *
077   * <ul>
078   *   <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to
079   *       get multiplied by 0 to calculate the sum), and the other parameters may have any values
080   *       (they will not be used).
081   *   <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or {@link
082   *       Double#NaN}.
083   * </ul>
084   */
085  Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) {
086    this.count = count;
087    this.mean = mean;
088    this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas;
089    this.min = min;
090    this.max = max;
091  }
092
093  /**
094   * Returns statistics over a dataset containing the given values.
095   *
096   * @param values a series of values, which will be converted to {@code double} values (this may
097   *     cause loss of precision)
098   */
099  public static Stats of(Iterable<? extends Number> values) {
100    StatsAccumulator accumulator = new StatsAccumulator();
101    accumulator.addAll(values);
102    return accumulator.snapshot();
103  }
104
105  /**
106   * Returns statistics over a dataset containing the given values.
107   *
108   * @param values a series of values, which will be converted to {@code double} values (this may
109   *     cause loss of precision)
110   */
111  public static Stats of(Iterator<? extends Number> values) {
112    StatsAccumulator accumulator = new StatsAccumulator();
113    accumulator.addAll(values);
114    return accumulator.snapshot();
115  }
116
117  /**
118   * Returns statistics over a dataset containing the given values.
119   *
120   * @param values a series of values
121   */
122  public static Stats of(double... values) {
123    StatsAccumulator acummulator = new StatsAccumulator();
124    acummulator.addAll(values);
125    return acummulator.snapshot();
126  }
127
128  /**
129   * Returns statistics over a dataset containing the given values.
130   *
131   * @param values a series of values
132   */
133  public static Stats of(int... values) {
134    StatsAccumulator acummulator = new StatsAccumulator();
135    acummulator.addAll(values);
136    return acummulator.snapshot();
137  }
138
139  /**
140   * Returns statistics over a dataset containing the given values.
141   *
142   * @param values a series of values, which will be converted to {@code double} values (this may
143   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
144   */
145  public static Stats of(long... values) {
146    StatsAccumulator acummulator = new StatsAccumulator();
147    acummulator.addAll(values);
148    return acummulator.snapshot();
149  }
150
151  /** Returns the number of values. */
152  public long count() {
153    return count;
154  }
155
156  /**
157   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
158   * values. The count must be non-zero.
159   *
160   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
161   * the arithmetic mean of the population.
162   *
163   * <h3>Non-finite values</h3>
164   *
165   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
166   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
167   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
168   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
169   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
170   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
171   *
172   * <p>If you only want to calculate the mean, use {#meanOf} instead of creating a {@link Stats}
173   * instance.
174   *
175   * @throws IllegalStateException if the dataset is empty
176   */
177  public double mean() {
178    checkState(count != 0);
179    return mean;
180  }
181
182  /**
183   * Returns the sum of the values.
184   *
185   * <h3>Non-finite values</h3>
186   *
187   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
188   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
189   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
190   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
191   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
192   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
193   */
194  public double sum() {
195    return mean * count;
196  }
197
198  /**
199   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
200   * variance</a> of the values. The count must be non-zero.
201   *
202   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
203   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
204   * due to numerical errors. However, it is guaranteed never to return a negative result.
205   *
206   * <h3>Non-finite values</h3>
207   *
208   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
209   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
210   *
211   * @throws IllegalStateException if the dataset is empty
212   */
213  public double populationVariance() {
214    checkState(count > 0);
215    if (isNaN(sumOfSquaresOfDeltas)) {
216      return NaN;
217    }
218    if (count == 1) {
219      return 0.0;
220    }
221    return ensureNonNegative(sumOfSquaresOfDeltas) / count();
222  }
223
224  /**
225   * Returns the <a
226   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
227   * population standard deviation</a> of the values. The count must be non-zero.
228   *
229   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
230   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
231   * due to numerical errors. However, it is guaranteed never to return a negative result.
232   *
233   * <h3>Non-finite values</h3>
234   *
235   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
236   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
237   *
238   * @throws IllegalStateException if the dataset is empty
239   */
240  public double populationStandardDeviation() {
241    return Math.sqrt(populationVariance());
242  }
243
244  /**
245   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
246   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
247   * unbiased estimator of the population variance of the population. The count must be greater than
248   * one.
249   *
250   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
251   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
252   *
253   * <h3>Non-finite values</h3>
254   *
255   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
256   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
257   *
258   * @throws IllegalStateException if the dataset is empty or contains a single value
259   */
260  public double sampleVariance() {
261    checkState(count > 1);
262    if (isNaN(sumOfSquaresOfDeltas)) {
263      return NaN;
264    }
265    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
266  }
267
268  /**
269   * Returns the <a
270   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
271   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
272   * population, this is an estimator of the population standard deviation of the population which
273   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
274   * the distribution). The count must be greater than one.
275   *
276   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
277   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
278   *
279   * <h3>Non-finite values</h3>
280   *
281   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
282   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
283   *
284   * @throws IllegalStateException if the dataset is empty or contains a single value
285   */
286  public double sampleStandardDeviation() {
287    return Math.sqrt(sampleVariance());
288  }
289
290  /**
291   * Returns the lowest value in the dataset. The count must be non-zero.
292   *
293   * <h3>Non-finite values</h3>
294   *
295   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
296   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
297   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
298   * only then the result is the lowest finite value. If it contains {@link
299   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
300   *
301   * @throws IllegalStateException if the dataset is empty
302   */
303  public double min() {
304    checkState(count != 0);
305    return min;
306  }
307
308  /**
309   * Returns the highest value in the dataset. The count must be non-zero.
310   *
311   * <h3>Non-finite values</h3>
312   *
313   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
314   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
315   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
316   * only then the result is the highest finite value. If it contains {@link
317   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
318   *
319   * @throws IllegalStateException if the dataset is empty
320   */
321  public double max() {
322    checkState(count != 0);
323    return max;
324  }
325
326  /**
327   * {@inheritDoc}
328   *
329   * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating
330   * point values. Two instances are guaranteed to be considered equal if one is copied from the
331   * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were
332   * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any
333   * values in between the two calls, or if one is obtained from the other after round-tripping
334   * through java serialization. However, floating point rounding errors mean that it may be false
335   * for some instances where the statistics are mathematically equal, including instances
336   * constructed from the same values in a different order... or (in the general case) even in the
337   * same order. (It is guaranteed to return true for instances constructed from the same values in
338   * the same order if {@code strictfp} is in effect, or if the system architecture guarantees
339   * {@code strictfp}-like semantics.)
340   */
341  @Override
342  public boolean equals(@Nullable Object obj) {
343    if (obj == null) {
344      return false;
345    }
346    if (getClass() != obj.getClass()) {
347      return false;
348    }
349    Stats other = (Stats) obj;
350    return (count == other.count)
351        && (doubleToLongBits(mean) == doubleToLongBits(other.mean))
352        && (doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas))
353        && (doubleToLongBits(min) == doubleToLongBits(other.min))
354        && (doubleToLongBits(max) == doubleToLongBits(other.max));
355  }
356
357  /**
358   * {@inheritDoc}
359   *
360   * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics,
361   * including the floating point values. See the note on {@link #equals} for details.
362   */
363  @Override
364  public int hashCode() {
365    return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max);
366  }
367
368  @Override
369  public String toString() {
370    if (count() > 0) {
371      return MoreObjects.toStringHelper(this)
372          .add("count", count)
373          .add("mean", mean)
374          .add("populationStandardDeviation", populationStandardDeviation())
375          .add("min", min)
376          .add("max", max)
377          .toString();
378    } else {
379      return MoreObjects.toStringHelper(this).add("count", count).toString();
380    }
381  }
382
383  double sumOfSquaresOfDeltas() {
384    return sumOfSquaresOfDeltas;
385  }
386
387  /**
388   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
389   * values. The count must be non-zero.
390   *
391   * <p>The definition of the mean is the same as {@link Stats#mean}.
392   *
393   * @param values a series of values, which will be converted to {@code double} values (this may
394   *     cause loss of precision)
395   * @throws IllegalArgumentException if the dataset is empty
396   */
397  public static double meanOf(Iterable<? extends Number> values) {
398    return meanOf(values.iterator());
399  }
400
401  /**
402   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
403   * values. The count must be non-zero.
404   *
405   * <p>The definition of the mean is the same as {@link Stats#mean}.
406   *
407   * @param values a series of values, which will be converted to {@code double} values (this may
408   *     cause loss of precision)
409   * @throws IllegalArgumentException if the dataset is empty
410   */
411  public static double meanOf(Iterator<? extends Number> values) {
412    checkArgument(values.hasNext());
413    long count = 1;
414    double mean = values.next().doubleValue();
415    while (values.hasNext()) {
416      double value = values.next().doubleValue();
417      count++;
418      if (isFinite(value) && isFinite(mean)) {
419        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
420        mean += (value - mean) / count;
421      } else {
422        mean = calculateNewMeanNonFinite(mean, value);
423      }
424    }
425    return mean;
426  }
427
428  /**
429   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
430   * values. The count must be non-zero.
431   *
432   * <p>The definition of the mean is the same as {@link Stats#mean}.
433   *
434   * @param values a series of values
435   * @throws IllegalArgumentException if the dataset is empty
436   */
437  public static double meanOf(double... values) {
438    checkArgument(values.length > 0);
439    double mean = values[0];
440    for (int index = 1; index < values.length; index++) {
441      double value = values[index];
442      if (isFinite(value) && isFinite(mean)) {
443        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
444        mean += (value - mean) / (index + 1);
445      } else {
446        mean = calculateNewMeanNonFinite(mean, value);
447      }
448    }
449    return mean;
450  }
451
452  /**
453   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
454   * values. The count must be non-zero.
455   *
456   * <p>The definition of the mean is the same as {@link Stats#mean}.
457   *
458   * @param values a series of values
459   * @throws IllegalArgumentException if the dataset is empty
460   */
461  public static double meanOf(int... values) {
462    checkArgument(values.length > 0);
463    double mean = values[0];
464    for (int index = 1; index < values.length; index++) {
465      double value = values[index];
466      if (isFinite(value) && isFinite(mean)) {
467        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
468        mean += (value - mean) / (index + 1);
469      } else {
470        mean = calculateNewMeanNonFinite(mean, value);
471      }
472    }
473    return mean;
474  }
475
476  /**
477   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
478   * values. The count must be non-zero.
479   *
480   * <p>The definition of the mean is the same as {@link Stats#mean}.
481   *
482   * @param values a series of values, which will be converted to {@code double} values (this may
483   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
484   * @throws IllegalArgumentException if the dataset is empty
485   */
486  public static double meanOf(long... values) {
487    checkArgument(values.length > 0);
488    double mean = values[0];
489    for (int index = 1; index < values.length; index++) {
490      double value = values[index];
491      if (isFinite(value) && isFinite(mean)) {
492        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
493        mean += (value - mean) / (index + 1);
494      } else {
495        mean = calculateNewMeanNonFinite(mean, value);
496      }
497    }
498    return mean;
499  }
500
501  // Serialization helpers
502
503  /** The size of byte array representation in bytes. */
504  static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE;
505
506  /**
507   * Gets a byte array representation of this instance.
508   *
509   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
510   * versions.
511   */
512  public byte[] toByteArray() {
513    ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN);
514    writeTo(buff);
515    return buff.array();
516  }
517
518  /**
519   * Writes to the given {@link ByteBuffer} a byte representation of this instance.
520   *
521   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
522   * versions.
523   *
524   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
525   *     {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance
526   *     is written. In the process increases the position of {@link ByteBuffer} by BYTES.
527   */
528  void writeTo(ByteBuffer buffer) {
529    checkNotNull(buffer);
530    checkArgument(
531        buffer.remaining() >= BYTES,
532        "Expected at least Stats.BYTES = %s remaining , got %s",
533        BYTES,
534        buffer.remaining());
535    buffer
536        .putLong(count)
537        .putDouble(mean)
538        .putDouble(sumOfSquaresOfDeltas)
539        .putDouble(min)
540        .putDouble(max);
541  }
542
543  /**
544   * Creates a Stats instance from the given byte representation which was obtained by {@link
545   * #toByteArray}.
546   *
547   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
548   * versions.
549   */
550  public static Stats fromByteArray(byte[] byteArray) {
551    checkNotNull(byteArray);
552    checkArgument(
553        byteArray.length == BYTES,
554        "Expected Stats.BYTES = %s remaining , got %s",
555        BYTES,
556        byteArray.length);
557    return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN));
558  }
559
560  /**
561   * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}.
562   *
563   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
564   * versions.
565   *
566   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
567   *     {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this
568   *     instance is read. In the process increases the position of {@link ByteBuffer} by BYTES.
569   */
570  static Stats readFrom(ByteBuffer buffer) {
571    checkNotNull(buffer);
572    checkArgument(
573        buffer.remaining() >= BYTES,
574        "Expected at least Stats.BYTES = %s remaining , got %s",
575        BYTES,
576        buffer.remaining());
577    return new Stats(
578        buffer.getLong(),
579        buffer.getDouble(),
580        buffer.getDouble(),
581        buffer.getDouble(),
582        buffer.getDouble());
583  }
584
585  private static final long serialVersionUID = 0;
586}