001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020import static com.google.common.math.DoubleUtils.ensureNonNegative;
021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite;
022import static com.google.common.primitives.Doubles.isFinite;
023import static java.lang.Double.NaN;
024import static java.lang.Double.doubleToLongBits;
025import static java.lang.Double.isNaN;
026
027import com.google.common.annotations.Beta;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.MoreObjects;
030import com.google.common.base.Objects;
031import java.io.Serializable;
032import java.nio.ByteBuffer;
033import java.nio.ByteOrder;
034import java.util.Iterator;
035import org.checkerframework.checker.nullness.compatqual.NullableDecl;
036
037/**
038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several
039 * forms of variance -- that were computed from a single set of zero or more floating-point values.
040 *
041 * <p>There are two ways to obtain a {@code Stats} instance:
042 *
043 * <ul>
044 *   <li>If all the values you want to summarize are already known, use the appropriate {@code
045 *       Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of
046 *       {@code Number}, and primitive varargs are supported.
047 *   <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance,
048 *       feed values to it as you get them, then call {@link StatsAccumulator#snapshot}.
049 * </ul>
050 *
051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to
052 * calculate <i>only</i> the mean.
053 *
054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use
055 * built-in JDK libraries instead of this class.
056 *
057 * @author Pete Gillin
058 * @author Kevin Bourrillion
059 * @since 20.0
060 */
061@Beta
062@GwtIncompatible
063public final class Stats implements Serializable {
064
065  private final long count;
066  private final double mean;
067  private final double sumOfSquaresOfDeltas;
068  private final double min;
069  private final double max;
070
071  /**
072   * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}.
073   *
074   * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the
075   * following constraints. This is the callers responsibility and is not enforced here.
076   *
077   * <ul>
078   *   <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to
079   *       get multiplied by 0 to calculate the sum), and the other parameters may have any values
080   *       (they will not be used).
081   *   <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or {@link
082   *       Double#NaN}.
083   * </ul>
084   */
085  Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) {
086    this.count = count;
087    this.mean = mean;
088    this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas;
089    this.min = min;
090    this.max = max;
091  }
092
093  /**
094   * Returns statistics over a dataset containing the given values.
095   *
096   * @param values a series of values, which will be converted to {@code double} values (this may
097   *     cause loss of precision)
098   */
099  public static Stats of(Iterable<? extends Number> values) {
100    StatsAccumulator accumulator = new StatsAccumulator();
101    accumulator.addAll(values);
102    return accumulator.snapshot();
103  }
104
105  /**
106   * Returns statistics over a dataset containing the given values. The iterator will be completely
107   * consumed by this method.
108   *
109   * @param values a series of values, which will be converted to {@code double} values (this may
110   *     cause loss of precision)
111   */
112  public static Stats of(Iterator<? extends Number> values) {
113    StatsAccumulator accumulator = new StatsAccumulator();
114    accumulator.addAll(values);
115    return accumulator.snapshot();
116  }
117
118  /**
119   * Returns statistics over a dataset containing the given values.
120   *
121   * @param values a series of values
122   */
123  public static Stats of(double... values) {
124    StatsAccumulator acummulator = new StatsAccumulator();
125    acummulator.addAll(values);
126    return acummulator.snapshot();
127  }
128
129  /**
130   * Returns statistics over a dataset containing the given values.
131   *
132   * @param values a series of values
133   */
134  public static Stats of(int... values) {
135    StatsAccumulator acummulator = new StatsAccumulator();
136    acummulator.addAll(values);
137    return acummulator.snapshot();
138  }
139
140  /**
141   * Returns statistics over a dataset containing the given values.
142   *
143   * @param values a series of values, which will be converted to {@code double} values (this may
144   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
145   */
146  public static Stats of(long... values) {
147    StatsAccumulator acummulator = new StatsAccumulator();
148    acummulator.addAll(values);
149    return acummulator.snapshot();
150  }
151
152  /** Returns the number of values. */
153  public long count() {
154    return count;
155  }
156
157  /**
158   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
159   * values. The count must be non-zero.
160   *
161   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
162   * the arithmetic mean of the population.
163   *
164   * <h3>Non-finite values</h3>
165   *
166   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
167   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
168   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
169   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
170   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
171   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
172   *
173   * <p>If you only want to calculate the mean, use {#meanOf} instead of creating a {@link Stats}
174   * instance.
175   *
176   * @throws IllegalStateException if the dataset is empty
177   */
178  public double mean() {
179    checkState(count != 0);
180    return mean;
181  }
182
183  /**
184   * Returns the sum of the values.
185   *
186   * <h3>Non-finite values</h3>
187   *
188   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
189   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
190   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
191   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
192   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
193   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
194   */
195  public double sum() {
196    return mean * count;
197  }
198
199  /**
200   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
201   * variance</a> of the values. The count must be non-zero.
202   *
203   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
204   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
205   * due to numerical errors. However, it is guaranteed never to return a negative result.
206   *
207   * <h3>Non-finite values</h3>
208   *
209   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
210   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
211   *
212   * @throws IllegalStateException if the dataset is empty
213   */
214  public double populationVariance() {
215    checkState(count > 0);
216    if (isNaN(sumOfSquaresOfDeltas)) {
217      return NaN;
218    }
219    if (count == 1) {
220      return 0.0;
221    }
222    return ensureNonNegative(sumOfSquaresOfDeltas) / count();
223  }
224
225  /**
226   * Returns the <a
227   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
228   * population standard deviation</a> of the values. The count must be non-zero.
229   *
230   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
231   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
232   * due to numerical errors. However, it is guaranteed never to return a negative result.
233   *
234   * <h3>Non-finite values</h3>
235   *
236   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
237   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
238   *
239   * @throws IllegalStateException if the dataset is empty
240   */
241  public double populationStandardDeviation() {
242    return Math.sqrt(populationVariance());
243  }
244
245  /**
246   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
247   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
248   * unbiased estimator of the population variance of the population. The count must be greater than
249   * one.
250   *
251   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
252   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
253   *
254   * <h3>Non-finite values</h3>
255   *
256   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
257   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
258   *
259   * @throws IllegalStateException if the dataset is empty or contains a single value
260   */
261  public double sampleVariance() {
262    checkState(count > 1);
263    if (isNaN(sumOfSquaresOfDeltas)) {
264      return NaN;
265    }
266    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
267  }
268
269  /**
270   * Returns the <a
271   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
272   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
273   * population, this is an estimator of the population standard deviation of the population which
274   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
275   * the distribution). The count must be greater than one.
276   *
277   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
278   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
279   *
280   * <h3>Non-finite values</h3>
281   *
282   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
283   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
284   *
285   * @throws IllegalStateException if the dataset is empty or contains a single value
286   */
287  public double sampleStandardDeviation() {
288    return Math.sqrt(sampleVariance());
289  }
290
291  /**
292   * Returns the lowest value in the dataset. The count must be non-zero.
293   *
294   * <h3>Non-finite values</h3>
295   *
296   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
297   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
298   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
299   * only then the result is the lowest finite value. If it contains {@link
300   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
301   *
302   * @throws IllegalStateException if the dataset is empty
303   */
304  public double min() {
305    checkState(count != 0);
306    return min;
307  }
308
309  /**
310   * Returns the highest value in the dataset. The count must be non-zero.
311   *
312   * <h3>Non-finite values</h3>
313   *
314   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
315   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
316   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
317   * only then the result is the highest finite value. If it contains {@link
318   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
319   *
320   * @throws IllegalStateException if the dataset is empty
321   */
322  public double max() {
323    checkState(count != 0);
324    return max;
325  }
326
327  /**
328   * {@inheritDoc}
329   *
330   * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating
331   * point values. Two instances are guaranteed to be considered equal if one is copied from the
332   * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were
333   * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any
334   * values in between the two calls, or if one is obtained from the other after round-tripping
335   * through java serialization. However, floating point rounding errors mean that it may be false
336   * for some instances where the statistics are mathematically equal, including instances
337   * constructed from the same values in a different order... or (in the general case) even in the
338   * same order. (It is guaranteed to return true for instances constructed from the same values in
339   * the same order if {@code strictfp} is in effect, or if the system architecture guarantees
340   * {@code strictfp}-like semantics.)
341   */
342  @Override
343  public boolean equals(@NullableDecl Object obj) {
344    if (obj == null) {
345      return false;
346    }
347    if (getClass() != obj.getClass()) {
348      return false;
349    }
350    Stats other = (Stats) obj;
351    return (count == other.count)
352        && (doubleToLongBits(mean) == doubleToLongBits(other.mean))
353        && (doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas))
354        && (doubleToLongBits(min) == doubleToLongBits(other.min))
355        && (doubleToLongBits(max) == doubleToLongBits(other.max));
356  }
357
358  /**
359   * {@inheritDoc}
360   *
361   * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics,
362   * including the floating point values. See the note on {@link #equals} for details.
363   */
364  @Override
365  public int hashCode() {
366    return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max);
367  }
368
369  @Override
370  public String toString() {
371    if (count() > 0) {
372      return MoreObjects.toStringHelper(this)
373          .add("count", count)
374          .add("mean", mean)
375          .add("populationStandardDeviation", populationStandardDeviation())
376          .add("min", min)
377          .add("max", max)
378          .toString();
379    } else {
380      return MoreObjects.toStringHelper(this).add("count", count).toString();
381    }
382  }
383
384  double sumOfSquaresOfDeltas() {
385    return sumOfSquaresOfDeltas;
386  }
387
388  /**
389   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
390   * values. The count must be non-zero.
391   *
392   * <p>The definition of the mean is the same as {@link Stats#mean}.
393   *
394   * @param values a series of values, which will be converted to {@code double} values (this may
395   *     cause loss of precision)
396   * @throws IllegalArgumentException if the dataset is empty
397   */
398  public static double meanOf(Iterable<? extends Number> values) {
399    return meanOf(values.iterator());
400  }
401
402  /**
403   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
404   * values. The count must be non-zero.
405   *
406   * <p>The definition of the mean is the same as {@link Stats#mean}.
407   *
408   * @param values a series of values, which will be converted to {@code double} values (this may
409   *     cause loss of precision)
410   * @throws IllegalArgumentException if the dataset is empty
411   */
412  public static double meanOf(Iterator<? extends Number> values) {
413    checkArgument(values.hasNext());
414    long count = 1;
415    double mean = values.next().doubleValue();
416    while (values.hasNext()) {
417      double value = values.next().doubleValue();
418      count++;
419      if (isFinite(value) && isFinite(mean)) {
420        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
421        mean += (value - mean) / count;
422      } else {
423        mean = calculateNewMeanNonFinite(mean, value);
424      }
425    }
426    return mean;
427  }
428
429  /**
430   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
431   * values. The count must be non-zero.
432   *
433   * <p>The definition of the mean is the same as {@link Stats#mean}.
434   *
435   * @param values a series of values
436   * @throws IllegalArgumentException if the dataset is empty
437   */
438  public static double meanOf(double... values) {
439    checkArgument(values.length > 0);
440    double mean = values[0];
441    for (int index = 1; index < values.length; index++) {
442      double value = values[index];
443      if (isFinite(value) && isFinite(mean)) {
444        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
445        mean += (value - mean) / (index + 1);
446      } else {
447        mean = calculateNewMeanNonFinite(mean, value);
448      }
449    }
450    return mean;
451  }
452
453  /**
454   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
455   * values. The count must be non-zero.
456   *
457   * <p>The definition of the mean is the same as {@link Stats#mean}.
458   *
459   * @param values a series of values
460   * @throws IllegalArgumentException if the dataset is empty
461   */
462  public static double meanOf(int... values) {
463    checkArgument(values.length > 0);
464    double mean = values[0];
465    for (int index = 1; index < values.length; index++) {
466      double value = values[index];
467      if (isFinite(value) && isFinite(mean)) {
468        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
469        mean += (value - mean) / (index + 1);
470      } else {
471        mean = calculateNewMeanNonFinite(mean, value);
472      }
473    }
474    return mean;
475  }
476
477  /**
478   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
479   * values. The count must be non-zero.
480   *
481   * <p>The definition of the mean is the same as {@link Stats#mean}.
482   *
483   * @param values a series of values, which will be converted to {@code double} values (this may
484   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
485   * @throws IllegalArgumentException if the dataset is empty
486   */
487  public static double meanOf(long... values) {
488    checkArgument(values.length > 0);
489    double mean = values[0];
490    for (int index = 1; index < values.length; index++) {
491      double value = values[index];
492      if (isFinite(value) && isFinite(mean)) {
493        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
494        mean += (value - mean) / (index + 1);
495      } else {
496        mean = calculateNewMeanNonFinite(mean, value);
497      }
498    }
499    return mean;
500  }
501
502  // Serialization helpers
503
504  /** The size of byte array representation in bytes. */
505  static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE;
506
507  /**
508   * Gets a byte array representation of this instance.
509   *
510   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
511   * versions.
512   */
513  public byte[] toByteArray() {
514    ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN);
515    writeTo(buff);
516    return buff.array();
517  }
518
519  /**
520   * Writes to the given {@link ByteBuffer} a byte representation of this instance.
521   *
522   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
523   * versions.
524   *
525   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
526   *     {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance
527   *     is written. In the process increases the position of {@link ByteBuffer} by BYTES.
528   */
529  void writeTo(ByteBuffer buffer) {
530    checkNotNull(buffer);
531    checkArgument(
532        buffer.remaining() >= BYTES,
533        "Expected at least Stats.BYTES = %s remaining , got %s",
534        BYTES,
535        buffer.remaining());
536    buffer
537        .putLong(count)
538        .putDouble(mean)
539        .putDouble(sumOfSquaresOfDeltas)
540        .putDouble(min)
541        .putDouble(max);
542  }
543
544  /**
545   * Creates a Stats instance from the given byte representation which was obtained by {@link
546   * #toByteArray}.
547   *
548   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
549   * versions.
550   */
551  public static Stats fromByteArray(byte[] byteArray) {
552    checkNotNull(byteArray);
553    checkArgument(
554        byteArray.length == BYTES,
555        "Expected Stats.BYTES = %s remaining , got %s",
556        BYTES,
557        byteArray.length);
558    return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN));
559  }
560
561  /**
562   * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}.
563   *
564   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
565   * versions.
566   *
567   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
568   *     {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this
569   *     instance is read. In the process increases the position of {@link ByteBuffer} by BYTES.
570   */
571  static Stats readFrom(ByteBuffer buffer) {
572    checkNotNull(buffer);
573    checkArgument(
574        buffer.remaining() >= BYTES,
575        "Expected at least Stats.BYTES = %s remaining , got %s",
576        BYTES,
577        buffer.remaining());
578    return new Stats(
579        buffer.getLong(),
580        buffer.getDouble(),
581        buffer.getDouble(),
582        buffer.getDouble(),
583        buffer.getDouble());
584  }
585
586  private static final long serialVersionUID = 0;
587}