001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020import static com.google.common.math.DoubleUtils.ensureNonNegative;
021import static com.google.common.math.StatsAccumulator.calculateNewMeanNonFinite;
022import static com.google.common.primitives.Doubles.isFinite;
023import static java.lang.Double.NaN;
024import static java.lang.Double.doubleToLongBits;
025import static java.lang.Double.isNaN;
026
027import com.google.common.annotations.Beta;
028import com.google.common.annotations.GwtIncompatible;
029import com.google.common.base.MoreObjects;
030import com.google.common.base.Objects;
031import java.io.Serializable;
032import java.nio.ByteBuffer;
033import java.nio.ByteOrder;
034import java.util.Iterator;
035import javax.annotation.CheckForNull;
036
037/**
038 * A bundle of statistical summary values -- sum, count, mean/average, min and max, and several
039 * forms of variance -- that were computed from a single set of zero or more floating-point values.
040 *
041 * <p>There are two ways to obtain a {@code Stats} instance:
042 *
043 * <ul>
044 *   <li>If all the values you want to summarize are already known, use the appropriate {@code
045 *       Stats.of} factory method below. Primitive arrays, iterables and iterators of any kind of
046 *       {@code Number}, and primitive varargs are supported.
047 *   <li>Or, to avoid storing up all the data first, create a {@link StatsAccumulator} instance,
048 *       feed values to it as you get them, then call {@link StatsAccumulator#snapshot}.
049 * </ul>
050 *
051 * <p>Static convenience methods called {@code meanOf} are also provided for users who wish to
052 * calculate <i>only</i> the mean.
053 *
054 * <p><b>Java 8 users:</b> If you are not using any of the variance statistics, you may wish to use
055 * built-in JDK libraries instead of this class.
056 *
057 * @author Pete Gillin
058 * @author Kevin Bourrillion
059 * @since 20.0
060 */
061@Beta
062@GwtIncompatible
063@ElementTypesAreNonnullByDefault
064public final class Stats implements Serializable {
065
066  private final long count;
067  private final double mean;
068  private final double sumOfSquaresOfDeltas;
069  private final double min;
070  private final double max;
071
072  /**
073   * Internal constructor. Users should use {@link #of} or {@link StatsAccumulator#snapshot}.
074   *
075   * <p>To ensure that the created instance obeys its contract, the parameters should satisfy the
076   * following constraints. This is the callers responsibility and is not enforced here.
077   *
078   * <ul>
079   *   <li>If {@code count} is 0, {@code mean} may have any finite value (its only usage will be to
080   *       get multiplied by 0 to calculate the sum), and the other parameters may have any values
081   *       (they will not be used).
082   *   <li>If {@code count} is 1, {@code sumOfSquaresOfDeltas} must be exactly 0.0 or {@link
083   *       Double#NaN}.
084   * </ul>
085   */
086  Stats(long count, double mean, double sumOfSquaresOfDeltas, double min, double max) {
087    this.count = count;
088    this.mean = mean;
089    this.sumOfSquaresOfDeltas = sumOfSquaresOfDeltas;
090    this.min = min;
091    this.max = max;
092  }
093
094  /**
095   * Returns statistics over a dataset containing the given values.
096   *
097   * @param values a series of values, which will be converted to {@code double} values (this may
098   *     cause loss of precision)
099   */
100  public static Stats of(Iterable<? extends Number> values) {
101    StatsAccumulator accumulator = new StatsAccumulator();
102    accumulator.addAll(values);
103    return accumulator.snapshot();
104  }
105
106  /**
107   * Returns statistics over a dataset containing the given values. The iterator will be completely
108   * consumed by this method.
109   *
110   * @param values a series of values, which will be converted to {@code double} values (this may
111   *     cause loss of precision)
112   */
113  public static Stats of(Iterator<? extends Number> values) {
114    StatsAccumulator accumulator = new StatsAccumulator();
115    accumulator.addAll(values);
116    return accumulator.snapshot();
117  }
118
119  /**
120   * Returns statistics over a dataset containing the given values.
121   *
122   * @param values a series of values
123   */
124  public static Stats of(double... values) {
125    StatsAccumulator acummulator = new StatsAccumulator();
126    acummulator.addAll(values);
127    return acummulator.snapshot();
128  }
129
130  /**
131   * Returns statistics over a dataset containing the given values.
132   *
133   * @param values a series of values
134   */
135  public static Stats of(int... values) {
136    StatsAccumulator acummulator = new StatsAccumulator();
137    acummulator.addAll(values);
138    return acummulator.snapshot();
139  }
140
141  /**
142   * Returns statistics over a dataset containing the given values.
143   *
144   * @param values a series of values, which will be converted to {@code double} values (this may
145   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
146   */
147  public static Stats of(long... values) {
148    StatsAccumulator acummulator = new StatsAccumulator();
149    acummulator.addAll(values);
150    return acummulator.snapshot();
151  }
152
153  /** Returns the number of values. */
154  public long count() {
155    return count;
156  }
157
158  /**
159   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
160   * values. The count must be non-zero.
161   *
162   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
163   * the arithmetic mean of the population.
164   *
165   * <h3>Non-finite values</h3>
166   *
167   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
168   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
169   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
170   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
171   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
172   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
173   *
174   * <p>If you only want to calculate the mean, use {@link #meanOf} instead of creating a {@link
175   * Stats} instance.
176   *
177   * @throws IllegalStateException if the dataset is empty
178   */
179  public double mean() {
180    checkState(count != 0);
181    return mean;
182  }
183
184  /**
185   * Returns the sum of the values.
186   *
187   * <h3>Non-finite values</h3>
188   *
189   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
190   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
191   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
192   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
193   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
194   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
195   */
196  public double sum() {
197    return mean * count;
198  }
199
200  /**
201   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
202   * variance</a> of the values. The count must be non-zero.
203   *
204   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
205   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
206   * due to numerical errors. However, it is guaranteed never to return a negative result.
207   *
208   * <h3>Non-finite values</h3>
209   *
210   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
211   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
212   *
213   * @throws IllegalStateException if the dataset is empty
214   */
215  public double populationVariance() {
216    checkState(count > 0);
217    if (isNaN(sumOfSquaresOfDeltas)) {
218      return NaN;
219    }
220    if (count == 1) {
221      return 0.0;
222    }
223    return ensureNonNegative(sumOfSquaresOfDeltas) / count();
224  }
225
226  /**
227   * Returns the <a
228   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
229   * population standard deviation</a> of the values. The count must be non-zero.
230   *
231   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
232   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
233   * due to numerical errors. However, it is guaranteed never to return a negative result.
234   *
235   * <h3>Non-finite values</h3>
236   *
237   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
238   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
239   *
240   * @throws IllegalStateException if the dataset is empty
241   */
242  public double populationStandardDeviation() {
243    return Math.sqrt(populationVariance());
244  }
245
246  /**
247   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
248   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
249   * unbiased estimator of the population variance of the population. The count must be greater than
250   * one.
251   *
252   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
253   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
254   *
255   * <h3>Non-finite values</h3>
256   *
257   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
258   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
259   *
260   * @throws IllegalStateException if the dataset is empty or contains a single value
261   */
262  public double sampleVariance() {
263    checkState(count > 1);
264    if (isNaN(sumOfSquaresOfDeltas)) {
265      return NaN;
266    }
267    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
268  }
269
270  /**
271   * Returns the <a
272   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
273   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
274   * population, this is an estimator of the population standard deviation of the population which
275   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
276   * the distribution). The count must be greater than one.
277   *
278   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
279   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
280   *
281   * <h3>Non-finite values</h3>
282   *
283   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
284   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
285   *
286   * @throws IllegalStateException if the dataset is empty or contains a single value
287   */
288  public double sampleStandardDeviation() {
289    return Math.sqrt(sampleVariance());
290  }
291
292  /**
293   * Returns the lowest value in the dataset. The count must be non-zero.
294   *
295   * <h3>Non-finite values</h3>
296   *
297   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
298   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
299   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
300   * only then the result is the lowest finite value. If it contains {@link
301   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
302   *
303   * @throws IllegalStateException if the dataset is empty
304   */
305  public double min() {
306    checkState(count != 0);
307    return min;
308  }
309
310  /**
311   * Returns the highest value in the dataset. The count must be non-zero.
312   *
313   * <h3>Non-finite values</h3>
314   *
315   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
316   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
317   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
318   * only then the result is the highest finite value. If it contains {@link
319   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
320   *
321   * @throws IllegalStateException if the dataset is empty
322   */
323  public double max() {
324    checkState(count != 0);
325    return max;
326  }
327
328  /**
329   * {@inheritDoc}
330   *
331   * <p><b>Note:</b> This tests exact equality of the calculated statistics, including the floating
332   * point values. Two instances are guaranteed to be considered equal if one is copied from the
333   * other using {@code second = new StatsAccumulator().addAll(first).snapshot()}, if both were
334   * obtained by calling {@code snapshot()} on the same {@link StatsAccumulator} without adding any
335   * values in between the two calls, or if one is obtained from the other after round-tripping
336   * through java serialization. However, floating point rounding errors mean that it may be false
337   * for some instances where the statistics are mathematically equal, including instances
338   * constructed from the same values in a different order... or (in the general case) even in the
339   * same order. (It is guaranteed to return true for instances constructed from the same values in
340   * the same order if {@code strictfp} is in effect, or if the system architecture guarantees
341   * {@code strictfp}-like semantics.)
342   */
343  @Override
344  public boolean equals(@CheckForNull Object obj) {
345    if (obj == null) {
346      return false;
347    }
348    if (getClass() != obj.getClass()) {
349      return false;
350    }
351    Stats other = (Stats) obj;
352    return count == other.count
353        && doubleToLongBits(mean) == doubleToLongBits(other.mean)
354        && doubleToLongBits(sumOfSquaresOfDeltas) == doubleToLongBits(other.sumOfSquaresOfDeltas)
355        && doubleToLongBits(min) == doubleToLongBits(other.min)
356        && doubleToLongBits(max) == doubleToLongBits(other.max);
357  }
358
359  /**
360   * {@inheritDoc}
361   *
362   * <p><b>Note:</b> This hash code is consistent with exact equality of the calculated statistics,
363   * including the floating point values. See the note on {@link #equals} for details.
364   */
365  @Override
366  public int hashCode() {
367    return Objects.hashCode(count, mean, sumOfSquaresOfDeltas, min, max);
368  }
369
370  @Override
371  public String toString() {
372    if (count() > 0) {
373      return MoreObjects.toStringHelper(this)
374          .add("count", count)
375          .add("mean", mean)
376          .add("populationStandardDeviation", populationStandardDeviation())
377          .add("min", min)
378          .add("max", max)
379          .toString();
380    } else {
381      return MoreObjects.toStringHelper(this).add("count", count).toString();
382    }
383  }
384
385  double sumOfSquaresOfDeltas() {
386    return sumOfSquaresOfDeltas;
387  }
388
389  /**
390   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
391   * values. The count must be non-zero.
392   *
393   * <p>The definition of the mean is the same as {@link Stats#mean}.
394   *
395   * @param values a series of values, which will be converted to {@code double} values (this may
396   *     cause loss of precision)
397   * @throws IllegalArgumentException if the dataset is empty
398   */
399  public static double meanOf(Iterable<? extends Number> values) {
400    return meanOf(values.iterator());
401  }
402
403  /**
404   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
405   * values. The count must be non-zero.
406   *
407   * <p>The definition of the mean is the same as {@link Stats#mean}.
408   *
409   * @param values a series of values, which will be converted to {@code double} values (this may
410   *     cause loss of precision)
411   * @throws IllegalArgumentException if the dataset is empty
412   */
413  public static double meanOf(Iterator<? extends Number> values) {
414    checkArgument(values.hasNext());
415    long count = 1;
416    double mean = values.next().doubleValue();
417    while (values.hasNext()) {
418      double value = values.next().doubleValue();
419      count++;
420      if (isFinite(value) && isFinite(mean)) {
421        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
422        mean += (value - mean) / count;
423      } else {
424        mean = calculateNewMeanNonFinite(mean, value);
425      }
426    }
427    return mean;
428  }
429
430  /**
431   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
432   * values. The count must be non-zero.
433   *
434   * <p>The definition of the mean is the same as {@link Stats#mean}.
435   *
436   * @param values a series of values
437   * @throws IllegalArgumentException if the dataset is empty
438   */
439  public static double meanOf(double... values) {
440    checkArgument(values.length > 0);
441    double mean = values[0];
442    for (int index = 1; index < values.length; index++) {
443      double value = values[index];
444      if (isFinite(value) && isFinite(mean)) {
445        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
446        mean += (value - mean) / (index + 1);
447      } else {
448        mean = calculateNewMeanNonFinite(mean, value);
449      }
450    }
451    return mean;
452  }
453
454  /**
455   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
456   * values. The count must be non-zero.
457   *
458   * <p>The definition of the mean is the same as {@link Stats#mean}.
459   *
460   * @param values a series of values
461   * @throws IllegalArgumentException if the dataset is empty
462   */
463  public static double meanOf(int... values) {
464    checkArgument(values.length > 0);
465    double mean = values[0];
466    for (int index = 1; index < values.length; index++) {
467      double value = values[index];
468      if (isFinite(value) && isFinite(mean)) {
469        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
470        mean += (value - mean) / (index + 1);
471      } else {
472        mean = calculateNewMeanNonFinite(mean, value);
473      }
474    }
475    return mean;
476  }
477
478  /**
479   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
480   * values. The count must be non-zero.
481   *
482   * <p>The definition of the mean is the same as {@link Stats#mean}.
483   *
484   * @param values a series of values, which will be converted to {@code double} values (this may
485   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
486   * @throws IllegalArgumentException if the dataset is empty
487   */
488  public static double meanOf(long... values) {
489    checkArgument(values.length > 0);
490    double mean = values[0];
491    for (int index = 1; index < values.length; index++) {
492      double value = values[index];
493      if (isFinite(value) && isFinite(mean)) {
494        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15)
495        mean += (value - mean) / (index + 1);
496      } else {
497        mean = calculateNewMeanNonFinite(mean, value);
498      }
499    }
500    return mean;
501  }
502
503  // Serialization helpers
504
505  /** The size of byte array representation in bytes. */
506  static final int BYTES = (Long.SIZE + Double.SIZE * 4) / Byte.SIZE;
507
508  /**
509   * Gets a byte array representation of this instance.
510   *
511   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
512   * versions.
513   */
514  public byte[] toByteArray() {
515    ByteBuffer buff = ByteBuffer.allocate(BYTES).order(ByteOrder.LITTLE_ENDIAN);
516    writeTo(buff);
517    return buff.array();
518  }
519
520  /**
521   * Writes to the given {@link ByteBuffer} a byte representation of this instance.
522   *
523   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
524   * versions.
525   *
526   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
527   *     {@link ByteOrder#LITTLE_ENDIAN}, to which a BYTES-long byte representation of this instance
528   *     is written. In the process increases the position of {@link ByteBuffer} by BYTES.
529   */
530  void writeTo(ByteBuffer buffer) {
531    checkNotNull(buffer);
532    checkArgument(
533        buffer.remaining() >= BYTES,
534        "Expected at least Stats.BYTES = %s remaining , got %s",
535        BYTES,
536        buffer.remaining());
537    buffer
538        .putLong(count)
539        .putDouble(mean)
540        .putDouble(sumOfSquaresOfDeltas)
541        .putDouble(min)
542        .putDouble(max);
543  }
544
545  /**
546   * Creates a Stats instance from the given byte representation which was obtained by {@link
547   * #toByteArray}.
548   *
549   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
550   * versions.
551   */
552  public static Stats fromByteArray(byte[] byteArray) {
553    checkNotNull(byteArray);
554    checkArgument(
555        byteArray.length == BYTES,
556        "Expected Stats.BYTES = %s remaining , got %s",
557        BYTES,
558        byteArray.length);
559    return readFrom(ByteBuffer.wrap(byteArray).order(ByteOrder.LITTLE_ENDIAN));
560  }
561
562  /**
563   * Creates a Stats instance from the byte representation read from the given {@link ByteBuffer}.
564   *
565   * <p><b>Note:</b> No guarantees are made regarding stability of the representation between
566   * versions.
567   *
568   * @param buffer A {@link ByteBuffer} with at least BYTES {@link ByteBuffer#remaining}, ordered as
569   *     {@link ByteOrder#LITTLE_ENDIAN}, from which a BYTES-long byte representation of this
570   *     instance is read. In the process increases the position of {@link ByteBuffer} by BYTES.
571   */
572  static Stats readFrom(ByteBuffer buffer) {
573    checkNotNull(buffer);
574    checkArgument(
575        buffer.remaining() >= BYTES,
576        "Expected at least Stats.BYTES = %s remaining , got %s",
577        BYTES,
578        buffer.remaining());
579    return new Stats(
580        buffer.getLong(),
581        buffer.getDouble(),
582        buffer.getDouble(),
583        buffer.getDouble(),
584        buffer.getDouble());
585  }
586
587  private static final long serialVersionUID = 0;
588}