001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkState;
018import static com.google.common.math.DoubleUtils.ensureNonNegative;
019import static com.google.common.primitives.Doubles.isFinite;
020import static java.lang.Double.NaN;
021import static java.lang.Double.isNaN;
022
023import com.google.common.annotations.GwtIncompatible;
024import com.google.common.annotations.J2ktIncompatible;
025import java.util.Iterator;
026import java.util.stream.DoubleStream;
027import java.util.stream.IntStream;
028import java.util.stream.LongStream;
029
030/**
031 * A mutable object which accumulates double values and tracks some basic statistics over all the
032 * values added so far. The values may be added singly or in groups. This class is not thread safe.
033 *
034 * @author Pete Gillin
035 * @author Kevin Bourrillion
036 * @since 20.0
037 */
038@J2ktIncompatible
039@GwtIncompatible
040@ElementTypesAreNonnullByDefault
041public final class StatsAccumulator {
042  /** Creates a new accumulator. */
043  public StatsAccumulator() {}
044
045  // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
046  // methods of this class.
047  private long count = 0;
048  private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
049  private double sumOfSquaresOfDeltas = 0.0;
050  private double min = NaN; // any value will do
051  private double max = NaN; // any value will do
052
053  /** Adds the given value to the dataset. */
054  public void add(double value) {
055    if (count == 0) {
056      count = 1;
057      mean = value;
058      min = value;
059      max = value;
060      if (!isFinite(value)) {
061        sumOfSquaresOfDeltas = NaN;
062      }
063    } else {
064      count++;
065      if (isFinite(value) && isFinite(mean)) {
066        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
067        double delta = value - mean;
068        mean += delta / count;
069        sumOfSquaresOfDeltas += delta * (value - mean);
070      } else {
071        mean = calculateNewMeanNonFinite(mean, value);
072        sumOfSquaresOfDeltas = NaN;
073      }
074      min = Math.min(min, value);
075      max = Math.max(max, value);
076    }
077  }
078
079  /**
080   * Adds the given values to the dataset.
081   *
082   * @param values a series of values, which will be converted to {@code double} values (this may
083   *     cause loss of precision)
084   */
085  public void addAll(Iterable<? extends Number> values) {
086    for (Number value : values) {
087      add(value.doubleValue());
088    }
089  }
090
091  /**
092   * Adds the given values to the dataset.
093   *
094   * @param values a series of values, which will be converted to {@code double} values (this may
095   *     cause loss of precision)
096   */
097  public void addAll(Iterator<? extends Number> values) {
098    while (values.hasNext()) {
099      add(values.next().doubleValue());
100    }
101  }
102
103  /**
104   * Adds the given values to the dataset.
105   *
106   * @param values a series of values
107   */
108  public void addAll(double... values) {
109    for (double value : values) {
110      add(value);
111    }
112  }
113
114  /**
115   * Adds the given values to the dataset.
116   *
117   * @param values a series of values
118   */
119  public void addAll(int... values) {
120    for (int value : values) {
121      add(value);
122    }
123  }
124
125  /**
126   * Adds the given values to the dataset.
127   *
128   * @param values a series of values, which will be converted to {@code double} values (this may
129   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
130   */
131  public void addAll(long... values) {
132    for (long value : values) {
133      add(value);
134    }
135  }
136
137  /**
138   * Adds the given values to the dataset. The stream will be completely consumed by this method.
139   *
140   * @param values a series of values
141   * @since 33.4.0 (but since 28.2 in the JRE flavor)
142   */
143  @SuppressWarnings("Java7ApiChecker")
144  @IgnoreJRERequirement // Users will use this only if they're already using streams.
145  public void addAll(DoubleStream values) {
146    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
147  }
148
149  /**
150   * Adds the given values to the dataset. The stream will be completely consumed by this method.
151   *
152   * @param values a series of values
153   * @since 33.4.0 (but since 28.2 in the JRE flavor)
154   */
155  @SuppressWarnings("Java7ApiChecker")
156  @IgnoreJRERequirement // Users will use this only if they're already using streams.
157  public void addAll(IntStream values) {
158    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
159  }
160
161  /**
162   * Adds the given values to the dataset. The stream will be completely consumed by this method.
163   *
164   * @param values a series of values, which will be converted to {@code double} values (this may
165   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
166   * @since 33.4.0 (but since 28.2 in the JRE flavor)
167   */
168  @SuppressWarnings("Java7ApiChecker")
169  @IgnoreJRERequirement // Users will use this only if they're already using streams.
170  public void addAll(LongStream values) {
171    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
172  }
173
174  /**
175   * Adds the given statistics to the dataset, as if the individual values used to compute the
176   * statistics had been added directly.
177   */
178  public void addAll(Stats values) {
179    if (values.count() == 0) {
180      return;
181    }
182    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
183  }
184
185  /**
186   * Adds the given statistics to the dataset, as if the individual values used to compute the
187   * statistics had been added directly.
188   *
189   * @since 28.2
190   */
191  public void addAll(StatsAccumulator values) {
192    if (values.count() == 0) {
193      return;
194    }
195    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
196  }
197
198  private void merge(
199      long otherCount,
200      double otherMean,
201      double otherSumOfSquaresOfDeltas,
202      double otherMin,
203      double otherMax) {
204    if (count == 0) {
205      count = otherCount;
206      mean = otherMean;
207      sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
208      min = otherMin;
209      max = otherMax;
210    } else {
211      count += otherCount;
212      if (isFinite(mean) && isFinite(otherMean)) {
213        // This is a generalized version of the calculation in add(double) above.
214        double delta = otherMean - mean;
215        mean += delta * otherCount / count;
216        sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount;
217      } else {
218        mean = calculateNewMeanNonFinite(mean, otherMean);
219        sumOfSquaresOfDeltas = NaN;
220      }
221      min = Math.min(min, otherMin);
222      max = Math.max(max, otherMax);
223    }
224  }
225
226  /** Returns an immutable snapshot of the current statistics. */
227  public Stats snapshot() {
228    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
229  }
230
231  /** Returns the number of values. */
232  public long count() {
233    return count;
234  }
235
236  /**
237   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
238   * values. The count must be non-zero.
239   *
240   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
241   * the arithmetic mean of the population.
242   *
243   * <h3>Non-finite values</h3>
244   *
245   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
246   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
247   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
248   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
249   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
250   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
251   *
252   * @throws IllegalStateException if the dataset is empty
253   */
254  public double mean() {
255    checkState(count != 0);
256    return mean;
257  }
258
259  /**
260   * Returns the sum of the values.
261   *
262   * <h3>Non-finite values</h3>
263   *
264   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
265   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
266   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
267   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
268   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
269   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
270   */
271  public final double sum() {
272    return mean * count;
273  }
274
275  /**
276   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
277   * variance</a> of the values. The count must be non-zero.
278   *
279   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
280   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
281   * due to numerical errors. However, it is guaranteed never to return a negative result.
282   *
283   * <h3>Non-finite values</h3>
284   *
285   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
286   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
287   *
288   * @throws IllegalStateException if the dataset is empty
289   */
290  public final double populationVariance() {
291    checkState(count != 0);
292    if (isNaN(sumOfSquaresOfDeltas)) {
293      return NaN;
294    }
295    if (count == 1) {
296      return 0.0;
297    }
298    return ensureNonNegative(sumOfSquaresOfDeltas) / count;
299  }
300
301  /**
302   * Returns the <a
303   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
304   * population standard deviation</a> of the values. The count must be non-zero.
305   *
306   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
307   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
308   * due to numerical errors. However, it is guaranteed never to return a negative result.
309   *
310   * <h3>Non-finite values</h3>
311   *
312   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
313   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
314   *
315   * @throws IllegalStateException if the dataset is empty
316   */
317  public final double populationStandardDeviation() {
318    return Math.sqrt(populationVariance());
319  }
320
321  /**
322   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
323   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
324   * unbiased estimator of the population variance of the population. The count must be greater than
325   * one.
326   *
327   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
328   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
329   *
330   * <h3>Non-finite values</h3>
331   *
332   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
333   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
334   *
335   * @throws IllegalStateException if the dataset is empty or contains a single value
336   */
337  public final double sampleVariance() {
338    checkState(count > 1);
339    if (isNaN(sumOfSquaresOfDeltas)) {
340      return NaN;
341    }
342    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
343  }
344
345  /**
346   * Returns the <a
347   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
348   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
349   * population, this is an estimator of the population standard deviation of the population which
350   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
351   * the distribution). The count must be greater than one.
352   *
353   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
354   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
355   *
356   * <h3>Non-finite values</h3>
357   *
358   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
359   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
360   *
361   * @throws IllegalStateException if the dataset is empty or contains a single value
362   */
363  public final double sampleStandardDeviation() {
364    return Math.sqrt(sampleVariance());
365  }
366
367  /**
368   * Returns the lowest value in the dataset. The count must be non-zero.
369   *
370   * <h3>Non-finite values</h3>
371   *
372   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
373   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
374   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
375   * only then the result is the lowest finite value. If it contains {@link
376   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
377   *
378   * @throws IllegalStateException if the dataset is empty
379   */
380  public double min() {
381    checkState(count != 0);
382    return min;
383  }
384
385  /**
386   * Returns the highest value in the dataset. The count must be non-zero.
387   *
388   * <h3>Non-finite values</h3>
389   *
390   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
391   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
392   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
393   * only then the result is the highest finite value. If it contains {@link
394   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
395   *
396   * @throws IllegalStateException if the dataset is empty
397   */
398  public double max() {
399    checkState(count != 0);
400    return max;
401  }
402
403  double sumOfSquaresOfDeltas() {
404    return sumOfSquaresOfDeltas;
405  }
406
407  /**
408   * Calculates the new value for the accumulated mean when a value is added, in the case where at
409   * least one of the previous mean and the value is non-finite.
410   */
411  static double calculateNewMeanNonFinite(double previousMean, double value) {
412    /*
413     * Desired behaviour is to match the results of applying the naive mean formula. In particular,
414     * the update formula can subtract infinities in cases where the naive formula would add them.
415     *
416     * Consequently:
417     * 1. If the previous mean is finite and the new value is non-finite then the new mean is that
418     *    value (whether it is NaN or infinity).
419     * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged
420     *    (whether it is NaN or infinity).
421     * 3. If both the previous mean and the new value are non-finite and...
422     * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN.
423     * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged.
424     * 3c. ...they are different infinities (so mean != value) then the new mean is NaN.
425     */
426    if (isFinite(previousMean)) {
427      // This is case 1.
428      return value;
429    } else if (isFinite(value) || previousMean == value) {
430      // This is case 2. or 3b.
431      return previousMean;
432    } else {
433      // This is case 3a. or 3c.
434      return NaN;
435    }
436  }
437}