001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkState;
018import static com.google.common.math.DoubleUtils.ensureNonNegative;
019import static com.google.common.primitives.Doubles.isFinite;
020import static java.lang.Double.NaN;
021import static java.lang.Double.isNaN;
022
023import com.google.common.annotations.GwtIncompatible;
024import com.google.common.annotations.J2ktIncompatible;
025import java.util.Iterator;
026import java.util.stream.DoubleStream;
027import java.util.stream.IntStream;
028import java.util.stream.LongStream;
029
030/**
031 * A mutable object which accumulates double values and tracks some basic statistics over all the
032 * values added so far. The values may be added singly or in groups. This class is not thread safe.
033 *
034 * @author Pete Gillin
035 * @author Kevin Bourrillion
036 * @since 20.0
037 */
038@J2ktIncompatible
039@GwtIncompatible
040@ElementTypesAreNonnullByDefault
041public final class StatsAccumulator {
042  /** Creates a new accumulator. */
043  public StatsAccumulator() {}
044
045  // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
046  // methods of this class.
047  private long count = 0;
048  private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
049  private double sumOfSquaresOfDeltas = 0.0;
050  private double min = NaN; // any value will do
051  private double max = NaN; // any value will do
052
053  /** Adds the given value to the dataset. */
054  public void add(double value) {
055    if (count == 0) {
056      count = 1;
057      mean = value;
058      min = value;
059      max = value;
060      if (!isFinite(value)) {
061        sumOfSquaresOfDeltas = NaN;
062      }
063    } else {
064      count++;
065      if (isFinite(value) && isFinite(mean)) {
066        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
067        double delta = value - mean;
068        mean += delta / count;
069        sumOfSquaresOfDeltas += delta * (value - mean);
070      } else {
071        mean = calculateNewMeanNonFinite(mean, value);
072        sumOfSquaresOfDeltas = NaN;
073      }
074      min = Math.min(min, value);
075      max = Math.max(max, value);
076    }
077  }
078
079  /**
080   * Adds the given values to the dataset.
081   *
082   * @param values a series of values, which will be converted to {@code double} values (this may
083   *     cause loss of precision)
084   */
085  public void addAll(Iterable<? extends Number> values) {
086    for (Number value : values) {
087      add(value.doubleValue());
088    }
089  }
090
091  /**
092   * Adds the given values to the dataset.
093   *
094   * @param values a series of values, which will be converted to {@code double} values (this may
095   *     cause loss of precision)
096   */
097  public void addAll(Iterator<? extends Number> values) {
098    while (values.hasNext()) {
099      add(values.next().doubleValue());
100    }
101  }
102
103  /**
104   * Adds the given values to the dataset.
105   *
106   * @param values a series of values
107   */
108  public void addAll(double... values) {
109    for (double value : values) {
110      add(value);
111    }
112  }
113
114  /**
115   * Adds the given values to the dataset.
116   *
117   * @param values a series of values
118   */
119  public void addAll(int... values) {
120    for (int value : values) {
121      add(value);
122    }
123  }
124
125  /**
126   * Adds the given values to the dataset.
127   *
128   * @param values a series of values, which will be converted to {@code double} values (this may
129   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
130   */
131  public void addAll(long... values) {
132    for (long value : values) {
133      add(value);
134    }
135  }
136
137  /**
138   * Adds the given values to the dataset. The stream will be completely consumed by this method.
139   *
140   * @param values a series of values
141   * @since 28.2 (but only since 33.4.0 in the Android flavor)
142   */
143  public void addAll(DoubleStream values) {
144    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
145  }
146
147  /**
148   * Adds the given values to the dataset. The stream will be completely consumed by this method.
149   *
150   * @param values a series of values
151   * @since 28.2 (but only since 33.4.0 in the Android flavor)
152   */
153  public void addAll(IntStream values) {
154    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
155  }
156
157  /**
158   * Adds the given values to the dataset. The stream will be completely consumed by this method.
159   *
160   * @param values a series of values, which will be converted to {@code double} values (this may
161   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
162   * @since 28.2 (but only since 33.4.0 in the Android flavor)
163   */
164  public void addAll(LongStream values) {
165    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
166  }
167
168  /**
169   * Adds the given statistics to the dataset, as if the individual values used to compute the
170   * statistics had been added directly.
171   */
172  public void addAll(Stats values) {
173    if (values.count() == 0) {
174      return;
175    }
176    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
177  }
178
179  /**
180   * Adds the given statistics to the dataset, as if the individual values used to compute the
181   * statistics had been added directly.
182   *
183   * @since 28.2
184   */
185  public void addAll(StatsAccumulator values) {
186    if (values.count() == 0) {
187      return;
188    }
189    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
190  }
191
192  private void merge(
193      long otherCount,
194      double otherMean,
195      double otherSumOfSquaresOfDeltas,
196      double otherMin,
197      double otherMax) {
198    if (count == 0) {
199      count = otherCount;
200      mean = otherMean;
201      sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
202      min = otherMin;
203      max = otherMax;
204    } else {
205      count += otherCount;
206      if (isFinite(mean) && isFinite(otherMean)) {
207        // This is a generalized version of the calculation in add(double) above.
208        double delta = otherMean - mean;
209        mean += delta * otherCount / count;
210        sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount;
211      } else {
212        mean = calculateNewMeanNonFinite(mean, otherMean);
213        sumOfSquaresOfDeltas = NaN;
214      }
215      min = Math.min(min, otherMin);
216      max = Math.max(max, otherMax);
217    }
218  }
219
220  /** Returns an immutable snapshot of the current statistics. */
221  public Stats snapshot() {
222    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
223  }
224
225  /** Returns the number of values. */
226  public long count() {
227    return count;
228  }
229
230  /**
231   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
232   * values. The count must be non-zero.
233   *
234   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
235   * the arithmetic mean of the population.
236   *
237   * <h3>Non-finite values</h3>
238   *
239   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
240   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
241   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
242   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
243   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
244   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
245   *
246   * @throws IllegalStateException if the dataset is empty
247   */
248  public double mean() {
249    checkState(count != 0);
250    return mean;
251  }
252
253  /**
254   * Returns the sum of the values.
255   *
256   * <h3>Non-finite values</h3>
257   *
258   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
259   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
260   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
261   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
262   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
263   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
264   */
265  public final double sum() {
266    return mean * count;
267  }
268
269  /**
270   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
271   * variance</a> of the values. The count must be non-zero.
272   *
273   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
274   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
275   * due to numerical errors. However, it is guaranteed never to return a negative result.
276   *
277   * <h3>Non-finite values</h3>
278   *
279   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
280   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
281   *
282   * @throws IllegalStateException if the dataset is empty
283   */
284  public final double populationVariance() {
285    checkState(count != 0);
286    if (isNaN(sumOfSquaresOfDeltas)) {
287      return NaN;
288    }
289    if (count == 1) {
290      return 0.0;
291    }
292    return ensureNonNegative(sumOfSquaresOfDeltas) / count;
293  }
294
295  /**
296   * Returns the <a
297   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
298   * population standard deviation</a> of the values. The count must be non-zero.
299   *
300   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
301   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
302   * due to numerical errors. However, it is guaranteed never to return a negative result.
303   *
304   * <h3>Non-finite values</h3>
305   *
306   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
307   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
308   *
309   * @throws IllegalStateException if the dataset is empty
310   */
311  public final double populationStandardDeviation() {
312    return Math.sqrt(populationVariance());
313  }
314
315  /**
316   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
317   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
318   * unbiased estimator of the population variance of the population. The count must be greater than
319   * one.
320   *
321   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
322   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
323   *
324   * <h3>Non-finite values</h3>
325   *
326   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
327   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
328   *
329   * @throws IllegalStateException if the dataset is empty or contains a single value
330   */
331  public final double sampleVariance() {
332    checkState(count > 1);
333    if (isNaN(sumOfSquaresOfDeltas)) {
334      return NaN;
335    }
336    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
337  }
338
339  /**
340   * Returns the <a
341   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
342   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
343   * population, this is an estimator of the population standard deviation of the population which
344   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
345   * the distribution). The count must be greater than one.
346   *
347   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
348   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
349   *
350   * <h3>Non-finite values</h3>
351   *
352   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
353   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
354   *
355   * @throws IllegalStateException if the dataset is empty or contains a single value
356   */
357  public final double sampleStandardDeviation() {
358    return Math.sqrt(sampleVariance());
359  }
360
361  /**
362   * Returns the lowest value in the dataset. The count must be non-zero.
363   *
364   * <h3>Non-finite values</h3>
365   *
366   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
367   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
368   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
369   * only then the result is the lowest finite value. If it contains {@link
370   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
371   *
372   * @throws IllegalStateException if the dataset is empty
373   */
374  public double min() {
375    checkState(count != 0);
376    return min;
377  }
378
379  /**
380   * Returns the highest value in the dataset. The count must be non-zero.
381   *
382   * <h3>Non-finite values</h3>
383   *
384   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
385   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
386   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
387   * only then the result is the highest finite value. If it contains {@link
388   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
389   *
390   * @throws IllegalStateException if the dataset is empty
391   */
392  public double max() {
393    checkState(count != 0);
394    return max;
395  }
396
397  double sumOfSquaresOfDeltas() {
398    return sumOfSquaresOfDeltas;
399  }
400
401  /**
402   * Calculates the new value for the accumulated mean when a value is added, in the case where at
403   * least one of the previous mean and the value is non-finite.
404   */
405  static double calculateNewMeanNonFinite(double previousMean, double value) {
406    /*
407     * Desired behaviour is to match the results of applying the naive mean formula. In particular,
408     * the update formula can subtract infinities in cases where the naive formula would add them.
409     *
410     * Consequently:
411     * 1. If the previous mean is finite and the new value is non-finite then the new mean is that
412     *    value (whether it is NaN or infinity).
413     * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged
414     *    (whether it is NaN or infinity).
415     * 3. If both the previous mean and the new value are non-finite and...
416     * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN.
417     * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged.
418     * 3c. ...they are different infinities (so mean != value) then the new mean is NaN.
419     */
420    if (isFinite(previousMean)) {
421      // This is case 1.
422      return value;
423    } else if (isFinite(value) || previousMean == value) {
424      // This is case 2. or 3b.
425      return previousMean;
426    } else {
427      // This is case 3a. or 3c.
428      return NaN;
429    }
430  }
431}