001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkState;
018import static com.google.common.math.DoubleUtils.ensureNonNegative;
019import static com.google.common.primitives.Doubles.isFinite;
020import static java.lang.Double.NaN;
021import static java.lang.Double.isNaN;
022
023import com.google.common.annotations.GwtIncompatible;
024import com.google.common.annotations.J2ktIncompatible;
025import java.util.Iterator;
026import java.util.stream.DoubleStream;
027import java.util.stream.IntStream;
028import java.util.stream.LongStream;
029
030/**
031 * A mutable object which accumulates double values and tracks some basic statistics over all the
032 * values added so far. The values may be added singly or in groups. This class is not thread safe.
033 *
034 * @author Pete Gillin
035 * @author Kevin Bourrillion
036 * @since 20.0
037 */
038@J2ktIncompatible
039@GwtIncompatible
040public final class StatsAccumulator {
041  /** Creates a new accumulator. */
042  public StatsAccumulator() {}
043
044  // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
045  // methods of this class.
046  private long count = 0;
047  private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
048  private double sumOfSquaresOfDeltas = 0.0;
049  private double min = NaN; // any value will do
050  private double max = NaN; // any value will do
051
052  /** Adds the given value to the dataset. */
053  public void add(double value) {
054    if (count == 0) {
055      count = 1;
056      mean = value;
057      min = value;
058      max = value;
059      if (!isFinite(value)) {
060        sumOfSquaresOfDeltas = NaN;
061      }
062    } else {
063      count++;
064      if (isFinite(value) && isFinite(mean)) {
065        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
066        double delta = value - mean;
067        mean += delta / count;
068        sumOfSquaresOfDeltas += delta * (value - mean);
069      } else {
070        mean = calculateNewMeanNonFinite(mean, value);
071        sumOfSquaresOfDeltas = NaN;
072      }
073      min = Math.min(min, value);
074      max = Math.max(max, value);
075    }
076  }
077
078  /**
079   * Adds the given values to the dataset.
080   *
081   * @param values a series of values, which will be converted to {@code double} values (this may
082   *     cause loss of precision)
083   */
084  public void addAll(Iterable<? extends Number> values) {
085    for (Number value : values) {
086      add(value.doubleValue());
087    }
088  }
089
090  /**
091   * Adds the given values to the dataset.
092   *
093   * @param values a series of values, which will be converted to {@code double} values (this may
094   *     cause loss of precision)
095   */
096  public void addAll(Iterator<? extends Number> values) {
097    while (values.hasNext()) {
098      add(values.next().doubleValue());
099    }
100  }
101
102  /**
103   * Adds the given values to the dataset.
104   *
105   * @param values a series of values
106   */
107  public void addAll(double... values) {
108    for (double value : values) {
109      add(value);
110    }
111  }
112
113  /**
114   * Adds the given values to the dataset.
115   *
116   * @param values a series of values
117   */
118  public void addAll(int... values) {
119    for (int value : values) {
120      add(value);
121    }
122  }
123
124  /**
125   * Adds the given values to the dataset.
126   *
127   * @param values a series of values, which will be converted to {@code double} values (this may
128   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
129   */
130  public void addAll(long... values) {
131    for (long value : values) {
132      add(value);
133    }
134  }
135
136  /**
137   * Adds the given values to the dataset. The stream will be completely consumed by this method.
138   *
139   * @param values a series of values
140   * @since 33.4.0 (but since 28.2 in the JRE flavor)
141   */
142  @SuppressWarnings("Java7ApiChecker")
143  @IgnoreJRERequirement // Users will use this only if they're already using streams.
144  public void addAll(DoubleStream values) {
145    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
146  }
147
148  /**
149   * Adds the given values to the dataset. The stream will be completely consumed by this method.
150   *
151   * @param values a series of values
152   * @since 33.4.0 (but since 28.2 in the JRE flavor)
153   */
154  @SuppressWarnings("Java7ApiChecker")
155  @IgnoreJRERequirement // Users will use this only if they're already using streams.
156  public void addAll(IntStream values) {
157    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
158  }
159
160  /**
161   * Adds the given values to the dataset. The stream will be completely consumed by this method.
162   *
163   * @param values a series of values, which will be converted to {@code double} values (this may
164   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
165   * @since 33.4.0 (but since 28.2 in the JRE flavor)
166   */
167  @SuppressWarnings("Java7ApiChecker")
168  @IgnoreJRERequirement // Users will use this only if they're already using streams.
169  public void addAll(LongStream values) {
170    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
171  }
172
173  /**
174   * Adds the given statistics to the dataset, as if the individual values used to compute the
175   * statistics had been added directly.
176   */
177  public void addAll(Stats values) {
178    if (values.count() == 0) {
179      return;
180    }
181    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
182  }
183
184  /**
185   * Adds the given statistics to the dataset, as if the individual values used to compute the
186   * statistics had been added directly.
187   *
188   * @since 28.2
189   */
190  public void addAll(StatsAccumulator values) {
191    if (values.count() == 0) {
192      return;
193    }
194    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
195  }
196
197  private void merge(
198      long otherCount,
199      double otherMean,
200      double otherSumOfSquaresOfDeltas,
201      double otherMin,
202      double otherMax) {
203    if (count == 0) {
204      count = otherCount;
205      mean = otherMean;
206      sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
207      min = otherMin;
208      max = otherMax;
209    } else {
210      count += otherCount;
211      if (isFinite(mean) && isFinite(otherMean)) {
212        // This is a generalized version of the calculation in add(double) above.
213        double delta = otherMean - mean;
214        mean += delta * otherCount / count;
215        sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount;
216      } else {
217        mean = calculateNewMeanNonFinite(mean, otherMean);
218        sumOfSquaresOfDeltas = NaN;
219      }
220      min = Math.min(min, otherMin);
221      max = Math.max(max, otherMax);
222    }
223  }
224
225  /** Returns an immutable snapshot of the current statistics. */
226  public Stats snapshot() {
227    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
228  }
229
230  /** Returns the number of values. */
231  public long count() {
232    return count;
233  }
234
235  /**
236   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
237   * values. The count must be non-zero.
238   *
239   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
240   * the arithmetic mean of the population.
241   *
242   * <h3>Non-finite values</h3>
243   *
244   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
245   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
246   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
247   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
248   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
249   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
250   *
251   * @throws IllegalStateException if the dataset is empty
252   */
253  public double mean() {
254    checkState(count != 0);
255    return mean;
256  }
257
258  /**
259   * Returns the sum of the values.
260   *
261   * <h3>Non-finite values</h3>
262   *
263   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
264   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
265   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
266   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
267   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
268   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
269   */
270  public final double sum() {
271    return mean * count;
272  }
273
274  /**
275   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
276   * variance</a> of the values. The count must be non-zero.
277   *
278   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
279   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
280   * due to numerical errors. However, it is guaranteed never to return a negative result.
281   *
282   * <h3>Non-finite values</h3>
283   *
284   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
285   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
286   *
287   * @throws IllegalStateException if the dataset is empty
288   */
289  public final double populationVariance() {
290    checkState(count != 0);
291    if (isNaN(sumOfSquaresOfDeltas)) {
292      return NaN;
293    }
294    if (count == 1) {
295      return 0.0;
296    }
297    return ensureNonNegative(sumOfSquaresOfDeltas) / count;
298  }
299
300  /**
301   * Returns the <a
302   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
303   * population standard deviation</a> of the values. The count must be non-zero.
304   *
305   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
306   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
307   * due to numerical errors. However, it is guaranteed never to return a negative result.
308   *
309   * <h3>Non-finite values</h3>
310   *
311   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
312   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
313   *
314   * @throws IllegalStateException if the dataset is empty
315   */
316  public final double populationStandardDeviation() {
317    return Math.sqrt(populationVariance());
318  }
319
320  /**
321   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
322   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
323   * unbiased estimator of the population variance of the population. The count must be greater than
324   * one.
325   *
326   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
327   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
328   *
329   * <h3>Non-finite values</h3>
330   *
331   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
332   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
333   *
334   * @throws IllegalStateException if the dataset is empty or contains a single value
335   */
336  public final double sampleVariance() {
337    checkState(count > 1);
338    if (isNaN(sumOfSquaresOfDeltas)) {
339      return NaN;
340    }
341    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
342  }
343
344  /**
345   * Returns the <a
346   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
347   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
348   * population, this is an estimator of the population standard deviation of the population which
349   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
350   * the distribution). The count must be greater than one.
351   *
352   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
353   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
354   *
355   * <h3>Non-finite values</h3>
356   *
357   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
358   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
359   *
360   * @throws IllegalStateException if the dataset is empty or contains a single value
361   */
362  public final double sampleStandardDeviation() {
363    return Math.sqrt(sampleVariance());
364  }
365
366  /**
367   * Returns the lowest value in the dataset. The count must be non-zero.
368   *
369   * <h3>Non-finite values</h3>
370   *
371   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
372   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
373   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
374   * only then the result is the lowest finite value. If it contains {@link
375   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
376   *
377   * @throws IllegalStateException if the dataset is empty
378   */
379  public double min() {
380    checkState(count != 0);
381    return min;
382  }
383
384  /**
385   * Returns the highest value in the dataset. The count must be non-zero.
386   *
387   * <h3>Non-finite values</h3>
388   *
389   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
390   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
391   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
392   * only then the result is the highest finite value. If it contains {@link
393   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
394   *
395   * @throws IllegalStateException if the dataset is empty
396   */
397  public double max() {
398    checkState(count != 0);
399    return max;
400  }
401
402  double sumOfSquaresOfDeltas() {
403    return sumOfSquaresOfDeltas;
404  }
405
406  /**
407   * Calculates the new value for the accumulated mean when a value is added, in the case where at
408   * least one of the previous mean and the value is non-finite.
409   */
410  static double calculateNewMeanNonFinite(double previousMean, double value) {
411    /*
412     * Desired behaviour is to match the results of applying the naive mean formula. In particular,
413     * the update formula can subtract infinities in cases where the naive formula would add them.
414     *
415     * Consequently:
416     * 1. If the previous mean is finite and the new value is non-finite then the new mean is that
417     *    value (whether it is NaN or infinity).
418     * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged
419     *    (whether it is NaN or infinity).
420     * 3. If both the previous mean and the new value are non-finite and...
421     * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN.
422     * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged.
423     * 3c. ...they are different infinities (so mean != value) then the new mean is NaN.
424     */
425    if (isFinite(previousMean)) {
426      // This is case 1.
427      return value;
428    } else if (isFinite(value) || previousMean == value) {
429      // This is case 2. or 3b.
430      return previousMean;
431    } else {
432      // This is case 3a. or 3c.
433      return NaN;
434    }
435  }
436}