001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkState;
018import static com.google.common.math.DoubleUtils.ensureNonNegative;
019import static com.google.common.primitives.Doubles.isFinite;
020import static java.lang.Double.NaN;
021import static java.lang.Double.isNaN;
022
023import com.google.common.annotations.GwtIncompatible;
024import com.google.common.annotations.J2ktIncompatible;
025import java.util.Iterator;
026import java.util.stream.DoubleStream;
027import java.util.stream.IntStream;
028import java.util.stream.LongStream;
029
030/**
031 * A mutable object which accumulates double values and tracks some basic statistics over all the
032 * values added so far. The values may be added singly or in groups. This class is not thread safe.
033 *
034 * @author Pete Gillin
035 * @author Kevin Bourrillion
036 * @since 20.0
037 */
038@J2ktIncompatible
039@GwtIncompatible
040public final class StatsAccumulator {
041  /** Creates a new accumulator. */
042  public StatsAccumulator() {}
043
044  // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
045  // methods of this class.
046  private long count = 0;
047  private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
048  private double sumOfSquaresOfDeltas = 0.0;
049  private double min = NaN; // any value will do
050  private double max = NaN; // any value will do
051
052  /** Adds the given value to the dataset. */
053  public void add(double value) {
054    if (count == 0) {
055      count = 1;
056      mean = value;
057      min = value;
058      max = value;
059      if (!isFinite(value)) {
060        sumOfSquaresOfDeltas = NaN;
061      }
062    } else {
063      count++;
064      if (isFinite(value) && isFinite(mean)) {
065        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
066        double delta = value - mean;
067        mean += delta / count;
068        sumOfSquaresOfDeltas += delta * (value - mean);
069      } else {
070        mean = calculateNewMeanNonFinite(mean, value);
071        sumOfSquaresOfDeltas = NaN;
072      }
073      min = Math.min(min, value);
074      max = Math.max(max, value);
075    }
076  }
077
078  /**
079   * Adds the given values to the dataset.
080   *
081   * @param values a series of values, which will be converted to {@code double} values (this may
082   *     cause loss of precision)
083   */
084  public void addAll(Iterable<? extends Number> values) {
085    for (Number value : values) {
086      add(value.doubleValue());
087    }
088  }
089
090  /**
091   * Adds the given values to the dataset.
092   *
093   * @param values a series of values, which will be converted to {@code double} values (this may
094   *     cause loss of precision)
095   */
096  public void addAll(Iterator<? extends Number> values) {
097    while (values.hasNext()) {
098      add(values.next().doubleValue());
099    }
100  }
101
102  /**
103   * Adds the given values to the dataset.
104   *
105   * @param values a series of values
106   */
107  public void addAll(double... values) {
108    for (double value : values) {
109      add(value);
110    }
111  }
112
113  /**
114   * Adds the given values to the dataset.
115   *
116   * @param values a series of values
117   */
118  public void addAll(int... values) {
119    for (int value : values) {
120      add(value);
121    }
122  }
123
124  /**
125   * Adds the given values to the dataset.
126   *
127   * @param values a series of values, which will be converted to {@code double} values (this may
128   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
129   */
130  public void addAll(long... values) {
131    for (long value : values) {
132      add(value);
133    }
134  }
135
136  /**
137   * Adds the given values to the dataset. The stream will be completely consumed by this method.
138   *
139   * @param values a series of values
140   * @since 28.2 (but only since 33.4.0 in the Android flavor)
141   */
142  public void addAll(DoubleStream values) {
143    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
144  }
145
146  /**
147   * Adds the given values to the dataset. The stream will be completely consumed by this method.
148   *
149   * @param values a series of values
150   * @since 28.2 (but only since 33.4.0 in the Android flavor)
151   */
152  public void addAll(IntStream values) {
153    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
154  }
155
156  /**
157   * Adds the given values to the dataset. The stream will be completely consumed by this method.
158   *
159   * @param values a series of values, which will be converted to {@code double} values (this may
160   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
161   * @since 28.2 (but only since 33.4.0 in the Android flavor)
162   */
163  public void addAll(LongStream values) {
164    addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll));
165  }
166
167  /**
168   * Adds the given statistics to the dataset, as if the individual values used to compute the
169   * statistics had been added directly.
170   */
171  public void addAll(Stats values) {
172    if (values.count() == 0) {
173      return;
174    }
175    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
176  }
177
178  /**
179   * Adds the given statistics to the dataset, as if the individual values used to compute the
180   * statistics had been added directly.
181   *
182   * @since 28.2
183   */
184  public void addAll(StatsAccumulator values) {
185    if (values.count() == 0) {
186      return;
187    }
188    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
189  }
190
191  private void merge(
192      long otherCount,
193      double otherMean,
194      double otherSumOfSquaresOfDeltas,
195      double otherMin,
196      double otherMax) {
197    if (count == 0) {
198      count = otherCount;
199      mean = otherMean;
200      sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
201      min = otherMin;
202      max = otherMax;
203    } else {
204      count += otherCount;
205      if (isFinite(mean) && isFinite(otherMean)) {
206        // This is a generalized version of the calculation in add(double) above.
207        double delta = otherMean - mean;
208        mean += delta * otherCount / count;
209        sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount;
210      } else {
211        mean = calculateNewMeanNonFinite(mean, otherMean);
212        sumOfSquaresOfDeltas = NaN;
213      }
214      min = Math.min(min, otherMin);
215      max = Math.max(max, otherMax);
216    }
217  }
218
219  /** Returns an immutable snapshot of the current statistics. */
220  public Stats snapshot() {
221    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
222  }
223
224  /** Returns the number of values. */
225  public long count() {
226    return count;
227  }
228
229  /**
230   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
231   * values. The count must be non-zero.
232   *
233   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
234   * the arithmetic mean of the population.
235   *
236   * <h3>Non-finite values</h3>
237   *
238   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
239   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
240   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
241   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
242   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
243   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
244   *
245   * @throws IllegalStateException if the dataset is empty
246   */
247  public double mean() {
248    checkState(count != 0);
249    return mean;
250  }
251
252  /**
253   * Returns the sum of the values.
254   *
255   * <h3>Non-finite values</h3>
256   *
257   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
258   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
259   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
260   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
261   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
262   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
263   */
264  public final double sum() {
265    return mean * count;
266  }
267
268  /**
269   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
270   * variance</a> of the values. The count must be non-zero.
271   *
272   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
273   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
274   * due to numerical errors. However, it is guaranteed never to return a negative result.
275   *
276   * <h3>Non-finite values</h3>
277   *
278   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
279   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
280   *
281   * @throws IllegalStateException if the dataset is empty
282   */
283  public final double populationVariance() {
284    checkState(count != 0);
285    if (isNaN(sumOfSquaresOfDeltas)) {
286      return NaN;
287    }
288    if (count == 1) {
289      return 0.0;
290    }
291    return ensureNonNegative(sumOfSquaresOfDeltas) / count;
292  }
293
294  /**
295   * Returns the <a
296   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
297   * population standard deviation</a> of the values. The count must be non-zero.
298   *
299   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
300   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
301   * due to numerical errors. However, it is guaranteed never to return a negative result.
302   *
303   * <h3>Non-finite values</h3>
304   *
305   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
306   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
307   *
308   * @throws IllegalStateException if the dataset is empty
309   */
310  public final double populationStandardDeviation() {
311    return Math.sqrt(populationVariance());
312  }
313
314  /**
315   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
316   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
317   * unbiased estimator of the population variance of the population. The count must be greater than
318   * one.
319   *
320   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
321   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
322   *
323   * <h3>Non-finite values</h3>
324   *
325   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
326   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
327   *
328   * @throws IllegalStateException if the dataset is empty or contains a single value
329   */
330  public final double sampleVariance() {
331    checkState(count > 1);
332    if (isNaN(sumOfSquaresOfDeltas)) {
333      return NaN;
334    }
335    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
336  }
337
338  /**
339   * Returns the <a
340   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
341   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
342   * population, this is an estimator of the population standard deviation of the population which
343   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
344   * the distribution). The count must be greater than one.
345   *
346   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
347   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
348   *
349   * <h3>Non-finite values</h3>
350   *
351   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
352   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
353   *
354   * @throws IllegalStateException if the dataset is empty or contains a single value
355   */
356  public final double sampleStandardDeviation() {
357    return Math.sqrt(sampleVariance());
358  }
359
360  /**
361   * Returns the lowest value in the dataset. The count must be non-zero.
362   *
363   * <h3>Non-finite values</h3>
364   *
365   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
366   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
367   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
368   * only then the result is the lowest finite value. If it contains {@link
369   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
370   *
371   * @throws IllegalStateException if the dataset is empty
372   */
373  public double min() {
374    checkState(count != 0);
375    return min;
376  }
377
378  /**
379   * Returns the highest value in the dataset. The count must be non-zero.
380   *
381   * <h3>Non-finite values</h3>
382   *
383   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
384   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
385   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
386   * only then the result is the highest finite value. If it contains {@link
387   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
388   *
389   * @throws IllegalStateException if the dataset is empty
390   */
391  public double max() {
392    checkState(count != 0);
393    return max;
394  }
395
396  double sumOfSquaresOfDeltas() {
397    return sumOfSquaresOfDeltas;
398  }
399
400  /**
401   * Calculates the new value for the accumulated mean when a value is added, in the case where at
402   * least one of the previous mean and the value is non-finite.
403   */
404  static double calculateNewMeanNonFinite(double previousMean, double value) {
405    /*
406     * Desired behaviour is to match the results of applying the naive mean formula. In particular,
407     * the update formula can subtract infinities in cases where the naive formula would add them.
408     *
409     * Consequently:
410     * 1. If the previous mean is finite and the new value is non-finite then the new mean is that
411     *    value (whether it is NaN or infinity).
412     * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged
413     *    (whether it is NaN or infinity).
414     * 3. If both the previous mean and the new value are non-finite and...
415     * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN.
416     * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged.
417     * 3c. ...they are different infinities (so mean != value) then the new mean is NaN.
418     */
419    if (isFinite(previousMean)) {
420      // This is case 1.
421      return value;
422    } else if (isFinite(value) || previousMean == value) {
423      // This is case 2. or 3b.
424      return previousMean;
425    } else {
426      // This is case 3a. or 3c.
427      return NaN;
428    }
429  }
430}