001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkState;
018import static com.google.common.math.DoubleUtils.ensureNonNegative;
019import static com.google.common.primitives.Doubles.isFinite;
020import static java.lang.Double.NaN;
021import static java.lang.Double.isNaN;
022
023import com.google.common.annotations.Beta;
024import com.google.common.annotations.GwtIncompatible;
025import java.util.Iterator;
026
027/**
028 * A mutable object which accumulates double values and tracks some basic statistics over all the
029 * values added so far. The values may be added singly or in groups. This class is not thread safe.
030 *
031 * @author Pete Gillin
032 * @author Kevin Bourrillion
033 * @since 20.0
034 */
035@Beta
036@GwtIncompatible
037public final class StatsAccumulator {
038
039  // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
040  // methods of this class.
041  private long count = 0;
042  private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
043  private double sumOfSquaresOfDeltas = 0.0;
044  private double min = NaN; // any value will do
045  private double max = NaN; // any value will do
046
047  /** Adds the given value to the dataset. */
048  public void add(double value) {
049    if (count == 0) {
050      count = 1;
051      mean = value;
052      min = value;
053      max = value;
054      if (!isFinite(value)) {
055        sumOfSquaresOfDeltas = NaN;
056      }
057    } else {
058      count++;
059      if (isFinite(value) && isFinite(mean)) {
060        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
061        double delta = value - mean;
062        mean += delta / count;
063        sumOfSquaresOfDeltas += delta * (value - mean);
064      } else {
065        mean = calculateNewMeanNonFinite(mean, value);
066        sumOfSquaresOfDeltas = NaN;
067      }
068      min = Math.min(min, value);
069      max = Math.max(max, value);
070    }
071  }
072
073  /**
074   * Adds the given values to the dataset.
075   *
076   * @param values a series of values, which will be converted to {@code double} values (this may
077   *     cause loss of precision)
078   */
079  public void addAll(Iterable<? extends Number> values) {
080    for (Number value : values) {
081      add(value.doubleValue());
082    }
083  }
084
085  /**
086   * Adds the given values to the dataset.
087   *
088   * @param values a series of values, which will be converted to {@code double} values (this may
089   *     cause loss of precision)
090   */
091  public void addAll(Iterator<? extends Number> values) {
092    while (values.hasNext()) {
093      add(values.next().doubleValue());
094    }
095  }
096
097  /**
098   * Adds the given values to the dataset.
099   *
100   * @param values a series of values
101   */
102  public void addAll(double... values) {
103    for (double value : values) {
104      add(value);
105    }
106  }
107
108  /**
109   * Adds the given values to the dataset.
110   *
111   * @param values a series of values
112   */
113  public void addAll(int... values) {
114    for (int value : values) {
115      add(value);
116    }
117  }
118
119  /**
120   * Adds the given values to the dataset.
121   *
122   * @param values a series of values, which will be converted to {@code double} values (this may
123   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
124   */
125  public void addAll(long... values) {
126    for (long value : values) {
127      add(value);
128    }
129  }
130
131  /**
132   * Adds the given statistics to the dataset, as if the individual values used to compute the
133   * statistics had been added directly.
134   */
135  public void addAll(Stats values) {
136    if (values.count() == 0) {
137      return;
138    }
139    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
140  }
141
142  /**
143   * Adds the given statistics to the dataset, as if the individual values used to compute the
144   * statistics had been added directly.
145   *
146   * @since 28.2
147   */
148  public void addAll(StatsAccumulator values) {
149    if (values.count() == 0) {
150      return;
151    }
152    merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max());
153  }
154
155  private void merge(
156      long otherCount,
157      double otherMean,
158      double otherSumOfSquaresOfDeltas,
159      double otherMin,
160      double otherMax) {
161    if (count == 0) {
162      count = otherCount;
163      mean = otherMean;
164      sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas;
165      min = otherMin;
166      max = otherMax;
167    } else {
168      count += otherCount;
169      if (isFinite(mean) && isFinite(otherMean)) {
170        // This is a generalized version of the calculation in add(double) above.
171        double delta = otherMean - mean;
172        mean += delta * otherCount / count;
173        sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount;
174      } else {
175        mean = calculateNewMeanNonFinite(mean, otherMean);
176        sumOfSquaresOfDeltas = NaN;
177      }
178      min = Math.min(min, otherMin);
179      max = Math.max(max, otherMax);
180    }
181  }
182
183  /** Returns an immutable snapshot of the current statistics. */
184  public Stats snapshot() {
185    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
186  }
187
188  /** Returns the number of values. */
189  public long count() {
190    return count;
191  }
192
193  /**
194   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
195   * values. The count must be non-zero.
196   *
197   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
198   * the arithmetic mean of the population.
199   *
200   * <h3>Non-finite values</h3>
201   *
202   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
203   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
204   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
205   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
206   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
207   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
208   *
209   * @throws IllegalStateException if the dataset is empty
210   */
211  public double mean() {
212    checkState(count != 0);
213    return mean;
214  }
215
216  /**
217   * Returns the sum of the values.
218   *
219   * <h3>Non-finite values</h3>
220   *
221   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
222   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
223   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
224   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
225   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
226   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
227   */
228  public final double sum() {
229    return mean * count;
230  }
231
232  /**
233   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
234   * variance</a> of the values. The count must be non-zero.
235   *
236   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
237   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
238   * due to numerical errors. However, it is guaranteed never to return a negative result.
239   *
240   * <h3>Non-finite values</h3>
241   *
242   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
243   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
244   *
245   * @throws IllegalStateException if the dataset is empty
246   */
247  public final double populationVariance() {
248    checkState(count != 0);
249    if (isNaN(sumOfSquaresOfDeltas)) {
250      return NaN;
251    }
252    if (count == 1) {
253      return 0.0;
254    }
255    return ensureNonNegative(sumOfSquaresOfDeltas) / count;
256  }
257
258  /**
259   * Returns the <a
260   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
261   * population standard deviation</a> of the values. The count must be non-zero.
262   *
263   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
264   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
265   * due to numerical errors. However, it is guaranteed never to return a negative result.
266   *
267   * <h3>Non-finite values</h3>
268   *
269   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
270   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
271   *
272   * @throws IllegalStateException if the dataset is empty
273   */
274  public final double populationStandardDeviation() {
275    return Math.sqrt(populationVariance());
276  }
277
278  /**
279   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
280   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
281   * unbiased estimator of the population variance of the population. The count must be greater than
282   * one.
283   *
284   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
285   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
286   *
287   * <h3>Non-finite values</h3>
288   *
289   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
290   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
291   *
292   * @throws IllegalStateException if the dataset is empty or contains a single value
293   */
294  public final double sampleVariance() {
295    checkState(count > 1);
296    if (isNaN(sumOfSquaresOfDeltas)) {
297      return NaN;
298    }
299    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
300  }
301
302  /**
303   * Returns the <a
304   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
305   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
306   * population, this is an estimator of the population standard deviation of the population which
307   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
308   * the distribution). The count must be greater than one.
309   *
310   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
311   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
312   *
313   * <h3>Non-finite values</h3>
314   *
315   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
316   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
317   *
318   * @throws IllegalStateException if the dataset is empty or contains a single value
319   */
320  public final double sampleStandardDeviation() {
321    return Math.sqrt(sampleVariance());
322  }
323
324  /**
325   * Returns the lowest value in the dataset. The count must be non-zero.
326   *
327   * <h3>Non-finite values</h3>
328   *
329   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
330   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
331   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
332   * only then the result is the lowest finite value. If it contains {@link
333   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
334   *
335   * @throws IllegalStateException if the dataset is empty
336   */
337  public double min() {
338    checkState(count != 0);
339    return min;
340  }
341
342  /**
343   * Returns the highest value in the dataset. The count must be non-zero.
344   *
345   * <h3>Non-finite values</h3>
346   *
347   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
348   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
349   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
350   * only then the result is the highest finite value. If it contains {@link
351   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
352   *
353   * @throws IllegalStateException if the dataset is empty
354   */
355  public double max() {
356    checkState(count != 0);
357    return max;
358  }
359
360  double sumOfSquaresOfDeltas() {
361    return sumOfSquaresOfDeltas;
362  }
363
364  /**
365   * Calculates the new value for the accumulated mean when a value is added, in the case where at
366   * least one of the previous mean and the value is non-finite.
367   */
368  static double calculateNewMeanNonFinite(double previousMean, double value) {
369    /*
370     * Desired behaviour is to match the results of applying the naive mean formula. In particular,
371     * the update formula can subtract infinities in cases where the naive formula would add them.
372     *
373     * Consequently:
374     * 1. If the previous mean is finite and the new value is non-finite then the new mean is that
375     *    value (whether it is NaN or infinity).
376     * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged
377     *    (whether it is NaN or infinity).
378     * 3. If both the previous mean and the new value are non-finite and...
379     * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN.
380     * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged.
381     * 3c. ...they are different infinities (so mean != value) then the new mean is NaN.
382     */
383    if (isFinite(previousMean)) {
384      // This is case 1.
385      return value;
386    } else if (isFinite(value) || previousMean == value) {
387      // This is case 2. or 3b.
388      return previousMean;
389    } else {
390      // This is case 3a. or 3c.
391      return NaN;
392    }
393  }
394}