001/*
002 * Copyright (C) 2012 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.math;
016
017import static com.google.common.base.Preconditions.checkState;
018import static com.google.common.math.DoubleUtils.ensureNonNegative;
019import static com.google.common.primitives.Doubles.isFinite;
020import static java.lang.Double.NaN;
021import static java.lang.Double.isNaN;
022
023import com.google.common.annotations.Beta;
024import com.google.common.annotations.GwtIncompatible;
025import java.util.Iterator;
026
027/**
028 * A mutable object which accumulates double values and tracks some basic statistics over all the
029 * values added so far. The values may be added singly or in groups. This class is not thread safe.
030 *
031 * @author Pete Gillin
032 * @author Kevin Bourrillion
033 * @since 20.0
034 */
035@Beta
036@GwtIncompatible
037public final class StatsAccumulator {
038
039  // These fields must satisfy the requirements of Stats' constructor as well as those of the stat
040  // methods of this class.
041  private long count = 0;
042  private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum
043  private double sumOfSquaresOfDeltas = 0.0;
044  private double min = NaN; // any value will do
045  private double max = NaN; // any value will do
046
047  /** Adds the given value to the dataset. */
048  public void add(double value) {
049    if (count == 0) {
050      count = 1;
051      mean = value;
052      min = value;
053      max = value;
054      if (!isFinite(value)) {
055        sumOfSquaresOfDeltas = NaN;
056      }
057    } else {
058      count++;
059      if (isFinite(value) && isFinite(mean)) {
060        // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16)
061        double delta = value - mean;
062        mean += delta / count;
063        sumOfSquaresOfDeltas += delta * (value - mean);
064      } else {
065        mean = calculateNewMeanNonFinite(mean, value);
066        sumOfSquaresOfDeltas = NaN;
067      }
068      min = Math.min(min, value);
069      max = Math.max(max, value);
070    }
071  }
072
073  /**
074   * Adds the given values to the dataset.
075   *
076   * @param values a series of values, which will be converted to {@code double} values (this may
077   *     cause loss of precision)
078   */
079  public void addAll(Iterable<? extends Number> values) {
080    for (Number value : values) {
081      add(value.doubleValue());
082    }
083  }
084
085  /**
086   * Adds the given values to the dataset.
087   *
088   * @param values a series of values, which will be converted to {@code double} values (this may
089   *     cause loss of precision)
090   */
091  public void addAll(Iterator<? extends Number> values) {
092    while (values.hasNext()) {
093      add(values.next().doubleValue());
094    }
095  }
096
097  /**
098   * Adds the given values to the dataset.
099   *
100   * @param values a series of values
101   */
102  public void addAll(double... values) {
103    for (double value : values) {
104      add(value);
105    }
106  }
107
108  /**
109   * Adds the given values to the dataset.
110   *
111   * @param values a series of values
112   */
113  public void addAll(int... values) {
114    for (int value : values) {
115      add(value);
116    }
117  }
118
119  /**
120   * Adds the given values to the dataset.
121   *
122   * @param values a series of values, which will be converted to {@code double} values (this may
123   *     cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15))
124   */
125  public void addAll(long... values) {
126    for (long value : values) {
127      add(value);
128    }
129  }
130
131  /**
132   * Adds the given statistics to the dataset, as if the individual values used to compute the
133   * statistics had been added directly.
134   */
135  public void addAll(Stats values) {
136    if (values.count() == 0) {
137      return;
138    }
139
140    if (count == 0) {
141      count = values.count();
142      mean = values.mean();
143      sumOfSquaresOfDeltas = values.sumOfSquaresOfDeltas();
144      min = values.min();
145      max = values.max();
146    } else {
147      count += values.count();
148      if (isFinite(mean) && isFinite(values.mean())) {
149        // This is a generalized version of the calculation in add(double) above.
150        double delta = values.mean() - mean;
151        mean += delta * values.count() / count;
152        sumOfSquaresOfDeltas +=
153            values.sumOfSquaresOfDeltas() + delta * (values.mean() - mean) * values.count();
154      } else {
155        mean = calculateNewMeanNonFinite(mean, values.mean());
156        sumOfSquaresOfDeltas = NaN;
157      }
158      min = Math.min(min, values.min());
159      max = Math.max(max, values.max());
160    }
161  }
162
163  /** Returns an immutable snapshot of the current statistics. */
164  public Stats snapshot() {
165    return new Stats(count, mean, sumOfSquaresOfDeltas, min, max);
166  }
167
168  /** Returns the number of values. */
169  public long count() {
170    return count;
171  }
172
173  /**
174   * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the
175   * values. The count must be non-zero.
176   *
177   * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of
178   * the arithmetic mean of the population.
179   *
180   * <h3>Non-finite values</h3>
181   *
182   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
183   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
184   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
185   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
186   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
187   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
188   *
189   * @throws IllegalStateException if the dataset is empty
190   */
191  public double mean() {
192    checkState(count != 0);
193    return mean;
194  }
195
196  /**
197   * Returns the sum of the values.
198   *
199   * <h3>Non-finite values</h3>
200   *
201   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
202   * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the
203   * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
204   * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}.
205   * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link
206   * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}.
207   */
208  public final double sum() {
209    return mean * count;
210  }
211
212  /**
213   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population
214   * variance</a> of the values. The count must be non-zero.
215   *
216   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
217   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
218   * due to numerical errors. However, it is guaranteed never to return a negative result.
219   *
220   * <h3>Non-finite values</h3>
221   *
222   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
223   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
224   *
225   * @throws IllegalStateException if the dataset is empty
226   */
227  public final double populationVariance() {
228    checkState(count != 0);
229    if (isNaN(sumOfSquaresOfDeltas)) {
230      return NaN;
231    }
232    if (count == 1) {
233      return 0.0;
234    }
235    return ensureNonNegative(sumOfSquaresOfDeltas) / count;
236  }
237
238  /**
239   * Returns the <a
240   * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values">
241   * population standard deviation</a> of the values. The count must be non-zero.
242   *
243   * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It
244   * is not guaranteed to return zero when the dataset consists of the same value multiple times,
245   * due to numerical errors. However, it is guaranteed never to return a negative result.
246   *
247   * <h3>Non-finite values</h3>
248   *
249   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
250   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
251   *
252   * @throws IllegalStateException if the dataset is empty
253   */
254  public final double populationStandardDeviation() {
255    return Math.sqrt(populationVariance());
256  }
257
258  /**
259   * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample
260   * variance</a> of the values. If this dataset is a sample drawn from a population, this is an
261   * unbiased estimator of the population variance of the population. The count must be greater than
262   * one.
263   *
264   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
265   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
266   *
267   * <h3>Non-finite values</h3>
268   *
269   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
270   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
271   *
272   * @throws IllegalStateException if the dataset is empty or contains a single value
273   */
274  public final double sampleVariance() {
275    checkState(count > 1);
276    if (isNaN(sumOfSquaresOfDeltas)) {
277      return NaN;
278    }
279    return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1);
280  }
281
282  /**
283   * Returns the <a
284   * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation">
285   * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a
286   * population, this is an estimator of the population standard deviation of the population which
287   * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on
288   * the distribution). The count must be greater than one.
289   *
290   * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple
291   * times, due to numerical errors. However, it is guaranteed never to return a negative result.
292   *
293   * <h3>Non-finite values</h3>
294   *
295   * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link
296   * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}.
297   *
298   * @throws IllegalStateException if the dataset is empty or contains a single value
299   */
300  public final double sampleStandardDeviation() {
301    return Math.sqrt(sampleVariance());
302  }
303
304  /**
305   * Returns the lowest value in the dataset. The count must be non-zero.
306   *
307   * <h3>Non-finite values</h3>
308   *
309   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
310   * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
311   * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values
312   * only then the result is the lowest finite value. If it contains {@link
313   * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}.
314   *
315   * @throws IllegalStateException if the dataset is empty
316   */
317  public double min() {
318    checkState(count != 0);
319    return min;
320  }
321
322  /**
323   * Returns the highest value in the dataset. The count must be non-zero.
324   *
325   * <h3>Non-finite values</h3>
326   *
327   * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it
328   * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link
329   * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values
330   * only then the result is the highest finite value. If it contains {@link
331   * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}.
332   *
333   * @throws IllegalStateException if the dataset is empty
334   */
335  public double max() {
336    checkState(count != 0);
337    return max;
338  }
339
340  double sumOfSquaresOfDeltas() {
341    return sumOfSquaresOfDeltas;
342  }
343
344  /**
345   * Calculates the new value for the accumulated mean when a value is added, in the case where at
346   * least one of the previous mean and the value is non-finite.
347   */
348  static double calculateNewMeanNonFinite(double previousMean, double value) {
349    /*
350     * Desired behaviour is to match the results of applying the naive mean formula. In particular,
351     * the update formula can subtract infinities in cases where the naive formula would add them.
352     *
353     * Consequently:
354     * 1. If the previous mean is finite and the new value is non-finite then the new mean is that
355     *    value (whether it is NaN or infinity).
356     * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged
357     *    (whether it is NaN or infinity).
358     * 3. If both the previous mean and the new value are non-finite and...
359     * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN.
360     * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged.
361     * 3c. ...they are different infinities (so mean != value) then the new mean is NaN.
362     */
363    if (isFinite(previousMean)) {
364      // This is case 1.
365      return value;
366    } else if (isFinite(value) || previousMean == value) {
367      // This is case 2. or 3b.
368      return previousMean;
369    } else {
370      // This is case 3a. or 3c.
371      return NaN;
372    }
373  }
374}