001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkState; 018import static com.google.common.math.DoubleUtils.ensureNonNegative; 019import static com.google.common.primitives.Doubles.isFinite; 020import static java.lang.Double.NaN; 021import static java.lang.Double.isNaN; 022 023import com.google.common.annotations.GwtIncompatible; 024import com.google.common.annotations.J2ktIncompatible; 025import java.util.Iterator; 026import java.util.stream.DoubleStream; 027import java.util.stream.IntStream; 028import java.util.stream.LongStream; 029 030/** 031 * A mutable object which accumulates double values and tracks some basic statistics over all the 032 * values added so far. The values may be added singly or in groups. This class is not thread safe. 033 * 034 * @author Pete Gillin 035 * @author Kevin Bourrillion 036 * @since 20.0 037 */ 038@J2ktIncompatible 039@GwtIncompatible 040@ElementTypesAreNonnullByDefault 041public final class StatsAccumulator { 042 /** Creates a new accumulator. */ 043 public StatsAccumulator() {} 044 045 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 046 // methods of this class. 047 private long count = 0; 048 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 049 private double sumOfSquaresOfDeltas = 0.0; 050 private double min = NaN; // any value will do 051 private double max = NaN; // any value will do 052 053 /** Adds the given value to the dataset. */ 054 public void add(double value) { 055 if (count == 0) { 056 count = 1; 057 mean = value; 058 min = value; 059 max = value; 060 if (!isFinite(value)) { 061 sumOfSquaresOfDeltas = NaN; 062 } 063 } else { 064 count++; 065 if (isFinite(value) && isFinite(mean)) { 066 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 067 double delta = value - mean; 068 mean += delta / count; 069 sumOfSquaresOfDeltas += delta * (value - mean); 070 } else { 071 mean = calculateNewMeanNonFinite(mean, value); 072 sumOfSquaresOfDeltas = NaN; 073 } 074 min = Math.min(min, value); 075 max = Math.max(max, value); 076 } 077 } 078 079 /** 080 * Adds the given values to the dataset. 081 * 082 * @param values a series of values, which will be converted to {@code double} values (this may 083 * cause loss of precision) 084 */ 085 public void addAll(Iterable<? extends Number> values) { 086 for (Number value : values) { 087 add(value.doubleValue()); 088 } 089 } 090 091 /** 092 * Adds the given values to the dataset. 093 * 094 * @param values a series of values, which will be converted to {@code double} values (this may 095 * cause loss of precision) 096 */ 097 public void addAll(Iterator<? extends Number> values) { 098 while (values.hasNext()) { 099 add(values.next().doubleValue()); 100 } 101 } 102 103 /** 104 * Adds the given values to the dataset. 105 * 106 * @param values a series of values 107 */ 108 public void addAll(double... values) { 109 for (double value : values) { 110 add(value); 111 } 112 } 113 114 /** 115 * Adds the given values to the dataset. 116 * 117 * @param values a series of values 118 */ 119 public void addAll(int... values) { 120 for (int value : values) { 121 add(value); 122 } 123 } 124 125 /** 126 * Adds the given values to the dataset. 127 * 128 * @param values a series of values, which will be converted to {@code double} values (this may 129 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 130 */ 131 public void addAll(long... values) { 132 for (long value : values) { 133 add(value); 134 } 135 } 136 137 /** 138 * Adds the given values to the dataset. The stream will be completely consumed by this method. 139 * 140 * @param values a series of values 141 * @since 28.2 (but only since 33.4.0 in the Android flavor) 142 */ 143 public void addAll(DoubleStream values) { 144 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 145 } 146 147 /** 148 * Adds the given values to the dataset. The stream will be completely consumed by this method. 149 * 150 * @param values a series of values 151 * @since 28.2 (but only since 33.4.0 in the Android flavor) 152 */ 153 public void addAll(IntStream values) { 154 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 155 } 156 157 /** 158 * Adds the given values to the dataset. The stream will be completely consumed by this method. 159 * 160 * @param values a series of values, which will be converted to {@code double} values (this may 161 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 162 * @since 28.2 (but only since 33.4.0 in the Android flavor) 163 */ 164 public void addAll(LongStream values) { 165 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 166 } 167 168 /** 169 * Adds the given statistics to the dataset, as if the individual values used to compute the 170 * statistics had been added directly. 171 */ 172 public void addAll(Stats values) { 173 if (values.count() == 0) { 174 return; 175 } 176 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 177 } 178 179 /** 180 * Adds the given statistics to the dataset, as if the individual values used to compute the 181 * statistics had been added directly. 182 * 183 * @since 28.2 184 */ 185 public void addAll(StatsAccumulator values) { 186 if (values.count() == 0) { 187 return; 188 } 189 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 190 } 191 192 private void merge( 193 long otherCount, 194 double otherMean, 195 double otherSumOfSquaresOfDeltas, 196 double otherMin, 197 double otherMax) { 198 if (count == 0) { 199 count = otherCount; 200 mean = otherMean; 201 sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas; 202 min = otherMin; 203 max = otherMax; 204 } else { 205 count += otherCount; 206 if (isFinite(mean) && isFinite(otherMean)) { 207 // This is a generalized version of the calculation in add(double) above. 208 double delta = otherMean - mean; 209 mean += delta * otherCount / count; 210 sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount; 211 } else { 212 mean = calculateNewMeanNonFinite(mean, otherMean); 213 sumOfSquaresOfDeltas = NaN; 214 } 215 min = Math.min(min, otherMin); 216 max = Math.max(max, otherMax); 217 } 218 } 219 220 /** Returns an immutable snapshot of the current statistics. */ 221 public Stats snapshot() { 222 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 223 } 224 225 /** Returns the number of values. */ 226 public long count() { 227 return count; 228 } 229 230 /** 231 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 232 * values. The count must be non-zero. 233 * 234 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 235 * the arithmetic mean of the population. 236 * 237 * <h3>Non-finite values</h3> 238 * 239 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 240 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 241 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 242 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 243 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 244 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 245 * 246 * @throws IllegalStateException if the dataset is empty 247 */ 248 public double mean() { 249 checkState(count != 0); 250 return mean; 251 } 252 253 /** 254 * Returns the sum of the values. 255 * 256 * <h3>Non-finite values</h3> 257 * 258 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 259 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 260 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 261 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 262 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 263 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 264 */ 265 public final double sum() { 266 return mean * count; 267 } 268 269 /** 270 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 271 * variance</a> of the values. The count must be non-zero. 272 * 273 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 274 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 275 * due to numerical errors. However, it is guaranteed never to return a negative result. 276 * 277 * <h3>Non-finite values</h3> 278 * 279 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 280 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 281 * 282 * @throws IllegalStateException if the dataset is empty 283 */ 284 public final double populationVariance() { 285 checkState(count != 0); 286 if (isNaN(sumOfSquaresOfDeltas)) { 287 return NaN; 288 } 289 if (count == 1) { 290 return 0.0; 291 } 292 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 293 } 294 295 /** 296 * Returns the <a 297 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 298 * population standard deviation</a> of the values. The count must be non-zero. 299 * 300 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 301 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 302 * due to numerical errors. However, it is guaranteed never to return a negative result. 303 * 304 * <h3>Non-finite values</h3> 305 * 306 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 307 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 308 * 309 * @throws IllegalStateException if the dataset is empty 310 */ 311 public final double populationStandardDeviation() { 312 return Math.sqrt(populationVariance()); 313 } 314 315 /** 316 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 317 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 318 * unbiased estimator of the population variance of the population. The count must be greater than 319 * one. 320 * 321 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 322 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 323 * 324 * <h3>Non-finite values</h3> 325 * 326 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 327 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 328 * 329 * @throws IllegalStateException if the dataset is empty or contains a single value 330 */ 331 public final double sampleVariance() { 332 checkState(count > 1); 333 if (isNaN(sumOfSquaresOfDeltas)) { 334 return NaN; 335 } 336 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 337 } 338 339 /** 340 * Returns the <a 341 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 342 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 343 * population, this is an estimator of the population standard deviation of the population which 344 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 345 * the distribution). The count must be greater than one. 346 * 347 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 348 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 349 * 350 * <h3>Non-finite values</h3> 351 * 352 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 353 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 354 * 355 * @throws IllegalStateException if the dataset is empty or contains a single value 356 */ 357 public final double sampleStandardDeviation() { 358 return Math.sqrt(sampleVariance()); 359 } 360 361 /** 362 * Returns the lowest value in the dataset. The count must be non-zero. 363 * 364 * <h3>Non-finite values</h3> 365 * 366 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 367 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 368 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 369 * only then the result is the lowest finite value. If it contains {@link 370 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 371 * 372 * @throws IllegalStateException if the dataset is empty 373 */ 374 public double min() { 375 checkState(count != 0); 376 return min; 377 } 378 379 /** 380 * Returns the highest value in the dataset. The count must be non-zero. 381 * 382 * <h3>Non-finite values</h3> 383 * 384 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 385 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 386 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 387 * only then the result is the highest finite value. If it contains {@link 388 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 389 * 390 * @throws IllegalStateException if the dataset is empty 391 */ 392 public double max() { 393 checkState(count != 0); 394 return max; 395 } 396 397 double sumOfSquaresOfDeltas() { 398 return sumOfSquaresOfDeltas; 399 } 400 401 /** 402 * Calculates the new value for the accumulated mean when a value is added, in the case where at 403 * least one of the previous mean and the value is non-finite. 404 */ 405 static double calculateNewMeanNonFinite(double previousMean, double value) { 406 /* 407 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 408 * the update formula can subtract infinities in cases where the naive formula would add them. 409 * 410 * Consequently: 411 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 412 * value (whether it is NaN or infinity). 413 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 414 * (whether it is NaN or infinity). 415 * 3. If both the previous mean and the new value are non-finite and... 416 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 417 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 418 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 419 */ 420 if (isFinite(previousMean)) { 421 // This is case 1. 422 return value; 423 } else if (isFinite(value) || previousMean == value) { 424 // This is case 2. or 3b. 425 return previousMean; 426 } else { 427 // This is case 3a. or 3c. 428 return NaN; 429 } 430 } 431}