001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkState; 018import static com.google.common.math.DoubleUtils.ensureNonNegative; 019import static com.google.common.primitives.Doubles.isFinite; 020import static java.lang.Double.NaN; 021import static java.lang.Double.isNaN; 022 023import com.google.common.annotations.GwtIncompatible; 024import com.google.common.annotations.J2ktIncompatible; 025import java.util.Iterator; 026import java.util.stream.DoubleStream; 027import java.util.stream.IntStream; 028import java.util.stream.LongStream; 029 030/** 031 * A mutable object which accumulates double values and tracks some basic statistics over all the 032 * values added so far. The values may be added singly or in groups. This class is not thread safe. 033 * 034 * @author Pete Gillin 035 * @author Kevin Bourrillion 036 * @since 20.0 037 */ 038@J2ktIncompatible 039@GwtIncompatible 040@ElementTypesAreNonnullByDefault 041public final class StatsAccumulator { 042 /** Creates a new accumulator. */ 043 public StatsAccumulator() {} 044 045 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 046 // methods of this class. 047 private long count = 0; 048 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 049 private double sumOfSquaresOfDeltas = 0.0; 050 private double min = NaN; // any value will do 051 private double max = NaN; // any value will do 052 053 /** Adds the given value to the dataset. */ 054 public void add(double value) { 055 if (count == 0) { 056 count = 1; 057 mean = value; 058 min = value; 059 max = value; 060 if (!isFinite(value)) { 061 sumOfSquaresOfDeltas = NaN; 062 } 063 } else { 064 count++; 065 if (isFinite(value) && isFinite(mean)) { 066 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 067 double delta = value - mean; 068 mean += delta / count; 069 sumOfSquaresOfDeltas += delta * (value - mean); 070 } else { 071 mean = calculateNewMeanNonFinite(mean, value); 072 sumOfSquaresOfDeltas = NaN; 073 } 074 min = Math.min(min, value); 075 max = Math.max(max, value); 076 } 077 } 078 079 /** 080 * Adds the given values to the dataset. 081 * 082 * @param values a series of values, which will be converted to {@code double} values (this may 083 * cause loss of precision) 084 */ 085 public void addAll(Iterable<? extends Number> values) { 086 for (Number value : values) { 087 add(value.doubleValue()); 088 } 089 } 090 091 /** 092 * Adds the given values to the dataset. 093 * 094 * @param values a series of values, which will be converted to {@code double} values (this may 095 * cause loss of precision) 096 */ 097 public void addAll(Iterator<? extends Number> values) { 098 while (values.hasNext()) { 099 add(values.next().doubleValue()); 100 } 101 } 102 103 /** 104 * Adds the given values to the dataset. 105 * 106 * @param values a series of values 107 */ 108 public void addAll(double... values) { 109 for (double value : values) { 110 add(value); 111 } 112 } 113 114 /** 115 * Adds the given values to the dataset. 116 * 117 * @param values a series of values 118 */ 119 public void addAll(int... values) { 120 for (int value : values) { 121 add(value); 122 } 123 } 124 125 /** 126 * Adds the given values to the dataset. 127 * 128 * @param values a series of values, which will be converted to {@code double} values (this may 129 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 130 */ 131 public void addAll(long... values) { 132 for (long value : values) { 133 add(value); 134 } 135 } 136 137 /** 138 * Adds the given values to the dataset. The stream will be completely consumed by this method. 139 * 140 * @param values a series of values 141 * @since 33.4.0 (but since 28.2 in the JRE flavor) 142 */ 143 @SuppressWarnings("Java7ApiChecker") 144 @IgnoreJRERequirement // Users will use this only if they're already using streams. 145 public void addAll(DoubleStream values) { 146 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 147 } 148 149 /** 150 * Adds the given values to the dataset. The stream will be completely consumed by this method. 151 * 152 * @param values a series of values 153 * @since 33.4.0 (but since 28.2 in the JRE flavor) 154 */ 155 @SuppressWarnings("Java7ApiChecker") 156 @IgnoreJRERequirement // Users will use this only if they're already using streams. 157 public void addAll(IntStream values) { 158 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 159 } 160 161 /** 162 * Adds the given values to the dataset. The stream will be completely consumed by this method. 163 * 164 * @param values a series of values, which will be converted to {@code double} values (this may 165 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 166 * @since 33.4.0 (but since 28.2 in the JRE flavor) 167 */ 168 @SuppressWarnings("Java7ApiChecker") 169 @IgnoreJRERequirement // Users will use this only if they're already using streams. 170 public void addAll(LongStream values) { 171 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 172 } 173 174 /** 175 * Adds the given statistics to the dataset, as if the individual values used to compute the 176 * statistics had been added directly. 177 */ 178 public void addAll(Stats values) { 179 if (values.count() == 0) { 180 return; 181 } 182 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 183 } 184 185 /** 186 * Adds the given statistics to the dataset, as if the individual values used to compute the 187 * statistics had been added directly. 188 * 189 * @since 28.2 190 */ 191 public void addAll(StatsAccumulator values) { 192 if (values.count() == 0) { 193 return; 194 } 195 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 196 } 197 198 private void merge( 199 long otherCount, 200 double otherMean, 201 double otherSumOfSquaresOfDeltas, 202 double otherMin, 203 double otherMax) { 204 if (count == 0) { 205 count = otherCount; 206 mean = otherMean; 207 sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas; 208 min = otherMin; 209 max = otherMax; 210 } else { 211 count += otherCount; 212 if (isFinite(mean) && isFinite(otherMean)) { 213 // This is a generalized version of the calculation in add(double) above. 214 double delta = otherMean - mean; 215 mean += delta * otherCount / count; 216 sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount; 217 } else { 218 mean = calculateNewMeanNonFinite(mean, otherMean); 219 sumOfSquaresOfDeltas = NaN; 220 } 221 min = Math.min(min, otherMin); 222 max = Math.max(max, otherMax); 223 } 224 } 225 226 /** Returns an immutable snapshot of the current statistics. */ 227 public Stats snapshot() { 228 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 229 } 230 231 /** Returns the number of values. */ 232 public long count() { 233 return count; 234 } 235 236 /** 237 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 238 * values. The count must be non-zero. 239 * 240 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 241 * the arithmetic mean of the population. 242 * 243 * <h3>Non-finite values</h3> 244 * 245 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 246 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 247 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 248 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 249 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 250 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 251 * 252 * @throws IllegalStateException if the dataset is empty 253 */ 254 public double mean() { 255 checkState(count != 0); 256 return mean; 257 } 258 259 /** 260 * Returns the sum of the values. 261 * 262 * <h3>Non-finite values</h3> 263 * 264 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 265 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 266 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 267 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 268 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 269 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 270 */ 271 public final double sum() { 272 return mean * count; 273 } 274 275 /** 276 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 277 * variance</a> of the values. The count must be non-zero. 278 * 279 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 280 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 281 * due to numerical errors. However, it is guaranteed never to return a negative result. 282 * 283 * <h3>Non-finite values</h3> 284 * 285 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 286 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 287 * 288 * @throws IllegalStateException if the dataset is empty 289 */ 290 public final double populationVariance() { 291 checkState(count != 0); 292 if (isNaN(sumOfSquaresOfDeltas)) { 293 return NaN; 294 } 295 if (count == 1) { 296 return 0.0; 297 } 298 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 299 } 300 301 /** 302 * Returns the <a 303 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 304 * population standard deviation</a> of the values. The count must be non-zero. 305 * 306 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 307 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 308 * due to numerical errors. However, it is guaranteed never to return a negative result. 309 * 310 * <h3>Non-finite values</h3> 311 * 312 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 313 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 314 * 315 * @throws IllegalStateException if the dataset is empty 316 */ 317 public final double populationStandardDeviation() { 318 return Math.sqrt(populationVariance()); 319 } 320 321 /** 322 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 323 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 324 * unbiased estimator of the population variance of the population. The count must be greater than 325 * one. 326 * 327 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 328 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 329 * 330 * <h3>Non-finite values</h3> 331 * 332 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 333 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 334 * 335 * @throws IllegalStateException if the dataset is empty or contains a single value 336 */ 337 public final double sampleVariance() { 338 checkState(count > 1); 339 if (isNaN(sumOfSquaresOfDeltas)) { 340 return NaN; 341 } 342 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 343 } 344 345 /** 346 * Returns the <a 347 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 348 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 349 * population, this is an estimator of the population standard deviation of the population which 350 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 351 * the distribution). The count must be greater than one. 352 * 353 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 354 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 355 * 356 * <h3>Non-finite values</h3> 357 * 358 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 359 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 360 * 361 * @throws IllegalStateException if the dataset is empty or contains a single value 362 */ 363 public final double sampleStandardDeviation() { 364 return Math.sqrt(sampleVariance()); 365 } 366 367 /** 368 * Returns the lowest value in the dataset. The count must be non-zero. 369 * 370 * <h3>Non-finite values</h3> 371 * 372 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 373 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 374 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 375 * only then the result is the lowest finite value. If it contains {@link 376 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 377 * 378 * @throws IllegalStateException if the dataset is empty 379 */ 380 public double min() { 381 checkState(count != 0); 382 return min; 383 } 384 385 /** 386 * Returns the highest value in the dataset. The count must be non-zero. 387 * 388 * <h3>Non-finite values</h3> 389 * 390 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 391 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 392 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 393 * only then the result is the highest finite value. If it contains {@link 394 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 395 * 396 * @throws IllegalStateException if the dataset is empty 397 */ 398 public double max() { 399 checkState(count != 0); 400 return max; 401 } 402 403 double sumOfSquaresOfDeltas() { 404 return sumOfSquaresOfDeltas; 405 } 406 407 /** 408 * Calculates the new value for the accumulated mean when a value is added, in the case where at 409 * least one of the previous mean and the value is non-finite. 410 */ 411 static double calculateNewMeanNonFinite(double previousMean, double value) { 412 /* 413 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 414 * the update formula can subtract infinities in cases where the naive formula would add them. 415 * 416 * Consequently: 417 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 418 * value (whether it is NaN or infinity). 419 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 420 * (whether it is NaN or infinity). 421 * 3. If both the previous mean and the new value are non-finite and... 422 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 423 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 424 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 425 */ 426 if (isFinite(previousMean)) { 427 // This is case 1. 428 return value; 429 } else if (isFinite(value) || previousMean == value) { 430 // This is case 2. or 3b. 431 return previousMean; 432 } else { 433 // This is case 3a. or 3c. 434 return NaN; 435 } 436 } 437}