001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkState; 018import static com.google.common.math.DoubleUtils.ensureNonNegative; 019import static com.google.common.primitives.Doubles.isFinite; 020import static java.lang.Double.NaN; 021import static java.lang.Double.isNaN; 022 023import com.google.common.annotations.GwtIncompatible; 024import com.google.common.annotations.J2ktIncompatible; 025import java.util.Iterator; 026import java.util.stream.DoubleStream; 027import java.util.stream.IntStream; 028import java.util.stream.LongStream; 029 030/** 031 * A mutable object which accumulates double values and tracks some basic statistics over all the 032 * values added so far. The values may be added singly or in groups. This class is not thread safe. 033 * 034 * @author Pete Gillin 035 * @author Kevin Bourrillion 036 * @since 20.0 037 */ 038@J2ktIncompatible 039@GwtIncompatible 040public final class StatsAccumulator { 041 /** Creates a new accumulator. */ 042 public StatsAccumulator() {} 043 044 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 045 // methods of this class. 046 private long count = 0; 047 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 048 private double sumOfSquaresOfDeltas = 0.0; 049 private double min = NaN; // any value will do 050 private double max = NaN; // any value will do 051 052 /** Adds the given value to the dataset. */ 053 public void add(double value) { 054 if (count == 0) { 055 count = 1; 056 mean = value; 057 min = value; 058 max = value; 059 if (!isFinite(value)) { 060 sumOfSquaresOfDeltas = NaN; 061 } 062 } else { 063 count++; 064 if (isFinite(value) && isFinite(mean)) { 065 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 066 double delta = value - mean; 067 mean += delta / count; 068 sumOfSquaresOfDeltas += delta * (value - mean); 069 } else { 070 mean = calculateNewMeanNonFinite(mean, value); 071 sumOfSquaresOfDeltas = NaN; 072 } 073 min = Math.min(min, value); 074 max = Math.max(max, value); 075 } 076 } 077 078 /** 079 * Adds the given values to the dataset. 080 * 081 * @param values a series of values, which will be converted to {@code double} values (this may 082 * cause loss of precision) 083 */ 084 public void addAll(Iterable<? extends Number> values) { 085 for (Number value : values) { 086 add(value.doubleValue()); 087 } 088 } 089 090 /** 091 * Adds the given values to the dataset. 092 * 093 * @param values a series of values, which will be converted to {@code double} values (this may 094 * cause loss of precision) 095 */ 096 public void addAll(Iterator<? extends Number> values) { 097 while (values.hasNext()) { 098 add(values.next().doubleValue()); 099 } 100 } 101 102 /** 103 * Adds the given values to the dataset. 104 * 105 * @param values a series of values 106 */ 107 public void addAll(double... values) { 108 for (double value : values) { 109 add(value); 110 } 111 } 112 113 /** 114 * Adds the given values to the dataset. 115 * 116 * @param values a series of values 117 */ 118 public void addAll(int... values) { 119 for (int value : values) { 120 add(value); 121 } 122 } 123 124 /** 125 * Adds the given values to the dataset. 126 * 127 * @param values a series of values, which will be converted to {@code double} values (this may 128 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 129 */ 130 public void addAll(long... values) { 131 for (long value : values) { 132 add(value); 133 } 134 } 135 136 /** 137 * Adds the given values to the dataset. The stream will be completely consumed by this method. 138 * 139 * @param values a series of values 140 * @since 33.4.0 (but since 28.2 in the JRE flavor) 141 */ 142 @SuppressWarnings("Java7ApiChecker") 143 @IgnoreJRERequirement // Users will use this only if they're already using streams. 144 public void addAll(DoubleStream values) { 145 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 146 } 147 148 /** 149 * Adds the given values to the dataset. The stream will be completely consumed by this method. 150 * 151 * @param values a series of values 152 * @since 33.4.0 (but since 28.2 in the JRE flavor) 153 */ 154 @SuppressWarnings("Java7ApiChecker") 155 @IgnoreJRERequirement // Users will use this only if they're already using streams. 156 public void addAll(IntStream values) { 157 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 158 } 159 160 /** 161 * Adds the given values to the dataset. The stream will be completely consumed by this method. 162 * 163 * @param values a series of values, which will be converted to {@code double} values (this may 164 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 165 * @since 33.4.0 (but since 28.2 in the JRE flavor) 166 */ 167 @SuppressWarnings("Java7ApiChecker") 168 @IgnoreJRERequirement // Users will use this only if they're already using streams. 169 public void addAll(LongStream values) { 170 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 171 } 172 173 /** 174 * Adds the given statistics to the dataset, as if the individual values used to compute the 175 * statistics had been added directly. 176 */ 177 public void addAll(Stats values) { 178 if (values.count() == 0) { 179 return; 180 } 181 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 182 } 183 184 /** 185 * Adds the given statistics to the dataset, as if the individual values used to compute the 186 * statistics had been added directly. 187 * 188 * @since 28.2 189 */ 190 public void addAll(StatsAccumulator values) { 191 if (values.count() == 0) { 192 return; 193 } 194 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 195 } 196 197 private void merge( 198 long otherCount, 199 double otherMean, 200 double otherSumOfSquaresOfDeltas, 201 double otherMin, 202 double otherMax) { 203 if (count == 0) { 204 count = otherCount; 205 mean = otherMean; 206 sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas; 207 min = otherMin; 208 max = otherMax; 209 } else { 210 count += otherCount; 211 if (isFinite(mean) && isFinite(otherMean)) { 212 // This is a generalized version of the calculation in add(double) above. 213 double delta = otherMean - mean; 214 mean += delta * otherCount / count; 215 sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount; 216 } else { 217 mean = calculateNewMeanNonFinite(mean, otherMean); 218 sumOfSquaresOfDeltas = NaN; 219 } 220 min = Math.min(min, otherMin); 221 max = Math.max(max, otherMax); 222 } 223 } 224 225 /** Returns an immutable snapshot of the current statistics. */ 226 public Stats snapshot() { 227 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 228 } 229 230 /** Returns the number of values. */ 231 public long count() { 232 return count; 233 } 234 235 /** 236 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 237 * values. The count must be non-zero. 238 * 239 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 240 * the arithmetic mean of the population. 241 * 242 * <h3>Non-finite values</h3> 243 * 244 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 245 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 246 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 247 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 248 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 249 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 250 * 251 * @throws IllegalStateException if the dataset is empty 252 */ 253 public double mean() { 254 checkState(count != 0); 255 return mean; 256 } 257 258 /** 259 * Returns the sum of the values. 260 * 261 * <h3>Non-finite values</h3> 262 * 263 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 264 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 265 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 266 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 267 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 268 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 269 */ 270 public final double sum() { 271 return mean * count; 272 } 273 274 /** 275 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 276 * variance</a> of the values. The count must be non-zero. 277 * 278 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 279 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 280 * due to numerical errors. However, it is guaranteed never to return a negative result. 281 * 282 * <h3>Non-finite values</h3> 283 * 284 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 285 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 286 * 287 * @throws IllegalStateException if the dataset is empty 288 */ 289 public final double populationVariance() { 290 checkState(count != 0); 291 if (isNaN(sumOfSquaresOfDeltas)) { 292 return NaN; 293 } 294 if (count == 1) { 295 return 0.0; 296 } 297 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 298 } 299 300 /** 301 * Returns the <a 302 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 303 * population standard deviation</a> of the values. The count must be non-zero. 304 * 305 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 306 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 307 * due to numerical errors. However, it is guaranteed never to return a negative result. 308 * 309 * <h3>Non-finite values</h3> 310 * 311 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 312 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 313 * 314 * @throws IllegalStateException if the dataset is empty 315 */ 316 public final double populationStandardDeviation() { 317 return Math.sqrt(populationVariance()); 318 } 319 320 /** 321 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 322 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 323 * unbiased estimator of the population variance of the population. The count must be greater than 324 * one. 325 * 326 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 327 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 328 * 329 * <h3>Non-finite values</h3> 330 * 331 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 332 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 333 * 334 * @throws IllegalStateException if the dataset is empty or contains a single value 335 */ 336 public final double sampleVariance() { 337 checkState(count > 1); 338 if (isNaN(sumOfSquaresOfDeltas)) { 339 return NaN; 340 } 341 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 342 } 343 344 /** 345 * Returns the <a 346 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 347 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 348 * population, this is an estimator of the population standard deviation of the population which 349 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 350 * the distribution). The count must be greater than one. 351 * 352 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 353 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 354 * 355 * <h3>Non-finite values</h3> 356 * 357 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 358 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 359 * 360 * @throws IllegalStateException if the dataset is empty or contains a single value 361 */ 362 public final double sampleStandardDeviation() { 363 return Math.sqrt(sampleVariance()); 364 } 365 366 /** 367 * Returns the lowest value in the dataset. The count must be non-zero. 368 * 369 * <h3>Non-finite values</h3> 370 * 371 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 372 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 373 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 374 * only then the result is the lowest finite value. If it contains {@link 375 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 376 * 377 * @throws IllegalStateException if the dataset is empty 378 */ 379 public double min() { 380 checkState(count != 0); 381 return min; 382 } 383 384 /** 385 * Returns the highest value in the dataset. The count must be non-zero. 386 * 387 * <h3>Non-finite values</h3> 388 * 389 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 390 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 391 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 392 * only then the result is the highest finite value. If it contains {@link 393 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 394 * 395 * @throws IllegalStateException if the dataset is empty 396 */ 397 public double max() { 398 checkState(count != 0); 399 return max; 400 } 401 402 double sumOfSquaresOfDeltas() { 403 return sumOfSquaresOfDeltas; 404 } 405 406 /** 407 * Calculates the new value for the accumulated mean when a value is added, in the case where at 408 * least one of the previous mean and the value is non-finite. 409 */ 410 static double calculateNewMeanNonFinite(double previousMean, double value) { 411 /* 412 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 413 * the update formula can subtract infinities in cases where the naive formula would add them. 414 * 415 * Consequently: 416 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 417 * value (whether it is NaN or infinity). 418 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 419 * (whether it is NaN or infinity). 420 * 3. If both the previous mean and the new value are non-finite and... 421 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 422 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 423 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 424 */ 425 if (isFinite(previousMean)) { 426 // This is case 1. 427 return value; 428 } else if (isFinite(value) || previousMean == value) { 429 // This is case 2. or 3b. 430 return previousMean; 431 } else { 432 // This is case 3a. or 3c. 433 return NaN; 434 } 435 } 436}