001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkState; 018import static com.google.common.math.DoubleUtils.ensureNonNegative; 019import static com.google.common.primitives.Doubles.isFinite; 020import static java.lang.Double.NaN; 021import static java.lang.Double.isNaN; 022 023import com.google.common.annotations.GwtIncompatible; 024import com.google.common.annotations.J2ktIncompatible; 025import java.util.Iterator; 026import java.util.stream.DoubleStream; 027import java.util.stream.IntStream; 028import java.util.stream.LongStream; 029 030/** 031 * A mutable object which accumulates double values and tracks some basic statistics over all the 032 * values added so far. The values may be added singly or in groups. This class is not thread safe. 033 * 034 * @author Pete Gillin 035 * @author Kevin Bourrillion 036 * @since 20.0 037 */ 038@J2ktIncompatible 039@GwtIncompatible 040public final class StatsAccumulator { 041 /** Creates a new accumulator. */ 042 public StatsAccumulator() {} 043 044 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 045 // methods of this class. 046 private long count = 0; 047 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 048 private double sumOfSquaresOfDeltas = 0.0; 049 private double min = NaN; // any value will do 050 private double max = NaN; // any value will do 051 052 /** Adds the given value to the dataset. */ 053 public void add(double value) { 054 if (count == 0) { 055 count = 1; 056 mean = value; 057 min = value; 058 max = value; 059 if (!isFinite(value)) { 060 sumOfSquaresOfDeltas = NaN; 061 } 062 } else { 063 count++; 064 if (isFinite(value) && isFinite(mean)) { 065 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 066 double delta = value - mean; 067 mean += delta / count; 068 sumOfSquaresOfDeltas += delta * (value - mean); 069 } else { 070 mean = calculateNewMeanNonFinite(mean, value); 071 sumOfSquaresOfDeltas = NaN; 072 } 073 min = Math.min(min, value); 074 max = Math.max(max, value); 075 } 076 } 077 078 /** 079 * Adds the given values to the dataset. 080 * 081 * @param values a series of values, which will be converted to {@code double} values (this may 082 * cause loss of precision) 083 */ 084 public void addAll(Iterable<? extends Number> values) { 085 for (Number value : values) { 086 add(value.doubleValue()); 087 } 088 } 089 090 /** 091 * Adds the given values to the dataset. 092 * 093 * @param values a series of values, which will be converted to {@code double} values (this may 094 * cause loss of precision) 095 */ 096 public void addAll(Iterator<? extends Number> values) { 097 while (values.hasNext()) { 098 add(values.next().doubleValue()); 099 } 100 } 101 102 /** 103 * Adds the given values to the dataset. 104 * 105 * @param values a series of values 106 */ 107 public void addAll(double... values) { 108 for (double value : values) { 109 add(value); 110 } 111 } 112 113 /** 114 * Adds the given values to the dataset. 115 * 116 * @param values a series of values 117 */ 118 public void addAll(int... values) { 119 for (int value : values) { 120 add(value); 121 } 122 } 123 124 /** 125 * Adds the given values to the dataset. 126 * 127 * @param values a series of values, which will be converted to {@code double} values (this may 128 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 129 */ 130 public void addAll(long... values) { 131 for (long value : values) { 132 add(value); 133 } 134 } 135 136 /** 137 * Adds the given values to the dataset. The stream will be completely consumed by this method. 138 * 139 * @param values a series of values 140 * @since 28.2 (but only since 33.4.0 in the Android flavor) 141 */ 142 public void addAll(DoubleStream values) { 143 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 144 } 145 146 /** 147 * Adds the given values to the dataset. The stream will be completely consumed by this method. 148 * 149 * @param values a series of values 150 * @since 28.2 (but only since 33.4.0 in the Android flavor) 151 */ 152 public void addAll(IntStream values) { 153 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 154 } 155 156 /** 157 * Adds the given values to the dataset. The stream will be completely consumed by this method. 158 * 159 * @param values a series of values, which will be converted to {@code double} values (this may 160 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 161 * @since 28.2 (but only since 33.4.0 in the Android flavor) 162 */ 163 public void addAll(LongStream values) { 164 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 165 } 166 167 /** 168 * Adds the given statistics to the dataset, as if the individual values used to compute the 169 * statistics had been added directly. 170 */ 171 public void addAll(Stats values) { 172 if (values.count() == 0) { 173 return; 174 } 175 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 176 } 177 178 /** 179 * Adds the given statistics to the dataset, as if the individual values used to compute the 180 * statistics had been added directly. 181 * 182 * @since 28.2 183 */ 184 public void addAll(StatsAccumulator values) { 185 if (values.count() == 0) { 186 return; 187 } 188 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 189 } 190 191 private void merge( 192 long otherCount, 193 double otherMean, 194 double otherSumOfSquaresOfDeltas, 195 double otherMin, 196 double otherMax) { 197 if (count == 0) { 198 count = otherCount; 199 mean = otherMean; 200 sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas; 201 min = otherMin; 202 max = otherMax; 203 } else { 204 count += otherCount; 205 if (isFinite(mean) && isFinite(otherMean)) { 206 // This is a generalized version of the calculation in add(double) above. 207 double delta = otherMean - mean; 208 mean += delta * otherCount / count; 209 sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount; 210 } else { 211 mean = calculateNewMeanNonFinite(mean, otherMean); 212 sumOfSquaresOfDeltas = NaN; 213 } 214 min = Math.min(min, otherMin); 215 max = Math.max(max, otherMax); 216 } 217 } 218 219 /** Returns an immutable snapshot of the current statistics. */ 220 public Stats snapshot() { 221 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 222 } 223 224 /** Returns the number of values. */ 225 public long count() { 226 return count; 227 } 228 229 /** 230 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 231 * values. The count must be non-zero. 232 * 233 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 234 * the arithmetic mean of the population. 235 * 236 * <h3>Non-finite values</h3> 237 * 238 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 239 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 240 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 241 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 242 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 243 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 244 * 245 * @throws IllegalStateException if the dataset is empty 246 */ 247 public double mean() { 248 checkState(count != 0); 249 return mean; 250 } 251 252 /** 253 * Returns the sum of the values. 254 * 255 * <h3>Non-finite values</h3> 256 * 257 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 258 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 259 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 260 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 261 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 262 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 263 */ 264 public final double sum() { 265 return mean * count; 266 } 267 268 /** 269 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 270 * variance</a> of the values. The count must be non-zero. 271 * 272 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 273 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 274 * due to numerical errors. However, it is guaranteed never to return a negative result. 275 * 276 * <h3>Non-finite values</h3> 277 * 278 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 279 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 280 * 281 * @throws IllegalStateException if the dataset is empty 282 */ 283 public final double populationVariance() { 284 checkState(count != 0); 285 if (isNaN(sumOfSquaresOfDeltas)) { 286 return NaN; 287 } 288 if (count == 1) { 289 return 0.0; 290 } 291 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 292 } 293 294 /** 295 * Returns the <a 296 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 297 * population standard deviation</a> of the values. The count must be non-zero. 298 * 299 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 300 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 301 * due to numerical errors. However, it is guaranteed never to return a negative result. 302 * 303 * <h3>Non-finite values</h3> 304 * 305 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 306 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 307 * 308 * @throws IllegalStateException if the dataset is empty 309 */ 310 public final double populationStandardDeviation() { 311 return Math.sqrt(populationVariance()); 312 } 313 314 /** 315 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 316 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 317 * unbiased estimator of the population variance of the population. The count must be greater than 318 * one. 319 * 320 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 321 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 322 * 323 * <h3>Non-finite values</h3> 324 * 325 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 326 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 327 * 328 * @throws IllegalStateException if the dataset is empty or contains a single value 329 */ 330 public final double sampleVariance() { 331 checkState(count > 1); 332 if (isNaN(sumOfSquaresOfDeltas)) { 333 return NaN; 334 } 335 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 336 } 337 338 /** 339 * Returns the <a 340 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 341 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 342 * population, this is an estimator of the population standard deviation of the population which 343 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 344 * the distribution). The count must be greater than one. 345 * 346 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 347 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 348 * 349 * <h3>Non-finite values</h3> 350 * 351 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 352 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 353 * 354 * @throws IllegalStateException if the dataset is empty or contains a single value 355 */ 356 public final double sampleStandardDeviation() { 357 return Math.sqrt(sampleVariance()); 358 } 359 360 /** 361 * Returns the lowest value in the dataset. The count must be non-zero. 362 * 363 * <h3>Non-finite values</h3> 364 * 365 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 366 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 367 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 368 * only then the result is the lowest finite value. If it contains {@link 369 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 370 * 371 * @throws IllegalStateException if the dataset is empty 372 */ 373 public double min() { 374 checkState(count != 0); 375 return min; 376 } 377 378 /** 379 * Returns the highest value in the dataset. The count must be non-zero. 380 * 381 * <h3>Non-finite values</h3> 382 * 383 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 384 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 385 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 386 * only then the result is the highest finite value. If it contains {@link 387 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 388 * 389 * @throws IllegalStateException if the dataset is empty 390 */ 391 public double max() { 392 checkState(count != 0); 393 return max; 394 } 395 396 double sumOfSquaresOfDeltas() { 397 return sumOfSquaresOfDeltas; 398 } 399 400 /** 401 * Calculates the new value for the accumulated mean when a value is added, in the case where at 402 * least one of the previous mean and the value is non-finite. 403 */ 404 static double calculateNewMeanNonFinite(double previousMean, double value) { 405 /* 406 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 407 * the update formula can subtract infinities in cases where the naive formula would add them. 408 * 409 * Consequently: 410 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 411 * value (whether it is NaN or infinity). 412 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 413 * (whether it is NaN or infinity). 414 * 3. If both the previous mean and the new value are non-finite and... 415 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 416 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 417 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 418 */ 419 if (isFinite(previousMean)) { 420 // This is case 1. 421 return value; 422 } else if (isFinite(value) || previousMean == value) { 423 // This is case 2. or 3b. 424 return previousMean; 425 } else { 426 // This is case 3a. or 3c. 427 return NaN; 428 } 429 } 430}