001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkState; 018import static com.google.common.math.DoubleUtils.ensureNonNegative; 019import static com.google.common.primitives.Doubles.isFinite; 020import static java.lang.Double.NaN; 021import static java.lang.Double.isNaN; 022 023import com.google.common.annotations.Beta; 024import com.google.common.annotations.GwtIncompatible; 025import java.util.Iterator; 026import java.util.stream.DoubleStream; 027import java.util.stream.IntStream; 028import java.util.stream.LongStream; 029 030/** 031 * A mutable object which accumulates double values and tracks some basic statistics over all the 032 * values added so far. The values may be added singly or in groups. This class is not thread safe. 033 * 034 * @author Pete Gillin 035 * @author Kevin Bourrillion 036 * @since 20.0 037 */ 038@Beta 039@GwtIncompatible 040public final class StatsAccumulator { 041 042 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 043 // methods of this class. 044 private long count = 0; 045 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 046 private double sumOfSquaresOfDeltas = 0.0; 047 private double min = NaN; // any value will do 048 private double max = NaN; // any value will do 049 050 /** Adds the given value to the dataset. */ 051 public void add(double value) { 052 if (count == 0) { 053 count = 1; 054 mean = value; 055 min = value; 056 max = value; 057 if (!isFinite(value)) { 058 sumOfSquaresOfDeltas = NaN; 059 } 060 } else { 061 count++; 062 if (isFinite(value) && isFinite(mean)) { 063 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 064 double delta = value - mean; 065 mean += delta / count; 066 sumOfSquaresOfDeltas += delta * (value - mean); 067 } else { 068 mean = calculateNewMeanNonFinite(mean, value); 069 sumOfSquaresOfDeltas = NaN; 070 } 071 min = Math.min(min, value); 072 max = Math.max(max, value); 073 } 074 } 075 076 /** 077 * Adds the given values to the dataset. 078 * 079 * @param values a series of values, which will be converted to {@code double} values (this may 080 * cause loss of precision) 081 */ 082 public void addAll(Iterable<? extends Number> values) { 083 for (Number value : values) { 084 add(value.doubleValue()); 085 } 086 } 087 088 /** 089 * Adds the given values to the dataset. 090 * 091 * @param values a series of values, which will be converted to {@code double} values (this may 092 * cause loss of precision) 093 */ 094 public void addAll(Iterator<? extends Number> values) { 095 while (values.hasNext()) { 096 add(values.next().doubleValue()); 097 } 098 } 099 100 /** 101 * Adds the given values to the dataset. 102 * 103 * @param values a series of values 104 */ 105 public void addAll(double... values) { 106 for (double value : values) { 107 add(value); 108 } 109 } 110 111 /** 112 * Adds the given values to the dataset. 113 * 114 * @param values a series of values 115 */ 116 public void addAll(int... values) { 117 for (int value : values) { 118 add(value); 119 } 120 } 121 122 /** 123 * Adds the given values to the dataset. 124 * 125 * @param values a series of values, which will be converted to {@code double} values (this may 126 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 127 */ 128 public void addAll(long... values) { 129 for (long value : values) { 130 add(value); 131 } 132 } 133 134 /** 135 * Adds the given values to the dataset. The stream will be completely consumed by this method. 136 * 137 * @param values a series of values 138 * @since 28.2 139 */ 140 public void addAll(DoubleStream values) { 141 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 142 } 143 144 /** 145 * Adds the given values to the dataset. The stream will be completely consumed by this method. 146 * 147 * @param values a series of values 148 * @since 28.2 149 */ 150 public void addAll(IntStream values) { 151 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 152 } 153 154 /** 155 * Adds the given values to the dataset. The stream will be completely consumed by this method. 156 * 157 * @param values a series of values, which will be converted to {@code double} values (this may 158 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 159 * @since 28.2 160 */ 161 public void addAll(LongStream values) { 162 addAll(values.collect(StatsAccumulator::new, StatsAccumulator::add, StatsAccumulator::addAll)); 163 } 164 165 /** 166 * Adds the given statistics to the dataset, as if the individual values used to compute the 167 * statistics had been added directly. 168 */ 169 public void addAll(Stats values) { 170 if (values.count() == 0) { 171 return; 172 } 173 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 174 } 175 176 /** 177 * Adds the given statistics to the dataset, as if the individual values used to compute the 178 * statistics had been added directly. 179 * 180 * @since 28.2 181 */ 182 public void addAll(StatsAccumulator values) { 183 if (values.count() == 0) { 184 return; 185 } 186 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 187 } 188 189 private void merge( 190 long otherCount, 191 double otherMean, 192 double otherSumOfSquaresOfDeltas, 193 double otherMin, 194 double otherMax) { 195 if (count == 0) { 196 count = otherCount; 197 mean = otherMean; 198 sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas; 199 min = otherMin; 200 max = otherMax; 201 } else { 202 count += otherCount; 203 if (isFinite(mean) && isFinite(otherMean)) { 204 // This is a generalized version of the calculation in add(double) above. 205 double delta = otherMean - mean; 206 mean += delta * otherCount / count; 207 sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount; 208 } else { 209 mean = calculateNewMeanNonFinite(mean, otherMean); 210 sumOfSquaresOfDeltas = NaN; 211 } 212 min = Math.min(min, otherMin); 213 max = Math.max(max, otherMax); 214 } 215 } 216 217 /** Returns an immutable snapshot of the current statistics. */ 218 public Stats snapshot() { 219 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 220 } 221 222 /** Returns the number of values. */ 223 public long count() { 224 return count; 225 } 226 227 /** 228 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 229 * values. The count must be non-zero. 230 * 231 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 232 * the arithmetic mean of the population. 233 * 234 * <h3>Non-finite values</h3> 235 * 236 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 237 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 238 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 239 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 240 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 241 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 242 * 243 * @throws IllegalStateException if the dataset is empty 244 */ 245 public double mean() { 246 checkState(count != 0); 247 return mean; 248 } 249 250 /** 251 * Returns the sum of the values. 252 * 253 * <h3>Non-finite values</h3> 254 * 255 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 256 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 257 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 258 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 259 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 260 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 261 */ 262 public final double sum() { 263 return mean * count; 264 } 265 266 /** 267 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 268 * variance</a> of the values. The count must be non-zero. 269 * 270 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 271 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 272 * due to numerical errors. However, it is guaranteed never to return a negative result. 273 * 274 * <h3>Non-finite values</h3> 275 * 276 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 277 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 278 * 279 * @throws IllegalStateException if the dataset is empty 280 */ 281 public final double populationVariance() { 282 checkState(count != 0); 283 if (isNaN(sumOfSquaresOfDeltas)) { 284 return NaN; 285 } 286 if (count == 1) { 287 return 0.0; 288 } 289 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 290 } 291 292 /** 293 * Returns the <a 294 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 295 * population standard deviation</a> of the values. The count must be non-zero. 296 * 297 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 298 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 299 * due to numerical errors. However, it is guaranteed never to return a negative result. 300 * 301 * <h3>Non-finite values</h3> 302 * 303 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 304 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 305 * 306 * @throws IllegalStateException if the dataset is empty 307 */ 308 public final double populationStandardDeviation() { 309 return Math.sqrt(populationVariance()); 310 } 311 312 /** 313 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 314 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 315 * unbiased estimator of the population variance of the population. The count must be greater than 316 * one. 317 * 318 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 319 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 320 * 321 * <h3>Non-finite values</h3> 322 * 323 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 324 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 325 * 326 * @throws IllegalStateException if the dataset is empty or contains a single value 327 */ 328 public final double sampleVariance() { 329 checkState(count > 1); 330 if (isNaN(sumOfSquaresOfDeltas)) { 331 return NaN; 332 } 333 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 334 } 335 336 /** 337 * Returns the <a 338 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 339 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 340 * population, this is an estimator of the population standard deviation of the population which 341 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 342 * the distribution). The count must be greater than one. 343 * 344 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 345 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 346 * 347 * <h3>Non-finite values</h3> 348 * 349 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 350 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 351 * 352 * @throws IllegalStateException if the dataset is empty or contains a single value 353 */ 354 public final double sampleStandardDeviation() { 355 return Math.sqrt(sampleVariance()); 356 } 357 358 /** 359 * Returns the lowest value in the dataset. The count must be non-zero. 360 * 361 * <h3>Non-finite values</h3> 362 * 363 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 364 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 365 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 366 * only then the result is the lowest finite value. If it contains {@link 367 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 368 * 369 * @throws IllegalStateException if the dataset is empty 370 */ 371 public double min() { 372 checkState(count != 0); 373 return min; 374 } 375 376 /** 377 * Returns the highest value in the dataset. The count must be non-zero. 378 * 379 * <h3>Non-finite values</h3> 380 * 381 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 382 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 383 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 384 * only then the result is the highest finite value. If it contains {@link 385 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 386 * 387 * @throws IllegalStateException if the dataset is empty 388 */ 389 public double max() { 390 checkState(count != 0); 391 return max; 392 } 393 394 double sumOfSquaresOfDeltas() { 395 return sumOfSquaresOfDeltas; 396 } 397 398 /** 399 * Calculates the new value for the accumulated mean when a value is added, in the case where at 400 * least one of the previous mean and the value is non-finite. 401 */ 402 static double calculateNewMeanNonFinite(double previousMean, double value) { 403 /* 404 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 405 * the update formula can subtract infinities in cases where the naive formula would add them. 406 * 407 * Consequently: 408 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 409 * value (whether it is NaN or infinity). 410 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 411 * (whether it is NaN or infinity). 412 * 3. If both the previous mean and the new value are non-finite and... 413 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 414 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 415 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 416 */ 417 if (isFinite(previousMean)) { 418 // This is case 1. 419 return value; 420 } else if (isFinite(value) || previousMean == value) { 421 // This is case 2. or 3b. 422 return previousMean; 423 } else { 424 // This is case 3a. or 3c. 425 return NaN; 426 } 427 } 428}