001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkState; 018import static com.google.common.math.DoubleUtils.ensureNonNegative; 019import static com.google.common.primitives.Doubles.isFinite; 020import static java.lang.Double.NaN; 021import static java.lang.Double.isNaN; 022 023import com.google.common.annotations.Beta; 024import com.google.common.annotations.GwtIncompatible; 025import java.util.Iterator; 026 027/** 028 * A mutable object which accumulates double values and tracks some basic statistics over all the 029 * values added so far. The values may be added singly or in groups. This class is not thread safe. 030 * 031 * @author Pete Gillin 032 * @author Kevin Bourrillion 033 * @since 20.0 034 */ 035@Beta 036@GwtIncompatible 037public final class StatsAccumulator { 038 039 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 040 // methods of this class. 041 private long count = 0; 042 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 043 private double sumOfSquaresOfDeltas = 0.0; 044 private double min = NaN; // any value will do 045 private double max = NaN; // any value will do 046 047 /** Adds the given value to the dataset. */ 048 public void add(double value) { 049 if (count == 0) { 050 count = 1; 051 mean = value; 052 min = value; 053 max = value; 054 if (!isFinite(value)) { 055 sumOfSquaresOfDeltas = NaN; 056 } 057 } else { 058 count++; 059 if (isFinite(value) && isFinite(mean)) { 060 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 061 double delta = value - mean; 062 mean += delta / count; 063 sumOfSquaresOfDeltas += delta * (value - mean); 064 } else { 065 mean = calculateNewMeanNonFinite(mean, value); 066 sumOfSquaresOfDeltas = NaN; 067 } 068 min = Math.min(min, value); 069 max = Math.max(max, value); 070 } 071 } 072 073 /** 074 * Adds the given values to the dataset. 075 * 076 * @param values a series of values, which will be converted to {@code double} values (this may 077 * cause loss of precision) 078 */ 079 public void addAll(Iterable<? extends Number> values) { 080 for (Number value : values) { 081 add(value.doubleValue()); 082 } 083 } 084 085 /** 086 * Adds the given values to the dataset. 087 * 088 * @param values a series of values, which will be converted to {@code double} values (this may 089 * cause loss of precision) 090 */ 091 public void addAll(Iterator<? extends Number> values) { 092 while (values.hasNext()) { 093 add(values.next().doubleValue()); 094 } 095 } 096 097 /** 098 * Adds the given values to the dataset. 099 * 100 * @param values a series of values 101 */ 102 public void addAll(double... values) { 103 for (double value : values) { 104 add(value); 105 } 106 } 107 108 /** 109 * Adds the given values to the dataset. 110 * 111 * @param values a series of values 112 */ 113 public void addAll(int... values) { 114 for (int value : values) { 115 add(value); 116 } 117 } 118 119 /** 120 * Adds the given values to the dataset. 121 * 122 * @param values a series of values, which will be converted to {@code double} values (this may 123 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 124 */ 125 public void addAll(long... values) { 126 for (long value : values) { 127 add(value); 128 } 129 } 130 131 /** 132 * Adds the given statistics to the dataset, as if the individual values used to compute the 133 * statistics had been added directly. 134 */ 135 public void addAll(Stats values) { 136 if (values.count() == 0) { 137 return; 138 } 139 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 140 } 141 142 /** 143 * Adds the given statistics to the dataset, as if the individual values used to compute the 144 * statistics had been added directly. 145 * 146 * @since 28.2 147 */ 148 public void addAll(StatsAccumulator values) { 149 if (values.count() == 0) { 150 return; 151 } 152 merge(values.count(), values.mean(), values.sumOfSquaresOfDeltas(), values.min(), values.max()); 153 } 154 155 private void merge( 156 long otherCount, 157 double otherMean, 158 double otherSumOfSquaresOfDeltas, 159 double otherMin, 160 double otherMax) { 161 if (count == 0) { 162 count = otherCount; 163 mean = otherMean; 164 sumOfSquaresOfDeltas = otherSumOfSquaresOfDeltas; 165 min = otherMin; 166 max = otherMax; 167 } else { 168 count += otherCount; 169 if (isFinite(mean) && isFinite(otherMean)) { 170 // This is a generalized version of the calculation in add(double) above. 171 double delta = otherMean - mean; 172 mean += delta * otherCount / count; 173 sumOfSquaresOfDeltas += otherSumOfSquaresOfDeltas + delta * (otherMean - mean) * otherCount; 174 } else { 175 mean = calculateNewMeanNonFinite(mean, otherMean); 176 sumOfSquaresOfDeltas = NaN; 177 } 178 min = Math.min(min, otherMin); 179 max = Math.max(max, otherMax); 180 } 181 } 182 183 /** Returns an immutable snapshot of the current statistics. */ 184 public Stats snapshot() { 185 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 186 } 187 188 /** Returns the number of values. */ 189 public long count() { 190 return count; 191 } 192 193 /** 194 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 195 * values. The count must be non-zero. 196 * 197 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 198 * the arithmetic mean of the population. 199 * 200 * <h3>Non-finite values</h3> 201 * 202 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 203 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 204 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 205 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 206 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 207 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 208 * 209 * @throws IllegalStateException if the dataset is empty 210 */ 211 public double mean() { 212 checkState(count != 0); 213 return mean; 214 } 215 216 /** 217 * Returns the sum of the values. 218 * 219 * <h3>Non-finite values</h3> 220 * 221 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 222 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 223 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 224 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 225 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 226 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 227 */ 228 public final double sum() { 229 return mean * count; 230 } 231 232 /** 233 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 234 * variance</a> of the values. The count must be non-zero. 235 * 236 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 237 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 238 * due to numerical errors. However, it is guaranteed never to return a negative result. 239 * 240 * <h3>Non-finite values</h3> 241 * 242 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 243 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 244 * 245 * @throws IllegalStateException if the dataset is empty 246 */ 247 public final double populationVariance() { 248 checkState(count != 0); 249 if (isNaN(sumOfSquaresOfDeltas)) { 250 return NaN; 251 } 252 if (count == 1) { 253 return 0.0; 254 } 255 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 256 } 257 258 /** 259 * Returns the <a 260 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 261 * population standard deviation</a> of the values. The count must be non-zero. 262 * 263 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 264 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 265 * due to numerical errors. However, it is guaranteed never to return a negative result. 266 * 267 * <h3>Non-finite values</h3> 268 * 269 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 270 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 271 * 272 * @throws IllegalStateException if the dataset is empty 273 */ 274 public final double populationStandardDeviation() { 275 return Math.sqrt(populationVariance()); 276 } 277 278 /** 279 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 280 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 281 * unbiased estimator of the population variance of the population. The count must be greater than 282 * one. 283 * 284 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 285 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 286 * 287 * <h3>Non-finite values</h3> 288 * 289 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 290 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 291 * 292 * @throws IllegalStateException if the dataset is empty or contains a single value 293 */ 294 public final double sampleVariance() { 295 checkState(count > 1); 296 if (isNaN(sumOfSquaresOfDeltas)) { 297 return NaN; 298 } 299 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 300 } 301 302 /** 303 * Returns the <a 304 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 305 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 306 * population, this is an estimator of the population standard deviation of the population which 307 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 308 * the distribution). The count must be greater than one. 309 * 310 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 311 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 312 * 313 * <h3>Non-finite values</h3> 314 * 315 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 316 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 317 * 318 * @throws IllegalStateException if the dataset is empty or contains a single value 319 */ 320 public final double sampleStandardDeviation() { 321 return Math.sqrt(sampleVariance()); 322 } 323 324 /** 325 * Returns the lowest value in the dataset. The count must be non-zero. 326 * 327 * <h3>Non-finite values</h3> 328 * 329 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 330 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 331 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 332 * only then the result is the lowest finite value. If it contains {@link 333 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 334 * 335 * @throws IllegalStateException if the dataset is empty 336 */ 337 public double min() { 338 checkState(count != 0); 339 return min; 340 } 341 342 /** 343 * Returns the highest value in the dataset. The count must be non-zero. 344 * 345 * <h3>Non-finite values</h3> 346 * 347 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 348 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 349 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 350 * only then the result is the highest finite value. If it contains {@link 351 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 352 * 353 * @throws IllegalStateException if the dataset is empty 354 */ 355 public double max() { 356 checkState(count != 0); 357 return max; 358 } 359 360 double sumOfSquaresOfDeltas() { 361 return sumOfSquaresOfDeltas; 362 } 363 364 /** 365 * Calculates the new value for the accumulated mean when a value is added, in the case where at 366 * least one of the previous mean and the value is non-finite. 367 */ 368 static double calculateNewMeanNonFinite(double previousMean, double value) { 369 /* 370 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 371 * the update formula can subtract infinities in cases where the naive formula would add them. 372 * 373 * Consequently: 374 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 375 * value (whether it is NaN or infinity). 376 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 377 * (whether it is NaN or infinity). 378 * 3. If both the previous mean and the new value are non-finite and... 379 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 380 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 381 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 382 */ 383 if (isFinite(previousMean)) { 384 // This is case 1. 385 return value; 386 } else if (isFinite(value) || previousMean == value) { 387 // This is case 2. or 3b. 388 return previousMean; 389 } else { 390 // This is case 3a. or 3c. 391 return NaN; 392 } 393 } 394}