001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkState; 018import static com.google.common.math.DoubleUtils.ensureNonNegative; 019import static com.google.common.primitives.Doubles.isFinite; 020import static java.lang.Double.NaN; 021import static java.lang.Double.isNaN; 022 023import com.google.common.annotations.Beta; 024import com.google.common.annotations.GwtIncompatible; 025import java.util.Iterator; 026 027/** 028 * A mutable object which accumulates double values and tracks some basic statistics over all the 029 * values added so far. The values may be added singly or in groups. This class is not thread safe. 030 * 031 * @author Pete Gillin 032 * @author Kevin Bourrillion 033 * @since 20.0 034 */ 035@Beta 036@GwtIncompatible 037public final class StatsAccumulator { 038 039 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 040 // methods of this class. 041 private long count = 0; 042 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 043 private double sumOfSquaresOfDeltas = 0.0; 044 private double min = NaN; // any value will do 045 private double max = NaN; // any value will do 046 047 /** 048 * Adds the given value to the dataset. 049 */ 050 public void add(double value) { 051 if (count == 0) { 052 count = 1; 053 mean = value; 054 min = value; 055 max = value; 056 if (!isFinite(value)) { 057 sumOfSquaresOfDeltas = NaN; 058 } 059 } else { 060 count++; 061 if (isFinite(value) && isFinite(mean)) { 062 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 063 double delta = value - mean; 064 mean += delta / count; 065 sumOfSquaresOfDeltas += delta * (value - mean); 066 } else { 067 mean = calculateNewMeanNonFinite(mean, value); 068 sumOfSquaresOfDeltas = NaN; 069 } 070 min = Math.min(min, value); 071 max = Math.max(max, value); 072 } 073 } 074 075 /** 076 * Adds the given values to the dataset. 077 * 078 * @param values a series of values, which will be converted to {@code double} values (this may 079 * cause loss of precision) 080 */ 081 public void addAll(Iterable<? extends Number> values) { 082 for (Number value : values) { 083 add(value.doubleValue()); 084 } 085 } 086 087 /** 088 * Adds the given values to the dataset. 089 * 090 * @param values a series of values, which will be converted to {@code double} values (this may 091 * cause loss of precision) 092 */ 093 public void addAll(Iterator<? extends Number> values) { 094 while (values.hasNext()) { 095 add(values.next().doubleValue()); 096 } 097 } 098 099 /** 100 * Adds the given values to the dataset. 101 * 102 * @param values a series of values 103 */ 104 public void addAll(double... values) { 105 for (double value : values) { 106 add(value); 107 } 108 } 109 110 /** 111 * Adds the given values to the dataset. 112 * 113 * @param values a series of values 114 */ 115 public void addAll(int... values) { 116 for (int value : values) { 117 add(value); 118 } 119 } 120 121 /** 122 * Adds the given values to the dataset. 123 * 124 * @param values a series of values, which will be converted to {@code double} values (this may 125 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 126 */ 127 public void addAll(long... values) { 128 for (long value : values) { 129 add(value); 130 } 131 } 132 133 /** 134 * Adds the given statistics to the dataset, as if the individual values used to compute the 135 * statistics had been added directly. 136 */ 137 public void addAll(Stats values) { 138 if (values.count() == 0) { 139 return; 140 } 141 142 if (count == 0) { 143 count = values.count(); 144 mean = values.mean(); 145 sumOfSquaresOfDeltas = values.sumOfSquaresOfDeltas(); 146 min = values.min(); 147 max = values.max(); 148 } else { 149 count += values.count(); 150 if (isFinite(mean) && isFinite(values.mean())) { 151 // This is a generalized version of the calculation in add(double) above. 152 double delta = values.mean() - mean; 153 mean += delta * values.count() / count; 154 sumOfSquaresOfDeltas += 155 values.sumOfSquaresOfDeltas() + delta * (values.mean() - mean) * values.count(); 156 } else { 157 mean = calculateNewMeanNonFinite(mean, values.mean()); 158 sumOfSquaresOfDeltas = NaN; 159 } 160 min = Math.min(min, values.min()); 161 max = Math.max(max, values.max()); 162 } 163 } 164 165 /** 166 * Returns an immutable snapshot of the current statistics. 167 */ 168 public Stats snapshot() { 169 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 170 } 171 172 /** 173 * Returns the number of values. 174 */ 175 public long count() { 176 return count; 177 } 178 179 /** 180 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 181 * values. The count must be non-zero. 182 * 183 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 184 * the arithmetic mean of the population. 185 * 186 * <h3>Non-finite values</h3> 187 * 188 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 189 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 190 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 191 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 192 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or 193 * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 194 * 195 * @throws IllegalStateException if the dataset is empty 196 */ 197 public double mean() { 198 checkState(count != 0); 199 return mean; 200 } 201 202 /** 203 * Returns the sum of the values. 204 * 205 * <h3>Non-finite values</h3> 206 * 207 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 208 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 209 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 210 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 211 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or 212 * {@link Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 213 */ 214 public final double sum() { 215 return mean * count; 216 } 217 218 /** 219 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 220 * variance</a> of the values. The count must be non-zero. 221 * 222 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. 223 * It is not guaranteed to return zero when the dataset consists of the same value multiple times, 224 * due to numerical errors. However, it is guaranteed never to return a negative result. 225 * 226 * <h3>Non-finite values</h3> 227 * 228 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 229 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 230 * 231 * @throws IllegalStateException if the dataset is empty 232 */ 233 public final double populationVariance() { 234 checkState(count != 0); 235 if (isNaN(sumOfSquaresOfDeltas)) { 236 return NaN; 237 } 238 if (count == 1) { 239 return 0.0; 240 } 241 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 242 } 243 244 /** 245 * Returns the 246 * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 247 * population standard deviation</a> of the values. The count must be non-zero. 248 * 249 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. 250 * It is not guaranteed to return zero when the dataset consists of the same value multiple times, 251 * due to numerical errors. However, it is guaranteed never to return a negative result. 252 * 253 * <h3>Non-finite values</h3> 254 * 255 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 256 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 257 * 258 * @throws IllegalStateException if the dataset is empty 259 */ 260 public final double populationStandardDeviation() { 261 return Math.sqrt(populationVariance()); 262 } 263 264 /** 265 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 266 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 267 * unbiased estimator of the population variance of the population. The count must be greater than 268 * one. 269 * 270 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 271 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 272 * 273 * <h3>Non-finite values</h3> 274 * 275 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 276 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 277 * 278 * @throws IllegalStateException if the dataset is empty or contains a single value 279 */ 280 public final double sampleVariance() { 281 checkState(count > 1); 282 if (isNaN(sumOfSquaresOfDeltas)) { 283 return NaN; 284 } 285 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 286 } 287 288 /** 289 * Returns the 290 * <a href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 291 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 292 * population, this is an estimator of the population standard deviation of the population which 293 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 294 * the distribution). The count must be greater than one. 295 * 296 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 297 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 298 * 299 * <h3>Non-finite values</h3> 300 * 301 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, 302 * {@link Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 303 * 304 * @throws IllegalStateException if the dataset is empty or contains a single value 305 */ 306 public final double sampleStandardDeviation() { 307 return Math.sqrt(sampleVariance()); 308 } 309 310 /** 311 * Returns the lowest value in the dataset. The count must be non-zero. 312 * 313 * <h3>Non-finite values</h3> 314 * 315 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 316 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is 317 * {@link Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite 318 * values only then the result is the lowest finite value. If it contains 319 * {@link Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 320 * 321 * @throws IllegalStateException if the dataset is empty 322 */ 323 public double min() { 324 checkState(count != 0); 325 return min; 326 } 327 328 /** 329 * Returns the highest value in the dataset. The count must be non-zero. 330 * 331 * <h3>Non-finite values</h3> 332 * 333 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 334 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is 335 * {@link Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite 336 * values only then the result is the highest finite value. If it contains 337 * {@link Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 338 * 339 * @throws IllegalStateException if the dataset is empty 340 */ 341 public double max() { 342 checkState(count != 0); 343 return max; 344 } 345 346 double sumOfSquaresOfDeltas() { 347 return sumOfSquaresOfDeltas; 348 } 349 350 /** 351 * Calculates the new value for the accumulated mean when a value is added, in the case where at 352 * least one of the previous mean and the value is non-finite. 353 */ 354 static double calculateNewMeanNonFinite(double previousMean, double value) { 355 /* 356 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 357 * the update formula can subtract infinities in cases where the naive formula would add them. 358 * 359 * Consequently: 360 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 361 * value (whether it is NaN or infinity). 362 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 363 * (whether it is NaN or infinity). 364 * 3. If both the previous mean and the new value are non-finite and... 365 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 366 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 367 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 368 */ 369 if (isFinite(previousMean)) { 370 // This is case 1. 371 return value; 372 } else if (isFinite(value) || previousMean == value) { 373 // This is case 2. or 3b. 374 return previousMean; 375 } else { 376 // This is case 3a. or 3c. 377 return NaN; 378 } 379 } 380}