001/* 002 * Copyright (C) 2012 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.math; 016 017import static com.google.common.base.Preconditions.checkState; 018import static com.google.common.math.DoubleUtils.ensureNonNegative; 019import static com.google.common.primitives.Doubles.isFinite; 020import static java.lang.Double.NaN; 021import static java.lang.Double.isNaN; 022 023import com.google.common.annotations.Beta; 024import com.google.common.annotations.GwtIncompatible; 025import java.util.Iterator; 026 027/** 028 * A mutable object which accumulates double values and tracks some basic statistics over all the 029 * values added so far. The values may be added singly or in groups. This class is not thread safe. 030 * 031 * @author Pete Gillin 032 * @author Kevin Bourrillion 033 * @since 20.0 034 */ 035@Beta 036@GwtIncompatible 037public final class StatsAccumulator { 038 039 // These fields must satisfy the requirements of Stats' constructor as well as those of the stat 040 // methods of this class. 041 private long count = 0; 042 private double mean = 0.0; // any finite value will do, we only use it to multiply by zero for sum 043 private double sumOfSquaresOfDeltas = 0.0; 044 private double min = NaN; // any value will do 045 private double max = NaN; // any value will do 046 047 /** Adds the given value to the dataset. */ 048 public void add(double value) { 049 if (count == 0) { 050 count = 1; 051 mean = value; 052 min = value; 053 max = value; 054 if (!isFinite(value)) { 055 sumOfSquaresOfDeltas = NaN; 056 } 057 } else { 058 count++; 059 if (isFinite(value) && isFinite(mean)) { 060 // Art of Computer Programming vol. 2, Knuth, 4.2.2, (15) and (16) 061 double delta = value - mean; 062 mean += delta / count; 063 sumOfSquaresOfDeltas += delta * (value - mean); 064 } else { 065 mean = calculateNewMeanNonFinite(mean, value); 066 sumOfSquaresOfDeltas = NaN; 067 } 068 min = Math.min(min, value); 069 max = Math.max(max, value); 070 } 071 } 072 073 /** 074 * Adds the given values to the dataset. 075 * 076 * @param values a series of values, which will be converted to {@code double} values (this may 077 * cause loss of precision) 078 */ 079 public void addAll(Iterable<? extends Number> values) { 080 for (Number value : values) { 081 add(value.doubleValue()); 082 } 083 } 084 085 /** 086 * Adds the given values to the dataset. 087 * 088 * @param values a series of values, which will be converted to {@code double} values (this may 089 * cause loss of precision) 090 */ 091 public void addAll(Iterator<? extends Number> values) { 092 while (values.hasNext()) { 093 add(values.next().doubleValue()); 094 } 095 } 096 097 /** 098 * Adds the given values to the dataset. 099 * 100 * @param values a series of values 101 */ 102 public void addAll(double... values) { 103 for (double value : values) { 104 add(value); 105 } 106 } 107 108 /** 109 * Adds the given values to the dataset. 110 * 111 * @param values a series of values 112 */ 113 public void addAll(int... values) { 114 for (int value : values) { 115 add(value); 116 } 117 } 118 119 /** 120 * Adds the given values to the dataset. 121 * 122 * @param values a series of values, which will be converted to {@code double} values (this may 123 * cause loss of precision for longs of magnitude over 2^53 (slightly over 9e15)) 124 */ 125 public void addAll(long... values) { 126 for (long value : values) { 127 add(value); 128 } 129 } 130 131 /** 132 * Adds the given statistics to the dataset, as if the individual values used to compute the 133 * statistics had been added directly. 134 */ 135 public void addAll(Stats values) { 136 if (values.count() == 0) { 137 return; 138 } 139 140 if (count == 0) { 141 count = values.count(); 142 mean = values.mean(); 143 sumOfSquaresOfDeltas = values.sumOfSquaresOfDeltas(); 144 min = values.min(); 145 max = values.max(); 146 } else { 147 count += values.count(); 148 if (isFinite(mean) && isFinite(values.mean())) { 149 // This is a generalized version of the calculation in add(double) above. 150 double delta = values.mean() - mean; 151 mean += delta * values.count() / count; 152 sumOfSquaresOfDeltas += 153 values.sumOfSquaresOfDeltas() + delta * (values.mean() - mean) * values.count(); 154 } else { 155 mean = calculateNewMeanNonFinite(mean, values.mean()); 156 sumOfSquaresOfDeltas = NaN; 157 } 158 min = Math.min(min, values.min()); 159 max = Math.max(max, values.max()); 160 } 161 } 162 163 /** Returns an immutable snapshot of the current statistics. */ 164 public Stats snapshot() { 165 return new Stats(count, mean, sumOfSquaresOfDeltas, min, max); 166 } 167 168 /** Returns the number of values. */ 169 public long count() { 170 return count; 171 } 172 173 /** 174 * Returns the <a href="http://en.wikipedia.org/wiki/Arithmetic_mean">arithmetic mean</a> of the 175 * values. The count must be non-zero. 176 * 177 * <p>If these values are a sample drawn from a population, this is also an unbiased estimator of 178 * the arithmetic mean of the population. 179 * 180 * <h3>Non-finite values</h3> 181 * 182 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 183 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 184 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 185 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 186 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 187 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 188 * 189 * @throws IllegalStateException if the dataset is empty 190 */ 191 public double mean() { 192 checkState(count != 0); 193 return mean; 194 } 195 196 /** 197 * Returns the sum of the values. 198 * 199 * <h3>Non-finite values</h3> 200 * 201 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 202 * contains both {@link Double#POSITIVE_INFINITY} and {@link Double#NEGATIVE_INFINITY} then the 203 * result is {@link Double#NaN}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 204 * only or {@link Double#POSITIVE_INFINITY} only, the result is {@link Double#POSITIVE_INFINITY}. 205 * If it contains {@link Double#NEGATIVE_INFINITY} and finite values only or {@link 206 * Double#NEGATIVE_INFINITY} only, the result is {@link Double#NEGATIVE_INFINITY}. 207 */ 208 public final double sum() { 209 return mean * count; 210 } 211 212 /** 213 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Population_variance">population 214 * variance</a> of the values. The count must be non-zero. 215 * 216 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 217 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 218 * due to numerical errors. However, it is guaranteed never to return a negative result. 219 * 220 * <h3>Non-finite values</h3> 221 * 222 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 223 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 224 * 225 * @throws IllegalStateException if the dataset is empty 226 */ 227 public final double populationVariance() { 228 checkState(count != 0); 229 if (isNaN(sumOfSquaresOfDeltas)) { 230 return NaN; 231 } 232 if (count == 1) { 233 return 0.0; 234 } 235 return ensureNonNegative(sumOfSquaresOfDeltas) / count; 236 } 237 238 /** 239 * Returns the <a 240 * href="http://en.wikipedia.org/wiki/Standard_deviation#Definition_of_population_values"> 241 * population standard deviation</a> of the values. The count must be non-zero. 242 * 243 * <p>This is guaranteed to return zero if the dataset contains only exactly one finite value. It 244 * is not guaranteed to return zero when the dataset consists of the same value multiple times, 245 * due to numerical errors. However, it is guaranteed never to return a negative result. 246 * 247 * <h3>Non-finite values</h3> 248 * 249 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 250 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 251 * 252 * @throws IllegalStateException if the dataset is empty 253 */ 254 public final double populationStandardDeviation() { 255 return Math.sqrt(populationVariance()); 256 } 257 258 /** 259 * Returns the <a href="http://en.wikipedia.org/wiki/Variance#Sample_variance">unbiased sample 260 * variance</a> of the values. If this dataset is a sample drawn from a population, this is an 261 * unbiased estimator of the population variance of the population. The count must be greater than 262 * one. 263 * 264 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 265 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 266 * 267 * <h3>Non-finite values</h3> 268 * 269 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 270 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 271 * 272 * @throws IllegalStateException if the dataset is empty or contains a single value 273 */ 274 public final double sampleVariance() { 275 checkState(count > 1); 276 if (isNaN(sumOfSquaresOfDeltas)) { 277 return NaN; 278 } 279 return ensureNonNegative(sumOfSquaresOfDeltas) / (count - 1); 280 } 281 282 /** 283 * Returns the <a 284 * href="http://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation"> 285 * corrected sample standard deviation</a> of the values. If this dataset is a sample drawn from a 286 * population, this is an estimator of the population standard deviation of the population which 287 * is less biased than {@link #populationStandardDeviation()} (the unbiased estimator depends on 288 * the distribution). The count must be greater than one. 289 * 290 * <p>This is not guaranteed to return zero when the dataset consists of the same value multiple 291 * times, due to numerical errors. However, it is guaranteed never to return a negative result. 292 * 293 * <h3>Non-finite values</h3> 294 * 295 * <p>If the dataset contains any non-finite values ({@link Double#POSITIVE_INFINITY}, {@link 296 * Double#NEGATIVE_INFINITY}, or {@link Double#NaN}) then the result is {@link Double#NaN}. 297 * 298 * @throws IllegalStateException if the dataset is empty or contains a single value 299 */ 300 public final double sampleStandardDeviation() { 301 return Math.sqrt(sampleVariance()); 302 } 303 304 /** 305 * Returns the lowest value in the dataset. The count must be non-zero. 306 * 307 * <h3>Non-finite values</h3> 308 * 309 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 310 * contains {@link Double#NEGATIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 311 * Double#NEGATIVE_INFINITY}. If it contains {@link Double#POSITIVE_INFINITY} and finite values 312 * only then the result is the lowest finite value. If it contains {@link 313 * Double#POSITIVE_INFINITY} only then the result is {@link Double#POSITIVE_INFINITY}. 314 * 315 * @throws IllegalStateException if the dataset is empty 316 */ 317 public double min() { 318 checkState(count != 0); 319 return min; 320 } 321 322 /** 323 * Returns the highest value in the dataset. The count must be non-zero. 324 * 325 * <h3>Non-finite values</h3> 326 * 327 * <p>If the dataset contains {@link Double#NaN} then the result is {@link Double#NaN}. If it 328 * contains {@link Double#POSITIVE_INFINITY} and not {@link Double#NaN} then the result is {@link 329 * Double#POSITIVE_INFINITY}. If it contains {@link Double#NEGATIVE_INFINITY} and finite values 330 * only then the result is the highest finite value. If it contains {@link 331 * Double#NEGATIVE_INFINITY} only then the result is {@link Double#NEGATIVE_INFINITY}. 332 * 333 * @throws IllegalStateException if the dataset is empty 334 */ 335 public double max() { 336 checkState(count != 0); 337 return max; 338 } 339 340 double sumOfSquaresOfDeltas() { 341 return sumOfSquaresOfDeltas; 342 } 343 344 /** 345 * Calculates the new value for the accumulated mean when a value is added, in the case where at 346 * least one of the previous mean and the value is non-finite. 347 */ 348 static double calculateNewMeanNonFinite(double previousMean, double value) { 349 /* 350 * Desired behaviour is to match the results of applying the naive mean formula. In particular, 351 * the update formula can subtract infinities in cases where the naive formula would add them. 352 * 353 * Consequently: 354 * 1. If the previous mean is finite and the new value is non-finite then the new mean is that 355 * value (whether it is NaN or infinity). 356 * 2. If the new value is finite and the previous mean is non-finite then the mean is unchanged 357 * (whether it is NaN or infinity). 358 * 3. If both the previous mean and the new value are non-finite and... 359 * 3a. ...either or both is NaN (so mean != value) then the new mean is NaN. 360 * 3b. ...they are both the same infinities (so mean == value) then the mean is unchanged. 361 * 3c. ...they are different infinities (so mean != value) then the new mean is NaN. 362 */ 363 if (isFinite(previousMean)) { 364 // This is case 1. 365 return value; 366 } else if (isFinite(value) || previousMean == value) { 367 // This is case 2. or 3b. 368 return previousMean; 369 } else { 370 // This is case 3a. or 3c. 371 return NaN; 372 } 373 } 374}