001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Throwables.throwIfUnchecked; 020import static java.lang.invoke.MethodType.methodType; 021 022import com.google.errorprone.annotations.Immutable; 023import com.google.j2objc.annotations.J2ObjCIncompatible; 024import java.lang.invoke.MethodHandle; 025import java.lang.invoke.MethodHandles; 026import java.security.Key; 027import java.util.ArrayList; 028import java.util.Arrays; 029import java.util.Collections; 030import java.util.Iterator; 031import java.util.List; 032import java.util.zip.Adler32; 033import java.util.zip.CRC32; 034import java.util.zip.Checksum; 035import javax.annotation.CheckForNull; 036import javax.crypto.spec.SecretKeySpec; 037 038/** 039 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 040 * utilities. 041 * 042 * <p>A comparison of the various hash functions can be found <a 043 * href="http://goo.gl/jS7HH">here</a>. 044 * 045 * @author Kevin Bourrillion 046 * @author Dimitris Andreou 047 * @author Kurt Alfred Kluever 048 * @since 11.0 049 */ 050@ElementTypesAreNonnullByDefault 051public final class Hashing { 052 /** 053 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm 054 * the returned function implements is unspecified and subject to change without notice. 055 * 056 * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code 057 * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current 058 * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose, 059 * non-cryptographic hash function that will never change behavior, we suggest {@link 060 * #murmur3_128}. 061 * 062 * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value 063 * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. 064 * 065 * @param minimumBits a positive integer. This can be arbitrarily large. The returned {@link 066 * HashFunction} instance may use memory proportional to this integer. 067 * @return a hash function, described above, that produces hash codes of length {@code 068 * minimumBits} or greater 069 */ 070 public static HashFunction goodFastHash(int minimumBits) { 071 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 072 073 if (bits == 32) { 074 return Murmur3_32HashFunction.GOOD_FAST_HASH_32; 075 } 076 if (bits <= 128) { 077 return Murmur3_128HashFunction.GOOD_FAST_HASH_128; 078 } 079 080 // Otherwise, join together some 128-bit murmur3s 081 int hashFunctionsNeeded = (bits + 127) / 128; 082 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 083 hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128; 084 int seed = GOOD_FAST_HASH_SEED; 085 for (int i = 1; i < hashFunctionsNeeded; i++) { 086 seed += 1500450271; // a prime; shouldn't matter 087 hashFunctions[i] = murmur3_128(seed); 088 } 089 return new ConcatenatedHashFunction(hashFunctions); 090 } 091 092 /** 093 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 094 * dependent on the hash codes they produce will fail sooner. 095 */ 096 @SuppressWarnings("GoodTime") // reading system time without TimeSource 097 static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 098 099 /** 100 * Returns a hash function implementing the <a 101 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 102 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 103 * bug</b> as described in the deprecation text. 104 * 105 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 106 * have the bug. 107 * 108 * @deprecated This implementation produces incorrect hash values from the {@link 109 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 110 * #murmur3_32_fixed(int)} instead. 111 */ 112 @Deprecated 113 public static HashFunction murmur3_32(int seed) { 114 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); 115 } 116 117 /** 118 * Returns a hash function implementing the <a 119 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 120 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 121 * bug</b> as described in the deprecation text. 122 * 123 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 124 * have the bug. 125 * 126 * @deprecated This implementation produces incorrect hash values from the {@link 127 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 128 * #murmur3_32_fixed()} instead. 129 */ 130 @Deprecated 131 public static HashFunction murmur3_32() { 132 return Murmur3_32HashFunction.MURMUR3_32; 133 } 134 135 /** 136 * Returns a hash function implementing the <a 137 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 138 * algorithm, x86 variant</a> (little-endian variant), using the given seed value. 139 * 140 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 141 * 142 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 143 * HashFunction} returned by the original {@code murmur3_32} method. 144 * 145 * @since 31.0 146 */ 147 public static HashFunction murmur3_32_fixed(int seed) { 148 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); 149 } 150 151 /** 152 * Returns a hash function implementing the <a 153 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 154 * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero. 155 * 156 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 157 * 158 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 159 * HashFunction} returned by the original {@code murmur3_32} method. 160 * 161 * @since 31.0 162 */ 163 public static HashFunction murmur3_32_fixed() { 164 return Murmur3_32HashFunction.MURMUR3_32_FIXED; 165 } 166 167 /** 168 * Returns a hash function implementing the <a 169 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 170 * algorithm, x64 variant</a> (little-endian variant), using the given seed value. 171 * 172 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 173 */ 174 public static HashFunction murmur3_128(int seed) { 175 return new Murmur3_128HashFunction(seed); 176 } 177 178 /** 179 * Returns a hash function implementing the <a 180 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 181 * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero. 182 * 183 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 184 */ 185 public static HashFunction murmur3_128() { 186 return Murmur3_128HashFunction.MURMUR3_128; 187 } 188 189 /** 190 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 191 * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}. 192 * 193 * @since 15.0 194 */ 195 public static HashFunction sipHash24() { 196 return SipHashFunction.SIP_HASH_24; 197 } 198 199 /** 200 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 201 * SipHash-2-4 algorithm</a> using the given seed. 202 * 203 * @since 15.0 204 */ 205 public static HashFunction sipHash24(long k0, long k1) { 206 return new SipHashFunction(2, 4, k0, k1); 207 } 208 209 /** 210 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits). 211 * 212 * @deprecated If you must interoperate with a system that requires MD5, then use this method, 213 * despite its deprecation. But if you can choose your hash function, avoid MD5, which is 214 * neither fast nor secure. As of January 2017, we suggest: 215 * <ul> 216 * <li>For security: 217 * {@link Hashing#sha256} or a higher-level API. 218 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 219 * </ul> 220 */ 221 @Deprecated 222 public static HashFunction md5() { 223 return Md5Holder.MD5; 224 } 225 226 private static class Md5Holder { 227 static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 228 } 229 230 /** 231 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits). 232 * 233 * @deprecated If you must interoperate with a system that requires SHA-1, then use this method, 234 * despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is 235 * neither fast nor secure. As of January 2017, we suggest: 236 * <ul> 237 * <li>For security: 238 * {@link Hashing#sha256} or a higher-level API. 239 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 240 * </ul> 241 */ 242 @Deprecated 243 public static HashFunction sha1() { 244 return Sha1Holder.SHA_1; 245 } 246 247 private static class Sha1Holder { 248 static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 249 } 250 251 /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */ 252 public static HashFunction sha256() { 253 return Sha256Holder.SHA_256; 254 } 255 256 private static class Sha256Holder { 257 static final HashFunction SHA_256 = 258 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 259 } 260 261 /** 262 * Returns a hash function implementing the SHA-384 algorithm (384 hash bits). 263 * 264 * @since 19.0 265 */ 266 public static HashFunction sha384() { 267 return Sha384Holder.SHA_384; 268 } 269 270 private static class Sha384Holder { 271 static final HashFunction SHA_384 = 272 new MessageDigestHashFunction("SHA-384", "Hashing.sha384()"); 273 } 274 275 /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */ 276 public static HashFunction sha512() { 277 return Sha512Holder.SHA_512; 278 } 279 280 private static class Sha512Holder { 281 static final HashFunction SHA_512 = 282 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 283 } 284 285 /** 286 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 287 * MD5 (128 hash bits) hash function and the given secret key. 288 * 289 * @param key the secret key 290 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 291 * @since 20.0 292 */ 293 public static HashFunction hmacMd5(Key key) { 294 return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key)); 295 } 296 297 /** 298 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 299 * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array 300 * and the MD5 algorithm. 301 * 302 * @param key the key material of the secret key 303 * @since 20.0 304 */ 305 public static HashFunction hmacMd5(byte[] key) { 306 return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5")); 307 } 308 309 /** 310 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 311 * SHA-1 (160 hash bits) hash function and the given secret key. 312 * 313 * @param key the secret key 314 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 315 * @since 20.0 316 */ 317 public static HashFunction hmacSha1(Key key) { 318 return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key)); 319 } 320 321 /** 322 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 323 * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 324 * array and the SHA-1 algorithm. 325 * 326 * @param key the key material of the secret key 327 * @since 20.0 328 */ 329 public static HashFunction hmacSha1(byte[] key) { 330 return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1")); 331 } 332 333 /** 334 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 335 * SHA-256 (256 hash bits) hash function and the given secret key. 336 * 337 * @param key the secret key 338 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 339 * @since 20.0 340 */ 341 public static HashFunction hmacSha256(Key key) { 342 return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key)); 343 } 344 345 /** 346 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 347 * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 348 * array and the SHA-256 algorithm. 349 * 350 * @param key the key material of the secret key 351 * @since 20.0 352 */ 353 public static HashFunction hmacSha256(byte[] key) { 354 return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256")); 355 } 356 357 /** 358 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 359 * SHA-512 (512 hash bits) hash function and the given secret key. 360 * 361 * @param key the secret key 362 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 363 * @since 20.0 364 */ 365 public static HashFunction hmacSha512(Key key) { 366 return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key)); 367 } 368 369 /** 370 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 371 * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 372 * array and the SHA-512 algorithm. 373 * 374 * @param key the key material of the secret key 375 * @since 20.0 376 */ 377 public static HashFunction hmacSha512(byte[] key) { 378 return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512")); 379 } 380 381 private static String hmacToString(String methodName, Key key) { 382 return "Hashing." 383 + methodName 384 + "(Key[algorithm=" 385 + key.getAlgorithm() 386 + ", format=" 387 + key.getFormat() 388 + "])"; 389 } 390 391 /** 392 * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described 393 * by RFC 3720, Section 12.1. 394 * 395 * <p>This function is best understood as a <a 396 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 397 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 398 * 399 * @since 18.0 400 */ 401 public static HashFunction crc32c() { 402 return Crc32CSupplier.HASH_FUNCTION; 403 } 404 405 @Immutable 406 private enum Crc32CSupplier implements ImmutableSupplier<HashFunction> { 407 @J2ObjCIncompatible 408 JAVA_UTIL_ZIP { 409 @Override 410 public HashFunction get() { 411 return ChecksumType.CRC_32C.hashFunction; 412 } 413 }, 414 ABSTRACT_HASH_FUNCTION { 415 @Override 416 public HashFunction get() { 417 return Crc32cHashFunction.CRC_32_C; 418 } 419 }; 420 421 static final HashFunction HASH_FUNCTION = pickFunction().get(); 422 423 private static Crc32CSupplier pickFunction() { 424 Crc32CSupplier[] functions = values(); 425 426 if (functions.length == 1) { 427 // We're running under J2ObjC. 428 return functions[0]; 429 } 430 431 // We can't refer to JAVA_UTIL_ZIP directly at compile time because of J2ObjC. 432 Crc32CSupplier javaUtilZip = functions[0]; 433 434 try { 435 Class.forName("java.util.zip.CRC32C"); 436 return javaUtilZip; 437 } catch (ClassNotFoundException runningUnderJava8) { 438 return ABSTRACT_HASH_FUNCTION; 439 } 440 } 441 } 442 443 /** 444 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits). 445 * 446 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 447 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 448 * 449 * <p>This function is best understood as a <a 450 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 451 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 452 * 453 * @since 14.0 454 */ 455 public static HashFunction crc32() { 456 return ChecksumType.CRC_32.hashFunction; 457 } 458 459 /** 460 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits). 461 * 462 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 463 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 464 * 465 * <p>This function is best understood as a <a 466 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 467 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 468 * 469 * @since 14.0 470 */ 471 public static HashFunction adler32() { 472 return ChecksumType.ADLER_32.hashFunction; 473 } 474 475 @Immutable 476 enum ChecksumType implements ImmutableSupplier<Checksum> { 477 CRC_32("Hashing.crc32()") { 478 @Override 479 public Checksum get() { 480 return new CRC32(); 481 } 482 }, 483 @J2ObjCIncompatible 484 CRC_32C("Hashing.crc32c()") { 485 @Override 486 public Checksum get() { 487 return Crc32cMethodHandles.newCrc32c(); 488 } 489 }, 490 ADLER_32("Hashing.adler32()") { 491 @Override 492 public Checksum get() { 493 return new Adler32(); 494 } 495 }; 496 497 public final HashFunction hashFunction; 498 499 ChecksumType(String toString) { 500 this.hashFunction = new ChecksumHashFunction(this, 32, toString); 501 } 502 } 503 504 @J2ObjCIncompatible 505 @SuppressWarnings("unused") 506 private static final class Crc32cMethodHandles { 507 private static final MethodHandle CONSTRUCTOR = crc32cConstructor(); 508 509 @IgnoreJRERequirement // https://github.com/mojohaus/animal-sniffer/issues/67 510 static Checksum newCrc32c() { 511 try { 512 return (Checksum) CONSTRUCTOR.invokeExact(); 513 } catch (Throwable e) { 514 throwIfUnchecked(e); 515 // That constructor has no `throws` clause. 516 throw newLinkageError(e); 517 } 518 } 519 520 private static MethodHandle crc32cConstructor() { 521 try { 522 Class<?> clazz = Class.forName("java.util.zip.CRC32C"); 523 /* 524 * We can't cast to CRC32C at the call site because we support building with Java 8 525 * (https://github.com/google/guava/issues/6549). So we have to use asType() to change from 526 * CRC32C to Checksum. This may carry some performance cost 527 * (https://stackoverflow.com/a/22321671/28465), but I'd have to benchmark more carefully to 528 * even detect it. 529 */ 530 return MethodHandles.lookup() 531 .findConstructor(clazz, methodType(void.class)) 532 .asType(methodType(Checksum.class)); 533 } catch (ClassNotFoundException e) { 534 // We check that the class is available before calling this method. 535 throw new AssertionError(e); 536 } catch (IllegalAccessException e) { 537 // That API is public. 538 throw newLinkageError(e); 539 } catch (NoSuchMethodException e) { 540 // That constructor exists. 541 throw newLinkageError(e); 542 } 543 } 544 545 private static LinkageError newLinkageError(Throwable cause) { 546 return new LinkageError(cause.toString(), cause); 547 } 548 } 549 550 /** 551 * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm. 552 * 553 * <p>This is designed for generating persistent fingerprints of strings. It isn't 554 * cryptographically secure, but it produces a high-quality hash with fewer collisions than some 555 * alternatives we've used in the past. 556 * 557 * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This 558 * means {@link HashCode#asLong} is guaranteed to return the same value that 559 * farmhash::Fingerprint64() would for the same input (when compared using {@link 560 * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers). 561 * 562 * <p>This function is best understood as a <a 563 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 564 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 565 * 566 * @since 20.0 567 */ 568 public static HashFunction farmHashFingerprint64() { 569 return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64; 570 } 571 572 /** 573 * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits). 574 * 575 * <p>This is designed for generating persistent fingerprints of strings. It isn't 576 * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints 577 * generated using this are byte-wise identical to those created using the C++ version, but note 578 * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}). 579 * Comparisons between the two should take this into account. 580 * 581 * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for 582 * longer strings. It could have been one or the other throughout. The main advantage of the 583 * combination is that CityHash has a bunch of special cases for short strings that don't need to 584 * be replicated here. The result will never be 0 or 1. 585 * 586 * <p>This function is best understood as a <a 587 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 588 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 589 * 590 * @since 31.1 591 */ 592 public static HashFunction fingerprint2011() { 593 return Fingerprint2011.FINGERPRINT_2011; 594 } 595 596 /** 597 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner 598 * that minimizes the need for remapping as {@code buckets} grows. That is, {@code 599 * consistentHash(h, n)} equals: 600 * 601 * <ul> 602 * <li>{@code n - 1}, with approximate probability {@code 1/n} 603 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 604 * </ul> 605 * 606 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 607 * following conditions: 608 * 609 * <ul> 610 * <li>You want to assign the same fraction of inputs to each bucket. 611 * <li>When you reduce the number of buckets, you can accept that the most recently added 612 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 613 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 614 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 615 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 616 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 617 * no way for you to specify which of the three buckets is disappearing. Thus, if your 618 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 619 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 620 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 621 * </ul> 622 * 623 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 624 * consistent hashing</a> for more information. 625 */ 626 public static int consistentHash(HashCode hashCode, int buckets) { 627 return consistentHash(hashCode.padToLong(), buckets); 628 } 629 630 /** 631 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that 632 * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, 633 * n)} equals: 634 * 635 * <ul> 636 * <li>{@code n - 1}, with approximate probability {@code 1/n} 637 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 638 * </ul> 639 * 640 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 641 * following conditions: 642 * 643 * <ul> 644 * <li>You want to assign the same fraction of inputs to each bucket. 645 * <li>When you reduce the number of buckets, you can accept that the most recently added 646 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 647 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 648 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 649 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 650 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 651 * no way for you to specify which of the three buckets is disappearing. Thus, if your 652 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 653 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 654 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 655 * </ul> 656 * 657 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 658 * consistent hashing</a> for more information. 659 */ 660 public static int consistentHash(long input, int buckets) { 661 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 662 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 663 int candidate = 0; 664 int next; 665 666 // Jump from bucket to bucket until we go out of range 667 while (true) { 668 next = (int) ((candidate + 1) / generator.nextDouble()); 669 if (next >= 0 && next < buckets) { 670 candidate = next; 671 } else { 672 return candidate; 673 } 674 } 675 } 676 677 /** 678 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 679 * the information of these hash codes in an ordered fashion. That is, whenever two equal hash 680 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 681 * was computed from the <i>same</i> input hash codes in the <i>same</i> order. 682 * 683 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 684 * have the same bit length 685 */ 686 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 687 Iterator<HashCode> iterator = hashCodes.iterator(); 688 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 689 int bits = iterator.next().bits(); 690 byte[] resultBytes = new byte[bits / 8]; 691 for (HashCode hashCode : hashCodes) { 692 byte[] nextBytes = hashCode.asBytes(); 693 checkArgument( 694 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 695 for (int i = 0; i < nextBytes.length; i++) { 696 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 697 } 698 } 699 return HashCode.fromBytesNoCopy(resultBytes); 700 } 701 702 /** 703 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 704 * the information of these hash codes in an unordered fashion. That is, whenever two equal hash 705 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 706 * was computed from the <i>same</i> input hash codes in <i>some</i> order. 707 * 708 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 709 * have the same bit length 710 */ 711 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 712 Iterator<HashCode> iterator = hashCodes.iterator(); 713 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 714 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 715 for (HashCode hashCode : hashCodes) { 716 byte[] nextBytes = hashCode.asBytes(); 717 checkArgument( 718 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 719 for (int i = 0; i < nextBytes.length; i++) { 720 resultBytes[i] += nextBytes[i]; 721 } 722 } 723 return HashCode.fromBytesNoCopy(resultBytes); 724 } 725 726 /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */ 727 static int checkPositiveAndMakeMultipleOf32(int bits) { 728 checkArgument(bits > 0, "Number of bits must be positive"); 729 return (bits + 31) & ~31; 730 } 731 732 /** 733 * Returns a hash function which computes its hash code by concatenating the hash codes of the 734 * underlying hash functions together. This can be useful if you need to generate hash codes of a 735 * specific length. 736 * 737 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 738 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 739 * 740 * @since 19.0 741 */ 742 public static HashFunction concatenating( 743 HashFunction first, HashFunction second, HashFunction... rest) { 744 // We can't use Lists.asList() here because there's no hash->collect dependency 745 List<HashFunction> list = new ArrayList<>(); 746 list.add(first); 747 list.add(second); 748 Collections.addAll(list, rest); 749 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 750 } 751 752 /** 753 * Returns a hash function which computes its hash code by concatenating the hash codes of the 754 * underlying hash functions together. This can be useful if you need to generate hash codes of a 755 * specific length. 756 * 757 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 758 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 759 * 760 * @since 19.0 761 */ 762 public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) { 763 checkNotNull(hashFunctions); 764 // We can't use Iterables.toArray() here because there's no hash->collect dependency 765 List<HashFunction> list = new ArrayList<>(); 766 for (HashFunction hashFunction : hashFunctions) { 767 list.add(hashFunction); 768 } 769 checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size()); 770 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 771 } 772 773 private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 774 775 private ConcatenatedHashFunction(HashFunction... functions) { 776 super(functions); 777 for (HashFunction function : functions) { 778 checkArgument( 779 function.bits() % 8 == 0, 780 "the number of bits (%s) in hashFunction (%s) must be divisible by 8", 781 function.bits(), 782 function); 783 } 784 } 785 786 @Override 787 HashCode makeHash(Hasher[] hashers) { 788 byte[] bytes = new byte[bits() / 8]; 789 int i = 0; 790 for (Hasher hasher : hashers) { 791 HashCode newHash = hasher.hash(); 792 i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8); 793 } 794 return HashCode.fromBytesNoCopy(bytes); 795 } 796 797 @Override 798 public int bits() { 799 int bitSum = 0; 800 for (HashFunction function : functions) { 801 bitSum += function.bits(); 802 } 803 return bitSum; 804 } 805 806 @Override 807 public boolean equals(@CheckForNull Object object) { 808 if (object instanceof ConcatenatedHashFunction) { 809 ConcatenatedHashFunction other = (ConcatenatedHashFunction) object; 810 return Arrays.equals(functions, other.functions); 811 } 812 return false; 813 } 814 815 @Override 816 public int hashCode() { 817 return Arrays.hashCode(functions); 818 } 819 } 820 821 /** 822 * Linear CongruentialGenerator to use for consistent hashing. See 823 * http://en.wikipedia.org/wiki/Linear_congruential_generator 824 */ 825 private static final class LinearCongruentialGenerator { 826 private long state; 827 828 public LinearCongruentialGenerator(long seed) { 829 this.state = seed; 830 } 831 832 public double nextDouble() { 833 state = 2862933555777941757L * state + 1; 834 return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31; 835 } 836 } 837 838 private Hashing() {} 839}