001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Throwables.throwIfUnchecked; 020import static java.lang.invoke.MethodType.methodType; 021 022import com.google.errorprone.annotations.Immutable; 023import com.google.j2objc.annotations.J2ObjCIncompatible; 024import java.lang.invoke.MethodHandle; 025import java.lang.invoke.MethodHandles; 026import java.lang.reflect.UndeclaredThrowableException; 027import java.security.Key; 028import java.util.ArrayList; 029import java.util.Arrays; 030import java.util.Collections; 031import java.util.Iterator; 032import java.util.List; 033import java.util.zip.Adler32; 034import java.util.zip.CRC32; 035import java.util.zip.Checksum; 036import javax.annotation.CheckForNull; 037import javax.crypto.spec.SecretKeySpec; 038 039/** 040 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 041 * utilities. 042 * 043 * <p>A comparison of the various hash functions can be found <a 044 * href="https://docs.google.com/spreadsheets/d/1_q2EVcxA2HjcrlVMbaqXwMj31h9M5-Bqj_m8vITOwwk/">here</a>. 045 * 046 * @author Kevin Bourrillion 047 * @author Dimitris Andreou 048 * @author Kurt Alfred Kluever 049 * @since 11.0 050 */ 051@ElementTypesAreNonnullByDefault 052public final class Hashing { 053 /** 054 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm 055 * the returned function implements is unspecified and subject to change without notice. 056 * 057 * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code 058 * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current 059 * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose, 060 * non-cryptographic hash function that will never change behavior, we suggest {@link 061 * #murmur3_128}. 062 * 063 * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value 064 * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. 065 * 066 * @param minimumBits a positive integer. This can be arbitrarily large. The returned {@link 067 * HashFunction} instance may use memory proportional to this integer. 068 * @return a hash function, described above, that produces hash codes of length {@code 069 * minimumBits} or greater 070 */ 071 public static HashFunction goodFastHash(int minimumBits) { 072 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 073 074 if (bits == 32) { 075 return Murmur3_32HashFunction.GOOD_FAST_HASH_32; 076 } 077 if (bits <= 128) { 078 return Murmur3_128HashFunction.GOOD_FAST_HASH_128; 079 } 080 081 // Otherwise, join together some 128-bit murmur3s 082 int hashFunctionsNeeded = (bits + 127) / 128; 083 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 084 hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128; 085 int seed = GOOD_FAST_HASH_SEED; 086 for (int i = 1; i < hashFunctionsNeeded; i++) { 087 seed += 1500450271; // a prime; shouldn't matter 088 hashFunctions[i] = murmur3_128(seed); 089 } 090 return new ConcatenatedHashFunction(hashFunctions); 091 } 092 093 /** 094 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 095 * dependent on the hash codes they produce will fail sooner. 096 */ 097 @SuppressWarnings("GoodTime") // reading system time without TimeSource 098 static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 099 100 /** 101 * Returns a hash function implementing the <a 102 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 103 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 104 * bug</b> as described in the deprecation text. 105 * 106 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 107 * have the bug. 108 * 109 * @deprecated This implementation produces incorrect hash values from the {@link 110 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 111 * #murmur3_32_fixed(int)} instead. 112 */ 113 @Deprecated 114 public static HashFunction murmur3_32(int seed) { 115 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); 116 } 117 118 /** 119 * Returns a hash function implementing the <a 120 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 121 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 122 * bug</b> as described in the deprecation text. 123 * 124 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 125 * have the bug. 126 * 127 * @deprecated This implementation produces incorrect hash values from the {@link 128 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 129 * #murmur3_32_fixed()} instead. 130 */ 131 @Deprecated 132 public static HashFunction murmur3_32() { 133 return Murmur3_32HashFunction.MURMUR3_32; 134 } 135 136 /** 137 * Returns a hash function implementing the <a 138 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 139 * algorithm, x86 variant</a> (little-endian variant), using the given seed value. 140 * 141 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 142 * 143 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 144 * HashFunction} returned by the original {@code murmur3_32} method. 145 * 146 * @since 31.0 147 */ 148 public static HashFunction murmur3_32_fixed(int seed) { 149 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); 150 } 151 152 /** 153 * Returns a hash function implementing the <a 154 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 155 * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero. 156 * 157 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 158 * 159 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 160 * HashFunction} returned by the original {@code murmur3_32} method. 161 * 162 * @since 31.0 163 */ 164 public static HashFunction murmur3_32_fixed() { 165 return Murmur3_32HashFunction.MURMUR3_32_FIXED; 166 } 167 168 /** 169 * Returns a hash function implementing the <a 170 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 171 * algorithm, x64 variant</a> (little-endian variant), using the given seed value. 172 * 173 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 174 */ 175 public static HashFunction murmur3_128(int seed) { 176 return new Murmur3_128HashFunction(seed); 177 } 178 179 /** 180 * Returns a hash function implementing the <a 181 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 182 * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero. 183 * 184 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 185 */ 186 public static HashFunction murmur3_128() { 187 return Murmur3_128HashFunction.MURMUR3_128; 188 } 189 190 /** 191 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 192 * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}. 193 * 194 * @since 15.0 195 */ 196 public static HashFunction sipHash24() { 197 return SipHashFunction.SIP_HASH_24; 198 } 199 200 /** 201 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 202 * SipHash-2-4 algorithm</a> using the given seed. 203 * 204 * @since 15.0 205 */ 206 public static HashFunction sipHash24(long k0, long k1) { 207 return new SipHashFunction(2, 4, k0, k1); 208 } 209 210 /** 211 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits). 212 * 213 * @deprecated If you must interoperate with a system that requires MD5, then use this method, 214 * despite its deprecation. But if you can choose your hash function, avoid MD5, which is 215 * neither fast nor secure. As of January 2017, we suggest: 216 * <ul> 217 * <li>For security: 218 * {@link Hashing#sha256} or a higher-level API. 219 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 220 * </ul> 221 */ 222 @Deprecated 223 public static HashFunction md5() { 224 return Md5Holder.MD5; 225 } 226 227 private static class Md5Holder { 228 static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 229 } 230 231 /** 232 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits). 233 * 234 * @deprecated If you must interoperate with a system that requires SHA-1, then use this method, 235 * despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is 236 * neither fast nor secure. As of January 2017, we suggest: 237 * <ul> 238 * <li>For security: 239 * {@link Hashing#sha256} or a higher-level API. 240 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 241 * </ul> 242 */ 243 @Deprecated 244 public static HashFunction sha1() { 245 return Sha1Holder.SHA_1; 246 } 247 248 private static class Sha1Holder { 249 static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 250 } 251 252 /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */ 253 public static HashFunction sha256() { 254 return Sha256Holder.SHA_256; 255 } 256 257 private static class Sha256Holder { 258 static final HashFunction SHA_256 = 259 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 260 } 261 262 /** 263 * Returns a hash function implementing the SHA-384 algorithm (384 hash bits). 264 * 265 * @since 19.0 266 */ 267 public static HashFunction sha384() { 268 return Sha384Holder.SHA_384; 269 } 270 271 private static class Sha384Holder { 272 static final HashFunction SHA_384 = 273 new MessageDigestHashFunction("SHA-384", "Hashing.sha384()"); 274 } 275 276 /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */ 277 public static HashFunction sha512() { 278 return Sha512Holder.SHA_512; 279 } 280 281 private static class Sha512Holder { 282 static final HashFunction SHA_512 = 283 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 284 } 285 286 /** 287 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 288 * MD5 (128 hash bits) hash function and the given secret key. 289 * 290 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 291 * future-proof algorithms <a 292 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 293 * 294 * @param key the secret key 295 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 296 * @since 20.0 297 */ 298 public static HashFunction hmacMd5(Key key) { 299 return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key)); 300 } 301 302 /** 303 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 304 * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array 305 * and the MD5 algorithm. 306 * 307 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 308 * future-proof algorithms <a 309 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 310 * 311 * @param key the key material of the secret key 312 * @since 20.0 313 */ 314 public static HashFunction hmacMd5(byte[] key) { 315 return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5")); 316 } 317 318 /** 319 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 320 * SHA-1 (160 hash bits) hash function and the given secret key. 321 * 322 * @param key the secret key 323 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 324 * @since 20.0 325 */ 326 public static HashFunction hmacSha1(Key key) { 327 return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key)); 328 } 329 330 /** 331 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 332 * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 333 * array and the SHA-1 algorithm. 334 * 335 * @param key the key material of the secret key 336 * @since 20.0 337 */ 338 public static HashFunction hmacSha1(byte[] key) { 339 return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1")); 340 } 341 342 /** 343 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 344 * SHA-256 (256 hash bits) hash function and the given secret key. 345 * 346 * @param key the secret key 347 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 348 * @since 20.0 349 */ 350 public static HashFunction hmacSha256(Key key) { 351 return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key)); 352 } 353 354 /** 355 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 356 * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 357 * array and the SHA-256 algorithm. 358 * 359 * @param key the key material of the secret key 360 * @since 20.0 361 */ 362 public static HashFunction hmacSha256(byte[] key) { 363 return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256")); 364 } 365 366 /** 367 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 368 * SHA-512 (512 hash bits) hash function and the given secret key. 369 * 370 * @param key the secret key 371 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 372 * @since 20.0 373 */ 374 public static HashFunction hmacSha512(Key key) { 375 return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key)); 376 } 377 378 /** 379 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 380 * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 381 * array and the SHA-512 algorithm. 382 * 383 * @param key the key material of the secret key 384 * @since 20.0 385 */ 386 public static HashFunction hmacSha512(byte[] key) { 387 return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512")); 388 } 389 390 private static String hmacToString(String methodName, Key key) { 391 return "Hashing." 392 + methodName 393 + "(Key[algorithm=" 394 + key.getAlgorithm() 395 + ", format=" 396 + key.getFormat() 397 + "])"; 398 } 399 400 /** 401 * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described 402 * by RFC 3720, Section 12.1. 403 * 404 * <p>This function is best understood as a <a 405 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 406 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 407 * 408 * @since 18.0 409 */ 410 public static HashFunction crc32c() { 411 return Crc32CSupplier.HASH_FUNCTION; 412 } 413 414 @Immutable 415 private enum Crc32CSupplier implements ImmutableSupplier<HashFunction> { 416 @J2ObjCIncompatible 417 JAVA_UTIL_ZIP { 418 @Override 419 public HashFunction get() { 420 return ChecksumType.CRC_32C.hashFunction; 421 } 422 }, 423 ABSTRACT_HASH_FUNCTION { 424 @Override 425 public HashFunction get() { 426 return Crc32cHashFunction.CRC_32_C; 427 } 428 }; 429 430 static final HashFunction HASH_FUNCTION = pickFunction().get(); 431 432 private static Crc32CSupplier pickFunction() { 433 Crc32CSupplier[] functions = values(); 434 435 if (functions.length == 1) { 436 // We're running under J2ObjC. 437 return functions[0]; 438 } 439 440 // We can't refer to JAVA_UTIL_ZIP directly at compile time because of J2ObjC. 441 Crc32CSupplier javaUtilZip = functions[0]; 442 443 try { 444 Class.forName("java.util.zip.CRC32C"); 445 return javaUtilZip; 446 } catch (ClassNotFoundException runningUnderJava8) { 447 return ABSTRACT_HASH_FUNCTION; 448 } 449 } 450 } 451 452 /** 453 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits). 454 * 455 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 456 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 457 * 458 * <p>This function is best understood as a <a 459 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 460 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 461 * 462 * @since 14.0 463 */ 464 public static HashFunction crc32() { 465 return ChecksumType.CRC_32.hashFunction; 466 } 467 468 /** 469 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits). 470 * 471 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 472 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 473 * 474 * <p>This function is best understood as a <a 475 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 476 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 477 * 478 * @since 14.0 479 */ 480 public static HashFunction adler32() { 481 return ChecksumType.ADLER_32.hashFunction; 482 } 483 484 @Immutable 485 enum ChecksumType implements ImmutableSupplier<Checksum> { 486 CRC_32("Hashing.crc32()") { 487 @Override 488 public Checksum get() { 489 return new CRC32(); 490 } 491 }, 492 @J2ObjCIncompatible 493 CRC_32C("Hashing.crc32c()") { 494 @Override 495 public Checksum get() { 496 return Crc32cMethodHandles.newCrc32c(); 497 } 498 }, 499 ADLER_32("Hashing.adler32()") { 500 @Override 501 public Checksum get() { 502 return new Adler32(); 503 } 504 }; 505 506 public final HashFunction hashFunction; 507 508 ChecksumType(String toString) { 509 this.hashFunction = new ChecksumHashFunction(this, 32, toString); 510 } 511 } 512 513 @J2ObjCIncompatible 514 @SuppressWarnings("unused") 515 private static final class Crc32cMethodHandles { 516 private static final MethodHandle CONSTRUCTOR = crc32cConstructor(); 517 518 @IgnoreJRERequirement // https://github.com/mojohaus/animal-sniffer/issues/67 519 static Checksum newCrc32c() { 520 try { 521 return (Checksum) CONSTRUCTOR.invokeExact(); 522 } catch (Throwable e) { 523 throwIfUnchecked(e); 524 // This should be impossible, since the constructor has no `throws` clause. 525 throw new UndeclaredThrowableException(e); 526 } 527 } 528 529 private static MethodHandle crc32cConstructor() { 530 try { 531 Class<?> clazz = Class.forName("java.util.zip.CRC32C"); 532 /* 533 * We can't cast to CRC32C at the call site because we support building with Java 8 534 * (https://github.com/google/guava/issues/6549). So we have to use asType() to change from 535 * CRC32C to Checksum. This may carry some performance cost 536 * (https://stackoverflow.com/a/22321671/28465), but I'd have to benchmark more carefully to 537 * even detect it. 538 */ 539 return MethodHandles.lookup() 540 .findConstructor(clazz, methodType(void.class)) 541 .asType(methodType(Checksum.class)); 542 } catch (ClassNotFoundException e) { 543 // We check that the class is available before calling this method. 544 throw new AssertionError(e); 545 } catch (IllegalAccessException e) { 546 // That API is public. 547 throw newLinkageError(e); 548 } catch (NoSuchMethodException e) { 549 // That constructor exists. 550 throw newLinkageError(e); 551 } 552 } 553 554 private static LinkageError newLinkageError(Throwable cause) { 555 return new LinkageError(cause.toString(), cause); 556 } 557 } 558 559 /** 560 * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm. 561 * 562 * <p>This is designed for generating persistent fingerprints of strings. It isn't 563 * cryptographically secure, but it produces a high-quality hash with fewer collisions than some 564 * alternatives we've used in the past. 565 * 566 * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This 567 * means {@link HashCode#asLong} is guaranteed to return the same value that 568 * farmhash::Fingerprint64() would for the same input (when compared using {@link 569 * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers). 570 * 571 * <p>This function is best understood as a <a 572 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 573 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 574 * 575 * @since 20.0 576 */ 577 public static HashFunction farmHashFingerprint64() { 578 return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64; 579 } 580 581 /** 582 * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits). 583 * 584 * <p>This is designed for generating persistent fingerprints of strings. It isn't 585 * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints 586 * generated using this are byte-wise identical to those created using the C++ version, but note 587 * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}). 588 * Comparisons between the two should take this into account. 589 * 590 * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for 591 * longer strings. It could have been one or the other throughout. The main advantage of the 592 * combination is that CityHash has a bunch of special cases for short strings that don't need to 593 * be replicated here. The result will never be 0 or 1. 594 * 595 * <p>This function is best understood as a <a 596 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 597 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 598 * 599 * @since 31.1 600 */ 601 public static HashFunction fingerprint2011() { 602 return Fingerprint2011.FINGERPRINT_2011; 603 } 604 605 /** 606 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner 607 * that minimizes the need for remapping as {@code buckets} grows. That is, {@code 608 * consistentHash(h, n)} equals: 609 * 610 * <ul> 611 * <li>{@code n - 1}, with approximate probability {@code 1/n} 612 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 613 * </ul> 614 * 615 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 616 * following conditions: 617 * 618 * <ul> 619 * <li>You want to assign the same fraction of inputs to each bucket. 620 * <li>When you reduce the number of buckets, you can accept that the most recently added 621 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 622 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 623 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 624 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 625 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 626 * no way for you to specify which of the three buckets is disappearing. Thus, if your 627 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 628 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 629 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 630 * </ul> 631 * 632 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 633 * consistent hashing</a> for more information. 634 */ 635 public static int consistentHash(HashCode hashCode, int buckets) { 636 return consistentHash(hashCode.padToLong(), buckets); 637 } 638 639 /** 640 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that 641 * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, 642 * n)} equals: 643 * 644 * <ul> 645 * <li>{@code n - 1}, with approximate probability {@code 1/n} 646 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 647 * </ul> 648 * 649 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 650 * following conditions: 651 * 652 * <ul> 653 * <li>You want to assign the same fraction of inputs to each bucket. 654 * <li>When you reduce the number of buckets, you can accept that the most recently added 655 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 656 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 657 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 658 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 659 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 660 * no way for you to specify which of the three buckets is disappearing. Thus, if your 661 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 662 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 663 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 664 * </ul> 665 * 666 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 667 * consistent hashing</a> for more information. 668 */ 669 public static int consistentHash(long input, int buckets) { 670 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 671 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 672 int candidate = 0; 673 int next; 674 675 // Jump from bucket to bucket until we go out of range 676 while (true) { 677 next = (int) ((candidate + 1) / generator.nextDouble()); 678 if (next >= 0 && next < buckets) { 679 candidate = next; 680 } else { 681 return candidate; 682 } 683 } 684 } 685 686 /** 687 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 688 * the information of these hash codes in an ordered fashion. That is, whenever two equal hash 689 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 690 * was computed from the <i>same</i> input hash codes in the <i>same</i> order. 691 * 692 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 693 * have the same bit length 694 */ 695 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 696 Iterator<HashCode> iterator = hashCodes.iterator(); 697 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 698 int bits = iterator.next().bits(); 699 byte[] resultBytes = new byte[bits / 8]; 700 for (HashCode hashCode : hashCodes) { 701 byte[] nextBytes = hashCode.asBytes(); 702 checkArgument( 703 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 704 for (int i = 0; i < nextBytes.length; i++) { 705 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 706 } 707 } 708 return HashCode.fromBytesNoCopy(resultBytes); 709 } 710 711 /** 712 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 713 * the information of these hash codes in an unordered fashion. That is, whenever two equal hash 714 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 715 * was computed from the <i>same</i> input hash codes in <i>some</i> order. 716 * 717 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 718 * have the same bit length 719 */ 720 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 721 Iterator<HashCode> iterator = hashCodes.iterator(); 722 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 723 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 724 for (HashCode hashCode : hashCodes) { 725 byte[] nextBytes = hashCode.asBytes(); 726 checkArgument( 727 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 728 for (int i = 0; i < nextBytes.length; i++) { 729 resultBytes[i] += nextBytes[i]; 730 } 731 } 732 return HashCode.fromBytesNoCopy(resultBytes); 733 } 734 735 /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */ 736 static int checkPositiveAndMakeMultipleOf32(int bits) { 737 checkArgument(bits > 0, "Number of bits must be positive"); 738 return (bits + 31) & ~31; 739 } 740 741 /** 742 * Returns a hash function which computes its hash code by concatenating the hash codes of the 743 * underlying hash functions together. This can be useful if you need to generate hash codes of a 744 * specific length. 745 * 746 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 747 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 748 * 749 * @since 19.0 750 */ 751 public static HashFunction concatenating( 752 HashFunction first, HashFunction second, HashFunction... rest) { 753 // We can't use Lists.asList() here because there's no hash->collect dependency 754 List<HashFunction> list = new ArrayList<>(); 755 list.add(first); 756 list.add(second); 757 Collections.addAll(list, rest); 758 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 759 } 760 761 /** 762 * Returns a hash function which computes its hash code by concatenating the hash codes of the 763 * underlying hash functions together. This can be useful if you need to generate hash codes of a 764 * specific length. 765 * 766 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 767 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 768 * 769 * @since 19.0 770 */ 771 public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) { 772 checkNotNull(hashFunctions); 773 // We can't use Iterables.toArray() here because there's no hash->collect dependency 774 List<HashFunction> list = new ArrayList<>(); 775 for (HashFunction hashFunction : hashFunctions) { 776 list.add(hashFunction); 777 } 778 checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size()); 779 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 780 } 781 782 private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 783 784 private ConcatenatedHashFunction(HashFunction... functions) { 785 super(functions); 786 for (HashFunction function : functions) { 787 checkArgument( 788 function.bits() % 8 == 0, 789 "the number of bits (%s) in hashFunction (%s) must be divisible by 8", 790 function.bits(), 791 function); 792 } 793 } 794 795 @Override 796 HashCode makeHash(Hasher[] hashers) { 797 byte[] bytes = new byte[bits() / 8]; 798 int i = 0; 799 for (Hasher hasher : hashers) { 800 HashCode newHash = hasher.hash(); 801 i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8); 802 } 803 return HashCode.fromBytesNoCopy(bytes); 804 } 805 806 @Override 807 public int bits() { 808 int bitSum = 0; 809 for (HashFunction function : functions) { 810 bitSum += function.bits(); 811 } 812 return bitSum; 813 } 814 815 @Override 816 public boolean equals(@CheckForNull Object object) { 817 if (object instanceof ConcatenatedHashFunction) { 818 ConcatenatedHashFunction other = (ConcatenatedHashFunction) object; 819 return Arrays.equals(functions, other.functions); 820 } 821 return false; 822 } 823 824 @Override 825 public int hashCode() { 826 return Arrays.hashCode(functions); 827 } 828 } 829 830 /** 831 * Linear CongruentialGenerator to use for consistent hashing. See 832 * http://en.wikipedia.org/wiki/Linear_congruential_generator 833 */ 834 private static final class LinearCongruentialGenerator { 835 private long state; 836 837 public LinearCongruentialGenerator(long seed) { 838 this.state = seed; 839 } 840 841 public double nextDouble() { 842 state = 2862933555777941757L * state + 1; 843 return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31; 844 } 845 } 846 847 private Hashing() {} 848}