001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.hash.SneakyThrows.sneakyThrow; 020import static java.lang.invoke.MethodType.methodType; 021 022import com.google.errorprone.annotations.Immutable; 023import com.google.j2objc.annotations.J2ObjCIncompatible; 024import java.lang.invoke.MethodHandle; 025import java.lang.invoke.MethodHandles; 026import java.security.Key; 027import java.util.ArrayList; 028import java.util.Arrays; 029import java.util.Collections; 030import java.util.Iterator; 031import java.util.List; 032import java.util.zip.Adler32; 033import java.util.zip.CRC32; 034import java.util.zip.Checksum; 035import javax.crypto.spec.SecretKeySpec; 036import org.jspecify.annotations.Nullable; 037 038/** 039 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 040 * utilities. 041 * 042 * <p>A comparison of the various hash functions can be found <a 043 * href="https://docs.google.com/spreadsheets/d/1_q2EVcxA2HjcrlVMbaqXwMj31h9M5-Bqj_m8vITOwwk/">here</a>. 044 * 045 * @author Kevin Bourrillion 046 * @author Dimitris Andreou 047 * @author Kurt Alfred Kluever 048 * @since 11.0 049 */ 050public final class Hashing { 051 /** 052 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm 053 * the returned function implements is unspecified and subject to change without notice. 054 * 055 * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code 056 * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current 057 * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose, 058 * non-cryptographic hash function that will never change behavior, we suggest {@link 059 * #murmur3_128}. 060 * 061 * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value 062 * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. 063 * 064 * @param minimumBits a positive integer. This can be arbitrarily large. The returned {@link 065 * HashFunction} instance may use memory proportional to this integer. 066 * @return a hash function, described above, that produces hash codes of length {@code 067 * minimumBits} or greater 068 */ 069 public static HashFunction goodFastHash(int minimumBits) { 070 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 071 072 if (bits == 32) { 073 return Murmur3_32HashFunction.GOOD_FAST_HASH_32; 074 } 075 if (bits <= 128) { 076 return Murmur3_128HashFunction.GOOD_FAST_HASH_128; 077 } 078 079 // Otherwise, join together some 128-bit murmur3s 080 int hashFunctionsNeeded = (bits + 127) / 128; 081 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 082 hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128; 083 int seed = GOOD_FAST_HASH_SEED; 084 for (int i = 1; i < hashFunctionsNeeded; i++) { 085 seed += 1500450271; // a prime; shouldn't matter 086 hashFunctions[i] = murmur3_128(seed); 087 } 088 return new ConcatenatedHashFunction(hashFunctions); 089 } 090 091 /** 092 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 093 * dependent on the hash codes they produce will fail sooner. 094 */ 095 @SuppressWarnings("GoodTime") // reading system time without TimeSource 096 static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 097 098 /** 099 * Returns a hash function implementing the <a 100 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 101 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 102 * bug</b> as described in the deprecation text. 103 * 104 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 105 * have the bug. 106 * 107 * @deprecated This implementation produces incorrect hash values from the {@link 108 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 109 * #murmur3_32_fixed(int)} instead. 110 */ 111 @Deprecated 112 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 113 public static HashFunction murmur3_32(int seed) { 114 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); 115 } 116 117 /** 118 * Returns a hash function implementing the <a 119 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 120 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 121 * bug</b> as described in the deprecation text. 122 * 123 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 124 * have the bug. 125 * 126 * @deprecated This implementation produces incorrect hash values from the {@link 127 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 128 * #murmur3_32_fixed()} instead. 129 */ 130 @Deprecated 131 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 132 public static HashFunction murmur3_32() { 133 return Murmur3_32HashFunction.MURMUR3_32; 134 } 135 136 /** 137 * Returns a hash function implementing the <a 138 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 139 * algorithm, x86 variant</a> (little-endian variant), using the given seed value. 140 * 141 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 142 * 143 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 144 * HashFunction} returned by the original {@code murmur3_32} method. 145 * 146 * @since 31.0 147 */ 148 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 149 public static HashFunction murmur3_32_fixed(int seed) { 150 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); 151 } 152 153 /** 154 * Returns a hash function implementing the <a 155 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 156 * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero. 157 * 158 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 159 * 160 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 161 * HashFunction} returned by the original {@code murmur3_32} method. 162 * 163 * @since 31.0 164 */ 165 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 166 public static HashFunction murmur3_32_fixed() { 167 return Murmur3_32HashFunction.MURMUR3_32_FIXED; 168 } 169 170 /** 171 * Returns a hash function implementing the <a 172 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 173 * algorithm, x64 variant</a> (little-endian variant), using the given seed value. 174 * 175 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 176 */ 177 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 178 public static HashFunction murmur3_128(int seed) { 179 return new Murmur3_128HashFunction(seed); 180 } 181 182 /** 183 * Returns a hash function implementing the <a 184 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 185 * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero. 186 * 187 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 188 */ 189 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 190 public static HashFunction murmur3_128() { 191 return Murmur3_128HashFunction.MURMUR3_128; 192 } 193 194 /** 195 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 196 * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}. 197 * 198 * @since 15.0 199 */ 200 public static HashFunction sipHash24() { 201 return SipHashFunction.SIP_HASH_24; 202 } 203 204 /** 205 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 206 * SipHash-2-4 algorithm</a> using the given seed. 207 * 208 * @since 15.0 209 */ 210 public static HashFunction sipHash24(long k0, long k1) { 211 return new SipHashFunction(2, 4, k0, k1); 212 } 213 214 /** 215 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits). 216 * 217 * @deprecated If you must interoperate with a system that requires MD5, then use this method, 218 * despite its deprecation. But if you can choose your hash function, avoid MD5, which is 219 * neither fast nor secure. As of January 2017, we suggest: 220 * <ul> 221 * <li>For security: 222 * {@link Hashing#sha256} or a higher-level API. 223 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 224 * </ul> 225 */ 226 @Deprecated 227 public static HashFunction md5() { 228 return Md5Holder.MD5; 229 } 230 231 private static class Md5Holder { 232 static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 233 } 234 235 /** 236 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits). 237 * 238 * @deprecated If you must interoperate with a system that requires SHA-1, then use this method, 239 * despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is 240 * neither fast nor secure. As of January 2017, we suggest: 241 * <ul> 242 * <li>For security: 243 * {@link Hashing#sha256} or a higher-level API. 244 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 245 * </ul> 246 */ 247 @Deprecated 248 public static HashFunction sha1() { 249 return Sha1Holder.SHA_1; 250 } 251 252 private static class Sha1Holder { 253 static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 254 } 255 256 /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */ 257 public static HashFunction sha256() { 258 return Sha256Holder.SHA_256; 259 } 260 261 private static class Sha256Holder { 262 static final HashFunction SHA_256 = 263 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 264 } 265 266 /** 267 * Returns a hash function implementing the SHA-384 algorithm (384 hash bits). 268 * 269 * @since 19.0 270 */ 271 public static HashFunction sha384() { 272 return Sha384Holder.SHA_384; 273 } 274 275 private static class Sha384Holder { 276 static final HashFunction SHA_384 = 277 new MessageDigestHashFunction("SHA-384", "Hashing.sha384()"); 278 } 279 280 /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */ 281 public static HashFunction sha512() { 282 return Sha512Holder.SHA_512; 283 } 284 285 private static class Sha512Holder { 286 static final HashFunction SHA_512 = 287 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 288 } 289 290 /** 291 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 292 * MD5 (128 hash bits) hash function and the given secret key. 293 * 294 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 295 * future-proof algorithms <a 296 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 297 * 298 * @param key the secret key 299 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 300 * @since 20.0 301 */ 302 public static HashFunction hmacMd5(Key key) { 303 return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key)); 304 } 305 306 /** 307 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 308 * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array 309 * and the MD5 algorithm. 310 * 311 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 312 * future-proof algorithms <a 313 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 314 * 315 * @param key the key material of the secret key 316 * @since 20.0 317 */ 318 public static HashFunction hmacMd5(byte[] key) { 319 return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5")); 320 } 321 322 /** 323 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 324 * SHA-1 (160 hash bits) hash function and the given secret key. 325 * 326 * @param key the secret key 327 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 328 * @since 20.0 329 */ 330 public static HashFunction hmacSha1(Key key) { 331 return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key)); 332 } 333 334 /** 335 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 336 * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 337 * array and the SHA-1 algorithm. 338 * 339 * @param key the key material of the secret key 340 * @since 20.0 341 */ 342 public static HashFunction hmacSha1(byte[] key) { 343 return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1")); 344 } 345 346 /** 347 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 348 * SHA-256 (256 hash bits) hash function and the given secret key. 349 * 350 * @param key the secret key 351 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 352 * @since 20.0 353 */ 354 public static HashFunction hmacSha256(Key key) { 355 return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key)); 356 } 357 358 /** 359 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 360 * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 361 * array and the SHA-256 algorithm. 362 * 363 * @param key the key material of the secret key 364 * @since 20.0 365 */ 366 public static HashFunction hmacSha256(byte[] key) { 367 return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256")); 368 } 369 370 /** 371 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 372 * SHA-512 (512 hash bits) hash function and the given secret key. 373 * 374 * @param key the secret key 375 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 376 * @since 20.0 377 */ 378 public static HashFunction hmacSha512(Key key) { 379 return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key)); 380 } 381 382 /** 383 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 384 * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 385 * array and the SHA-512 algorithm. 386 * 387 * @param key the key material of the secret key 388 * @since 20.0 389 */ 390 public static HashFunction hmacSha512(byte[] key) { 391 return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512")); 392 } 393 394 private static String hmacToString(String methodName, Key key) { 395 return "Hashing." 396 + methodName 397 + "(Key[algorithm=" 398 + key.getAlgorithm() 399 + ", format=" 400 + key.getFormat() 401 + "])"; 402 } 403 404 /** 405 * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described 406 * by RFC 3720, Section 12.1. 407 * 408 * <p>This function is best understood as a <a 409 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 410 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 411 * 412 * @since 18.0 413 */ 414 public static HashFunction crc32c() { 415 return Crc32CSupplier.HASH_FUNCTION; 416 } 417 418 @Immutable 419 private enum Crc32CSupplier implements ImmutableSupplier<HashFunction> { 420 @J2ObjCIncompatible 421 JAVA_UTIL_ZIP { 422 @Override 423 public HashFunction get() { 424 return ChecksumType.CRC_32C.hashFunction; 425 } 426 }, 427 ABSTRACT_HASH_FUNCTION { 428 @Override 429 public HashFunction get() { 430 return Crc32cHashFunction.CRC_32_C; 431 } 432 }; 433 434 static final HashFunction HASH_FUNCTION = pickFunction().get(); 435 436 private static Crc32CSupplier pickFunction() { 437 Crc32CSupplier[] functions = values(); 438 439 if (functions.length == 1) { 440 // We're running under J2ObjC. 441 return functions[0]; 442 } 443 444 // We can't refer to JAVA_UTIL_ZIP directly at compile time because of J2ObjC. 445 Crc32CSupplier javaUtilZip = functions[0]; 446 447 try { 448 Class.forName("java.util.zip.CRC32C"); 449 return javaUtilZip; 450 } catch (ClassNotFoundException runningUnderJava8) { 451 return ABSTRACT_HASH_FUNCTION; 452 } 453 } 454 } 455 456 /** 457 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits). 458 * 459 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 460 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 461 * 462 * <p>This function is best understood as a <a 463 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 464 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 465 * 466 * @since 14.0 467 */ 468 public static HashFunction crc32() { 469 return ChecksumType.CRC_32.hashFunction; 470 } 471 472 /** 473 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits). 474 * 475 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 476 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 477 * 478 * <p>This function is best understood as a <a 479 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 480 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 481 * 482 * @since 14.0 483 */ 484 public static HashFunction adler32() { 485 return ChecksumType.ADLER_32.hashFunction; 486 } 487 488 @Immutable 489 enum ChecksumType implements ImmutableSupplier<Checksum> { 490 CRC_32("Hashing.crc32()") { 491 @Override 492 public Checksum get() { 493 return new CRC32(); 494 } 495 }, 496 @J2ObjCIncompatible 497 CRC_32C("Hashing.crc32c()") { 498 @Override 499 public Checksum get() { 500 return Crc32cMethodHandles.newCrc32c(); 501 } 502 }, 503 ADLER_32("Hashing.adler32()") { 504 @Override 505 public Checksum get() { 506 return new Adler32(); 507 } 508 }; 509 510 public final HashFunction hashFunction; 511 512 ChecksumType(String toString) { 513 this.hashFunction = new ChecksumHashFunction(this, 32, toString); 514 } 515 } 516 517 @J2ObjCIncompatible 518 @SuppressWarnings("unused") 519 private static final class Crc32cMethodHandles { 520 private static final MethodHandle CONSTRUCTOR = crc32cConstructor(); 521 522 @IgnoreJRERequirement // https://github.com/mojohaus/animal-sniffer/issues/67 523 static Checksum newCrc32c() { 524 try { 525 return (Checksum) CONSTRUCTOR.invokeExact(); 526 } catch (Throwable e) { 527 // The constructor has no `throws` clause. 528 throw sneakyThrow(e); 529 } 530 } 531 532 private static MethodHandle crc32cConstructor() { 533 try { 534 Class<?> clazz = Class.forName("java.util.zip.CRC32C"); 535 /* 536 * We can't cast to CRC32C at the call site because we support building with Java 8 537 * (https://github.com/google/guava/issues/6549). So we have to use asType() to change from 538 * CRC32C to Checksum. This may carry some performance cost 539 * (https://stackoverflow.com/a/22321671/28465), but I'd have to benchmark more carefully to 540 * even detect it. 541 */ 542 return MethodHandles.lookup() 543 .findConstructor(clazz, methodType(void.class)) 544 .asType(methodType(Checksum.class)); 545 } catch (ClassNotFoundException e) { 546 // We check that the class is available before calling this method. 547 throw new AssertionError(e); 548 } catch (IllegalAccessException e) { 549 // That API is public. 550 throw newLinkageError(e); 551 } catch (NoSuchMethodException e) { 552 // That constructor exists. 553 throw newLinkageError(e); 554 } 555 } 556 557 private static LinkageError newLinkageError(Throwable cause) { 558 return new LinkageError(cause.toString(), cause); 559 } 560 } 561 562 /** 563 * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm. 564 * 565 * <p>This is designed for generating persistent fingerprints of strings. It isn't 566 * cryptographically secure, but it produces a high-quality hash with fewer collisions than some 567 * alternatives we've used in the past. 568 * 569 * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This 570 * means {@link HashCode#asLong} is guaranteed to return the same value that 571 * farmhash::Fingerprint64() would for the same input (when compared using {@link 572 * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers). 573 * 574 * <p>This function is best understood as a <a 575 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 576 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 577 * 578 * @since 20.0 579 */ 580 public static HashFunction farmHashFingerprint64() { 581 return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64; 582 } 583 584 /** 585 * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits). 586 * 587 * <p>This is designed for generating persistent fingerprints of strings. It isn't 588 * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints 589 * generated using this are byte-wise identical to those created using the C++ version, but note 590 * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}). 591 * Comparisons between the two should take this into account. 592 * 593 * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for 594 * longer strings. It could have been one or the other throughout. The main advantage of the 595 * combination is that CityHash has a bunch of special cases for short strings that don't need to 596 * be replicated here. The result will never be 0 or 1. 597 * 598 * <p>This function is best understood as a <a 599 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 600 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 601 * 602 * @since 31.1 603 */ 604 public static HashFunction fingerprint2011() { 605 return Fingerprint2011.FINGERPRINT_2011; 606 } 607 608 /** 609 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner 610 * that minimizes the need for remapping as {@code buckets} grows. That is, {@code 611 * consistentHash(h, n)} equals: 612 * 613 * <ul> 614 * <li>{@code n - 1}, with approximate probability {@code 1/n} 615 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 616 * </ul> 617 * 618 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 619 * following conditions: 620 * 621 * <ul> 622 * <li>You want to assign the same fraction of inputs to each bucket. 623 * <li>When you reduce the number of buckets, you can accept that the most recently added 624 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 625 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 626 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 627 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 628 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 629 * no way for you to specify which of the three buckets is disappearing. Thus, if your 630 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 631 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 632 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 633 * </ul> 634 * 635 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 636 * consistent hashing</a> for more information. 637 */ 638 public static int consistentHash(HashCode hashCode, int buckets) { 639 return consistentHash(hashCode.padToLong(), buckets); 640 } 641 642 /** 643 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that 644 * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, 645 * n)} equals: 646 * 647 * <ul> 648 * <li>{@code n - 1}, with approximate probability {@code 1/n} 649 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 650 * </ul> 651 * 652 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 653 * following conditions: 654 * 655 * <ul> 656 * <li>You want to assign the same fraction of inputs to each bucket. 657 * <li>When you reduce the number of buckets, you can accept that the most recently added 658 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 659 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 660 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 661 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 662 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 663 * no way for you to specify which of the three buckets is disappearing. Thus, if your 664 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 665 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 666 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 667 * </ul> 668 * 669 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 670 * consistent hashing</a> for more information. 671 */ 672 public static int consistentHash(long input, int buckets) { 673 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 674 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 675 int candidate = 0; 676 int next; 677 678 // Jump from bucket to bucket until we go out of range 679 while (true) { 680 next = (int) ((candidate + 1) / generator.nextDouble()); 681 if (next >= 0 && next < buckets) { 682 candidate = next; 683 } else { 684 return candidate; 685 } 686 } 687 } 688 689 /** 690 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 691 * the information of these hash codes in an ordered fashion. That is, whenever two equal hash 692 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 693 * was computed from the <i>same</i> input hash codes in the <i>same</i> order. 694 * 695 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 696 * have the same bit length 697 */ 698 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 699 Iterator<HashCode> iterator = hashCodes.iterator(); 700 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 701 int bits = iterator.next().bits(); 702 byte[] resultBytes = new byte[bits / 8]; 703 for (HashCode hashCode : hashCodes) { 704 byte[] nextBytes = hashCode.asBytes(); 705 checkArgument( 706 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 707 for (int i = 0; i < nextBytes.length; i++) { 708 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 709 } 710 } 711 return HashCode.fromBytesNoCopy(resultBytes); 712 } 713 714 /** 715 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 716 * the information of these hash codes in an unordered fashion. That is, whenever two equal hash 717 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 718 * was computed from the <i>same</i> input hash codes in <i>some</i> order. 719 * 720 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 721 * have the same bit length 722 */ 723 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 724 Iterator<HashCode> iterator = hashCodes.iterator(); 725 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 726 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 727 for (HashCode hashCode : hashCodes) { 728 byte[] nextBytes = hashCode.asBytes(); 729 checkArgument( 730 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 731 for (int i = 0; i < nextBytes.length; i++) { 732 resultBytes[i] += nextBytes[i]; 733 } 734 } 735 return HashCode.fromBytesNoCopy(resultBytes); 736 } 737 738 /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */ 739 static int checkPositiveAndMakeMultipleOf32(int bits) { 740 checkArgument(bits > 0, "Number of bits must be positive"); 741 return (bits + 31) & ~31; 742 } 743 744 /** 745 * Returns a hash function which computes its hash code by concatenating the hash codes of the 746 * underlying hash functions together. This can be useful if you need to generate hash codes of a 747 * specific length. 748 * 749 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 750 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 751 * 752 * @since 19.0 753 */ 754 public static HashFunction concatenating( 755 HashFunction first, HashFunction second, HashFunction... rest) { 756 // We can't use Lists.asList() here because there's no hash->collect dependency 757 List<HashFunction> list = new ArrayList<>(); 758 list.add(first); 759 list.add(second); 760 Collections.addAll(list, rest); 761 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 762 } 763 764 /** 765 * Returns a hash function which computes its hash code by concatenating the hash codes of the 766 * underlying hash functions together. This can be useful if you need to generate hash codes of a 767 * specific length. 768 * 769 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 770 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 771 * 772 * @since 19.0 773 */ 774 public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) { 775 checkNotNull(hashFunctions); 776 // We can't use Iterables.toArray() here because there's no hash->collect dependency 777 List<HashFunction> list = new ArrayList<>(); 778 for (HashFunction hashFunction : hashFunctions) { 779 list.add(hashFunction); 780 } 781 checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size()); 782 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 783 } 784 785 private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 786 787 private ConcatenatedHashFunction(HashFunction... functions) { 788 super(functions); 789 for (HashFunction function : functions) { 790 checkArgument( 791 function.bits() % 8 == 0, 792 "the number of bits (%s) in hashFunction (%s) must be divisible by 8", 793 function.bits(), 794 function); 795 } 796 } 797 798 @Override 799 HashCode makeHash(Hasher[] hashers) { 800 byte[] bytes = new byte[bits() / 8]; 801 int i = 0; 802 for (Hasher hasher : hashers) { 803 HashCode newHash = hasher.hash(); 804 i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8); 805 } 806 return HashCode.fromBytesNoCopy(bytes); 807 } 808 809 @Override 810 public int bits() { 811 int bitSum = 0; 812 for (HashFunction function : functions) { 813 bitSum += function.bits(); 814 } 815 return bitSum; 816 } 817 818 @Override 819 public boolean equals(@Nullable Object object) { 820 if (object instanceof ConcatenatedHashFunction) { 821 ConcatenatedHashFunction other = (ConcatenatedHashFunction) object; 822 return Arrays.equals(functions, other.functions); 823 } 824 return false; 825 } 826 827 @Override 828 public int hashCode() { 829 return Arrays.hashCode(functions); 830 } 831 } 832 833 /** 834 * Linear CongruentialGenerator to use for consistent hashing. See 835 * http://en.wikipedia.org/wiki/Linear_congruential_generator 836 */ 837 private static final class LinearCongruentialGenerator { 838 private long state; 839 840 public LinearCongruentialGenerator(long seed) { 841 this.state = seed; 842 } 843 844 public double nextDouble() { 845 state = 2862933555777941757L * state + 1; 846 return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31; 847 } 848 } 849 850 private Hashing() {} 851}