001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019 020import com.google.errorprone.annotations.Immutable; 021import java.security.Key; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collections; 025import java.util.Iterator; 026import java.util.List; 027import java.util.zip.Adler32; 028import java.util.zip.CRC32; 029import java.util.zip.Checksum; 030import javax.crypto.spec.SecretKeySpec; 031import org.jspecify.annotations.Nullable; 032 033/** 034 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 035 * utilities. 036 * 037 * <p>A comparison of the various hash functions can be found <a 038 * href="https://docs.google.com/spreadsheets/d/1_q2EVcxA2HjcrlVMbaqXwMj31h9M5-Bqj_m8vITOwwk/">here</a>. 039 * 040 * @author Kevin Bourrillion 041 * @author Dimitris Andreou 042 * @author Kurt Alfred Kluever 043 * @since 11.0 044 */ 045public final class Hashing { 046 /** 047 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm 048 * the returned function implements is unspecified and subject to change without notice. 049 * 050 * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code 051 * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current 052 * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose, 053 * non-cryptographic hash function that will never change behavior, we suggest {@link 054 * #murmur3_128}. 055 * 056 * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value 057 * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. 058 * 059 * @param minimumBits a positive integer. This can be arbitrarily large. The returned {@link 060 * HashFunction} instance may use memory proportional to this integer. 061 * @return a hash function, described above, that produces hash codes of length {@code 062 * minimumBits} or greater 063 */ 064 public static HashFunction goodFastHash(int minimumBits) { 065 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 066 067 if (bits == 32) { 068 return Murmur3_32HashFunction.GOOD_FAST_HASH_32; 069 } 070 if (bits <= 128) { 071 return Murmur3_128HashFunction.GOOD_FAST_HASH_128; 072 } 073 074 // Otherwise, join together some 128-bit murmur3s 075 int hashFunctionsNeeded = (bits + 127) / 128; 076 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 077 hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128; 078 int seed = GOOD_FAST_HASH_SEED; 079 for (int i = 1; i < hashFunctionsNeeded; i++) { 080 seed += 1500450271; // a prime; shouldn't matter 081 hashFunctions[i] = murmur3_128(seed); 082 } 083 return new ConcatenatedHashFunction(hashFunctions); 084 } 085 086 /** 087 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 088 * dependent on the hash codes they produce will fail sooner. 089 */ 090 @SuppressWarnings("GoodTime") // reading system time without TimeSource 091 static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 092 093 /** 094 * Returns a hash function implementing the <a 095 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 096 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 097 * bug</b> as described in the deprecation text. 098 * 099 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 100 * have the bug. 101 * 102 * @deprecated This implementation produces incorrect hash values from the {@link 103 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 104 * #murmur3_32_fixed(int)} instead. 105 */ 106 @Deprecated 107 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 108 public static HashFunction murmur3_32(int seed) { 109 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); 110 } 111 112 /** 113 * Returns a hash function implementing the <a 114 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 115 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 116 * bug</b> as described in the deprecation text. 117 * 118 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 119 * have the bug. 120 * 121 * @deprecated This implementation produces incorrect hash values from the {@link 122 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 123 * #murmur3_32_fixed()} instead. 124 */ 125 @Deprecated 126 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 127 public static HashFunction murmur3_32() { 128 return Murmur3_32HashFunction.MURMUR3_32; 129 } 130 131 /** 132 * Returns a hash function implementing the <a 133 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 134 * algorithm, x86 variant</a> (little-endian variant), using the given seed value. 135 * 136 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 137 * 138 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 139 * HashFunction} returned by the original {@code murmur3_32} method. 140 * 141 * @since 31.0 142 */ 143 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 144 public static HashFunction murmur3_32_fixed(int seed) { 145 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); 146 } 147 148 /** 149 * Returns a hash function implementing the <a 150 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 151 * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero. 152 * 153 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 154 * 155 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 156 * HashFunction} returned by the original {@code murmur3_32} method. 157 * 158 * @since 31.0 159 */ 160 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 161 public static HashFunction murmur3_32_fixed() { 162 return Murmur3_32HashFunction.MURMUR3_32_FIXED; 163 } 164 165 /** 166 * Returns a hash function implementing the <a 167 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 168 * algorithm, x64 variant</a> (little-endian variant), using the given seed value. 169 * 170 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 171 */ 172 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 173 public static HashFunction murmur3_128(int seed) { 174 return new Murmur3_128HashFunction(seed); 175 } 176 177 /** 178 * Returns a hash function implementing the <a 179 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 180 * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero. 181 * 182 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 183 */ 184 @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks 185 public static HashFunction murmur3_128() { 186 return Murmur3_128HashFunction.MURMUR3_128; 187 } 188 189 /** 190 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 191 * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}. 192 * 193 * @since 15.0 194 */ 195 public static HashFunction sipHash24() { 196 return SipHashFunction.SIP_HASH_24; 197 } 198 199 /** 200 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 201 * SipHash-2-4 algorithm</a> using the given seed. 202 * 203 * @since 15.0 204 */ 205 public static HashFunction sipHash24(long k0, long k1) { 206 return new SipHashFunction(2, 4, k0, k1); 207 } 208 209 /** 210 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits). 211 * 212 * @deprecated If you must interoperate with a system that requires MD5, then use this method, 213 * despite its deprecation. But if you can choose your hash function, avoid MD5, which is 214 * neither fast nor secure. As of January 2017, we suggest: 215 * <ul> 216 * <li>For security: 217 * {@link Hashing#sha256} or a higher-level API. 218 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 219 * </ul> 220 */ 221 @Deprecated 222 public static HashFunction md5() { 223 return Md5Holder.MD5; 224 } 225 226 private static class Md5Holder { 227 static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 228 } 229 230 /** 231 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits). 232 * 233 * @deprecated If you must interoperate with a system that requires SHA-1, then use this method, 234 * despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is 235 * neither fast nor secure. As of January 2017, we suggest: 236 * <ul> 237 * <li>For security: 238 * {@link Hashing#sha256} or a higher-level API. 239 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 240 * </ul> 241 */ 242 @Deprecated 243 public static HashFunction sha1() { 244 return Sha1Holder.SHA_1; 245 } 246 247 private static class Sha1Holder { 248 static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 249 } 250 251 /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */ 252 public static HashFunction sha256() { 253 return Sha256Holder.SHA_256; 254 } 255 256 private static class Sha256Holder { 257 static final HashFunction SHA_256 = 258 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 259 } 260 261 /** 262 * Returns a hash function implementing the SHA-384 algorithm (384 hash bits). 263 * 264 * @since 19.0 265 */ 266 public static HashFunction sha384() { 267 return Sha384Holder.SHA_384; 268 } 269 270 private static class Sha384Holder { 271 static final HashFunction SHA_384 = 272 new MessageDigestHashFunction("SHA-384", "Hashing.sha384()"); 273 } 274 275 /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */ 276 public static HashFunction sha512() { 277 return Sha512Holder.SHA_512; 278 } 279 280 private static class Sha512Holder { 281 static final HashFunction SHA_512 = 282 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 283 } 284 285 /** 286 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 287 * MD5 (128 hash bits) hash function and the given secret key. 288 * 289 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 290 * future-proof algorithms <a 291 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 292 * 293 * @param key the secret key 294 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 295 * @since 20.0 296 */ 297 public static HashFunction hmacMd5(Key key) { 298 return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key)); 299 } 300 301 /** 302 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 303 * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array 304 * and the MD5 algorithm. 305 * 306 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 307 * future-proof algorithms <a 308 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 309 * 310 * @param key the key material of the secret key 311 * @since 20.0 312 */ 313 public static HashFunction hmacMd5(byte[] key) { 314 return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5")); 315 } 316 317 /** 318 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 319 * SHA-1 (160 hash bits) hash function and the given secret key. 320 * 321 * @param key the secret key 322 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 323 * @since 20.0 324 */ 325 public static HashFunction hmacSha1(Key key) { 326 return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key)); 327 } 328 329 /** 330 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 331 * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 332 * array and the SHA-1 algorithm. 333 * 334 * @param key the key material of the secret key 335 * @since 20.0 336 */ 337 public static HashFunction hmacSha1(byte[] key) { 338 return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1")); 339 } 340 341 /** 342 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 343 * SHA-256 (256 hash bits) hash function and the given secret key. 344 * 345 * @param key the secret key 346 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 347 * @since 20.0 348 */ 349 public static HashFunction hmacSha256(Key key) { 350 return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key)); 351 } 352 353 /** 354 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 355 * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 356 * array and the SHA-256 algorithm. 357 * 358 * @param key the key material of the secret key 359 * @since 20.0 360 */ 361 public static HashFunction hmacSha256(byte[] key) { 362 return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256")); 363 } 364 365 /** 366 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 367 * SHA-512 (512 hash bits) hash function and the given secret key. 368 * 369 * @param key the secret key 370 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 371 * @since 20.0 372 */ 373 public static HashFunction hmacSha512(Key key) { 374 return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key)); 375 } 376 377 /** 378 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 379 * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 380 * array and the SHA-512 algorithm. 381 * 382 * @param key the key material of the secret key 383 * @since 20.0 384 */ 385 public static HashFunction hmacSha512(byte[] key) { 386 return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512")); 387 } 388 389 private static String hmacToString(String methodName, Key key) { 390 return "Hashing." 391 + methodName 392 + "(Key[algorithm=" 393 + key.getAlgorithm() 394 + ", format=" 395 + key.getFormat() 396 + "])"; 397 } 398 399 /** 400 * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described 401 * by RFC 3720, Section 12.1. 402 * 403 * <p>This function is best understood as a <a 404 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 405 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 406 * 407 * @since 18.0 408 */ 409 public static HashFunction crc32c() { 410 return Crc32cHashFunction.CRC_32_C; 411 } 412 413 /** 414 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits). 415 * 416 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 417 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 418 * 419 * <p>This function is best understood as a <a 420 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 421 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 422 * 423 * @since 14.0 424 */ 425 public static HashFunction crc32() { 426 return ChecksumType.CRC_32.hashFunction; 427 } 428 429 /** 430 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits). 431 * 432 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 433 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 434 * 435 * <p>This function is best understood as a <a 436 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 437 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 438 * 439 * @since 14.0 440 */ 441 public static HashFunction adler32() { 442 return ChecksumType.ADLER_32.hashFunction; 443 } 444 445 @Immutable 446 enum ChecksumType implements ImmutableSupplier<Checksum> { 447 CRC_32("Hashing.crc32()") { 448 @Override 449 public Checksum get() { 450 return new CRC32(); 451 } 452 }, 453 ADLER_32("Hashing.adler32()") { 454 @Override 455 public Checksum get() { 456 return new Adler32(); 457 } 458 }; 459 460 public final HashFunction hashFunction; 461 462 ChecksumType(String toString) { 463 this.hashFunction = new ChecksumHashFunction(this, 32, toString); 464 } 465 } 466 467 /** 468 * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm. 469 * 470 * <p>This is designed for generating persistent fingerprints of strings. It isn't 471 * cryptographically secure, but it produces a high-quality hash with fewer collisions than some 472 * alternatives we've used in the past. 473 * 474 * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This 475 * means {@link HashCode#asLong} is guaranteed to return the same value that 476 * farmhash::Fingerprint64() would for the same input (when compared using {@link 477 * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers). 478 * 479 * <p>This function is best understood as a <a 480 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 481 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 482 * 483 * @since 20.0 484 */ 485 public static HashFunction farmHashFingerprint64() { 486 return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64; 487 } 488 489 /** 490 * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits). 491 * 492 * <p>This is designed for generating persistent fingerprints of strings. It isn't 493 * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints 494 * generated using this are byte-wise identical to those created using the C++ version, but note 495 * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}). 496 * Comparisons between the two should take this into account. 497 * 498 * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for 499 * longer strings. It could have been one or the other throughout. The main advantage of the 500 * combination is that CityHash has a bunch of special cases for short strings that don't need to 501 * be replicated here. The result will never be 0 or 1. 502 * 503 * <p>This function is best understood as a <a 504 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 505 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 506 * 507 * @since 31.1 508 */ 509 public static HashFunction fingerprint2011() { 510 return Fingerprint2011.FINGERPRINT_2011; 511 } 512 513 /** 514 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner 515 * that minimizes the need for remapping as {@code buckets} grows. That is, {@code 516 * consistentHash(h, n)} equals: 517 * 518 * <ul> 519 * <li>{@code n - 1}, with approximate probability {@code 1/n} 520 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 521 * </ul> 522 * 523 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 524 * following conditions: 525 * 526 * <ul> 527 * <li>You want to assign the same fraction of inputs to each bucket. 528 * <li>When you reduce the number of buckets, you can accept that the most recently added 529 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 530 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 531 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 532 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 533 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 534 * no way for you to specify which of the three buckets is disappearing. Thus, if your 535 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 536 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 537 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 538 * </ul> 539 * 540 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 541 * consistent hashing</a> for more information. 542 */ 543 public static int consistentHash(HashCode hashCode, int buckets) { 544 return consistentHash(hashCode.padToLong(), buckets); 545 } 546 547 /** 548 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that 549 * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, 550 * n)} equals: 551 * 552 * <ul> 553 * <li>{@code n - 1}, with approximate probability {@code 1/n} 554 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 555 * </ul> 556 * 557 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 558 * following conditions: 559 * 560 * <ul> 561 * <li>You want to assign the same fraction of inputs to each bucket. 562 * <li>When you reduce the number of buckets, you can accept that the most recently added 563 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 564 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 565 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 566 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 567 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 568 * no way for you to specify which of the three buckets is disappearing. Thus, if your 569 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 570 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 571 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 572 * </ul> 573 * 574 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 575 * consistent hashing</a> for more information. 576 */ 577 public static int consistentHash(long input, int buckets) { 578 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 579 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 580 int candidate = 0; 581 int next; 582 583 // Jump from bucket to bucket until we go out of range 584 while (true) { 585 next = (int) ((candidate + 1) / generator.nextDouble()); 586 if (next >= 0 && next < buckets) { 587 candidate = next; 588 } else { 589 return candidate; 590 } 591 } 592 } 593 594 /** 595 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 596 * the information of these hash codes in an ordered fashion. That is, whenever two equal hash 597 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 598 * was computed from the <i>same</i> input hash codes in the <i>same</i> order. 599 * 600 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 601 * have the same bit length 602 */ 603 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 604 Iterator<HashCode> iterator = hashCodes.iterator(); 605 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 606 int bits = iterator.next().bits(); 607 byte[] resultBytes = new byte[bits / 8]; 608 for (HashCode hashCode : hashCodes) { 609 byte[] nextBytes = hashCode.asBytes(); 610 checkArgument( 611 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 612 for (int i = 0; i < nextBytes.length; i++) { 613 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 614 } 615 } 616 return HashCode.fromBytesNoCopy(resultBytes); 617 } 618 619 /** 620 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 621 * the information of these hash codes in an unordered fashion. That is, whenever two equal hash 622 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 623 * was computed from the <i>same</i> input hash codes in <i>some</i> order. 624 * 625 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 626 * have the same bit length 627 */ 628 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 629 Iterator<HashCode> iterator = hashCodes.iterator(); 630 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 631 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 632 for (HashCode hashCode : hashCodes) { 633 byte[] nextBytes = hashCode.asBytes(); 634 checkArgument( 635 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 636 for (int i = 0; i < nextBytes.length; i++) { 637 resultBytes[i] += nextBytes[i]; 638 } 639 } 640 return HashCode.fromBytesNoCopy(resultBytes); 641 } 642 643 /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */ 644 static int checkPositiveAndMakeMultipleOf32(int bits) { 645 checkArgument(bits > 0, "Number of bits must be positive"); 646 return (bits + 31) & ~31; 647 } 648 649 /** 650 * Returns a hash function which computes its hash code by concatenating the hash codes of the 651 * underlying hash functions together. This can be useful if you need to generate hash codes of a 652 * specific length. 653 * 654 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 655 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 656 * 657 * @since 19.0 658 */ 659 public static HashFunction concatenating( 660 HashFunction first, HashFunction second, HashFunction... rest) { 661 // We can't use Lists.asList() here because there's no hash->collect dependency 662 List<HashFunction> list = new ArrayList<>(); 663 list.add(first); 664 list.add(second); 665 Collections.addAll(list, rest); 666 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 667 } 668 669 /** 670 * Returns a hash function which computes its hash code by concatenating the hash codes of the 671 * underlying hash functions together. This can be useful if you need to generate hash codes of a 672 * specific length. 673 * 674 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 675 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 676 * 677 * @since 19.0 678 */ 679 public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) { 680 checkNotNull(hashFunctions); 681 // We can't use Iterables.toArray() here because there's no hash->collect dependency 682 List<HashFunction> list = new ArrayList<>(); 683 for (HashFunction hashFunction : hashFunctions) { 684 list.add(hashFunction); 685 } 686 checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size()); 687 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 688 } 689 690 private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 691 692 private ConcatenatedHashFunction(HashFunction... functions) { 693 super(functions); 694 for (HashFunction function : functions) { 695 checkArgument( 696 function.bits() % 8 == 0, 697 "the number of bits (%s) in hashFunction (%s) must be divisible by 8", 698 function.bits(), 699 function); 700 } 701 } 702 703 @Override 704 HashCode makeHash(Hasher[] hashers) { 705 byte[] bytes = new byte[bits() / 8]; 706 int i = 0; 707 for (Hasher hasher : hashers) { 708 HashCode newHash = hasher.hash(); 709 i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8); 710 } 711 return HashCode.fromBytesNoCopy(bytes); 712 } 713 714 @Override 715 public int bits() { 716 int bitSum = 0; 717 for (HashFunction function : functions) { 718 bitSum += function.bits(); 719 } 720 return bitSum; 721 } 722 723 @Override 724 public boolean equals(@Nullable Object object) { 725 if (object instanceof ConcatenatedHashFunction) { 726 ConcatenatedHashFunction other = (ConcatenatedHashFunction) object; 727 return Arrays.equals(functions, other.functions); 728 } 729 return false; 730 } 731 732 @Override 733 public int hashCode() { 734 return Arrays.hashCode(functions); 735 } 736 } 737 738 /** 739 * Linear CongruentialGenerator to use for consistent hashing. See 740 * http://en.wikipedia.org/wiki/Linear_congruential_generator 741 */ 742 private static final class LinearCongruentialGenerator { 743 private long state; 744 745 public LinearCongruentialGenerator(long seed) { 746 this.state = seed; 747 } 748 749 public double nextDouble() { 750 state = 2862933555777941757L * state + 1; 751 return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31; 752 } 753 } 754 755 private Hashing() {} 756}