001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019 020import com.google.errorprone.annotations.Immutable; 021import java.security.Key; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collections; 025import java.util.Iterator; 026import java.util.List; 027import java.util.zip.Adler32; 028import java.util.zip.CRC32; 029import java.util.zip.Checksum; 030import javax.annotation.CheckForNull; 031import javax.crypto.spec.SecretKeySpec; 032 033/** 034 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 035 * utilities. 036 * 037 * <p>A comparison of the various hash functions can be found <a 038 * href="https://docs.google.com/spreadsheets/d/1_q2EVcxA2HjcrlVMbaqXwMj31h9M5-Bqj_m8vITOwwk/">here</a>. 039 * 040 * @author Kevin Bourrillion 041 * @author Dimitris Andreou 042 * @author Kurt Alfred Kluever 043 * @since 11.0 044 */ 045public final class Hashing { 046 /** 047 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm 048 * the returned function implements is unspecified and subject to change without notice. 049 * 050 * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code 051 * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current 052 * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose, 053 * non-cryptographic hash function that will never change behavior, we suggest {@link 054 * #murmur3_128}. 055 * 056 * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value 057 * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. 058 * 059 * @param minimumBits a positive integer. This can be arbitrarily large. The returned {@link 060 * HashFunction} instance may use memory proportional to this integer. 061 * @return a hash function, described above, that produces hash codes of length {@code 062 * minimumBits} or greater 063 */ 064 public static HashFunction goodFastHash(int minimumBits) { 065 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 066 067 if (bits == 32) { 068 return Murmur3_32HashFunction.GOOD_FAST_HASH_32; 069 } 070 if (bits <= 128) { 071 return Murmur3_128HashFunction.GOOD_FAST_HASH_128; 072 } 073 074 // Otherwise, join together some 128-bit murmur3s 075 int hashFunctionsNeeded = (bits + 127) / 128; 076 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 077 hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128; 078 int seed = GOOD_FAST_HASH_SEED; 079 for (int i = 1; i < hashFunctionsNeeded; i++) { 080 seed += 1500450271; // a prime; shouldn't matter 081 hashFunctions[i] = murmur3_128(seed); 082 } 083 return new ConcatenatedHashFunction(hashFunctions); 084 } 085 086 /** 087 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 088 * dependent on the hash codes they produce will fail sooner. 089 */ 090 @SuppressWarnings("GoodTime") // reading system time without TimeSource 091 static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 092 093 /** 094 * Returns a hash function implementing the <a 095 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 096 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 097 * bug</b> as described in the deprecation text. 098 * 099 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 100 * have the bug. 101 * 102 * @deprecated This implementation produces incorrect hash values from the {@link 103 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 104 * #murmur3_32_fixed(int)} instead. 105 */ 106 @Deprecated 107 public static HashFunction murmur3_32(int seed) { 108 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); 109 } 110 111 /** 112 * Returns a hash function implementing the <a 113 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 114 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 115 * bug</b> as described in the deprecation text. 116 * 117 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 118 * have the bug. 119 * 120 * @deprecated This implementation produces incorrect hash values from the {@link 121 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 122 * #murmur3_32_fixed()} instead. 123 */ 124 @Deprecated 125 public static HashFunction murmur3_32() { 126 return Murmur3_32HashFunction.MURMUR3_32; 127 } 128 129 /** 130 * Returns a hash function implementing the <a 131 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 132 * algorithm, x86 variant</a> (little-endian variant), using the given seed value. 133 * 134 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 135 * 136 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 137 * HashFunction} returned by the original {@code murmur3_32} method. 138 * 139 * @since 31.0 140 */ 141 public static HashFunction murmur3_32_fixed(int seed) { 142 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); 143 } 144 145 /** 146 * Returns a hash function implementing the <a 147 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 148 * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero. 149 * 150 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 151 * 152 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 153 * HashFunction} returned by the original {@code murmur3_32} method. 154 * 155 * @since 31.0 156 */ 157 public static HashFunction murmur3_32_fixed() { 158 return Murmur3_32HashFunction.MURMUR3_32_FIXED; 159 } 160 161 /** 162 * Returns a hash function implementing the <a 163 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 164 * algorithm, x64 variant</a> (little-endian variant), using the given seed value. 165 * 166 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 167 */ 168 public static HashFunction murmur3_128(int seed) { 169 return new Murmur3_128HashFunction(seed); 170 } 171 172 /** 173 * Returns a hash function implementing the <a 174 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 175 * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero. 176 * 177 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 178 */ 179 public static HashFunction murmur3_128() { 180 return Murmur3_128HashFunction.MURMUR3_128; 181 } 182 183 /** 184 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 185 * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}. 186 * 187 * @since 15.0 188 */ 189 public static HashFunction sipHash24() { 190 return SipHashFunction.SIP_HASH_24; 191 } 192 193 /** 194 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 195 * SipHash-2-4 algorithm</a> using the given seed. 196 * 197 * @since 15.0 198 */ 199 public static HashFunction sipHash24(long k0, long k1) { 200 return new SipHashFunction(2, 4, k0, k1); 201 } 202 203 /** 204 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits). 205 * 206 * @deprecated If you must interoperate with a system that requires MD5, then use this method, 207 * despite its deprecation. But if you can choose your hash function, avoid MD5, which is 208 * neither fast nor secure. As of January 2017, we suggest: 209 * <ul> 210 * <li>For security: 211 * {@link Hashing#sha256} or a higher-level API. 212 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 213 * </ul> 214 */ 215 @Deprecated 216 public static HashFunction md5() { 217 return Md5Holder.MD5; 218 } 219 220 private static class Md5Holder { 221 static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 222 } 223 224 /** 225 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits). 226 * 227 * @deprecated If you must interoperate with a system that requires SHA-1, then use this method, 228 * despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is 229 * neither fast nor secure. As of January 2017, we suggest: 230 * <ul> 231 * <li>For security: 232 * {@link Hashing#sha256} or a higher-level API. 233 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 234 * </ul> 235 */ 236 @Deprecated 237 public static HashFunction sha1() { 238 return Sha1Holder.SHA_1; 239 } 240 241 private static class Sha1Holder { 242 static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 243 } 244 245 /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */ 246 public static HashFunction sha256() { 247 return Sha256Holder.SHA_256; 248 } 249 250 private static class Sha256Holder { 251 static final HashFunction SHA_256 = 252 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 253 } 254 255 /** 256 * Returns a hash function implementing the SHA-384 algorithm (384 hash bits). 257 * 258 * @since 19.0 259 */ 260 public static HashFunction sha384() { 261 return Sha384Holder.SHA_384; 262 } 263 264 private static class Sha384Holder { 265 static final HashFunction SHA_384 = 266 new MessageDigestHashFunction("SHA-384", "Hashing.sha384()"); 267 } 268 269 /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */ 270 public static HashFunction sha512() { 271 return Sha512Holder.SHA_512; 272 } 273 274 private static class Sha512Holder { 275 static final HashFunction SHA_512 = 276 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 277 } 278 279 /** 280 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 281 * MD5 (128 hash bits) hash function and the given secret key. 282 * 283 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 284 * future-proof algorithms <a 285 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 286 * 287 * @param key the secret key 288 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 289 * @since 20.0 290 */ 291 public static HashFunction hmacMd5(Key key) { 292 return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key)); 293 } 294 295 /** 296 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 297 * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array 298 * and the MD5 algorithm. 299 * 300 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 301 * future-proof algorithms <a 302 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 303 * 304 * @param key the key material of the secret key 305 * @since 20.0 306 */ 307 public static HashFunction hmacMd5(byte[] key) { 308 return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5")); 309 } 310 311 /** 312 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 313 * SHA-1 (160 hash bits) hash function and the given secret key. 314 * 315 * @param key the secret key 316 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 317 * @since 20.0 318 */ 319 public static HashFunction hmacSha1(Key key) { 320 return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key)); 321 } 322 323 /** 324 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 325 * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 326 * array and the SHA-1 algorithm. 327 * 328 * @param key the key material of the secret key 329 * @since 20.0 330 */ 331 public static HashFunction hmacSha1(byte[] key) { 332 return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1")); 333 } 334 335 /** 336 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 337 * SHA-256 (256 hash bits) hash function and the given secret key. 338 * 339 * @param key the secret key 340 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 341 * @since 20.0 342 */ 343 public static HashFunction hmacSha256(Key key) { 344 return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key)); 345 } 346 347 /** 348 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 349 * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 350 * array and the SHA-256 algorithm. 351 * 352 * @param key the key material of the secret key 353 * @since 20.0 354 */ 355 public static HashFunction hmacSha256(byte[] key) { 356 return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256")); 357 } 358 359 /** 360 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 361 * SHA-512 (512 hash bits) hash function and the given secret key. 362 * 363 * @param key the secret key 364 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 365 * @since 20.0 366 */ 367 public static HashFunction hmacSha512(Key key) { 368 return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key)); 369 } 370 371 /** 372 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 373 * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 374 * array and the SHA-512 algorithm. 375 * 376 * @param key the key material of the secret key 377 * @since 20.0 378 */ 379 public static HashFunction hmacSha512(byte[] key) { 380 return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512")); 381 } 382 383 private static String hmacToString(String methodName, Key key) { 384 return "Hashing." 385 + methodName 386 + "(Key[algorithm=" 387 + key.getAlgorithm() 388 + ", format=" 389 + key.getFormat() 390 + "])"; 391 } 392 393 /** 394 * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described 395 * by RFC 3720, Section 12.1. 396 * 397 * <p>This function is best understood as a <a 398 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 399 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 400 * 401 * @since 18.0 402 */ 403 public static HashFunction crc32c() { 404 return Crc32cHashFunction.CRC_32_C; 405 } 406 407 /** 408 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits). 409 * 410 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 411 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 412 * 413 * <p>This function is best understood as a <a 414 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 415 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 416 * 417 * @since 14.0 418 */ 419 public static HashFunction crc32() { 420 return ChecksumType.CRC_32.hashFunction; 421 } 422 423 /** 424 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits). 425 * 426 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 427 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 428 * 429 * <p>This function is best understood as a <a 430 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 431 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 432 * 433 * @since 14.0 434 */ 435 public static HashFunction adler32() { 436 return ChecksumType.ADLER_32.hashFunction; 437 } 438 439 @Immutable 440 enum ChecksumType implements ImmutableSupplier<Checksum> { 441 CRC_32("Hashing.crc32()") { 442 @Override 443 public Checksum get() { 444 return new CRC32(); 445 } 446 }, 447 ADLER_32("Hashing.adler32()") { 448 @Override 449 public Checksum get() { 450 return new Adler32(); 451 } 452 }; 453 454 public final HashFunction hashFunction; 455 456 ChecksumType(String toString) { 457 this.hashFunction = new ChecksumHashFunction(this, 32, toString); 458 } 459 } 460 461 /** 462 * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm. 463 * 464 * <p>This is designed for generating persistent fingerprints of strings. It isn't 465 * cryptographically secure, but it produces a high-quality hash with fewer collisions than some 466 * alternatives we've used in the past. 467 * 468 * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This 469 * means {@link HashCode#asLong} is guaranteed to return the same value that 470 * farmhash::Fingerprint64() would for the same input (when compared using {@link 471 * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers). 472 * 473 * <p>This function is best understood as a <a 474 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 475 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 476 * 477 * @since 20.0 478 */ 479 public static HashFunction farmHashFingerprint64() { 480 return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64; 481 } 482 483 /** 484 * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits). 485 * 486 * <p>This is designed for generating persistent fingerprints of strings. It isn't 487 * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints 488 * generated using this are byte-wise identical to those created using the C++ version, but note 489 * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}). 490 * Comparisons between the two should take this into account. 491 * 492 * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for 493 * longer strings. It could have been one or the other throughout. The main advantage of the 494 * combination is that CityHash has a bunch of special cases for short strings that don't need to 495 * be replicated here. The result will never be 0 or 1. 496 * 497 * <p>This function is best understood as a <a 498 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 499 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 500 * 501 * @since 31.1 502 */ 503 public static HashFunction fingerprint2011() { 504 return Fingerprint2011.FINGERPRINT_2011; 505 } 506 507 /** 508 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner 509 * that minimizes the need for remapping as {@code buckets} grows. That is, {@code 510 * consistentHash(h, n)} equals: 511 * 512 * <ul> 513 * <li>{@code n - 1}, with approximate probability {@code 1/n} 514 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 515 * </ul> 516 * 517 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 518 * following conditions: 519 * 520 * <ul> 521 * <li>You want to assign the same fraction of inputs to each bucket. 522 * <li>When you reduce the number of buckets, you can accept that the most recently added 523 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 524 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 525 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 526 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 527 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 528 * no way for you to specify which of the three buckets is disappearing. Thus, if your 529 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 530 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 531 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 532 * </ul> 533 * 534 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 535 * consistent hashing</a> for more information. 536 */ 537 public static int consistentHash(HashCode hashCode, int buckets) { 538 return consistentHash(hashCode.padToLong(), buckets); 539 } 540 541 /** 542 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that 543 * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, 544 * n)} equals: 545 * 546 * <ul> 547 * <li>{@code n - 1}, with approximate probability {@code 1/n} 548 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 549 * </ul> 550 * 551 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 552 * following conditions: 553 * 554 * <ul> 555 * <li>You want to assign the same fraction of inputs to each bucket. 556 * <li>When you reduce the number of buckets, you can accept that the most recently added 557 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 558 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 559 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 560 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 561 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 562 * no way for you to specify which of the three buckets is disappearing. Thus, if your 563 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 564 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 565 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 566 * </ul> 567 * 568 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 569 * consistent hashing</a> for more information. 570 */ 571 public static int consistentHash(long input, int buckets) { 572 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 573 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 574 int candidate = 0; 575 int next; 576 577 // Jump from bucket to bucket until we go out of range 578 while (true) { 579 next = (int) ((candidate + 1) / generator.nextDouble()); 580 if (next >= 0 && next < buckets) { 581 candidate = next; 582 } else { 583 return candidate; 584 } 585 } 586 } 587 588 /** 589 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 590 * the information of these hash codes in an ordered fashion. That is, whenever two equal hash 591 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 592 * was computed from the <i>same</i> input hash codes in the <i>same</i> order. 593 * 594 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 595 * have the same bit length 596 */ 597 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 598 Iterator<HashCode> iterator = hashCodes.iterator(); 599 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 600 int bits = iterator.next().bits(); 601 byte[] resultBytes = new byte[bits / 8]; 602 for (HashCode hashCode : hashCodes) { 603 byte[] nextBytes = hashCode.asBytes(); 604 checkArgument( 605 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 606 for (int i = 0; i < nextBytes.length; i++) { 607 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 608 } 609 } 610 return HashCode.fromBytesNoCopy(resultBytes); 611 } 612 613 /** 614 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 615 * the information of these hash codes in an unordered fashion. That is, whenever two equal hash 616 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 617 * was computed from the <i>same</i> input hash codes in <i>some</i> order. 618 * 619 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 620 * have the same bit length 621 */ 622 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 623 Iterator<HashCode> iterator = hashCodes.iterator(); 624 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 625 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 626 for (HashCode hashCode : hashCodes) { 627 byte[] nextBytes = hashCode.asBytes(); 628 checkArgument( 629 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 630 for (int i = 0; i < nextBytes.length; i++) { 631 resultBytes[i] += nextBytes[i]; 632 } 633 } 634 return HashCode.fromBytesNoCopy(resultBytes); 635 } 636 637 /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */ 638 static int checkPositiveAndMakeMultipleOf32(int bits) { 639 checkArgument(bits > 0, "Number of bits must be positive"); 640 return (bits + 31) & ~31; 641 } 642 643 /** 644 * Returns a hash function which computes its hash code by concatenating the hash codes of the 645 * underlying hash functions together. This can be useful if you need to generate hash codes of a 646 * specific length. 647 * 648 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 649 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 650 * 651 * @since 19.0 652 */ 653 public static HashFunction concatenating( 654 HashFunction first, HashFunction second, HashFunction... rest) { 655 // We can't use Lists.asList() here because there's no hash->collect dependency 656 List<HashFunction> list = new ArrayList<>(); 657 list.add(first); 658 list.add(second); 659 Collections.addAll(list, rest); 660 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 661 } 662 663 /** 664 * Returns a hash function which computes its hash code by concatenating the hash codes of the 665 * underlying hash functions together. This can be useful if you need to generate hash codes of a 666 * specific length. 667 * 668 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 669 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 670 * 671 * @since 19.0 672 */ 673 public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) { 674 checkNotNull(hashFunctions); 675 // We can't use Iterables.toArray() here because there's no hash->collect dependency 676 List<HashFunction> list = new ArrayList<>(); 677 for (HashFunction hashFunction : hashFunctions) { 678 list.add(hashFunction); 679 } 680 checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size()); 681 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 682 } 683 684 private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 685 686 private ConcatenatedHashFunction(HashFunction... functions) { 687 super(functions); 688 for (HashFunction function : functions) { 689 checkArgument( 690 function.bits() % 8 == 0, 691 "the number of bits (%s) in hashFunction (%s) must be divisible by 8", 692 function.bits(), 693 function); 694 } 695 } 696 697 @Override 698 HashCode makeHash(Hasher[] hashers) { 699 byte[] bytes = new byte[bits() / 8]; 700 int i = 0; 701 for (Hasher hasher : hashers) { 702 HashCode newHash = hasher.hash(); 703 i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8); 704 } 705 return HashCode.fromBytesNoCopy(bytes); 706 } 707 708 @Override 709 public int bits() { 710 int bitSum = 0; 711 for (HashFunction function : functions) { 712 bitSum += function.bits(); 713 } 714 return bitSum; 715 } 716 717 @Override 718 public boolean equals(@CheckForNull Object object) { 719 if (object instanceof ConcatenatedHashFunction) { 720 ConcatenatedHashFunction other = (ConcatenatedHashFunction) object; 721 return Arrays.equals(functions, other.functions); 722 } 723 return false; 724 } 725 726 @Override 727 public int hashCode() { 728 return Arrays.hashCode(functions); 729 } 730 } 731 732 /** 733 * Linear CongruentialGenerator to use for consistent hashing. See 734 * http://en.wikipedia.org/wiki/Linear_congruential_generator 735 */ 736 private static final class LinearCongruentialGenerator { 737 private long state; 738 739 public LinearCongruentialGenerator(long seed) { 740 this.state = seed; 741 } 742 743 public double nextDouble() { 744 state = 2862933555777941757L * state + 1; 745 return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31; 746 } 747 } 748 749 private Hashing() {} 750}