001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019 020import com.google.errorprone.annotations.Immutable; 021import java.security.Key; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collections; 025import java.util.Iterator; 026import java.util.List; 027import java.util.zip.Adler32; 028import java.util.zip.CRC32; 029import java.util.zip.Checksum; 030import javax.annotation.CheckForNull; 031import javax.crypto.spec.SecretKeySpec; 032 033/** 034 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 035 * utilities. 036 * 037 * <p>A comparison of the various hash functions can be found <a 038 * href="http://goo.gl/jS7HH">here</a>. 039 * 040 * @author Kevin Bourrillion 041 * @author Dimitris Andreou 042 * @author Kurt Alfred Kluever 043 * @since 11.0 044 */ 045@ElementTypesAreNonnullByDefault 046public final class Hashing { 047 /** 048 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm 049 * the returned function implements is unspecified and subject to change without notice. 050 * 051 * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code 052 * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current 053 * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose, 054 * non-cryptographic hash function that will never change behavior, we suggest {@link 055 * #murmur3_128}. 056 * 057 * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value 058 * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. 059 * 060 * @param minimumBits a positive integer. This can be arbitrarily large. The returned {@link 061 * HashFunction} instance may use memory proportional to this integer. 062 * @return a hash function, described above, that produces hash codes of length {@code 063 * minimumBits} or greater 064 */ 065 public static HashFunction goodFastHash(int minimumBits) { 066 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 067 068 if (bits == 32) { 069 return Murmur3_32HashFunction.GOOD_FAST_HASH_32; 070 } 071 if (bits <= 128) { 072 return Murmur3_128HashFunction.GOOD_FAST_HASH_128; 073 } 074 075 // Otherwise, join together some 128-bit murmur3s 076 int hashFunctionsNeeded = (bits + 127) / 128; 077 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 078 hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128; 079 int seed = GOOD_FAST_HASH_SEED; 080 for (int i = 1; i < hashFunctionsNeeded; i++) { 081 seed += 1500450271; // a prime; shouldn't matter 082 hashFunctions[i] = murmur3_128(seed); 083 } 084 return new ConcatenatedHashFunction(hashFunctions); 085 } 086 087 /** 088 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 089 * dependent on the hash codes they produce will fail sooner. 090 */ 091 @SuppressWarnings("GoodTime") // reading system time without TimeSource 092 static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 093 094 /** 095 * Returns a hash function implementing the <a 096 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 097 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 098 * bug</b> as described in the deprecation text. 099 * 100 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 101 * have the bug. 102 * 103 * @deprecated This implementation produces incorrect hash values from the {@link 104 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 105 * #murmur3_32_fixed(int)} instead. 106 */ 107 @Deprecated 108 public static HashFunction murmur3_32(int seed) { 109 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); 110 } 111 112 /** 113 * Returns a hash function implementing the <a 114 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 115 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 116 * bug</b> as described in the deprecation text. 117 * 118 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 119 * have the bug. 120 * 121 * @deprecated This implementation produces incorrect hash values from the {@link 122 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 123 * #murmur3_32_fixed()} instead. 124 */ 125 @Deprecated 126 public static HashFunction murmur3_32() { 127 return Murmur3_32HashFunction.MURMUR3_32; 128 } 129 130 /** 131 * Returns a hash function implementing the <a 132 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 133 * algorithm, x86 variant</a> (little-endian variant), using the given seed value. 134 * 135 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 136 * 137 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 138 * HashFunction} returned by the original {@code murmur3_32} method. 139 * 140 * @since 31.0 141 */ 142 public static HashFunction murmur3_32_fixed(int seed) { 143 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); 144 } 145 146 /** 147 * Returns a hash function implementing the <a 148 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 149 * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero. 150 * 151 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 152 * 153 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 154 * HashFunction} returned by the original {@code murmur3_32} method. 155 * 156 * @since 31.0 157 */ 158 public static HashFunction murmur3_32_fixed() { 159 return Murmur3_32HashFunction.MURMUR3_32_FIXED; 160 } 161 162 /** 163 * Returns a hash function implementing the <a 164 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 165 * algorithm, x64 variant</a> (little-endian variant), using the given seed value. 166 * 167 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 168 */ 169 public static HashFunction murmur3_128(int seed) { 170 return new Murmur3_128HashFunction(seed); 171 } 172 173 /** 174 * Returns a hash function implementing the <a 175 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 176 * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero. 177 * 178 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 179 */ 180 public static HashFunction murmur3_128() { 181 return Murmur3_128HashFunction.MURMUR3_128; 182 } 183 184 /** 185 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 186 * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}. 187 * 188 * @since 15.0 189 */ 190 public static HashFunction sipHash24() { 191 return SipHashFunction.SIP_HASH_24; 192 } 193 194 /** 195 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 196 * SipHash-2-4 algorithm</a> using the given seed. 197 * 198 * @since 15.0 199 */ 200 public static HashFunction sipHash24(long k0, long k1) { 201 return new SipHashFunction(2, 4, k0, k1); 202 } 203 204 /** 205 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits). 206 * 207 * @deprecated If you must interoperate with a system that requires MD5, then use this method, 208 * despite its deprecation. But if you can choose your hash function, avoid MD5, which is 209 * neither fast nor secure. As of January 2017, we suggest: 210 * <ul> 211 * <li>For security: 212 * {@link Hashing#sha256} or a higher-level API. 213 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 214 * </ul> 215 */ 216 @Deprecated 217 public static HashFunction md5() { 218 return Md5Holder.MD5; 219 } 220 221 private static class Md5Holder { 222 static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 223 } 224 225 /** 226 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits). 227 * 228 * @deprecated If you must interoperate with a system that requires SHA-1, then use this method, 229 * despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is 230 * neither fast nor secure. As of January 2017, we suggest: 231 * <ul> 232 * <li>For security: 233 * {@link Hashing#sha256} or a higher-level API. 234 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 235 * </ul> 236 */ 237 @Deprecated 238 public static HashFunction sha1() { 239 return Sha1Holder.SHA_1; 240 } 241 242 private static class Sha1Holder { 243 static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 244 } 245 246 /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */ 247 public static HashFunction sha256() { 248 return Sha256Holder.SHA_256; 249 } 250 251 private static class Sha256Holder { 252 static final HashFunction SHA_256 = 253 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 254 } 255 256 /** 257 * Returns a hash function implementing the SHA-384 algorithm (384 hash bits). 258 * 259 * @since 19.0 260 */ 261 public static HashFunction sha384() { 262 return Sha384Holder.SHA_384; 263 } 264 265 private static class Sha384Holder { 266 static final HashFunction SHA_384 = 267 new MessageDigestHashFunction("SHA-384", "Hashing.sha384()"); 268 } 269 270 /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */ 271 public static HashFunction sha512() { 272 return Sha512Holder.SHA_512; 273 } 274 275 private static class Sha512Holder { 276 static final HashFunction SHA_512 = 277 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 278 } 279 280 /** 281 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 282 * MD5 (128 hash bits) hash function and the given secret key. 283 * 284 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 285 * future-proof algorithms <a 286 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 287 * 288 * @param key the secret key 289 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 290 * @since 20.0 291 */ 292 public static HashFunction hmacMd5(Key key) { 293 return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key)); 294 } 295 296 /** 297 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 298 * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array 299 * and the MD5 algorithm. 300 * 301 * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other 302 * future-proof algorithms <a 303 * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>. 304 * 305 * @param key the key material of the secret key 306 * @since 20.0 307 */ 308 public static HashFunction hmacMd5(byte[] key) { 309 return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5")); 310 } 311 312 /** 313 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 314 * SHA-1 (160 hash bits) hash function and the given secret key. 315 * 316 * @param key the secret key 317 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 318 * @since 20.0 319 */ 320 public static HashFunction hmacSha1(Key key) { 321 return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key)); 322 } 323 324 /** 325 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 326 * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 327 * array and the SHA-1 algorithm. 328 * 329 * @param key the key material of the secret key 330 * @since 20.0 331 */ 332 public static HashFunction hmacSha1(byte[] key) { 333 return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1")); 334 } 335 336 /** 337 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 338 * SHA-256 (256 hash bits) hash function and the given secret key. 339 * 340 * @param key the secret key 341 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 342 * @since 20.0 343 */ 344 public static HashFunction hmacSha256(Key key) { 345 return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key)); 346 } 347 348 /** 349 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 350 * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 351 * array and the SHA-256 algorithm. 352 * 353 * @param key the key material of the secret key 354 * @since 20.0 355 */ 356 public static HashFunction hmacSha256(byte[] key) { 357 return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256")); 358 } 359 360 /** 361 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 362 * SHA-512 (512 hash bits) hash function and the given secret key. 363 * 364 * @param key the secret key 365 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 366 * @since 20.0 367 */ 368 public static HashFunction hmacSha512(Key key) { 369 return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key)); 370 } 371 372 /** 373 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 374 * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 375 * array and the SHA-512 algorithm. 376 * 377 * @param key the key material of the secret key 378 * @since 20.0 379 */ 380 public static HashFunction hmacSha512(byte[] key) { 381 return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512")); 382 } 383 384 private static String hmacToString(String methodName, Key key) { 385 return "Hashing." 386 + methodName 387 + "(Key[algorithm=" 388 + key.getAlgorithm() 389 + ", format=" 390 + key.getFormat() 391 + "])"; 392 } 393 394 /** 395 * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described 396 * by RFC 3720, Section 12.1. 397 * 398 * <p>This function is best understood as a <a 399 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 400 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 401 * 402 * @since 18.0 403 */ 404 public static HashFunction crc32c() { 405 return Crc32cHashFunction.CRC_32_C; 406 } 407 408 /** 409 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits). 410 * 411 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 412 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 413 * 414 * <p>This function is best understood as a <a 415 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 416 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 417 * 418 * @since 14.0 419 */ 420 public static HashFunction crc32() { 421 return ChecksumType.CRC_32.hashFunction; 422 } 423 424 /** 425 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits). 426 * 427 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 428 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 429 * 430 * <p>This function is best understood as a <a 431 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 432 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 433 * 434 * @since 14.0 435 */ 436 public static HashFunction adler32() { 437 return ChecksumType.ADLER_32.hashFunction; 438 } 439 440 @Immutable 441 enum ChecksumType implements ImmutableSupplier<Checksum> { 442 CRC_32("Hashing.crc32()") { 443 @Override 444 public Checksum get() { 445 return new CRC32(); 446 } 447 }, 448 ADLER_32("Hashing.adler32()") { 449 @Override 450 public Checksum get() { 451 return new Adler32(); 452 } 453 }; 454 455 public final HashFunction hashFunction; 456 457 ChecksumType(String toString) { 458 this.hashFunction = new ChecksumHashFunction(this, 32, toString); 459 } 460 } 461 462 /** 463 * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm. 464 * 465 * <p>This is designed for generating persistent fingerprints of strings. It isn't 466 * cryptographically secure, but it produces a high-quality hash with fewer collisions than some 467 * alternatives we've used in the past. 468 * 469 * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This 470 * means {@link HashCode#asLong} is guaranteed to return the same value that 471 * farmhash::Fingerprint64() would for the same input (when compared using {@link 472 * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers). 473 * 474 * <p>This function is best understood as a <a 475 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 476 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 477 * 478 * @since 20.0 479 */ 480 public static HashFunction farmHashFingerprint64() { 481 return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64; 482 } 483 484 /** 485 * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits). 486 * 487 * <p>This is designed for generating persistent fingerprints of strings. It isn't 488 * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints 489 * generated using this are byte-wise identical to those created using the C++ version, but note 490 * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}). 491 * Comparisons between the two should take this into account. 492 * 493 * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for 494 * longer strings. It could have been one or the other throughout. The main advantage of the 495 * combination is that CityHash has a bunch of special cases for short strings that don't need to 496 * be replicated here. The result will never be 0 or 1. 497 * 498 * <p>This function is best understood as a <a 499 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 500 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 501 * 502 * @since 31.1 503 */ 504 public static HashFunction fingerprint2011() { 505 return Fingerprint2011.FINGERPRINT_2011; 506 } 507 508 /** 509 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner 510 * that minimizes the need for remapping as {@code buckets} grows. That is, {@code 511 * consistentHash(h, n)} equals: 512 * 513 * <ul> 514 * <li>{@code n - 1}, with approximate probability {@code 1/n} 515 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 516 * </ul> 517 * 518 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 519 * following conditions: 520 * 521 * <ul> 522 * <li>You want to assign the same fraction of inputs to each bucket. 523 * <li>When you reduce the number of buckets, you can accept that the most recently added 524 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 525 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 526 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 527 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 528 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 529 * no way for you to specify which of the three buckets is disappearing. Thus, if your 530 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 531 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 532 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 533 * </ul> 534 * 535 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 536 * consistent hashing</a> for more information. 537 */ 538 public static int consistentHash(HashCode hashCode, int buckets) { 539 return consistentHash(hashCode.padToLong(), buckets); 540 } 541 542 /** 543 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that 544 * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, 545 * n)} equals: 546 * 547 * <ul> 548 * <li>{@code n - 1}, with approximate probability {@code 1/n} 549 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 550 * </ul> 551 * 552 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 553 * following conditions: 554 * 555 * <ul> 556 * <li>You want to assign the same fraction of inputs to each bucket. 557 * <li>When you reduce the number of buckets, you can accept that the most recently added 558 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 559 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 560 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 561 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 562 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 563 * no way for you to specify which of the three buckets is disappearing. Thus, if your 564 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 565 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 566 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 567 * </ul> 568 * 569 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 570 * consistent hashing</a> for more information. 571 */ 572 public static int consistentHash(long input, int buckets) { 573 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 574 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 575 int candidate = 0; 576 int next; 577 578 // Jump from bucket to bucket until we go out of range 579 while (true) { 580 next = (int) ((candidate + 1) / generator.nextDouble()); 581 if (next >= 0 && next < buckets) { 582 candidate = next; 583 } else { 584 return candidate; 585 } 586 } 587 } 588 589 /** 590 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 591 * the information of these hash codes in an ordered fashion. That is, whenever two equal hash 592 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 593 * was computed from the <i>same</i> input hash codes in the <i>same</i> order. 594 * 595 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 596 * have the same bit length 597 */ 598 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 599 Iterator<HashCode> iterator = hashCodes.iterator(); 600 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 601 int bits = iterator.next().bits(); 602 byte[] resultBytes = new byte[bits / 8]; 603 for (HashCode hashCode : hashCodes) { 604 byte[] nextBytes = hashCode.asBytes(); 605 checkArgument( 606 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 607 for (int i = 0; i < nextBytes.length; i++) { 608 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 609 } 610 } 611 return HashCode.fromBytesNoCopy(resultBytes); 612 } 613 614 /** 615 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 616 * the information of these hash codes in an unordered fashion. That is, whenever two equal hash 617 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 618 * was computed from the <i>same</i> input hash codes in <i>some</i> order. 619 * 620 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 621 * have the same bit length 622 */ 623 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 624 Iterator<HashCode> iterator = hashCodes.iterator(); 625 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 626 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 627 for (HashCode hashCode : hashCodes) { 628 byte[] nextBytes = hashCode.asBytes(); 629 checkArgument( 630 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 631 for (int i = 0; i < nextBytes.length; i++) { 632 resultBytes[i] += nextBytes[i]; 633 } 634 } 635 return HashCode.fromBytesNoCopy(resultBytes); 636 } 637 638 /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */ 639 static int checkPositiveAndMakeMultipleOf32(int bits) { 640 checkArgument(bits > 0, "Number of bits must be positive"); 641 return (bits + 31) & ~31; 642 } 643 644 /** 645 * Returns a hash function which computes its hash code by concatenating the hash codes of the 646 * underlying hash functions together. This can be useful if you need to generate hash codes of a 647 * specific length. 648 * 649 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 650 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 651 * 652 * @since 19.0 653 */ 654 public static HashFunction concatenating( 655 HashFunction first, HashFunction second, HashFunction... rest) { 656 // We can't use Lists.asList() here because there's no hash->collect dependency 657 List<HashFunction> list = new ArrayList<>(); 658 list.add(first); 659 list.add(second); 660 Collections.addAll(list, rest); 661 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 662 } 663 664 /** 665 * Returns a hash function which computes its hash code by concatenating the hash codes of the 666 * underlying hash functions together. This can be useful if you need to generate hash codes of a 667 * specific length. 668 * 669 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 670 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 671 * 672 * @since 19.0 673 */ 674 public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) { 675 checkNotNull(hashFunctions); 676 // We can't use Iterables.toArray() here because there's no hash->collect dependency 677 List<HashFunction> list = new ArrayList<>(); 678 for (HashFunction hashFunction : hashFunctions) { 679 list.add(hashFunction); 680 } 681 checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size()); 682 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 683 } 684 685 private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 686 687 private ConcatenatedHashFunction(HashFunction... functions) { 688 super(functions); 689 for (HashFunction function : functions) { 690 checkArgument( 691 function.bits() % 8 == 0, 692 "the number of bits (%s) in hashFunction (%s) must be divisible by 8", 693 function.bits(), 694 function); 695 } 696 } 697 698 @Override 699 HashCode makeHash(Hasher[] hashers) { 700 byte[] bytes = new byte[bits() / 8]; 701 int i = 0; 702 for (Hasher hasher : hashers) { 703 HashCode newHash = hasher.hash(); 704 i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8); 705 } 706 return HashCode.fromBytesNoCopy(bytes); 707 } 708 709 @Override 710 public int bits() { 711 int bitSum = 0; 712 for (HashFunction function : functions) { 713 bitSum += function.bits(); 714 } 715 return bitSum; 716 } 717 718 @Override 719 public boolean equals(@CheckForNull Object object) { 720 if (object instanceof ConcatenatedHashFunction) { 721 ConcatenatedHashFunction other = (ConcatenatedHashFunction) object; 722 return Arrays.equals(functions, other.functions); 723 } 724 return false; 725 } 726 727 @Override 728 public int hashCode() { 729 return Arrays.hashCode(functions); 730 } 731 } 732 733 /** 734 * Linear CongruentialGenerator to use for consistent hashing. See 735 * http://en.wikipedia.org/wiki/Linear_congruential_generator 736 */ 737 private static final class LinearCongruentialGenerator { 738 private long state; 739 740 public LinearCongruentialGenerator(long seed) { 741 this.state = seed; 742 } 743 744 public double nextDouble() { 745 state = 2862933555777941757L * state + 1; 746 return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31; 747 } 748 } 749 750 private Hashing() {} 751}