001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019 020import com.google.errorprone.annotations.Immutable; 021import java.security.Key; 022import java.util.ArrayList; 023import java.util.Arrays; 024import java.util.Collections; 025import java.util.Iterator; 026import java.util.List; 027import java.util.zip.Adler32; 028import java.util.zip.CRC32; 029import java.util.zip.Checksum; 030import javax.annotation.CheckForNull; 031import javax.crypto.spec.SecretKeySpec; 032 033/** 034 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 035 * utilities. 036 * 037 * <p>A comparison of the various hash functions can be found <a 038 * href="http://goo.gl/jS7HH">here</a>. 039 * 040 * @author Kevin Bourrillion 041 * @author Dimitris Andreou 042 * @author Kurt Alfred Kluever 043 * @since 11.0 044 */ 045@ElementTypesAreNonnullByDefault 046public final class Hashing { 047 /** 048 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm 049 * the returned function implements is unspecified and subject to change without notice. 050 * 051 * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code 052 * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current 053 * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose, 054 * non-cryptographic hash function that will never change behavior, we suggest {@link 055 * #murmur3_128}. 056 * 057 * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value 058 * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. 059 * 060 * @param minimumBits a positive integer (can be arbitrarily large) 061 * @return a hash function, described above, that produces hash codes of length {@code 062 * minimumBits} or greater 063 */ 064 public static HashFunction goodFastHash(int minimumBits) { 065 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 066 067 if (bits == 32) { 068 return Murmur3_32HashFunction.GOOD_FAST_HASH_32; 069 } 070 if (bits <= 128) { 071 return Murmur3_128HashFunction.GOOD_FAST_HASH_128; 072 } 073 074 // Otherwise, join together some 128-bit murmur3s 075 int hashFunctionsNeeded = (bits + 127) / 128; 076 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 077 hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128; 078 int seed = GOOD_FAST_HASH_SEED; 079 for (int i = 1; i < hashFunctionsNeeded; i++) { 080 seed += 1500450271; // a prime; shouldn't matter 081 hashFunctions[i] = murmur3_128(seed); 082 } 083 return new ConcatenatedHashFunction(hashFunctions); 084 } 085 086 /** 087 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 088 * dependent on the hash codes they produce will fail sooner. 089 */ 090 @SuppressWarnings("GoodTime") // reading system time without TimeSource 091 static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 092 093 /** 094 * Returns a hash function implementing the <a 095 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 096 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 097 * bug</b> as described in the deprecation text. 098 * 099 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 100 * have the bug. 101 * 102 * @deprecated This implementation produces incorrect hash values from the {@link 103 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 104 * #murmur3_32_fixed(int)} instead. 105 */ 106 @Deprecated 107 public static HashFunction murmur3_32(int seed) { 108 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false); 109 } 110 111 /** 112 * Returns a hash function implementing the <a 113 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 114 * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known 115 * bug</b> as described in the deprecation text. 116 * 117 * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not 118 * have the bug. 119 * 120 * @deprecated This implementation produces incorrect hash values from the {@link 121 * HashFunction#hashString} method if the string contains non-BMP characters. Use {@link 122 * #murmur3_32_fixed()} instead. 123 */ 124 @Deprecated 125 public static HashFunction murmur3_32() { 126 return Murmur3_32HashFunction.MURMUR3_32; 127 } 128 129 /** 130 * Returns a hash function implementing the <a 131 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 132 * algorithm, x86 variant</a> (little-endian variant), using the given seed value. 133 * 134 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 135 * 136 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 137 * HashFunction} returned by the original {@code murmur3_32} method. 138 * 139 * @since 31.0 140 */ 141 public static HashFunction murmur3_32_fixed(int seed) { 142 return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true); 143 } 144 145 /** 146 * Returns a hash function implementing the <a 147 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3 148 * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero. 149 * 150 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 151 * 152 * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code 153 * HashFunction} returned by the original {@code murmur3_32} method. 154 * 155 * @since 31.0 156 */ 157 public static HashFunction murmur3_32_fixed() { 158 return Murmur3_32HashFunction.MURMUR3_32_FIXED; 159 } 160 161 /** 162 * Returns a hash function implementing the <a 163 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 164 * algorithm, x64 variant</a> (little-endian variant), using the given seed value. 165 * 166 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 167 */ 168 public static HashFunction murmur3_128(int seed) { 169 return new Murmur3_128HashFunction(seed); 170 } 171 172 /** 173 * Returns a hash function implementing the <a 174 * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3 175 * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero. 176 * 177 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 178 */ 179 public static HashFunction murmur3_128() { 180 return Murmur3_128HashFunction.MURMUR3_128; 181 } 182 183 /** 184 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 185 * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}. 186 * 187 * @since 15.0 188 */ 189 public static HashFunction sipHash24() { 190 return SipHashFunction.SIP_HASH_24; 191 } 192 193 /** 194 * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit 195 * SipHash-2-4 algorithm</a> using the given seed. 196 * 197 * @since 15.0 198 */ 199 public static HashFunction sipHash24(long k0, long k1) { 200 return new SipHashFunction(2, 4, k0, k1); 201 } 202 203 /** 204 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits). 205 * 206 * @deprecated If you must interoperate with a system that requires MD5, then use this method, 207 * despite its deprecation. But if you can choose your hash function, avoid MD5, which is 208 * neither fast nor secure. As of January 2017, we suggest: 209 * <ul> 210 * <li>For security: 211 * {@link Hashing#sha256} or a higher-level API. 212 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 213 * </ul> 214 */ 215 @Deprecated 216 public static HashFunction md5() { 217 return Md5Holder.MD5; 218 } 219 220 private static class Md5Holder { 221 static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 222 } 223 224 /** 225 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits). 226 * 227 * @deprecated If you must interoperate with a system that requires SHA-1, then use this method, 228 * despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is 229 * neither fast nor secure. As of January 2017, we suggest: 230 * <ul> 231 * <li>For security: 232 * {@link Hashing#sha256} or a higher-level API. 233 * <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats. 234 * </ul> 235 */ 236 @Deprecated 237 public static HashFunction sha1() { 238 return Sha1Holder.SHA_1; 239 } 240 241 private static class Sha1Holder { 242 static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 243 } 244 245 /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */ 246 public static HashFunction sha256() { 247 return Sha256Holder.SHA_256; 248 } 249 250 private static class Sha256Holder { 251 static final HashFunction SHA_256 = 252 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 253 } 254 255 /** 256 * Returns a hash function implementing the SHA-384 algorithm (384 hash bits). 257 * 258 * @since 19.0 259 */ 260 public static HashFunction sha384() { 261 return Sha384Holder.SHA_384; 262 } 263 264 private static class Sha384Holder { 265 static final HashFunction SHA_384 = 266 new MessageDigestHashFunction("SHA-384", "Hashing.sha384()"); 267 } 268 269 /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */ 270 public static HashFunction sha512() { 271 return Sha512Holder.SHA_512; 272 } 273 274 private static class Sha512Holder { 275 static final HashFunction SHA_512 = 276 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 277 } 278 279 /** 280 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 281 * MD5 (128 hash bits) hash function and the given secret key. 282 * 283 * @param key the secret key 284 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 285 * @since 20.0 286 */ 287 public static HashFunction hmacMd5(Key key) { 288 return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key)); 289 } 290 291 /** 292 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 293 * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array 294 * and the MD5 algorithm. 295 * 296 * @param key the key material of the secret key 297 * @since 20.0 298 */ 299 public static HashFunction hmacMd5(byte[] key) { 300 return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5")); 301 } 302 303 /** 304 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 305 * SHA-1 (160 hash bits) hash function and the given secret key. 306 * 307 * @param key the secret key 308 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 309 * @since 20.0 310 */ 311 public static HashFunction hmacSha1(Key key) { 312 return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key)); 313 } 314 315 /** 316 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 317 * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 318 * array and the SHA-1 algorithm. 319 * 320 * @param key the key material of the secret key 321 * @since 20.0 322 */ 323 public static HashFunction hmacSha1(byte[] key) { 324 return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1")); 325 } 326 327 /** 328 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 329 * SHA-256 (256 hash bits) hash function and the given secret key. 330 * 331 * @param key the secret key 332 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 333 * @since 20.0 334 */ 335 public static HashFunction hmacSha256(Key key) { 336 return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key)); 337 } 338 339 /** 340 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 341 * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 342 * array and the SHA-256 algorithm. 343 * 344 * @param key the key material of the secret key 345 * @since 20.0 346 */ 347 public static HashFunction hmacSha256(byte[] key) { 348 return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256")); 349 } 350 351 /** 352 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 353 * SHA-512 (512 hash bits) hash function and the given secret key. 354 * 355 * @param key the secret key 356 * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC 357 * @since 20.0 358 */ 359 public static HashFunction hmacSha512(Key key) { 360 return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key)); 361 } 362 363 /** 364 * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the 365 * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte 366 * array and the SHA-512 algorithm. 367 * 368 * @param key the key material of the secret key 369 * @since 20.0 370 */ 371 public static HashFunction hmacSha512(byte[] key) { 372 return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512")); 373 } 374 375 private static String hmacToString(String methodName, Key key) { 376 return "Hashing." 377 + methodName 378 + "(Key[algorithm=" 379 + key.getAlgorithm() 380 + ", format=" 381 + key.getFormat() 382 + "])"; 383 } 384 385 /** 386 * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described 387 * by RFC 3720, Section 12.1. 388 * 389 * <p>This function is best understood as a <a 390 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 391 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 392 * 393 * @since 18.0 394 */ 395 public static HashFunction crc32c() { 396 return Crc32cHashFunction.CRC_32_C; 397 } 398 399 /** 400 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits). 401 * 402 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 403 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 404 * 405 * <p>This function is best understood as a <a 406 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 407 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 408 * 409 * @since 14.0 410 */ 411 public static HashFunction crc32() { 412 return ChecksumType.CRC_32.hashFunction; 413 } 414 415 /** 416 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits). 417 * 418 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code 419 * HashCode} produced by this function, use {@link HashCode#padToLong()}. 420 * 421 * <p>This function is best understood as a <a 422 * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a 423 * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 424 * 425 * @since 14.0 426 */ 427 public static HashFunction adler32() { 428 return ChecksumType.ADLER_32.hashFunction; 429 } 430 431 @Immutable 432 enum ChecksumType implements ImmutableSupplier<Checksum> { 433 CRC_32("Hashing.crc32()") { 434 @Override 435 public Checksum get() { 436 return new CRC32(); 437 } 438 }, 439 ADLER_32("Hashing.adler32()") { 440 @Override 441 public Checksum get() { 442 return new Adler32(); 443 } 444 }; 445 446 public final HashFunction hashFunction; 447 448 ChecksumType(String toString) { 449 this.hashFunction = new ChecksumHashFunction(this, 32, toString); 450 } 451 } 452 453 /** 454 * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm. 455 * 456 * <p>This is designed for generating persistent fingerprints of strings. It isn't 457 * cryptographically secure, but it produces a high-quality hash with fewer collisions than some 458 * alternatives we've used in the past. 459 * 460 * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This 461 * means {@link HashCode#asLong} is guaranteed to return the same value that 462 * farmhash::Fingerprint64() would for the same input (when compared using {@link 463 * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers). 464 * 465 * <p>This function is best understood as a <a 466 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 467 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 468 * 469 * @since 20.0 470 */ 471 public static HashFunction farmHashFingerprint64() { 472 return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64; 473 } 474 475 /** 476 * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits). 477 * 478 * <p>This is designed for generating persistent fingerprints of strings. It isn't 479 * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints 480 * generated using this are byte-wise identical to those created using the C++ version, but note 481 * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}). 482 * Comparisons between the two should take this into account. 483 * 484 * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for 485 * longer strings. It could have been one or the other throughout. The main advantage of the 486 * combination is that CityHash has a bunch of special cases for short strings that don't need to 487 * be replicated here. The result will never be 0 or 1. 488 * 489 * <p>This function is best understood as a <a 490 * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true 491 * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>. 492 * 493 * @since 31.1 494 */ 495 public static HashFunction fingerprint2011() { 496 return Fingerprint2011.FINGERPRINT_2011; 497 } 498 499 /** 500 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner 501 * that minimizes the need for remapping as {@code buckets} grows. That is, {@code 502 * consistentHash(h, n)} equals: 503 * 504 * <ul> 505 * <li>{@code n - 1}, with approximate probability {@code 1/n} 506 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 507 * </ul> 508 * 509 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 510 * following conditions: 511 * 512 * <ul> 513 * <li>You want to assign the same fraction of inputs to each bucket. 514 * <li>When you reduce the number of buckets, you can accept that the most recently added 515 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 516 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 517 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 518 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 519 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 520 * no way for you to specify which of the three buckets is disappearing. Thus, if your 521 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 522 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 523 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 524 * </ul> 525 * 526 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 527 * consistent hashing</a> for more information. 528 */ 529 public static int consistentHash(HashCode hashCode, int buckets) { 530 return consistentHash(hashCode.padToLong(), buckets); 531 } 532 533 /** 534 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that 535 * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h, 536 * n)} equals: 537 * 538 * <ul> 539 * <li>{@code n - 1}, with approximate probability {@code 1/n} 540 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 541 * </ul> 542 * 543 * <p>This method is suitable for the common use case of dividing work among buckets that meet the 544 * following conditions: 545 * 546 * <ul> 547 * <li>You want to assign the same fraction of inputs to each bucket. 548 * <li>When you reduce the number of buckets, you can accept that the most recently added 549 * buckets will be removed first. More concretely, if you are dividing traffic among tasks, 550 * you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and 551 * {@code consistentHash} will handle it. If, however, you are dividing traffic among 552 * servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to 553 * take each of the servers offline, {@code consistentHash} will be a poor fit: It provides 554 * no way for you to specify which of the three buckets is disappearing. Thus, if your 555 * buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will 556 * assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo} 557 * traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic. 558 * </ul> 559 * 560 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on 561 * consistent hashing</a> for more information. 562 */ 563 public static int consistentHash(long input, int buckets) { 564 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 565 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 566 int candidate = 0; 567 int next; 568 569 // Jump from bucket to bucket until we go out of range 570 while (true) { 571 next = (int) ((candidate + 1) / generator.nextDouble()); 572 if (next >= 0 && next < buckets) { 573 candidate = next; 574 } else { 575 return candidate; 576 } 577 } 578 } 579 580 /** 581 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 582 * the information of these hash codes in an ordered fashion. That is, whenever two equal hash 583 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 584 * was computed from the <i>same</i> input hash codes in the <i>same</i> order. 585 * 586 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 587 * have the same bit length 588 */ 589 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 590 Iterator<HashCode> iterator = hashCodes.iterator(); 591 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 592 int bits = iterator.next().bits(); 593 byte[] resultBytes = new byte[bits / 8]; 594 for (HashCode hashCode : hashCodes) { 595 byte[] nextBytes = hashCode.asBytes(); 596 checkArgument( 597 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 598 for (int i = 0; i < nextBytes.length; i++) { 599 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 600 } 601 } 602 return HashCode.fromBytesNoCopy(resultBytes); 603 } 604 605 /** 606 * Returns a hash code, having the same bit length as each of the input hash codes, that combines 607 * the information of these hash codes in an unordered fashion. That is, whenever two equal hash 608 * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each 609 * was computed from the <i>same</i> input hash codes in <i>some</i> order. 610 * 611 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all 612 * have the same bit length 613 */ 614 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 615 Iterator<HashCode> iterator = hashCodes.iterator(); 616 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 617 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 618 for (HashCode hashCode : hashCodes) { 619 byte[] nextBytes = hashCode.asBytes(); 620 checkArgument( 621 nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length."); 622 for (int i = 0; i < nextBytes.length; i++) { 623 resultBytes[i] += nextBytes[i]; 624 } 625 } 626 return HashCode.fromBytesNoCopy(resultBytes); 627 } 628 629 /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */ 630 static int checkPositiveAndMakeMultipleOf32(int bits) { 631 checkArgument(bits > 0, "Number of bits must be positive"); 632 return (bits + 31) & ~31; 633 } 634 635 /** 636 * Returns a hash function which computes its hash code by concatenating the hash codes of the 637 * underlying hash functions together. This can be useful if you need to generate hash codes of a 638 * specific length. 639 * 640 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 641 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 642 * 643 * @since 19.0 644 */ 645 public static HashFunction concatenating( 646 HashFunction first, HashFunction second, HashFunction... rest) { 647 // We can't use Lists.asList() here because there's no hash->collect dependency 648 List<HashFunction> list = new ArrayList<>(); 649 list.add(first); 650 list.add(second); 651 Collections.addAll(list, rest); 652 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 653 } 654 655 /** 656 * Returns a hash function which computes its hash code by concatenating the hash codes of the 657 * underlying hash functions together. This can be useful if you need to generate hash codes of a 658 * specific length. 659 * 660 * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash 661 * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}. 662 * 663 * @since 19.0 664 */ 665 public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) { 666 checkNotNull(hashFunctions); 667 // We can't use Iterables.toArray() here because there's no hash->collect dependency 668 List<HashFunction> list = new ArrayList<>(); 669 for (HashFunction hashFunction : hashFunctions) { 670 list.add(hashFunction); 671 } 672 checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size()); 673 return new ConcatenatedHashFunction(list.toArray(new HashFunction[0])); 674 } 675 676 private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 677 678 private ConcatenatedHashFunction(HashFunction... functions) { 679 super(functions); 680 for (HashFunction function : functions) { 681 checkArgument( 682 function.bits() % 8 == 0, 683 "the number of bits (%s) in hashFunction (%s) must be divisible by 8", 684 function.bits(), 685 function); 686 } 687 } 688 689 @Override 690 HashCode makeHash(Hasher[] hashers) { 691 byte[] bytes = new byte[bits() / 8]; 692 int i = 0; 693 for (Hasher hasher : hashers) { 694 HashCode newHash = hasher.hash(); 695 i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8); 696 } 697 return HashCode.fromBytesNoCopy(bytes); 698 } 699 700 @Override 701 public int bits() { 702 int bitSum = 0; 703 for (HashFunction function : functions) { 704 bitSum += function.bits(); 705 } 706 return bitSum; 707 } 708 709 @Override 710 public boolean equals(@CheckForNull Object object) { 711 if (object instanceof ConcatenatedHashFunction) { 712 ConcatenatedHashFunction other = (ConcatenatedHashFunction) object; 713 return Arrays.equals(functions, other.functions); 714 } 715 return false; 716 } 717 718 @Override 719 public int hashCode() { 720 return Arrays.hashCode(functions); 721 } 722 } 723 724 /** 725 * Linear CongruentialGenerator to use for consistent hashing. See 726 * http://en.wikipedia.org/wiki/Linear_congruential_generator 727 */ 728 private static final class LinearCongruentialGenerator { 729 private long state; 730 731 public LinearCongruentialGenerator(long seed) { 732 this.state = seed; 733 } 734 735 public double nextDouble() { 736 state = 2862933555777941757L * state + 1; 737 return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31; 738 } 739 } 740 741 private Hashing() {} 742}