001/*
002 * Copyright (C) 2011 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.hash;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.hash.SneakyThrows.sneakyThrow;
020import static java.lang.invoke.MethodType.methodType;
021
022import com.google.errorprone.annotations.Immutable;
023import com.google.j2objc.annotations.J2ObjCIncompatible;
024import java.lang.invoke.MethodHandle;
025import java.lang.invoke.MethodHandles;
026import java.security.Key;
027import java.util.ArrayList;
028import java.util.Arrays;
029import java.util.Collections;
030import java.util.Iterator;
031import java.util.List;
032import java.util.zip.Adler32;
033import java.util.zip.CRC32;
034import java.util.zip.Checksum;
035import javax.crypto.spec.SecretKeySpec;
036import org.jspecify.annotations.Nullable;
037
038/**
039 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related
040 * utilities.
041 *
042 * <p>A comparison of the various hash functions can be found <a
043 * href="https://docs.google.com/spreadsheets/d/1_q2EVcxA2HjcrlVMbaqXwMj31h9M5-Bqj_m8vITOwwk/">here</a>.
044 *
045 * @author Kevin Bourrillion
046 * @author Dimitris Andreou
047 * @author Kurt Alfred Kluever
048 * @since 11.0
049 */
050public final class Hashing {
051  /**
052   * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm
053   * the returned function implements is unspecified and subject to change without notice.
054   *
055   * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code
056   * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current
057   * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose,
058   * non-cryptographic hash function that will never change behavior, we suggest {@link
059   * #murmur3_128}.
060   *
061   * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value
062   * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances.
063   *
064   * @param minimumBits a positive integer. This can be arbitrarily large. The returned {@link
065   *     HashFunction} instance may use memory proportional to this integer.
066   * @return a hash function, described above, that produces hash codes of length {@code
067   *     minimumBits} or greater
068   */
069  public static HashFunction goodFastHash(int minimumBits) {
070    int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
071
072    if (bits == 32) {
073      return Murmur3_32HashFunction.GOOD_FAST_HASH_32;
074    }
075    if (bits <= 128) {
076      return Murmur3_128HashFunction.GOOD_FAST_HASH_128;
077    }
078
079    // Otherwise, join together some 128-bit murmur3s
080    int hashFunctionsNeeded = (bits + 127) / 128;
081    HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
082    hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128;
083    int seed = GOOD_FAST_HASH_SEED;
084    for (int i = 1; i < hashFunctionsNeeded; i++) {
085      seed += 1500450271; // a prime; shouldn't matter
086      hashFunctions[i] = murmur3_128(seed);
087    }
088    return new ConcatenatedHashFunction(hashFunctions);
089  }
090
091  /**
092   * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything
093   * dependent on the hash codes they produce will fail sooner.
094   */
095  @SuppressWarnings("GoodTime") // reading system time without TimeSource
096  static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis();
097
098  /**
099   * Returns a hash function implementing the <a
100   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3
101   * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known
102   * bug</b> as described in the deprecation text.
103   *
104   * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not
105   * have the bug.
106   *
107   * @deprecated This implementation produces incorrect hash values from the {@link
108   *     HashFunction#hashString} method if the string contains non-BMP characters. Use {@link
109   *     #murmur3_32_fixed(int)} instead.
110   */
111  @Deprecated
112  @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks
113  public static HashFunction murmur3_32(int seed) {
114    return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false);
115  }
116
117  /**
118   * Returns a hash function implementing the <a
119   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3
120   * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known
121   * bug</b> as described in the deprecation text.
122   *
123   * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not
124   * have the bug.
125   *
126   * @deprecated This implementation produces incorrect hash values from the {@link
127   *     HashFunction#hashString} method if the string contains non-BMP characters. Use {@link
128   *     #murmur3_32_fixed()} instead.
129   */
130  @Deprecated
131  @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks
132  public static HashFunction murmur3_32() {
133    return Murmur3_32HashFunction.MURMUR3_32;
134  }
135
136  /**
137   * Returns a hash function implementing the <a
138   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3
139   * algorithm, x86 variant</a> (little-endian variant), using the given seed value.
140   *
141   * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
142   *
143   * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code
144   * HashFunction} returned by the original {@code murmur3_32} method.
145   *
146   * @since 31.0
147   */
148  @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks
149  public static HashFunction murmur3_32_fixed(int seed) {
150    return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true);
151  }
152
153  /**
154   * Returns a hash function implementing the <a
155   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3
156   * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero.
157   *
158   * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
159   *
160   * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code
161   * HashFunction} returned by the original {@code murmur3_32} method.
162   *
163   * @since 31.0
164   */
165  @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks
166  public static HashFunction murmur3_32_fixed() {
167    return Murmur3_32HashFunction.MURMUR3_32_FIXED;
168  }
169
170  /**
171   * Returns a hash function implementing the <a
172   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3
173   * algorithm, x64 variant</a> (little-endian variant), using the given seed value.
174   *
175   * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
176   */
177  @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks
178  public static HashFunction murmur3_128(int seed) {
179    return new Murmur3_128HashFunction(seed);
180  }
181
182  /**
183   * Returns a hash function implementing the <a
184   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3
185   * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero.
186   *
187   * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
188   */
189  @SuppressWarnings("IdentifierName") // the best we could do for adjacent digit blocks
190  public static HashFunction murmur3_128() {
191    return Murmur3_128HashFunction.MURMUR3_128;
192  }
193
194  /**
195   * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit
196   * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}.
197   *
198   * @since 15.0
199   */
200  public static HashFunction sipHash24() {
201    return SipHashFunction.SIP_HASH_24;
202  }
203
204  /**
205   * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit
206   * SipHash-2-4 algorithm</a> using the given seed.
207   *
208   * @since 15.0
209   */
210  public static HashFunction sipHash24(long k0, long k1) {
211    return new SipHashFunction(2, 4, k0, k1);
212  }
213
214  /**
215   * Returns a hash function implementing the MD5 hash algorithm (128 hash bits).
216   *
217   * @deprecated If you must interoperate with a system that requires MD5, then use this method,
218   *     despite its deprecation. But if you can choose your hash function, avoid MD5, which is
219   *     neither fast nor secure. As of January 2017, we suggest:
220   *     <ul>
221   *       <li>For security:
222   *           {@link Hashing#sha256} or a higher-level API.
223   *       <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats.
224   *     </ul>
225   */
226  @Deprecated
227  public static HashFunction md5() {
228    return Md5Holder.MD5;
229  }
230
231  private static class Md5Holder {
232    static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()");
233  }
234
235  /**
236   * Returns a hash function implementing the SHA-1 algorithm (160 hash bits).
237   *
238   * @deprecated If you must interoperate with a system that requires SHA-1, then use this method,
239   *     despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is
240   *     neither fast nor secure. As of January 2017, we suggest:
241   *     <ul>
242   *       <li>For security:
243   *           {@link Hashing#sha256} or a higher-level API.
244   *       <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats.
245   *     </ul>
246   */
247  @Deprecated
248  public static HashFunction sha1() {
249    return Sha1Holder.SHA_1;
250  }
251
252  private static class Sha1Holder {
253    static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()");
254  }
255
256  /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */
257  public static HashFunction sha256() {
258    return Sha256Holder.SHA_256;
259  }
260
261  private static class Sha256Holder {
262    static final HashFunction SHA_256 =
263        new MessageDigestHashFunction("SHA-256", "Hashing.sha256()");
264  }
265
266  /**
267   * Returns a hash function implementing the SHA-384 algorithm (384 hash bits).
268   *
269   * @since 19.0
270   */
271  public static HashFunction sha384() {
272    return Sha384Holder.SHA_384;
273  }
274
275  private static class Sha384Holder {
276    static final HashFunction SHA_384 =
277        new MessageDigestHashFunction("SHA-384", "Hashing.sha384()");
278  }
279
280  /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */
281  public static HashFunction sha512() {
282    return Sha512Holder.SHA_512;
283  }
284
285  private static class Sha512Holder {
286    static final HashFunction SHA_512 =
287        new MessageDigestHashFunction("SHA-512", "Hashing.sha512()");
288  }
289
290  /**
291   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
292   * MD5 (128 hash bits) hash function and the given secret key.
293   *
294   * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other
295   * future-proof algorithms <a
296   * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>.
297   *
298   * @param key the secret key
299   * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
300   * @since 20.0
301   */
302  public static HashFunction hmacMd5(Key key) {
303    return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key));
304  }
305
306  /**
307   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
308   * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array
309   * and the MD5 algorithm.
310   *
311   * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other
312   * future-proof algorithms <a
313   * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>.
314   *
315   * @param key the key material of the secret key
316   * @since 20.0
317   */
318  public static HashFunction hmacMd5(byte[] key) {
319    return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5"));
320  }
321
322  /**
323   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
324   * SHA-1 (160 hash bits) hash function and the given secret key.
325   *
326   * @param key the secret key
327   * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
328   * @since 20.0
329   */
330  public static HashFunction hmacSha1(Key key) {
331    return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key));
332  }
333
334  /**
335   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
336   * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte
337   * array and the SHA-1 algorithm.
338   *
339   * @param key the key material of the secret key
340   * @since 20.0
341   */
342  public static HashFunction hmacSha1(byte[] key) {
343    return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1"));
344  }
345
346  /**
347   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
348   * SHA-256 (256 hash bits) hash function and the given secret key.
349   *
350   * @param key the secret key
351   * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
352   * @since 20.0
353   */
354  public static HashFunction hmacSha256(Key key) {
355    return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key));
356  }
357
358  /**
359   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
360   * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte
361   * array and the SHA-256 algorithm.
362   *
363   * @param key the key material of the secret key
364   * @since 20.0
365   */
366  public static HashFunction hmacSha256(byte[] key) {
367    return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256"));
368  }
369
370  /**
371   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
372   * SHA-512 (512 hash bits) hash function and the given secret key.
373   *
374   * @param key the secret key
375   * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
376   * @since 20.0
377   */
378  public static HashFunction hmacSha512(Key key) {
379    return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key));
380  }
381
382  /**
383   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
384   * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte
385   * array and the SHA-512 algorithm.
386   *
387   * @param key the key material of the secret key
388   * @since 20.0
389   */
390  public static HashFunction hmacSha512(byte[] key) {
391    return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512"));
392  }
393
394  private static String hmacToString(String methodName, Key key) {
395    return "Hashing."
396        + methodName
397        + "(Key[algorithm="
398        + key.getAlgorithm()
399        + ", format="
400        + key.getFormat()
401        + "])";
402  }
403
404  /**
405   * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described
406   * by RFC 3720, Section 12.1.
407   *
408   * <p>This function is best understood as a <a
409   * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a
410   * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
411   *
412   * @since 18.0
413   */
414  public static HashFunction crc32c() {
415    return Crc32CSupplier.HASH_FUNCTION;
416  }
417
418  @Immutable
419  private enum Crc32CSupplier implements ImmutableSupplier<HashFunction> {
420    @J2ObjCIncompatible
421    JAVA_UTIL_ZIP {
422      @Override
423      public HashFunction get() {
424        return ChecksumType.CRC_32C.hashFunction;
425      }
426    },
427    ABSTRACT_HASH_FUNCTION {
428      @Override
429      public HashFunction get() {
430        return Crc32cHashFunction.CRC_32_C;
431      }
432    };
433
434    static final HashFunction HASH_FUNCTION = pickFunction().get();
435
436    private static Crc32CSupplier pickFunction() {
437      Crc32CSupplier[] functions = values();
438
439      if (functions.length == 1) {
440        // We're running under J2ObjC.
441        return functions[0];
442      }
443
444      // We can't refer to JAVA_UTIL_ZIP directly at compile time because of J2ObjC.
445      Crc32CSupplier javaUtilZip = functions[0];
446
447      try {
448        Class.forName("java.util.zip.CRC32C");
449        return javaUtilZip;
450      } catch (ClassNotFoundException runningUnderJava8) {
451        return ABSTRACT_HASH_FUNCTION;
452      }
453    }
454  }
455
456  /**
457   * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits).
458   *
459   * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code
460   * HashCode} produced by this function, use {@link HashCode#padToLong()}.
461   *
462   * <p>This function is best understood as a <a
463   * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a
464   * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
465   *
466   * @since 14.0
467   */
468  public static HashFunction crc32() {
469    return ChecksumType.CRC_32.hashFunction;
470  }
471
472  /**
473   * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits).
474   *
475   * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code
476   * HashCode} produced by this function, use {@link HashCode#padToLong()}.
477   *
478   * <p>This function is best understood as a <a
479   * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a
480   * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
481   *
482   * @since 14.0
483   */
484  public static HashFunction adler32() {
485    return ChecksumType.ADLER_32.hashFunction;
486  }
487
488  @Immutable
489  enum ChecksumType implements ImmutableSupplier<Checksum> {
490    CRC_32("Hashing.crc32()") {
491      @Override
492      public Checksum get() {
493        return new CRC32();
494      }
495    },
496    @J2ObjCIncompatible
497    CRC_32C("Hashing.crc32c()") {
498      @Override
499      public Checksum get() {
500        return Crc32cMethodHandles.newCrc32c();
501      }
502    },
503    ADLER_32("Hashing.adler32()") {
504      @Override
505      public Checksum get() {
506        return new Adler32();
507      }
508    };
509
510    public final HashFunction hashFunction;
511
512    ChecksumType(String toString) {
513      this.hashFunction = new ChecksumHashFunction(this, 32, toString);
514    }
515  }
516
517  @J2ObjCIncompatible
518  @SuppressWarnings("unused")
519  private static final class Crc32cMethodHandles {
520    private static final MethodHandle CONSTRUCTOR = crc32cConstructor();
521
522    @IgnoreJRERequirement // https://github.com/mojohaus/animal-sniffer/issues/67
523    static Checksum newCrc32c() {
524      try {
525        return (Checksum) CONSTRUCTOR.invokeExact();
526      } catch (Throwable e) {
527        // The constructor has no `throws` clause.
528        throw sneakyThrow(e);
529      }
530    }
531
532    private static MethodHandle crc32cConstructor() {
533      try {
534        Class<?> clazz = Class.forName("java.util.zip.CRC32C");
535        /*
536         * We can't cast to CRC32C at the call site because we support building with Java 8
537         * (https://github.com/google/guava/issues/6549). So we have to use asType() to change from
538         * CRC32C to Checksum. This may carry some performance cost
539         * (https://stackoverflow.com/a/22321671/28465), but I'd have to benchmark more carefully to
540         * even detect it.
541         */
542        return MethodHandles.lookup()
543            .findConstructor(clazz, methodType(void.class))
544            .asType(methodType(Checksum.class));
545      } catch (ClassNotFoundException e) {
546        // We check that the class is available before calling this method.
547        throw new AssertionError(e);
548      } catch (IllegalAccessException e) {
549        // That API is public.
550        throw newLinkageError(e);
551      } catch (NoSuchMethodException e) {
552        // That constructor exists.
553        throw newLinkageError(e);
554      }
555    }
556
557    private static LinkageError newLinkageError(Throwable cause) {
558      return new LinkageError(cause.toString(), cause);
559    }
560  }
561
562  /**
563   * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm.
564   *
565   * <p>This is designed for generating persistent fingerprints of strings. It isn't
566   * cryptographically secure, but it produces a high-quality hash with fewer collisions than some
567   * alternatives we've used in the past.
568   *
569   * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This
570   * means {@link HashCode#asLong} is guaranteed to return the same value that
571   * farmhash::Fingerprint64() would for the same input (when compared using {@link
572   * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers).
573   *
574   * <p>This function is best understood as a <a
575   * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true
576   * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
577   *
578   * @since 20.0
579   */
580  public static HashFunction farmHashFingerprint64() {
581    return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64;
582  }
583
584  /**
585   * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits).
586   *
587   * <p>This is designed for generating persistent fingerprints of strings. It isn't
588   * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints
589   * generated using this are byte-wise identical to those created using the C++ version, but note
590   * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}).
591   * Comparisons between the two should take this into account.
592   *
593   * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for
594   * longer strings. It could have been one or the other throughout. The main advantage of the
595   * combination is that CityHash has a bunch of special cases for short strings that don't need to
596   * be replicated here. The result will never be 0 or 1.
597   *
598   * <p>This function is best understood as a <a
599   * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true
600   * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
601   *
602   * @since 31.1
603   */
604  public static HashFunction fingerprint2011() {
605    return Fingerprint2011.FINGERPRINT_2011;
606  }
607
608  /**
609   * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner
610   * that minimizes the need for remapping as {@code buckets} grows. That is, {@code
611   * consistentHash(h, n)} equals:
612   *
613   * <ul>
614   *   <li>{@code n - 1}, with approximate probability {@code 1/n}
615   *   <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
616   * </ul>
617   *
618   * <p>This method is suitable for the common use case of dividing work among buckets that meet the
619   * following conditions:
620   *
621   * <ul>
622   *   <li>You want to assign the same fraction of inputs to each bucket.
623   *   <li>When you reduce the number of buckets, you can accept that the most recently added
624   *       buckets will be removed first. More concretely, if you are dividing traffic among tasks,
625   *       you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and
626   *       {@code consistentHash} will handle it. If, however, you are dividing traffic among
627   *       servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to
628   *       take each of the servers offline, {@code consistentHash} will be a poor fit: It provides
629   *       no way for you to specify which of the three buckets is disappearing. Thus, if your
630   *       buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will
631   *       assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo}
632   *       traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic.
633   * </ul>
634   *
635   * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on
636   * consistent hashing</a> for more information.
637   */
638  public static int consistentHash(HashCode hashCode, int buckets) {
639    return consistentHash(hashCode.padToLong(), buckets);
640  }
641
642  /**
643   * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that
644   * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h,
645   * n)} equals:
646   *
647   * <ul>
648   *   <li>{@code n - 1}, with approximate probability {@code 1/n}
649   *   <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
650   * </ul>
651   *
652   * <p>This method is suitable for the common use case of dividing work among buckets that meet the
653   * following conditions:
654   *
655   * <ul>
656   *   <li>You want to assign the same fraction of inputs to each bucket.
657   *   <li>When you reduce the number of buckets, you can accept that the most recently added
658   *       buckets will be removed first. More concretely, if you are dividing traffic among tasks,
659   *       you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and
660   *       {@code consistentHash} will handle it. If, however, you are dividing traffic among
661   *       servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to
662   *       take each of the servers offline, {@code consistentHash} will be a poor fit: It provides
663   *       no way for you to specify which of the three buckets is disappearing. Thus, if your
664   *       buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will
665   *       assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo}
666   *       traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic.
667   * </ul>
668   *
669   * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on
670   * consistent hashing</a> for more information.
671   */
672  public static int consistentHash(long input, int buckets) {
673    checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
674    LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input);
675    int candidate = 0;
676    int next;
677
678    // Jump from bucket to bucket until we go out of range
679    while (true) {
680      next = (int) ((candidate + 1) / generator.nextDouble());
681      if (next >= 0 && next < buckets) {
682        candidate = next;
683      } else {
684        return candidate;
685      }
686    }
687  }
688
689  /**
690   * Returns a hash code, having the same bit length as each of the input hash codes, that combines
691   * the information of these hash codes in an ordered fashion. That is, whenever two equal hash
692   * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each
693   * was computed from the <i>same</i> input hash codes in the <i>same</i> order.
694   *
695   * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all
696   *     have the same bit length
697   */
698  public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
699    Iterator<HashCode> iterator = hashCodes.iterator();
700    checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
701    int bits = iterator.next().bits();
702    byte[] resultBytes = new byte[bits / 8];
703    for (HashCode hashCode : hashCodes) {
704      byte[] nextBytes = hashCode.asBytes();
705      checkArgument(
706          nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length.");
707      for (int i = 0; i < nextBytes.length; i++) {
708        resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
709      }
710    }
711    return HashCode.fromBytesNoCopy(resultBytes);
712  }
713
714  /**
715   * Returns a hash code, having the same bit length as each of the input hash codes, that combines
716   * the information of these hash codes in an unordered fashion. That is, whenever two equal hash
717   * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each
718   * was computed from the <i>same</i> input hash codes in <i>some</i> order.
719   *
720   * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all
721   *     have the same bit length
722   */
723  public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
724    Iterator<HashCode> iterator = hashCodes.iterator();
725    checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
726    byte[] resultBytes = new byte[iterator.next().bits() / 8];
727    for (HashCode hashCode : hashCodes) {
728      byte[] nextBytes = hashCode.asBytes();
729      checkArgument(
730          nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length.");
731      for (int i = 0; i < nextBytes.length; i++) {
732        resultBytes[i] += nextBytes[i];
733      }
734    }
735    return HashCode.fromBytesNoCopy(resultBytes);
736  }
737
738  /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */
739  static int checkPositiveAndMakeMultipleOf32(int bits) {
740    checkArgument(bits > 0, "Number of bits must be positive");
741    return (bits + 31) & ~31;
742  }
743
744  /**
745   * Returns a hash function which computes its hash code by concatenating the hash codes of the
746   * underlying hash functions together. This can be useful if you need to generate hash codes of a
747   * specific length.
748   *
749   * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash
750   * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}.
751   *
752   * @since 19.0
753   */
754  public static HashFunction concatenating(
755      HashFunction first, HashFunction second, HashFunction... rest) {
756    // We can't use Lists.asList() here because there's no hash->collect dependency
757    List<HashFunction> list = new ArrayList<>();
758    list.add(first);
759    list.add(second);
760    Collections.addAll(list, rest);
761    return new ConcatenatedHashFunction(list.toArray(new HashFunction[0]));
762  }
763
764  /**
765   * Returns a hash function which computes its hash code by concatenating the hash codes of the
766   * underlying hash functions together. This can be useful if you need to generate hash codes of a
767   * specific length.
768   *
769   * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash
770   * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}.
771   *
772   * @since 19.0
773   */
774  public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) {
775    checkNotNull(hashFunctions);
776    // We can't use Iterables.toArray() here because there's no hash->collect dependency
777    List<HashFunction> list = new ArrayList<>();
778    for (HashFunction hashFunction : hashFunctions) {
779      list.add(hashFunction);
780    }
781    checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size());
782    return new ConcatenatedHashFunction(list.toArray(new HashFunction[0]));
783  }
784
785  private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
786
787    private ConcatenatedHashFunction(HashFunction... functions) {
788      super(functions);
789      for (HashFunction function : functions) {
790        checkArgument(
791            function.bits() % 8 == 0,
792            "the number of bits (%s) in hashFunction (%s) must be divisible by 8",
793            function.bits(),
794            function);
795      }
796    }
797
798    @Override
799    HashCode makeHash(Hasher[] hashers) {
800      byte[] bytes = new byte[bits() / 8];
801      int i = 0;
802      for (Hasher hasher : hashers) {
803        HashCode newHash = hasher.hash();
804        i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8);
805      }
806      return HashCode.fromBytesNoCopy(bytes);
807    }
808
809    @Override
810    public int bits() {
811      int bitSum = 0;
812      for (HashFunction function : functions) {
813        bitSum += function.bits();
814      }
815      return bitSum;
816    }
817
818    @Override
819    public boolean equals(@Nullable Object object) {
820      if (object instanceof ConcatenatedHashFunction) {
821        ConcatenatedHashFunction other = (ConcatenatedHashFunction) object;
822        return Arrays.equals(functions, other.functions);
823      }
824      return false;
825    }
826
827    @Override
828    public int hashCode() {
829      return Arrays.hashCode(functions);
830    }
831  }
832
833  /**
834   * Linear CongruentialGenerator to use for consistent hashing. See
835   * http://en.wikipedia.org/wiki/Linear_congruential_generator
836   */
837  private static final class LinearCongruentialGenerator {
838    private long state;
839
840    public LinearCongruentialGenerator(long seed) {
841      this.state = seed;
842    }
843
844    public double nextDouble() {
845      state = 2862933555777941757L * state + 1;
846      return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31;
847    }
848  }
849
850  private Hashing() {}
851}