001/*
002 * Copyright (C) 2011 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.hash;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Throwables.throwIfUnchecked;
020import static java.lang.invoke.MethodType.methodType;
021
022import com.google.errorprone.annotations.Immutable;
023import com.google.j2objc.annotations.J2ObjCIncompatible;
024import java.lang.invoke.MethodHandle;
025import java.lang.invoke.MethodHandles;
026import java.lang.reflect.UndeclaredThrowableException;
027import java.security.Key;
028import java.util.ArrayList;
029import java.util.Arrays;
030import java.util.Collections;
031import java.util.Iterator;
032import java.util.List;
033import java.util.zip.Adler32;
034import java.util.zip.CRC32;
035import java.util.zip.Checksum;
036import javax.annotation.CheckForNull;
037import javax.crypto.spec.SecretKeySpec;
038
039/**
040 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related
041 * utilities.
042 *
043 * <p>A comparison of the various hash functions can be found <a
044 * href="https://docs.google.com/spreadsheets/d/1_q2EVcxA2HjcrlVMbaqXwMj31h9M5-Bqj_m8vITOwwk/">here</a>.
045 *
046 * @author Kevin Bourrillion
047 * @author Dimitris Andreou
048 * @author Kurt Alfred Kluever
049 * @since 11.0
050 */
051@ElementTypesAreNonnullByDefault
052public final class Hashing {
053  /**
054   * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm
055   * the returned function implements is unspecified and subject to change without notice.
056   *
057   * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code
058   * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current
059   * process in any way, for example being sent over RPC, or saved to disk. For a general-purpose,
060   * non-cryptographic hash function that will never change behavior, we suggest {@link
061   * #murmur3_128}.
062   *
063   * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value
064   * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances.
065   *
066   * @param minimumBits a positive integer. This can be arbitrarily large. The returned {@link
067   *     HashFunction} instance may use memory proportional to this integer.
068   * @return a hash function, described above, that produces hash codes of length {@code
069   *     minimumBits} or greater
070   */
071  public static HashFunction goodFastHash(int minimumBits) {
072    int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
073
074    if (bits == 32) {
075      return Murmur3_32HashFunction.GOOD_FAST_HASH_32;
076    }
077    if (bits <= 128) {
078      return Murmur3_128HashFunction.GOOD_FAST_HASH_128;
079    }
080
081    // Otherwise, join together some 128-bit murmur3s
082    int hashFunctionsNeeded = (bits + 127) / 128;
083    HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
084    hashFunctions[0] = Murmur3_128HashFunction.GOOD_FAST_HASH_128;
085    int seed = GOOD_FAST_HASH_SEED;
086    for (int i = 1; i < hashFunctionsNeeded; i++) {
087      seed += 1500450271; // a prime; shouldn't matter
088      hashFunctions[i] = murmur3_128(seed);
089    }
090    return new ConcatenatedHashFunction(hashFunctions);
091  }
092
093  /**
094   * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything
095   * dependent on the hash codes they produce will fail sooner.
096   */
097  @SuppressWarnings("GoodTime") // reading system time without TimeSource
098  static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis();
099
100  /**
101   * Returns a hash function implementing the <a
102   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3
103   * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known
104   * bug</b> as described in the deprecation text.
105   *
106   * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not
107   * have the bug.
108   *
109   * @deprecated This implementation produces incorrect hash values from the {@link
110   *     HashFunction#hashString} method if the string contains non-BMP characters. Use {@link
111   *     #murmur3_32_fixed(int)} instead.
112   */
113  @Deprecated
114  public static HashFunction murmur3_32(int seed) {
115    return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ false);
116  }
117
118  /**
119   * Returns a hash function implementing the <a
120   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3
121   * algorithm, x86 variant</a> (little-endian variant), using the given seed value, <b>with a known
122   * bug</b> as described in the deprecation text.
123   *
124   * <p>The C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A), which however does not
125   * have the bug.
126   *
127   * @deprecated This implementation produces incorrect hash values from the {@link
128   *     HashFunction#hashString} method if the string contains non-BMP characters. Use {@link
129   *     #murmur3_32_fixed()} instead.
130   */
131  @Deprecated
132  public static HashFunction murmur3_32() {
133    return Murmur3_32HashFunction.MURMUR3_32;
134  }
135
136  /**
137   * Returns a hash function implementing the <a
138   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3
139   * algorithm, x86 variant</a> (little-endian variant), using the given seed value.
140   *
141   * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
142   *
143   * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code
144   * HashFunction} returned by the original {@code murmur3_32} method.
145   *
146   * @since 31.0
147   */
148  public static HashFunction murmur3_32_fixed(int seed) {
149    return new Murmur3_32HashFunction(seed, /* supplementaryPlaneFix= */ true);
150  }
151
152  /**
153   * Returns a hash function implementing the <a
154   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">32-bit murmur3
155   * algorithm, x86 variant</a> (little-endian variant), using a seed value of zero.
156   *
157   * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A).
158   *
159   * <p>This method is called {@code murmur3_32_fixed} because it fixes a bug in the {@code
160   * HashFunction} returned by the original {@code murmur3_32} method.
161   *
162   * @since 31.0
163   */
164  public static HashFunction murmur3_32_fixed() {
165    return Murmur3_32HashFunction.MURMUR3_32_FIXED;
166  }
167
168  /**
169   * Returns a hash function implementing the <a
170   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3
171   * algorithm, x64 variant</a> (little-endian variant), using the given seed value.
172   *
173   * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
174   */
175  public static HashFunction murmur3_128(int seed) {
176    return new Murmur3_128HashFunction(seed);
177  }
178
179  /**
180   * Returns a hash function implementing the <a
181   * href="https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp">128-bit murmur3
182   * algorithm, x64 variant</a> (little-endian variant), using a seed value of zero.
183   *
184   * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F).
185   */
186  public static HashFunction murmur3_128() {
187    return Murmur3_128HashFunction.MURMUR3_128;
188  }
189
190  /**
191   * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit
192   * SipHash-2-4 algorithm</a> using a seed value of {@code k = 00 01 02 ...}.
193   *
194   * @since 15.0
195   */
196  public static HashFunction sipHash24() {
197    return SipHashFunction.SIP_HASH_24;
198  }
199
200  /**
201   * Returns a hash function implementing the <a href="https://131002.net/siphash/">64-bit
202   * SipHash-2-4 algorithm</a> using the given seed.
203   *
204   * @since 15.0
205   */
206  public static HashFunction sipHash24(long k0, long k1) {
207    return new SipHashFunction(2, 4, k0, k1);
208  }
209
210  /**
211   * Returns a hash function implementing the MD5 hash algorithm (128 hash bits).
212   *
213   * @deprecated If you must interoperate with a system that requires MD5, then use this method,
214   *     despite its deprecation. But if you can choose your hash function, avoid MD5, which is
215   *     neither fast nor secure. As of January 2017, we suggest:
216   *     <ul>
217   *       <li>For security:
218   *           {@link Hashing#sha256} or a higher-level API.
219   *       <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats.
220   *     </ul>
221   */
222  @Deprecated
223  public static HashFunction md5() {
224    return Md5Holder.MD5;
225  }
226
227  private static class Md5Holder {
228    static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()");
229  }
230
231  /**
232   * Returns a hash function implementing the SHA-1 algorithm (160 hash bits).
233   *
234   * @deprecated If you must interoperate with a system that requires SHA-1, then use this method,
235   *     despite its deprecation. But if you can choose your hash function, avoid SHA-1, which is
236   *     neither fast nor secure. As of January 2017, we suggest:
237   *     <ul>
238   *       <li>For security:
239   *           {@link Hashing#sha256} or a higher-level API.
240   *       <li>For speed: {@link Hashing#goodFastHash}, though see its docs for caveats.
241   *     </ul>
242   */
243  @Deprecated
244  public static HashFunction sha1() {
245    return Sha1Holder.SHA_1;
246  }
247
248  private static class Sha1Holder {
249    static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1", "Hashing.sha1()");
250  }
251
252  /** Returns a hash function implementing the SHA-256 algorithm (256 hash bits). */
253  public static HashFunction sha256() {
254    return Sha256Holder.SHA_256;
255  }
256
257  private static class Sha256Holder {
258    static final HashFunction SHA_256 =
259        new MessageDigestHashFunction("SHA-256", "Hashing.sha256()");
260  }
261
262  /**
263   * Returns a hash function implementing the SHA-384 algorithm (384 hash bits).
264   *
265   * @since 19.0
266   */
267  public static HashFunction sha384() {
268    return Sha384Holder.SHA_384;
269  }
270
271  private static class Sha384Holder {
272    static final HashFunction SHA_384 =
273        new MessageDigestHashFunction("SHA-384", "Hashing.sha384()");
274  }
275
276  /** Returns a hash function implementing the SHA-512 algorithm (512 hash bits). */
277  public static HashFunction sha512() {
278    return Sha512Holder.SHA_512;
279  }
280
281  private static class Sha512Holder {
282    static final HashFunction SHA_512 =
283        new MessageDigestHashFunction("SHA-512", "Hashing.sha512()");
284  }
285
286  /**
287   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
288   * MD5 (128 hash bits) hash function and the given secret key.
289   *
290   * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other
291   * future-proof algorithms <a
292   * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>.
293   *
294   * @param key the secret key
295   * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
296   * @since 20.0
297   */
298  public static HashFunction hmacMd5(Key key) {
299    return new MacHashFunction("HmacMD5", key, hmacToString("hmacMd5", key));
300  }
301
302  /**
303   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
304   * MD5 (128 hash bits) hash function and a {@link SecretKeySpec} created from the given byte array
305   * and the MD5 algorithm.
306   *
307   * <p>If you are designing a new system that needs HMAC, prefer {@link #hmacSha256} or other
308   * future-proof algorithms <a
309   * href="https://datatracker.ietf.org/doc/html/rfc6151#section-2.3">over {@code hmacMd5}</a>.
310   *
311   * @param key the key material of the secret key
312   * @since 20.0
313   */
314  public static HashFunction hmacMd5(byte[] key) {
315    return hmacMd5(new SecretKeySpec(checkNotNull(key), "HmacMD5"));
316  }
317
318  /**
319   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
320   * SHA-1 (160 hash bits) hash function and the given secret key.
321   *
322   * @param key the secret key
323   * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
324   * @since 20.0
325   */
326  public static HashFunction hmacSha1(Key key) {
327    return new MacHashFunction("HmacSHA1", key, hmacToString("hmacSha1", key));
328  }
329
330  /**
331   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
332   * SHA-1 (160 hash bits) hash function and a {@link SecretKeySpec} created from the given byte
333   * array and the SHA-1 algorithm.
334   *
335   * @param key the key material of the secret key
336   * @since 20.0
337   */
338  public static HashFunction hmacSha1(byte[] key) {
339    return hmacSha1(new SecretKeySpec(checkNotNull(key), "HmacSHA1"));
340  }
341
342  /**
343   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
344   * SHA-256 (256 hash bits) hash function and the given secret key.
345   *
346   * @param key the secret key
347   * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
348   * @since 20.0
349   */
350  public static HashFunction hmacSha256(Key key) {
351    return new MacHashFunction("HmacSHA256", key, hmacToString("hmacSha256", key));
352  }
353
354  /**
355   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
356   * SHA-256 (256 hash bits) hash function and a {@link SecretKeySpec} created from the given byte
357   * array and the SHA-256 algorithm.
358   *
359   * @param key the key material of the secret key
360   * @since 20.0
361   */
362  public static HashFunction hmacSha256(byte[] key) {
363    return hmacSha256(new SecretKeySpec(checkNotNull(key), "HmacSHA256"));
364  }
365
366  /**
367   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
368   * SHA-512 (512 hash bits) hash function and the given secret key.
369   *
370   * @param key the secret key
371   * @throws IllegalArgumentException if the given key is inappropriate for initializing this MAC
372   * @since 20.0
373   */
374  public static HashFunction hmacSha512(Key key) {
375    return new MacHashFunction("HmacSHA512", key, hmacToString("hmacSha512", key));
376  }
377
378  /**
379   * Returns a hash function implementing the Message Authentication Code (MAC) algorithm, using the
380   * SHA-512 (512 hash bits) hash function and a {@link SecretKeySpec} created from the given byte
381   * array and the SHA-512 algorithm.
382   *
383   * @param key the key material of the secret key
384   * @since 20.0
385   */
386  public static HashFunction hmacSha512(byte[] key) {
387    return hmacSha512(new SecretKeySpec(checkNotNull(key), "HmacSHA512"));
388  }
389
390  private static String hmacToString(String methodName, Key key) {
391    return "Hashing."
392        + methodName
393        + "(Key[algorithm="
394        + key.getAlgorithm()
395        + ", format="
396        + key.getFormat()
397        + "])";
398  }
399
400  /**
401   * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described
402   * by RFC 3720, Section 12.1.
403   *
404   * <p>This function is best understood as a <a
405   * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a
406   * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
407   *
408   * @since 18.0
409   */
410  public static HashFunction crc32c() {
411    return Crc32CSupplier.HASH_FUNCTION;
412  }
413
414  @Immutable
415  private enum Crc32CSupplier implements ImmutableSupplier<HashFunction> {
416    @J2ObjCIncompatible
417    JAVA_UTIL_ZIP {
418      @Override
419      public HashFunction get() {
420        return ChecksumType.CRC_32C.hashFunction;
421      }
422    },
423    ABSTRACT_HASH_FUNCTION {
424      @Override
425      public HashFunction get() {
426        return Crc32cHashFunction.CRC_32_C;
427      }
428    };
429
430    static final HashFunction HASH_FUNCTION = pickFunction().get();
431
432    private static Crc32CSupplier pickFunction() {
433      Crc32CSupplier[] functions = values();
434
435      if (functions.length == 1) {
436        // We're running under J2ObjC.
437        return functions[0];
438      }
439
440      // We can't refer to JAVA_UTIL_ZIP directly at compile time because of J2ObjC.
441      Crc32CSupplier javaUtilZip = functions[0];
442
443      try {
444        Class.forName("java.util.zip.CRC32C");
445        return javaUtilZip;
446      } catch (ClassNotFoundException runningUnderJava8) {
447        return ABSTRACT_HASH_FUNCTION;
448      }
449    }
450  }
451
452  /**
453   * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits).
454   *
455   * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code
456   * HashCode} produced by this function, use {@link HashCode#padToLong()}.
457   *
458   * <p>This function is best understood as a <a
459   * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a
460   * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
461   *
462   * @since 14.0
463   */
464  public static HashFunction crc32() {
465    return ChecksumType.CRC_32.hashFunction;
466  }
467
468  /**
469   * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits).
470   *
471   * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a {@code
472   * HashCode} produced by this function, use {@link HashCode#padToLong()}.
473   *
474   * <p>This function is best understood as a <a
475   * href="https://en.wikipedia.org/wiki/Checksum">checksum</a> rather than a true <a
476   * href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
477   *
478   * @since 14.0
479   */
480  public static HashFunction adler32() {
481    return ChecksumType.ADLER_32.hashFunction;
482  }
483
484  @Immutable
485  enum ChecksumType implements ImmutableSupplier<Checksum> {
486    CRC_32("Hashing.crc32()") {
487      @Override
488      public Checksum get() {
489        return new CRC32();
490      }
491    },
492    @J2ObjCIncompatible
493    CRC_32C("Hashing.crc32c()") {
494      @Override
495      public Checksum get() {
496        return Crc32cMethodHandles.newCrc32c();
497      }
498    },
499    ADLER_32("Hashing.adler32()") {
500      @Override
501      public Checksum get() {
502        return new Adler32();
503      }
504    };
505
506    public final HashFunction hashFunction;
507
508    ChecksumType(String toString) {
509      this.hashFunction = new ChecksumHashFunction(this, 32, toString);
510    }
511  }
512
513  @J2ObjCIncompatible
514  @SuppressWarnings("unused")
515  private static final class Crc32cMethodHandles {
516    private static final MethodHandle CONSTRUCTOR = crc32cConstructor();
517
518    @IgnoreJRERequirement // https://github.com/mojohaus/animal-sniffer/issues/67
519    static Checksum newCrc32c() {
520      try {
521        return (Checksum) CONSTRUCTOR.invokeExact();
522      } catch (Throwable e) {
523        throwIfUnchecked(e);
524        // This should be impossible, since the constructor has no `throws` clause.
525        throw new UndeclaredThrowableException(e);
526      }
527    }
528
529    private static MethodHandle crc32cConstructor() {
530      try {
531        Class<?> clazz = Class.forName("java.util.zip.CRC32C");
532        /*
533         * We can't cast to CRC32C at the call site because we support building with Java 8
534         * (https://github.com/google/guava/issues/6549). So we have to use asType() to change from
535         * CRC32C to Checksum. This may carry some performance cost
536         * (https://stackoverflow.com/a/22321671/28465), but I'd have to benchmark more carefully to
537         * even detect it.
538         */
539        return MethodHandles.lookup()
540            .findConstructor(clazz, methodType(void.class))
541            .asType(methodType(Checksum.class));
542      } catch (ClassNotFoundException e) {
543        // We check that the class is available before calling this method.
544        throw new AssertionError(e);
545      } catch (IllegalAccessException e) {
546        // That API is public.
547        throw newLinkageError(e);
548      } catch (NoSuchMethodException e) {
549        // That constructor exists.
550        throw newLinkageError(e);
551      }
552    }
553
554    private static LinkageError newLinkageError(Throwable cause) {
555      return new LinkageError(cause.toString(), cause);
556    }
557  }
558
559  /**
560   * Returns a hash function implementing FarmHash's Fingerprint64, an open-source algorithm.
561   *
562   * <p>This is designed for generating persistent fingerprints of strings. It isn't
563   * cryptographically secure, but it produces a high-quality hash with fewer collisions than some
564   * alternatives we've used in the past.
565   *
566   * <p>FarmHash fingerprints are encoded by {@link HashCode#asBytes} in little-endian order. This
567   * means {@link HashCode#asLong} is guaranteed to return the same value that
568   * farmhash::Fingerprint64() would for the same input (when compared using {@link
569   * com.google.common.primitives.UnsignedLongs}'s encoding of 64-bit unsigned numbers).
570   *
571   * <p>This function is best understood as a <a
572   * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true
573   * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
574   *
575   * @since 20.0
576   */
577  public static HashFunction farmHashFingerprint64() {
578    return FarmHashFingerprint64.FARMHASH_FINGERPRINT_64;
579  }
580
581  /**
582   * Returns a hash function implementing the Fingerprint2011 hashing function (64 hash bits).
583   *
584   * <p>This is designed for generating persistent fingerprints of strings. It isn't
585   * cryptographically secure, but it produces a high-quality hash with few collisions. Fingerprints
586   * generated using this are byte-wise identical to those created using the C++ version, but note
587   * that this uses unsigned integers (see {@link com.google.common.primitives.UnsignedInts}).
588   * Comparisons between the two should take this into account.
589   *
590   * <p>Fingerprint2011() is a form of Murmur2 on strings up to 32 bytes and a form of CityHash for
591   * longer strings. It could have been one or the other throughout. The main advantage of the
592   * combination is that CityHash has a bunch of special cases for short strings that don't need to
593   * be replicated here. The result will never be 0 or 1.
594   *
595   * <p>This function is best understood as a <a
596   * href="https://en.wikipedia.org/wiki/Fingerprint_(computing)">fingerprint</a> rather than a true
597   * <a href="https://en.wikipedia.org/wiki/Hash_function">hash function</a>.
598   *
599   * @since 31.1
600   */
601  public static HashFunction fingerprint2011() {
602    return Fingerprint2011.FINGERPRINT_2011;
603  }
604
605  /**
606   * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform manner
607   * that minimizes the need for remapping as {@code buckets} grows. That is, {@code
608   * consistentHash(h, n)} equals:
609   *
610   * <ul>
611   *   <li>{@code n - 1}, with approximate probability {@code 1/n}
612   *   <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
613   * </ul>
614   *
615   * <p>This method is suitable for the common use case of dividing work among buckets that meet the
616   * following conditions:
617   *
618   * <ul>
619   *   <li>You want to assign the same fraction of inputs to each bucket.
620   *   <li>When you reduce the number of buckets, you can accept that the most recently added
621   *       buckets will be removed first. More concretely, if you are dividing traffic among tasks,
622   *       you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and
623   *       {@code consistentHash} will handle it. If, however, you are dividing traffic among
624   *       servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to
625   *       take each of the servers offline, {@code consistentHash} will be a poor fit: It provides
626   *       no way for you to specify which of the three buckets is disappearing. Thus, if your
627   *       buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will
628   *       assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo}
629   *       traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic.
630   * </ul>
631   *
632   * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on
633   * consistent hashing</a> for more information.
634   */
635  public static int consistentHash(HashCode hashCode, int buckets) {
636    return consistentHash(hashCode.padToLong(), buckets);
637  }
638
639  /**
640   * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform manner that
641   * minimizes the need for remapping as {@code buckets} grows. That is, {@code consistentHash(h,
642   * n)} equals:
643   *
644   * <ul>
645   *   <li>{@code n - 1}, with approximate probability {@code 1/n}
646   *   <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
647   * </ul>
648   *
649   * <p>This method is suitable for the common use case of dividing work among buckets that meet the
650   * following conditions:
651   *
652   * <ul>
653   *   <li>You want to assign the same fraction of inputs to each bucket.
654   *   <li>When you reduce the number of buckets, you can accept that the most recently added
655   *       buckets will be removed first. More concretely, if you are dividing traffic among tasks,
656   *       you can decrease the number of tasks from 15 and 10, killing off the final 5 tasks, and
657   *       {@code consistentHash} will handle it. If, however, you are dividing traffic among
658   *       servers {@code alpha}, {@code bravo}, and {@code charlie} and you occasionally need to
659   *       take each of the servers offline, {@code consistentHash} will be a poor fit: It provides
660   *       no way for you to specify which of the three buckets is disappearing. Thus, if your
661   *       buckets change from {@code [alpha, bravo, charlie]} to {@code [bravo, charlie]}, it will
662   *       assign all the old {@code alpha} traffic to {@code bravo} and all the old {@code bravo}
663   *       traffic to {@code charlie}, rather than letting {@code bravo} keep its traffic.
664   * </ul>
665   *
666   * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">Wikipedia article on
667   * consistent hashing</a> for more information.
668   */
669  public static int consistentHash(long input, int buckets) {
670    checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
671    LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input);
672    int candidate = 0;
673    int next;
674
675    // Jump from bucket to bucket until we go out of range
676    while (true) {
677      next = (int) ((candidate + 1) / generator.nextDouble());
678      if (next >= 0 && next < buckets) {
679        candidate = next;
680      } else {
681        return candidate;
682      }
683    }
684  }
685
686  /**
687   * Returns a hash code, having the same bit length as each of the input hash codes, that combines
688   * the information of these hash codes in an ordered fashion. That is, whenever two equal hash
689   * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each
690   * was computed from the <i>same</i> input hash codes in the <i>same</i> order.
691   *
692   * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all
693   *     have the same bit length
694   */
695  public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
696    Iterator<HashCode> iterator = hashCodes.iterator();
697    checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
698    int bits = iterator.next().bits();
699    byte[] resultBytes = new byte[bits / 8];
700    for (HashCode hashCode : hashCodes) {
701      byte[] nextBytes = hashCode.asBytes();
702      checkArgument(
703          nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length.");
704      for (int i = 0; i < nextBytes.length; i++) {
705        resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
706      }
707    }
708    return HashCode.fromBytesNoCopy(resultBytes);
709  }
710
711  /**
712   * Returns a hash code, having the same bit length as each of the input hash codes, that combines
713   * the information of these hash codes in an unordered fashion. That is, whenever two equal hash
714   * codes are produced by two calls to this method, it is <i>as likely as possible</i> that each
715   * was computed from the <i>same</i> input hash codes in <i>some</i> order.
716   *
717   * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes do not all
718   *     have the same bit length
719   */
720  public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
721    Iterator<HashCode> iterator = hashCodes.iterator();
722    checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
723    byte[] resultBytes = new byte[iterator.next().bits() / 8];
724    for (HashCode hashCode : hashCodes) {
725      byte[] nextBytes = hashCode.asBytes();
726      checkArgument(
727          nextBytes.length == resultBytes.length, "All hashcodes must have the same bit length.");
728      for (int i = 0; i < nextBytes.length; i++) {
729        resultBytes[i] += nextBytes[i];
730      }
731    }
732    return HashCode.fromBytesNoCopy(resultBytes);
733  }
734
735  /** Checks that the passed argument is positive, and ceils it to a multiple of 32. */
736  static int checkPositiveAndMakeMultipleOf32(int bits) {
737    checkArgument(bits > 0, "Number of bits must be positive");
738    return (bits + 31) & ~31;
739  }
740
741  /**
742   * Returns a hash function which computes its hash code by concatenating the hash codes of the
743   * underlying hash functions together. This can be useful if you need to generate hash codes of a
744   * specific length.
745   *
746   * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash
747   * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}.
748   *
749   * @since 19.0
750   */
751  public static HashFunction concatenating(
752      HashFunction first, HashFunction second, HashFunction... rest) {
753    // We can't use Lists.asList() here because there's no hash->collect dependency
754    List<HashFunction> list = new ArrayList<>();
755    list.add(first);
756    list.add(second);
757    Collections.addAll(list, rest);
758    return new ConcatenatedHashFunction(list.toArray(new HashFunction[0]));
759  }
760
761  /**
762   * Returns a hash function which computes its hash code by concatenating the hash codes of the
763   * underlying hash functions together. This can be useful if you need to generate hash codes of a
764   * specific length.
765   *
766   * <p>For example, if you need 1024-bit hash codes, you could join two {@link Hashing#sha512} hash
767   * functions together: {@code Hashing.concatenating(Hashing.sha512(), Hashing.sha512())}.
768   *
769   * @since 19.0
770   */
771  public static HashFunction concatenating(Iterable<HashFunction> hashFunctions) {
772    checkNotNull(hashFunctions);
773    // We can't use Iterables.toArray() here because there's no hash->collect dependency
774    List<HashFunction> list = new ArrayList<>();
775    for (HashFunction hashFunction : hashFunctions) {
776      list.add(hashFunction);
777    }
778    checkArgument(!list.isEmpty(), "number of hash functions (%s) must be > 0", list.size());
779    return new ConcatenatedHashFunction(list.toArray(new HashFunction[0]));
780  }
781
782  private static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
783
784    private ConcatenatedHashFunction(HashFunction... functions) {
785      super(functions);
786      for (HashFunction function : functions) {
787        checkArgument(
788            function.bits() % 8 == 0,
789            "the number of bits (%s) in hashFunction (%s) must be divisible by 8",
790            function.bits(),
791            function);
792      }
793    }
794
795    @Override
796    HashCode makeHash(Hasher[] hashers) {
797      byte[] bytes = new byte[bits() / 8];
798      int i = 0;
799      for (Hasher hasher : hashers) {
800        HashCode newHash = hasher.hash();
801        i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8);
802      }
803      return HashCode.fromBytesNoCopy(bytes);
804    }
805
806    @Override
807    public int bits() {
808      int bitSum = 0;
809      for (HashFunction function : functions) {
810        bitSum += function.bits();
811      }
812      return bitSum;
813    }
814
815    @Override
816    public boolean equals(@CheckForNull Object object) {
817      if (object instanceof ConcatenatedHashFunction) {
818        ConcatenatedHashFunction other = (ConcatenatedHashFunction) object;
819        return Arrays.equals(functions, other.functions);
820      }
821      return false;
822    }
823
824    @Override
825    public int hashCode() {
826      return Arrays.hashCode(functions);
827    }
828  }
829
830  /**
831   * Linear CongruentialGenerator to use for consistent hashing. See
832   * http://en.wikipedia.org/wiki/Linear_congruential_generator
833   */
834  private static final class LinearCongruentialGenerator {
835    private long state;
836
837    public LinearCongruentialGenerator(long seed) {
838      this.state = seed;
839    }
840
841    public double nextDouble() {
842      state = 2862933555777941757L * state + 1;
843      return ((double) ((int) (state >>> 33) + 1)) / 0x1.0p31;
844    }
845  }
846
847  private Hashing() {}
848}