001    /*
002     * Copyright (C) 2011 The Guava Authors
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005     * in compliance with the License. You may obtain a copy of the License at
006     *
007     * http://www.apache.org/licenses/LICENSE-2.0
008     *
009     * Unless required by applicable law or agreed to in writing, software distributed under the License
010     * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011     * or implied. See the License for the specific language governing permissions and limitations under
012     * the License.
013     */
014    
015    package com.google.common.hash;
016    
017    import static com.google.common.base.Preconditions.checkArgument;
018    
019    import com.google.common.annotations.Beta;
020    import com.google.common.annotations.VisibleForTesting;
021    import com.google.common.primitives.UnsignedInts;
022    
023    import java.nio.ByteBuffer;
024    import java.security.MessageDigest;
025    import java.util.Iterator;
026    
027    /**
028     * Static methods to obtain {@link HashFunction} instances, and other static
029     * hashing-related utilities.
030     *
031     * @author Kevin Bourrillion
032     * @author Dimitris Andreou
033     * @author Kurt Alfred Kluever
034     * @since 11.0
035     */
036    @Beta
037    public final class Hashing {
038      private Hashing() {}
039    
040      /**
041       * Used to randomize goodFastHash() instances, so that programs which persist anything
042       * dependent on hashcodes of those, will fail sooner than later.
043       */
044      private static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis();
045    
046      /**
047       * Returns a general-purpose, <b>non-cryptographic-strength</b>, streaming hash function that
048       * produces hash codes of length at least {@code minimumBits}. Users without specific
049       * compatibility requirements and who do not persist the hash codes are encouraged to
050       * choose this hash function.
051       *
052       * <p><b>Warning: the implementation is unspecified and is subject to change.</b>
053       *
054       * @throws IllegalArgumentException if {@code minimumBits} is not positive
055       */
056      public static HashFunction goodFastHash(int minimumBits) {
057        int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
058    
059        if (bits == 32) {
060          return murmur3_32(GOOD_FAST_HASH_SEED);
061        } else if (bits <= 128) {
062          return murmur3_128(GOOD_FAST_HASH_SEED);
063        } else {
064          // Join some 128-bit murmur3s
065          int hashFunctionsNeeded = (bits + 127) / 128;
066          HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
067          int seed = GOOD_FAST_HASH_SEED;
068          for (int i = 0; i < hashFunctionsNeeded; i++) {
069            hashFunctions[i] = murmur3_128(seed);
070            seed += 1500450271; // a prime; shouldn't matter
071          }
072          return new ConcatenatedHashFunction(hashFunctions);
073        }
074      }
075    
076      /**
077       * Returns a hash function implementing the
078       * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">32-bit murmur3
079       * algorithm</a> (little-endian variant), using the given seed value.
080       */
081      public static HashFunction murmur3_32(int seed) {
082        return new Murmur3_32HashFunction(seed);
083      }
084    
085      /**
086       * Returns a hash function implementing the
087       * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">32-bit murmur3
088       * algorithm</a> (little-endian variant), using a seed value of zero.
089       */
090      public static HashFunction murmur3_32() {
091        return MURMUR3_32;
092      }
093    
094      private static final Murmur3_32HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0);
095    
096      /**
097       * Returns a hash function implementing the
098       * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
099       * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), using the given seed
100       * value.
101       */
102      public static HashFunction murmur3_128(int seed) {
103        return new Murmur3_128HashFunction(seed);
104      }
105    
106      /**
107       * Returns a hash function implementing the
108       * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
109       * 128-bit murmur3 algorithm, x64 variant</a>  (little-endian variant), using a seed value
110       * of zero.
111       */
112      public static HashFunction murmur3_128() {
113        return MURMUR3_128;
114      }
115    
116      private static final Murmur3_128HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0);
117    
118      /**
119       * Returns a hash function implementing the MD5 hash algorithm (128 hash bits) by delegating to
120       * the MD5 {@link MessageDigest}.
121       */
122      public static HashFunction md5() {
123        return MD5;
124      }
125    
126      private static final HashFunction MD5 = new MessageDigestHashFunction("MD5");
127    
128      /**
129       * Returns a hash function implementing the SHA-1 algorithm (160 hash bits) by delegating to the
130       * SHA-1 {@link MessageDigest}.
131       */
132      public static HashFunction sha1() {
133        return SHA_1;
134      }
135    
136      private static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1");
137    
138      /**
139       * Returns a hash function implementing the SHA-256 algorithm (256 hash bits) by delegating to
140       * the SHA-256 {@link MessageDigest}.
141       */
142      public static HashFunction sha256() {
143        return SHA_256;
144      }
145    
146      private static final HashFunction SHA_256 = new MessageDigestHashFunction("SHA-256");
147    
148      /**
149       * Returns a hash function implementing the SHA-512 algorithm (512 hash bits) by delegating to the
150       * SHA-512 {@link MessageDigest}.
151       */
152      public static HashFunction sha512() {
153        return SHA_512;
154      }
155    
156      private static final HashFunction SHA_512 = new MessageDigestHashFunction("SHA-512");
157    
158      /**
159       * If {@code hashCode} has enough bits, returns {@code hashCode.asLong()}, otherwise
160       * returns a {@code long} value with {@code hashCode.asInt()} as the least-significant
161       * four bytes and {@code 0x00} as each of the most-significant four bytes.
162       */
163      public static long padToLong(HashCode hashCode) {
164        return (hashCode.bits() < 64) ? UnsignedInts.toLong(hashCode.asInt()) : hashCode.asLong();
165      }
166    
167      /**
168       * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform
169       * manner that minimizes the need for remapping as {@code buckets} grows. That is,
170       * {@code consistentHash(h, n)} equals:
171       *
172       * <ul>
173       * <li>{@code n - 1}, with approximate probability {@code 1/n}
174       * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
175       * </ul>
176       *
177       * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
178       * article on consistent hashing</a> for more information.
179       */
180      public static int consistentHash(HashCode hashCode, int buckets) {
181        return consistentHash(padToLong(hashCode), buckets);
182      }
183    
184      /**
185       * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform
186       * manner that minimizes the need for remapping as {@code buckets} grows. That is,
187       * {@code consistentHash(h, n)} equals:
188       *
189       * <ul>
190       * <li>{@code n - 1}, with approximate probability {@code 1/n}
191       * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
192       * </ul>
193       *
194       * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
195       * article on consistent hashing</a> for more information.
196       */
197      public static int consistentHash(long input, int buckets) {
198        checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
199        long h = input;
200        int candidate = 0;
201        int next;
202    
203        // Jump from bucket to bucket until we go out of range
204        while (true) {
205          // See http://en.wikipedia.org/wiki/Linear_congruential_generator
206          // These values for a and m come from the C++ version of this function.
207          h = 2862933555777941757L * h + 1;
208          double inv = 0x1.0p31 / ((int) (h >>> 33) + 1);
209          next = (int) ((candidate + 1) * inv);
210    
211          if (next >= 0 && next < buckets) {
212            candidate = next;
213          } else {
214            return candidate;
215          }
216        }
217      }
218    
219      /**
220       * Returns a hash code, having the same bit length as each of the input hash codes,
221       * that combines the information of these hash codes in an ordered fashion. That
222       * is, whenever two equal hash codes are produced by two calls to this method, it
223       * is <i>as likely as possible</i> that each was computed from the <i>same</i>
224       * input hash codes in the <i>same</i> order.
225       *
226       * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
227       *     do not all have the same bit length
228       */
229      public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
230        Iterator<HashCode> iterator = hashCodes.iterator();
231        checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
232        int bits = iterator.next().bits();
233        byte[] resultBytes = new byte[bits / 8];
234        for (HashCode hashCode : hashCodes) {
235          byte[] nextBytes = hashCode.asBytes();
236          checkArgument(nextBytes.length == resultBytes.length,
237              "All hashcodes must have the same bit length.");
238          for (int i = 0; i < nextBytes.length; i++) {
239            resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
240          }
241        }
242        return HashCodes.fromBytesNoCopy(resultBytes);
243      }
244    
245      /**
246       * Returns a hash code, having the same bit length as each of the input hash codes,
247       * that combines the information of these hash codes in an unordered fashion. That
248       * is, whenever two equal hash codes are produced by two calls to this method, it
249       * is <i>as likely as possible</i> that each was computed from the <i>same</i>
250       * input hash codes in <i>some</i> order.
251       *
252       * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
253       *     do not all have the same bit length
254       */
255      public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
256        Iterator<HashCode> iterator = hashCodes.iterator();
257        checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
258        byte[] resultBytes = new byte[iterator.next().bits() / 8];
259        for (HashCode hashCode : hashCodes) {
260          byte[] nextBytes = hashCode.asBytes();
261          checkArgument(nextBytes.length == resultBytes.length,
262              "All hashcodes must have the same bit length.");
263          for (int i = 0; i < nextBytes.length; i++) {
264            resultBytes[i] += nextBytes[i];
265          }
266        }
267        return HashCodes.fromBytesNoCopy(resultBytes);
268      }
269    
270      /**
271       * Checks that the passed argument is positive, and ceils it to a multiple of 32.
272       */
273      static int checkPositiveAndMakeMultipleOf32(int bits) {
274        checkArgument(bits > 0, "Number of bits must be positive");
275        return (bits + 31) & ~31;
276      }
277    
278      // TODO(kevinb): Maybe expose this class via a static Hashing method?
279      @VisibleForTesting
280      static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
281        private final int bits;
282    
283        ConcatenatedHashFunction(HashFunction... functions) {
284          super(functions);
285          int bitSum = 0;
286          for (HashFunction function : functions) {
287            bitSum += function.bits();
288          }
289          this.bits = bitSum;
290        }
291    
292        @Override
293        HashCode makeHash(Hasher[] hashers) {
294          // TODO(user): Get rid of the ByteBuffer here?
295          byte[] bytes = new byte[bits / 8];
296          ByteBuffer buffer = ByteBuffer.wrap(bytes);
297          for (Hasher hasher : hashers) {
298            buffer.put(hasher.hash().asBytes());
299          }
300          return HashCodes.fromBytesNoCopy(bytes);
301        }
302    
303        @Override
304        public int bits() {
305          return bits;
306        }
307      }
308    }