001    /*
002     * Copyright (C) 2011 The Guava Authors
003     *
004     * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005     * in compliance with the License. You may obtain a copy of the License at
006     *
007     * http://www.apache.org/licenses/LICENSE-2.0
008     *
009     * Unless required by applicable law or agreed to in writing, software distributed under the License
010     * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011     * or implied. See the License for the specific language governing permissions and limitations under
012     * the License.
013     */
014    
015    package com.google.common.hash;
016    
017    import static com.google.common.base.Preconditions.checkArgument;
018    
019    import com.google.common.annotations.Beta;
020    import com.google.common.primitives.UnsignedInts;
021    
022    import java.nio.ByteBuffer;
023    import java.security.MessageDigest;
024    import java.util.Iterator;
025    
026    /**
027     * Static methods to obtain {@link HashFunction} instances, and other static
028     * hashing-related utilities.
029     *
030     * @author Kevin Bourrillion
031     * @author Dimitris Andreou
032     * @author Kurt Alfred Kluever
033     * @since 11.0
034     */
035    @Beta
036    public final class Hashing {
037      private Hashing() {}
038    
039      /**
040       * Returns a general-purpose, <b>non-cryptographic-strength</b>, streaming hash function that
041       * produces hash codes of length at least {@code minimumBits}. Users without specific
042       * compatibility requirements and who do not persist the hash codes are encouraged to
043       * choose this hash function.
044       *
045       * <p><b>Warning: the implementation is unspecified and is subject to change.</b>
046       *
047       * @throws IllegalArgumentException if {@code minimumBits} is not positive
048       */
049      public static HashFunction goodFastHash(int minimumBits) {
050        int bits = checkPositiveAndMakeMultipleOf32(minimumBits);
051    
052        if (bits == 32) {
053          return murmur3_32();
054        } else if (bits <= 128) {
055          return murmur3_128();
056        } else {
057          // Join some 128-bit murmur3s
058          int hashFunctionsNeeded = (bits + 127) / 128;
059          HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded];
060          for (int i = 0; i < hashFunctionsNeeded; i++) {
061            hashFunctions[i] = murmur3_128(i * 1500450271 /* a prime; shouldn't matter */);
062          }
063          return new ConcatenatedHashFunction(hashFunctions);
064        }
065      }
066    
067      /**
068       * Returns a hash function implementing the
069       * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">32-bit murmur3
070       * algorithm</a> (little-endian variant), using the given seed value.
071       */
072      public static HashFunction murmur3_32(int seed) {
073        return new Murmur3_32HashFunction(seed);
074      }
075    
076      /**
077       * Returns a hash function implementing the
078       * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">32-bit murmur3
079       * algorithm</a> (little-endian variant), using a seed value of zero.
080       */
081      public static HashFunction murmur3_32() {
082        return MURMUR3_32;
083      }
084    
085      private static final Murmur3_32HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0);
086    
087      /**
088       * Returns a hash function implementing the
089       * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
090       * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), using the given seed
091       * value.
092       */
093      public static HashFunction murmur3_128(int seed) {
094        return new Murmur3_128HashFunction(seed);
095      }
096    
097      /**
098       * Returns a hash function implementing the
099       * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp">
100       * 128-bit murmur3 algorithm, x64 variant</a>  (little-endian variant), using a seed value
101       * of zero.
102       */
103      public static HashFunction murmur3_128() {
104        return MURMUR3_128;
105      }
106    
107      private static final Murmur3_128HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0);
108    
109      /**
110       * Returns a hash function implementing the MD5 hash algorithm by delegating to the MD5
111       * {@link MessageDigest}.
112       */
113      public static HashFunction md5() {
114        return MD5;
115      }
116    
117      private static final HashFunction MD5 = new MessageDigestHashFunction("MD5");
118    
119      /**
120       * Returns a hash function implementing the SHA-1 algorithm by delegating to the SHA-1
121       * {@link MessageDigest}.
122       */
123      public static HashFunction sha1() {
124        return SHA_1;
125      }
126    
127      private static final HashFunction SHA_1 = new MessageDigestHashFunction("SHA-1");
128    
129      /**
130       * Returns a hash function implementing the SHA-256 algorithm by delegating to the SHA-256
131       * {@link MessageDigest}.
132       */
133      public static HashFunction sha256() {
134        return SHA_256;
135      }
136    
137      private static final HashFunction SHA_256 = new MessageDigestHashFunction("SHA-256");
138    
139      /**
140       * Returns a hash function implementing the SHA-512 algorithm by delegating to the SHA-512
141       * {@link MessageDigest}.
142       */
143      public static HashFunction sha512() {
144        return SHA_512;
145      }
146    
147      private static final HashFunction SHA_512 = new MessageDigestHashFunction("SHA-512");
148    
149      /**
150       * If {@code hashCode} has enough bits, returns {@code hashCode.asLong()}, otherwise
151       * returns a {@code long} value with {@code hashCode.asInt()} as the least-significant
152       * four bytes and {@code 0x00} as each of the most-significant four bytes.
153       */
154      public static long padToLong(HashCode hashCode) {
155        return (hashCode.bits() < 64) ? UnsignedInts.toLong(hashCode.asInt()) : hashCode.asLong();
156      }
157    
158      /**
159       * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform
160       * manner that minimizes the need for remapping as {@code buckets} grows. That is,
161       * {@code consistentHash(h, n)} equals:
162       *
163       * <ul>
164       * <li>{@code n - 1}, with approximate probability {@code 1/n}
165       * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
166       * </ul>
167       *
168       * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
169       * article on consistent hashing</a> for more information.
170       */
171      public static int consistentHash(HashCode hashCode, int buckets) {
172        return consistentHash(padToLong(hashCode), buckets);
173      }
174    
175      /**
176       * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform
177       * manner that minimizes the need for remapping as {@code buckets} grows. That is,
178       * {@code consistentHash(h, n)} equals:
179       *
180       * <ul>
181       * <li>{@code n - 1}, with approximate probability {@code 1/n}
182       * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n})
183       * </ul>
184       *
185       * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia
186       * article on consistent hashing</a> for more information.
187       */
188      public static int consistentHash(long input, int buckets) {
189        checkArgument(buckets > 0, "buckets must be positive: %s", buckets);
190        long h = input;
191        int candidate = 0;
192        int next;
193    
194        // Jump from bucket to bucket until we go out of range
195        while (true) {
196          // See http://en.wikipedia.org/wiki/Linear_congruential_generator
197          // These values for a and m come from the C++ version of this function.
198          h = 2862933555777941757L * h + 1;
199          double inv = 0x1.0p31 / ((int) (h >>> 33) + 1);
200          next = (int) ((candidate + 1) * inv);
201    
202          if (next >= 0 && next < buckets) {
203            candidate = next;
204          } else {
205            return candidate;
206          }
207        }
208      }
209    
210      /**
211       * Returns a hash code, having the same bit length as each of the input hash codes,
212       * that combines the information of these hash codes in an ordered fashion. That
213       * is, whenever two equal hash codes are produced by two calls to this method, it
214       * is <i>as likely as possible</i> that each was computed from the <i>same</i>
215       * input hash codes in the <i>same</i> order.
216       *
217       * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
218       *     do not all have the same bit length
219       */
220      public static HashCode combineOrdered(Iterable<HashCode> hashCodes) {
221        Iterator<HashCode> iterator = hashCodes.iterator();
222        checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
223        int bits = iterator.next().bits();
224        byte[] resultBytes = new byte[bits / 8];
225        for (HashCode hashCode : hashCodes) {
226          byte[] nextBytes = hashCode.asBytes();
227          checkArgument(nextBytes.length == resultBytes.length,
228              "All hashcodes must have the same bit length.");
229          for (int i = 0; i < nextBytes.length; i++) {
230            resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]);
231          }
232        }
233        return HashCodes.fromBytes(resultBytes);
234      }
235    
236      /**
237       * Returns a hash code, having the same bit length as each of the input hash codes,
238       * that combines the information of these hash codes in an unordered fashion. That
239       * is, whenever two equal hash codes are produced by two calls to this method, it
240       * is <i>as likely as possible</i> that each was computed from the <i>same</i>
241       * input hash codes in <i>some</i> order.
242       *
243       * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes
244       *     do not all have the same bit length
245       */
246      public static HashCode combineUnordered(Iterable<HashCode> hashCodes) {
247        Iterator<HashCode> iterator = hashCodes.iterator();
248        checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine.");
249        byte[] resultBytes = new byte[iterator.next().bits() / 8];
250        for (HashCode hashCode : hashCodes) {
251          byte[] nextBytes = hashCode.asBytes();
252          checkArgument(nextBytes.length == resultBytes.length,
253              "All hashcodes must have the same bit length.");
254          for (int i = 0; i < nextBytes.length; i++) {
255            resultBytes[i] += nextBytes[i];
256          }
257        }
258        return HashCodes.fromBytes(resultBytes);
259      }
260    
261      /**
262       * Checks that the passed argument is positive, and ceils it to a multiple of 32.
263       */
264      static int checkPositiveAndMakeMultipleOf32(int bits) {
265        checkArgument(bits > 0, "Number of bits must be positive");
266        return (bits + 31) & ~31;
267      }
268    
269      // TODO(kevinb): probably expose this via a Hashing method at some point?
270      private static class ConcatenatedHashFunction extends AbstractCompositeHashFunction {
271        final int bits;
272    
273        ConcatenatedHashFunction(HashFunction[] functions) {
274          super(functions);
275          int bitSum = 0;
276          for (HashFunction f : this.functions) {
277            bitSum += f.bits();
278          }
279          this.bits = bitSum;
280        }
281    
282        @Override
283        HashCode makeHash(Hasher[] hashers) {
284          byte[] bytes = new byte[bits / 8];
285          ByteBuffer buffer = ByteBuffer.wrap(bytes);
286          for (Hasher hasher : hashers) {
287            buffer.put(hasher.hash().asBytes());
288          }
289          return HashCodes.fromBytes(bytes);
290        }
291    
292        @Override
293        public int bits() {
294          return bits;
295        }
296      }
297    }