001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.VisibleForTesting; 021import com.google.common.base.Supplier; 022 023import java.nio.ByteBuffer; 024import java.security.MessageDigest; 025import java.util.Iterator; 026import java.util.zip.Adler32; 027import java.util.zip.CRC32; 028import java.util.zip.Checksum; 029 030/** 031 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 032 * utilities. 033 * 034 * @author Kevin Bourrillion 035 * @author Dimitris Andreou 036 * @author Kurt Alfred Kluever 037 * @since 11.0 038 */ 039@Beta 040public final class Hashing { 041 private Hashing() {} 042 043 /** 044 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 045 * dependent on hashcodes of those, will fail sooner than later. 046 */ 047 private static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 048 049 // Used by goodFastHash when minimumBits == 32. 050 private static final HashFunction GOOD_FAST_HASH_FUNCTION_32 = murmur3_32(GOOD_FAST_HASH_SEED); 051 052 // Used by goodFastHash when 32 < minimumBits <= 128. 053 private static final HashFunction GOOD_FAST_HASH_FUNCTION_128 = murmur3_128(GOOD_FAST_HASH_SEED); 054 055 /** 056 * Returns a general-purpose, <b>non-cryptographic-strength</b>, streaming hash function that 057 * produces hash codes of length at least {@code minimumBits}. Users without specific 058 * compatibility requirements and who do not persist the hash codes are encouraged to 059 * choose this hash function. 060 * 061 * <p>Repeated calls to {@link #goodFastHash} with the same {@code minimumBits} value will 062 * return {@link HashFunction} instances with identical behavior (but not necessarily the 063 * same instance) for the duration of the current virtual machine. 064 * 065 * <p><b>Warning: the implementation is unspecified and is subject to change.</b> 066 * 067 * @throws IllegalArgumentException if {@code minimumBits} is not positive 068 */ 069 public static HashFunction goodFastHash(int minimumBits) { 070 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 071 072 if (bits == 32) { 073 return GOOD_FAST_HASH_FUNCTION_32; 074 } 075 if (bits <= 128) { 076 return GOOD_FAST_HASH_FUNCTION_128; 077 } 078 079 // Otherwise, join together some 128-bit murmur3s 080 int hashFunctionsNeeded = (bits + 127) / 128; 081 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 082 hashFunctions[0] = GOOD_FAST_HASH_FUNCTION_128; 083 int seed = GOOD_FAST_HASH_SEED; 084 for (int i = 1; i < hashFunctionsNeeded; i++) { 085 seed += 1500450271; // a prime; shouldn't matter 086 hashFunctions[i] = murmur3_128(seed); 087 } 088 return new ConcatenatedHashFunction(hashFunctions); 089 } 090 091 /** 092 * Returns a hash function implementing the 093 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 094 * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant), 095 * using the given seed value. 096 * 097 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 098 */ 099 public static HashFunction murmur3_32(int seed) { 100 return new Murmur3_32HashFunction(seed); 101 } 102 103 /** 104 * Returns a hash function implementing the 105 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 106 * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant), 107 * using a seed value of zero. 108 * 109 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 110 */ 111 public static HashFunction murmur3_32() { 112 return MURMUR3_32; 113 } 114 115 private static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0); 116 117 /** 118 * Returns a hash function implementing the 119 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 120 * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), 121 * using the given seed value. 122 * 123 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 124 */ 125 public static HashFunction murmur3_128(int seed) { 126 return new Murmur3_128HashFunction(seed); 127 } 128 129 /** 130 * Returns a hash function implementing the 131 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 132 * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), 133 * using a seed value of zero. 134 * 135 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 136 */ 137 public static HashFunction murmur3_128() { 138 return MURMUR3_128; 139 } 140 141 private static final HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0); 142 143 /** 144 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits) by delegating to 145 * the MD5 {@link MessageDigest}. 146 */ 147 public static HashFunction md5() { 148 return MD5; 149 } 150 151 private static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 152 153 /** 154 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits) by delegating to the 155 * SHA-1 {@link MessageDigest}. 156 */ 157 public static HashFunction sha1() { 158 return SHA_1; 159 } 160 161 private static final HashFunction SHA_1 = 162 new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 163 164 /** 165 * Returns a hash function implementing the SHA-256 algorithm (256 hash bits) by delegating to 166 * the SHA-256 {@link MessageDigest}. 167 */ 168 public static HashFunction sha256() { 169 return SHA_256; 170 } 171 172 private static final HashFunction SHA_256 = 173 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 174 175 /** 176 * Returns a hash function implementing the SHA-512 algorithm (512 hash bits) by delegating to the 177 * SHA-512 {@link MessageDigest}. 178 */ 179 public static HashFunction sha512() { 180 return SHA_512; 181 } 182 183 private static final HashFunction SHA_512 = 184 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 185 186 /** 187 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits) by delegating 188 * to the {@link CRC32} {@link Checksum}. 189 * 190 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a 191 * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}. 192 * 193 * @since 14.0 194 */ 195 public static HashFunction crc32() { 196 return CRC_32; 197 } 198 199 private static final HashFunction CRC_32 = 200 checksumHashFunction(ChecksumType.CRC_32, "Hashing.crc32()"); 201 202 /** 203 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits) by 204 * delegating to the {@link Adler32} {@link Checksum}. 205 * 206 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a 207 * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}. 208 * 209 * @since 14.0 210 */ 211 public static HashFunction adler32() { 212 return ADLER_32; 213 } 214 215 private static final HashFunction ADLER_32 = 216 checksumHashFunction(ChecksumType.ADLER_32, "Hashing.adler32()"); 217 218 private static HashFunction checksumHashFunction(ChecksumType type, String toString) { 219 return new ChecksumHashFunction(type, type.bits, toString); 220 } 221 222 enum ChecksumType implements Supplier<Checksum> { 223 CRC_32(32) { 224 @Override 225 public Checksum get() { 226 return new CRC32(); 227 } 228 }, 229 ADLER_32(32) { 230 @Override 231 public Checksum get() { 232 return new Adler32(); 233 } 234 }; 235 236 private final int bits; 237 238 ChecksumType(int bits) { 239 this.bits = bits; 240 } 241 242 @Override 243 public abstract Checksum get(); 244 } 245 246 // Lazy initialization holder class idiom. 247 248 /** 249 * If {@code hashCode} has enough bits, returns {@code hashCode.asLong()}, otherwise 250 * returns a {@code long} value with {@code hashCode.asInt()} as the least-significant 251 * four bytes and {@code 0x00} as each of the most-significant four bytes. 252 * 253 * @deprecated Use {@code HashCode.padToLong()} instead. This method is scheduled to be 254 * removed in Guava 15.0. 255 */ 256 @Deprecated 257 public static long padToLong(HashCode hashCode) { 258 return hashCode.padToLong(); 259 } 260 261 /** 262 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform 263 * manner that minimizes the need for remapping as {@code buckets} grows. That is, 264 * {@code consistentHash(h, n)} equals: 265 * 266 * <ul> 267 * <li>{@code n - 1}, with approximate probability {@code 1/n} 268 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 269 * </ul> 270 * 271 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia 272 * article on consistent hashing</a> for more information. 273 * <p> 274 * If you might want to have weights for the buckets in the future, take a look at 275 * {@code weightedConsistentHash}. 276 */ 277 public static int consistentHash(HashCode hashCode, int buckets) { 278 return consistentHash(hashCode.padToLong(), buckets); 279 } 280 281 /** 282 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform 283 * manner that minimizes the need for remapping as {@code buckets} grows. That is, 284 * {@code consistentHash(h, n)} equals: 285 * 286 * <ul> 287 * <li>{@code n - 1}, with approximate probability {@code 1/n} 288 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 289 * </ul> 290 * 291 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia 292 * article on consistent hashing</a> for more information. 293 * <p> 294 * If you might want to have weights for the buckets in the future, take a look at 295 * {@code weightedConsistentHash}. 296 */ 297 public static int consistentHash(long input, int buckets) { 298 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 299 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 300 int candidate = 0; 301 int next; 302 303 // Jump from bucket to bucket until we go out of range 304 while (true) { 305 next = (int) ((candidate + 1) / generator.nextDouble()); 306 if (next >= 0 && next < buckets) { 307 candidate = next; 308 } else { 309 return candidate; 310 } 311 } 312 } 313 314 /** 315 * Returns a hash code, having the same bit length as each of the input hash codes, 316 * that combines the information of these hash codes in an ordered fashion. That 317 * is, whenever two equal hash codes are produced by two calls to this method, it 318 * is <i>as likely as possible</i> that each was computed from the <i>same</i> 319 * input hash codes in the <i>same</i> order. 320 * 321 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes 322 * do not all have the same bit length 323 */ 324 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 325 Iterator<HashCode> iterator = hashCodes.iterator(); 326 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 327 int bits = iterator.next().bits(); 328 byte[] resultBytes = new byte[bits / 8]; 329 for (HashCode hashCode : hashCodes) { 330 byte[] nextBytes = hashCode.asBytes(); 331 checkArgument(nextBytes.length == resultBytes.length, 332 "All hashcodes must have the same bit length."); 333 for (int i = 0; i < nextBytes.length; i++) { 334 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 335 } 336 } 337 return HashCodes.fromBytesNoCopy(resultBytes); 338 } 339 340 /** 341 * Returns a hash code, having the same bit length as each of the input hash codes, 342 * that combines the information of these hash codes in an unordered fashion. That 343 * is, whenever two equal hash codes are produced by two calls to this method, it 344 * is <i>as likely as possible</i> that each was computed from the <i>same</i> 345 * input hash codes in <i>some</i> order. 346 * 347 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes 348 * do not all have the same bit length 349 */ 350 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 351 Iterator<HashCode> iterator = hashCodes.iterator(); 352 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 353 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 354 for (HashCode hashCode : hashCodes) { 355 byte[] nextBytes = hashCode.asBytes(); 356 checkArgument(nextBytes.length == resultBytes.length, 357 "All hashcodes must have the same bit length."); 358 for (int i = 0; i < nextBytes.length; i++) { 359 resultBytes[i] += nextBytes[i]; 360 } 361 } 362 return HashCodes.fromBytesNoCopy(resultBytes); 363 } 364 365 /** 366 * Checks that the passed argument is positive, and ceils it to a multiple of 32. 367 */ 368 static int checkPositiveAndMakeMultipleOf32(int bits) { 369 checkArgument(bits > 0, "Number of bits must be positive"); 370 return (bits + 31) & ~31; 371 } 372 373 // TODO(kevinb): Maybe expose this class via a static Hashing method? 374 @VisibleForTesting 375 static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 376 private final int bits; 377 378 ConcatenatedHashFunction(HashFunction... functions) { 379 super(functions); 380 int bitSum = 0; 381 for (HashFunction function : functions) { 382 bitSum += function.bits(); 383 } 384 this.bits = bitSum; 385 } 386 387 @Override 388 HashCode makeHash(Hasher[] hashers) { 389 // TODO(user): Get rid of the ByteBuffer here? 390 byte[] bytes = new byte[bits / 8]; 391 ByteBuffer buffer = ByteBuffer.wrap(bytes); 392 for (Hasher hasher : hashers) { 393 buffer.put(hasher.hash().asBytes()); 394 } 395 return HashCodes.fromBytesNoCopy(bytes); 396 } 397 398 @Override 399 public int bits() { 400 return bits; 401 } 402 } 403 404 /** 405 * Linear CongruentialGenerator to use for consistent hashing. 406 * See http://en.wikipedia.org/wiki/Linear_congruential_generator 407 */ 408 private static final class LinearCongruentialGenerator { 409 private long state; 410 411 public LinearCongruentialGenerator(long seed) { 412 this.state = seed; 413 } 414 415 public double nextDouble() { 416 state = 2862933555777941757L * state + 1; 417 return ((double) ((int) (state >>> 33) + 1)) / (0x1.0p31); 418 } 419 } 420}