001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.VisibleForTesting; 021import com.google.common.base.Supplier; 022 023import java.security.MessageDigest; 024import java.util.Iterator; 025import java.util.zip.Adler32; 026import java.util.zip.CRC32; 027import java.util.zip.Checksum; 028 029import javax.annotation.Nullable; 030 031/** 032 * Static methods to obtain {@link HashFunction} instances, and other static hashing-related 033 * utilities. 034 * 035 * <p>A comparison of the various hash functions can be found 036 * <a href="http://goo.gl/jS7HH">here</a>. 037 * 038 * @author Kevin Bourrillion 039 * @author Dimitris Andreou 040 * @author Kurt Alfred Kluever 041 * @since 11.0 042 */ 043@Beta 044public final class Hashing { 045 /** 046 * Returns a general-purpose, <b>temporary-use</b>, non-cryptographic hash function. The algorithm 047 * the returned function implements is unspecified and subject to change without notice. 048 * 049 * <p><b>Warning:</b> a new random seed for these functions is chosen each time the {@code 050 * Hashing} class is loaded. <b>Do not use this method</b> if hash codes may escape the current 051 * process in any way, for example being sent over RPC, or saved to disk. 052 * 053 * <p>Repeated calls to this method on the same loaded {@code Hashing} class, using the same value 054 * for {@code minimumBits}, will return identically-behaving {@link HashFunction} instances. 055 * 056 * @param minimumBits a positive integer (can be arbitrarily large) 057 * @return a hash function, described above, that produces hash codes of length {@code 058 * minimumBits} or greater 059 */ 060 public static HashFunction goodFastHash(int minimumBits) { 061 int bits = checkPositiveAndMakeMultipleOf32(minimumBits); 062 063 if (bits == 32) { 064 return Murmur3_32Holder.GOOD_FAST_HASH_FUNCTION_32; 065 } 066 if (bits <= 128) { 067 return Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128; 068 } 069 070 // Otherwise, join together some 128-bit murmur3s 071 int hashFunctionsNeeded = (bits + 127) / 128; 072 HashFunction[] hashFunctions = new HashFunction[hashFunctionsNeeded]; 073 hashFunctions[0] = Murmur3_128Holder.GOOD_FAST_HASH_FUNCTION_128; 074 int seed = GOOD_FAST_HASH_SEED; 075 for (int i = 1; i < hashFunctionsNeeded; i++) { 076 seed += 1500450271; // a prime; shouldn't matter 077 hashFunctions[i] = murmur3_128(seed); 078 } 079 return new ConcatenatedHashFunction(hashFunctions); 080 } 081 082 /** 083 * Used to randomize {@link #goodFastHash} instances, so that programs which persist anything 084 * dependent on the hash codes they produce will fail sooner. 085 */ 086 private static final int GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); 087 088 /** 089 * Returns a hash function implementing the 090 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 091 * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant), 092 * using the given seed value. 093 * 094 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 095 */ 096 public static HashFunction murmur3_32(int seed) { 097 return new Murmur3_32HashFunction(seed); 098 } 099 100 /** 101 * Returns a hash function implementing the 102 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 103 * 32-bit murmur3 algorithm, x86 variant</a> (little-endian variant), 104 * using a seed value of zero. 105 * 106 * <p>The exact C++ equivalent is the MurmurHash3_x86_32 function (Murmur3A). 107 */ 108 public static HashFunction murmur3_32() { 109 return Murmur3_32Holder.MURMUR3_32; 110 } 111 112 private static class Murmur3_32Holder { 113 static final HashFunction MURMUR3_32 = new Murmur3_32HashFunction(0); 114 115 /** Returned by {@link #goodFastHash} when {@code minimumBits <= 32}. */ 116 static final HashFunction GOOD_FAST_HASH_FUNCTION_32 = murmur3_32(GOOD_FAST_HASH_SEED); 117 } 118 119 /** 120 * Returns a hash function implementing the 121 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 122 * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), 123 * using the given seed value. 124 * 125 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 126 */ 127 public static HashFunction murmur3_128(int seed) { 128 return new Murmur3_128HashFunction(seed); 129 } 130 131 /** 132 * Returns a hash function implementing the 133 * <a href="http://smhasher.googlecode.com/svn/trunk/MurmurHash3.cpp"> 134 * 128-bit murmur3 algorithm, x64 variant</a> (little-endian variant), 135 * using a seed value of zero. 136 * 137 * <p>The exact C++ equivalent is the MurmurHash3_x64_128 function (Murmur3F). 138 */ 139 public static HashFunction murmur3_128() { 140 return Murmur3_128Holder.MURMUR3_128; 141 } 142 143 private static class Murmur3_128Holder { 144 static final HashFunction MURMUR3_128 = new Murmur3_128HashFunction(0); 145 146 /** Returned by {@link #goodFastHash} when {@code 32 < minimumBits <= 128}. */ 147 static final HashFunction GOOD_FAST_HASH_FUNCTION_128 = murmur3_128(GOOD_FAST_HASH_SEED); 148 } 149 150 /** 151 * Returns a hash function implementing the 152 * <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a> 153 * using a seed value of {@code k = 00 01 02 ...}. 154 * 155 * @since 15.0 156 */ 157 public static HashFunction sipHash24() { 158 return SipHash24Holder.SIP_HASH_24; 159 } 160 161 private static class SipHash24Holder { 162 static final HashFunction SIP_HASH_24 = 163 new SipHashFunction(2, 4, 0x0706050403020100L, 0x0f0e0d0c0b0a0908L); 164 } 165 166 /** 167 * Returns a hash function implementing the 168 * <a href="https://131002.net/siphash/">64-bit SipHash-2-4 algorithm</a> 169 * using the given seed. 170 * 171 * @since 15.0 172 */ 173 public static HashFunction sipHash24(long k0, long k1) { 174 return new SipHashFunction(2, 4, k0, k1); 175 } 176 177 /** 178 * Returns a hash function implementing the MD5 hash algorithm (128 hash bits) by delegating to 179 * the MD5 {@link MessageDigest}. 180 */ 181 public static HashFunction md5() { 182 return Md5Holder.MD5; 183 } 184 185 private static class Md5Holder { 186 static final HashFunction MD5 = new MessageDigestHashFunction("MD5", "Hashing.md5()"); 187 } 188 189 /** 190 * Returns a hash function implementing the SHA-1 algorithm (160 hash bits) by delegating to the 191 * SHA-1 {@link MessageDigest}. 192 */ 193 public static HashFunction sha1() { 194 return Sha1Holder.SHA_1; 195 } 196 197 private static class Sha1Holder { 198 static final HashFunction SHA_1 = 199 new MessageDigestHashFunction("SHA-1", "Hashing.sha1()"); 200 } 201 202 /** 203 * Returns a hash function implementing the SHA-256 algorithm (256 hash bits) by delegating to 204 * the SHA-256 {@link MessageDigest}. 205 */ 206 public static HashFunction sha256() { 207 return Sha256Holder.SHA_256; 208 } 209 210 private static class Sha256Holder { 211 static final HashFunction SHA_256 = 212 new MessageDigestHashFunction("SHA-256", "Hashing.sha256()"); 213 } 214 215 /** 216 * Returns a hash function implementing the SHA-512 algorithm (512 hash bits) by delegating to the 217 * SHA-512 {@link MessageDigest}. 218 */ 219 public static HashFunction sha512() { 220 return Sha512Holder.SHA_512; 221 } 222 223 private static class Sha512Holder { 224 static final HashFunction SHA_512 = 225 new MessageDigestHashFunction("SHA-512", "Hashing.sha512()"); 226 } 227 228 /** 229 * Returns a hash function implementing the CRC32C checksum algorithm (32 hash bits) as described 230 * by RFC 3720, Section 12.1. 231 * 232 * @since 18.0 233 */ 234 public static HashFunction crc32c() { 235 return Crc32cHolder.CRC_32_C; 236 } 237 238 private static final class Crc32cHolder { 239 static final HashFunction CRC_32_C = new Crc32cHashFunction(); 240 } 241 242 /** 243 * Returns a hash function implementing the CRC-32 checksum algorithm (32 hash bits) by delegating 244 * to the {@link CRC32} {@link Checksum}. 245 * 246 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a 247 * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}. 248 * 249 * @since 14.0 250 */ 251 public static HashFunction crc32() { 252 return Crc32Holder.CRC_32; 253 } 254 255 private static class Crc32Holder { 256 static final HashFunction CRC_32 = 257 checksumHashFunction(ChecksumType.CRC_32, "Hashing.crc32()"); 258 } 259 260 /** 261 * Returns a hash function implementing the Adler-32 checksum algorithm (32 hash bits) by 262 * delegating to the {@link Adler32} {@link Checksum}. 263 * 264 * <p>To get the {@code long} value equivalent to {@link Checksum#getValue()} for a 265 * {@code HashCode} produced by this function, use {@link HashCode#padToLong()}. 266 * 267 * @since 14.0 268 */ 269 public static HashFunction adler32() { 270 return Adler32Holder.ADLER_32; 271 } 272 273 private static class Adler32Holder { 274 static final HashFunction ADLER_32 = 275 checksumHashFunction(ChecksumType.ADLER_32, "Hashing.adler32()"); 276 } 277 278 private static HashFunction checksumHashFunction(ChecksumType type, String toString) { 279 return new ChecksumHashFunction(type, type.bits, toString); 280 } 281 282 enum ChecksumType implements Supplier<Checksum> { 283 CRC_32(32) { 284 @Override 285 public Checksum get() { 286 return new CRC32(); 287 } 288 }, 289 ADLER_32(32) { 290 @Override 291 public Checksum get() { 292 return new Adler32(); 293 } 294 }; 295 296 private final int bits; 297 298 ChecksumType(int bits) { 299 this.bits = bits; 300 } 301 302 @Override 303 public abstract Checksum get(); 304 } 305 306 /** 307 * Assigns to {@code hashCode} a "bucket" in the range {@code [0, buckets)}, in a uniform 308 * manner that minimizes the need for remapping as {@code buckets} grows. That is, 309 * {@code consistentHash(h, n)} equals: 310 * 311 * <ul> 312 * <li>{@code n - 1}, with approximate probability {@code 1/n} 313 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 314 * </ul> 315 * 316 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia 317 * article on consistent hashing</a> for more information. 318 */ 319 public static int consistentHash(HashCode hashCode, int buckets) { 320 return consistentHash(hashCode.padToLong(), buckets); 321 } 322 323 /** 324 * Assigns to {@code input} a "bucket" in the range {@code [0, buckets)}, in a uniform 325 * manner that minimizes the need for remapping as {@code buckets} grows. That is, 326 * {@code consistentHash(h, n)} equals: 327 * 328 * <ul> 329 * <li>{@code n - 1}, with approximate probability {@code 1/n} 330 * <li>{@code consistentHash(h, n - 1)}, otherwise (probability {@code 1 - 1/n}) 331 * </ul> 332 * 333 * <p>See the <a href="http://en.wikipedia.org/wiki/Consistent_hashing">wikipedia 334 * article on consistent hashing</a> for more information. 335 */ 336 public static int consistentHash(long input, int buckets) { 337 checkArgument(buckets > 0, "buckets must be positive: %s", buckets); 338 LinearCongruentialGenerator generator = new LinearCongruentialGenerator(input); 339 int candidate = 0; 340 int next; 341 342 // Jump from bucket to bucket until we go out of range 343 while (true) { 344 next = (int) ((candidate + 1) / generator.nextDouble()); 345 if (next >= 0 && next < buckets) { 346 candidate = next; 347 } else { 348 return candidate; 349 } 350 } 351 } 352 353 /** 354 * Returns a hash code, having the same bit length as each of the input hash codes, 355 * that combines the information of these hash codes in an ordered fashion. That 356 * is, whenever two equal hash codes are produced by two calls to this method, it 357 * is <i>as likely as possible</i> that each was computed from the <i>same</i> 358 * input hash codes in the <i>same</i> order. 359 * 360 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes 361 * do not all have the same bit length 362 */ 363 public static HashCode combineOrdered(Iterable<HashCode> hashCodes) { 364 Iterator<HashCode> iterator = hashCodes.iterator(); 365 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 366 int bits = iterator.next().bits(); 367 byte[] resultBytes = new byte[bits / 8]; 368 for (HashCode hashCode : hashCodes) { 369 byte[] nextBytes = hashCode.asBytes(); 370 checkArgument(nextBytes.length == resultBytes.length, 371 "All hashcodes must have the same bit length."); 372 for (int i = 0; i < nextBytes.length; i++) { 373 resultBytes[i] = (byte) (resultBytes[i] * 37 ^ nextBytes[i]); 374 } 375 } 376 return HashCode.fromBytesNoCopy(resultBytes); 377 } 378 379 /** 380 * Returns a hash code, having the same bit length as each of the input hash codes, 381 * that combines the information of these hash codes in an unordered fashion. That 382 * is, whenever two equal hash codes are produced by two calls to this method, it 383 * is <i>as likely as possible</i> that each was computed from the <i>same</i> 384 * input hash codes in <i>some</i> order. 385 * 386 * @throws IllegalArgumentException if {@code hashCodes} is empty, or the hash codes 387 * do not all have the same bit length 388 */ 389 public static HashCode combineUnordered(Iterable<HashCode> hashCodes) { 390 Iterator<HashCode> iterator = hashCodes.iterator(); 391 checkArgument(iterator.hasNext(), "Must be at least 1 hash code to combine."); 392 byte[] resultBytes = new byte[iterator.next().bits() / 8]; 393 for (HashCode hashCode : hashCodes) { 394 byte[] nextBytes = hashCode.asBytes(); 395 checkArgument(nextBytes.length == resultBytes.length, 396 "All hashcodes must have the same bit length."); 397 for (int i = 0; i < nextBytes.length; i++) { 398 resultBytes[i] += nextBytes[i]; 399 } 400 } 401 return HashCode.fromBytesNoCopy(resultBytes); 402 } 403 404 /** 405 * Checks that the passed argument is positive, and ceils it to a multiple of 32. 406 */ 407 static int checkPositiveAndMakeMultipleOf32(int bits) { 408 checkArgument(bits > 0, "Number of bits must be positive"); 409 return (bits + 31) & ~31; 410 } 411 412 // TODO(kevinb): Maybe expose this class via a static Hashing method? 413 @VisibleForTesting 414 static final class ConcatenatedHashFunction extends AbstractCompositeHashFunction { 415 private final int bits; 416 417 ConcatenatedHashFunction(HashFunction... functions) { 418 super(functions); 419 int bitSum = 0; 420 for (HashFunction function : functions) { 421 bitSum += function.bits(); 422 } 423 this.bits = bitSum; 424 } 425 426 @Override 427 HashCode makeHash(Hasher[] hashers) { 428 byte[] bytes = new byte[bits / 8]; 429 int i = 0; 430 for (Hasher hasher : hashers) { 431 HashCode newHash = hasher.hash(); 432 i += newHash.writeBytesTo(bytes, i, newHash.bits() / 8); 433 } 434 return HashCode.fromBytesNoCopy(bytes); 435 } 436 437 @Override 438 public int bits() { 439 return bits; 440 } 441 442 @Override 443 public boolean equals(@Nullable Object object) { 444 if (object instanceof ConcatenatedHashFunction) { 445 ConcatenatedHashFunction other = (ConcatenatedHashFunction) object; 446 if (bits != other.bits || functions.length != other.functions.length) { 447 return false; 448 } 449 for (int i = 0; i < functions.length; i++) { 450 if (!functions[i].equals(other.functions[i])) { 451 return false; 452 } 453 } 454 return true; 455 } 456 return false; 457 } 458 459 @Override 460 public int hashCode() { 461 int hash = bits; 462 for (HashFunction function : functions) { 463 hash ^= function.hashCode(); 464 } 465 return hash; 466 } 467 } 468 469 /** 470 * Linear CongruentialGenerator to use for consistent hashing. 471 * See http://en.wikipedia.org/wiki/Linear_congruential_generator 472 */ 473 private static final class LinearCongruentialGenerator { 474 private long state; 475 476 public LinearCongruentialGenerator(long seed) { 477 this.state = seed; 478 } 479 480 public double nextDouble() { 481 state = 2862933555777941757L * state + 1; 482 return ((double) ((int) (state >>> 33) + 1)) / (0x1.0p31); 483 } 484 } 485 486 private Hashing() {} 487}