001/* 002 * Copyright (C) 2011 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.hash; 016 017import static com.google.common.base.Preconditions.checkArgument; 018import static com.google.common.base.Preconditions.checkNotNull; 019import static com.google.common.base.Preconditions.checkState; 020 021import com.google.common.annotations.Beta; 022import com.google.common.base.Preconditions; 023import com.google.common.primitives.Ints; 024import com.google.common.primitives.UnsignedInts; 025import com.google.errorprone.annotations.CanIgnoreReturnValue; 026import java.io.Serializable; 027import org.checkerframework.checker.nullness.qual.Nullable; 028 029/** 030 * An immutable hash code of arbitrary bit length. 031 * 032 * @author Dimitris Andreou 033 * @author Kurt Alfred Kluever 034 * @since 11.0 035 */ 036@Beta 037public abstract class HashCode { 038 HashCode() {} 039 040 /** Returns the number of bits in this hash code; a positive multiple of 8. */ 041 public abstract int bits(); 042 043 /** 044 * Returns the first four bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to an 045 * {@code int} value in little-endian order. 046 * 047 * @throws IllegalStateException if {@code bits() < 32} 048 */ 049 public abstract int asInt(); 050 051 /** 052 * Returns the first eight bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to a 053 * {@code long} value in little-endian order. 054 * 055 * @throws IllegalStateException if {@code bits() < 64} 056 */ 057 public abstract long asLong(); 058 059 /** 060 * If this hashcode has enough bits, returns {@code asLong()}, otherwise returns a {@code long} 061 * value with {@code asBytes()} as the least-significant bytes and {@code 0x00} as the remaining 062 * most-significant bytes. 063 * 064 * @since 14.0 (since 11.0 as {@code Hashing.padToLong(HashCode)}) 065 */ 066 public abstract long padToLong(); 067 068 /** 069 * Returns the value of this hash code as a byte array. The caller may modify the byte array; 070 * changes to it will <i>not</i> be reflected in this {@code HashCode} object or any other arrays 071 * returned by this method. 072 */ 073 // TODO(user): consider ByteString here, when that is available 074 public abstract byte[] asBytes(); 075 076 /** 077 * Copies bytes from this hash code into {@code dest}. 078 * 079 * @param dest the byte array into which the hash code will be written 080 * @param offset the start offset in the data 081 * @param maxLength the maximum number of bytes to write 082 * @return the number of bytes written to {@code dest} 083 * @throws IndexOutOfBoundsException if there is not enough room in {@code dest} 084 */ 085 @CanIgnoreReturnValue 086 public int writeBytesTo(byte[] dest, int offset, int maxLength) { 087 maxLength = Ints.min(maxLength, bits() / 8); 088 Preconditions.checkPositionIndexes(offset, offset + maxLength, dest.length); 089 writeBytesToImpl(dest, offset, maxLength); 090 return maxLength; 091 } 092 093 abstract void writeBytesToImpl(byte[] dest, int offset, int maxLength); 094 095 /** 096 * Returns a mutable view of the underlying bytes for the given {@code HashCode} if it is a 097 * byte-based hashcode. Otherwise it returns {@link HashCode#asBytes}. Do <i>not</i> mutate this 098 * array or else you will break the immutability contract of {@code HashCode}. 099 */ 100 byte[] getBytesInternal() { 101 return asBytes(); 102 } 103 104 /** 105 * Returns whether this {@code HashCode} and that {@code HashCode} have the same value, given that 106 * they have the same number of bits. 107 */ 108 abstract boolean equalsSameBits(HashCode that); 109 110 /** 111 * Creates a 32-bit {@code HashCode} representation of the given int value. The underlying bytes 112 * are interpreted in little endian order. 113 * 114 * @since 15.0 (since 12.0 in HashCodes) 115 */ 116 public static HashCode fromInt(int hash) { 117 return new IntHashCode(hash); 118 } 119 120 private static final class IntHashCode extends HashCode implements Serializable { 121 final int hash; 122 123 IntHashCode(int hash) { 124 this.hash = hash; 125 } 126 127 @Override 128 public int bits() { 129 return 32; 130 } 131 132 @Override 133 public byte[] asBytes() { 134 return new byte[] {(byte) hash, (byte) (hash >> 8), (byte) (hash >> 16), (byte) (hash >> 24)}; 135 } 136 137 @Override 138 public int asInt() { 139 return hash; 140 } 141 142 @Override 143 public long asLong() { 144 throw new IllegalStateException("this HashCode only has 32 bits; cannot create a long"); 145 } 146 147 @Override 148 public long padToLong() { 149 return UnsignedInts.toLong(hash); 150 } 151 152 @Override 153 void writeBytesToImpl(byte[] dest, int offset, int maxLength) { 154 for (int i = 0; i < maxLength; i++) { 155 dest[offset + i] = (byte) (hash >> (i * 8)); 156 } 157 } 158 159 @Override 160 boolean equalsSameBits(HashCode that) { 161 return hash == that.asInt(); 162 } 163 164 private static final long serialVersionUID = 0; 165 } 166 167 /** 168 * Creates a 64-bit {@code HashCode} representation of the given long value. The underlying bytes 169 * are interpreted in little endian order. 170 * 171 * @since 15.0 (since 12.0 in HashCodes) 172 */ 173 public static HashCode fromLong(long hash) { 174 return new LongHashCode(hash); 175 } 176 177 private static final class LongHashCode extends HashCode implements Serializable { 178 final long hash; 179 180 LongHashCode(long hash) { 181 this.hash = hash; 182 } 183 184 @Override 185 public int bits() { 186 return 64; 187 } 188 189 @Override 190 public byte[] asBytes() { 191 return new byte[] { 192 (byte) hash, 193 (byte) (hash >> 8), 194 (byte) (hash >> 16), 195 (byte) (hash >> 24), 196 (byte) (hash >> 32), 197 (byte) (hash >> 40), 198 (byte) (hash >> 48), 199 (byte) (hash >> 56) 200 }; 201 } 202 203 @Override 204 public int asInt() { 205 return (int) hash; 206 } 207 208 @Override 209 public long asLong() { 210 return hash; 211 } 212 213 @Override 214 public long padToLong() { 215 return hash; 216 } 217 218 @Override 219 void writeBytesToImpl(byte[] dest, int offset, int maxLength) { 220 for (int i = 0; i < maxLength; i++) { 221 dest[offset + i] = (byte) (hash >> (i * 8)); 222 } 223 } 224 225 @Override 226 boolean equalsSameBits(HashCode that) { 227 return hash == that.asLong(); 228 } 229 230 private static final long serialVersionUID = 0; 231 } 232 233 /** 234 * Creates a {@code HashCode} from a byte array. The array is defensively copied to preserve the 235 * immutability contract of {@code HashCode}. The array cannot be empty. 236 * 237 * @since 15.0 (since 12.0 in HashCodes) 238 */ 239 public static HashCode fromBytes(byte[] bytes) { 240 checkArgument(bytes.length >= 1, "A HashCode must contain at least 1 byte."); 241 return fromBytesNoCopy(bytes.clone()); 242 } 243 244 /** 245 * Creates a {@code HashCode} from a byte array. The array is <i>not</i> copied defensively, so it 246 * must be handed-off so as to preserve the immutability contract of {@code HashCode}. 247 */ 248 static HashCode fromBytesNoCopy(byte[] bytes) { 249 return new BytesHashCode(bytes); 250 } 251 252 private static final class BytesHashCode extends HashCode implements Serializable { 253 final byte[] bytes; 254 255 BytesHashCode(byte[] bytes) { 256 this.bytes = checkNotNull(bytes); 257 } 258 259 @Override 260 public int bits() { 261 return bytes.length * 8; 262 } 263 264 @Override 265 public byte[] asBytes() { 266 return bytes.clone(); 267 } 268 269 @Override 270 public int asInt() { 271 checkState( 272 bytes.length >= 4, 273 "HashCode#asInt() requires >= 4 bytes (it only has %s bytes).", 274 bytes.length); 275 return (bytes[0] & 0xFF) 276 | ((bytes[1] & 0xFF) << 8) 277 | ((bytes[2] & 0xFF) << 16) 278 | ((bytes[3] & 0xFF) << 24); 279 } 280 281 @Override 282 public long asLong() { 283 checkState( 284 bytes.length >= 8, 285 "HashCode#asLong() requires >= 8 bytes (it only has %s bytes).", 286 bytes.length); 287 return padToLong(); 288 } 289 290 @Override 291 public long padToLong() { 292 long retVal = (bytes[0] & 0xFF); 293 for (int i = 1; i < Math.min(bytes.length, 8); i++) { 294 retVal |= (bytes[i] & 0xFFL) << (i * 8); 295 } 296 return retVal; 297 } 298 299 @Override 300 void writeBytesToImpl(byte[] dest, int offset, int maxLength) { 301 System.arraycopy(bytes, 0, dest, offset, maxLength); 302 } 303 304 @Override 305 byte[] getBytesInternal() { 306 return bytes; 307 } 308 309 @Override 310 boolean equalsSameBits(HashCode that) { 311 // We don't use MessageDigest.isEqual() here because its contract does not guarantee 312 // constant-time evaluation (no short-circuiting). 313 if (this.bytes.length != that.getBytesInternal().length) { 314 return false; 315 } 316 317 boolean areEqual = true; 318 for (int i = 0; i < this.bytes.length; i++) { 319 areEqual &= (this.bytes[i] == that.getBytesInternal()[i]); 320 } 321 return areEqual; 322 } 323 324 private static final long serialVersionUID = 0; 325 } 326 327 /** 328 * Creates a {@code HashCode} from a hexadecimal ({@code base 16}) encoded string. The string must 329 * be at least 2 characters long, and contain only valid, lower-cased hexadecimal characters. 330 * 331 * <p>This method accepts the exact format generated by {@link #toString}. If you require more 332 * lenient {@code base 16} decoding, please use {@link com.google.common.io.BaseEncoding#decode} 333 * (and pass the result to {@link #fromBytes}). 334 * 335 * @since 15.0 336 */ 337 public static HashCode fromString(String string) { 338 checkArgument( 339 string.length() >= 2, "input string (%s) must have at least 2 characters", string); 340 checkArgument( 341 string.length() % 2 == 0, 342 "input string (%s) must have an even number of characters", 343 string); 344 345 byte[] bytes = new byte[string.length() / 2]; 346 for (int i = 0; i < string.length(); i += 2) { 347 int ch1 = decode(string.charAt(i)) << 4; 348 int ch2 = decode(string.charAt(i + 1)); 349 bytes[i / 2] = (byte) (ch1 + ch2); 350 } 351 return fromBytesNoCopy(bytes); 352 } 353 354 private static int decode(char ch) { 355 if (ch >= '0' && ch <= '9') { 356 return ch - '0'; 357 } 358 if (ch >= 'a' && ch <= 'f') { 359 return ch - 'a' + 10; 360 } 361 throw new IllegalArgumentException("Illegal hexadecimal character: " + ch); 362 } 363 364 /** 365 * Returns {@code true} if {@code object} is a {@link HashCode} instance with the identical byte 366 * representation to this hash code. 367 * 368 * <p><b>Security note:</b> this method uses a constant-time (not short-circuiting) implementation 369 * to protect against <a href="http://en.wikipedia.org/wiki/Timing_attack">timing attacks</a>. 370 */ 371 @Override 372 public final boolean equals(@Nullable Object object) { 373 if (object instanceof HashCode) { 374 HashCode that = (HashCode) object; 375 return bits() == that.bits() && equalsSameBits(that); 376 } 377 return false; 378 } 379 380 /** 381 * Returns a "Java hash code" for this {@code HashCode} instance; this is well-defined (so, for 382 * example, you can safely put {@code HashCode} instances into a {@code HashSet}) but is otherwise 383 * probably not what you want to use. 384 */ 385 @Override 386 public final int hashCode() { 387 // If we have at least 4 bytes (32 bits), just take the first 4 bytes. Since this is 388 // already a (presumably) high-quality hash code, any four bytes of it will do. 389 if (bits() >= 32) { 390 return asInt(); 391 } 392 // If we have less than 4 bytes, use them all. 393 byte[] bytes = getBytesInternal(); 394 int val = (bytes[0] & 0xFF); 395 for (int i = 1; i < bytes.length; i++) { 396 val |= ((bytes[i] & 0xFF) << (i * 8)); 397 } 398 return val; 399 } 400 401 /** 402 * Returns a string containing each byte of {@link #asBytes}, in order, as a two-digit unsigned 403 * hexadecimal number in lower case. 404 * 405 * <p>Note that if the output is considered to be a single hexadecimal number, this hash code's 406 * bytes are the <i>big-endian</i> representation of that number. This may be surprising since 407 * everything else in the hashing API uniformly treats multibyte values as little-endian. But this 408 * format conveniently matches that of utilities such as the UNIX {@code md5sum} command. 409 * 410 * <p>To create a {@code HashCode} from its string representation, see {@link #fromString}. 411 */ 412 @Override 413 public final String toString() { 414 byte[] bytes = getBytesInternal(); 415 StringBuilder sb = new StringBuilder(2 * bytes.length); 416 for (byte b : bytes) { 417 sb.append(hexDigits[(b >> 4) & 0xf]).append(hexDigits[b & 0xf]); 418 } 419 return sb.toString(); 420 } 421 422 private static final char[] hexDigits = "0123456789abcdef".toCharArray(); 423}