001/*
002 * Copyright (C) 2011 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.hash;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020import static java.lang.Math.min;
021
022import com.google.common.base.Preconditions;
023import com.google.common.primitives.UnsignedInts;
024import com.google.errorprone.annotations.CanIgnoreReturnValue;
025import java.io.Serializable;
026import javax.annotation.CheckForNull;
027
028/**
029 * An immutable hash code of arbitrary bit length.
030 *
031 * @author Dimitris Andreou
032 * @author Kurt Alfred Kluever
033 * @since 11.0
034 */
035@ElementTypesAreNonnullByDefault
036public abstract class HashCode {
037  HashCode() {}
038
039  /** Returns the number of bits in this hash code; a positive multiple of 8. */
040  public abstract int bits();
041
042  /**
043   * Returns the first four bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to an
044   * {@code int} value in little-endian order.
045   *
046   * @throws IllegalStateException if {@code bits() < 32}
047   */
048  public abstract int asInt();
049
050  /**
051   * Returns the first eight bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to a
052   * {@code long} value in little-endian order.
053   *
054   * @throws IllegalStateException if {@code bits() < 64}
055   */
056  public abstract long asLong();
057
058  /**
059   * If this hashcode has enough bits, returns {@code asLong()}, otherwise returns a {@code long}
060   * value with {@code asBytes()} as the least-significant bytes and {@code 0x00} as the remaining
061   * most-significant bytes.
062   *
063   * @since 14.0 (since 11.0 as {@code Hashing.padToLong(HashCode)})
064   */
065  public abstract long padToLong();
066
067  /**
068   * Returns the value of this hash code as a byte array. The caller may modify the byte array;
069   * changes to it will <i>not</i> be reflected in this {@code HashCode} object or any other arrays
070   * returned by this method.
071   */
072  // TODO(user): consider ByteString here, when that is available
073  public abstract byte[] asBytes();
074
075  /**
076   * Copies bytes from this hash code into {@code dest}.
077   *
078   * @param dest the byte array into which the hash code will be written
079   * @param offset the start offset in the data
080   * @param maxLength the maximum number of bytes to write
081   * @return the number of bytes written to {@code dest}
082   * @throws IndexOutOfBoundsException if there is not enough room in {@code dest}
083   */
084  @CanIgnoreReturnValue
085  public int writeBytesTo(byte[] dest, int offset, int maxLength) {
086    maxLength = min(maxLength, bits() / 8);
087    Preconditions.checkPositionIndexes(offset, offset + maxLength, dest.length);
088    writeBytesToImpl(dest, offset, maxLength);
089    return maxLength;
090  }
091
092  abstract void writeBytesToImpl(byte[] dest, int offset, int maxLength);
093
094  /**
095   * Returns a mutable view of the underlying bytes for the given {@code HashCode} if it is a
096   * byte-based hashcode. Otherwise it returns {@link HashCode#asBytes}. Do <i>not</i> mutate this
097   * array or else you will break the immutability contract of {@code HashCode}.
098   */
099  byte[] getBytesInternal() {
100    return asBytes();
101  }
102
103  /**
104   * Returns whether this {@code HashCode} and that {@code HashCode} have the same value, given that
105   * they have the same number of bits.
106   */
107  abstract boolean equalsSameBits(HashCode that);
108
109  /**
110   * Creates a 32-bit {@code HashCode} representation of the given int value. The underlying bytes
111   * are interpreted in little endian order.
112   *
113   * @since 15.0 (since 12.0 in HashCodes)
114   */
115  public static HashCode fromInt(int hash) {
116    return new IntHashCode(hash);
117  }
118
119  private static final class IntHashCode extends HashCode implements Serializable {
120    final int hash;
121
122    IntHashCode(int hash) {
123      this.hash = hash;
124    }
125
126    @Override
127    public int bits() {
128      return 32;
129    }
130
131    @Override
132    public byte[] asBytes() {
133      return new byte[] {(byte) hash, (byte) (hash >> 8), (byte) (hash >> 16), (byte) (hash >> 24)};
134    }
135
136    @Override
137    public int asInt() {
138      return hash;
139    }
140
141    @Override
142    public long asLong() {
143      throw new IllegalStateException("this HashCode only has 32 bits; cannot create a long");
144    }
145
146    @Override
147    public long padToLong() {
148      return UnsignedInts.toLong(hash);
149    }
150
151    @Override
152    void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
153      for (int i = 0; i < maxLength; i++) {
154        dest[offset + i] = (byte) (hash >> (i * 8));
155      }
156    }
157
158    @Override
159    boolean equalsSameBits(HashCode that) {
160      return hash == that.asInt();
161    }
162
163    private static final long serialVersionUID = 0;
164  }
165
166  /**
167   * Creates a 64-bit {@code HashCode} representation of the given long value. The underlying bytes
168   * are interpreted in little endian order.
169   *
170   * @since 15.0 (since 12.0 in HashCodes)
171   */
172  public static HashCode fromLong(long hash) {
173    return new LongHashCode(hash);
174  }
175
176  private static final class LongHashCode extends HashCode implements Serializable {
177    final long hash;
178
179    LongHashCode(long hash) {
180      this.hash = hash;
181    }
182
183    @Override
184    public int bits() {
185      return 64;
186    }
187
188    @Override
189    public byte[] asBytes() {
190      return new byte[] {
191        (byte) hash,
192        (byte) (hash >> 8),
193        (byte) (hash >> 16),
194        (byte) (hash >> 24),
195        (byte) (hash >> 32),
196        (byte) (hash >> 40),
197        (byte) (hash >> 48),
198        (byte) (hash >> 56)
199      };
200    }
201
202    @Override
203    public int asInt() {
204      return (int) hash;
205    }
206
207    @Override
208    public long asLong() {
209      return hash;
210    }
211
212    @Override
213    public long padToLong() {
214      return hash;
215    }
216
217    @Override
218    void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
219      for (int i = 0; i < maxLength; i++) {
220        dest[offset + i] = (byte) (hash >> (i * 8));
221      }
222    }
223
224    @Override
225    boolean equalsSameBits(HashCode that) {
226      return hash == that.asLong();
227    }
228
229    private static final long serialVersionUID = 0;
230  }
231
232  /**
233   * Creates a {@code HashCode} from a byte array. The array is defensively copied to preserve the
234   * immutability contract of {@code HashCode}. The array cannot be empty.
235   *
236   * @since 15.0 (since 12.0 in HashCodes)
237   */
238  public static HashCode fromBytes(byte[] bytes) {
239    checkArgument(bytes.length >= 1, "A HashCode must contain at least 1 byte.");
240    return fromBytesNoCopy(bytes.clone());
241  }
242
243  /**
244   * Creates a {@code HashCode} from a byte array. The array is <i>not</i> copied defensively, so it
245   * must be handed-off so as to preserve the immutability contract of {@code HashCode}.
246   */
247  static HashCode fromBytesNoCopy(byte[] bytes) {
248    return new BytesHashCode(bytes);
249  }
250
251  private static final class BytesHashCode extends HashCode implements Serializable {
252    final byte[] bytes;
253
254    BytesHashCode(byte[] bytes) {
255      this.bytes = checkNotNull(bytes);
256    }
257
258    @Override
259    public int bits() {
260      return bytes.length * 8;
261    }
262
263    @Override
264    public byte[] asBytes() {
265      return bytes.clone();
266    }
267
268    @Override
269    public int asInt() {
270      checkState(
271          bytes.length >= 4,
272          "HashCode#asInt() requires >= 4 bytes (it only has %s bytes).",
273          bytes.length);
274      return (bytes[0] & 0xFF)
275          | ((bytes[1] & 0xFF) << 8)
276          | ((bytes[2] & 0xFF) << 16)
277          | ((bytes[3] & 0xFF) << 24);
278    }
279
280    @Override
281    public long asLong() {
282      checkState(
283          bytes.length >= 8,
284          "HashCode#asLong() requires >= 8 bytes (it only has %s bytes).",
285          bytes.length);
286      return padToLong();
287    }
288
289    @Override
290    public long padToLong() {
291      long retVal = (bytes[0] & 0xFF);
292      for (int i = 1; i < min(bytes.length, 8); i++) {
293        retVal |= (bytes[i] & 0xFFL) << (i * 8);
294      }
295      return retVal;
296    }
297
298    @Override
299    void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
300      System.arraycopy(bytes, 0, dest, offset, maxLength);
301    }
302
303    @Override
304    byte[] getBytesInternal() {
305      return bytes;
306    }
307
308    @Override
309    boolean equalsSameBits(HashCode that) {
310      // We don't use MessageDigest.isEqual() here because its contract does not guarantee
311      // constant-time evaluation (no short-circuiting).
312      if (this.bytes.length != that.getBytesInternal().length) {
313        return false;
314      }
315
316      boolean areEqual = true;
317      for (int i = 0; i < this.bytes.length; i++) {
318        areEqual &= (this.bytes[i] == that.getBytesInternal()[i]);
319      }
320      return areEqual;
321    }
322
323    private static final long serialVersionUID = 0;
324  }
325
326  /**
327   * Creates a {@code HashCode} from a hexadecimal ({@code base 16}) encoded string. The string must
328   * be at least 2 characters long, and contain only valid, lower-cased hexadecimal characters.
329   *
330   * <p>This method accepts the exact format generated by {@link #toString}. If you require more
331   * lenient {@code base 16} decoding, please use {@link com.google.common.io.BaseEncoding#decode}
332   * (and pass the result to {@link #fromBytes}).
333   *
334   * @since 15.0
335   */
336  public static HashCode fromString(String string) {
337    checkArgument(
338        string.length() >= 2, "input string (%s) must have at least 2 characters", string);
339    checkArgument(
340        string.length() % 2 == 0,
341        "input string (%s) must have an even number of characters",
342        string);
343
344    byte[] bytes = new byte[string.length() / 2];
345    for (int i = 0; i < string.length(); i += 2) {
346      int ch1 = decode(string.charAt(i)) << 4;
347      int ch2 = decode(string.charAt(i + 1));
348      bytes[i / 2] = (byte) (ch1 + ch2);
349    }
350    return fromBytesNoCopy(bytes);
351  }
352
353  private static int decode(char ch) {
354    if (ch >= '0' && ch <= '9') {
355      return ch - '0';
356    }
357    if (ch >= 'a' && ch <= 'f') {
358      return ch - 'a' + 10;
359    }
360    throw new IllegalArgumentException("Illegal hexadecimal character: " + ch);
361  }
362
363  /**
364   * Returns {@code true} if {@code object} is a {@link HashCode} instance with the identical byte
365   * representation to this hash code.
366   *
367   * <p><b>Security note:</b> this method uses a constant-time (not short-circuiting) implementation
368   * to protect against <a href="http://en.wikipedia.org/wiki/Timing_attack">timing attacks</a>.
369   */
370  @Override
371  public final boolean equals(@CheckForNull Object object) {
372    if (object instanceof HashCode) {
373      HashCode that = (HashCode) object;
374      return bits() == that.bits() && equalsSameBits(that);
375    }
376    return false;
377  }
378
379  /**
380   * Returns a "Java hash code" for this {@code HashCode} instance; this is well-defined (so, for
381   * example, you can safely put {@code HashCode} instances into a {@code HashSet}) but is otherwise
382   * probably not what you want to use.
383   */
384  @Override
385  public final int hashCode() {
386    // If we have at least 4 bytes (32 bits), just take the first 4 bytes. Since this is
387    // already a (presumably) high-quality hash code, any four bytes of it will do.
388    if (bits() >= 32) {
389      return asInt();
390    }
391    // If we have less than 4 bytes, use them all.
392    byte[] bytes = getBytesInternal();
393    int val = (bytes[0] & 0xFF);
394    for (int i = 1; i < bytes.length; i++) {
395      val |= ((bytes[i] & 0xFF) << (i * 8));
396    }
397    return val;
398  }
399
400  /**
401   * Returns a string containing each byte of {@link #asBytes}, in order, as a two-digit unsigned
402   * hexadecimal number in lower case.
403   *
404   * <p>Note that if the output is considered to be a single hexadecimal number, whether this string
405   * is big-endian or little-endian depends on the byte order of {@link #asBytes}. This may be
406   * surprising for implementations of {@code HashCode} that represent the number in big-endian
407   * since everything else in the hashing API uniformly treats multibyte values as little-endian.
408   *
409   * <p>To create a {@code HashCode} from its string representation, see {@link #fromString}.
410   */
411  @Override
412  public final String toString() {
413    byte[] bytes = getBytesInternal();
414    StringBuilder sb = new StringBuilder(2 * bytes.length);
415    for (byte b : bytes) {
416      sb.append(hexDigits[(b >> 4) & 0xf]).append(hexDigits[b & 0xf]);
417    }
418    return sb.toString();
419  }
420
421  private static final char[] hexDigits = "0123456789abcdef".toCharArray();
422}