001/*
002 * Copyright (C) 2011 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.hash;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020
021import com.google.common.annotations.Beta;
022import com.google.common.base.Preconditions;
023import com.google.common.primitives.Ints;
024import com.google.common.primitives.UnsignedInts;
025
026import java.io.Serializable;
027import java.security.MessageDigest;
028
029import javax.annotation.Nullable;
030
031/**
032 * An immutable hash code of arbitrary bit length.
033 *
034 * @author Dimitris Andreou
035 * @author Kurt Alfred Kluever
036 * @since 11.0
037 */
038@Beta
039public abstract class HashCode {
040  HashCode() {}
041
042  /**
043   * Returns the number of bits in this hash code; a positive multiple of 8.
044   */
045  public abstract int bits();
046
047  /**
048   * Returns the first four bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to
049   * an {@code int} value in little-endian order.
050   *
051   * @throws IllegalStateException if {@code bits() < 32}
052   */
053  public abstract int asInt();
054
055  /**
056   * Returns the first eight bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to
057   * a {@code long} value in little-endian order.
058   *
059   * @throws IllegalStateException if {@code bits() < 64}
060   */
061  public abstract long asLong();
062
063  /**
064   * If this hashcode has enough bits, returns {@code asLong()}, otherwise returns a {@code long}
065   * value with {@code asBytes()} as the least-significant bytes and {@code 0x00} as the remaining
066   * most-significant bytes.
067   *
068   * @since 14.0 (since 11.0 as {@code Hashing.padToLong(HashCode)})
069   */
070  public abstract long padToLong();
071
072  /**
073   * Returns the value of this hash code as a byte array. The caller may modify the byte array;
074   * changes to it will <i>not</i> be reflected in this {@code HashCode} object or any other arrays
075   * returned by this method.
076   */
077  // TODO(user): consider ByteString here, when that is available
078  public abstract byte[] asBytes();
079
080  /**
081   * Copies bytes from this hash code into {@code dest}.
082   *
083   * @param dest the byte array into which the hash code will be written
084   * @param offset the start offset in the data
085   * @param maxLength the maximum number of bytes to write
086   * @return the number of bytes written to {@code dest}
087   * @throws IndexOutOfBoundsException if there is not enough room in {@code dest}
088   */
089  public int writeBytesTo(byte[] dest, int offset, int maxLength) {
090    maxLength = Ints.min(maxLength, bits() / 8);
091    Preconditions.checkPositionIndexes(offset, offset + maxLength, dest.length);
092    writeBytesToImpl(dest, offset, maxLength);
093    return maxLength;
094  }
095
096  abstract void writeBytesToImpl(byte[] dest, int offset, int maxLength);
097
098  /**
099   * Returns a mutable view of the underlying bytes for the given {@code HashCode} if it is a
100   * byte-based hashcode. Otherwise it returns {@link HashCode#asBytes}. Do <i>not</i> mutate this
101   * array or else you will break the immutability contract of {@code HashCode}.
102   */
103  byte[] getBytesInternal() {
104    return asBytes();
105  }
106
107  /**
108   * Returns whether this {@code HashCode} and that {@code HashCode} have the same value, given that
109   * they have the same number of bits.
110   */
111  abstract boolean equalsSameBits(HashCode that);
112
113  /**
114   * Creates a 32-bit {@code HashCode} representation of the given int value. The underlying bytes
115   * are interpreted in little endian order.
116   *
117   * @since 15.0 (since 12.0 in HashCodes)
118   */
119  public static HashCode fromInt(int hash) {
120    return new IntHashCode(hash);
121  }
122
123  private static final class IntHashCode extends HashCode implements Serializable {
124    final int hash;
125
126    IntHashCode(int hash) {
127      this.hash = hash;
128    }
129
130    @Override
131    public int bits() {
132      return 32;
133    }
134
135    @Override
136    public byte[] asBytes() {
137      return new byte[] {
138          (byte) hash,
139          (byte) (hash >> 8),
140          (byte) (hash >> 16),
141          (byte) (hash >> 24)};
142    }
143
144    @Override
145    public int asInt() {
146      return hash;
147    }
148
149    @Override
150    public long asLong() {
151      throw new IllegalStateException("this HashCode only has 32 bits; cannot create a long");
152    }
153
154    @Override
155    public long padToLong() {
156      return UnsignedInts.toLong(hash);
157    }
158
159    @Override
160    void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
161      for (int i = 0; i < maxLength; i++) {
162        dest[offset + i] = (byte) (hash >> (i * 8));
163      }
164    }
165
166    @Override boolean equalsSameBits(HashCode that) {
167      return hash == that.asInt();
168    }
169
170    private static final long serialVersionUID = 0;
171  }
172
173  /**
174   * Creates a 64-bit {@code HashCode} representation of the given long value. The underlying bytes
175   * are interpreted in little endian order.
176   *
177   * @since 15.0 (since 12.0 in HashCodes)
178   */
179  public static HashCode fromLong(long hash) {
180    return new LongHashCode(hash);
181  }
182
183  private static final class LongHashCode extends HashCode implements Serializable {
184    final long hash;
185
186    LongHashCode(long hash) {
187      this.hash = hash;
188    }
189
190    @Override
191    public int bits() {
192      return 64;
193    }
194
195    @Override
196    public byte[] asBytes() {
197      return new byte[] {
198          (byte) hash,
199          (byte) (hash >> 8),
200          (byte) (hash >> 16),
201          (byte) (hash >> 24),
202          (byte) (hash >> 32),
203          (byte) (hash >> 40),
204          (byte) (hash >> 48),
205          (byte) (hash >> 56)};
206    }
207
208    @Override
209    public int asInt() {
210      return (int) hash;
211    }
212
213    @Override
214    public long asLong() {
215      return hash;
216    }
217
218    @Override
219    public long padToLong() {
220      return hash;
221    }
222
223    @Override
224    void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
225      for (int i = 0; i < maxLength; i++) {
226        dest[offset + i] = (byte) (hash >> (i * 8));
227      }
228    }
229
230    @Override
231    boolean equalsSameBits(HashCode that) {
232      return hash == that.asLong();
233    }
234
235    private static final long serialVersionUID = 0;
236  }
237
238  /**
239   * Creates a {@code HashCode} from a byte array. The array is defensively copied to preserve
240   * the immutability contract of {@code HashCode}. The array cannot be empty.
241   *
242   * @since 15.0 (since 12.0 in HashCodes)
243   */
244  public static HashCode fromBytes(byte[] bytes) {
245    checkArgument(bytes.length >= 1, "A HashCode must contain at least 1 byte.");
246    return fromBytesNoCopy(bytes.clone());
247  }
248
249  /**
250   * Creates a {@code HashCode} from a byte array. The array is <i>not</i> copied defensively,
251   * so it must be handed-off so as to preserve the immutability contract of {@code HashCode}.
252   */
253  static HashCode fromBytesNoCopy(byte[] bytes) {
254    return new BytesHashCode(bytes);
255  }
256
257  private static final class BytesHashCode extends HashCode implements Serializable {
258    final byte[] bytes;
259
260    BytesHashCode(byte[] bytes) {
261      this.bytes = checkNotNull(bytes);
262    }
263
264    @Override
265    public int bits() {
266      return bytes.length * 8;
267    }
268
269    @Override
270    public byte[] asBytes() {
271      return bytes.clone();
272    }
273
274    @Override
275    public int asInt() {
276      checkState(bytes.length >= 4,
277          "HashCode#asInt() requires >= 4 bytes (it only has %s bytes).", bytes.length);
278      return (bytes[0] & 0xFF)
279          | ((bytes[1] & 0xFF) << 8)
280          | ((bytes[2] & 0xFF) << 16)
281          | ((bytes[3] & 0xFF) << 24);
282    }
283
284    @Override
285    public long asLong() {
286      checkState(bytes.length >= 8,
287          "HashCode#asLong() requires >= 8 bytes (it only has %s bytes).", bytes.length);
288      return padToLong();
289    }
290
291    @Override
292    public long padToLong() {
293      long retVal = (bytes[0] & 0xFF);
294      for (int i = 1; i < Math.min(bytes.length, 8); i++) {
295        retVal |= (bytes[i] & 0xFFL) << (i * 8);
296      }
297      return retVal;
298    }
299
300    @Override
301    void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
302      System.arraycopy(bytes, 0, dest, offset, maxLength);
303    }
304
305    @Override
306    byte[] getBytesInternal() {
307      return bytes;
308    }
309
310    @Override
311    boolean equalsSameBits(HashCode that) {
312      return MessageDigest.isEqual(bytes, that.getBytesInternal());
313    }
314
315    private static final long serialVersionUID = 0;
316  }
317
318  /**
319   * Creates a {@code HashCode} from a hexadecimal ({@code base 16}) encoded string. The string must
320   * be at least 2 characters long, and contain only valid, lower-cased hexadecimal characters.
321   *
322   * <p>This method accepts the exact format generated by {@link #toString}. If you require more
323   * lenient {@code base 16} decoding, please use
324   * {@link com.google.common.io.BaseEncoding#decode} (and pass the result to {@link #fromBytes}).
325   *
326   * @since 15.0
327   */
328  public static HashCode fromString(String string) {
329    checkArgument(string.length() >= 2,
330        "input string (%s) must have at least 2 characters", string);
331    checkArgument(string.length() % 2 == 0,
332        "input string (%s) must have an even number of characters", string);
333
334    byte[] bytes = new byte[string.length() / 2];
335    for (int i = 0; i < string.length(); i += 2) {
336      int ch1 = decode(string.charAt(i)) << 4;
337      int ch2 = decode(string.charAt(i + 1));
338      bytes[i / 2] = (byte) (ch1 + ch2);
339    }
340    return fromBytesNoCopy(bytes);
341  }
342
343  private static int decode(char ch) {
344    if (ch >= '0' && ch <= '9') {
345      return ch - '0';
346    }
347    if (ch >= 'a' && ch <= 'f') {
348      return ch - 'a' + 10;
349    }
350    throw new IllegalArgumentException("Illegal hexadecimal character: " + ch);
351  }
352
353  @Override
354  public final boolean equals(@Nullable Object object) {
355    if (object instanceof HashCode) {
356      HashCode that = (HashCode) object;
357      return bits() == that.bits() && equalsSameBits(that);
358    }
359    return false;
360  }
361
362  /**
363   * Returns a "Java hash code" for this {@code HashCode} instance; this is well-defined
364   * (so, for example, you can safely put {@code HashCode} instances into a {@code
365   * HashSet}) but is otherwise probably not what you want to use.
366   */
367  @Override
368  public final int hashCode() {
369    // If we have at least 4 bytes (32 bits), just take the first 4 bytes. Since this is
370    // already a (presumably) high-quality hash code, any four bytes of it will do.
371    if (bits() >= 32) {
372      return asInt();
373    }
374    // If we have less than 4 bytes, use them all.
375    byte[] bytes = asBytes();
376    int val = (bytes[0] & 0xFF);
377    for (int i = 1; i < bytes.length; i++) {
378      val |= ((bytes[i] & 0xFF) << (i * 8));
379    }
380    return val;
381  }
382
383  /**
384   * Returns a string containing each byte of {@link #asBytes}, in order, as a two-digit unsigned
385   * hexadecimal number in lower case.
386   *
387   * <p>Note that if the output is considered to be a single hexadecimal number, this hash code's
388   * bytes are the <i>big-endian</i> representation of that number. This may be surprising since
389   * everything else in the hashing API uniformly treats multibyte values as little-endian. But
390   * this format conveniently matches that of utilities such as the UNIX {@code md5sum} command.
391   *
392   * <p>To create a {@code HashCode} from its string representation, see {@link #fromString}.
393   */
394  @Override
395  public final String toString() {
396    byte[] bytes = asBytes();
397    StringBuilder sb = new StringBuilder(2 * bytes.length);
398    for (byte b : bytes) {
399      sb.append(hexDigits[(b >> 4) & 0xf]).append(hexDigits[b & 0xf]);
400    }
401    return sb.toString();
402  }
403
404  private static final char[] hexDigits = "0123456789abcdef".toCharArray();
405}