001/*
002 * Copyright (C) 2011 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.hash;
016
017import static com.google.common.base.Preconditions.checkArgument;
018import static com.google.common.base.Preconditions.checkNotNull;
019import static com.google.common.base.Preconditions.checkState;
020import static java.lang.Math.min;
021
022import com.google.common.base.Preconditions;
023import com.google.common.primitives.UnsignedInts;
024import com.google.errorprone.annotations.CanIgnoreReturnValue;
025import java.io.Serializable;
026import org.jspecify.annotations.Nullable;
027
028/**
029 * An immutable hash code of arbitrary bit length.
030 *
031 * @author Dimitris Andreou
032 * @author Kurt Alfred Kluever
033 * @since 11.0
034 */
035public abstract class HashCode {
036  HashCode() {}
037
038  /** Returns the number of bits in this hash code; a positive multiple of 8. */
039  public abstract int bits();
040
041  /**
042   * Returns the first four bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to an
043   * {@code int} value in little-endian order.
044   *
045   * @throws IllegalStateException if {@code bits() < 32}
046   */
047  public abstract int asInt();
048
049  /**
050   * Returns the first eight bytes of {@linkplain #asBytes() this hashcode's bytes}, converted to a
051   * {@code long} value in little-endian order.
052   *
053   * @throws IllegalStateException if {@code bits() < 64}
054   */
055  public abstract long asLong();
056
057  /**
058   * If this hashcode has enough bits, returns {@code asLong()}, otherwise returns a {@code long}
059   * value with {@code asBytes()} as the least-significant bytes and {@code 0x00} as the remaining
060   * most-significant bytes.
061   *
062   * @since 14.0 (since 11.0 as {@code Hashing.padToLong(HashCode)})
063   */
064  public abstract long padToLong();
065
066  /**
067   * Returns the value of this hash code as a byte array. The caller may modify the byte array;
068   * changes to it will <i>not</i> be reflected in this {@code HashCode} object or any other arrays
069   * returned by this method.
070   */
071  // TODO(user): consider ByteString here, when that is available
072  public abstract byte[] asBytes();
073
074  /**
075   * Copies bytes from this hash code into {@code dest}.
076   *
077   * @param dest the byte array into which the hash code will be written
078   * @param offset the start offset in the data
079   * @param maxLength the maximum number of bytes to write
080   * @return the number of bytes written to {@code dest}
081   * @throws IndexOutOfBoundsException if there is not enough room in {@code dest}
082   */
083  @CanIgnoreReturnValue
084  public int writeBytesTo(byte[] dest, int offset, int maxLength) {
085    maxLength = min(maxLength, bits() / 8);
086    Preconditions.checkPositionIndexes(offset, offset + maxLength, dest.length);
087    writeBytesToImpl(dest, offset, maxLength);
088    return maxLength;
089  }
090
091  abstract void writeBytesToImpl(byte[] dest, int offset, int maxLength);
092
093  /**
094   * Returns a mutable view of the underlying bytes for the given {@code HashCode} if it is a
095   * byte-based hashcode. Otherwise it returns {@link HashCode#asBytes}. Do <i>not</i> mutate this
096   * array or else you will break the immutability contract of {@code HashCode}.
097   */
098  byte[] getBytesInternal() {
099    return asBytes();
100  }
101
102  /**
103   * Returns whether this {@code HashCode} and that {@code HashCode} have the same value, given that
104   * they have the same number of bits.
105   */
106  abstract boolean equalsSameBits(HashCode that);
107
108  /**
109   * Creates a 32-bit {@code HashCode} representation of the given int value. The underlying bytes
110   * are interpreted in little endian order.
111   *
112   * @since 15.0 (since 12.0 in HashCodes)
113   */
114  public static HashCode fromInt(int hash) {
115    return new IntHashCode(hash);
116  }
117
118  private static final class IntHashCode extends HashCode implements Serializable {
119    final int hash;
120
121    IntHashCode(int hash) {
122      this.hash = hash;
123    }
124
125    @Override
126    public int bits() {
127      return 32;
128    }
129
130    @Override
131    public byte[] asBytes() {
132      return new byte[] {(byte) hash, (byte) (hash >> 8), (byte) (hash >> 16), (byte) (hash >> 24)};
133    }
134
135    @Override
136    public int asInt() {
137      return hash;
138    }
139
140    @Override
141    public long asLong() {
142      throw new IllegalStateException("this HashCode only has 32 bits; cannot create a long");
143    }
144
145    @Override
146    public long padToLong() {
147      return UnsignedInts.toLong(hash);
148    }
149
150    @Override
151    void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
152      for (int i = 0; i < maxLength; i++) {
153        dest[offset + i] = (byte) (hash >> (i * 8));
154      }
155    }
156
157    @Override
158    boolean equalsSameBits(HashCode that) {
159      return hash == that.asInt();
160    }
161
162    private static final long serialVersionUID = 0;
163  }
164
165  /**
166   * Creates a 64-bit {@code HashCode} representation of the given long value. The underlying bytes
167   * are interpreted in little endian order.
168   *
169   * @since 15.0 (since 12.0 in HashCodes)
170   */
171  public static HashCode fromLong(long hash) {
172    return new LongHashCode(hash);
173  }
174
175  private static final class LongHashCode extends HashCode implements Serializable {
176    final long hash;
177
178    LongHashCode(long hash) {
179      this.hash = hash;
180    }
181
182    @Override
183    public int bits() {
184      return 64;
185    }
186
187    @Override
188    public byte[] asBytes() {
189      return new byte[] {
190        (byte) hash,
191        (byte) (hash >> 8),
192        (byte) (hash >> 16),
193        (byte) (hash >> 24),
194        (byte) (hash >> 32),
195        (byte) (hash >> 40),
196        (byte) (hash >> 48),
197        (byte) (hash >> 56)
198      };
199    }
200
201    @Override
202    public int asInt() {
203      return (int) hash;
204    }
205
206    @Override
207    public long asLong() {
208      return hash;
209    }
210
211    @Override
212    public long padToLong() {
213      return hash;
214    }
215
216    @Override
217    void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
218      for (int i = 0; i < maxLength; i++) {
219        dest[offset + i] = (byte) (hash >> (i * 8));
220      }
221    }
222
223    @Override
224    boolean equalsSameBits(HashCode that) {
225      return hash == that.asLong();
226    }
227
228    private static final long serialVersionUID = 0;
229  }
230
231  /**
232   * Creates a {@code HashCode} from a byte array. The array is defensively copied to preserve the
233   * immutability contract of {@code HashCode}. The array cannot be empty.
234   *
235   * @since 15.0 (since 12.0 in HashCodes)
236   */
237  public static HashCode fromBytes(byte[] bytes) {
238    checkArgument(bytes.length >= 1, "A HashCode must contain at least 1 byte.");
239    return fromBytesNoCopy(bytes.clone());
240  }
241
242  /**
243   * Creates a {@code HashCode} from a byte array. The array is <i>not</i> copied defensively, so it
244   * must be handed-off so as to preserve the immutability contract of {@code HashCode}.
245   */
246  static HashCode fromBytesNoCopy(byte[] bytes) {
247    return new BytesHashCode(bytes);
248  }
249
250  private static final class BytesHashCode extends HashCode implements Serializable {
251    final byte[] bytes;
252
253    BytesHashCode(byte[] bytes) {
254      this.bytes = checkNotNull(bytes);
255    }
256
257    @Override
258    public int bits() {
259      return bytes.length * 8;
260    }
261
262    @Override
263    public byte[] asBytes() {
264      return bytes.clone();
265    }
266
267    @Override
268    public int asInt() {
269      checkState(
270          bytes.length >= 4,
271          "HashCode#asInt() requires >= 4 bytes (it only has %s bytes).",
272          bytes.length);
273      return (bytes[0] & 0xFF)
274          | ((bytes[1] & 0xFF) << 8)
275          | ((bytes[2] & 0xFF) << 16)
276          | ((bytes[3] & 0xFF) << 24);
277    }
278
279    @Override
280    public long asLong() {
281      checkState(
282          bytes.length >= 8,
283          "HashCode#asLong() requires >= 8 bytes (it only has %s bytes).",
284          bytes.length);
285      return padToLong();
286    }
287
288    @Override
289    public long padToLong() {
290      long retVal = (bytes[0] & 0xFF);
291      for (int i = 1; i < min(bytes.length, 8); i++) {
292        retVal |= (bytes[i] & 0xFFL) << (i * 8);
293      }
294      return retVal;
295    }
296
297    @Override
298    void writeBytesToImpl(byte[] dest, int offset, int maxLength) {
299      System.arraycopy(bytes, 0, dest, offset, maxLength);
300    }
301
302    @Override
303    byte[] getBytesInternal() {
304      return bytes;
305    }
306
307    @Override
308    boolean equalsSameBits(HashCode that) {
309      // We don't use MessageDigest.isEqual() here because its contract does not guarantee
310      // constant-time evaluation (no short-circuiting).
311      if (this.bytes.length != that.getBytesInternal().length) {
312        return false;
313      }
314
315      boolean areEqual = true;
316      for (int i = 0; i < this.bytes.length; i++) {
317        areEqual &= (this.bytes[i] == that.getBytesInternal()[i]);
318      }
319      return areEqual;
320    }
321
322    private static final long serialVersionUID = 0;
323  }
324
325  /**
326   * Creates a {@code HashCode} from a hexadecimal ({@code base 16}) encoded string. The string must
327   * be at least 2 characters long, and contain only valid, lower-cased hexadecimal characters.
328   *
329   * <p>This method accepts the exact format generated by {@link #toString}. If you require more
330   * lenient {@code base 16} decoding, please use {@link com.google.common.io.BaseEncoding#decode}
331   * (and pass the result to {@link #fromBytes}).
332   *
333   * @since 15.0
334   */
335  public static HashCode fromString(String string) {
336    checkArgument(
337        string.length() >= 2, "input string (%s) must have at least 2 characters", string);
338    checkArgument(
339        string.length() % 2 == 0,
340        "input string (%s) must have an even number of characters",
341        string);
342
343    byte[] bytes = new byte[string.length() / 2];
344    for (int i = 0; i < string.length(); i += 2) {
345      int ch1 = decode(string.charAt(i)) << 4;
346      int ch2 = decode(string.charAt(i + 1));
347      bytes[i / 2] = (byte) (ch1 + ch2);
348    }
349    return fromBytesNoCopy(bytes);
350  }
351
352  private static int decode(char ch) {
353    if (ch >= '0' && ch <= '9') {
354      return ch - '0';
355    }
356    if (ch >= 'a' && ch <= 'f') {
357      return ch - 'a' + 10;
358    }
359    throw new IllegalArgumentException("Illegal hexadecimal character: " + ch);
360  }
361
362  /**
363   * Returns {@code true} if {@code object} is a {@link HashCode} instance with the identical byte
364   * representation to this hash code.
365   *
366   * <p><b>Security note:</b> this method uses a constant-time (not short-circuiting) implementation
367   * to protect against <a href="http://en.wikipedia.org/wiki/Timing_attack">timing attacks</a>.
368   */
369  @Override
370  public final boolean equals(@Nullable Object object) {
371    if (object instanceof HashCode) {
372      HashCode that = (HashCode) object;
373      return bits() == that.bits() && equalsSameBits(that);
374    }
375    return false;
376  }
377
378  /**
379   * Returns a "Java hash code" for this {@code HashCode} instance; this is well-defined (so, for
380   * example, you can safely put {@code HashCode} instances into a {@code HashSet}) but is otherwise
381   * probably not what you want to use.
382   */
383  @Override
384  public final int hashCode() {
385    // If we have at least 4 bytes (32 bits), just take the first 4 bytes. Since this is
386    // already a (presumably) high-quality hash code, any four bytes of it will do.
387    if (bits() >= 32) {
388      return asInt();
389    }
390    // If we have less than 4 bytes, use them all.
391    byte[] bytes = getBytesInternal();
392    int val = (bytes[0] & 0xFF);
393    for (int i = 1; i < bytes.length; i++) {
394      val |= ((bytes[i] & 0xFF) << (i * 8));
395    }
396    return val;
397  }
398
399  /**
400   * Returns a string containing each byte of {@link #asBytes}, in order, as a two-digit unsigned
401   * hexadecimal number in lower case.
402   *
403   * <p>Note that if the output is considered to be a single hexadecimal number, whether this string
404   * is big-endian or little-endian depends on the byte order of {@link #asBytes}. This may be
405   * surprising for implementations of {@code HashCode} that represent the number in big-endian
406   * since everything else in the hashing API uniformly treats multibyte values as little-endian.
407   *
408   * <p>To create a {@code HashCode} from its string representation, see {@link #fromString}.
409   */
410  @Override
411  public final String toString() {
412    byte[] bytes = getBytesInternal();
413    StringBuilder sb = new StringBuilder(2 * bytes.length);
414    for (byte b : bytes) {
415      sb.append(hexDigits[(b >> 4) & 0xf]).append(hexDigits[b & 0xf]);
416    }
417    return sb.toString();
418  }
419
420  private static final char[] hexDigits = "0123456789abcdef".toCharArray();
421}