001/*
002 * Copyright (C) 2011 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.hash;
016
017import com.google.common.annotations.Beta;
018import com.google.errorprone.annotations.CanIgnoreReturnValue;
019import java.nio.ByteBuffer;
020import java.nio.charset.Charset;
021import org.checkerframework.checker.nullness.qual.Nullable;
022
023/**
024 * A {@link PrimitiveSink} that can compute a hash code after reading the input. Each hasher should
025 * translate all multibyte values ({@link #putInt(int)}, {@link #putLong(long)}, etc) to bytes in
026 * little-endian order.
027 *
028 * <p><b>Warning:</b> The result of calling any methods after calling {@link #hash} is undefined.
029 *
030 * <p><b>Warning:</b> Using a specific character encoding when hashing a {@link CharSequence} with
031 * {@link #putString(CharSequence, Charset)} is generally only useful for cross-language
032 * compatibility (otherwise prefer {@link #putUnencodedChars}). However, the character encodings
033 * must be identical across languages. Also beware that {@link Charset} definitions may occasionally
034 * change between Java releases.
035 *
036 * <p><b>Warning:</b> Chunks of data that are put into the {@link Hasher} are not delimited. The
037 * resulting {@link HashCode} is dependent only on the bytes inserted, and the order in which they
038 * were inserted, not how those bytes were chunked into discrete put() operations. For example, the
039 * following three expressions all generate colliding hash codes:
040 *
041 * <pre>{@code
042 * newHasher().putByte(b1).putByte(b2).putByte(b3).hash()
043 * newHasher().putByte(b1).putBytes(new byte[] { b2, b3 }).hash()
044 * newHasher().putBytes(new byte[] { b1, b2, b3 }).hash()
045 * }</pre>
046 *
047 * <p>If you wish to avoid this, you should either prepend or append the size of each chunk. Keep in
048 * mind that when dealing with char sequences, the encoded form of two concatenated char sequences
049 * is not equivalent to the concatenation of their encoded form. Therefore, {@link
050 * #putString(CharSequence, Charset)} should only be used consistently with <i>complete</i>
051 * sequences and not broken into chunks.
052 *
053 * @author Kevin Bourrillion
054 * @since 11.0
055 */
056@Beta
057@ElementTypesAreNonnullByDefault
058public interface Hasher extends PrimitiveSink {
059  @CanIgnoreReturnValue
060  @Override
061  Hasher putByte(byte b);
062
063  @CanIgnoreReturnValue
064  @Override
065  Hasher putBytes(byte[] bytes);
066
067  @CanIgnoreReturnValue
068  @Override
069  Hasher putBytes(byte[] bytes, int off, int len);
070
071  @CanIgnoreReturnValue
072  @Override
073  Hasher putBytes(ByteBuffer bytes);
074
075  @CanIgnoreReturnValue
076  @Override
077  Hasher putShort(short s);
078
079  @CanIgnoreReturnValue
080  @Override
081  Hasher putInt(int i);
082
083  @CanIgnoreReturnValue
084  @Override
085  Hasher putLong(long l);
086
087  /** Equivalent to {@code putInt(Float.floatToRawIntBits(f))}. */
088  @CanIgnoreReturnValue
089  @Override
090  Hasher putFloat(float f);
091
092  /** Equivalent to {@code putLong(Double.doubleToRawLongBits(d))}. */
093  @CanIgnoreReturnValue
094  @Override
095  Hasher putDouble(double d);
096
097  /** Equivalent to {@code putByte(b ? (byte) 1 : (byte) 0)}. */
098  @CanIgnoreReturnValue
099  @Override
100  Hasher putBoolean(boolean b);
101
102  @CanIgnoreReturnValue
103  @Override
104  Hasher putChar(char c);
105
106  /**
107   * Equivalent to processing each {@code char} value in the {@code CharSequence}, in order. In
108   * other words, no character encoding is performed; the low byte and high byte of each {@code
109   * char} are hashed directly (in that order). The input must not be updated while this method is
110   * in progress.
111   *
112   * <p><b>Warning:</b> This method will produce different output than most other languages do when
113   * running the same hash function on the equivalent input. For cross-language compatibility, use
114   * {@link #putString}, usually with a charset of UTF-8. For other use cases, use {@code
115   * putUnencodedChars}.
116   *
117   * @since 15.0 (since 11.0 as putString(CharSequence)).
118   */
119  @CanIgnoreReturnValue
120  @Override
121  Hasher putUnencodedChars(CharSequence charSequence);
122
123  /**
124   * Equivalent to {@code putBytes(charSequence.toString().getBytes(charset))}.
125   *
126   * <p><b>Warning:</b> This method, which reencodes the input before hashing it, is useful only for
127   * cross-language compatibility. For other use cases, prefer {@link #putUnencodedChars}, which is
128   * faster, produces the same output across Java releases, and hashes every {@code char} in the
129   * input, even if some are invalid.
130   */
131  @CanIgnoreReturnValue
132  @Override
133  Hasher putString(CharSequence charSequence, Charset charset);
134
135  /** A simple convenience for {@code funnel.funnel(object, this)}. */
136  @CanIgnoreReturnValue
137  <T extends @Nullable Object> Hasher putObject(
138      @ParametricNullness T instance, Funnel<? super T> funnel);
139
140  /**
141   * Computes a hash code based on the data that have been provided to this hasher. The result is
142   * unspecified if this method is called more than once on the same instance.
143   */
144  HashCode hash();
145
146  /**
147   * {@inheritDoc}
148   *
149   * @deprecated This returns {@link Object#hashCode()}; you almost certainly mean to call {@code
150   *     hash().asInt()}.
151   */
152  @Override
153  @Deprecated
154  int hashCode();
155}