/*
 * Copyright (C) 2009 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
014
015package com.google.common.escape;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.GwtCompatible;
020import com.google.errorprone.annotations.CanIgnoreReturnValue;
021import java.util.HashMap;
022import java.util.Map;
023import javax.annotation.CheckForNull;
024import org.checkerframework.checker.nullness.qual.Nullable;
025
026/**
027 * Static utility methods pertaining to {@link Escaper} instances.
028 *
029 * @author Sven Mawson
030 * @author David Beaumont
031 * @since 15.0
032 */
033@GwtCompatible
034@ElementTypesAreNonnullByDefault
035public final class Escapers {
036  private Escapers() {}
037
038  /**
039   * Returns an {@link Escaper} that does no escaping, passing all character data through unchanged.
040   */
041  public static Escaper nullEscaper() {
042    return NULL_ESCAPER;
043  }
044
045  // An Escaper that efficiently performs no escaping.
046  // Extending CharEscaper (instead of Escaper) makes Escapers.compose() easier.
047  private static final Escaper NULL_ESCAPER =
048      new CharEscaper() {
049        @Override
050        public String escape(String string) {
051          return checkNotNull(string);
052        }
053
054        @Override
055        @CheckForNull
056        protected char[] escape(char c) {
057          // TODO: Fix tests not to call this directly and make it throw an error.
058          return null;
059        }
060      };
061
062  /**
063   * Returns a builder for creating simple, fast escapers. A builder instance can be reused and each
064   * escaper that is created will be a snapshot of the current builder state. Builders are not
065   * thread safe.
066   *
067   * <p>The initial state of the builder is such that:
068   *
069   * <ul>
070   *   <li>There are no replacement mappings
071   *   <li>{@code safeMin == Character.MIN_VALUE}
072   *   <li>{@code safeMax == Character.MAX_VALUE}
073   *   <li>{@code unsafeReplacement == null}
074   * </ul>
075   *
076   * <p>For performance reasons escapers created by this builder are not Unicode aware and will not
077   * validate the well-formedness of their input.
078   */
079  public static Builder builder() {
080    return new Builder();
081  }
082
083  /**
084   * A builder for simple, fast escapers.
085   *
086   * <p>Typically an escaper needs to deal with the escaping of high valued characters or code
087   * points. In these cases it is necessary to extend either {@link ArrayBasedCharEscaper} or {@link
088   * ArrayBasedUnicodeEscaper} to provide the desired behavior. However this builder is suitable for
089   * creating escapers that replace a relative small set of characters.
090   *
091   * @author David Beaumont
092   * @since 15.0
093   */
094  public static final class Builder {
095    private final Map<Character, String> replacementMap = new HashMap<>();
096    private char safeMin = Character.MIN_VALUE;
097    private char safeMax = Character.MAX_VALUE;
098    @CheckForNull private String unsafeReplacement = null;
099
100    // The constructor is exposed via the builder() method above.
101    private Builder() {}
102
103    /**
104     * Sets the safe range of characters for the escaper. Characters in this range that have no
105     * explicit replacement are considered 'safe' and remain unescaped in the output. If {@code
106     * safeMax < safeMin} then the safe range is empty.
107     *
108     * @param safeMin the lowest 'safe' character
109     * @param safeMax the highest 'safe' character
110     * @return the builder instance
111     */
112    @CanIgnoreReturnValue
113    public Builder setSafeRange(char safeMin, char safeMax) {
114      this.safeMin = safeMin;
115      this.safeMax = safeMax;
116      return this;
117    }
118
119    /**
120     * Sets the replacement string for any characters outside the 'safe' range that have no explicit
121     * replacement. If {@code unsafeReplacement} is {@code null} then no replacement will occur, if
122     * it is {@code ""} then the unsafe characters are removed from the output.
123     *
124     * @param unsafeReplacement the string to replace unsafe characters
125     * @return the builder instance
126     */
127    @CanIgnoreReturnValue
128    public Builder setUnsafeReplacement(@Nullable String unsafeReplacement) {
129      this.unsafeReplacement = unsafeReplacement;
130      return this;
131    }
132
133    /**
134     * Adds a replacement string for the given input character. The specified character will be
135     * replaced by the given string whenever it occurs in the input, irrespective of whether it lies
136     * inside or outside the 'safe' range.
137     *
138     * @param c the character to be replaced
139     * @param replacement the string to replace the given character
140     * @return the builder instance
141     * @throws NullPointerException if {@code replacement} is null
142     */
143    @CanIgnoreReturnValue
144    public Builder addEscape(char c, String replacement) {
145      checkNotNull(replacement);
146      // This can replace an existing character (the builder is re-usable).
147      replacementMap.put(c, replacement);
148      return this;
149    }
150
151    /** Returns a new escaper based on the current state of the builder. */
152    public Escaper build() {
153      return new ArrayBasedCharEscaper(replacementMap, safeMin, safeMax) {
154        @CheckForNull
155        private final char[] replacementChars =
156            unsafeReplacement != null ? unsafeReplacement.toCharArray() : null;
157
158        @Override
159        @CheckForNull
160        protected char[] escapeUnsafe(char c) {
161          return replacementChars;
162        }
163      };
164    }
165  }
166
167  /**
168   * Returns a string that would replace the given character in the specified escaper, or {@code
169   * null} if no replacement should be made. This method is intended for use in tests through the
170   * {@code EscaperAsserts} class; production users of {@link CharEscaper} should limit themselves
171   * to its public interface.
172   *
173   * @param c the character to escape if necessary
174   * @return the replacement string, or {@code null} if no escaping was needed
175   */
176  @CheckForNull
177  public static String computeReplacement(CharEscaper escaper, char c) {
178    return stringOrNull(escaper.escape(c));
179  }
180
181  /**
182   * Returns a string that would replace the given character in the specified escaper, or {@code
183   * null} if no replacement should be made. This method is intended for use in tests through the
184   * {@code EscaperAsserts} class; production users of {@link UnicodeEscaper} should limit
185   * themselves to its public interface.
186   *
187   * @param cp the Unicode code point to escape if necessary
188   * @return the replacement string, or {@code null} if no escaping was needed
189   */
190  @CheckForNull
191  public static String computeReplacement(UnicodeEscaper escaper, int cp) {
192    return stringOrNull(escaper.escape(cp));
193  }
194
195  @CheckForNull
196  private static String stringOrNull(@CheckForNull char[] in) {
197    return (in == null) ? null : new String(in);
198  }
199}