/*
 * Copyright (C) 2009 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
014
015package com.google.common.escape;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.GwtCompatible;
020import com.google.errorprone.annotations.CanIgnoreReturnValue;
021import java.util.HashMap;
022import java.util.Map;
023import javax.annotation.CheckForNull;
024import org.checkerframework.checker.nullness.qual.Nullable;
025
026/**
027 * Static utility methods pertaining to {@link Escaper} instances.
028 *
029 * @author Sven Mawson
030 * @author David Beaumont
031 * @since 15.0
032 */
033@GwtCompatible
034public final class Escapers {
035  private Escapers() {}
036
037  /**
038   * Returns an {@link Escaper} that does no escaping, passing all character data through unchanged.
039   */
040  public static Escaper nullEscaper() {
041    return NULL_ESCAPER;
042  }
043
044  // An Escaper that efficiently performs no escaping.
045  // Extending CharEscaper (instead of Escaper) makes Escapers.compose() easier.
046  private static final Escaper NULL_ESCAPER =
047      new CharEscaper() {
048        @Override
049        public String escape(String string) {
050          return checkNotNull(string);
051        }
052
053        @Override
054        @CheckForNull
055        protected char[] escape(char c) {
056          // TODO: Fix tests not to call this directly and make it throw an error.
057          return null;
058        }
059      };
060
061  /**
062   * Returns a builder for creating simple, fast escapers. A builder instance can be reused and each
063   * escaper that is created will be a snapshot of the current builder state. Builders are not
064   * thread safe.
065   *
066   * <p>The initial state of the builder is such that:
067   *
068   * <ul>
069   *   <li>There are no replacement mappings
070   *   <li>{@code safeMin == Character.MIN_VALUE}
071   *   <li>{@code safeMax == Character.MAX_VALUE}
072   *   <li>{@code unsafeReplacement == null}
073   * </ul>
074   *
075   * <p>For performance reasons escapers created by this builder are not Unicode aware and will not
076   * validate the well-formedness of their input.
077   */
078  public static Builder builder() {
079    return new Builder();
080  }
081
082  /**
083   * A builder for simple, fast escapers.
084   *
085   * <p>Typically an escaper needs to deal with the escaping of high valued characters or code
086   * points. In these cases it is necessary to extend either {@link ArrayBasedCharEscaper} or {@link
087   * ArrayBasedUnicodeEscaper} to provide the desired behavior. However this builder is suitable for
088   * creating escapers that replace a relative small set of characters.
089   *
090   * @author David Beaumont
091   * @since 15.0
092   */
093  public static final class Builder {
094    private final Map<Character, String> replacementMap = new HashMap<>();
095    private char safeMin = Character.MIN_VALUE;
096    private char safeMax = Character.MAX_VALUE;
097    @CheckForNull private String unsafeReplacement = null;
098
099    // The constructor is exposed via the builder() method above.
100    private Builder() {}
101
102    /**
103     * Sets the safe range of characters for the escaper. Characters in this range that have no
104     * explicit replacement are considered 'safe' and remain unescaped in the output. If {@code
105     * safeMax < safeMin} then the safe range is empty.
106     *
107     * @param safeMin the lowest 'safe' character
108     * @param safeMax the highest 'safe' character
109     * @return the builder instance
110     */
111    @CanIgnoreReturnValue
112    public Builder setSafeRange(char safeMin, char safeMax) {
113      this.safeMin = safeMin;
114      this.safeMax = safeMax;
115      return this;
116    }
117
118    /**
119     * Sets the replacement string for any characters outside the 'safe' range that have no explicit
120     * replacement. If {@code unsafeReplacement} is {@code null} then no replacement will occur, if
121     * it is {@code ""} then the unsafe characters are removed from the output.
122     *
123     * @param unsafeReplacement the string to replace unsafe characters
124     * @return the builder instance
125     */
126    @CanIgnoreReturnValue
127    public Builder setUnsafeReplacement(@Nullable String unsafeReplacement) {
128      this.unsafeReplacement = unsafeReplacement;
129      return this;
130    }
131
132    /**
133     * Adds a replacement string for the given input character. The specified character will be
134     * replaced by the given string whenever it occurs in the input, irrespective of whether it lies
135     * inside or outside the 'safe' range.
136     *
137     * @param c the character to be replaced
138     * @param replacement the string to replace the given character
139     * @return the builder instance
140     * @throws NullPointerException if {@code replacement} is null
141     */
142    @CanIgnoreReturnValue
143    public Builder addEscape(char c, String replacement) {
144      checkNotNull(replacement);
145      // This can replace an existing character (the builder is re-usable).
146      replacementMap.put(c, replacement);
147      return this;
148    }
149
150    /** Returns a new escaper based on the current state of the builder. */
151    public Escaper build() {
152      return new ArrayBasedCharEscaper(replacementMap, safeMin, safeMax) {
153        @CheckForNull
154        private final char[] replacementChars =
155            unsafeReplacement != null ? unsafeReplacement.toCharArray() : null;
156
157        @Override
158        @CheckForNull
159        protected char[] escapeUnsafe(char c) {
160          return replacementChars;
161        }
162      };
163    }
164  }
165
166  /**
167   * Returns a string that would replace the given character in the specified escaper, or {@code
168   * null} if no replacement should be made. This method is intended for use in tests through the
169   * {@code EscaperAsserts} class; production users of {@link CharEscaper} should limit themselves
170   * to its public interface.
171   *
172   * @param c the character to escape if necessary
173   * @return the replacement string, or {@code null} if no escaping was needed
174   */
175  @CheckForNull
176  public static String computeReplacement(CharEscaper escaper, char c) {
177    return stringOrNull(escaper.escape(c));
178  }
179
180  /**
181   * Returns a string that would replace the given character in the specified escaper, or {@code
182   * null} if no replacement should be made. This method is intended for use in tests through the
183   * {@code EscaperAsserts} class; production users of {@link UnicodeEscaper} should limit
184   * themselves to its public interface.
185   *
186   * @param cp the Unicode code point to escape if necessary
187   * @return the replacement string, or {@code null} if no escaping was needed
188   */
189  @CheckForNull
190  public static String computeReplacement(UnicodeEscaper escaper, int cp) {
191    return stringOrNull(escaper.escape(cp));
192  }
193
194  @CheckForNull
195  private static String stringOrNull(@CheckForNull char[] in) {
196    return (in == null) ? null : new String(in);
197  }
198}