001/*
002 * Copyright (C) 2009 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.escape;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.GwtCompatible;
020import com.google.errorprone.annotations.CanIgnoreReturnValue;
021import java.util.HashMap;
022import java.util.Map;
023import org.jspecify.annotations.Nullable;
024
025/**
026 * Static utility methods pertaining to {@link Escaper} instances.
027 *
028 * @author Sven Mawson
029 * @author David Beaumont
030 * @since 15.0
031 */
032@GwtCompatible
033public final class Escapers {
034  private Escapers() {}
035
036  /**
037   * Returns an {@link Escaper} that does no escaping, passing all character data through unchanged.
038   */
039  public static Escaper nullEscaper() {
040    return NULL_ESCAPER;
041  }
042
043  // An Escaper that efficiently performs no escaping.
044  // Extending CharEscaper (instead of Escaper) makes Escapers.compose() easier.
045  private static final Escaper NULL_ESCAPER =
046      new CharEscaper() {
047        @Override
048        public String escape(String string) {
049          return checkNotNull(string);
050        }
051
052        @Override
053        protected char @Nullable [] escape(char c) {
054          // TODO: Fix tests not to call this directly and make it throw an error.
055          return null;
056        }
057      };
058
059  /**
060   * Returns a builder for creating simple, fast escapers. A builder instance can be reused and each
061   * escaper that is created will be a snapshot of the current builder state. Builders are not
062   * thread safe.
063   *
064   * <p>The initial state of the builder is such that:
065   *
066   * <ul>
067   *   <li>There are no replacement mappings
068   *   <li>{@code safeMin == Character.MIN_VALUE}
069   *   <li>{@code safeMax == Character.MAX_VALUE}
070   *   <li>{@code unsafeReplacement == null}
071   * </ul>
072   *
073   * <p>For performance reasons escapers created by this builder are not Unicode aware and will not
074   * validate the well-formedness of their input.
075   */
076  public static Builder builder() {
077    return new Builder();
078  }
079
080  /**
081   * A builder for simple, fast escapers.
082   *
083   * <p>Typically an escaper needs to deal with the escaping of high valued characters or code
084   * points. In these cases it is necessary to extend either {@link ArrayBasedCharEscaper} or {@link
085   * ArrayBasedUnicodeEscaper} to provide the desired behavior. However this builder is suitable for
086   * creating escapers that replace a relative small set of characters.
087   *
088   * @author David Beaumont
089   * @since 15.0
090   */
091  public static final class Builder {
092    private final Map<Character, String> replacementMap = new HashMap<>();
093    private char safeMin = Character.MIN_VALUE;
094    private char safeMax = Character.MAX_VALUE;
095    private @Nullable String unsafeReplacement = null;
096
097    // The constructor is exposed via the builder() method above.
098    private Builder() {}
099
100    /**
101     * Sets the safe range of characters for the escaper. Characters in this range that have no
102     * explicit replacement are considered 'safe' and remain unescaped in the output. If {@code
103     * safeMax < safeMin} then the safe range is empty.
104     *
105     * @param safeMin the lowest 'safe' character
106     * @param safeMax the highest 'safe' character
107     * @return the builder instance
108     */
109    @CanIgnoreReturnValue
110    public Builder setSafeRange(char safeMin, char safeMax) {
111      this.safeMin = safeMin;
112      this.safeMax = safeMax;
113      return this;
114    }
115
116    /**
117     * Sets the replacement string for any characters outside the 'safe' range that have no explicit
118     * replacement. If {@code unsafeReplacement} is {@code null} then no replacement will occur, if
119     * it is {@code ""} then the unsafe characters are removed from the output.
120     *
121     * @param unsafeReplacement the string to replace unsafe characters
122     * @return the builder instance
123     */
124    @CanIgnoreReturnValue
125    public Builder setUnsafeReplacement(@Nullable String unsafeReplacement) {
126      this.unsafeReplacement = unsafeReplacement;
127      return this;
128    }
129
130    /**
131     * Adds a replacement string for the given input character. The specified character will be
132     * replaced by the given string whenever it occurs in the input, irrespective of whether it lies
133     * inside or outside the 'safe' range.
134     *
135     * @param c the character to be replaced
136     * @param replacement the string to replace the given character
137     * @return the builder instance
138     * @throws NullPointerException if {@code replacement} is null
139     */
140    @CanIgnoreReturnValue
141    public Builder addEscape(char c, String replacement) {
142      checkNotNull(replacement);
143      // This can replace an existing character (the builder is re-usable).
144      replacementMap.put(c, replacement);
145      return this;
146    }
147
148    /** Returns a new escaper based on the current state of the builder. */
149    public Escaper build() {
150      return new ArrayBasedCharEscaper(replacementMap, safeMin, safeMax) {
151        private final char @Nullable [] replacementChars =
152            unsafeReplacement != null ? unsafeReplacement.toCharArray() : null;
153
154        @Override
155        protected char @Nullable [] escapeUnsafe(char c) {
156          return replacementChars;
157        }
158      };
159    }
160  }
161
162  /**
163   * Returns a string that would replace the given character in the specified escaper, or {@code
164   * null} if no replacement should be made. This method is intended for use in tests through the
165   * {@code EscaperAsserts} class; production users of {@link CharEscaper} should limit themselves
166   * to its public interface.
167   *
168   * @param c the character to escape if necessary
169   * @return the replacement string, or {@code null} if no escaping was needed
170   */
171  public static @Nullable String computeReplacement(CharEscaper escaper, char c) {
172    return stringOrNull(escaper.escape(c));
173  }
174
175  /**
176   * Returns a string that would replace the given character in the specified escaper, or {@code
177   * null} if no replacement should be made. This method is intended for use in tests through the
178   * {@code EscaperAsserts} class; production users of {@link UnicodeEscaper} should limit
179   * themselves to its public interface.
180   *
181   * @param cp the Unicode code point to escape if necessary
182   * @return the replacement string, or {@code null} if no escaping was needed
183   */
184  public static @Nullable String computeReplacement(UnicodeEscaper escaper, int cp) {
185    return stringOrNull(escaper.escape(cp));
186  }
187
188  private static @Nullable String stringOrNull(char @Nullable [] in) {
189    return (in == null) ? null : new String(in);
190  }
191}