001/*
002 * Copyright (C) 2006 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014
015package com.google.common.escape;
016
017import static com.google.common.base.Preconditions.checkNotNull;
018
019import com.google.common.annotations.GwtCompatible;
020import javax.annotation.CheckForNull;
021
022/**
023 * An object that converts literal text into a format safe for inclusion in a particular context
024 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
025 * text is performed automatically by the relevant parser.
026 *
027 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
028 * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
029 * resulting XML document is parsed, the parser API will return this text as the original literal
030 * string {@code "Foo<Bar>"}.
031 *
032 * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by
033 * multiple threads.
034 *
035 * <p>Popular escapers are defined as constants in classes like {@link
036 * com.google.common.html.HtmlEscapers} and {@link com.google.common.xml.XmlEscapers}. To create
037 * your own escapers extend this class and implement the {@link #escape(char)} method.
038 *
039 * @author Sven Mawson
040 * @since 15.0
041 */
042@GwtCompatible
043@ElementTypesAreNonnullByDefault
044public abstract class CharEscaper extends Escaper {
045  /** Constructor for use by subclasses. */
046  protected CharEscaper() {}
047
048  /**
049   * Returns the escaped form of a given literal string.
050   *
051   * @param string the literal string to be escaped
052   * @return the escaped form of {@code string}
053   * @throws NullPointerException if {@code string} is null
054   */
055  @Override
056  public String escape(String string) {
057    checkNotNull(string); // GWT specific check (do not optimize)
058    // Inlineable fast-path loop which hands off to escapeSlow() only if needed
059    int length = string.length();
060    for (int index = 0; index < length; index++) {
061      if (escape(string.charAt(index)) != null) {
062        return escapeSlow(string, index);
063      }
064    }
065    return string;
066  }
067
068  /**
069   * Returns the escaped form of the given character, or {@code null} if this character does not
070   * need to be escaped. If an empty array is returned, this effectively strips the input character
071   * from the resulting text.
072   *
073   * <p>If the character does not need to be escaped, this method should return {@code null}, rather
074   * than a one-character array containing the character itself. This enables the escaping algorithm
075   * to perform more efficiently.
076   *
077   * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should
078   * not throw any exceptions.
079   *
080   * @param c the character to escape if necessary
081   * @return the replacement characters, or {@code null} if no escaping was needed
082   */
083  @CheckForNull
084  protected abstract char[] escape(char c);
085
086  /**
087   * Returns the escaped form of a given literal string, starting at the given index. This method is
088   * called by the {@link #escape(String)} method when it discovers that escaping is required. It is
089   * protected to allow subclasses to override the fastpath escaping function to inline their
090   * escaping test. See {@link CharEscaperBuilder} for an example usage.
091   *
092   * @param s the literal string to be escaped
093   * @param index the index to start escaping from
094   * @return the escaped form of {@code string}
095   * @throws NullPointerException if {@code string} is null
096   */
097  protected final String escapeSlow(String s, int index) {
098    int slen = s.length();
099
100    // Get a destination buffer and setup some loop variables.
101    char[] dest = Platform.charBufferFromThreadLocal();
102    int destSize = dest.length;
103    int destIndex = 0;
104    int lastEscape = 0;
105
106    // Loop through the rest of the string, replacing when needed into the
107    // destination buffer, which gets grown as needed as well.
108    for (; index < slen; index++) {
109
110      // Get a replacement for the current character.
111      char[] r = escape(s.charAt(index));
112
113      // If no replacement is needed, just continue.
114      if (r == null) {
115        continue;
116      }
117
118      int rlen = r.length;
119      int charsSkipped = index - lastEscape;
120
121      // This is the size needed to add the replacement, not the full size
122      // needed by the string. We only regrow when we absolutely must, and
123      // when we do grow, grow enough to avoid excessive growing. Grow.
124      int sizeNeeded = destIndex + charsSkipped + rlen;
125      if (destSize < sizeNeeded) {
126        destSize = sizeNeeded + DEST_PAD_MULTIPLIER * (slen - index);
127        dest = growBuffer(dest, destIndex, destSize);
128      }
129
130      // If we have skipped any characters, we need to copy them now.
131      if (charsSkipped > 0) {
132        s.getChars(lastEscape, index, dest, destIndex);
133        destIndex += charsSkipped;
134      }
135
136      // Copy the replacement string into the dest buffer as needed.
137      if (rlen > 0) {
138        System.arraycopy(r, 0, dest, destIndex, rlen);
139        destIndex += rlen;
140      }
141      lastEscape = index + 1;
142    }
143
144    // Copy leftover characters if there are any.
145    int charsLeft = slen - lastEscape;
146    if (charsLeft > 0) {
147      int sizeNeeded = destIndex + charsLeft;
148      if (destSize < sizeNeeded) {
149
150        // Regrow and copy, expensive! No padding as this is the final copy.
151        dest = growBuffer(dest, destIndex, sizeNeeded);
152      }
153      s.getChars(lastEscape, slen, dest, destIndex);
154      destIndex = sizeNeeded;
155    }
156    return new String(dest, 0, destIndex);
157  }
158
159  /**
160   * Helper method to grow the character buffer as needed, this only happens once in a while so it's
161   * ok if it's in a method call. If the index passed in is 0 then no copying will be done.
162   */
163  private static char[] growBuffer(char[] dest, int index, int size) {
164    if (size < 0) { // overflow - should be OutOfMemoryError but GWT/j2cl don't support it
165      throw new AssertionError("Cannot increase internal buffer any further");
166    }
167    char[] copy = new char[size];
168    if (index > 0) {
169      System.arraycopy(dest, 0, copy, 0, index);
170    }
171    return copy;
172  }
173
174  /** The multiplier for padding to use when growing the escape buffer. */
175  private static final int DEST_PAD_MULTIPLIER = 2;
176}