001/*
002 * Copyright (C) 2006 The Guava Authors
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 * http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016
017package com.google.common.escape;
018
019import static com.google.common.base.Preconditions.checkNotNull;
020
021import com.google.common.annotations.Beta;
022import com.google.common.annotations.GwtCompatible;
023
024/**
025 * An object that converts literal text into a format safe for inclusion in a particular context
026 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the
027 * text is performed automatically by the relevant parser.
028 *
029 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code
030 * "Foo&lt;Bar&gt;"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the
031 * resulting XML document is parsed, the parser API will return this text as the original literal
032 * string {@code "Foo<Bar>"}.
033 *
034 * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by
035 * multiple threads.
036 *
037 * <p>Several popular escapers are defined as constants in classes like {@link
038 * com.google.common.html.HtmlEscapers}, {@link com.google.common.xml.XmlEscapers}, and {@link
039 * SourceCodeEscapers}. To create your own escapers extend this class and implement the {@link
040 * #escape(char)} method.
041 *
042 * @author Sven Mawson
043 * @since 15.0
044 */
045@Beta
046@GwtCompatible
047public abstract class CharEscaper extends Escaper {
048  /** Constructor for use by subclasses. */
049  protected CharEscaper() {}
050
051  /**
052   * Returns the escaped form of a given literal string.
053   *
054   * @param string the literal string to be escaped
055   * @return the escaped form of {@code string}
056   * @throws NullPointerException if {@code string} is null
057   */
058  @Override public String escape(String string) {
059    checkNotNull(string);  // GWT specific check (do not optimize)
060    // Inlineable fast-path loop which hands off to escapeSlow() only if needed
061    int length = string.length();
062    for (int index = 0; index < length; index++) {
063      if (escape(string.charAt(index)) != null) {
064        return escapeSlow(string, index);
065      }
066    }
067    return string;
068  }
069
070  /**
071   * Returns the escaped form of a given literal string, starting at the given index. This method is
072   * called by the {@link #escape(String)} method when it discovers that escaping is required. It is
073   * protected to allow subclasses to override the fastpath escaping function to inline their
074   * escaping test. See {@link CharEscaperBuilder} for an example usage.
075   *
076   * @param s the literal string to be escaped
077   * @param index the index to start escaping from
078   * @return the escaped form of {@code string}
079   * @throws NullPointerException if {@code string} is null
080   */
081  protected final String escapeSlow(String s, int index) {
082    int slen = s.length();
083
084    // Get a destination buffer and setup some loop variables.
085    char[] dest = Platform.charBufferFromThreadLocal();
086    int destSize = dest.length;
087    int destIndex = 0;
088    int lastEscape = 0;
089
090    // Loop through the rest of the string, replacing when needed into the
091    // destination buffer, which gets grown as needed as well.
092    for (; index < slen; index++) {
093
094      // Get a replacement for the current character.
095      char[] r = escape(s.charAt(index));
096
097      // If no replacement is needed, just continue.
098      if (r == null) continue;
099
100      int rlen = r.length;
101      int charsSkipped = index - lastEscape;
102
103      // This is the size needed to add the replacement, not the full size
104      // needed by the string. We only regrow when we absolutely must.
105      int sizeNeeded = destIndex + charsSkipped + rlen;
106      if (destSize < sizeNeeded) {
107        destSize = sizeNeeded + (slen - index) + DEST_PAD;
108        dest = growBuffer(dest, destIndex, destSize);
109      }
110
111      // If we have skipped any characters, we need to copy them now.
112      if (charsSkipped > 0) {
113        s.getChars(lastEscape, index, dest, destIndex);
114        destIndex += charsSkipped;
115      }
116
117      // Copy the replacement string into the dest buffer as needed.
118      if (rlen > 0) {
119        System.arraycopy(r, 0, dest, destIndex, rlen);
120        destIndex += rlen;
121      }
122      lastEscape = index + 1;
123    }
124
125    // Copy leftover characters if there are any.
126    int charsLeft = slen - lastEscape;
127    if (charsLeft > 0) {
128      int sizeNeeded = destIndex + charsLeft;
129      if (destSize < sizeNeeded) {
130
131        // Regrow and copy, expensive! No padding as this is the final copy.
132        dest = growBuffer(dest, destIndex, sizeNeeded);
133      }
134      s.getChars(lastEscape, slen, dest, destIndex);
135      destIndex = sizeNeeded;
136    }
137    return new String(dest, 0, destIndex);
138  }
139
140  /**
141   * Returns the escaped form of the given character, or {@code null} if this character does not
142   * need to be escaped. If an empty array is returned, this effectively strips the input character
143   * from the resulting text.
144   *
145   * <p>If the character does not need to be escaped, this method should return {@code null}, rather
146   * than a one-character array containing the character itself. This enables the escaping algorithm
147   * to perform more efficiently.
148   *
149   * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should
150   * not throw any exceptions.
151   *
152   * @param c the character to escape if necessary
153   * @return the replacement characters, or {@code null} if no escaping was needed
154   */
155  protected abstract char[] escape(char c);
156
157  /**
158   * Helper method to grow the character buffer as needed, this only happens once in a while so it's
159   * ok if it's in a method call. If the index passed in is 0 then no copying will be done.
160   */
161  private static char[] growBuffer(char[] dest, int index, int size) {
162    char[] copy = new char[size];
163    if (index > 0) {
164      System.arraycopy(dest, 0, copy, 0, index);
165    }
166    return copy;
167  }
168
169  /**
170   * The amount of padding to use when growing the escape buffer.
171   */
172  private static final int DEST_PAD = 32;
173}