001/* 002 * Copyright (C) 2006 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.escape; 018 019import static com.google.common.base.Preconditions.checkNotNull; 020 021import com.google.common.annotations.Beta; 022import com.google.common.annotations.GwtCompatible; 023 024/** 025 * An object that converts literal text into a format safe for inclusion in a particular context 026 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the 027 * text is performed automatically by the relevant parser. 028 * 029 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code 030 * "Foo<Bar>"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the 031 * resulting XML document is parsed, the parser API will return this text as the original literal 032 * string {@code "Foo<Bar>"}. 033 * 034 * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by 035 * multiple threads. 036 * 037 * <p>Several popular escapers are defined as constants in classes like {@link 038 * com.google.common.html.HtmlEscapers}, {@link com.google.common.xml.XmlEscapers}, and {@link 039 * SourceCodeEscapers}. To create your own escapers extend this class and implement the {@link 040 * #escape(char)} method. 041 * 042 * @author Sven Mawson 043 * @since 15.0 044 */ 045@Beta 046@GwtCompatible 047public abstract class CharEscaper extends Escaper { 048 /** Constructor for use by subclasses. */ 049 protected CharEscaper() {} 050 051 /** 052 * Returns the escaped form of a given literal string. 053 * 054 * @param string the literal string to be escaped 055 * @return the escaped form of {@code string} 056 * @throws NullPointerException if {@code string} is null 057 */ 058 @Override public String escape(String string) { 059 checkNotNull(string); // GWT specific check (do not optimize) 060 // Inlineable fast-path loop which hands off to escapeSlow() only if needed 061 int length = string.length(); 062 for (int index = 0; index < length; index++) { 063 if (escape(string.charAt(index)) != null) { 064 return escapeSlow(string, index); 065 } 066 } 067 return string; 068 } 069 070 /** 071 * Returns the escaped form of a given literal string, starting at the given index. This method is 072 * called by the {@link #escape(String)} method when it discovers that escaping is required. It is 073 * protected to allow subclasses to override the fastpath escaping function to inline their 074 * escaping test. See {@link CharEscaperBuilder} for an example usage. 075 * 076 * @param s the literal string to be escaped 077 * @param index the index to start escaping from 078 * @return the escaped form of {@code string} 079 * @throws NullPointerException if {@code string} is null 080 */ 081 protected final String escapeSlow(String s, int index) { 082 int slen = s.length(); 083 084 // Get a destination buffer and setup some loop variables. 085 char[] dest = Platform.charBufferFromThreadLocal(); 086 int destSize = dest.length; 087 int destIndex = 0; 088 int lastEscape = 0; 089 090 // Loop through the rest of the string, replacing when needed into the 091 // destination buffer, which gets grown as needed as well. 092 for (; index < slen; index++) { 093 094 // Get a replacement for the current character. 095 char[] r = escape(s.charAt(index)); 096 097 // If no replacement is needed, just continue. 098 if (r == null) continue; 099 100 int rlen = r.length; 101 int charsSkipped = index - lastEscape; 102 103 // This is the size needed to add the replacement, not the full size 104 // needed by the string. We only regrow when we absolutely must. 105 int sizeNeeded = destIndex + charsSkipped + rlen; 106 if (destSize < sizeNeeded) { 107 destSize = sizeNeeded + (slen - index) + DEST_PAD; 108 dest = growBuffer(dest, destIndex, destSize); 109 } 110 111 // If we have skipped any characters, we need to copy them now. 112 if (charsSkipped > 0) { 113 s.getChars(lastEscape, index, dest, destIndex); 114 destIndex += charsSkipped; 115 } 116 117 // Copy the replacement string into the dest buffer as needed. 118 if (rlen > 0) { 119 System.arraycopy(r, 0, dest, destIndex, rlen); 120 destIndex += rlen; 121 } 122 lastEscape = index + 1; 123 } 124 125 // Copy leftover characters if there are any. 126 int charsLeft = slen - lastEscape; 127 if (charsLeft > 0) { 128 int sizeNeeded = destIndex + charsLeft; 129 if (destSize < sizeNeeded) { 130 131 // Regrow and copy, expensive! No padding as this is the final copy. 132 dest = growBuffer(dest, destIndex, sizeNeeded); 133 } 134 s.getChars(lastEscape, slen, dest, destIndex); 135 destIndex = sizeNeeded; 136 } 137 return new String(dest, 0, destIndex); 138 } 139 140 /** 141 * Returns the escaped form of the given character, or {@code null} if this character does not 142 * need to be escaped. If an empty array is returned, this effectively strips the input character 143 * from the resulting text. 144 * 145 * <p>If the character does not need to be escaped, this method should return {@code null}, rather 146 * than a one-character array containing the character itself. This enables the escaping algorithm 147 * to perform more efficiently. 148 * 149 * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should 150 * not throw any exceptions. 151 * 152 * @param c the character to escape if necessary 153 * @return the replacement characters, or {@code null} if no escaping was needed 154 */ 155 protected abstract char[] escape(char c); 156 157 /** 158 * Helper method to grow the character buffer as needed, this only happens once in a while so it's 159 * ok if it's in a method call. If the index passed in is 0 then no copying will be done. 160 */ 161 private static char[] growBuffer(char[] dest, int index, int size) { 162 char[] copy = new char[size]; 163 if (index > 0) { 164 System.arraycopy(dest, 0, copy, 0, index); 165 } 166 return copy; 167 } 168 169 /** 170 * The amount of padding to use when growing the escape buffer. 171 */ 172 private static final int DEST_PAD = 32; 173}