001/* 002 * Copyright (C) 2006 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014 015package com.google.common.escape; 016 017import static com.google.common.base.Preconditions.checkNotNull; 018 019import com.google.common.annotations.GwtCompatible; 020import org.checkerframework.checker.nullness.qual.Nullable; 021 022/** 023 * An object that converts literal text into a format safe for inclusion in a particular context 024 * (such as an XML document). Typically (but not always), the inverse process of "unescaping" the 025 * text is performed automatically by the relevant parser. 026 * 027 * <p>For example, an XML escaper would convert the literal string {@code "Foo<Bar>"} into {@code 028 * "Foo<Bar>"} to prevent {@code "<Bar>"} from being confused with an XML tag. When the 029 * resulting XML document is parsed, the parser API will return this text as the original literal 030 * string {@code "Foo<Bar>"}. 031 * 032 * <p>A {@code CharEscaper} instance is required to be stateless, and safe when used concurrently by 033 * multiple threads. 034 * 035 * <p>Popular escapers are defined as constants in classes like {@link 036 * com.google.common.html.HtmlEscapers} and {@link com.google.common.xml.XmlEscapers}. To create 037 * your own escapers extend this class and implement the {@link #escape(char)} method. 038 * 039 * @author Sven Mawson 040 * @since 15.0 041 */ 042@GwtCompatible 043public abstract class CharEscaper extends Escaper { 044 /** Constructor for use by subclasses. */ 045 protected CharEscaper() {} 046 047 /** 048 * Returns the escaped form of a given literal string. 049 * 050 * @param string the literal string to be escaped 051 * @return the escaped form of {@code string} 052 * @throws NullPointerException if {@code string} is null 053 */ 054 @Override 055 public String escape(String string) { 056 checkNotNull(string); // GWT specific check (do not optimize) 057 // Inlineable fast-path loop which hands off to escapeSlow() only if needed 058 int length = string.length(); 059 for (int index = 0; index < length; index++) { 060 if (escape(string.charAt(index)) != null) { 061 return escapeSlow(string, index); 062 } 063 } 064 return string; 065 } 066 067 /** 068 * Returns the escaped form of the given character, or {@code null} if this character does not 069 * need to be escaped. If an empty array is returned, this effectively strips the input character 070 * from the resulting text. 071 * 072 * <p>If the character does not need to be escaped, this method should return {@code null}, rather 073 * than a one-character array containing the character itself. This enables the escaping algorithm 074 * to perform more efficiently. 075 * 076 * <p>An escaper is expected to be able to deal with any {@code char} value, so this method should 077 * not throw any exceptions. 078 * 079 * @param c the character to escape if necessary 080 * @return the replacement characters, or {@code null} if no escaping was needed 081 */ 082 protected abstract char @Nullable [] escape(char c); 083 084 /** 085 * Returns the escaped form of a given literal string, starting at the given index. This method is 086 * called by the {@link #escape(String)} method when it discovers that escaping is required. It is 087 * protected to allow subclasses to override the fastpath escaping function to inline their 088 * escaping test. See {@link CharEscaperBuilder} for an example usage. 089 * 090 * @param s the literal string to be escaped 091 * @param index the index to start escaping from 092 * @return the escaped form of {@code string} 093 * @throws NullPointerException if {@code string} is null 094 */ 095 protected final String escapeSlow(String s, int index) { 096 int slen = s.length(); 097 098 // Get a destination buffer and setup some loop variables. 099 char[] dest = Platform.charBufferFromThreadLocal(); 100 int destSize = dest.length; 101 int destIndex = 0; 102 int lastEscape = 0; 103 104 // Loop through the rest of the string, replacing when needed into the 105 // destination buffer, which gets grown as needed as well. 106 for (; index < slen; index++) { 107 108 // Get a replacement for the current character. 109 char[] r = escape(s.charAt(index)); 110 111 // If no replacement is needed, just continue. 112 if (r == null) { 113 continue; 114 } 115 116 int rlen = r.length; 117 int charsSkipped = index - lastEscape; 118 119 // This is the size needed to add the replacement, not the full size 120 // needed by the string. We only regrow when we absolutely must, and 121 // when we do grow, grow enough to avoid excessive growing. Grow. 122 int sizeNeeded = destIndex + charsSkipped + rlen; 123 if (destSize < sizeNeeded) { 124 destSize = sizeNeeded + DEST_PAD_MULTIPLIER * (slen - index); 125 dest = growBuffer(dest, destIndex, destSize); 126 } 127 128 // If we have skipped any characters, we need to copy them now. 129 if (charsSkipped > 0) { 130 s.getChars(lastEscape, index, dest, destIndex); 131 destIndex += charsSkipped; 132 } 133 134 // Copy the replacement string into the dest buffer as needed. 135 if (rlen > 0) { 136 System.arraycopy(r, 0, dest, destIndex, rlen); 137 destIndex += rlen; 138 } 139 lastEscape = index + 1; 140 } 141 142 // Copy leftover characters if there are any. 143 int charsLeft = slen - lastEscape; 144 if (charsLeft > 0) { 145 int sizeNeeded = destIndex + charsLeft; 146 if (destSize < sizeNeeded) { 147 148 // Regrow and copy, expensive! No padding as this is the final copy. 149 dest = growBuffer(dest, destIndex, sizeNeeded); 150 } 151 s.getChars(lastEscape, slen, dest, destIndex); 152 destIndex = sizeNeeded; 153 } 154 return new String(dest, 0, destIndex); 155 } 156 157 /** 158 * Helper method to grow the character buffer as needed, this only happens once in a while so it's 159 * ok if it's in a method call. If the index passed in is 0 then no copying will be done. 160 */ 161 private static char[] growBuffer(char[] dest, int index, int size) { 162 if (size < 0) { // overflow - should be OutOfMemoryError but GWT/j2cl don't support it 163 throw new AssertionError("Cannot increase internal buffer any further"); 164 } 165 char[] copy = new char[size]; 166 if (index > 0) { 167 System.arraycopy(dest, 0, copy, 0, index); 168 } 169 return copy; 170 } 171 172 /** The multiplier for padding to use when growing the escape buffer. */ 173 private static final int DEST_PAD_MULTIPLIER = 2; 174}