/*
 * Copyright (C) 2009 The Guava Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.common.escape;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.GwtCompatible;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.util.HashMap;
import java.util.Map;
import org.jspecify.annotations.Nullable;

/**
 * Static factory and helper methods for working with {@link Escaper} instances.
 *
 * @author Sven Mawson
 * @author David Beaumont
 * @since 15.0
 */
@GwtCompatible
public final class Escapers {

  /**
   * Shared pass-through escaper handed out by {@link #nullEscaper()}.
   *
   * <p>It is deliberately a {@link CharEscaper} rather than a bare {@link Escaper}, because that
   * makes Escapers.compose() easier.
   */
  private static final Escaper NULL_ESCAPER =
      new CharEscaper() {
        /** Hands the input back untouched, rejecting only {@code null}. */
        @Override
        public String escape(String string) {
          return checkNotNull(string);
        }

        /** Reports "no replacement" for every character. */
        @Override
        protected char @Nullable [] escape(char c) {
          // TODO: Fix tests not to call this directly and make it throw an error.
          return null;
        }
      };

  /** Not instantiable: this class only hosts static members. */
  private Escapers() {}

  /**
   * Returns an {@link Escaper} that performs no escaping at all; every character of the input is
   * passed through unchanged.
   */
  public static Escaper nullEscaper() {
    return NULL_ESCAPER;
  }

  /**
   * Returns a builder for creating simple, fast escapers. The same builder may be used to build
   * several escapers; each one is a snapshot of the builder state at the time it was built.
   * Builders are not thread safe.
   *
   * <p>A freshly created builder starts out with:
   *
   * <ul>
   *   <li>no replacement mappings
   *   <li>{@code safeMin == Character.MIN_VALUE}
   *   <li>{@code safeMax == Character.MAX_VALUE}
   *   <li>{@code unsafeReplacement == null}
   * </ul>
   *
   * <p>For performance reasons escapers created by this builder are not Unicode aware and will not
   * validate the well-formedness of their input.
   */
  public static Builder builder() {
    return new Builder();
  }

  /**
   * A builder for simple, fast escapers.
   *
   * <p>Typically an escaper needs to deal with the escaping of high valued characters or code
   * points. In these cases it is necessary to extend either {@link ArrayBasedCharEscaper} or {@link
   * ArrayBasedUnicodeEscaper} to provide the desired behavior. However this builder is suitable for
   * creating escapers that replace a relatively small set of characters.
   *
   * @author David Beaumont
   * @since 15.0
   */
  public static final class Builder {
    /** Explicit per-character replacements registered through {@link #addEscape}. */
    private final Map<Character, String> replacementMap = new HashMap<>();

    /** Lower bound (inclusive) of the 'safe' character range. */
    private char safeMin = Character.MIN_VALUE;

    /** Upper bound (inclusive) of the 'safe' character range. */
    private char safeMax = Character.MAX_VALUE;

    /** Replacement for unsafe, unmapped characters; {@code null} until one is configured. */
    private @Nullable String unsafeReplacement;

    // Instances are obtained through Escapers.builder().
    private Builder() {}

    /**
     * Sets the safe range of characters for the escaper. A character inside this range that has no
     * explicit replacement is treated as 'safe' and is left unescaped in the output. If {@code
     * safeMax < safeMin} then the safe range is empty.
     *
     * @param safeMin the lowest 'safe' character
     * @param safeMax the highest 'safe' character
     * @return the builder instance
     */
    @CanIgnoreReturnValue
    public Builder setSafeRange(char safeMin, char safeMax) {
      this.safeMax = safeMax;
      this.safeMin = safeMin;
      return this;
    }

    /**
     * Sets the replacement string used for any character that lies outside the 'safe' range and has
     * no explicit replacement. A {@code null} value means no replacement will occur; an empty
     * string ({@code ""}) means such characters are removed from the output.
     *
     * @param unsafeReplacement the string to replace unsafe characters
     * @return the builder instance
     */
    @CanIgnoreReturnValue
    public Builder setUnsafeReplacement(@Nullable String unsafeReplacement) {
      this.unsafeReplacement = unsafeReplacement;
      return this;
    }

    /**
     * Registers a replacement string for one input character. Every occurrence of that character
     * is replaced by the given string, irrespective of whether the character lies inside or
     * outside the 'safe' range.
     *
     * @param c the character to be replaced
     * @param replacement the string to replace the given character
     * @return the builder instance
     * @throws NullPointerException if {@code replacement} is null
     */
    @CanIgnoreReturnValue
    public Builder addEscape(char c, String replacement) {
      // The null check fires before the map is touched. An earlier mapping for the same character
      // is overwritten, since the builder is re-usable.
      replacementMap.put(c, checkNotNull(replacement));
      return this;
    }

    /**
     * Returns a new escaper based on the current state of the builder.
     *
     * <p>The unsafe replacement, if any, is converted to a {@code char[]} once per call to this
     * method; the resulting escaper returns that same array for every unsafe character.
     */
    public Escaper build() {
      String configuredReplacement = unsafeReplacement;
      final char @Nullable [] unsafeChars;
      if (configuredReplacement == null) {
        unsafeChars = null;
      } else {
        unsafeChars = configuredReplacement.toCharArray();
      }

      return new ArrayBasedCharEscaper(replacementMap, safeMin, safeMax) {
        @Override
        protected char @Nullable [] escapeUnsafe(char c) {
          return unsafeChars;
        }
      };
    }
  }

  /**
   * Returns a string that would replace the given character in the specified escaper, or {@code
   * null} if no replacement should be made. This method is intended for use in tests through the
   * {@code EscaperAsserts} class; production users of {@link CharEscaper} should limit themselves
   * to its public interface.
   *
   * @param escaper the escaper whose single-character escaping is queried
   * @param c the character to escape if necessary
   * @return the replacement string, or {@code null} if no escaping was needed
   */
  public static @Nullable String computeReplacement(CharEscaper escaper, char c) {
    char @Nullable [] replacement = escaper.escape(c);
    return stringOrNull(replacement);
  }

  /**
   * Returns a string that would replace the given character in the specified escaper, or {@code
   * null} if no replacement should be made. This method is intended for use in tests through the
   * {@code EscaperAsserts} class; production users of {@link UnicodeEscaper} should limit
   * themselves to its public interface.
   *
   * @param escaper the escaper whose single-code-point escaping is queried
   * @param cp the Unicode code point to escape if necessary
   * @return the replacement string, or {@code null} if no escaping was needed
   */
  public static @Nullable String computeReplacement(UnicodeEscaper escaper, int cp) {
    char @Nullable [] replacement = escaper.escape(cp);
    return stringOrNull(replacement);
  }

  /** Converts a replacement array to a {@code String}, mapping {@code null} to {@code null}. */
  private static @Nullable String stringOrNull(char @Nullable [] chars) {
    if (chars == null) {
      return null;
    }
    return new String(chars);
  }
}