001/* 002 * Copyright (C) 2009 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.html; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtCompatible; 021import com.google.common.escape.Escaper; 022import com.google.common.escape.Escapers; 023 024/** 025 * {@code Escaper} instances suitable for strings to be included in HTML 026 * attribute values and <em>most</em> elements' text contents. When possible, 027 * avoid manual escaping by using templating systems and high-level APIs that 028 * provide autoescaping. 029 * One Google-authored templating system available for external use is <a 030 * href="https://developers.google.com/closure/templates/">Closure 031 * Templates</a>. 032 * 033 * <p>HTML escaping is particularly tricky: For example, <a 034 * href="http://goo.gl/5TgZb">some elements' text contents must not be HTML 035 * escaped</a>. As a result, it is impossible to escape an HTML document 036 * correctly without domain-specific knowledge beyond what {@code HtmlEscapers} 037 * provides. We strongly encourage the use of HTML templating systems. 038 * 039 * @author Sven Mawson 040 * @author David Beaumont 041 * @since 15.0 042 */ 043@Beta 044@GwtCompatible 045public final class HtmlEscapers { 046 /** 047 * Returns an {@link Escaper} instance that escapes HTML metacharacters as 048 * specified by <a href="http://www.w3.org/TR/html4/">HTML 4.01</a>. The 049 * resulting strings can be used both in attribute values and in <em>most</em> 050 * elements' text contents, provided that the HTML document's character 051 * encoding can encode any non-ASCII code points in the input (as UTF-8 and 052 * other Unicode encodings can). 053 * 054 * 055 * <p><b>Note:</b> This escaper only performs minimal escaping to make content 056 * structurally compatible with HTML. Specifically, it does not perform entity 057 * replacement (symbolic or numeric), so it does not replace non-ASCII code 058 * points with character references. This escaper escapes only the following 059 * five ASCII characters: {@code '"&<>}. 060 */ 061 public static Escaper htmlEscaper() { 062 return HTML_ESCAPER; 063 } 064 065 // For each xxxEscaper() method, please add links to external reference pages 066 // that are considered authoritative for the behavior of that escaper. 067 068 private static final Escaper HTML_ESCAPER = 069 Escapers.builder() 070 .addEscape('"', """) 071 // Note: "'" is not defined in HTML 4.01. 072 .addEscape('\'', "'") 073 .addEscape('&', "&") 074 .addEscape('<', "<") 075 .addEscape('>', ">") 076 .build(); 077 078 private HtmlEscapers() {} 079}