001/* 002 * Copyright (C) 2009 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.html; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtCompatible; 021import com.google.common.escape.Escaper; 022import com.google.common.escape.Escapers; 023 024/** 025 * {@code Escaper} instances suitable for strings to be included in HTML 026 * attribute values and <em>most</em> elements' text contents. When possible, 027 * avoid manual escaping by using templating systems and high-level APIs that 028 * provide autoescaping. 029 * 030 * <p>HTML escaping is particularly tricky: For example, <a 031 * href="http://goo.gl/5TgZb">some elements' text contents must not be HTML 032 * escaped</a>. As a result, it is impossible to escape an HTML document 033 * correctly without domain-specific knowledge beyond what {@code HtmlEscapers} 034 * provides. We strongly encourage the use of HTML templating systems. 035 * 036 * @author Sven Mawson 037 * @author David Beaumont 038 * @since 15.0 039 */ 040@Beta 041@GwtCompatible 042public final class HtmlEscapers { 043 /** 044 * Returns an {@link Escaper} instance that escapes HTML metacharacters as 045 * specified by <a href="http://www.w3.org/TR/html4/">HTML 4.01</a>. The 046 * resulting strings can be used both in attribute values and in <em>most</em> 047 * elements' text contents, provided that the HTML document's character 048 * encoding can encode any non-ASCII code points in the input (as UTF-8 and 049 * other Unicode encodings can). 050 * 051 * 052 * <p><b>Note</b>: This escaper only performs minimal escaping to make content 053 * structurally compatible with HTML. Specifically, it does not perform entity 054 * replacement (symbolic or numeric), so it does not replace non-ASCII code 055 * points with character references. This escaper escapes only the following 056 * five ASCII characters: {@code '"&<>}. 057 */ 058 public static Escaper htmlEscaper() { 059 return HTML_ESCAPER; 060 } 061 062 // For each xxxEscaper() method, please add links to external reference pages 063 // that are considered authoritative for the behavior of that escaper. 064 065 private static final Escaper HTML_ESCAPER = 066 Escapers.builder() 067 .addEscape('"', """) 068 // Note: "'" is not defined in HTML 4.01. 069 .addEscape('\'', "'") 070 .addEscape('&', "&") 071 .addEscape('<', "<") 072 .addEscape('>', ">") 073 .build(); 074 075 private HtmlEscapers() {} 076}