001/* 002 * Copyright (C) 2009 The Guava Authors 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); 005 * you may not use this file except in compliance with the License. 006 * You may obtain a copy of the License at 007 * 008 * http://www.apache.org/licenses/LICENSE-2.0 009 * 010 * Unless required by applicable law or agreed to in writing, software 011 * distributed under the License is distributed on an "AS IS" BASIS, 012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 013 * See the License for the specific language governing permissions and 014 * limitations under the License. 015 */ 016 017package com.google.common.html; 018 019import com.google.common.annotations.Beta; 020import com.google.common.annotations.GwtCompatible; 021import com.google.common.escape.Escaper; 022import com.google.common.escape.Escapers; 023 024/** 025 * {@code Escaper} instances suitable for strings to be included in HTML 026 * attribute values and <em>most</em> elements' text contents. When possible, 027 * avoid manual escaping by using templating systems and high-level APIs that 028 * provide autoescaping. 029 * 030 * <p>HTML escaping is particularly tricky: For example, <a 031 * href="http://goo.gl/5TgZb">some elements' text contents must not be HTML 032 * escaped</a>. As a result, it is impossible to escape an HTML document 033 * correctly without domain-specific knowledge beyond what {@code HtmlEscapers} 034 * provides. We strongly encourage the use of HTML templating systems. 035 * 036 * @author Sven Mawson 037 * @author David Beaumont 038 * @since 15.0 039 */ 040@Beta 041@GwtCompatible 042public final class HtmlEscapers { 043 private HtmlEscapers() {} 044 045 // For each xxxEscaper() method, please add links to external reference pages 046 // that are considered authoritative for the behavior of that escaper. 047 048 /** 049 * Returns an {@link Escaper} instance that escapes HTML metacharacters as 050 * specified by <a href="http://www.w3.org/TR/html4/">HTML 4.01</a>. The 051 * resulting strings can be used both in attribute values and in <em>most</em> 052 * elements' text contents, provided that the HTML document's character 053 * encoding can encode any non-ASCII code points in the input (as UTF-8 and 054 * other Unicode encodings can). 055 * 056 * 057 * <p><b>Note</b>: This escaper only performs minimal escaping to make content 058 * structurally compatible with HTML. Specifically, it does not perform entity 059 * replacement (symbolic or numeric), so it does not replace non-ASCII code 060 * points with character references. This escaper escapes only the following 061 * five ASCII characters: {@code '"&<>}. 062 */ 063 public static Escaper htmlEscaper() { 064 return HTML_ESCAPER; 065 } 066 067 private static final Escaper HTML_ESCAPER = 068 Escapers.builder() 069 .addEscape('"', """) 070 // Note: "'" is not defined in HTML 4.01. 071 .addEscape('\'', "'") 072 .addEscape('&', "&") 073 .addEscape('<', "<") 074 .addEscape('>', ">") 075 .build(); 076}