5353 *
5454 * <p>Please make sure to read and understand the context that the method encodes
5555 * for. Encoding for the incorrect context will likely lead to exposing a
56- * cross-site scripting vulnerability.</p>
56+ * cross-site scripting vulnerability. Those new to XSS mitigation may find it
57+ * useful to read the
58+ * <a href="https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html">
59+ * Cross Site Scripting Prevention Cheat Sheet</a> that is part of the OWASP Cheat Sheet series for background
60+ * material.
61+ * </p>
5762 *
5863 * @author Jeff Ichnowski
5964 */
@@ -66,7 +71,7 @@ private Encode() {}
6671 * this method encodes for both contexts, it may be slightly less
6772 * efficient to use this method over the methods targeted towards
6873 * the specific contexts ({@link #forHtmlAttribute(String)} and
69- * {@link #forHtmlContent(String)}. In general this method should
74+ * {@link #forHtmlContent(String)}). In general this method should
7075 * be preferred unless you are really concerned with saving a few
7176 * bytes or are writing a framework that utilizes this
7277 * package.</p>
@@ -155,7 +160,7 @@ public static void forHtml(Writer out, String input) throws IOException {
155160 /**
156161 * <p>This method encodes for HTML text content. It does not escape
157162 * quotation characters and is thus unsafe for use with
158- * HTML attributes. Use either forHtml or forHtmlAttribute for those
163+ * HTML attributes. Use either {@link #forHtml(String)} or {@link #forHtmlAttribute(String)} for those
159164 * contexts.</p>
160165 *
161166 * <b>Example JSP Usage</b>
@@ -232,7 +237,9 @@ public static void forHtmlContent(Writer out, String input)
232237 }
233238
234239 /**
235- * <p>This method encodes for HTML text attributes.</p>
240+ * <p>This method encodes for HTML text attributes. Do not use for JavaScript event attributes or for attributes
241+ * that are interpreted as a URL. Instead use {@link #forJavaScript(String)} and {@link #forUriComponent(String)}
242+ * respectively for those.</p>
236243 *
237244 * <b>Example JSP Usage</b>
238245 * <pre>
@@ -472,31 +479,29 @@ public static void forHtmlUnquotedAttribute(Writer out, String input)
472479 * <b>Encoding Notes</b>
473480 * <ul>
474481 *
475- * <li>The following characters are encoded using hexidecimal
482+ * <li>The following characters are encoded using hexadecimal
476483 * encodings: {@code U+0000} - {@code U+001f},
477484 * {@code "},
478485 * {@code '},
479486 * {@code \},
480487 * {@code <},
481488 * {@code &},
482- * {@code (},
483- * {@code )},
484489 * {@code /},
485490 * {@code >},
486491 * {@code U+007f},
487492 * line separator ({@code U+2028}),
488493 * paragraph separator ({@code U+2029}).</li>
489494 *
490495 * <li>Any character requiring encoding is encoded as {@code \xxx}
491- * where {@code xxx} is the shortest hexidecimal representation of
496+ * where {@code xxx} is the shortest hexadecimal representation of
492497 * its Unicode code point (after decoding surrogate pairs if
493498 * necessary). This encoding is never zero padded. Thus, for
494499 * example, the tab character is encoded as {@code \9}, not {@code
495500 * \0009}.</li>
496501 *
497502 * <li>The encoder looks ahead 1 character in the input and
498503 * appends a space to an encoding to avoid the next character
499- * becoming part of the hexidecimal encoded sequence. Thus
504+ * becoming part of the hexadecimal encoded sequence. Thus
500505 * “{@code '1}” is encoded as “{@code \27
501506 * 1}”, and not as “{@code \271}”. If a space
502507 * is not necessary, it is not included, thus “{@code
@@ -544,13 +549,13 @@ public static void forCssString(Writer out, String input)
544549 * <div style="background:url(<%=Encode.forCssUrl(...)%>);">
545550 *
546551 * <style type="text/css">
547- * background: url(<%=Encode.forCssUrl(...)%>);
552+ * background: url('<%=Encode.forCssUrl(...)%>');
548553 * </style>
549554 * </pre>
550555 * <b>Encoding Notes</b>
551556 * <ul>
552557 *
553- * <li>The following characters are encoded using hexidecimal
558+ * <li>The following characters are encoded using hexadecimal
554559 * encodings: {@code U+0000} - {@code U+001f},
555560 * {@code "},
556561 * {@code '},
@@ -564,15 +569,15 @@ public static void forCssString(Writer out, String input)
564569 * paragraph separator ({@code U+2029}).</li>
565570 *
566571 * <li>Any character requiring encoding is encoded as {@code \xxx}
567- * where {@code xxx} is the shortest hexidecimal representation of
572+ * where {@code xxx} is the shortest hexadecimal representation of
568573 * its Unicode code point (after decoding surrogate pairs if
569574 * necessary). This encoding is never zero padded. Thus, for
570575 * example, the tab character is encoded as {@code \9}, not {@code
571576 * \0009}.</li>
572577 *
573578 * <li>The encoder looks ahead 1 character in the input and
574579 * appends a space to an encoding to avoid the next character
575- * becoming part of the hexidecimal encoded sequence. Thus
580+ * becoming part of the hexadecimal encoded sequence. Thus
576581 * “{@code '1}” is encoded as “{@code \27
577582 * 1}”, and not as “{@code \271}”. If a space
578583 * is not necessary, it is not included, thus “{@code
@@ -639,7 +644,7 @@ public static void forCssUrl(Writer out, String input)
639644 * <li>URL encoding is an encoding for bytes, not Unicode. The
640645 * input string is thus first encoded as a sequence of UTF-8
641646 * bytes. The bytes are then encoded as {@code %xx} where {@code
642- * xx} is the two-digit hexidecimal representation of the
647+ * xx} is the two-digit hexadecimal representation of the
643648 * byte. (The implementation does this as one step for
644649 * performance.)</li>
645650 *
@@ -690,7 +695,7 @@ public static void forCssUrl(Writer out, String input)
690695 * <p>The following characters are <i>not</i> encoded:</p>
691696 * <pre>
692697 * U+20: - . 0 1 2 3 4 5 6 7 8 9
693- * U+40: @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z _
698+ * U+40: A B C D E F G H I J K L M N O P Q R S T U V W X Y Z _
694699 * U+60: a b c d e f g h i j k l m n o p q r s t u v w x y z ~
695700 * </pre>
696701 *
@@ -704,7 +709,7 @@ public static void forCssUrl(Writer out, String input)
704709 * <li>URL encoding is an encoding for bytes, not Unicode. The
705710 * input string is thus first encoded as a sequence of UTF-8
706711 * bytes. The bytes are then encoded as {@code %xx} where {@code
707- * xx} is the two-digit hexidecimal representation of the
712+ * xx} is the two-digit hexadecimal representation of the
708713 * byte. (The implementation does this as one step for
709714 * performance.)</li>
710715 *
@@ -937,7 +942,7 @@ public static void forJava(Writer out, String input)
937942 * provide the surrounding quotation characters for the string.
938943 * Since this performs additional encoding so it can work in all
939944 * of the JavaScript contexts listed, it may be slightly less
940- * efficient than using one of the methods targetted to a specific
945+ * efficient than using one of the methods targeted to a specific
941946 * JavaScript context ({@link #forJavaScriptAttribute(String)},
942947 * {@link #forJavaScriptBlock}, {@link #forJavaScriptSource}).
943948 * Unless you are interested in saving a few bytes of output or
0 commit comments