|
13 | 13 |
|
14 | 14 | spaceCharacters = "".join(spaceCharacters) |
15 | 15 |
|
16 | | -quoteAttributeSpec = re.compile("[" + spaceCharacters + "\"'=<>`]") |
17 | | -quoteAttributeLegacy = re.compile("[\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" |
| 16 | +quoteAttributeSpecChars = spaceCharacters + "\"'=<>`" |
| 17 | +quoteAttributeSpec = re.compile("[" + quoteAttributeSpecChars + "]") |
| 18 | +quoteAttributeLegacy = re.compile("[" + quoteAttributeSpecChars + |
| 19 | + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" |
18 | 20 | "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15" |
19 | 21 | "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" |
20 | 22 | "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000" |
@@ -82,7 +84,7 @@ def htmlentityreplace_errors(exc): |
82 | 84 | class HTMLSerializer(object): |
83 | 85 |
|
84 | 86 | # attribute quoting options |
85 | | - quote_attr_values = "legacy" |
| 87 | + quote_attr_values = "legacy" # be secure by default |
86 | 88 | quote_char = '"' |
87 | 89 | use_best_quote_char = True |
88 | 90 |
|
@@ -118,9 +120,9 @@ def __init__(self, **kwargs): |
118 | 120 | inject_meta_charset=True|False |
119 | 121 | Whether to insert a meta element to define the character set of the |
120 | 122 | document. |
121 | | - quote_attr_values="legacy"|"spec"|True |
| 123 | + quote_attr_values="legacy"|"spec"|"always" |
122 | 124 | Whether to quote attribute values that don't require quoting |
123 | | - per legacy browser behaviour, HTML authoring rules, or always. |
| 125 | + per legacy browser behaviour, when required by the standard, or always. |
124 | 126 | quote_char=u'"'|u"'" |
125 | 127 | Use given quote character for attribute quoting. Default is to |
126 | 128 | use double quote unless attribute value contains a double quote, |
@@ -249,10 +251,15 @@ def serialize(self, treewalker, encoding=None): |
249 | 251 | (k not in booleanAttributes.get(name, tuple()) |
250 | 252 | and k not in booleanAttributes.get("", tuple())): |
251 | 253 | yield self.encodeStrict("=") |
252 | | - if self.quote_attr_values or len(v) == 0: |
| 254 | + if self.quote_attr_values == "always" or len(v) == 0: |
253 | 255 | quote_attr = True |
254 | | - elif : |
255 | | - quoteAttributeSpec.search(v) |
| 256 | + elif self.quote_attr_values == "spec": |
| 257 | + quote_attr = quoteAttributeSpec.search(v) is not None |
| 258 | + elif self.quote_attr_values == "legacy": |
| 259 | + quote_attr = quoteAttributeLegacy.search(v) is not None |
| 260 | + else: |
| 261 | + raise ValueError("quote_attr_values must be one of: " |
| 262 | + "'always', 'spec', or 'legacy'") |
256 | 263 | v = v.replace("&", "&amp;") |
257 | 264 | if self.escape_lt_in_attrs: |
258 | 265 | v = v.replace("<", "&lt;") |
|
0 commit comments