|
2 | 2 |
|
3 | 3 | import os |
4 | 4 | import json |
5 | | -import unittest |
6 | 5 |
|
7 | | -from .support import get_data_files |
| 6 | +import pytest |
8 | 7 |
|
9 | | -try: |
10 | | - unittest.TestCase.assertEqual |
11 | | -except AttributeError: |
12 | | - unittest.TestCase.assertEqual = unittest.TestCase.assertEquals |
| 8 | +from .support import get_data_files |
13 | 9 |
|
14 | | -import html5lib |
15 | 10 | from html5lib import constants |
16 | 11 | from html5lib.filters.lint import Filter as Lint |
17 | 12 | from html5lib.serializer import HTMLSerializer, serialize |
@@ -102,70 +97,83 @@ def runSerializerTest(input, expected, options): |
102 | 97 | assert False, "Expected: %s, Received: %s" % (expected, result) |
103 | 98 |
|
104 | 99 |
|
105 | | -class EncodingTestCase(unittest.TestCase): |
106 | | - def throwsWithLatin1(self, input): |
107 | | - self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"}) |
| 100 | +def throwsWithLatin1(input): |
| 101 | + with pytest.raises(UnicodeEncodeError): |
| 102 | + serialize_html(input, {"encoding": "iso-8859-1"}) |
| 103 | + |
| 104 | + |
| 105 | +def testDoctypeName(): |
| 106 | + throwsWithLatin1([["Doctype", "\u0101"]]) |
| 107 | + |
| 108 | + |
| 109 | +def testDoctypePublicId(): |
| 110 | + throwsWithLatin1([["Doctype", "potato", "\u0101"]]) |
| 111 | + |
| 112 | + |
| 113 | +def testDoctypeSystemId(): |
| 114 | + throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]]) |
| 115 | + |
| 116 | + |
| 117 | +def testCdataCharacters(): |
| 118 | + runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], |
| 119 | + ["<style>&amacr;"], {"encoding": "iso-8859-1"}) |
| 120 | + |
| 121 | + |
| 122 | +def testCharacters(): |
| 123 | + runSerializerTest([["Characters", "\u0101"]], |
| 124 | + ["&amacr;"], {"encoding": "iso-8859-1"}) |
| 125 | + |
| 126 | + |
| 127 | +def testStartTagName(): |
| 128 | + throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]]) |
| 129 | + |
108 | 130 |
|
109 | | - def testDoctypeName(self): |
110 | | - self.throwsWithLatin1([["Doctype", "\u0101"]]) |
| 131 | +def testAttributeName(): |
| 132 | + throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]]) |
111 | 133 |
|
112 | | - def testDoctypePublicId(self): |
113 | | - self.throwsWithLatin1([["Doctype", "potato", "\u0101"]]) |
114 | 134 |
|
115 | | - def testDoctypeSystemId(self): |
116 | | - self.throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]]) |
| 135 | +def testAttributeValue(): |
| 136 | + runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span", |
| 137 | + [{"namespace": None, "name": "potato", "value": "\u0101"}]]], |
| 138 | + ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"}) |
117 | 139 |
|
118 | | - def testCdataCharacters(self): |
119 | | - runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], |
120 | | - ["<style>&amacr;"], {"encoding": "iso-8859-1"}) |
121 | 140 |
|
122 | | - def testCharacters(self): |
123 | | - runSerializerTest([["Characters", "\u0101"]], |
124 | | - ["&amacr;"], {"encoding": "iso-8859-1"}) |
| 141 | +def testEndTagName(): |
| 142 | + throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]]) |
125 | 143 |
|
126 | | - def testStartTagName(self): |
127 | | - self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]]) |
128 | 144 |
|
129 | | - def testAttributeName(self): |
130 | | - self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]]) |
| 145 | +def testComment(): |
| 146 | + throwsWithLatin1([["Comment", "\u0101"]]) |
131 | 147 |
|
132 | | - def testAttributeValue(self): |
133 | | - runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span", |
134 | | - [{"namespace": None, "name": "potato", "value": "\u0101"}]]], |
135 | | - ["<span potato=&amacr;>"], {"encoding": "iso-8859-1"}) |
136 | 148 |
|
137 | | - def testEndTagName(self): |
138 | | - self.throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]]) |
| 149 | +@pytest.fixture |
| 150 | +def lxml_parser(): |
| 151 | + return etree.XMLParser(resolve_entities=False) |
139 | 152 |
|
140 | | - def testComment(self): |
141 | | - self.throwsWithLatin1([["Comment", "\u0101"]]) |
142 | 153 |
|
| 154 | +@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable") |
| 155 | +def testEntityReplacement(lxml_parser): |
| 156 | + doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>' |
| 157 | + tree = etree.fromstring(doc, parser=lxml_parser).getroottree() |
| 158 | + result = serialize(tree, tree="lxml", omit_optional_tags=False) |
| 159 | + assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>' |
143 | 160 |
|
144 | | -if "lxml" in optionals_loaded: |
145 | | - class LxmlTestCase(unittest.TestCase): |
146 | | - def setUp(self): |
147 | | - self.parser = etree.XMLParser(resolve_entities=False) |
148 | | - self.treewalker = html5lib.getTreeWalker("lxml") |
149 | | - self.serializer = HTMLSerializer() |
150 | 161 |
|
151 | | - def testEntityReplacement(self): |
152 | | - doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""" |
153 | | - tree = etree.fromstring(doc, parser=self.parser).getroottree() |
154 | | - result = serialize(tree, tree="lxml", omit_optional_tags=False) |
155 | | - self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result) |
| 162 | +@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable") |
| 163 | +def testEntityXML(lxml_parser): |
| 164 | + doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>' |
| 165 | + tree = etree.fromstring(doc, parser=lxml_parser).getroottree() |
| 166 | + result = serialize(tree, tree="lxml", omit_optional_tags=False) |
| 167 | + assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>' |
156 | 168 |
|
157 | | - def testEntityXML(self): |
158 | | - doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""" |
159 | | - tree = etree.fromstring(doc, parser=self.parser).getroottree() |
160 | | - result = serialize(tree, tree="lxml", omit_optional_tags=False) |
161 | | - self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&gt;</html>""", result) |
162 | 169 |
|
163 | | - def testEntityNoResolve(self): |
164 | | - doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""" |
165 | | - tree = etree.fromstring(doc, parser=self.parser).getroottree() |
166 | | - result = serialize(tree, tree="lxml", omit_optional_tags=False, |
167 | | - resolve_entities=False) |
168 | | - self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>""", result) |
| 170 | +@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable") |
| 171 | +def testEntityNoResolve(lxml_parser): |
| 172 | + doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>' |
| 173 | + tree = etree.fromstring(doc, parser=lxml_parser).getroottree() |
| 174 | + result = serialize(tree, tree="lxml", omit_optional_tags=False, |
| 175 | + resolve_entities=False) |
| 176 | + assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>&beta;</html>' |
169 | 177 |
|
170 | 178 |
|
171 | 179 | def test_serializer(): |
|
0 commit comments