11from __future__ import absolute_import , division , unicode_literals
22
3- import os
4- import sys
5- import unittest
6- import warnings
7- from difflib import unified_diff
3+ import pytest
84
9- try :
10- unittest .TestCase .assertEqual
11- except AttributeError :
12- unittest .TestCase .assertEqual = unittest .TestCase .assertEquals
5+ from .support import treeTypes
136
14- from .support import get_data_files , TestData , convertExpected
15-
16- from html5lib import html5parser , treewalkers , treebuilders , treeadapters , constants
7+ from html5lib import html5parser , treewalkers
178from html5lib .filters .lint import Filter as Lint
189
19-
20- treeTypes = {
21- "DOM" : {"builder" : treebuilders .getTreeBuilder ("dom" ),
22- "walker" : treewalkers .getTreeWalker ("dom" )},
23- }
24-
25- # Try whatever etree implementations are available from a list that are
26- #"supposed" to work
27- try :
28- import xml .etree .ElementTree as ElementTree
29- except ImportError :
30- pass
31- else :
32- treeTypes ['ElementTree' ] = \
33- {"builder" : treebuilders .getTreeBuilder ("etree" , ElementTree , fullTree = True ),
34- "walker" : treewalkers .getTreeWalker ("etree" , ElementTree )}
35-
36- try :
37- import xml .etree .cElementTree as ElementTree
38- except ImportError :
39- pass
40- else :
41- treeTypes ['cElementTree' ] = \
42- {"builder" : treebuilders .getTreeBuilder ("etree" , ElementTree , fullTree = True ),
43- "walker" : treewalkers .getTreeWalker ("etree" , ElementTree )}
44-
45-
46- try :
47- import lxml .etree as ElementTree # flake8: noqa
48- except ImportError :
49- pass
50- else :
51- treeTypes ['lxml_native' ] = \
52- {"builder" : treebuilders .getTreeBuilder ("lxml" ),
53- "walker" : treewalkers .getTreeWalker ("lxml" )}
54-
55-
56- try :
57- import genshi # flake8: noqa
58- except ImportError :
59- pass
60- else :
61- treeTypes ["genshi" ] = \
62- {"builder" : treebuilders .getTreeBuilder ("dom" ),
63- "adapter" : lambda tree : treeadapters .genshi .to_genshi (treewalkers .getTreeWalker ("dom" )(tree )),
64- "walker" : treewalkers .getTreeWalker ("genshi" )}
65-
6610import re
6711attrlist = re .compile (r"^(\s+)\w+=.*(\n\1\w+=.*)+" , re .M )
6812
@@ -73,80 +17,29 @@ def sortattrs(x):
7317 return "\n " .join (lines )
7418
7519
76- class TokenTestCase (unittest .TestCase ):
77- def test_all_tokens (self ):
78- expected = [
79- {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'html' },
80- {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'head' },
81- {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'head' },
82- {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'body' },
83- {'data' : 'a' , 'type' : 'Characters' },
84- {'data' : {}, 'type' : 'StartTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'div' },
85- {'data' : 'b' , 'type' : 'Characters' },
86- {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'div' },
87- {'data' : 'c' , 'type' : 'Characters' },
88- {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'body' },
89- {'type' : 'EndTag' , 'namespace' : 'http://www.w3.org/1999/xhtml' , 'name' : 'html' }
90- ]
91- for treeName , treeCls in sorted (treeTypes .items ()):
92- p = html5parser .HTMLParser (tree = treeCls ["builder" ])
93- document = p .parse ("<html><head></head><body>a<div>b</div>c</body></html>" )
94- document = treeCls .get ("adapter" , lambda x : x )(document )
95- output = Lint (treeCls ["walker" ](document ))
96- for expectedToken , outputToken in zip (expected , output ):
97- self .assertEqual (expectedToken , outputToken )
98-
99-
100- def runTreewalkerTest (innerHTML , input , expected , errors , treeClass ):
101- warnings .resetwarnings ()
102- warnings .simplefilter ("error" )
103- try :
104- p = html5parser .HTMLParser (tree = treeClass ["builder" ])
105- if innerHTML :
106- document = p .parseFragment (input , innerHTML )
107- else :
108- document = p .parse (input )
109- except constants .DataLossWarning :
110- # Ignore testcases we know we don't pass
111- return
112-
113- document = treeClass .get ("adapter" , lambda x : x )(document )
114- try :
115- output = treewalkers .pprint (Lint (treeClass ["walker" ](document )))
116- output = attrlist .sub (sortattrs , output )
117- expected = attrlist .sub (sortattrs , convertExpected (expected ))
118- diff = "" .join (unified_diff ([line + "\n " for line in expected .splitlines ()],
119- [line + "\n " for line in output .splitlines ()],
120- "Expected" , "Received" ))
121- assert expected == output , "\n " .join ([
122- "" , "Input:" , input ,
123- "" , "Expected:" , expected ,
124- "" , "Received:" , output ,
125- "" , "Diff:" , diff ,
126- ])
127- except NotImplementedError :
128- pass # Amnesty for those that confess...
129-
130-
131- def test_treewalker ():
132- sys .stdout .write ('Testing tree walkers ' + " " .join (list (treeTypes .keys ())) + "\n " )
133-
def test_all_tokens():
    """Check that each available tree walker emits exactly the expected tokens.

    A small fixed document is parsed with the ``"builder"`` of every entry in
    ``treeTypes``, passed through the entry's optional ``"adapter"``, walked
    with its ``"walker"``, filtered through ``Lint`` and compared against
    ``expected``.  Entries whose value is ``None`` are skipped (presumably
    tree implementations that could not be loaded -- confirm in ``support``).

    Both the individual tokens and the total number of tokens are checked, so
    a walker that drops trailing tokens or emits extra ones fails the test.
    """
    expected = [
        {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'},
        {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
        {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'head'},
        {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
        {'data': 'a', 'type': 'Characters'},
        {'data': {}, 'type': 'StartTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
        {'data': 'b', 'type': 'Characters'},
        {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'div'},
        {'data': 'c', 'type': 'Characters'},
        {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'body'},
        {'type': 'EndTag', 'namespace': 'http://www.w3.org/1999/xhtml', 'name': 'html'}
    ]
    for treeName, treeCls in sorted(treeTypes.items()):
        if treeCls is None:
            continue
        p = html5parser.HTMLParser(tree=treeCls["builder"])
        document = p.parse("<html><head></head><body>a<div>b</div>c</body></html>")
        # Entries without an "adapter" hand the parsed tree to the walker as is.
        document = treeCls.get("adapter", lambda x: x)(document)
        # Materialise the token stream: zip() stops at the shorter input, so
        # without the length check below a truncated or over-long stream
        # would pass unnoticed.
        output = list(Lint(treeCls["walker"](document)))
        for expectedToken, outputToken in zip(expected, output):
            # The tree name in the message identifies the failing implementation.
            assert expectedToken == outputToken, treeName
        assert len(output) == len(expected), (
            "%s: expected %d tokens, received %d" % (treeName, len(expected), len(output)))
15043
15144
15245def set_attribute_on_first_child (docfrag , name , value , treeName ):
@@ -164,6 +57,8 @@ def set_attribute_on_first_child(docfrag, name, value, treeName):
16457def runTreewalkerEditTest (intext , expected , attrs_to_add , tree ):
16558 """tests what happens when we add attributes to the intext"""
16659 treeName , treeClass = tree
60+ if treeClass is None :
61+ pytest .skip ("Treebuilder not loaded" )
16762 parser = html5parser .HTMLParser (tree = treeClass ["builder" ])
16863 document = parser .parseFragment (intext )
16964 for nom , val in attrs_to_add :
@@ -172,7 +67,7 @@ def runTreewalkerEditTest(intext, expected, attrs_to_add, tree):
17267 document = treeClass .get ("adapter" , lambda x : x )(document )
17368 output = treewalkers .pprint (treeClass ["walker" ](document ))
17469 output = attrlist .sub (sortattrs , output )
175- if not output in expected :
70+ if output not in expected :
17671 raise AssertionError ("TreewalkerEditTest: %s\n Expected:\n %s\n Received:\n %s" % (treeName , expected , output ))
17772
17873
0 commit comments