11from __future__ import absolute_import , division , unicode_literals
2- from six import text_type , string_types
32
43from xml .dom import Node
5- from ..constants import voidElements , spaceCharacters
4+ from ..constants import namespaces , voidElements , spaceCharacters
65
76__all__ = ["DOCUMENT" , "DOCTYPE" , "TEXT" , "ELEMENT" , "COMMENT" , "ENTITY" , "UNKNOWN" ,
87 "TreeWalker" , "NonRecursiveTreeWalker" ]
1817spaceCharacters = "" .join (spaceCharacters )
1918
2019
21- def to_text (s , blank_if_none = True ):
22- """Wrapper around six.text_type to convert None to empty string"""
23- if s is None :
24- if blank_if_none :
25- return ""
26- else :
27- return None
28- elif isinstance (s , text_type ):
29- return s
30- else :
31- return text_type (s )
32-
33-
34- def is_text_or_none (string ):
35- """Wrapper around isinstance(string_types) or is None"""
36- return string is None or isinstance (string , string_types )
37-
38-
3920class TreeWalker (object ):
4021 def __init__ (self , tree ):
4122 self .tree = tree
@@ -47,47 +28,25 @@ def error(self, msg):
4728 return {"type" : "SerializeError" , "data" : msg }
4829
4930 def emptyTag (self , namespace , name , attrs , hasChildren = False ):
50- assert namespace is None or isinstance (namespace , string_types ), type (namespace )
51- assert isinstance (name , string_types ), type (name )
52- assert all ((namespace is None or isinstance (namespace , string_types )) and
53- isinstance (name , string_types ) and
54- isinstance (value , string_types )
55- for (namespace , name ), value in attrs .items ())
56-
57- yield {"type" : "EmptyTag" , "name" : to_text (name , False ),
58- "namespace" : to_text (namespace ),
31+ yield {"type" : "EmptyTag" , "name" : name ,
32+ "namespace" : namespace ,
5933 "data" : attrs }
6034 if hasChildren :
6135 yield self .error ("Void element has children" )
6236
6337 def startTag (self , namespace , name , attrs ):
64- assert namespace is None or isinstance (namespace , string_types ), type (namespace )
65- assert isinstance (name , string_types ), type (name )
66- assert all ((namespace is None or isinstance (namespace , string_types )) and
67- isinstance (name , string_types ) and
68- isinstance (value , string_types )
69- for (namespace , name ), value in attrs .items ())
70-
7138 return {"type" : "StartTag" ,
72- "name" : text_type (name ),
73- "namespace" : to_text (namespace ),
74- "data" : dict (((to_text (namespace , False ), to_text (name )),
75- to_text (value , False ))
76- for (namespace , name ), value in attrs .items ())}
39+ "name" : name ,
40+ "namespace" : namespace ,
41+ "data" : attrs }
7742
7843 def endTag (self , namespace , name ):
79- assert namespace is None or isinstance (namespace , string_types ), type (namespace )
80- assert isinstance (name , string_types ), type (namespace )
81-
8244 return {"type" : "EndTag" ,
83- "name" : to_text (name , False ),
84- "namespace" : to_text (namespace ),
85- "data" : {}}
45+ "name" : name ,
46+ "namespace" : namespace }
8647
8748 def text (self , data ):
88- assert isinstance (data , string_types ), type (data )
89-
90- data = to_text (data )
49+ data = data
9150 middle = data .lstrip (spaceCharacters )
9251 left = data [:len (data ) - len (middle )]
9352 if left :
@@ -101,25 +60,16 @@ def text(self, data):
10160 yield {"type" : "SpaceCharacters" , "data" : right }
10261
10362 def comment (self , data ):
104- assert isinstance (data , string_types ), type (data )
105-
106- return {"type" : "Comment" , "data" : text_type (data )}
107-
108- def doctype (self , name , publicId = None , systemId = None , correct = True ):
109- assert is_text_or_none (name ), type (name )
110- assert is_text_or_none (publicId ), type (publicId )
111- assert is_text_or_none (systemId ), type (systemId )
63+ return {"type" : "Comment" , "data" : data }
11264
65+ def doctype (self , name , publicId = None , systemId = None ):
11366 return {"type" : "Doctype" ,
114- "name" : to_text (name ),
115- "publicId" : to_text (publicId ),
116- "systemId" : to_text (systemId ),
117- "correct" : to_text (correct )}
67+ "name" : name ,
68+ "publicId" : publicId ,
69+ "systemId" : systemId }
11870
11971 def entity (self , name ):
120- assert isinstance (name , string_types ), type (name )
121-
122- return {"type" : "Entity" , "name" : text_type (name )}
72+ return {"type" : "Entity" , "name" : name }
12373
12474 def unknown (self , nodeType ):
12575 return self .error ("Unknown node type: " + nodeType )
@@ -154,7 +104,7 @@ def __iter__(self):
154104
155105 elif type == ELEMENT :
156106 namespace , name , attributes , hasChildren = details
157- if name in voidElements :
107+ if ( not namespace or namespace == namespaces [ "html" ]) and name in voidElements :
158108 for token in self .emptyTag (namespace , name , attributes ,
159109 hasChildren ):
160110 yield token
@@ -187,7 +137,7 @@ def __iter__(self):
187137 type , details = details [0 ], details [1 :]
188138 if type == ELEMENT :
189139 namespace , name , attributes , hasChildren = details
190- if name not in voidElements :
140+ if ( namespace and namespace != namespaces [ "html" ]) or name not in voidElements :
191141 yield self .endTag (namespace , name )
192142 if self .tree is currentNode :
193143 currentNode = None
0 commit comments