11from __future__ import absolute_import , division , unicode_literals
2- from six import with_metaclass
2+ from six import with_metaclass , viewkeys , PY3
33
44import types
55
6+ try :
7+ from collections import OrderedDict
8+ except ImportError :
9+ from ordereddict import OrderedDict
10+
611from . import inputstream
712from . import tokenizer
813
914from . import treebuilders
1015from .treebuilders ._base import Marker
1116
1217from . import utils
13- from . import constants
14- from .constants import spaceCharacters , asciiUpper2Lower
15- from .constants import specialElements
16- from .constants import headingElements
17- from .constants import cdataElements , rcdataElements
18- from .constants import tokenTypes , ReparseException , namespaces
19- from .constants import htmlIntegrationPointElements , mathmlTextIntegrationPointElements
20- from .constants import adjustForeignAttributes as adjustForeignAttributesMap
21- from .constants import E
18+ from .constants import (
19+ spaceCharacters , asciiUpper2Lower ,
20+ specialElements , headingElements , cdataElements , rcdataElements ,
21+ tokenTypes , tagTokenTypes ,
22+ namespaces ,
23+ htmlIntegrationPointElements , mathmlTextIntegrationPointElements ,
24+ adjustForeignAttributes as adjustForeignAttributesMap ,
25+ adjustMathMLAttributes , adjustSVGAttributes ,
26+ E ,
27+ ReparseException
28+ )
2229
2330
2431def parse (doc , treebuilder = "etree" , encoding = None ,
def normalizeToken(self, token):
    """Apply HTML5-specific normalizations to *token* and return it.

    For start tags the tokenizer emits ``token["data"]`` as a sequence of
    ``(name, value)`` pairs; fold it into an ordered mapping.  The pairs
    are consumed in reverse so that, when an attribute name is repeated,
    the earliest occurrence is the one that survives (later-constructed
    entries overwrite earlier ones).
    """
    if token["type"] == tokenTypes["StartTag"]:
        # Reverse first: the first occurrence of a duplicate attribute wins.
        token["data"] = OrderedDict(reversed(token["data"]))
    return token
278285
def adjustMathMLAttributes(self, token):
    """Restore canonical casing of MathML attribute names on *token*.

    The tokenizer lowercases attribute names; the rename table is the
    module-level ``adjustMathMLAttributes`` mapping imported from
    ``constants`` (the method name shadows it only in class scope, not
    inside this body).
    """
    adjust_attributes(token, adjustMathMLAttributes)
285288
def adjustSVGAttributes(self, token):
    """Restore canonical mixed-case SVG attribute names on *token*.

    Attribute names arrive lowercased from the tokenizer; names such as
    ``viewbox`` must become ``viewBox``.  The rename table is the
    module-level ``adjustSVGAttributes`` mapping imported from
    ``constants`` (the method name shadows it only in class scope, not
    inside this body).
    """
    adjust_attributes(token, adjustSVGAttributes)
356291
def adjustForeignAttributes(self, token):
    """Rename foreign-content attributes on *token*.

    Delegates to the shared table-driven helper with the
    ``adjustForeignAttributesMap`` mapping from ``constants``.
    """
    adjust_attributes(token, adjustForeignAttributesMap)
365294
366295 def reparseTokenNormal (self , token ):
367296 # pylint:disable=unused-argument
@@ -434,7 +363,7 @@ def getPhases(debug):
434363 def log (function ):
435364 """Logger that records which phase processes each token"""
436365 type_names = dict ((value , key ) for key , value in
437- constants . tokenTypes .items ())
366+ tokenTypes .items ())
438367
439368 def wrapped (self , * args , ** kwargs ):
440369 if function .__name__ .startswith ("process" ) and len (args ) > 0 :
@@ -443,7 +372,7 @@ def wrapped(self, *args, **kwargs):
443372 info = {"type" : type_names [token ['type' ]]}
444373 except :
445374 raise
446- if token ['type' ] in constants . tagTokenTypes :
375+ if token ['type' ] in tagTokenTypes :
447376 info ["name" ] = token ['name' ]
448377
449378 self .parser .log .append ((self .parser .tokenizer .state .__name__ ,
@@ -1022,17 +951,9 @@ def __init__(self, parser, tree):
1022951 self .endTagHandler .default = self .endTagOther
1023952
def isMatchingFormattingElement(self, node1, node2):
    """Return True if *node1* and *node2* count as the same formatting element.

    Two formatting elements match when they share the tag name, the
    namespace, and an identical attribute mapping (order-insensitive,
    per dict equality).
    """
    if node1.name != node2.name:
        return False
    if node1.namespace != node2.namespace:
        return False
    return node1.attributes == node2.attributes
1036957
1037958 # helper
1038959 def addFormattingElement (self , token ):
@@ -2798,6 +2719,16 @@ def processEndTag(self, token):
27982719 }
27992720
28002721
def adjust_attributes(token, replacements):
    """Rewrite the attribute names of *token* according to *replacements*.

    ``token['data']`` maps attribute names to values; every key that
    appears in *replacements* is substituted by its replacement while the
    original attribute order is preserved.  The mapping is only rebuilt
    when at least one key actually needs renaming.
    """
    # Set-like dict views give a cheap key intersection on Py3; utils.PY27
    # presumably flags Python 2.7+, where viewkeys also exists — on older
    # interpreters fall back to frozensets of the keys.
    if PY3 or utils.PY27:
        overlap = viewkeys(token['data']) & viewkeys(replacements)
    else:
        overlap = frozenset(token['data']) & frozenset(replacements)
    if overlap:
        token['data'] = OrderedDict(
            (replacements.get(name, name), value)
            for name, value in token['data'].items())
2730+
2731+
28012732def impliedTagToken (name , type = "EndTag" , attributes = None ,
28022733 selfClosing = False ):
28032734 if attributes is None :
0 commit comments