@@ -59,18 +59,13 @@ class HTMLParser(object):
5959 """HTML parser. Generates a tree structure from a stream of (possibly
6060 malformed) HTML"""
6161
62- def __init__ (self , tree = None , tokenizer = tokenizer .HTMLTokenizer ,
63- strict = False , namespaceHTMLElements = True , debug = False ):
62+ def __init__ (self , tree = None , strict = False , namespaceHTMLElements = True , debug = False ):
6463 """
6564 strict - raise an exception when a parse error is encountered
6665
6766 tree - a treebuilder class controlling the type of tree that will be
6867 returned. Built in treebuilders can be accessed through
6968 html5lib.treebuilders.getTreeBuilder(treeType)
70-
71- tokenizer - a class that provides a stream of tokens to the treebuilder.
72- This may be replaced for e.g. a sanitizer which converts some tags to
73- text
7469 """
7570
7671 # Raise an exception on the first error encountered
@@ -79,7 +74,6 @@ def __init__(self, tree=None, tokenizer=tokenizer.HTMLTokenizer,
7974 if tree is None :
8075 tree = treebuilders .getTreeBuilder ("etree" )
8176 self .tree = tree (namespaceHTMLElements )
82- self .tokenizer_class = tokenizer
8377 self .errors = []
8478
8579 self .phases = dict ([(name , cls (self , self .tree )) for name , cls in
@@ -91,9 +85,9 @@ def _parse(self, stream, innerHTML=False, container="div", encoding=None,
9185 self .innerHTMLMode = innerHTML
9286 self .container = container
9387 self .scripting = scripting
94- self .tokenizer = self . tokenizer_class (stream , encoding = encoding ,
95- useChardet = useChardet ,
96- parser = self , ** kwargs )
88+ self .tokenizer = tokenizer . HTMLTokenizer (stream , encoding = encoding ,
89+ useChardet = useChardet ,
90+ parser = self , ** kwargs )
9791 self .reset ()
9892
9993 try :
0 commit comments