99from optparse import OptionParser
1010
1111from html5lib import html5parser
12- from html5lib .tokenizer import HTMLTokenizer
1312from html5lib import treebuilders , serializer , treewalkers
1413from html5lib import constants
1514from html5lib import utils
@@ -53,9 +52,7 @@ def parse():
5352
5453 treebuilder = treebuilders .getTreeBuilder (opts .treebuilder )
5554
56- tokenizer = HTMLTokenizer
57-
58- p = html5parser .HTMLParser (tree = treebuilder , tokenizer = tokenizer , debug = opts .log )
55+ p = html5parser .HTMLParser (tree = treebuilder , debug = opts .log )
5956
6057 if opts .fragment :
6158 parseMethod = p .parseFragment
@@ -96,7 +93,7 @@ def parse():
9693
9794def run (parseMethod , f , encoding , scripting ):
9895 try :
99- document = parseMethod (f , encoding = encoding , scripting = scripting )
96+ document = parseMethod (f , override_encoding = encoding , scripting = scripting )
10097 except :
10198 document = None
10299 traceback .print_exc ()
@@ -117,16 +114,14 @@ def printOutput(parser, document, opts):
117114 document .writexml (sys .stdout , encoding = "utf-8" )
118115 elif tb == "lxml" :
119116 import lxml .etree
120- sys .stdout .write (lxml .etree .tostring (document ))
117+ sys .stdout .write (lxml .etree .tostring (document , encoding = "unicode" ))
121118 elif tb == "etree" :
122- sys .stdout .write (utils .default_etree .tostring (document ))
119+ sys .stdout .write (utils .default_etree .tostring (document , encoding = "unicode" ))
123120 elif opts .tree :
124121 if not hasattr (document , '__getitem__' ):
125122 document = [document ]
126123 for fragment in document :
127124 print (parser .tree .testSerializer (fragment ))
128- elif opts .hilite :
129- sys .stdout .write (document .hilite ("utf-8" ))
130125 elif opts .html :
131126 kwargs = {}
132127 for opt in serializer .HTMLSerializer .options :
@@ -188,9 +183,6 @@ def getOptParser():
188183 parser .add_option ("" , "--no-html" , action = "store_false" , default = True ,
189184 dest = "html" , help = "Don't output html" )
190185
191- parser .add_option ("" , "--hilite" , action = "store_true" , default = False ,
192- dest = "hilite" , help = "Output as formatted highlighted code." )
193-
194186 parser .add_option ("-c" , "--encoding" , action = "store_true" , default = False ,
195187 dest = "encoding" , help = "Print character encoding used" )
196188
0 commit comments