55"""
66
77import sys
8- import os
98import traceback
109from optparse import OptionParser
1110
1514from html5lib import constants
1615from html5lib import utils
1716
17+
1818def parse ():
1919 optParser = getOptParser ()
20- opts ,args = optParser .parse_args ()
20+ opts , args = optParser .parse_args ()
2121 encoding = "utf8"
2222
2323 try :
2424 f = args [- 1 ]
2525 # Try opening from the internet
2626 if f .startswith ('http://' ):
2727 try :
28- import urllib .request , urllib .parse , urllib .error , cgi
28+ import urllib .request
29+ import urllib .parse
30+ import urllib .error
31+ import cgi
2932 f = urllib .request .urlopen (f )
3033 contentType = f .headers .get ('content-type' )
3134 if contentType :
@@ -41,7 +44,7 @@ def parse():
4144 try :
4245 # Try opening from file system
4346 f = open (f , "rb" )
44- except IOError as e :
47+ except IOError as e :
4548 sys .stderr .write ("Unable to open file: %s\n " % e )
4649 sys .exit (1 )
4750 except IndexError :
@@ -82,14 +85,15 @@ def parse():
8285 if document :
8386 printOutput (p , document , opts )
8487 t2 = time .time ()
85- sys .stderr .write ("\n \n Run took: %fs (plus %fs to print the output)" % (t1 - t0 , t2 - t1 ))
88+ sys .stderr .write ("\n \n Run took: %fs (plus %fs to print the output)" % (t1 - t0 , t2 - t1 ))
8689 else :
87- sys .stderr .write ("\n \n Run took: %fs" % (t1 - t0 ))
90+ sys .stderr .write ("\n \n Run took: %fs" % (t1 - t0 ))
8891 else :
8992 document = run (parseMethod , f , encoding , opts .scripting )
9093 if document :
9194 printOutput (p , document , opts )
9295
96+
9397def run (parseMethod , f , encoding , scripting ):
9498 try :
9599 document = parseMethod (f , encoding = encoding , scripting = scripting )
@@ -98,6 +102,7 @@ def run(parseMethod, f, encoding, scripting):
98102 traceback .print_exc ()
99103 return document
100104
105+
101106def printOutput (parser , document , opts ):
102107 if opts .encoding :
103108 print ("Encoding:" , parser .tokenizer .stream .charEncoding )
@@ -116,7 +121,7 @@ def printOutput(parser, document, opts):
116121 elif tb == "etree" :
117122 sys .stdout .write (utils .default_etree .tostring (document ))
118123 elif opts .tree :
119- if not hasattr (document ,'__getitem__' ):
124+ if not hasattr (document , '__getitem__' ):
120125 document = [document ]
121126 for fragment in document :
122127 print (parser .tree .testSerializer (fragment ))
@@ -126,7 +131,7 @@ def printOutput(parser, document, opts):
126131 kwargs = {}
127132 for opt in serializer .HTMLSerializer .options :
128133 try :
129- kwargs [opt ] = getattr (opts ,opt )
134+ kwargs [opt ] = getattr (opts , opt )
130135 except :
131136 pass
132137 if not kwargs ['quote_char' ]:
@@ -142,12 +147,14 @@ def printOutput(parser, document, opts):
142147 encoding = "utf-8"
143148 for text in serializer .HTMLSerializer (** kwargs ).serialize (tokens , encoding = encoding ):
144149 sys .stdout .write (text )
145- if not text .endswith ('\n ' ): sys .stdout .write ('\n ' )
150+ if not text .endswith ('\n ' ):
151+ sys .stdout .write ('\n ' )
146152 if opts .error :
147- errList = []
153+ errList = []
148154 for pos , errorcode , datavars in parser .errors :
149- errList .append ("Line %i Col %i" % pos + " " + constants .E .get (errorcode , 'Unknown error "%s"' % errorcode ) % datavars )
150- sys .stdout .write ("\n Parse errors:\n " + "\n " .join (errList )+ "\n " )
155+ errList .append ("Line %i Col %i" % pos + " " + constants .E .get (errorcode , 'Unknown error "%s"' % errorcode ) % datavars )
156+ sys .stdout .write ("\n Parse errors:\n " + "\n " .join (errList ) + "\n " )
157+
151158
152159def getOptParser ():
153160 parser = OptionParser (usage = __doc__ )
0 commit comments