11"""
22Unit tests for pageparsing
33"""
4- import os
5- from six import StringIO
64from unittest import TestCase
7- import numpy
85
96from scrapely .htmlpage import HtmlPage
107from scrapely .extraction .pageparsing import (
@@ -204,16 +201,19 @@
 </head>
 """
 
+
 def _parse_page(parser_class, pagetext):
     htmlpage = HtmlPage(None, {}, pagetext)
     parser = parser_class(TokenDict())
     parser.feed(htmlpage)
     return parser
 
+
 def _tags(pp, predicate):
     return [pp.token_dict.token_string(s) for s in pp.token_list \
             if predicate(s)]
 
+
 class TestPageParsing(TestCase):
 
     def test_instance_parsing(self):
@@ -227,11 +227,11 @@ def test_instance_parsing(self):
         closep = lambda x: pp.token_dict.token_type(x) == TokenType.CLOSE_TAG
         self.assertEqual(_tags(pp, closep), ['</p>', '</html>'])
 
-    def _validate_annotation(self, parser, lable_region, name, start_tag, end_tag):
-        self.assertEqual(lable_region.surrounds_attribute, name)
-        start_token = parser.token_list[lable_region.start_index]
+    def _validate_annotation(self, parser, label_region, name, start_tag, end_tag):
+        self.assertEqual(label_region.surrounds_attribute, name)
+        start_token = parser.token_list[label_region.start_index]
         self.assertEqual(parser.token_dict.token_string(start_token), start_tag)
-        end_token = parser.token_list[lable_region.end_index]
+        end_token = parser.token_list[label_region.end_index]
         self.assertEqual(parser.token_dict.token_string(end_token), end_tag)
 
     def test_template_parsing(self):