@@ -107,13 +107,9 @@ def _extract_both(self, page, start_index, end_index, ignored_regions=None, **kw
107107 self ._extract_attribute (page , start_index , end_index , ignored_regions )
108108
109109 def _extract_content (self , extraction_page , start_index , end_index , ignored_regions = None , ** kwargs ):
110- # extract content between annotation indexes
111- if not ignored_regions :
112- region = extraction_page .htmlpage_region_inside (start_index , end_index )
113- else :
114- # assumes ignored_regions are completely contained within start and end index
115- assert (start_index <= ignored_regions [0 ].start_index and
116- end_index >= ignored_regions [- 1 ].end_index )
110+ """extract content between annotation indexes"""
111+ if ignored_regions and (start_index <= ignored_regions [0 ].start_index and
112+ end_index >= ignored_regions [- 1 ].end_index ):
117113 starts = [start_index ] + [i .end_index for i in ignored_regions if i .end_index is not None ]
118114 ends = [i .start_index for i in ignored_regions ]
119115 if starts [- 1 ] is not None :
@@ -123,6 +119,8 @@ def _extract_content(self, extraction_page, start_index, end_index, ignored_regi
123119 included_regions .next ()
124120 regions = starmap (extraction_page .htmlpage_region_inside , included_regions )
125121 region = FragmentedHtmlPageRegion (extraction_page .htmlpage , list (regions ))
122+ else :
123+ region = extraction_page .htmlpage_region_inside (start_index , end_index )
126124 validated = self .content_validate (region )
127125 return [(self .annotation .surrounds_attribute , validated )] if validated else []
128126
0 commit comments