55import pytest
66
77import extruct
8+ from extruct import SYNTAXES
89from tests import get_testdata , jsonize_dict , replace_node_ref_with_node_id
910
1011
@@ -16,6 +17,21 @@ def test_all(self):
1617 body = get_testdata ('songkick' , 'elysianfields.html' )
1718 expected = json .loads (get_testdata ('songkick' , 'elysianfields.json' ).decode ('UTF-8' ))
1819 data = extruct .extract (body , base_url = 'http://www.songkick.com/artists/236156-elysian-fields' )
20+ # See test_rdfa_not_preserving_order()
21+ del data ['rdfa' ][0 ]['http://ogp.me/ns#image' ]
22+ del expected ['rdfa' ][0 ]['http://ogp.me/ns#image' ]
23+ self .assertEqual (jsonize_dict (data ), expected )
24+
@pytest.mark.xfail
def test_rdfa_not_preserving_order(self):
    # See https://github.com/scrapinghub/extruct/issues/116
    # RDFa is not preserving ordering on duplicated properties. So this
    # test sometimes fails for property 'http://ogp.me/ns#image', which is
    # why it is marked as an expected failure.
    body = get_testdata('songkick', 'elysianfields.html')
    expected = json.loads(
        get_testdata('songkick', 'elysianfields.json').decode('UTF-8'))
    data = extruct.extract(
        body,
        base_url='http://www.songkick.com/artists/236156-elysian-fields',
        syntaxes=['rdfa'])
    # Only the RDFa syntax was requested above, while the same fixture is
    # also compared against an unrestricted extract() call in this file
    # (so it presumably carries the other syntaxes as well). Compare just
    # the 'rdfa' sections so that a failure reflects the ordering issue and
    # not a mismatch in top-level keys.
    self.assertEqual(jsonize_dict(data)['rdfa'], expected['rdfa'])
2036
2137 def test_microdata_custom_url (self ):
0 commit comments