@@ -17,8 +17,6 @@ extruct
1717
1818*extruct* is a library for extracting embedded metadata from HTML markup.
1919
20- It also has a built-in HTTP server to test its output as JSON.
21-
2220Currently, *extruct* supports:
2321
2422- `W3C's HTML Microdata`_
@@ -586,104 +584,6 @@ Microformat extraction
586584 }
587585 }]
588586
589- REST API service
590- ----------------
591-
592- *extruct* also ships with a REST API service to test its output from URLs.
593-
594- Dependencies
595- ++++++++++++
596-
597- * bottle_ (Web framework)
598- * gevent_ (Async framework)
599- * requests_
600-
601- .. _bottle: https://pypi.python.org/pypi/bottle
602- .. _gevent: http://www.gevent.org/
603- .. _requests: http://docs.python-requests.org/
604-
605- Usage
606- +++++
607-
608- ::
609-
610- python -m extruct.service
611-
612- launches an HTTP server listening on port 10005.
613-
614- Methods supported
615- +++++++++++++++++
616-
617- ::
618-
619- /extruct/<URL>
620- method = GET
621-
622-
623- /extruct/batch
624- method = POST
625- params:
626- urls - a list of URLs separated by newlines
627- urlsfile - a file with one URL per line
628-
629- E.g. http://localhost:10005/extruct/http://www.sarenza.com/i-love-shoes-susket-s767163-p0000119412
630-
631- will output something like this:
632-
633- >>>
634- { 'json-ld': [ { '@context': 'http://schema.org',
635- '@id': 'FP',
636- '@type': 'Product',
637- 'brand': { '@type': 'Brand',
638- 'url': 'https://www.sarenza.com/i-love-shoes'},
639- 'color': ['Lava', 'Black', 'Lt grey'],
640- 'image': [ 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_09.jpg?201509221045&v=20180313113923',
641- 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_02.jpg?201509291747&v=20180313113923',
642- 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_03.jpg?201509221045&v=20180313113923',
643- 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_04.jpg?201509221045&v=20180313113923',
644- 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_05.jpg?201509221045&v=20180313113923',
645- 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_06.jpg?201509221045&v=20180313113923',
646- 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_07.jpg?201509221045&v=20180313113923',
647- 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_08.jpg?201509221045&v=20180313113923'],
648- 'name': 'Susket',
649- 'offers': { '@type': 'AggregateOffer',
650- 'availability': 'InStock',
651- 'highPrice': '49.00',
652- 'lowPrice': '0.00',
653- 'price': '0.00',
654- 'priceCurrency': 'EUR'}}],
655- 'microdata': [ { 'properties': { 'average': '4.7',
656- 'best': '5',
657- 'itemreviewed': 'Sarenza',
658- 'rating': '4.7 / 5\n\t\t (4 066 avis)',
659- 'votes': '4 066'},
660- 'type': 'http://data-vocabulary.org/Review-aggregate'}],
661- 'microformat': [],
662- 'opengraph': [ { 'namespace': {'og': 'http://ogp.me/ns#'},
663- 'properties': [ ( 'og:title',
664- 'I Love Shoes Susket @sarenza.com'),
665- ( 'og:image',
666- 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_09.jpg?201509221045&v=20180313113923'),
667- ('og:site_name', 'sarenza.com'),
668- ('og:type', 'product'),
669- ('og:description', '...'),
670- ( 'og:url',
671- 'https://www.sarenza.com/i-love-shoes-susket-s767163-p0000119412'),
672- ('og:country-name', 'FRA')]}],
673- 'rdfa': [ { '@id': 'https://www.sarenza.com/i-love-shoes-susket-s767163-p0000119412',
674- 'http://ogp.me/ns#country-name': [{'@value': 'FRA'}],
675- 'http://ogp.me/ns#description': [{'@value': '...'}],
676- 'http://ogp.me/ns#image': [ { '@value': 'https://cdn.sarenza.net/_img/productsv4/0000119412/MD_0000119412_223992_09.jpg?201509221045&v=20180313113923'}],
677- 'http://ogp.me/ns#site_name': [{'@value': 'sarenza.com'}],
678- 'http://ogp.me/ns#title': [ { '@value': 'I Love Shoes Susket '
679- '@sarenza.com'}],
680- 'http://ogp.me/ns#type': [{'@value': 'product'}],
681- 'http://ogp.me/ns#url': [ { '@value': 'https://www.sarenza.com/i-love-shoes-susket-s767163-p0000119412'}],
682- 'http://ogp.me/ns/fb#admins': [{'@value': '100001934697625'}],
683- 'http://ogp.me/ns/fb#app_id': [{'@value': '148128758532914'}]},
684- { '@id': '_:Ncf1962068aa142b29000813372db7841',
685- 'http://www.w3.org/1999/xhtml/vocab#role': [ { '@id': 'http://www.w3.org/1999/xhtml/vocab#navigation'}]}]}
686-
687587
688588Command Line Tool
689589-----------------
0 commit comments