Commit 65bd878
switch to nosetests as preferred way to run tests. also updated README clarifying about using string for training data (closes #16)
Parent: 8f73625

File tree: 3 files changed (+12, -32 lines)


README.rst

12 additions, 4 deletions
@@ -35,16 +35,16 @@ Scrapely has a powerful API, including a template format that can be edited
 externally, that you can use to build very capable scrapers.
 
 What follows is a quick example of the simplest possible usage, that you can
-run in the Python shell. This example is also available in the ``example.py``
-script, located at the root of the repository.
+run in a Python shell.
 
 Start by importing and instantiating the Scraper class::
 
     >>> from scrapely import Scraper
     >>> s = Scraper()
 
 Then, proceed to train the scraper by adding some page and the data you expect
-to scrape from there::
+to scrape from there (note that all keys and values in the data you pass must
+be strings)::
 
     >>> url1 = 'http://pypi.python.org/pypi/w3lib'
     >>> data = {'name': 'w3lib 1.0', 'author': 'Scrapy project', 'description': 'Library of web-related functions'}
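The strings-only rule the README now states can be made concrete with a small check. The helper below is invented for illustration (it is not part of the scrapely API); it simply enforces the constraint the diff documents:

```python
def ensure_string_training_data(data):
    """Hypothetical helper (not in scrapely): reject training data whose
    keys or values are not strings, per the README's new note."""
    for key, value in data.items():
        if not isinstance(key, str) or not isinstance(value, str):
            raise TypeError(
                "training data keys and values must be strings, "
                "got {!r}: {!r}".format(key, value))
    return data

# Accepted: everything is a string, as in the README example.
ensure_string_training_data({
    'name': 'w3lib 1.0',
    'author': 'Scrapy project',
    'description': 'Library of web-related functions',
})

# Rejected: a non-string value such as a float raises TypeError.
# ensure_string_training_data({'version': 1.0})
```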
@@ -156,6 +156,12 @@ And then install scrapely with::
 
     aptitude install python-scrapely
 
+Tests
+=====
+
+`nose`_ is the preferred way to run tests. Just run: ``nosetests`` from the
+root directory.
+
 Architecture
 ============
 
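The bare ``nosetests`` command in the new Tests section works because nose discovers tests by name: it collects modules, classes, and functions whose names match its ``testMatch`` regular expression. The sketch below uses what I understand to be nose's default pattern; ``looks_like_test`` is an invented helper for illustration, not part of nose:

```python
import re

# Believed default of nose's testMatch setting: 'test'/'Test' at the
# start of a name, or after '_', '.', '-', or a word break.
TEST_MATCH = re.compile(r'(?:^|[\b_\.-])[Tt]est')

def looks_like_test(name):
    """Illustrative helper: would nose collect an object with this name?"""
    return TEST_MATCH.search(name) is not None

looks_like_test('test_scraper')   # modules like test_scraper.py are collected
looks_like_test('iter_samples')   # helpers in tests/__init__.py are skipped
```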
@@ -183,7 +189,8 @@ the other hand, the extraction code is reliable and production-ready. So, if
 you want to use Scrapely in production, you should use train() with caution and
 make sure it annotates the area of the page you intent being annotated.
 
-Alternatively, you can use the Scrapely tool to annotate pages.
+Alternatively, you can use the Scrapely command line tool to annotate pages,
+which provides more manual control for higher accuracy.
 
 License
 =======
@@ -197,3 +204,4 @@ Scrapely library is licensed under the BSD license.
 .. _same Github account: https://github.com/scrapy
 .. _slybot: https://github.com/scrapy/slybot
 .. _selectors: http://doc.scrapy.org/en/latest/topics/selectors.html
+.. _nose: http://readthedocs.org/docs/nose/en/latest/

scrapely/tests/__init__.py

0 additions, 27 deletions
@@ -1,8 +1,6 @@
 import sys
 from os import path
 from itertools import count
-from unittest import TestSuite, TestLoader, main
-from doctest import DocTestSuite
 from scrapely import json
 
 _PATH = path.abspath(path.dirname(__file__))
@@ -25,28 +23,3 @@ def iter_samples(prefix, html_encoding='utf-8', **json_kwargs):
         html_str = open(html_page, 'rb').read()
         sample_data = json.load(open(fname + '.json'), **json_load_kwargs)
         yield html_str.decode(html_encoding), sample_data
-
-
-UNIT_TESTS = [
-    'scrapely.tests.test_extraction',
-    'scrapely.tests.test_htmlpage',
-    'scrapely.tests.test_htmlpage_data',
-    'scrapely.tests.test_pageparsing',
-    'scrapely.tests.test_template',
-    'scrapely.tests.test_scraper',
-]
-
-DOC_TESTS = [
-    'scrapely.extractors',
-    'scrapely.extraction.regionextract',
-    'scrapely.extraction.similarity',
-    'scrapely.extraction.pageobjects',
-]
-
-def suite():
-    suite = TestSuite()
-    for m in UNIT_TESTS:
-        suite.addTests(TestLoader().loadTestsFromName(m))
-    for m in DOC_TESTS:
-        suite.addTest(DocTestSuite(__import__(m, {}, {}, [''])))
-    return suite
-
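The deleted ``suite()`` hand-built a unittest suite and wrapped each module in ``DOC_TESTS`` with ``DocTestSuite``, a job nose now takes over. A minimal, self-contained sketch of what that wrapping did; the ``demo`` module here is invented for illustration, standing in for modules like ``scrapely.extractors``:

```python
import types
import unittest
from doctest import DocTestSuite

# Stand-in for a DOC_TESTS module: a module whose docstring carries a doctest.
demo = types.ModuleType('demo')
demo.__doc__ = """
>>> 2 + 2
4
"""

suite = unittest.TestSuite()
suite.addTest(DocTestSuite(demo))  # the same call the removed suite() made
result = unittest.TextTestRunner(verbosity=0).run(suite)
```

The suite runs the module's docstring example as one test case, which is what made the doctests show up in ``python setup.py test`` before this commit.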

setup.py

0 additions, 1 deletion
@@ -25,7 +25,6 @@
 
 try:
     from setuptools import setup
-    args['test_suite'] = 'scrapely.tests.suite'
     args['install_requires'] = ['numpy', 'w3lib']
     if sys.version_info < (2, 6):
         args['install_requires'] += ['simplejson']

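With the ``test_suite`` hook gone, what remains in this setup.py block is the version-conditional dependency list. Its logic can be sketched as a pure function; ``install_requires_for`` is an invented name, and the (2, 6) threshold comes from the diff (Pythons before 2.6 lack the stdlib ``json`` module, hence the ``simplejson`` fallback):

```python
def install_requires_for(version_info):
    """Hypothetical mirror of setup.py's logic: numpy and w3lib always,
    plus simplejson on Pythons older than 2.6 (no stdlib json module)."""
    requires = ['numpy', 'w3lib']
    if version_info < (2, 6):
        requires.append('simplejson')
    return requires

install_requires_for((2, 5))  # ['numpy', 'w3lib', 'simplejson']
install_requires_for((2, 7))  # ['numpy', 'w3lib']
```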