Merge pull request #157 from asmeurer/xfails-file

asmeurer · web-flow · commit d2d43a7261dd · 2023-04-18T13:57:40.000-06:00
Add support for an xfails file, and allow the files to be specified with a flag
diff --git a/.github/workflows/numpy.yml b/.github/workflows/numpy.yml
@@ -28,51 +28,4 @@ jobs:
       env:
         ARRAY_API_TESTS_MODULE: numpy.array_api
       run: |
-        # Skip testing functions with known issues
-        cat << EOF >> skips.txt
-
-        # copy not implemented
-        array_api_tests/test_creation_functions.py::test_asarray_arrays
-        # https://github.com/numpy/numpy/issues/20870
-        array_api_tests/test_data_type_functions.py::test_can_cast
-        # The return dtype for trace is not consistent in the spec
-        # https://github.com/data-apis/array-api/issues/202#issuecomment-952529197
-        array_api_tests/test_linalg.py::test_trace
-        # waiting on NumPy to allow/revert distinct NaNs for np.unique
-        # https://github.com/numpy/numpy/issues/20326#issuecomment-1012380448
-        array_api_tests/test_set_functions.py
-
-        # https://github.com/numpy/numpy/issues/21373
-        array_api_tests/test_array_object.py::test_getitem
-
-        # missing copy arg
-        array_api_tests/test_signatures.py::test_func_signature[reshape]
-
-        # https://github.com/numpy/numpy/issues/21211
-        array_api_tests/test_special_cases.py::test_iop[__iadd__(x1_i is -0 and x2_i is -0) -> -0]
-        # https://github.com/numpy/numpy/issues/21213
-        array_api_tests/test_special_cases.py::test_iop[__ipow__(x1_i is -infinity and x2_i > 0 and not (x2_i.is_integer() and x2_i % 2 == 1)) -> +infinity]
-        array_api_tests/test_special_cases.py::test_iop[__ipow__(x1_i is -0 and x2_i > 0 and not (x2_i.is_integer() and x2_i % 2 == 1)) -> +0]
-        # noted diversions from spec
-        array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is +infinity and isfinite(x2_i) and x2_i > 0) -> +infinity]
-        array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is +infinity and isfinite(x2_i) and x2_i < 0) -> -infinity]
-        array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is -infinity and isfinite(x2_i) and x2_i > 0) -> -infinity]
-        array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is -infinity and isfinite(x2_i) and x2_i < 0) -> +infinity]
-        array_api_tests/test_special_cases.py::test_binary[floor_divide(isfinite(x1_i) and x1_i > 0 and x2_i is -infinity) -> -0]
-        array_api_tests/test_special_cases.py::test_binary[floor_divide(isfinite(x1_i) and x1_i < 0 and x2_i is +infinity) -> -0]
-        array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i > 0) -> +infinity]
-        array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i < 0) -> -infinity]
-        array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i > 0) -> -infinity]
-        array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i < 0) -> +infinity]
-        array_api_tests/test_special_cases.py::test_binary[__floordiv__(isfinite(x1_i) and x1_i > 0 and x2_i is -infinity) -> -0]
-        array_api_tests/test_special_cases.py::test_binary[__floordiv__(isfinite(x1_i) and x1_i < 0 and x2_i is +infinity) -> -0]
-        array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i > 0) -> +infinity]
-        array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i < 0) -> -infinity]
-        array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i > 0) -> -infinity]
-        array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i < 0) -> +infinity]
-        array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(isfinite(x1_i) and x1_i > 0 and x2_i is -infinity) -> -0]
-        array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(isfinite(x1_i) and x1_i < 0 and x2_i is +infinity) -> -0]
-
-        EOF
-
-        pytest -v -rxXfE --ci
+        pytest -v -rxXfE --ci --skips-file numpy-skips.txt
diff --git a/README.md b/README.md
@@ -185,13 +185,22 @@ By default, tests for the optional Array API extensions such as
 will be skipped if not present in the specified array module. You can purposely
 skip testing extension(s) via the `--disable-extension` option.
 
-#### Skip test cases
+#### Skip or XFAIL test cases
 
-Test cases you want to skip can be specified in a `skips.txt` file in the root
-of this repository, e.g.:
+Test cases you want to skip can be specified in a skips or XFAILS file. The
+difference between skip and XFAIL is that XFAIL tests are still run and
+reported as XPASS if they pass.
+
+By default, the skips and xfails files are `skips.txt` and `fails.txt` in the root
+of this repository, but any file can be specified with the `--skips-file` and
+`--xfails-file` command line flags.
+
+The files should list the test ids to be skipped/xfailed. Empty lines and
+lines starting with `#` are ignored. The test id can be any substring of the
+test ids to skip/xfail.
 
 ```
-# ./skips.txt
+# skips.txt or xfails.txt
 # Line comments can be denoted with the hash symbol (#)
 
 # Skip specific test case, e.g. when argsort() does not respect relative order
@@ -207,39 +216,81 @@ array_api_tests/test_add[__iadd__(x, s)]
 array_api_tests/test_set_functions.py
 ```
 
-For GitHub Actions, you might like to keep everything in the workflow config
-instead of having a seperate `skips.txt` file, e.g.:
+Here is an example GitHub Actions workflow file, where the xfails are stored
+in `array-api-tests.xfails.txt` in the base of the `your-array-library` repo.
+
+If you want, you can use `-o xfail_strict=True`, which causes XPASS tests (XFAIL
+tests that actually pass) to fail the test suite. However, be aware that
+XFAILures can be flaky (see below, so this may not be a good idea unless you
+use some other mitigation of such flakyness).
+
+If you don't want this behavior, you can remove it, or use `--skips-file`
+instead of `--xfails-file`.
 
 ```yaml
 # ./.github/workflows/array_api.yml
-...
-    ...
-    - name: Run the test suite
+jobs:
+  tests:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ['3.8', '3.9', '3.10', '3.11']
+
+    steps:
+    - name: Checkout <your array library>
+      uses: actions/checkout@v3
+      with:
+        path: your-array-library
+
+    - name: Checkout array-api-tests
+      uses: actions/checkout@v3
+      with:
+        repository: data-apis/array-api-tests
+        submodules: 'true'
+        path: array-api-tests
+
+    - name: Run the array API test suite
       env:
         ARRAY_API_TESTS_MODULE: your.array.api.namespace
       run: |
-        # Skip test cases with known issues
-        cat << EOF >> skips.txt
-
-        # Comments can still work here
-        array_api_tests/test_sorting_functions.py::test_argsort
-        array_api_tests/test_add[__iadd__(x1, x2)]
-        array_api_tests/test_add[__iadd__(x, s)]
-        array_api_tests/test_set_functions.py
-
-        EOF
-
-        pytest -v -rxXfE --ci
+        export PYTHONPATH="${GITHUB_WORKSPACE}/your-array-library"
+        cd ${GITHUB_WORKSPACE}/array-api-tests
+        pytest -v -rxXfE --ci --xfails-file ${GITHUB_WORKSPACE}/your-array-library/array-api-tests-xfails.txt array_api_tests/
 ```
 
+> **Warning**
+>
+> XFAIL tests that use Hypothesis (basically every test in the test suite except
+> those in test_has_names.py) can be flaky, due to the fact that Hypothesis
+> might not always run the test with an input that causes the test to fail.
+> There are several ways to avoid this problem:
+>
+> - Increase the maximum number of examples, e.g., by adding `--max-examples
+>   200` to the test command (the default is `100`, see below). This will
+>   make it more likely that the failing case will be found, but it will also
+>   make the tests take longer to run.
+> - Don't use `-o xfail_strict=True`. This will make it so that if an XFAIL
+>   test passes, it will alert you in the test summary but will not cause the
+>   test run to register as failed.
+> - Use skips instead of XFAILS. The difference between XFAIL and skip is that
+>   a skipped test is never run at all, whereas an XFAIL test is always run
+>   but ignored if it fails.
+> - Save the [Hypothesis examples
+>   database](https://hypothesis.readthedocs.io/en/latest/database.html)
+>   persistently on CI. That way as soon as a run finds one failing example,
+>   it will always re-run future runs with that example. But note that the
+>   Hypothesis examples database may be cleared when a new version of
+>   Hypothesis or the test suite is released.
+
 #### Max examples
 
 The tests make heavy use
 [Hypothesis](https://hypothesis.readthedocs.io/en/latest/). You can configure
-how many examples are generated using the `--max-examples` flag, which defaults
-to 100. Lower values can be useful for quick checks, and larger values should
-result in more rigorous runs. For example, `--max-examples 10_000` may find bugs
-where default runs don't but will take much longer to run.
+how many examples are generated using the `--max-examples` flag, which
+defaults to `100`. Lower values can be useful for quick checks, and larger
+values should result in more rigorous runs. For example, `--max-examples
+10_000` may find bugs where default runs don't but will take much longer to
+run.
 
 
 ## Contributing
diff --git a/array_api_tests/test_has_names.py b/array_api_tests/test_has_names.py
@@ -9,6 +9,8 @@
 from .stubs import (array_attributes, array_methods, category_to_funcs,
                     extension_to_funcs, EXTENSIONS)
 
+pytestmark = pytest.mark.ci
+
 has_name_params = []
 for ext, stubs in extension_to_funcs.items():
     for stub in stubs:
diff --git a/conftest.py b/conftest.py
@@ -1,5 +1,7 @@
 from functools import lru_cache
 from pathlib import Path
+import warnings
+import os
 
 from hypothesis import settings
 from pytest import mark
@@ -48,7 +50,17 @@ def pytest_addoption(parser):
     parser.addoption(
         "--ci",
         action="store_true",
-        help="run just the tests appropiate for CI",
+        help="run just the tests appropriate for CI",
+    )
+    parser.addoption(
+        "--skips-file",
+        action="store",
+        help="file with tests to skip. Defaults to skips.txt"
+    )
+    parser.addoption(
+        "--xfails-file",
+        action="store",
+        help="file with tests to skip. Defaults to xfails.txt"
     )
 
 
@@ -87,26 +99,56 @@ def xp_has_ext(ext: str) -> bool:
         return False
 
 
-skip_ids = []
-skips_path = Path(__file__).parent / "skips.txt"
-if skips_path.exists():
-    with open(skips_path) as f:
-        for line in f:
-            if line.startswith("array_api_tests"):
+def pytest_collection_modifyitems(config, items):
+    skips_file = skips_path = config.getoption('--skips-file')
+    if skips_file is None:
+        skips_file = Path(__file__).parent / "skips.txt"
+        if skips_file.exists():
+            skips_path = skips_file
+
+    skip_ids = []
+    if skips_path:
+        with open(os.path.expanduser(skips_path)) as f:
+            for line in f:
+                if line.startswith("array_api_tests"):
+                    id_ = line.strip("\n")
+                    skip_ids.append(id_)
+
+    xfails_file = xfails_path = config.getoption('--xfails-file')
+    if xfails_file is None:
+        xfails_file = Path(__file__).parent / "xfails.txt"
+        if xfails_file.exists():
+            xfails_path = xfails_file
+
+    xfail_ids = []
+    if xfails_path:
+        with open(os.path.expanduser(xfails_path)) as f:
+            for line in f:
+                if not line.strip() or line.startswith('#'):
+                    continue
                 id_ = line.strip("\n")
-                skip_ids.append(id_)
+                xfail_ids.append(id_)
 
+    skip_id_matched = {id_: False for id_ in skip_ids}
+    xfail_id_matched = {id_: False for id_ in xfail_ids}
 
-def pytest_collection_modifyitems(config, items):
     disabled_exts = config.getoption("--disable-extension")
     disabled_dds = config.getoption("--disable-data-dependent-shapes")
     ci = config.getoption("--ci")
+
     for item in items:
         markers = list(item.iter_markers())
-        # skip if specified in skips.txt
+        # skip if specified in skips file
         for id_ in skip_ids:
-            if item.nodeid.startswith(id_):
-                item.add_marker(mark.skip(reason="skips.txt"))
+            if id_ in item.nodeid:
+                item.add_marker(mark.skip(reason=f"--skips-file ({skips_file})"))
+                skip_id_matched[id_] = True
+                break
+        # xfail if specified in xfails file
+        for id_ in xfail_ids:
+            if id_ in item.nodeid:
+                item.add_marker(mark.xfail(reason=f"--xfails-file ({xfails_file})"))
+                xfail_id_matched[id_] = True
                 break
         # skip if disabled or non-existent extension
         ext_mark = next((m for m in markers if m.name == "xp_extension"), None)
@@ -141,3 +183,26 @@ def pytest_collection_modifyitems(config, items):
                         reason=f"requires ARRAY_API_TESTS_VERSION=>{min_version}"
                     )
                 )
+
+    bad_ids_end_msg = (
+        "Note the relevant tests might not of been collected by pytest, or "
+        "another specified id might have already matched a test."
+    )
+    bad_skip_ids = [id_ for id_, matched in skip_id_matched.items() if not matched]
+    if bad_skip_ids:
+        f_bad_ids = "\n".join(f"    {id_}" for id_ in bad_skip_ids)
+        warnings.warn(
+            f"{len(bad_skip_ids)} ids in skips file don't match any collected tests: \n"
+            f"{f_bad_ids}\n"
+            f"(skips file: {skips_file})\n"
+            f"{bad_ids_end_msg}"
+        )
+    bad_xfail_ids = [id_ for id_, matched in xfail_id_matched.items() if not matched]
+    if bad_xfail_ids:
+        f_bad_ids = "\n".join(f"    {id_}" for id_ in bad_xfail_ids)
+        warnings.warn(
+            f"{len(bad_xfail_ids)} ids in xfails file don't match any collected tests: \n"
+            f"{f_bad_ids}\n"
+            f"(xfails file: {xfails_file})\n"
+            f"{bad_ids_end_msg}"
+        )
diff --git a/numpy-skips.txt b/numpy-skips.txt
@@ -0,0 +1,41 @@
+# copy not implemented
+array_api_tests/test_creation_functions.py::test_asarray_arrays
+# https://github.com/numpy/numpy/issues/20870
+array_api_tests/test_data_type_functions.py::test_can_cast
+# The return dtype for trace is not consistent in the spec
+# https://github.com/data-apis/array-api/issues/202#issuecomment-952529197
+array_api_tests/test_linalg.py::test_trace
+# waiting on NumPy to allow/revert distinct NaNs for np.unique
+# https://github.com/numpy/numpy/issues/20326#issuecomment-1012380448
+array_api_tests/test_set_functions.py
+
+# https://github.com/numpy/numpy/issues/21373
+array_api_tests/test_array_object.py::test_getitem
+
+# missing copy arg
+array_api_tests/test_signatures.py::test_func_signature[reshape]
+
+# https://github.com/numpy/numpy/issues/21211
+array_api_tests/test_special_cases.py::test_iop[__iadd__(x1_i is -0 and x2_i is -0) -> -0]
+# https://github.com/numpy/numpy/issues/21213
+array_api_tests/test_special_cases.py::test_iop[__ipow__(x1_i is -infinity and x2_i > 0 and not (x2_i.is_integer() and x2_i % 2 == 1)) -> +infinity]
+array_api_tests/test_special_cases.py::test_iop[__ipow__(x1_i is -0 and x2_i > 0 and not (x2_i.is_integer() and x2_i % 2 == 1)) -> +0]
+# noted diversions from spec
+array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is +infinity and isfinite(x2_i) and x2_i > 0) -> +infinity]
+array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is +infinity and isfinite(x2_i) and x2_i < 0) -> -infinity]
+array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is -infinity and isfinite(x2_i) and x2_i > 0) -> -infinity]
+array_api_tests/test_special_cases.py::test_binary[floor_divide(x1_i is -infinity and isfinite(x2_i) and x2_i < 0) -> +infinity]
+array_api_tests/test_special_cases.py::test_binary[floor_divide(isfinite(x1_i) and x1_i > 0 and x2_i is -infinity) -> -0]
+array_api_tests/test_special_cases.py::test_binary[floor_divide(isfinite(x1_i) and x1_i < 0 and x2_i is +infinity) -> -0]
+array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i > 0) -> +infinity]
+array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i < 0) -> -infinity]
+array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i > 0) -> -infinity]
+array_api_tests/test_special_cases.py::test_binary[__floordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i < 0) -> +infinity]
+array_api_tests/test_special_cases.py::test_binary[__floordiv__(isfinite(x1_i) and x1_i > 0 and x2_i is -infinity) -> -0]
+array_api_tests/test_special_cases.py::test_binary[__floordiv__(isfinite(x1_i) and x1_i < 0 and x2_i is +infinity) -> -0]
+array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i > 0) -> +infinity]
+array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is +infinity and isfinite(x2_i) and x2_i < 0) -> -infinity]
+array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i > 0) -> -infinity]
+array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(x1_i is -infinity and isfinite(x2_i) and x2_i < 0) -> +infinity]
+array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(isfinite(x1_i) and x1_i > 0 and x2_i is -infinity) -> -0]
+array_api_tests/test_special_cases.py::test_iop[__ifloordiv__(isfinite(x1_i) and x1_i < 0 and x2_i is +infinity) -> -0]