Use pytest-benchmark

brandonwillard · ricardoV94 · commit 0a242f29afd9 · 2022-12-16T22:53:36.000+01:00
Co-authored-by: Brandon T. Willard <brandonwillard@users.noreply.github.com>
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -115,7 +115,7 @@ jobs:
       - name: Install dependencies
         shell: bash -l {0}
         run: |
-          mamba install --yes -q "python~=${PYTHON_VERSION}=*_cpython" mkl numpy scipy pip mkl-service graphviz cython pytest coverage pytest-cov sympy
+          mamba install --yes -q "python~=${PYTHON_VERSION}=*_cpython" mkl numpy scipy pip mkl-service graphviz cython pytest coverage pytest-cov pytest-benchmark sympy
           if [[ $INSTALL_NUMBA == "1" ]]; then mamba install --yes -q -c conda-forge "python~=${PYTHON_VERSION}=*_cpython" "numba>=0.55" numba-scipy; fi
           mamba install --yes -q -c conda-forge "python~=${PYTHON_VERSION}=*_cpython" jax jaxlib
           pip install -e ./
@@ -132,7 +132,7 @@ jobs:
           if [[ $FAST_COMPILE == "1" ]]; then export PYTENSOR_FLAGS=$PYTENSOR_FLAGS,mode=FAST_COMPILE; fi
           if [[ $FLOAT32 == "1" ]]; then export PYTENSOR_FLAGS=$PYTENSOR_FLAGS,floatX=float32; fi
           export PYTENSOR_FLAGS=$PYTENSOR_FLAGS,warn__ignore_bug_before=all,on_opt_error=raise,on_shape_error=raise,gcc__cxxflags=-pipe
-          python -m pytest -x -r A --verbose --runslow --cov=pytensor/ --cov-report=xml:coverage/coverage-${MATRIX_ID}.xml --no-cov-on-fail $PART
+          python -m pytest -x -r A --verbose --runslow --cov=pytensor/ --cov-report=xml:coverage/coverage-${MATRIX_ID}.xml --no-cov-on-fail $PART --benchmark-skip
         env:
           MATRIX_ID: ${{ steps.matrix-id.outputs.id }}
           MKL_THREADING_LAYER: GNU
diff --git a/environment.yml b/environment.yml
@@ -30,6 +30,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
+  - pytest-benchmark
   # For building docs
   - sphinx>=5.1.0
   - sphinx_rtd_theme
diff --git a/pyproject.toml b/pyproject.toml
@@ -86,6 +86,7 @@ tests = [
     "pre-commit",
     "pytest-cov>=2.6.1",
     "coverage>=5.1",
+    "pytest-benchmark",
 ]
 rtd = [
     "sphinx>=1.3.0",
diff --git a/tests/link/jax/test_elemwise.py b/tests/link/jax/test_elemwise.py
@@ -1,6 +1,9 @@
 import numpy as np
 import pytest
+import scipy.special
 
+import pytensor
+import pytensor.tensor as at
 from pytensor.configdefaults import config
 from pytensor.graph.fg import FunctionGraph
 from pytensor.graph.op import get_test_value
@@ -98,3 +101,24 @@ def test_softmax_grad(axis):
     out = SoftmaxGrad(axis=axis)(dy, sm)
     fgraph = FunctionGraph([dy, sm], [out])
     compare_jax_and_py(fgraph, [get_test_value(i) for i in fgraph.inputs])
+
+
+@pytest.mark.parametrize("size", [(10, 10), (1000, 1000), (10000, 10000)])
+@pytest.mark.parametrize("axis", [0, 1])
+def test_logsumexp_benchmark(size, axis, benchmark):
+    X = at.matrix("X")
+    X_max = at.max(X, axis=axis, keepdims=True)
+    X_max = at.switch(at.isinf(X_max), 0, X_max)
+    X_lse = at.log(at.sum(at.exp(X - X_max), axis=axis, keepdims=True)) + X_max
+
+    X_val = np.random.normal(size=size)
+
+    X_lse_fn = pytensor.function([X], X_lse, mode="JAX")
+
+    # JIT compile first
+    _ = X_lse_fn(X_val)
+
+    res = benchmark(X_lse_fn, X_val)
+
+    exp_res = scipy.special.logsumexp(X_val, axis=axis, keepdims=True)
+    np.testing.assert_array_almost_equal(res, exp_res)
diff --git a/tests/link/numba/test_basic.py b/tests/link/numba/test_basic.py
@@ -1,6 +1,6 @@
 import contextlib
 import inspect
-from typing import TYPE_CHECKING, Callable, Optional, Sequence, Tuple, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Sequence, Tuple, Union
 from unittest import mock
 
 import numba
@@ -190,7 +190,7 @@ def compare_numba_and_py(
     numba_mode=numba_mode,
     py_mode=py_mode,
     updates=None,
-):
+) -> Tuple[Callable, Any]:
     """Function to compare python graph output and Numba compiled output for testing equality
 
     In the tests below computational graphs are defined in PyTensor. These graphs are then passed to
@@ -209,6 +209,10 @@ def compare_numba_and_py(
     updates
         Updates to be passed to `pytensor.function`.
 
+    Returns
+    -------
+    The compiled PyTensor function and its last computed result.
+
     """
     if assert_fn is None:
 
@@ -248,7 +252,7 @@ def assert_fn(x, y):
     else:
         assert_fn(numba_res, py_res)
 
-    return numba_res
+    return pytensor_numba_fn, numba_res
 
 
 @pytest.mark.parametrize(
diff --git a/tests/link/numba/test_scan.py b/tests/link/numba/test_scan.py
@@ -159,7 +159,7 @@ def test_xit_xot_types(
         assert np.allclose(res_val, output_vals)
 
 
-def test_scan_multiple_output():
+def test_scan_multiple_output(benchmark):
     """Test a scan implementation of a SEIR model.
 
     SEIR model definition:
@@ -244,7 +244,9 @@ def seir_one_step(ct0, dt0, st0, et0, it0, logp_c, logp_d, beta, gamma, delta):
         gamma_val,
         delta_val,
     ]
-    compare_numba_and_py(out_fg, test_input_vals)
+    scan_fn, _ = compare_numba_and_py(out_fg, test_input_vals)
+
+    benchmark(scan_fn, *test_input_vals)
 
 
 @config.change_flags(compute_test_value="raise")
diff --git a/tests/link/numba/test_tensor_basic.py b/tests/link/numba/test_tensor_basic.py
@@ -32,7 +32,7 @@ def test_Alloc(v, shape):
     g = at.alloc(v, *shape)
     g_fg = FunctionGraph(outputs=[g])
 
-    (numba_res,) = compare_numba_and_py(
+    _, (numba_res,) = compare_numba_and_py(
         g_fg,
         [
             i.tag.test_value
diff --git a/tests/scan/test_basic.py b/tests/scan/test_basic.py
@@ -13,7 +13,6 @@
 import pickle
 import shutil
 import sys
-import timeit
 from collections import OrderedDict
 from tempfile import mkdtemp
 
@@ -2179,15 +2178,13 @@ def scan_fn():
 @pytest.mark.skipif(
     not config.cxx, reason="G++ not available, so we need to skip this test."
 )
-def test_cython_performance():
+def test_cython_performance(benchmark):
 
     # This implicitly confirms that the Cython version is being used
     from pytensor.scan import scan_perform_ext  # noqa: F401
 
     # Python usually out-performs PyTensor below 100 iterations
     N = 200
-    n_timeit = 50
-
     M = -1 / np.arange(1, 11).astype(config.floatX)
     r = np.arange(N * 10).astype(config.floatX).reshape(N, 10)
 
@@ -2216,17 +2213,11 @@ def f_py():
     # Make sure we're actually computing a `Scan`
     assert any(isinstance(node.op, Scan) for node in f_cvm.maker.fgraph.apply_nodes)
 
-    cvm_res = f_cvm()
+    cvm_res = benchmark(f_cvm)
 
     # Make sure the results are the same between the two implementations
     assert np.allclose(cvm_res, py_res)
 
-    python_duration = timeit.timeit(lambda: f_py(), number=n_timeit)
-    cvm_duration = timeit.timeit(lambda: f_cvm(), number=n_timeit)
-    print(f"python={python_duration}, cvm={cvm_duration}")
-
-    assert cvm_duration <= python_duration
-
 
 @config.change_flags(mode="FAST_COMPILE", compute_test_value="raise")
 def test_compute_test_values():
@@ -2662,7 +2653,7 @@ def numpy_implementation(vsample):
         n_result = numpy_implementation(v_vsample)
         utt.assert_allclose(t_result, n_result)
 
-    def test_reordering(self):
+    def test_reordering(self, benchmark):
         """Test re-ordering of inputs.
 
         some rnn with multiple outputs and multiple inputs; other
@@ -2722,14 +2713,14 @@ def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, y_tm3, W_in1):
             v_x[i] = np.dot(v_u1[i], vW_in1) + v_u2[i] * vW_in2 + np.dot(v_x[i - 1], vW)
             v_y[i] = np.dot(v_x[i - 1], vWout) + v_y[i - 1]
 
-        (pytensor_dump1, pytensor_dump2, pytensor_x, pytensor_y) = f4(
-            v_u1, v_u2, v_x0, v_y0, vW_in1
+        (pytensor_dump1, pytensor_dump2, pytensor_x, pytensor_y) = benchmark(
+            f4, v_u1, v_u2, v_x0, v_y0, vW_in1
         )
 
         utt.assert_allclose(pytensor_x, v_x)
         utt.assert_allclose(pytensor_y, v_y)
 
-    def test_scan_as_tensor_on_gradients(self):
+    def test_scan_as_tensor_on_gradients(self, benchmark):
         to_scan = dvector("to_scan")
         seq = dmatrix("seq")
         f1 = dscalar("f1")
@@ -2743,7 +2734,12 @@ def scanStep(prev, seq, f1):
         function(inputs=[to_scan, seq, f1], outputs=scanned, allow_input_downcast=True)
 
         t_grad = grad(scanned.sum(), wrt=[to_scan, f1], consider_constant=[seq])
-        function(inputs=[to_scan, seq, f1], outputs=t_grad, allow_input_downcast=True)
+        benchmark(
+            function,
+            inputs=[to_scan, seq, f1],
+            outputs=t_grad,
+            allow_input_downcast=True,
+        )
 
     def caching_nsteps_by_scan_op(self):
         W = matrix("weights")
@@ -3060,7 +3056,7 @@ def inner_fn(tap_m3, tap_m2, tap_m1):
         utt.assert_allclose(outputs, expected_outputs)
 
     @pytest.mark.slow
-    def test_hessian_bug_grad_grad_two_scans(self):
+    def test_hessian_bug_grad_grad_two_scans(self, benchmark):
         # Bug reported by Bitton Tenessi
         # NOTE : The test to reproduce the bug reported by Bitton Tenessi
         # was modified from its original version to be faster to run.
@@ -3094,7 +3090,7 @@ def loss_inner(sum_inner, W):
         H = hessian(cost, W)
         print(".", file=sys.stderr)
         f = function([W, n_steps], H)
-        f(np.ones((8,), dtype="float32"), 1)
+        benchmark(f, np.ones((8,), dtype="float32"), 1)
 
     def test_grad_connectivity_matrix(self):
         def inner_fn(x_tm1, y_tm1, z_tm1):
@@ -3710,7 +3706,7 @@ def f_rnn_cmpl(u1_t, u2_t, x_tm1, y_tm1, W_in1):
         utt.assert_allclose(pytensor_x, v_x)
         utt.assert_allclose(pytensor_y, v_y)
 
-    def test_multiple_outs_taps(self):
+    def test_multiple_outs_taps(self, benchmark):
         l = 5
         rng = np.random.default_rng(utt.fetch_seed())
 
@@ -3805,6 +3801,8 @@ def f_rnn_cmpl(u1_t, u2_tm1, u2_t, u2_tp1, x_tm1, y_tm1, y_tm3, W_in1):
         np.testing.assert_almost_equal(res[1], ny1)
         np.testing.assert_almost_equal(res[2], ny2)
 
+        benchmark(f, v_u1, v_u2, v_x0, v_y0, vW_in1)
+
     def _grad_mout_helper(self, n_iters, mode):
         rng = np.random.default_rng(utt.fetch_seed())
         n_hid = 3
diff --git a/tests/scan/test_rewriting.py b/tests/scan/test_rewriting.py
@@ -620,7 +620,7 @@ def test_sum_dot(self):
         vB = rng.uniform(size=(5, 5)).astype(config.floatX)
         utt.assert_allclose(f(vA, vB), np.dot(vA.T, vB))
 
-    def test_pregreedy_optimizer(self):
+    def test_pregreedy_optimizer(self, benchmark):
         W = at.zeros((5, 4))
         bv = at.zeros((5,))
         bh = at.zeros((4,))
@@ -634,7 +634,9 @@ def test_pregreedy_optimizer(self):
             n_steps=2,
         )
         # TODO FIXME: Make this a real test and assert something.
-        function([v], chain)(np.zeros((3, 5), dtype=config.floatX))
+        chain_fn = function([v], chain)
+
+        benchmark(chain_fn, np.zeros((3, 5), dtype=config.floatX))
 
     def test_machine_translation(self):
         """
@@ -1291,15 +1293,16 @@ def test_savemem_does_not_duplicate_number_of_scan_nodes(self):
         ]
         assert len(scan_nodes) == 1
 
-    def test_savemem_opt(self):
+    def test_savemem_opt(self, benchmark):
         y0 = shared(np.ones((2, 10)))
         [y1, y2], updates = scan(
             lambda y: [y, y],
             outputs_info=[dict(initial=y0, taps=[-2]), None],
             n_steps=5,
         )
         # TODO FIXME: Make this a real test and assert something.
-        function([], y2.sum(), mode=self.mode)()
+        fn = function([], y2.sum(), mode=self.mode)
+        benchmark(fn)
 
     def test_savemem_opt_0_step(self):
         """

Original file line number	Diff line number	Diff line change
`@@ -86,6 +86,7 @@ tests = [`
`86`	`86`	`"pre-commit",`
`87`	`87`	`"pytest-cov>=2.6.1",`
`88`	`88`	`"coverage>=5.1",`
	`89`	`+ "pytest-benchmark",`
`89`	`90`	`]`
`90`	`91`	`rtd = [`
`91`	`92`	`"sphinx>=1.3.0",`
Original file line number	Diff line number	Diff line change
`@@ -32,7 +32,7 @@ def test_Alloc(v, shape):`
`32`	`32`	`g = at.alloc(v, *shape)`
`33`	`33`	`g_fg = FunctionGraph(outputs=[g])`
`34`	`34`
`35`		`- (numba_res,) = compare_numba_and_py(`
	`35`	`+ _, (numba_res,) = compare_numba_and_py(`
`36`	`36`	`g_fg,`
`37`	`37`	`[`
`38`	`38`	`i.tag.test_value`