diff --git a/.github/docker-compose.yml b/.github/docker-compose.yml index 400c044c4ed..76e3eb251d2 100644 --- a/.github/docker-compose.yml +++ b/.github/docker-compose.yml @@ -6,9 +6,9 @@ services: context: ../ dockerfile: .github/Dockerfile ports: - - "3000:3000" + - "${STDB_PORT:-3000}:3000" # Postgres - - "5432:5432" + - "${STDB_PG_PORT:-5432}:5432" entrypoint: spacetime start --pg-port 5432 privileged: true environment: diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 870d61b9613..0b547ee7e9c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -36,6 +36,8 @@ jobs: container: ${{ matrix.container }} env: CARGO_TARGET_DIR: ${{ github.workspace }}/target + # Note: clear_database and replication only work in private + SMOKETEST_ARGS: ${{ matrix.smoketest_args }} -x clear_database replication teams steps: - name: Find Git ref env: @@ -87,22 +89,13 @@ jobs: if: runner.os == 'Linux' run: /usr/local/bin/start-docker.sh - - name: Build and start database (Linux) - if: runner.os == 'Linux' - run: | - # Our .dockerignore omits `target`, which our CI Dockerfile needs. 
- rm .dockerignore - docker compose -f .github/docker-compose.yml up -d - - name: Build and start database (Windows) + # the sdk-manifests on windows-latest are messed up, so we need to update them + - name: Fix sdk-manifests if: runner.os == 'Windows' + working-directory: modules + # Powershell doesn't early-exit properly from a multi-line command if one fails + shell: bash run: | - # Fail properly if any individual command fails - $ErrorActionPreference = 'Stop' - $PSNativeCommandUseErrorActionPreference = $true - - Start-Process target/debug/spacetimedb-cli.exe -ArgumentList 'start --pg-port 5432' - cd modules - # the sdk-manifests on windows-latest are messed up, so we need to update them dotnet workload config --update-mode manifests dotnet workload update - uses: actions/setup-python@v5 @@ -110,12 +103,15 @@ jobs: if: runner.os == 'Windows' - name: Install python deps run: python -m pip install -r smoketests/requirements.txt - - name: Run smoketests - # Note: clear_database and replication only work in private - run: cargo ci smoketests -- ${{ matrix.smoketest_args }} -x clear_database replication teams - - name: Stop containers (Linux) - if: always() && runner.os == 'Linux' - run: docker compose -f .github/docker-compose.yml down + - name: Run smoketests (Linux) + if: runner.os == 'Linux' + run: | + # Our .dockerignore omits `target`, which our CI Dockerfile needs. 
+ rm .dockerignore + cargo ci smoketests --docker .github/docker-compose.yml --parallel -- ${SMOKETEST_ARGS} + - name: Run smoketests (Windows) + if: runner.os == 'Windows' + run: cargo ci smoketests --parallel -- ${SMOKETEST_ARGS} test: name: Test Suite diff --git a/Cargo.lock b/Cargo.lock index 66c824d694f..09db7759ff2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -874,6 +874,7 @@ dependencies = [ "duct", "log", "regex", + "serde_json", ] [[package]] diff --git a/docker-compose.yml b/docker-compose.yml index 718281892dd..0b00256a808 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,11 +24,11 @@ services: - key_files:/etc/spacetimedb - /stdb ports: - - "3000:3000" + - "${STDB_PORT:-3000}:3000" # Postgres - - "5432:5432" + - "${STDB_PG_PORT:-5432}:5432" # Tracy - - "8086:8086" + - "${STDB_TRACY_PORT:-8086}:8086" entrypoint: cargo watch -i flamegraphs -i log.conf --why -C crates/standalone -x 'run start --data-dir=/stdb/data --jwt-pub-key-path=/etc/spacetimedb/id_ecdsa.pub --jwt-priv-key-path=/etc/spacetimedb/id_ecdsa --pg-port 5432' privileged: true environment: diff --git a/smoketests/__init__.py b/smoketests/__init__.py index 93e179e30dd..cd50ef11575 100644 --- a/smoketests/__init__.py +++ b/smoketests/__init__.py @@ -80,6 +80,7 @@ def requires_anonymous_login(item): return item def requires_local_server(item): + setattr(item, "_requires_local_server", True) if REMOTE_SERVER: return unittest.skip("running against a remote server")(item) return item diff --git a/smoketests/__main__.py b/smoketests/__main__.py index cc3b0d004b6..b4f2160352f 100644 --- a/smoketests/__main__.py +++ b/smoketests/__main__.py @@ -77,16 +77,15 @@ def main(): parser.add_argument("--no-docker-logs", action="store_true") parser.add_argument("--skip-dotnet", action="store_true", help="ignore tests which require dotnet") parser.add_argument("--show-all-output", action="store_true", help="show all stdout/stderr from the tests as they're running") - parser.add_argument("--parallel", 
action="store_true", help="run test classes in parallel") - parser.add_argument("-j", dest='jobs', help="Set number of jobs for parallel test runs. Default is `nproc`", type=int, default=0) parser.add_argument('-k', dest='testNamePatterns', action='append', type=_convert_select_pattern, help='Only run tests which match the given substring') parser.add_argument("-x", dest="exclude", nargs="*", default=[]) parser.add_argument("--no-build-cli", action="store_true", help="don't cargo build the cli") - parser.add_argument("--list", action="store_true", help="list the tests that would be run, but don't run them") + parser.add_argument("--list", nargs="?", const="text", choices=("text", "json"), default=None, help="list the tests that would be run (optionally as 'text' or 'json'), but don't run them") parser.add_argument("--remote-server", action="store", help="Run against a remote server") parser.add_argument("--spacetime-login", action="store_true", help="Use `spacetime login` for these tests (and disable tests that don't work with that)") + parser.add_argument("--local-only", action="store_true", help="Only run tests that require a local server") args = parser.parse_args() if args.docker: @@ -116,22 +115,58 @@ def main(): loader.testNamePatterns = args.testNamePatterns tests = loader.loadTestsFromNames(testlist) - if args.list: + + if args.local_only: + def _is_local_only(test_case): + method_name = getattr(test_case, "_testMethodName", None) + if method_name is not None and hasattr(test_case, method_name): + method = getattr(test_case, method_name) + if getattr(method, "_requires_local_server", False): + return True + # Also allow class-level decoration + if getattr(test_case.__class__, "_requires_local_server", False): + return True + return False + + filtered = unittest.TestSuite() + for t in _iter_all_tests(tests): + if _is_local_only(t): + filtered.addTest(t) + tests = filtered + + if args.list is not None: failed_cls = getattr(unittest.loader, "_FailedTest", 
None) any_failed = False + test_names = [] + failed_tests = [] for test in _iter_all_tests(tests): name = test.id() if isinstance(test, failed_cls): any_failed = True - print('') - print("Failed to construct %s:" % test.id()) exc = getattr(test, "_exception", None) - if exc is not None: - tb = ''.join(traceback.format_exception(exc)) - print(tb.rstrip()) - print('') + tb = ''.join(traceback.format_exception(exc)) if exc is not None else None + failed_tests.append({ + "test_id": name, + "error": tb.rstrip() if tb is not None else None, + }) + if args.list == "text": + print('') + print("Failed to construct %s:" % name) + if tb is not None: + print(tb.rstrip()) + print('') else: - print(f"{name}") + test_names.append(name) + if args.list == "text": + print(f"{name}") + + if args.list == "json": + output = { + "tests": test_names, + "errors": failed_tests, + } + print(json.dumps(output)) + exit(1 if any_failed else 0) if not args.no_build_cli: @@ -176,14 +211,9 @@ def main(): buffer = not args.show_all_output verbosity = 2 - if args.parallel: - print("parallel test running is under construction, this will probably not work correctly") - from . 
import unittest_parallel - unittest_parallel.main(buffer=buffer, verbose=verbosity, level="class", discovered_tests=tests, jobs=args.jobs) - else: - result = unittest.TextTestRunner(buffer=buffer, verbosity=verbosity).run(tests) - if not result.wasSuccessful(): - parser.exit(status=1) + result = unittest.TextTestRunner(buffer=buffer, verbosity=verbosity).run(tests) + if not result.wasSuccessful(): + parser.exit(status=1) if __name__ == '__main__': diff --git a/tools/ci/Cargo.toml b/tools/ci/Cargo.toml index e907526a42c..19a40d0afe1 100644 --- a/tools/ci/Cargo.toml +++ b/tools/ci/Cargo.toml @@ -10,3 +10,4 @@ chrono = { workspace = true, features=["clock"] } clap.workspace = true regex.workspace = true duct.workspace = true +serde_json.workspace = true diff --git a/tools/ci/src/main.rs b/tools/ci/src/main.rs index d4750da29df..93cf0028312 100644 --- a/tools/ci/src/main.rs +++ b/tools/ci/src/main.rs @@ -1,8 +1,11 @@ -use anyhow::{bail, Result}; +use anyhow::{bail, Context, Result}; use clap::{CommandFactory, Parser, Subcommand}; use duct::cmd; -use std::collections::HashMap; -use std::path::Path; +use log::warn; +use serde_json; +use std::collections::{HashMap, HashSet}; +use std::net::TcpListener; +use std::path::{Path, PathBuf}; use std::{env, fs}; const README_PATH: &str = "tools/ci/README.md"; @@ -51,6 +54,32 @@ enum CiCmd { /// /// Executes the smoketests suite with some default exclusions. Smoketests { + #[arg( + long = "start-server", + default_value_t = true, + long_help = "Whether to start a local SpacetimeDB server before running smoketests" + )] + start_server: bool, + #[arg( + long = "docker", + value_name = "COMPOSE_FILE", + num_args(0..=1), + default_missing_value = "docker-compose.yml", + long_help = "Use docker for smoketests, specifying a docker compose file. If no value is provided, docker-compose.yml is used by default. This cannot be combined with --start-server." 
+ )] + docker: Option<String>, + #[arg( + long = "parallel", + default_value_t = false, + long_help = "Run smoketests in parallel batches grouped by test suite" + )] + parallel: bool, + #[arg( + long = "python", + value_name = "PYTHON_PATH", + long_help = "Python interpreter to use for smoketests" + )] + python: Option<String>, #[arg( trailing_var_arg = true, long_help = "Additional arguments to pass to the smoketests runner. These are usually set by the CI environment, such as `-- --docker`" @@ -133,9 +162,138 @@ fn run_bash(cmdline: &str, additional_env: &[(&str, &str)]) -> Result<()> { Ok(()) } +#[derive(Debug, Clone)] +pub enum StartServer { + No, + Yes { random_port: bool }, + Docker { compose_file: PathBuf, random_port: bool }, +} + +#[derive(Debug, Clone)] +pub enum ServerState { + None, + Yes { pid: i32 }, + Docker { compose_file: PathBuf, project: String }, +} + +fn find_free_port() -> Result<u16> { + let listener = TcpListener::bind("127.0.0.1:0").context("failed to bind to an ephemeral port")?; + let port = listener + .local_addr() + .context("failed to read local address for ephemeral port")? 
+ .port(); + drop(listener); + Ok(port) +} + +fn run_smoketests_batch(server_mode: StartServer, args: &[String], python: &str) -> Result<()> { + let server_state = match server_mode { + StartServer::No => ServerState::None, + StartServer::Docker { + compose_file, + random_port, + } => { + println!("Starting server.."); + let env_string; + let project; + if random_port { + let server_port = find_free_port()?; + let pg_port = find_free_port()?; + let tracy_port = find_free_port()?; + env_string = format!("STDB_PORT={server_port} STDB_PG_PORT={pg_port} STDB_TRACY_PORT={tracy_port}"); + project = format!("spacetimedb-smoketests-{server_port}"); + } else { + env_string = String::new(); + project = "spacetimedb-smoketests".to_string(); + }; + let compose_str = compose_file.to_string_lossy(); + bash!(&format!( + "{env_string} docker compose -f {compose_str} --project-name {project} up -d" + ))?; + ServerState::Docker { compose_file, project } + } + StartServer::Yes { random_port } => { + // TODO: Make sure that this isn't brittle / multiple parallel batches don't grab the same port + let arg_string = if random_port { + let server_port = find_free_port()?; + let pg_port = find_free_port()?; + &format!("--listen-addr 0.0.0.0:{server_port} --pg-port {pg_port}") + } else { + "--pg-port 5432" + }; + println!("Starting server.."); + let pid_str; + if cfg!(target_os = "windows") { + pid_str = cmd!( + "powershell", + "-NoProfile", + "-Command", + &format!( + "$p = Start-Process cargo -ArgumentList 'run -p spacetimedb-cli -- start {arg_string}' -PassThru; $p.Id" + ) + ) + .read() + .unwrap_or_default(); + } else { + // TODO: Maybe we do this in a thread instead? 
Then it's easier to kill + pid_str = cmd!( + "bash", + "-lc", + &format!("nohup cargo run -p spacetimedb-cli -- start {arg_string} >/dev/null 2>&1 & echo $!") + ) + .read() + .unwrap_or_default(); + } + ServerState::Yes { + pid: pid_str + .trim() + .parse::<i32>() + .expect("Failed to get PID of started process"), + } + } + }; + + println!("Running smoketests.."); + // TODO: Don't we need to _use_ the port here?! + let test_result = bash!(&format!("{python} -m smoketests {}", args.join(" "))); + + // TODO: Make an effort to run the wind-down behavior if we ctrl-C this process + match server_state { + ServerState::None => {} + ServerState::Docker { compose_file, project } => { + println!("Shutting down server.."); + let compose_str = compose_file.to_string_lossy(); + let _ = bash!(&format!( + "docker compose -f {compose_str} --project-name {project} down" + )); + } + ServerState::Yes { pid } => { + println!("Shutting down server.."); + if cfg!(target_os = "windows") { + let _ = bash!(&format!( + "powershell -NoProfile -Command \"Stop-Process -Id {} -Force -ErrorAction SilentlyContinue\"", + pid + )); + } else { + // TODO: I keep getting errors about the pid not existing.. but the servers seem to shut down? + let _ = bash!(&format!("kill {}", pid)); + } + } + } + + test_result +} + fn main() -> Result<()> { let cli = Cli::parse(); + // Remove all Cargo-provided env vars from the subcommand + for (key, _) in std::env::vars() { + if key.starts_with("CARGO_") && key != "CARGO_TARGET_DIR" { + std::env::remove_var(key); + } + } + match cli.cmd { Some(CiCmd::Test) => { bash!("cargo test --all -- --skip unreal")?; @@ -168,14 +326,141 @@ fn main() -> Result<()> { bash!("cargo run -p spacetimedb-cli -- build --project-path modules/module-test")?; } - Some(CiCmd::Smoketests { args }) => { - // On some systems, there is no `python`, but there is `python3`. 
- let py3_available = cmd!("bash", "-lc", "command -v python3 >/dev/null 2>&1") - .run() - .map(|s| s.status.success()) - .unwrap_or(false); - let python = if py3_available { "python3" } else { "python" }; - bash!(&format!("{python} -m smoketests {}", args.join(" ")))?; + Some(CiCmd::Smoketests { + start_server, + docker, + parallel, + python, + args, + }) => { + let start_server = match (start_server, docker.as_ref()) { + (start_server, Some(compose_file)) => { + if !start_server { + warn!("--docker implies --start-server=true"); + } + StartServer::Docker { + random_port: parallel, + compose_file: compose_file.into(), + } + } + (true, None) => StartServer::Yes { random_port: parallel }, + (false, None) => StartServer::No, + }; + let mut args = args.to_vec(); + if let Some(compose_file) = docker.as_ref() { + // Note that we do not assume that the user wants to pass --docker to the tests. We leave them the power to + // run the server in docker while still retaining full control over what tests they want. + args.push("--compose-file".to_string()); + args.push(compose_file.to_string()); + } + + let python = if let Some(p) = python { + p + } else { + // TODO: does this work on windows? + let py3_available = cmd!("bash", "-lc", "command -v python3 >/dev/null 2>&1") + .run() + .map(|s| s.status.success()) + .unwrap_or(false); + if py3_available { + "python3".to_string() + } else { + "python".to_string() + } + }; + + if matches!(start_server, StartServer::Yes { .. }) { + println!("Building SpacetimeDB.."); + + // Pre-build so that `cargo run -p spacetimedb-cli` will immediately start. Otherwise we risk starting the tests + // before the server is up. + // TODO: The `cargo run` invocation still seems to rebuild a bunch? investigate.. maybe we infer the binary path from cargo metadata. 
+ bash!("cargo build -p spacetimedb-cli -p spacetimedb-standalone")?; + args.push("--no-build-cli".into()); + } + + if parallel { + println!("Listing smoketests for parallel execution.."); + + let mut list_args: Vec<String> = args.to_vec(); + list_args.push("--list=json".to_string()); + let list_cmdline = format!("{python} -m smoketests {}", list_args.join(" ")); + + // TODO: do actually check the return code here. and make --list=json not return non-zero if there are errors. + let list_output = cmd!("bash", "-lc", list_cmdline) + .stderr_to_stdout() + .unchecked() + .read()?; + + let parsed: serde_json::Value = serde_json::from_str(&list_output)?; + let tests = parsed.get("tests").and_then(|v| v.as_array()).cloned().unwrap(); + let errors = parsed + .get("errors") + .and_then(|v| v.as_array()) + .cloned() + .unwrap_or_default(); + + if !errors.is_empty() { + println!("Errors while constructing smoketests:"); + for err in &errors { + let test_id = err.get("test_id").and_then(|v| v.as_str()).unwrap(); + let msg = err.get("error").and_then(|v| v.as_str()).unwrap(); + println!("{test_id}"); + println!("{msg}"); + } + // If there were errors constructing tests, treat this as a failure + // and do not run any batches. + return Err(anyhow::anyhow!( + "Errors encountered while constructing smoketests; aborting parallel run" + )); + } + + let batches: HashSet<String> = tests + .into_iter() + .map(|t| { + let name = t.as_str().unwrap(); + let parts = name.split('.').collect::<Vec<_>>(); + parts[2].to_string() + }) + .collect(); + + // Run each batch in parallel threads. + let mut handles = Vec::new(); + for batch in batches { + let start_server_clone = start_server.clone(); + let python_clone = python.clone(); + let mut batch_args: Vec<String> = Vec::new(); + // TODO: this doesn't work properly if the user passed multiple batches as input. 
+ batch_args.push(batch.clone()); + batch_args.extend(args.iter().cloned()); + + handles.push(( + batch.clone(), + std::thread::spawn(move || { + println!("Running smoketests batch {batch}.."); + // TODO: capture output and print it only in contiguous blocks + run_smoketests_batch(start_server_clone, &batch_args, &python_clone) + }), + )); + } + + let mut failed_batches = vec![]; + for (batch, handle) in handles { + // If the thread panicked or the batch failed, treat it as a failure. + let result = handle + .join() + .unwrap_or_else(|_| Err(anyhow::anyhow!("smoketest batch thread panicked",))); + if result.is_err() { + failed_batches.push(batch); + } + } + + if !failed_batches.is_empty() { + anyhow::bail!("Smoketest batch(es) failed: {}", failed_batches.join(", ")); + } + } else { + run_smoketests_batch(start_server, &args, &python)?; + } } Some(CiCmd::UpdateFlow {