diff --git a/.github/actions/c-chain-reexecution-benchmark/action.yml b/.github/actions/c-chain-reexecution-benchmark/action.yml index 3385ff50a0c1..6217ba3824d3 100644 --- a/.github/actions/c-chain-reexecution-benchmark/action.yml +++ b/.github/actions/c-chain-reexecution-benchmark/action.yml @@ -2,30 +2,34 @@ name: 'C-Chain Re-Execution Benchmark' description: 'Run C-Chain re-execution benchmark' inputs: - runner_name: - description: 'The name of the runner to use and include in the Golang Benchmark name.' - required: true + task: + description: 'Task name to execute from Taskfile.yml. Leave empty to use custom inputs below.' + default: '' + # Custom inputs (alternative to task-based approach) config: description: 'The config to pass to the VM for the benchmark. See BenchmarkReexecuteRange for details.' default: '' start-block: description: 'The start block for the benchmark.' - default: '101' + default: '' end-block: description: 'The end block for the benchmark.' - default: '250000' + default: '' block-dir-src: description: 'The source block directory. Supports S3 directory/zip and local directories.' - default: 's3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**' + default: '' current-state-dir-src: description: 'The current state directory. Supports S3 directory/zip and local directories.' - default: 's3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**' + default: '' + runner_name: + description: 'The name of the runner to use and include in the Golang Benchmark name.' + required: true aws-role: description: 'AWS role to assume for S3 access.' required: true aws-region: description: 'AWS region to use for S3 access.' - required: true + default: 'us-east-2' aws-role-duration-seconds: description: 'The duration of the AWS role to assume for S3 access.' required: true @@ -56,54 +60,126 @@ inputs: push-github-action-benchmark: description: 'Whether to push the benchmark result to GitHub.' 
required: true - default: false push-post-state: description: 'S3 destination to copy the current-state directory after completing re-execution. If empty, this will be skipped.' default: '' + # The following inputs need never be provided by the caller. They + # default to context values that the action's steps are unable to + # access directly. + repository-owner: + default: ${{ github.repository_owner }} + repository-name: + default: ${{ github.event.repository.name }} + workflow: + default: ${{ github.workflow }} + run-id: + default: ${{ github.run_id }} + run-number: + default: ${{ github.run_number }} + run-attempt: + default: ${{ github.run_attempt }} + job: + default: ${{ github.job }} runs: using: composite steps: - - name: Set task env + - uses: cachix/install-nix-action@02a151ada4993995686f9ed4f1be7cfbb229e56f #v31 + with: + github_access_token: ${{ inputs.github-token }} + - run: echo "dependencies installed" + shell: nix develop --command bash {0} + # Cache Go modules (architecture-independent) + - uses: actions/cache@v4 + id: go-mod-cache + with: + path: ~/go/pkg/mod + key: ${{ runner.os }}-go-mod-${{ hashFiles('go.sum') }} + restore-keys: ${{ runner.os }}-go-mod- + # Cache Go build cache (architecture-specific) + - uses: actions/cache@v4 + with: + path: ~/.cache/go-build + key: ${{ runner.os }}-${{ runner.arch }}-go-build-${{ hashFiles('go.sum') }} + restore-keys: ${{ runner.os }}-${{ runner.arch }}-go-build- + # Download modules only on cache miss + - run: go mod download + if: steps.go-mod-cache.outputs.cache-hit != 'true' + shell: nix develop --command bash -x {0} + - name: Notify of metrics availability + if: inputs.prometheus-username != '' shell: bash run: | - { - echo "EXECUTION_DATA_DIR=${{ inputs.workspace }}/reexecution-data" - echo "BENCHMARK_OUTPUT_FILE=output.txt" - echo "START_BLOCK=${{ inputs.start-block }}" - echo "END_BLOCK=${{ inputs.end-block }}" - echo "BLOCK_DIR_SRC=${{ inputs.block-dir-src }}" - echo "CURRENT_STATE_DIR_SRC=${{ 
inputs.current-state-dir-src }}" - } >> $GITHUB_ENV + metrics_url=$($GITHUB_ACTION_PATH/output-metrics-url.sh) + echo "Grafana: ${metrics_url}" + echo "🔗 [View Grafana Dashboard](${metrics_url})" >> "$GITHUB_STEP_SUMMARY" + env: + GRAFANA_URL: https://grafana-poc.avax-dev.network/d/Gl1I20mnk/c-chain?orgId=1&refresh=10s&var-filter=is_ephemeral_node%7C%3D%7Cfalse&var-filter=gh_repo%7C%3D%7C${{ inputs.repository-owner }}%2F${{ inputs.repository-name }}&var-filter=gh_run_id%7C%3D%7C${{ inputs.run-id }}&var-filter=gh_run_attempt%7C%3D%7C${{ inputs.run-attempt }} + GH_JOB_ID: ${{ inputs.job }} + - name: Warn that collection of metrics and logs will not be performed + if: inputs.prometheus-username == '' + shell: bash + run: echo "::warning::Monitoring credentials not found. Skipping collector start. Is the PR from a fork branch?" - name: Configure AWS Credentials uses: aws-actions/configure-aws-credentials@v4 with: role-to-assume: ${{ inputs.aws-role }} aws-region: ${{ inputs.aws-region }} role-duration-seconds: ${{ inputs.aws-role-duration-seconds }} - - name: Run C-Chain Re-Execution - uses: ./.github/actions/run-monitored-tmpnet-cmd - with: - run: | + - name: Validate inputs + shell: bash + run: | + if [[ -z "${{ inputs.task }}" ]]; then + # Granular mode - validate required inputs + missing=() + [[ -z "${{ inputs.block-dir-src }}" ]] && missing+=("block-dir-src") + [[ -z "${{ inputs.current-state-dir-src }}" ]] && missing+=("current-state-dir-src") + [[ -z "${{ inputs.start-block }}" ]] && missing+=("start-block") + [[ -z "${{ inputs.end-block }}" ]] && missing+=("end-block") + + if [[ ${#missing[@]} -gt 0 ]]; then + echo "::error::When 'task' is empty, the following inputs are required: ${missing[*]}" + exit 1 + fi + fi + - name: Set task env + shell: bash + run: | + TIMESTAMP=$(date '+%Y%m%d-%H%M%S') + echo "EXECUTION_DATA_DIR=/tmp/reexecution-data-${TIMESTAMP}" >> "$GITHUB_ENV" + echo "BENCHMARK_OUTPUT_FILE=${GITHUB_WORKSPACE}/benchmark-output.txt" >>
"$GITHUB_ENV" + - name: Run C-Chain Re-execution Benchmark + shell: nix develop --impure --command bash -x {0} + run: | + if [[ -n "${{ inputs.task }}" ]]; then + # Task-based approach + ./scripts/run_task.sh ${{ inputs.task }} \ + BENCHMARK_OUTPUT_FILE="${{ env.BENCHMARK_OUTPUT_FILE }}" \ + EXECUTION_DATA_DIR="${{ env.EXECUTION_DATA_DIR }}" + else + # Granular approach ./scripts/run_task.sh reexecute-cchain-range-with-copied-data \ CONFIG=${{ inputs.config }} \ EXECUTION_DATA_DIR=${{ env.EXECUTION_DATA_DIR }} \ - BLOCK_DIR_SRC=${{ env.BLOCK_DIR_SRC }} \ - CURRENT_STATE_DIR_SRC=${{ env.CURRENT_STATE_DIR_SRC }} \ - START_BLOCK=${{ env.START_BLOCK }} \ - END_BLOCK=${{ env.END_BLOCK }} \ - LABELS=${{ env.LABELS }} \ - BENCHMARK_OUTPUT_FILE=${{ env.BENCHMARK_OUTPUT_FILE }} \ - RUNNER_NAME=${{ inputs.runner_name }} \ - METRICS_SERVER_ENABLED=true \ - METRICS_COLLECTOR_ENABLED=true - prometheus_url: ${{ inputs.prometheus-url }} - prometheus_push_url: ${{ inputs.prometheus-push-url }} - prometheus_username: ${{ inputs.prometheus-username }} - prometheus_password: ${{ inputs.prometheus-password }} - grafana_dashboard_id: 'Gl1I20mnk/c-chain' - runtime: "" # Set runtime input to empty string to disable log collection - + BLOCK_DIR_SRC=${{ inputs.block-dir-src }} \ + CURRENT_STATE_DIR_SRC=${{ inputs.current-state-dir-src }} \ + START_BLOCK=${{ inputs.start-block }} \ + END_BLOCK=${{ inputs.end-block }} \ + BENCHMARK_OUTPUT_FILE="${{ env.BENCHMARK_OUTPUT_FILE }}" + fi + env: + RUNNER_NAME: ${{ inputs.runner_name }} + METRICS_COLLECTOR_ENABLED: ${{ inputs.prometheus-username != '' }} + PROMETHEUS_URL: ${{ inputs.prometheus-url }} + PROMETHEUS_PUSH_URL: ${{ inputs.prometheus-push-url }} + PROMETHEUS_USERNAME: ${{ inputs.prometheus-username }} + PROMETHEUS_PASSWORD: ${{ inputs.prometheus-password }} + GH_REPO: ${{ inputs.repository-owner }}/${{ inputs.repository-name }} + GH_WORKFLOW: ${{ inputs.workflow }} + GH_RUN_ID: ${{ inputs.run-id }} + GH_RUN_NUMBER: ${{ inputs.run-number
}} + GH_RUN_ATTEMPT: ${{ inputs.run-attempt }} + GH_JOB_ID: ${{ inputs.job }} - name: Compare Benchmark Results uses: benchmark-action/github-action-benchmark@v1 with: @@ -112,8 +188,10 @@ runs: summary-always: true github-token: ${{ inputs.github-token }} auto-push: ${{ inputs.push-github-action-benchmark }} - - - name: Push Post-State to S3 (if not exists) - if: ${{ inputs.push-post-state != '' }} - shell: nix develop --command bash -x {0} - run: ./scripts/run_task.sh export-dir-to-s3 SRC=${{ env.EXECUTION_DATA_DIR }}/current-state/ DST=${{ inputs.push-post-state }} + - name: Push Post-State to S3 + if: inputs.push-post-state != '' + shell: nix develop --impure --command bash -x {0} + run: | + ./scripts/run_task.sh export-dir-to-s3 \ + SRC=${{ env.EXECUTION_DATA_DIR }}/current-state/ \ + DST=${{ inputs.push-post-state }} diff --git a/.github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh b/.github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh new file mode 100755 index 000000000000..875f7d7fdb8d --- /dev/null +++ b/.github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +# WARNING: This file is a duplication of: +# - .github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh (source of truth) +# Changes must be made to BOTH files. + +set -euo pipefail + +# Timestamps are in seconds +from_timestamp="$(date '+%s')" +monitoring_period=900 # 15 minutes +to_timestamp="$((from_timestamp + monitoring_period))" + +# Grafana expects microseconds, so pad timestamps with 3 zeros +metrics_url="${GRAFANA_URL}&var-filter=gh_job_id%7C%3D%7C${GH_JOB_ID}&from=${from_timestamp}000&to=${to_timestamp}000" + +# Optionally ensure that the link displays metrics only for the shared +# network rather than mixing it with the results for private networks.
+if [[ -n "${FILTER_BY_OWNER:-}" ]]; then + metrics_url="${metrics_url}&var-filter=network_owner%7C%3D%7C${FILTER_BY_OWNER}" +fi + +echo "${metrics_url}" diff --git a/.github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh b/.github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh index ccecc34ac09c..5d2e8d59e8d1 100755 --- a/.github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh +++ b/.github/actions/run-monitored-tmpnet-cmd/output-metrics-url.sh @@ -1,5 +1,9 @@ #!/usr/bin/env bash +# WARNING: This file is duplicated at: +# - .github/actions/c-chain-reexecution-benchmark/output-metrics-url.sh (copy) +# Changes must be made to BOTH files. + set -euo pipefail # Timestamps are in seconds diff --git a/.github/workflows/c-chain-reexecution-benchmark-container.json b/.github/workflows/c-chain-reexecution-benchmark-container.json index aa8edb0aac70..2ccfac027111 100644 --- a/.github/workflows/c-chain-reexecution-benchmark-container.json +++ b/.github/workflows/c-chain-reexecution-benchmark-container.json @@ -3,20 +3,12 @@ "include": [ { "runner": "ubuntu-latest", - "config": "default", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", + "task": "c-chain-reexecution-hashdb-101-250k", "timeout-minutes": 30 }, { "runner": "avalanche-avalanchego-runner-2ti", - "config": "default", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", + "task": "c-chain-reexecution-hashdb-101-250k", "timeout-minutes": 30 } ] @@ -25,20 +17,12 @@ "include": [ { "runner": "avago-runner-m6i-4xlarge-ebs-fast", - "config": "default", - "start-block": 33000001, - "end-block": 33500000, - "block-dir-src": 
"s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-30m-40m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-33m/**", + "task": "c-chain-reexecution-hashdb-33m-33m500k", "timeout-minutes": 1440 }, { "runner": "avago-runner-i4i-4xlarge-local-ssd", - "config": "default", - "start-block": 33000001, - "end-block": 33500000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-30m-40m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-33m/**", + "task": "c-chain-reexecution-hashdb-33m-33m500k", "timeout-minutes": 1440 } ] diff --git a/.github/workflows/c-chain-reexecution-benchmark-container.yml b/.github/workflows/c-chain-reexecution-benchmark-container.yml index db12a98ad703..aa6e734b75a9 100644 --- a/.github/workflows/c-chain-reexecution-benchmark-container.yml +++ b/.github/workflows/c-chain-reexecution-benchmark-container.yml @@ -6,34 +6,39 @@ on: inputs: config: description: 'The config to pass to the VM for the benchmark. See BenchmarkReexecuteRange for details.' - required: false default: '' start-block: description: 'The start block for the benchmark.' - required: false - default: 101 + default: '' end-block: description: 'The end block for the benchmark.' - required: false - default: 250000 + default: '' block-dir-src: description: 'The source block directory. Supports S3 directory/zip and local directories.' - required: false - default: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/** + default: '' current-state-dir-src: description: 'The current state directory. Supports S3 directory/zip and local directories.' - required: false - default: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/** - runner: - description: 'Runner to execute the benchmark. Input to the runs-on field of the job.' 
- required: false + default: '' + task: + description: 'Taskfile task to execute (e.g., c-chain-reexecution-hashdb-101-250k)' + default: '' + runner-preset: + description: 'Select a predefined runner (ignored if custom-runner is provided)' + type: choice + options: + - ubuntu-latest + - avalanche-avalanchego-runner-2ti + - avago-runner-m6i-4xlarge-ebs-fast + - avago-runner-i4i-4xlarge-local-ssd default: ubuntu-latest + custom-runner: + description: 'Custom runner name (overrides runner-preset if provided)' + default: '' push-post-state: description: 'S3 location to push post-execution state directory. Skips this step if left unpopulated.' default: '' timeout-minutes: description: 'Timeout in minutes for the job.' - required: false default: 30 # Disabled because scheduled trigger is empty. To enable, uncomment and add at least one vector to the schedule @@ -53,15 +58,20 @@ jobs: shell: bash -x {0} run: | if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + # Use custom-runner if provided (after trimming whitespace), otherwise use runner-preset + CUSTOM_RUNNER=$(echo "${{ github.event.inputs.custom-runner }}" | xargs) + + if [[ -n "$CUSTOM_RUNNER" ]]; then + RUNNER="$CUSTOM_RUNNER" + else + RUNNER="${{ github.event.inputs.runner-preset }}" + fi + { echo "matrix<> "$GITHUB_OUTPUT" @@ -100,11 +110,12 @@ jobs: - name: Run C-Chain Re-Execution Benchmark uses: ./.github/actions/c-chain-reexecution-benchmark with: - config: ${{ matrix.config }} - start-block: ${{ matrix.start-block }} - end-block: ${{ matrix.end-block }} - block-dir-src: ${{ matrix.block-dir-src }} - current-state-dir-src: ${{ matrix.current-state-dir-src }} + task: ${{ matrix.task }} + config: ${{ inputs.config }} + start-block: ${{ inputs.start-block }} + end-block: ${{ inputs.end-block }} + block-dir-src: ${{ inputs.block-dir-src }} + current-state-dir-src: ${{ inputs.current-state-dir-src }} prometheus-url: ${{ secrets.PROMETHEUS_URL || '' }} prometheus-push-url: ${{ 
secrets.PROMETHEUS_PUSH_URL || '' }} prometheus-username: ${{ secrets.PROMETHEUS_USERNAME || '' }} diff --git a/.github/workflows/c-chain-reexecution-benchmark-gh-native.json b/.github/workflows/c-chain-reexecution-benchmark-gh-native.json index 19197b4b33be..57e20bcc97ab 100644 --- a/.github/workflows/c-chain-reexecution-benchmark-gh-native.json +++ b/.github/workflows/c-chain-reexecution-benchmark-gh-native.json @@ -2,30 +2,18 @@ "pull_request": { "include": [ { - "runner": "ubuntu-latest", - "config": "default", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", - "timeout-minutes": 30 + "runner": "ubuntu-latest", + "task": "c-chain-reexecution-hashdb-101-250k", + "timeout-minutes": 30 }, { "runner": "blacksmith-4vcpu-ubuntu-2404", - "config": "default", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**", + "task": "c-chain-reexecution-hashdb-101-250k", "timeout-minutes": 30 }, { "runner": "blacksmith-4vcpu-ubuntu-2404", - "config": "archive", - "start-block": 101, - "end-block": 250000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-archive-100/**", + "task": "c-chain-reexecution-hashdb-archive-101-250k", "timeout-minutes": 30 } ] diff --git a/.github/workflows/c-chain-reexecution-benchmark-gh-native.yml b/.github/workflows/c-chain-reexecution-benchmark-gh-native.yml index 174b8f36403b..9628f7cf6122 100644 --- a/.github/workflows/c-chain-reexecution-benchmark-gh-native.yml +++ b/.github/workflows/c-chain-reexecution-benchmark-gh-native.yml @@ -6,34 +6,37 @@ on: 
inputs: config: description: 'The config to pass to the VM for the benchmark. See BenchmarkReexecuteRange for details.' - required: false default: '' start-block: description: 'The start block for the benchmark.' - required: false - default: 101 + default: '' end-block: description: 'The end block for the benchmark.' - required: false - default: 250000 + default: '' block-dir-src: description: 'The source block directory. Supports S3 directory/zip and local directories.' - required: false - default: s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/** + default: '' current-state-dir-src: description: 'The current state directory. Supports S3 directory/zip and local directories.' - required: false - default: s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/** - runner: - description: 'Runner to execute the benchmark. Input to the runs-on field of the job.' - required: false + default: '' + task: + description: 'Taskfile task to execute (e.g., c-chain-reexecution-hashdb-101-250k)' + default: '' + runner-preset: + description: 'Select a predefined runner (ignored if custom-runner is provided)' + type: choice + options: + - ubuntu-latest + - blacksmith-4vcpu-ubuntu-2404 default: ubuntu-latest + custom-runner: + description: 'Custom runner name (overrides runner-preset if provided)' + default: '' push-post-state: description: 'S3 location to push post-execution state directory. Skips this step if left unpopulated.' default: '' timeout-minutes: description: 'Timeout in minutes for the job.' - required: false default: 30 # Disabled because scheduled trigger is empty. 
To enable, uncomment and add at least one vector to the schedule @@ -53,15 +56,20 @@ jobs: shell: bash -x {0} run: | if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then + # Use custom-runner if provided (after trimming whitespace), otherwise use runner-preset + CUSTOM_RUNNER=$(echo "${{ github.event.inputs.custom-runner }}" | xargs) + + if [[ -n "$CUSTOM_RUNNER" ]]; then + RUNNER="$CUSTOM_RUNNER" + else + RUNNER="${{ github.event.inputs.runner-preset }}" + fi + { echo "matrix<> "$GITHUB_OUTPUT" @@ -90,11 +98,12 @@ jobs: - name: Run C-Chain Re-Execution Benchmark uses: ./.github/actions/c-chain-reexecution-benchmark with: - config: ${{ matrix.config }} - start-block: ${{ matrix.start-block }} - end-block: ${{ matrix.end-block }} - block-dir-src: ${{ matrix.block-dir-src }} - current-state-dir-src: ${{ matrix.current-state-dir-src }} + task: ${{ matrix.task }} + config: ${{ inputs.config }} + start-block: ${{ inputs.start-block }} + end-block: ${{ inputs.end-block }} + block-dir-src: ${{ inputs.block-dir-src }} + current-state-dir-src: ${{ inputs.current-state-dir-src }} prometheus-url: ${{ secrets.PROMETHEUS_URL || '' }} prometheus-push-url: ${{ secrets.PROMETHEUS_PUSH_URL || '' }} prometheus-username: ${{ secrets.PROMETHEUS_USERNAME || '' }} diff --git a/Taskfile.yml b/Taskfile.yml index d871e10118b5..733ea09c39c7 100644 --- a/Taskfile.yml +++ b/Taskfile.yml @@ -4,6 +4,9 @@ version: '3' +env: + S3_BOOTSTRAP_BUCKET: 's3://avalanchego-bootstrap-testing' + tasks: default: ./scripts/run_task.sh --list @@ -142,8 +145,8 @@ tasks: desc: Imports the C-Chain block and state data to re-execute. Defaults to import the first 200 and the current state created with the default config of the C-Chain (hashdb). 
vars: EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-200-ldb/**"}}' - CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**"}}' + BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC | default (printf "%s/cchain-mainnet-blocks-200-ldb/**" .S3_BOOTSTRAP_BUCKET)}}' + CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC | default (printf "%s/cchain-current-state-hashdb-full-100/**" .S3_BOOTSTRAP_BUCKET)}}' cmds: - task: import-s3-to-dir vars: @@ -201,63 +204,127 @@ tasks: vars: CURRENT_STATE_DIR: '{{.CURRENT_STATE_DIR}}' BLOCK_DIR: '{{.BLOCK_DIR}}' - RUNNER_NAME: '{{.RUNNER_NAME | default "dev"}}' CONFIG: '{{.CONFIG | default ""}}' START_BLOCK: '{{.START_BLOCK}}' END_BLOCK: '{{.END_BLOCK}}' LABELS: '{{.LABELS | default ""}}' BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}' - METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' - METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' + TIMESTAMP: '{{.TIMESTAMP | default (now | date "20060102-150405")}}' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR | default (printf "/tmp/%s-%s" .TASK_NAME .TIMESTAMP)}}' cmd: | CURRENT_STATE_DIR={{.CURRENT_STATE_DIR}} \ BLOCK_DIR={{.BLOCK_DIR}} \ - RUNNER_NAME='{{.RUNNER_NAME | default "dev"}}' \ CONFIG={{.CONFIG}} \ START_BLOCK={{.START_BLOCK}} \ END_BLOCK={{.END_BLOCK}} \ LABELS={{.LABELS}} \ BENCHMARK_OUTPUT_FILE={{.BENCHMARK_OUTPUT_FILE}} \ - METRICS_SERVER_ENABLED={{.METRICS_SERVER_ENABLED}} \ - METRICS_SERVER_PORT={{.METRICS_SERVER_PORT}} \ - METRICS_COLLECTOR_ENABLED={{.METRICS_COLLECTOR_ENABLED}} \ + EXECUTION_DATA_DIR={{.EXECUTION_DATA_DIR}} \ bash -x ./scripts/benchmark_cchain_range.sh + # Runtime context variables are read from environment by the script: + # - RUNNER_NAME (execution environment) + # - 
METRICS_SERVER_PORT (monitoring config) + # - METRICS_SERVER_ENABLED (runtime monitoring decision) + # - METRICS_COLLECTOR_ENABLED (runtime monitoring decision) + # - PROMETHEUS_URL, PROMETHEUS_USERNAME, PROMETHEUS_PASSWORD (monitoring config) + # - GH_REPO, GH_WORKFLOW, GH_RUN_ID, etc. (GitHub context) reexecute-cchain-range-with-copied-data: desc: Combines import-cchain-reexecute-range and reexecute-cchain-range vars: - EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC | default "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-1m-ldb/**"}}' - CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC | default "s3://avalanchego-bootstrap-testing/cchain-current-state-hashdb-full-100/**"}}' - RUNNER_NAME: '{{.RUNNER_NAME | default "dev"}}' - CONFIG: '{{.CONFIG | default ""}}' - START_BLOCK: '{{.START_BLOCK | default "101"}}' - END_BLOCK: '{{.END_BLOCK | default "250000"}}' - LABELS: '{{.LABELS | default ""}}' - BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE | default ""}}' - METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED | default "false"}}' - METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' - METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED | default "false"}}' + TASK_NAME: '{{.TASK_NAME}}' + BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC}}' + CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC}}' + CONFIG: '{{.CONFIG}}' + START_BLOCK: '{{.START_BLOCK}}' + END_BLOCK: '{{.END_BLOCK}}' + TIMESTAMP: '{{.TIMESTAMP | default (now | date "20060102-150405")}}' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR | default (printf "/tmp/%s-%s" .TASK_NAME .TIMESTAMP)}}' cmds: - task: import-cchain-reexecute-range vars: - BLOCK_DIR_SRC: '{{.BLOCK_DIR_SRC}}' - CURRENT_STATE_DIR_SRC: '{{.CURRENT_STATE_DIR_SRC}}' + BLOCK_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.BLOCK_DIR_SRC}}/**' + CURRENT_STATE_DIR_SRC: '{{.S3_BOOTSTRAP_BUCKET}}/{{.CURRENT_STATE_DIR_SRC}}/**' EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' - task: reexecute-cchain-range vars: + TASK_NAME: 
'{{.TASK_NAME}}' BLOCK_DIR: '{{.EXECUTION_DATA_DIR}}/blocks' CURRENT_STATE_DIR: '{{.EXECUTION_DATA_DIR}}/current-state' - RUNNER_NAME: '{{.RUNNER_NAME}}' CONFIG: '{{.CONFIG}}' START_BLOCK: '{{.START_BLOCK}}' END_BLOCK: '{{.END_BLOCK}}' - LABELS: '{{.LABELS}}' - BENCHMARK_OUTPUT_FILE: '{{.BENCHMARK_OUTPUT_FILE}}' - METRICS_SERVER_ENABLED: '{{.METRICS_SERVER_ENABLED}}' - METRICS_SERVER_PORT: '{{.METRICS_SERVER_PORT}}' - METRICS_COLLECTOR_ENABLED: '{{.METRICS_COLLECTOR_ENABLED}}' + EXECUTION_DATA_DIR: '{{.EXECUTION_DATA_DIR}}' + + c-chain-reexecution-hashdb-101-250k: + desc: C-Chain re-execution from block 101 to 250k with hashdb + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 101 + END_BLOCK: 250000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-1m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-hashdb-full-100' + + c-chain-reexecution-hashdb-archive-101-250k: + desc: C-Chain re-execution from block 101 to 250k with hashdb archive + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 101 + END_BLOCK: 250000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-1m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-hashdb-archive-100' + CONFIG: archive + + c-chain-reexecution-hashdb-33m-33m500k: + desc: C-Chain re-execution from block 33m to 33.5m with hashdb + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 33000001 + END_BLOCK: 33500000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-30m-40m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-hashdb-full-33m' + + c-chain-reexecution-firewood-33m-33m500k: + desc: C-Chain re-execution from block 33m to 33.5m with firewood + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 33000001 + END_BLOCK: 33500000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-30m-40m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-firewood-33m' + CONFIG: 
firewood + + c-chain-reexecution-firewood-33m-40m: + desc: C-Chain re-execution from block 33m to 40m with firewood + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 33000001 + END_BLOCK: 40000000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-30m-40m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-firewood-33m' + CONFIG: firewood + + c-chain-reexecution-firewood-101-250k: + desc: C-Chain re-execution from block 101 to 250k with firewood + cmds: + - task: reexecute-cchain-range-with-copied-data + vars: + TASK_NAME: '{{.TASK}}' + START_BLOCK: 101 + END_BLOCK: 250000 + BLOCK_DIR_SRC: 'cchain-mainnet-blocks-1m-ldb' + CURRENT_STATE_DIR_SRC: 'cchain-current-state-firewood-100' + CONFIG: firewood test-bootstrap-monitor-e2e: desc: Runs bootstrap monitor e2e tests diff --git a/scripts/benchmark_cchain_range.sh b/scripts/benchmark_cchain_range.sh index 0d9d951e7194..896e68f8a559 100755 --- a/scripts/benchmark_cchain_range.sh +++ b/scripts/benchmark_cchain_range.sh @@ -16,14 +16,13 @@ set -euo pipefail : "${BLOCK_DIR:?BLOCK_DIR must be set}" : "${CURRENT_STATE_DIR:?CURRENT_STATE_DIR must be set}" -: "${RUNNER_NAME:?RUNNER_NAME must be set}" : "${START_BLOCK:?START_BLOCK must be set}" : "${END_BLOCK:?END_BLOCK must be set}" cmd="go test -timeout=0 -v -benchtime=1x -bench=BenchmarkReexecuteRange -run=^$ github.com/ava-labs/avalanchego/tests/reexecute/c \ --block-dir=\"${BLOCK_DIR}\" \ --current-state-dir=\"${CURRENT_STATE_DIR}\" \ - --runner=\"${RUNNER_NAME}\" \ + ${RUNNER_NAME:+--runner=\"${RUNNER_NAME}\"} \ ${CONFIG:+--config=\"${CONFIG}\"} \ --start-block=\"${START_BLOCK}\" \ --end-block=\"${END_BLOCK}\" \ diff --git a/scripts/copy_dir.sh b/scripts/copy_dir.sh index 874b53c74769..a6de180c1dc7 100755 --- a/scripts/copy_dir.sh +++ b/scripts/copy_dir.sh @@ -3,13 +3,13 @@ set -euo pipefail # Usage: ./scripts/copy_dir.sh source_directory destination_directory -# Sources can be S3 URLs (s3://bucket/path) or a local file path # 
Assumes s5cmd has been installed and is available in the PATH. # s5cmd is included in the nix dev shell. if [ $# -ne 2 ]; then echo "Usage: $0 " - echo "Import from S3 Example: $0 's3://bucket1/path1' /dest/dir" + echo "Import from S3 URL Example: $0 's3://bucket1/path1' /dest/dir" + echo "Note: bare S3 object keys (e.g. 'cchain-mainnet-blocks-1m-ldb') must be expanded to full s3:// URLs before calling this script" echo "Export to S3 Example: $0 '/local/path1' 's3://bucket2/path2'" echo "Local Example: $0 '/local/path1' /dest/dir" exit 1 @@ -18,11 +18,18 @@ fi SRC="$1" DST="$2" +# Reject SRC values that are neither an S3 URL (s3://...) nor an absolute local path (/...); bare S3 object keys must be expanded by the caller before invoking this script +if [[ "$SRC" != s3://* ]] && [[ "$SRC" != /* ]]; then + echo "Error: SRC must be either an S3 URL (s3://...), a local path (/...), or already expanded" + echo "If using an object key, expand it before calling this script" + exit 1 +fi + # Function to copy from a single source to destination function copy_source() { local source="$1" local dest="$2" - + # Check if source starts with s3:// if [[ "$source" == s3://* || "$dest" == s3://* ]]; then # Use s5cmd to copy from S3 @@ -30,7 +37,7 @@ function copy_source() { time s5cmd cp --show-progress "$source" "$dest" else # Use cp for local filesystem with recursive support - + # Ensure destination directory exists mkdir -p "$dest" diff --git a/tests/reexecute/c/README.md b/tests/reexecute/c/README.md index 03d918cc36a6..d8d124edc351 100644 --- a/tests/reexecute/c/README.md +++ b/tests/reexecute/c/README.md @@ -42,7 +42,7 @@ export AWS_REGION=us-east-2 ### Metrics Collection -If running locally, metrics collection can be customized via the following parameters: +If running locally, metrics collection can be customized via the following **environment variables**: - `METRICS_SERVER_ENABLED`: starts a Prometheus server exporting VM metrics. - `METRICS_SERVER_PORT`: if set, determines the port the Prometheus server will listen to (set to `0` by default).
@@ -50,13 +50,26 @@ If running locally, metrics collection can be customized via the following param When utilizing the metrics collector feature, follow the instructions in the e2e [README](../../e2e/README.md#monitoring) to set the required Prometheus environment variables. -Running the re-execution test in CI will always set `METRICS_SERVER_ENABLED=true` and `METRICS_COLLECTOR_ENABLED=true`. +Running the re-execution test in CI will implicitly set `METRICS_COLLECTOR_ENABLED: ${{ inputs.prometheus-username != '' }}` if Prometheus credentials are provided. ## Quick Start Let's run the default benchmark to get started. Make sure that you have completed the [Prerequisites](#prerequisites) section because it is required to copy the data from S3. -Decide what directory you want to use as a working directory and set the parameter `EXECUTION_DATA_DIR`. To re-execute a range of blocks, we need to copy the blocks themselves and the initial state of the chain, so these will be copied into `EXECUTION_DATA_DIR`. +### Using Predefined Tasks + +You can run `./scripts/run_task.sh --list | grep "c-chain-reexecution"` to list predefined tasks for common re-execution scenarios. + +To run a predefined task: +```bash +./scripts/run_task.sh c-chain-reexecution-hashdb-101-250k +``` + +These tasks automatically download the required data from S3 and run the benchmark with the appropriate configuration. + +### Using Custom Parameters + +For custom benchmark runs: [Taskfile](https://taskfile.dev/) supports reading arguments via both environment variables and named arguments on the command line, so we'll set `EXECUTION_DATA_DIR` and use the defaults for the remainder of the parameters: @@ -238,11 +251,24 @@ The `CONFIG` parameter currently only supports pre-defined configs and not passi The C-Chain benchmarks export VM metrics to the same Grafana instance as AvalancheGo CI: https://grafana-poc.avax-dev.network/. 
-To export metrics for a local run, simply set the Taskfile variables `METRICS_SERVER_ENABLED=true` and `METRICS_COLLECTOR_ENABLED=true` either via environment variable or passing it at the command line.
+To export metrics for a local run, set the environment variable `METRICS_COLLECTOR_ENABLED=true`:
+
+```bash
+export METRICS_COLLECTOR_ENABLED=true
+./scripts/run_task.sh c-chain-reexecution-hashdb-101-250k
+```
+
+You can view granular C-Chain processing metrics with the label attached to this job (job="c-chain-reexecution") [here](https://grafana-poc.avax-dev.network/d/Gl1I20mnk/c-chain?orgId=1&from=now-5m&to=now&timezone=browser&var-datasource=P1809F7CD0C75ACF3&var-filter=job%7C%3D%7Cc-chain-reexecution&var-chain=C&refresh=10s).
+
+**NOTE: Prometheus credentials are required for metrics collection.**
 
-You can view granular C-Chain processing metrics with the label attached to this job (job="c-chain-reexecution") [here](https://grafana-poc.avax-dev.network/d/Gl1I20mnk/c-chain?orgId=1&from=now-5m&to=now&timezone=browser&var-datasource=P1809F7CD0C75ACF3&var-filter=job%7C%3D%7Cc-chain-reexecution&var-chain=C&refresh=10s).
+---
 
-To attach additional labels to the metrics from a local run, set the Taskfile variable `LABELS` to a comma separated list of key value pairs (ex. `LABELS=user=alice,os=ubuntu`).
+To attach additional labels to the metrics from a local run, set the `LABELS` environment variable to a comma-separated list of key-value pairs:
+
+```bash
+export LABELS=user=alice,os=ubuntu
+```
 
 Note: to ensure Prometheus gets a final scrape at the end of a run, the test will sleep for 2s greater than the 10s Prometheus scrape interval, which will cause short-running tests to appear to take much longer than expected. Additionally, the linked dashboard displays most metrics using a 1min rate, which means that very short running tests will not produce a very useful visualization.
@@ -267,46 +293,43 @@ Both workflows provide three triggers: The manual workflow takes in all parameters specified by the user. To more easily specify a CI matrix and avoid GitHub's pain inducing matrix syntax, we define simple JSON files with the exact set of configs to run for each `pull_request` and `schedule` trigger. To add a new job for either of these triggers, simply define the entry in JSON and add it to run on the desired workflow. -For example, to add a new Firewood benchmark to execute the block range [30m, 40m] on a daily basis, follow the instructions above to generate the Firewood state as of block height 30m, export it to S3, and add the following entry under the `schedule` include array in the [GH Native JSON file](../../../.github/workflows/c-chain-reexecution-benchmark-gh-native.json). +The workflows support two approaches: +1. **Task-based**: Specify a predefined `task` name (e.g., `"task": "c-chain-reexecution-firewood-101-250k"`) +2. **Custom parameters**: Specify individual parameters with `"task": ""` and provide `config`, `start-block`, `end-block`, `block-dir-src`, `current-state-dir-src` + +For example, to add a new task-based Firewood benchmark to execute the block range [101, 250K] on a daily basis, add the following entry under the `schedule` include array in the [GH Native JSON file](../../../.github/workflows/c-chain-reexecution-benchmark-gh-native.json). 
```json { "runner": "blacksmith-4vcpu-ubuntu-2404", - "config": "firewood", - "start-block": 30000001, - "end-block": 40000000, - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-50m-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-firewood-30m/**", - "timeout-minutes": 1440 + "task": "c-chain-reexecution-firewood-101-250k", + "timeout-minutes": 30 } ``` ## Trigger Workflow Dispatch with GitHub CLI -To triggers runs conveniently, you can use the [GitHub CLI](https://cli.github.com/manual/gh_workflow_run) to trigger workflows. +To trigger runs conveniently, you can use the [GitHub CLI](https://cli.github.com/manual/gh_workflow_run) to trigger workflows. -Note: passing JSON to the GitHub CLI requires all key/value pairs as strings, so ensure that any number parameters are quoted as strings or you will see the error: +### Using a Predefined Task ```bash -could not parse provided JSON: json: cannot unmarshal number into Go value of type string -``` - -Copy your desired parameters as JSON into a file or write it out on the command line: - -```json -{ - "runner": "blacksmith-4vcpu-ubuntu-2404", - "config": "firewood", - "start-block": "101", - "end-block": "200", - "block-dir-src": "s3://avalanchego-bootstrap-testing/cchain-mainnet-blocks-10k-ldb/**", - "current-state-dir-src": "s3://avalanchego-bootstrap-testing/cchain-current-state-firewood-100/**", - "timeout-minutes": "5" -} +gh workflow run "C-Chain Re-Execution Benchmark GH Native" \ + -f task=c-chain-reexecution-firewood-101-250k \ + -f runner=blacksmith-4vcpu-ubuntu-2404 \ + -f timeout-minutes=60 ``` -Then pass it to the GitHub CLI: +### Using Custom Parameters ```bash -cat input.json | gh workflow run .github/workflows/c-chain-reexecution-benchmark-gh-native.yml --json +gh workflow run "C-Chain Re-Execution Benchmark GH Native" \ + -f task="" \ + -f block-dir-src=cchain-mainnet-blocks-1m-ldb \ + -f 
current-state-dir-src=cchain-current-state-hashdb-full-100 \ + -f start-block=101 \ + -f end-block=250000 \ + -f config=default \ + -f runner=ubuntu-latest \ + -f timeout-minutes=360 ``` diff --git a/tests/reexecute/c/vm_reexecute_test.go b/tests/reexecute/c/vm_reexecute_test.go index 2cb50b91fe21..b396a8eacac2 100644 --- a/tests/reexecute/c/vm_reexecute_test.go +++ b/tests/reexecute/c/vm_reexecute_test.go @@ -208,6 +208,12 @@ func benchmarkReexecuteRange( ) log.Info("re-executing block range with params", + zap.String("runner", runnerNameArg), + zap.String("config", configNameArg), + zap.String("labels", labelsArg), + zap.String("metrics-server-enabled", strconv.FormatBool(metricsServerEnabled)), + zap.Uint64("metrics-server-port", metricsPort), + zap.String("metrics-collector-enabled", strconv.FormatBool(metricsCollectorEnabled)), zap.String("block-dir", blockDir), zap.String("vm-db-dir", vmDBDir), zap.String("chain-data-dir", chainDataDir),