Skip to content

Commit be353bc

Browse files
authored
add nightly throughput stress (#2717)
* add nightly throughput stress
* add permissions block
* add slack webhook type
* add debug logging
* fix build step
* remove clean from build
* fix version path
* add fetch-depth 0
* remove debug logging
* add repo name to slack alert
* exclude virtualThreadTests from build
1 parent 3f41b69 commit be353bc

File tree

1 file changed

+157
-0
lines changed

1 file changed

+157
-0
lines changed
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
# Long-running throughput stress run: builds the SDK from this checkout and
# drives the OMES `throughput_stress` scenario against a local dev server.
name: 'Nightly Throughput Stress'

on:
  schedule:
    # 11:00 UTC every day (3 AM PST / 4 AM PDT), offset from the existing
    # nightly run.
    - cron: '00 11 * * *'
  # Ad-hoc runs go through workflow_dispatch. There is deliberately no `push`
  # trigger, so pushing a branch never starts a multi-hour stress run.
  workflow_dispatch:
    inputs:
      duration:
        description: 'Test duration (e.g., 6h, 1h)'
        required: false
        default: '5h'
        type: string
      timeout:
        description: 'Scenario timeout (should always be more than duration)'
        required: false
        default: '5h30m'
        type: string
      job_timeout_minutes:
        description: 'GitHub Actions job timeout in minutes'
        required: false
        default: 360
        type: number

# Limit the workflow's GITHUB_TOKEN to read-only access to repository contents.
permissions:
  contents: read

env:
  # Scenario length and deadline. Precedence: manual dispatch input, then
  # repository variable, then the literal default.
  TEST_DURATION: ${{ inputs.duration || vars.NIGHTLY_TEST_DURATION || '5h' }}
  TEST_TIMEOUT: ${{ inputs.timeout || vars.NIGHTLY_TEST_TIMEOUT || '5h30m' }}

  # Directory that receives the server and scenario logs; uploaded as an
  # artifact when the job fails or is cancelled.
  WORKER_LOG_DIR: '/tmp/throughput-stress-logs'

  # OMES repository/ref to check out, and the run ID handed to the scenario.
  OMES_REPO: 'temporalio/omes'
  OMES_REF: 'main'
  RUN_ID: ${{ github.run_id }}-throughput-stress

jobs:
  # Builds the SDK from this checkout, starts a local dev Temporal server and
  # runs the OMES `throughput_stress` scenario with a Java worker built from
  # the local SDK. When the job fails or is cancelled, the collected logs are
  # uploaded and a Slack alert is sent.
  throughput-stress:
    runs-on: ubuntu-latest-4-cores
    # The first non-empty value wins; fromJSON turns it into the number that
    # timeout-minutes expects (repository variables are strings).
    timeout-minutes: ${{ fromJSON(inputs.job_timeout_minutes || vars.NIGHTLY_JOB_TIMEOUT_MINUTES || 360) }}

    steps:
      - name: Print test configuration
        run: |
          echo "=== Throughput Stress Test Configuration ==="
          echo "Duration: $TEST_DURATION"
          echo "Timeout: $TEST_TIMEOUT"
          echo "Run ID: $RUN_ID"
          echo "=========================================="

      # Full history and submodules are fetched for the SDK build.
      - name: Checkout SDK
        uses: actions/checkout@v5
        with:
          submodules: recursive
          fetch-depth: 0

      # OMES lives in ./omes, so the SDK checkout is its parent directory.
      - name: Checkout OMES
        uses: actions/checkout@v5
        with:
          repository: ${{ env.OMES_REPO }}
          ref: ${{ env.OMES_REF }}
          path: omes

      - name: Setup Go
        uses: actions/setup-go@v5
        with:
          go-version-file: omes/go.mod
          cache-dependency-path: omes/go.sum

      - name: Set up Java
        uses: actions/setup-java@v5
        with:
          java-version: "11"
          distribution: "temurin"

      - name: Set up Gradle
        uses: gradle/actions/setup-gradle@v4

      - name: Build SDK
        run: ./gradlew build -x test -x virtualThreadTests

      - name: Install Temporal CLI
        uses: temporalio/setup-temporal@v0

      - name: Setup log directory
        run: mkdir -p "$WORKER_LOG_DIR"

      # The server is backgrounded so later steps can use it; its stdout and
      # stderr both go to the log directory.
      - name: Start Temporal Server
        run: |
          temporal server start-dev \
            --db-filename temporal-throughput-stress.sqlite \
            --sqlite-pragma journal_mode=WAL \
            --sqlite-pragma synchronous=OFF \
            --headless &> "$WORKER_LOG_DIR/temporal-server.log" &

      - name: Run throughput stress scenario with local SDK
        working-directory: omes
        run: |
          # With pipefail the pipeline fails when any command in it fails.
          # Without it only the exit status of `tee` would count, which would
          # hide a failing scenario behind a successful `tee`.
          set -o pipefail

          # Use run-scenario-with-worker to build and run in one step.
          # Pass the SDK directory as --version for local testing.
          # Note: The hardcoded values below match OMES defaults, except:
          # - visibility-count-timeout: 5m (vs 3m default)
          #   to give CI a bit more time for visibility consistency
          #
          # Every expansion is quoted so that input- or variable-supplied
          # values containing whitespace or glob characters stay a single
          # argument instead of being split into extra flags.
          go run ./cmd run-scenario-with-worker \
            --scenario throughput_stress \
            --language java \
            --version "$(pwd)/.." \
            --run-id "$RUN_ID" \
            --duration "$TEST_DURATION" \
            --timeout "$TEST_TIMEOUT" \
            --max-concurrent 10 \
            --option internal-iterations=10 \
            --option continue-as-new-after-iterations=3 \
            --option sleep-time=1s \
            --option visibility-count-timeout=5m \
            --option min-throughput-per-hour=1000 \
            2>&1 | tee "$WORKER_LOG_DIR/scenario.log"

      - name: Upload logs on failure
        if: failure() || cancelled()
        uses: actions/upload-artifact@v4
        with:
          name: throughput-stress-logs
          path: ${{ env.WORKER_LOG_DIR }}
          retention-days: 30

      - name: Notify Slack on failure
        if: failure() || cancelled()
        uses: slackapi/slack-github-action@v2
        with:
          webhook-type: incoming-webhook
          payload: |
            {
              "text": "Nightly Java throughput stress test failed",
              "blocks": [
                {
                  "type": "section",
                  "text": {
                    "type": "mrkdwn",
                    "text": "*Nightly Throughput Stress Failed* :x:\n\n*Repository:* ${{ github.repository }}\n*Duration:* ${{ env.TEST_DURATION }}\n*Run:* <${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}|View Logs>\n*Triggered by:* ${{ github.event_name == 'schedule' && 'Scheduled' || github.actor }}"
                  }
                }
              ]
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_SDK_ALERTS_WEBHOOK }}

0 commit comments

Comments
 (0)