@@ -52,16 +52,16 @@ jobs:
5252 run : |
5353 # List of examples to test
5454 # Excluded examples:
55+ # - 01_hello_world.py: requires a LiteLLM proxy URL (OPENAI_BASE_URL), which is not set in CI
5556 # - 04_confirmation_mode_example.py: requires user input
5657 # - 06_interactive_terminal_w_reasoning.py: interactive terminal
5758 # - 08_mcp_with_oauth.py: requires OAuth setup
5859 # - 15_browser_use.py: requires browser setup
5960 # - 16_llm_security_analyzer.py: requires user input
60- # - 03_browser_use_with_docker_sandboxed_server.py: Docker build cache issues
6161 # - 04_convo_with_api_sandboxed_server.py: requires sandbox API keys
6262 # - 04_vscode_with_docker_sandboxed_server.py: requires VSCode setup
63+ set -e
6364 EXAMPLES=(
64- "examples/01_standalone_sdk/01_hello_world.py"
6565 "examples/01_standalone_sdk/02_custom_tools.py"
6666 "examples/01_standalone_sdk/03_activate_skill.py"
6767 "examples/01_standalone_sdk/05_use_llm_registry.py"
8282 "examples/01_standalone_sdk/24_planning_agent_workflow.py"
8383 "examples/02_remote_agent_server/01_convo_with_local_agent_server.py"
8484 "examples/02_remote_agent_server/02_convo_with_docker_sandboxed_server.py"
85+ "examples/02_remote_agent_server/03_browser_use_with_docker_sandboxed_server.py"
8586 )
8687
8788 # GitHub API setup (only for PR events)
@@ -196,28 +197,38 @@ jobs:
196197 echo ""
197198 echo "Running: $example"
198199 echo "------------------------------------------"
199-
200+
200201 START_TIME=$(date +%s)
201-
202+
202203 # Create temp file to capture output
203204 OUTPUT_FILE=$(mktemp)
204-
205+
205206 # Run example with timeout (20 minutes per example)
206207 # Capture output while still displaying it
207- if timeout 1200 uv run python "$example" 2>&1 | tee "$OUTPUT_FILE"; then
208- END_TIME=$(date +%s)
209- DURATION=$((END_TIME - START_TIME))
210- DURATION_STR="${DURATION}s"
211-
212- # Extract cost from output
213- COST=$(grep "EXAMPLE_COST:" "$OUTPUT_FILE" | awk '{print $2}' | tail -1)
214- if [ -z "$COST" ]; then
215- COST="0.00"
216- fi
217-
218- # Accumulate total cost
219- TOTAL_COST=$(echo "$TOTAL_COST + $COST" | bc -l)
220-
208+ # Use || true to prevent script exit on failure
209+ (timeout 1200 uv run python "$example" 2>&1 || true) | tee "$OUTPUT_FILE"
210+
211+ # The real exit status was discarded above, so infer success from the presence of an "EXAMPLE_COST:" line in the captured output
212+ if ! grep -q "EXAMPLE_COST:" "$OUTPUT_FILE"; then
213+ EXIT_CODE=1
214+ else
215+ EXIT_CODE=0
216+ fi
217+
218+ END_TIME=$(date +%s)
219+ DURATION=$((END_TIME - START_TIME))
220+ DURATION_STR="${DURATION}s"
221+
222+ # Extract cost from output
223+ COST=$(grep "EXAMPLE_COST:" "$OUTPUT_FILE" | awk '{print $2}' | tail -1 || echo "0.00")
224+ if [ -z "$COST" ]; then
225+ COST="0.00"
226+ fi
227+
228+ # Accumulate total cost
229+ TOTAL_COST=$(echo "$TOTAL_COST + $COST" | bc -l 2>/dev/null || echo "$TOTAL_COST")
230+
231+ if [ "$EXIT_CODE" -eq 0 ]; then
221232 echo "✓ PASSED: $example (${DURATION_STR}, cost: \$${COST})"
222233 PASSED=$((PASSED + 1))
223234 COMPLETED=$((COMPLETED + 1))
@@ -226,20 +237,6 @@ jobs:
226237 TEST_COST[$example]="$(format_cost $COST)"
227238 echo "PASS|$example|${DURATION}|${COST}" >> "$RESULTS_FILE"
228239 else
229- EXIT_CODE=$?
230- END_TIME=$(date +%s)
231- DURATION=$((END_TIME - START_TIME))
232- DURATION_STR="${DURATION}s"
233-
234- # Try to extract cost even for failed tests
235- COST=$(grep "EXAMPLE_COST:" "$OUTPUT_FILE" | awk '{print $2}' | tail -1)
236- if [ -z "$COST" ]; then
237- COST="0.00"
238- fi
239-
240- # Accumulate total cost
241- TOTAL_COST=$(echo "$TOTAL_COST + $COST" | bc -l)
242-
243240 echo "✗ FAILED: $example (exit code: $EXIT_CODE, ${DURATION_STR}, cost: \$${COST})"
244241 FAILED=$((FAILED + 1))
245242 COMPLETED=$((COMPLETED + 1))
@@ -249,13 +246,13 @@ jobs:
249246 TEST_COST[$example]="$(format_cost $COST)"
250247 echo "FAIL|$example|$EXIT_CODE|${DURATION}|${COST}" >> "$RESULTS_FILE"
251248 fi
252-
249+
253250 # Clean up temp file
254251 rm -f "$OUTPUT_FILE"
255-
256- # Update PR comment after each test
252+
253+ # Update PR comment after each test (best-effort: a failed update only prints a warning)
257254 echo "Updating PR comment..."
258- update_comment "$(generate_table)"
255+ update_comment "$(generate_table)" || echo "Warning: Failed to update PR comment"
259256 done
260257
261258 echo ""
0 commit comments