@@ -48,6 +48,72 @@ if [[ -n "${CI:-}" ]] || [[ -n "${GITHUB_ACTIONS:-}" ]]; then
4848 echo " [INFO] CI environment detected, scaling timeouts by ${TIMEOUT_SCALE} x" >&2
4949fi
5050
51+ # ============================================================================
52+ # Helper Functions - Common operations extracted to reduce duplication
53+ # ============================================================================
54+
55+ # Check if a PID is valid and the process is running
56+ is_valid_pid () {
57+ local pid=" $1 "
58+ [[ -n " $pid " ]] && [[ " $pid " =~ ^[0-9]+$ ]] && kill -0 " $pid " 2> /dev/null
59+ }
60+
61+ # Kill all processes listening on a specific port (OS-aware)
62+ kill_processes_on_port () {
63+ local port=" $1 "
64+ if command -v lsof > /dev/null 2>&1 ; then
65+ if [[ " $( uname) " == " Darwin" ]]; then
66+ lsof -ti " :$port " | xargs kill -9 2> /dev/null || true
67+ else
68+ lsof -ti " :$port " | xargs -r kill -9 2> /dev/null || true
69+ fi
70+ fi
71+ }
72+
73+ # Get all ports used by the test (nodes + relay)
74+ get_all_ports () {
75+ local config_file=" $1 "
76+ local ports=($( jq -r ' .nodes[].port' " $config_file " 2> /dev/null) )
77+ local relay_port=$( jq -r ' .network.relay_port // 5555' " $config_file " 2> /dev/null)
78+ ports+=(" $relay_port " )
79+ echo " ${ports[@]} "
80+ }
81+
82+ # Get the number of nodes in the test configuration
83+ get_node_count () {
84+ local config_file=" $1 "
85+ jq -r ' .nodes | length' " $config_file "
86+ }
87+
88+ # Display standardized error message for node failures
89+ display_node_error () {
90+ local node_id=" $1 "
91+ local error_type=" $2 " # "timeout" or "exit_code"
92+ local actual=" $3 "
93+ local expected=" $4 "
94+ local output_file=" $5 "
95+ local error_file=" $6 "
96+ local timeout_val=" ${7:- } " # Optional, only for timeout errors
97+
98+ if [[ " $error_type " == " timeout" ]]; then
99+ echo " ERROR: Node $node_id timed out unexpectedly" >&2
100+ echo " Timeout: ${actual} s (original: ${timeout_val} s)" >&2
101+ echo " Expected exit code: $expected " >&2
102+ else
103+ echo " ERROR: Node $node_id exit code mismatch" >&2
104+ echo " Actual: $actual " >&2
105+ echo " Expected: $expected " >&2
106+ fi
107+
108+ echo " Last 10 lines of output:" >&2
109+ tail -n 10 " $output_file " 2> /dev/null | sed ' s/^/ /' >&2
110+
111+ if [[ " $error_type " == " exit_code" ]] && [[ -s " $error_file " ]]; then
112+ echo " Last 5 lines of errors:" >&2
113+ tail -n 5 " $error_file " 2> /dev/null | sed ' s/^/ /' >&2
114+ fi
115+ }
116+
51117usage () {
52118 cat << EOF
53119Usage: $0 [options] <test-config.json>
@@ -73,7 +139,7 @@ cleanup() {
73139 local cleaned_count=0
74140
75141 # Kill relay first with multiple attempts
76- if [[ -n " $RELAY_PID " ]] && [[ " $RELAY_PID " =~ ^[0-9]+$ ]] && kill -0 " $RELAY_PID " 2> /dev/null ; then
142+ if is_valid_pid " $RELAY_PID " ; then
77143 log " Stopping relay (PID: $RELAY_PID )"
78144 kill " $RELAY_PID " 2> /dev/null || true
79145 sleep 1
@@ -92,7 +158,7 @@ cleanup() {
92158 if [[ ${# CLEANUP_PIDS[@]} -gt 0 ]]; then
93159 log " Cleaning up ${# CLEANUP_PIDS[@]} node processes"
94160 for pid in " ${CLEANUP_PIDS[@]} " ; do
95- if [[ " $pid " =~ ^[0-9]+$ ]] && kill -0 " $pid " 2> /dev/null ; then
161+ if is_valid_pid " $pid " ; then
96162 kill " $pid " 2> /dev/null || true
97163 (( cleaned_count++ ))
98164 fi
@@ -103,7 +169,7 @@ cleanup() {
103169
104170 # Force kill remaining processes
105171 for pid in " ${CLEANUP_PIDS[@]} " ; do
106- if [[ " $pid " =~ ^[0-9]+$ ]] && kill -0 " $pid " 2> /dev/null ; then
172+ if is_valid_pid " $pid " ; then
107173 log " Force-killing remaining process $pid "
108174 kill -9 " $pid " 2> /dev/null || true
109175 fi
@@ -112,21 +178,11 @@ cleanup() {
112178
113179 # Clean up ports used by the test
114180 if [[ -n " $TEST_CONFIG " ]]; then
115- local ports=($( jq -r ' .nodes[].port' " $TEST_CONFIG " 2> /dev/null) )
116- local relay_port=$( jq -r ' .network.relay_port // 5555' " $TEST_CONFIG " 2> /dev/null)
117- ports+=(" $relay_port " )
118-
181+ local ports=($( get_all_ports " $TEST_CONFIG " ) )
182+
119183 for port in " ${ports[@]} " ; do
120184 if [[ " $port " =~ ^[0-9]+$ ]]; then
121- # Kill any process listening on this port
122- if command -v lsof > /dev/null 2>&1 ; then
123- # macOS doesn't support -r flag for xargs
124- if [[ " $( uname) " == " Darwin" ]]; then
125- lsof -ti " :$port " | xargs kill -9 2> /dev/null || true
126- else
127- lsof -ti " :$port " | xargs -r kill -9 2> /dev/null || true
128- fi
129- fi
185+ kill_processes_on_port " $port "
130186 fi
131187 done
132188 fi
@@ -187,7 +243,7 @@ wait_for_port_release() {
187243 local port=" $1 "
188244 local max_wait=10
189245 local wait_count=0
190-
246+
191247 while [[ $wait_count -lt $max_wait ]]; do
192248 if check_port_available " $port " ; then
193249 log " Port $port is now available"
@@ -197,28 +253,19 @@ wait_for_port_release() {
197253 sleep 1
198254 (( wait_count++ ))
199255 done
200-
256+
201257 # Force kill processes using the port
202258 log " Force killing processes on port $port "
203- if command -v lsof > /dev/null 2>&1 ; then
204- # macOS doesn't support -r flag for xargs
205- if [[ " $( uname) " == " Darwin" ]]; then
206- lsof -ti " :$port " | xargs kill -9 2> /dev/null || true
207- else
208- lsof -ti " :$port " | xargs -r kill -9 2> /dev/null || true
209- fi
210- fi
259+ kill_processes_on_port " $port "
211260 sleep 1
212261 return 0
213262}
214263
215264validate_ports () {
216265 local config_file=" $1 "
217266 log " Validating port availability..."
218-
219- local ports=($( jq -r ' .nodes[].port' " $config_file " 2> /dev/null) )
220- local relay_port=$( jq -r ' .network.relay_port // 5555' " $config_file " 2> /dev/null)
221- ports+=(" $relay_port " )
267+
268+ local ports=($( get_all_ports " $config_file " ) )
222269
223270 local port_conflict=false
224271 for port in " ${ports[@]} " ; do
@@ -267,7 +314,7 @@ setup_network_identities() {
267314
268315 # Generate node identities
269316 local node_count
270- node_count=$( jq -r ' .nodes | length ' " $config_file " )
317+ node_count=$( get_node_count " $config_file " )
271318
272319 for (( i= 0 ; i< node_count; i++ )) ; do
273320 local node_id
@@ -606,23 +653,11 @@ run_node() {
606653 if [[ " $expected_exit_code " == " 124" ]]; then
607654 log " Node $node_id timed out as expected after ${scaled_timeout} s"
608655 else
609- echo " ERROR: Node $node_id timed out unexpectedly" >&2
610- echo " Timeout: ${scaled_timeout} s (original: ${timeout_val} s)" >&2
611- echo " Expected exit code: $expected_exit_code " >&2
612- echo " Last 10 lines of output:" >&2
613- tail -n 10 " $output_file " 2> /dev/null | sed ' s/^/ /' >&2
656+ display_node_error " $node_id " " timeout" " $scaled_timeout " " $expected_exit_code " " $output_file " " $error_file " " $timeout_val "
614657 error " Node $node_id timed out unexpectedly"
615658 fi
616659 elif [[ " $actual_exit_code " != " $expected_exit_code " ]]; then
617- echo " ERROR: Node $node_id exit code mismatch" >&2
618- echo " Actual: $actual_exit_code " >&2
619- echo " Expected: $expected_exit_code " >&2
620- echo " Last 10 lines of output:" >&2
621- tail -n 10 " $output_file " 2> /dev/null | sed ' s/^/ /' >&2
622- if [[ -s " $error_file " ]]; then
623- echo " Last 5 lines of errors:" >&2
624- tail -n 5 " $error_file " 2> /dev/null | sed ' s/^/ /' >&2
625- fi
660+ display_node_error " $node_id " " exit_code" " $actual_exit_code " " $expected_exit_code " " $output_file " " $error_file "
626661 error " Node $node_id exited with code $actual_exit_code , expected $expected_exit_code "
627662 fi
628663
@@ -649,7 +684,7 @@ merge_outputs() {
649684 " sequential" )
650685 # Concatenate outputs in node order
651686 local node_count
652- node_count=$( jq -r ' .nodes | length ' " $config_file " )
687+ node_count=$( get_node_count " $config_file " )
653688
654689 for (( i= 0 ; i< node_count; i++ )) ; do
655690 local node_id
@@ -665,7 +700,7 @@ merge_outputs() {
665700 " per_node" )
666701 # Output each node separately without filtering
667702 local node_count
668- node_count=$( jq -r ' .nodes | length ' " $config_file " )
703+ node_count=$( get_node_count " $config_file " )
669704
670705 for (( i= 0 ; i< node_count; i++ )) ; do
671706 local node_id
@@ -714,7 +749,7 @@ run_test() {
714749 coordination=$( jq -r ' .coordination // "parallel"' " $config_file " )
715750
716751 local node_count
717- node_count=$( jq -r ' .nodes | length ' " $config_file " )
752+ node_count=$( get_node_count " $config_file " )
718753
719754 case " $coordination " in
720755 " parallel" )
0 commit comments