Skip to content

Commit 74e450a

Browse files
authored
Merge pull request #20409 from cockroachdb/test-netlify-retry
Add exponential backoff retry logic to Netlify builds
2 parents 3159501 + e822fc8 commit 74e450a

File tree

3 files changed

+199
-24
lines changed

3 files changed

+199
-24
lines changed

src/current/netlify.toml

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,27 @@
66
[build.environment]
77
NODE_VERSION = "18.14.0"
88
RUBY_VERSION = "3.2.1"
9+
JEKYLL_REMOTE_INCLUDE_CACHE = "true"
10+
JEKYLL_REMOTE_INCLUDE_CACHE_TTL = "3600"
11+
# Enable retry logic for production builds
12+
MAX_RETRIES = "3"
13+
BASE_RETRY_DELAY = "30"
914
[build.processing.html]
1015
pretty_urls = true
1116

17+
# Cache Plugin (keep caching for performance)
18+
[[plugins]]
19+
package = "netlify-plugin-cache"
20+
[plugins.inputs]
21+
paths = [
22+
".jekyll-cache",
23+
".remote-includes-cache",
24+
"node_modules/.cache",
25+
"_data/cached"
26+
]
27+
28+
# Retry functionality implemented in build script for robust production builds
29+
1230
[[edge_functions]]
1331
path = "/*"
14-
function = "blockBytedance"
15-
16-
#[[plugins]]
17-
# package = "@netlify/plugin-lighthouse"
18-
# [plugins.inputs]
19-
# output_path = "./reports/lighthouse.html"
20-
# [[plugins.inputs.audits]]
21-
# path = "./docs/index.html"
32+
function = "blockBytedance"

src/current/netlify/build.sh

Lines changed: 178 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,32 @@
11
#!/bin/bash
22

3+
echo "🚀 NETLIFY BUILD SCRIPT WITH RETRY LOGIC"
4+
echo "========================================"
5+
echo "Branch: ${BRANCH:-unknown}"
6+
echo "Context: ${CONTEXT:-unknown}"
7+
echo "Build ID: ${BUILD_ID:-unknown}"
8+
echo "Timestamp: $(date)"
9+
echo ""
10+
11+
# Configure retry settings
12+
MAX_RETRIES=${MAX_RETRIES:-3}
13+
BASE_RETRY_DELAY=${BASE_RETRY_DELAY:-30}
14+
15+
if [[ $MAX_RETRIES -gt 1 ]]; then
16+
echo "🔄 RETRY LOGIC ENABLED"
17+
echo "Max retries: ${MAX_RETRIES}"
18+
echo "Base retry delay: ${BASE_RETRY_DELAY}s (exponential backoff)"
19+
else
20+
echo "📋 SINGLE ATTEMPT BUILD"
21+
fi
22+
23+
echo ""
24+
25+
# Build monitoring variables
26+
BUILD_START_TIME=$(date +%s)
27+
ATTEMPT_COUNT=0
28+
TOTAL_NETWORK_CALLS=0
29+
330
# Populate the site_url to be used by Jekyll for generating sidebar and search links
431
site_url="${DEPLOY_PRIME_URL}"
532
JEKYLL_ENV="preview"
@@ -18,55 +45,191 @@ fi
1845

1946
echo "url: ${site_url}" > _config_url.yml
2047

21-
function build {
22-
bundle exec jekyll build --trace --config _config_base.yml,$1
23-
if [[ $? != 0 ]]; then
24-
exit 1
25-
fi
48+
# Print the banner for one build attempt and, for every attempt after the
# first, wait out the exponential backoff interval before returning.
#
# Globals:
#   BASE_RETRY_DELAY (read) - base delay in seconds
# Arguments:
#   $1 - current attempt number (1-based)
#   $2 - total number of attempts allowed
# Outputs:
#   Banner and backoff details on stdout.
function log_attempt() {
  local attempt_no=$1
  local attempt_max=$2

  printf '\n%s\n%s\n' \
    "🔄 BUILD ATTEMPT ${attempt_no}/${attempt_max}" \
    "================================"
  printf '%s\n' "Time: $(date)"

  if [[ $attempt_no -gt 1 ]]; then
    # Backoff doubles per retry: base * 2^(attempt - 2), so the first retry
    # (attempt 2) waits exactly the base delay.
    local doublings=$(( attempt_no - 2 ))
    local backoff=$(( BASE_RETRY_DELAY * (1 << doublings) ))
    printf '%s\n' \
      "Previous attempts failed - retrying..." \
      "Exponential backoff delay: ${backoff}s (base: ${BASE_RETRY_DELAY}s)"
    sleep "${backoff}"
  fi

  printf '\n'
}
64+
65+
# Run one Jekyll build, mirror its output into a per-attempt log file, and
# classify any failure so the caller can decide whether a retry makes sense.
#
# Globals:
#   ATTEMPT_COUNT (read) - used to name the log file build_<ATTEMPT_COUNT>.log
# Arguments:
#   $1 - comma-separated config file list appended after _config_base.yml
# Outputs:
#   Jekyll output plus progress/diagnostic messages on stdout; the build log
#   is written to the current directory.
# Returns:
#   0 - build succeeded
#   1 - permanent or unclassified failure (caller should not retry)
#   2 - transient network failure (caller may retry)
function build_with_monitoring {
  local config=$1
  local build_log="build_${ATTEMPT_COUNT}.log"
  local exit_code
  # Log patterns (matched case-insensitively) used to classify a failed build.
  local transient_re='(temporary failure in name resolution|connection refused|connection reset|SSL_connect|certificate verify failed|execution expired|timeout|network is unreachable|failed to open tcp connection|socketerror)'
  local permanent_re='(liquid.*syntax error|liquid error|argumenterror|no such file or directory|undefined method|unknown tag|was not properly terminated|missing file)'

  echo "📝 Starting Jekyll build with config: $config"
  echo "⏰ Build start: $(date)"
  echo "📄 Build log: $build_log"

  # Capture Jekyll output for error analysis. The status of a pipeline is the
  # status of its LAST command (tee), so the build's own exit code has to be
  # read from PIPESTATUS immediately after the pipeline; otherwise a failed
  # build would be reported as a success.
  bundle exec jekyll build --trace --config "_config_base.yml,${config}" 2>&1 | tee "$build_log"
  exit_code=${PIPESTATUS[0]}

  echo "⏰ Build end: $(date)"

  if [[ $exit_code -eq 0 ]]; then
    echo "✅ Jekyll build completed successfully"
    return 0
  fi

  echo "❌ Jekyll build failed with exit code: $exit_code"

  # Analyze build log for error classification
  echo "🔍 Analyzing build errors for retry eligibility..."

  # Transient network errors are checked first: they win over a permanent
  # pattern that happens to appear in the same log.
  if grep -qiE "$transient_re" "$build_log"; then
    echo "🌐 Transient network error detected - eligible for retry"
    echo "📋 Network error details:"
    grep -iE "$transient_re" "$build_log" | head -3
    return 2 # Retryable error
  fi

  # Permanent errors (template/content problems) must NOT be retried
  if grep -qiE "$permanent_re" "$build_log"; then
    echo "🚫 Permanent build error detected - not retrying"
    echo "📋 Error details:"
    grep -iE "$permanent_re" "$build_log" | head -3
    return 1 # Non-retryable error
  fi

  # If we can't classify the error, treat it as non-retryable to be safe
  echo "❓ Unclassified build error - treating as permanent (not retrying)"
  echo "📋 Last few lines of build log:"
  tail -5 "$build_log"
  return 1 # Non-retryable error by default
}
27109

110+
# Drive build_with_monitoring through up to MAX_RETRIES attempts, retrying
# only when the previous attempt reported a transient failure.
#
# Globals:
#   MAX_RETRIES      (read)    - maximum number of build attempts
#   BASE_RETRY_DELAY (read)    - base backoff delay in seconds (for messages)
#   ATTEMPT_COUNT    (written) - number of the attempt currently running
# Arguments:
#   $1 - config file list passed through to build_with_monitoring
# Outputs:
#   Per-attempt status messages on stdout.
# Returns:
#   0 if any attempt succeeded, 1 otherwise.
function build_with_retries {
  local config=$1
  local max_attempts=$MAX_RETRIES
  local attempt result next_delay

  # Always make at least one attempt: with MAX_RETRIES set to 0 (or to
  # something that is not a number) the loop below would otherwise never run
  # and the build would be reported as failed without ever being started.
  if ! [[ $max_attempts =~ ^[0-9]+$ ]] || (( 10#$max_attempts < 1 )); then
    max_attempts=1
  fi
  max_attempts=$(( 10#$max_attempts ))

  for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
    log_attempt "$attempt" "$max_attempts"
    ATTEMPT_COUNT=$attempt

    build_with_monitoring "$config"
    result=$?

    case $result in
      0)
        echo "✅ Build succeeded on attempt ${attempt}/${max_attempts}"
        return 0
        ;;
      1)
        echo "❌ Build failed on attempt ${attempt}/${max_attempts} with permanent error"
        echo "🚫 Permanent error detected - failing immediately (no retry)"
        return 1 # Don't retry permanent errors
        ;;
      2)
        echo "❌ Build failed on attempt ${attempt}/${max_attempts} with transient error"
        if (( attempt < max_attempts )); then
          # Same delay log_attempt will sleep for at the start of the next
          # attempt: base * 2^(next_attempt - 2) == base * 2^(attempt - 1).
          next_delay=$(( BASE_RETRY_DELAY * (1 << (attempt - 1)) ))
          echo "🔄 Transient error - will retry in ${next_delay} seconds (exponential backoff)..."
        else
          echo "💀 All retry attempts exhausted for transient error"
        fi
        ;;
      *)
        # Fallback for unexpected return codes
        echo "❌ Build failed on attempt ${attempt}/${max_attempts} with unexpected error code: $result"
        echo "⚠️ Treating as permanent error - not retrying"
        return 1
        ;;
    esac
  done

  return 1
}
151+
152+
echo "📦 Installing dependencies..."
28153
gem install bundler --silent
29154
bundle install --quiet
30-
build _config_cockroachdb.yml,_config_url.yml
31155

156+
echo ""
157+
echo "🚀 Starting build process..."
158+
echo "=============================="
159+
160+
# Main build with retry logic
161+
if build_with_retries "_config_cockroachdb.yml,_config_url.yml"; then
162+
echo ""
163+
echo "✅ MAIN BUILD COMPLETED SUCCESSFULLY"
164+
else
165+
echo ""
166+
echo "❌ MAIN BUILD FAILED AFTER ALL RETRIES"
167+
echo ""
168+
echo "📊 FINAL BUILD STATISTICS:"
169+
echo "=========================="
170+
echo "Total attempts: ${ATTEMPT_COUNT}/${MAX_RETRIES}"
171+
echo "Build duration: $(($(date +%s) - BUILD_START_TIME))s"
172+
echo "Branch: ${BRANCH:-unknown}"
173+
echo "Context: ${CONTEXT:-unknown}"
174+
exit 1
175+
fi
176+
177+
echo ""
178+
echo "📂 Setting up site files..."
32179
cp _site/docs/_redirects _site/_redirects
33180
cp _site/docs/404.html _site/404.html
34181

35-
# Set up htmltest
36-
182+
echo ""
183+
echo "🔧 Installing htmltest..."
37184
curl -s https://htmltest.wjdp.uk | bash
38185
if [[ $? != 0 ]]; then
39-
echo "Failed to install htmltest"
186+
echo "Failed to install htmltest"
40187
exit 1
41188
fi
42189

43190
./bin/build.sh>/dev/null 2>&1
44191

45192
# Run htmltest to check external links on scheduled nightly runs
46-
# (see .github/workflows/nightly.yml)
47-
48193
if [[ "$INCOMING_HOOK_TITLE" = "nightly" ]]; then
194+
echo "🔍 Running full htmltest (nightly)..."
49195
./bin/htmltest
50196
if [[ $? != 0 ]]; then
51197
exit 1
52198
fi
53199
fi
54200

55-
# Run Algolia if building main
201+
# Skip Algolia for testing
56202
if [ "$CONTEXT" == "production" ]; then
57203
echo "Temporarily skipping the Algolia index build"
58-
# echo "Building Algolia index..."
59-
# ALGOLIA_API_KEY=${PROD_ALGOLIA_API_KEY} bundle exec jekyll algolia --config _config_base.yml,_config_url.yml --builds-config _config_cockroachdb.yml
60204
else
61205
echo "Not building Algolia index for context $CONTEXT"
62206
fi
63207

64208
# Run htmltest, but skip checking external links to speed things up
209+
echo ""
210+
echo "🔍 Running htmltest (skip external)..."
65211
./bin/htmltest --skip-external
66212
if [[ $? != 0 ]]; then
213+
echo "❌ htmltest failed"
67214
exit 1
68215
fi
69216

70217
# Run tests defined in __tests__
218+
echo ""
219+
echo "🧪 Running Jest tests..."
71220
./node_modules/.bin/jest
72-
exit $?
221+
test_result=$?
222+
223+
# Final summary
224+
echo ""
225+
echo "🎯 BUILD SUMMARY"
226+
echo "================"
227+
echo "✅ Build completed successfully!"
228+
echo "📊 Build statistics:"
229+
echo " - Total attempts: ${ATTEMPT_COUNT}/${MAX_RETRIES}"
230+
echo " - Build duration: $(($(date +%s) - BUILD_START_TIME))s"
231+
echo " - Branch: ${BRANCH:-unknown}"
232+
echo " - Context: ${CONTEXT:-unknown}"
233+
echo " - Jest result: ${test_result}"
234+
235+
exit $test_result

src/current/package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
"devDependencies": {
55
"@netlify/plugin-lighthouse": "^4.0.7",
66
"jest": "^26",
7-
"jest-cli": "^26"
7+
"jest-cli": "^26",
8+
"netlify-plugin-cache": "^1.0.3"
89
},
910
"dependencies": {
1011
"@netlify/edge-functions": "^2.10.0",

0 commit comments

Comments
 (0)