Skip to content

Commit 5b5a879

Browse files
authored
Merge branch 'master' into cf/change-word
2 parents cd8554b + 5635be9 commit 5b5a879

File tree

5 files changed

+80
-30
lines changed

5 files changed

+80
-30
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ on:
1717
- "release/**"
1818

1919
env:
20-
GO_VERSION: 1.25
20+
GO_VERSION: 1.25
2121

2222
jobs:
2323
test:

server/fork_integration_test.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func TestForkingAgainstTestnet(t *testing.T) {
4141
conn, err := grpc.NewClient(
4242
"access.testnet.nodes.onflow.org:9000",
4343
grpc.WithTransportCredentials(insecure.NewCredentials()),
44-
grpc.WithDefaultServiceConfig(utils.DefaultGRPCServiceConfig),
44+
utils.DefaultGRPCRetryInterceptor(),
4545
)
4646
if err != nil {
4747
t.Fatalf("dial remote: %v", err)
@@ -53,7 +53,7 @@ func TestForkingAgainstTestnet(t *testing.T) {
5353
if err != nil {
5454
t.Fatalf("get remote header: %v", err)
5555
}
56-
remoteHeight := rh.Block.Height
56+
remoteHeight := rh.Block.Height - 10 // Use a buffer to avoid edge cases
5757

5858
cfg := &Config{
5959
// Do not start listeners; NewEmulatorServer only configures components.
@@ -177,7 +177,7 @@ func TestForkingAgainstMainnet(t *testing.T) {
177177
conn, err := grpc.NewClient(
178178
"access.mainnet.nodes.onflow.org:9000",
179179
grpc.WithTransportCredentials(insecure.NewCredentials()),
180-
grpc.WithDefaultServiceConfig(utils.DefaultGRPCServiceConfig),
180+
utils.DefaultGRPCRetryInterceptor(),
181181
)
182182
if err != nil {
183183
t.Fatalf("dial remote: %v", err)
@@ -189,7 +189,7 @@ func TestForkingAgainstMainnet(t *testing.T) {
189189
if err != nil {
190190
t.Fatalf("get remote header: %v", err)
191191
}
192-
remoteHeight := rh.Block.Height
192+
remoteHeight := rh.Block.Height - 10 // Use a buffer to avoid edge cases
193193

194194
cfg := &Config{
195195
// Do not start listeners; NewEmulatorServer only configures components.

server/server.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ func DetectRemoteChainID(url string) (flowgo.ChainID, error) {
284284
conn, err := grpc.NewClient(
285285
url,
286286
grpc.WithTransportCredentials(insecure.NewCredentials()),
287-
grpc.WithDefaultServiceConfig(utils.DefaultGRPCServiceConfig),
287+
utils.DefaultGRPCRetryInterceptor(),
288288
)
289289
if err != nil {
290290
return "", err

storage/remote/store.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ func New(provider *sqlite.Store, logger *zerolog.Logger, options ...Option) (*St
120120
store.host,
121121
grpc.WithTransportCredentials(insecure.NewCredentials()),
122122
grpc.WithDefaultCallOptions(grpc.MaxCallRecvMsgSize(1024*1024*1024)),
123-
grpc.WithDefaultServiceConfig(utils.DefaultGRPCServiceConfig),
123+
utils.DefaultGRPCRetryInterceptor(),
124124
)
125125
if err != nil {
126126
return nil, fmt.Errorf("could not connect to rpc host: %w", err)

utils/grpc.go

Lines changed: 73 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,77 @@
1818

1919
package utils
2020

21-
// DefaultGRPCServiceConfig provides automatic retry configuration for transient gRPC errors.
22-
// This config is applied to all remote gRPC connections to handle network flakiness in CI
23-
// and other environments.
24-
//
25-
// Retries on:
26-
// - UNAVAILABLE: Service temporarily unavailable (e.g., node restarting)
27-
// - RESOURCE_EXHAUSTED: Rate limiting from remote node
28-
// - UNKNOWN: Connection failures, DNS issues, and other network errors
29-
//
30-
// Note: We only retry on clearly transient network/availability errors.
31-
// We do NOT retry on INTERNAL (programming errors), ABORTED (conflicts),
32-
// or DEADLINE_EXCEEDED (to avoid cascading failures on slow services).
33-
const DefaultGRPCServiceConfig = `{
34-
"methodConfig": [{
35-
"name": [{"service": ""}],
36-
"retryPolicy": {
37-
"maxAttempts": 5,
38-
"initialBackoff": "0.1s",
39-
"maxBackoff": "30s",
40-
"backoffMultiplier": 2,
41-
"retryableStatusCodes": ["UNAVAILABLE", "RESOURCE_EXHAUSTED", "UNKNOWN"]
21+
import (
22+
"context"
23+
"time"
24+
25+
"google.golang.org/grpc"
26+
"google.golang.org/grpc/codes"
27+
"google.golang.org/grpc/status"
28+
)
29+
// Retry policy used by retryInterceptor.
const (
	// defaultMaxAttempts bounds the total number of invocations of a single
	// call, counting the initial attempt.
	defaultMaxAttempts = 10
	// defaultInitialBackoff is the wait before the first retry.
	defaultInitialBackoff = time.Second
	// defaultMaxBackoff is the upper limit for the wait between two attempts.
	defaultMaxBackoff = 30 * time.Second
	// defaultBackoffFactor is the multiplier applied to the wait after each retry.
	defaultBackoffFactor = 2.0
)
36+
37+
// DefaultGRPCRetryInterceptor returns a unary client interceptor that retries
38+
// transient failures with exponential backoff. Unlike native gRPC retries, this
39+
// ignores server pushback headers (grpc-retry-pushback-ms) so rate-limited calls
40+
// will retry client-side rather than fail immediately.
41+
func DefaultGRPCRetryInterceptor() grpc.DialOption {
42+
return grpc.WithChainUnaryInterceptor(retryInterceptor)
43+
}
44+
45+
func retryInterceptor(
46+
ctx context.Context,
47+
method string,
48+
req, reply any,
49+
cc *grpc.ClientConn,
50+
invoker grpc.UnaryInvoker,
51+
opts ...grpc.CallOption,
52+
) error {
53+
var lastErr error
54+
backoff := defaultInitialBackoff
55+
56+
for attempt := 0; attempt < defaultMaxAttempts; attempt++ {
57+
if attempt > 0 {
58+
// Wait before retry
59+
select {
60+
case <-time.After(backoff):
61+
case <-ctx.Done():
62+
return ctx.Err()
63+
}
64+
// Exponential backoff with cap
65+
backoff = time.Duration(float64(backoff) * defaultBackoffFactor)
66+
if backoff > defaultMaxBackoff {
67+
backoff = defaultMaxBackoff
68+
}
69+
}
70+
71+
lastErr = invoker(ctx, method, req, reply, cc, opts...)
72+
if lastErr == nil {
73+
return nil
74+
}
75+
76+
// Check if error is retryable
77+
code := status.Code(lastErr)
78+
if !isRetryableCode(code) {
79+
return lastErr
4280
}
43-
}]
44-
}`
81+
}
82+
83+
return lastErr
84+
}
85+
86+
func isRetryableCode(code codes.Code) bool {
87+
switch code {
88+
case codes.Unavailable, codes.ResourceExhausted, codes.Unknown:
89+
return true
90+
default:
91+
return false
92+
}
93+
}
94+

0 commit comments

Comments
 (0)