Skip to content

Commit a3206f0

Browse files
authored
GT-313 Fix for ContextExceeded error during backup upload (#1263)
1 parent 0cd5a2a commit a3206f0

File tree

7 files changed

+53
-33
lines changed

7 files changed

+53
-33
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
- (Bugfix) Fix agency timeout
88
- (Improvement) Extract Agency Timeout
99
- (Feature) Rebalancer V2
10+
- (Bugfix) Fix for ContextExceeded error during backup upload
1011

1112
## [1.2.30](https://github.com/arangodb/kube-arangodb/tree/1.2.30) (2023-06-16)
1213
- (Feature) AgencyCache Interface

cmd/cmd.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -155,13 +155,15 @@ var (
155155
concurrentUploads int
156156
}
157157
operatorTimeouts struct {
158-
k8s time.Duration
159-
arangoD time.Duration
160-
arangoDCheck time.Duration
161-
reconciliation time.Duration
162-
agency time.Duration
163-
shardRebuild time.Duration
164-
shardRebuildRetry time.Duration
158+
k8s time.Duration
159+
arangoD time.Duration
160+
arangoDCheck time.Duration
161+
reconciliation time.Duration
162+
agency time.Duration
163+
shardRebuild time.Duration
164+
shardRebuildRetry time.Duration
165+
backupArangoD time.Duration
166+
backupUploadArangoD time.Duration
165167
}
166168
chaosOptions struct {
167169
allowed bool
@@ -216,6 +218,8 @@ func init() {
216218
f.DurationVar(&operatorTimeouts.reconciliation, "timeout.reconciliation", globals.DefaultReconciliationTimeout, "The reconciliation timeout to the ArangoDB CR")
217219
f.DurationVar(&operatorTimeouts.shardRebuild, "timeout.shard-rebuild", globals.DefaultOutSyncedShardRebuildTimeout, "Timeout after which particular out-synced shard is considered as failed and rebuild is triggered")
218220
f.DurationVar(&operatorTimeouts.shardRebuildRetry, "timeout.shard-rebuild-retry", globals.DefaultOutSyncedShardRebuildRetryTimeout, "Timeout after which rebuild shards retry flow is triggered")
221+
f.DurationVar(&operatorTimeouts.backupArangoD, "timeout.backup-arangod", globals.BackupDefaultArangoClientTimeout, "The request timeout to the ArangoDB during backup calls")
222+
f.DurationVar(&operatorTimeouts.backupUploadArangoD, "timeout.backup-upload", globals.BackupUploadArangoClientTimeout, "The request timeout to the ArangoDB during uploading files")
219223
f.DurationVar(&shutdownOptions.delay, "shutdown.delay", defaultShutdownDelay, "The delay before running shutdown handlers")
220224
f.DurationVar(&shutdownOptions.timeout, "shutdown.timeout", defaultShutdownTimeout, "Timeout for shutdown handlers")
221225
f.BoolVar(&operatorOptions.scalingIntegrationEnabled, "internal.scaling-integration", false, "Enable Scaling Integration")
@@ -267,6 +271,9 @@ func executeMain(cmd *cobra.Command, args []string) {
267271
globals.GetGlobalTimeouts().Reconciliation().Set(operatorTimeouts.reconciliation)
268272
globals.GetGlobalTimeouts().ShardRebuild().Set(operatorTimeouts.shardRebuild)
269273
globals.GetGlobalTimeouts().ShardRebuildRetry().Set(operatorTimeouts.shardRebuildRetry)
274+
globals.GetGlobalTimeouts().BackupArangoClientTimeout().Set(operatorTimeouts.backupArangoD)
275+
globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().Set(operatorTimeouts.backupUploadArangoD)
276+
270277
globals.GetGlobals().Kubernetes().RequestBatchSize().Set(operatorKubernetesOptions.maxBatchSize)
271278
globals.GetGlobals().Backup().ConcurrentUploads().Set(operatorBackup.concurrentUploads)
272279

pkg/handlers/backup/arango_client_impl.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ func newArangoClientBackupFactory(handler *handler) ArangoClientFactory {
6363
}
6464

6565
func (ac *arangoClientBackupImpl) List() (map[driver.BackupID]driver.BackupMeta, error) {
66-
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
66+
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
6767
defer cancel()
6868

6969
backups, err := ac.driver.Backup().List(ctx, nil)
@@ -75,7 +75,7 @@ func (ac *arangoClientBackupImpl) List() (map[driver.BackupID]driver.BackupMeta,
7575
}
7676

7777
func (ac *arangoClientBackupImpl) Create() (ArangoBackupCreateResponse, error) {
78-
dt := defaultArangoClientTimeout
78+
dt := globals.GetGlobalTimeouts().BackupArangoClientTimeout().Get()
7979

8080
co := driver.BackupCreateOptions{}
8181

@@ -110,7 +110,7 @@ func (ac *arangoClientBackupImpl) Create() (ArangoBackupCreateResponse, error) {
110110
}
111111

112112
func (ac *arangoClientBackupImpl) Get(backupID driver.BackupID) (driver.BackupMeta, error) {
113-
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
113+
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
114114
defer cancel()
115115

116116
// list, err := ac.driver.Backup().List(ctx, &driver.BackupListOptions{ID: backupID})
@@ -148,7 +148,7 @@ func (ac *arangoClientBackupImpl) getCredentialsFromSecret(ctx context.Context,
148148
}
149149

150150
func (ac *arangoClientBackupImpl) Upload(backupID driver.BackupID) (driver.BackupTransferJobID, error) {
151-
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
151+
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().WithTimeout(context.Background())
152152
defer cancel()
153153

154154
uploadSpec := ac.backup.Spec.Upload
@@ -165,7 +165,7 @@ func (ac *arangoClientBackupImpl) Upload(backupID driver.BackupID) (driver.Backu
165165
}
166166

167167
func (ac *arangoClientBackupImpl) Download(backupID driver.BackupID) (driver.BackupTransferJobID, error) {
168-
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
168+
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientUploadTimeout().WithTimeout(context.Background())
169169
defer cancel()
170170

171171
downloadSpec := ac.backup.Spec.Download
@@ -182,7 +182,7 @@ func (ac *arangoClientBackupImpl) Download(backupID driver.BackupID) (driver.Bac
182182
}
183183

184184
func (ac *arangoClientBackupImpl) Progress(jobID driver.BackupTransferJobID) (ArangoBackupProgress, error) {
185-
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
185+
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
186186
defer cancel()
187187

188188
report, err := ac.driver.Backup().Progress(ctx, jobID)
@@ -243,14 +243,14 @@ func (ac *arangoClientBackupImpl) Exists(backupID driver.BackupID) (bool, error)
243243
}
244244

245245
func (ac *arangoClientBackupImpl) Delete(backupID driver.BackupID) error {
246-
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
246+
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
247247
defer cancel()
248248

249249
return ac.driver.Backup().Delete(ctx, backupID)
250250
}
251251

252252
func (ac *arangoClientBackupImpl) Abort(jobID driver.BackupTransferJobID) error {
253-
ctx, cancel := context.WithTimeout(context.Background(), defaultArangoClientTimeout)
253+
ctx, cancel := globals.GetGlobalTimeouts().BackupArangoClientTimeout().WithTimeout(context.Background())
254254
defer cancel()
255255

256256
return ac.driver.Backup().Abort(ctx, jobID)

pkg/handlers/backup/backup_suite_test.go

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -52,11 +52,9 @@ func newFakeHandler() *handler {
5252
k := fake.NewSimpleClientset()
5353

5454
return &handler{
55-
client: f,
56-
kubeClient: k,
57-
58-
arangoClientTimeout: defaultArangoClientTimeout,
59-
eventRecorder: newEventInstance(event.NewEventRecorder("mock", k)),
55+
client: f,
56+
kubeClient: k,
57+
eventRecorder: newEventInstance(event.NewEventRecorder("mock", k)),
6058
}
6159
}
6260

pkg/handlers/backup/handler.go

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,8 @@ import (
5050
var logger = logging.Global().RegisterAndGetLogger("backup-operator", logging.Info)
5151

5252
const (
53-
defaultArangoClientTimeout = 30 * time.Second
54-
retryCount = 25
55-
retryDelay = time.Second
53+
retryCount = 25
54+
retryDelay = time.Second
5655

5756
// StateChange is the name of the event sent when the state changes
5857
StateChange = "StateChange"
@@ -71,7 +70,6 @@ type handler struct {
7170
eventRecorder event.RecorderInstance
7271

7372
arangoClientFactory ArangoClientFactory
74-
arangoClientTimeout time.Duration
7573

7674
operator operator.Operator
7775
}

pkg/handlers/backup/register.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,6 @@ func RegisterInformer(operator operator.Operator, recorder event.Recorder, clien
5353
eventRecorder: newEventInstance(recorder),
5454

5555
operator: operator,
56-
57-
arangoClientTimeout: defaultArangoClientTimeout,
5856
}
5957
h.arangoClientFactory = newArangoClientBackupFactory(h)
6058

pkg/util/globals/global.go

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@ const (
2929
DefaultArangoDCheckTimeout = time.Second * 2
3030
DefaultReconciliationTimeout = time.Minute
3131

32+
BackupDefaultArangoClientTimeout = 30 * time.Second
33+
BackupUploadArangoClientTimeout = 300 * time.Second
34+
3235
// DefaultOutSyncedShardRebuildTimeout
3336
// timeout after which a particular out-synced shard is considered failed and a rebuild is triggered
3437
DefaultOutSyncedShardRebuildTimeout = time.Minute * 60
@@ -42,13 +45,15 @@ const (
4245

4346
var globalObj = &globals{
4447
timeouts: &globalTimeouts{
45-
requests: NewTimeout(DefaultKubernetesTimeout),
46-
arangod: NewTimeout(DefaultArangoDTimeout),
47-
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
48-
reconciliation: NewTimeout(DefaultReconciliationTimeout),
49-
agency: NewTimeout(DefaultArangoDAgencyTimeout),
50-
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
51-
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
48+
requests: NewTimeout(DefaultKubernetesTimeout),
49+
arangod: NewTimeout(DefaultArangoDTimeout),
50+
arangodCheck: NewTimeout(DefaultArangoDCheckTimeout),
51+
reconciliation: NewTimeout(DefaultReconciliationTimeout),
52+
agency: NewTimeout(DefaultArangoDAgencyTimeout),
53+
shardRebuild: NewTimeout(DefaultOutSyncedShardRebuildTimeout),
54+
shardRebuildRetry: NewTimeout(DefaultOutSyncedShardRebuildRetryTimeout),
55+
backupArangoClientTimeout: NewTimeout(BackupDefaultArangoClientTimeout),
56+
backupArangoClientUploadTimeout: NewTimeout(BackupUploadArangoClientTimeout),
5257
},
5358
kubernetes: &globalKubernetes{
5459
requestBatchSize: NewInt64(DefaultKubernetesRequestBatchSize),
@@ -123,10 +128,15 @@ type GlobalTimeouts interface {
123128
ArangoD() Timeout
124129
ArangoDCheck() Timeout
125130
Agency() Timeout
131+
132+
BackupArangoClientTimeout() Timeout
133+
BackupArangoClientUploadTimeout() Timeout
126134
}
127135

128136
type globalTimeouts struct {
129137
requests, arangod, reconciliation, arangodCheck, agency, shardRebuild, shardRebuildRetry Timeout
138+
backupArangoClientTimeout Timeout
139+
backupArangoClientUploadTimeout Timeout
130140
}
131141

132142
func (g *globalTimeouts) Agency() Timeout {
@@ -156,3 +166,11 @@ func (g *globalTimeouts) ArangoD() Timeout {
156166
func (g *globalTimeouts) Kubernetes() Timeout {
157167
return g.requests
158168
}
169+
170+
func (g *globalTimeouts) BackupArangoClientTimeout() Timeout {
171+
return g.backupArangoClientTimeout
172+
}
173+
174+
func (g *globalTimeouts) BackupArangoClientUploadTimeout() Timeout {
175+
return g.backupArangoClientUploadTimeout
176+
}

0 commit comments

Comments
 (0)