Skip to content

Commit 0f2e4d5

Browse files
committed
feat: add exponential backoff to API client
The API client now retries retrieving the serviceKey via the initialization request in an exponential backoff loop. The exponential algorithm utilizes an error factor to spread out the retrials, so when multiple clients are started at the same time they are unlikely to issue any request after the initial one at the same time.
1 parent b463c22 commit 0f2e4d5

File tree

13 files changed

+423
-52
lines changed

13 files changed

+423
-52
lines changed

lib/agent/api/httpError.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
'use strict'
2+
3+
var http = require('http')
4+
var inherits = require('util').inherits
5+
6+
function HttpError (statusCode, response) {
7+
Error.captureStackTrace && Error.captureStackTrace(this, this.constructor)
8+
this.message = String(statusCode) + ' - ' + http.STATUS_CODES[statusCode]
9+
this.statusCode = statusCode
10+
this.response = response
11+
}
12+
inherits(HttpError, Error)
13+
14+
module.exports = HttpError

lib/agent/api/httpRetry.js

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
'use strict'
2+
3+
var HttpError = require('./httpError')
4+
var exponentialRetry = require('../../utils/exponentialRetry')
5+
6+
var DEFAULT_MAX_RETRIES = Infinity
7+
var DEFAULT_MAX_WAIT = 10 * 60 * 1000
8+
var DEFAULT_EXP_SCALE = 0.828
9+
var DEFAULT_LIN_SCALE = 150
10+
var DEFAULT_ERR_SCALE = 0.24 // +-12% error
11+
12+
function httpRetry (opts, cb) {
13+
opts = opts || {}
14+
var client = opts.client
15+
var payload = opts.payload
16+
var reqOpts = opts.reqOpts
17+
var errorFilter = opts.errorFilter
18+
var maxRetries = opts.maxRetries != null ? opts.maxRetries : DEFAULT_MAX_RETRIES
19+
var maxWait = opts.maxWait != null ? opts.maxWait : DEFAULT_MAX_WAIT
20+
21+
function httpRequest (cb) {
22+
var completed = false
23+
var req = client.request(reqOpts, function (response) {
24+
completed = true
25+
if (response.statusCode >= 400) {
26+
return cb(new HttpError(response.statusCode), response)
27+
}
28+
return cb(null, response)
29+
})
30+
req.on('error', function (err) {
31+
if (!completed) {
32+
completed = true
33+
return cb(err)
34+
}
35+
})
36+
if (payload) {
37+
req.write(payload)
38+
}
39+
req.end()
40+
}
41+
return exponentialRetry({
42+
maxRetries: maxRetries,
43+
maxWait: maxWait,
44+
expScale: DEFAULT_EXP_SCALE,
45+
linScale: DEFAULT_LIN_SCALE,
46+
errScale: DEFAULT_ERR_SCALE,
47+
errorFilter: errorFilter
48+
}, httpRequest, cb)
49+
}
50+
51+
module.exports = httpRetry

lib/agent/api/httpRetry.spec.js

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
'use strict'
2+
3+
var http = require('http')
4+
var HttpError = require('./httpError')
5+
var httpRetry = require('./httpRetry')
6+
var expect = require('chai').expect
7+
var nock = require('nock')
8+
var bl = require('bl')
9+
10+
describe('httpRetry', function (done) {
11+
it('retries', function (done) {
12+
nock('http://something.com')
13+
.post('/', 'data')
14+
.reply(500)
15+
nock('http://something.com')
16+
.post('/', 'data')
17+
.reply(200, 'response')
18+
19+
this.sandbox.stub(global, 'setTimeout').callsFake(function (cb, int) {
20+
return process.nextTick(cb)
21+
})
22+
23+
httpRetry({
24+
client: http,
25+
maxRetries: 1,
26+
reqOpts: {
27+
hostname: 'something.com',
28+
method: 'POST',
29+
path: '/'
30+
},
31+
payload: 'data'
32+
}, function (err, data) {
33+
expect(err).to.not.exist
34+
data.pipe(bl(function (err, data) {
35+
expect(err).not.to.exist
36+
expect(data.toString()).to.eql('response')
37+
done()
38+
}))
39+
})
40+
})
41+
it('returns error', function (done) {
42+
nock('http://something.com')
43+
.post('/', 'data')
44+
.reply(500, 'bad')
45+
46+
this.sandbox.stub(global, 'setTimeout').callsFake(function (cb, int) {
47+
return process.nextTick(cb)
48+
})
49+
50+
httpRetry({
51+
client: http,
52+
maxRetries: 0,
53+
reqOpts: {
54+
hostname: 'something.com',
55+
method: 'POST',
56+
path: '/'
57+
},
58+
payload: 'data'
59+
}, function (err, data) {
60+
expect(err).to.be.instanceof(HttpError)
61+
data.pipe(bl(function (err, data) {
62+
expect(err).to.not.exist
63+
expect(data.toString()).to.eql('bad')
64+
done()
65+
}))
66+
})
67+
})
68+
})

lib/agent/api/index.js

Lines changed: 30 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,12 @@ var util = require('util')
55
var requestSync = require('sync-request')
66
var isNumber = require('lodash.isnumber')
77
var debug = require('../../utils/debug')('api')
8-
var format = require('util').format
98
var assign = require('lodash.assign')
109
var HttpsProxyAgent = require('https-proxy-agent')
1110
var stringify = require('json-stringify-safe')
1211
var BufferStream = require('./bufferStream')
12+
var httpRetry = require('./httpRetry')
13+
var CompositeError = require('../../utils/compositeError')
1314

1415
var bl = require('bl')
1516
var libPackage = require('../../../package')
@@ -34,6 +35,7 @@ function CollectorApi (options) {
3435
this.serviceName = options.serviceName
3536
this.baseRetryInterval = 1000 * 60 * 30 // 30 minutes
3637
this.serviceKey = null
38+
this.getServiceMaxRetries = Infinity
3739

3840
if (options.proxy) {
3941
this.proxyAgent = new HttpsProxyAgent(options.proxy)
@@ -270,7 +272,8 @@ CollectorApi.prototype.getService = function (cb) {
270272
cpus: self.system.cpus
271273
}
272274
})
273-
var req = https.request({
275+
276+
var reqOpts = {
274277
hostname: opts.hostname,
275278
port: opts.port,
276279
path: opts.path,
@@ -283,51 +286,41 @@ CollectorApi.prototype.getService = function (cb) {
283286
'X-Reporter-Language': this.collectorLanguage,
284287
'Content-Length': Buffer.byteLength(payload)
285288
}
286-
}, function (res) {
287-
res.setEncoding('utf8')
288-
res.pipe(bl(function (err, resBuffer) {
289-
var response
290-
291-
var retryInterval = self.baseRetryInterval
289+
}
292290

293-
if (err) {
294-
debug.error('getService', err)
295-
return setTimeout(function () {
296-
debug.warn('getService', format('Retrying with %d ms', retryInterval))
297-
self.getService()
298-
}, retryInterval)
291+
httpRetry({
292+
client: https,
293+
reqOpts: reqOpts,
294+
payload: payload,
295+
errorFilter: function shouldContinueRetrying (err) {
296+
if (err.statusCode === 401) {
297+
return false
299298
}
300-
301-
var resText = resBuffer.toString('utf8')
302-
303-
if (res.statusCode === 401) {
304-
debug.error('getService', 'Api key rejected')
305-
return
299+
return true
300+
},
301+
maxRetries: this.getServiceMaxRetries
302+
},
303+
function done (err, result) {
304+
if (err) {
305+
if (err.statusCode === 401) {
306+
debug.error('getService', 'API key rejected')
306307
}
307-
if (res.statusCode > 399) {
308-
debug.error('getService', 'Service responded with ' + res.statusCode)
309-
return setTimeout(function () {
310-
debug.warn('getService', format('Retrying with %d ms', retryInterval))
311-
self.getService(cb)
312-
}, retryInterval)
308+
return cb(new CompositeError('Could not get service key', err))
309+
}
310+
var response
311+
result.pipe(bl(function (err, data) {
312+
if (err) {
313+
cb(err)
313314
}
314-
315315
try {
316-
response = JSON.parse(resText)
317-
} catch (ex) {
318-
return
316+
response = JSON.parse(data.toString('utf8'))
317+
} catch (err) {
318+
return cb(err)
319319
}
320-
321320
self.serviceKey = response.key
322321
cb(null, response.key)
323322
}))
324323
})
325-
326-
req.on('error', function (error) {
327-
debug.error('getService', error)
328-
})
329-
req.write(payload)
330-
req.end()
331324
}
332325

333326
function logServiceKeyError (method) {

lib/agent/api/index.spec.js

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -275,16 +275,18 @@ describe('The Trace CollectorApi module', function () {
275275
}
276276
})
277277
.post(defaultConfig.collectorApiServiceEndpoint, JSON.stringify(data))
278-
.times(2)
278+
.times(100)
279279
.reply(409, {})
280280

281-
collectorApi.getService()
282-
283-
collectorApi.baseRetryInterval = 1
281+
collectorApi.getServiceMaxRetries = 100
284282

285-
this.timeout(500)
283+
global.setTimeout = this.sandbox.stub(global, 'setTimeout').callsFake(function (cb, int) {
284+
return process.nextTick(cb)
285+
})
286286

287-
this.sandbox.stub(collectorApi, 'getService').callsFake(function () {
287+
collectorApi.getService(function (err) {
288+
expect(setTimeout).to.have.callCount(100)
289+
expect(err).to.exist
288290
done()
289291
})
290292
})

lib/agent/index.js

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -116,17 +116,12 @@ Agent.prototype.stop = function (callback) {
116116
debug.info('stop', 'Stopping agents...')
117117
var agents = this.agents
118118
var counter = 1
119-
var error
120119

121120
agents.forEach(function (agent) {
122-
agent.stop(function (err) {
123-
if (!error && err) {
124-
error = err
125-
}
126-
121+
agent.stop(function () {
127122
if (counter >= agents.length) {
128123
if (callback && typeof callback === 'function') {
129-
callback(error)
124+
callback()
130125
}
131126
} else {
132127
counter++

lib/utils/compositeError.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
'use strict'
2+
3+
var inherits = require('util').inherits
4+
5+
function CompositeError (message, cause) {
6+
if (message instanceof Error) {
7+
message = ''
8+
cause = message
9+
}
10+
this.message = message ? message.toString() : ''
11+
this.cause = cause
12+
Error.captureStackTrace && Error.captureStackTrace(this, this.constructor)
13+
if (this.stack != null && this.cause instanceof Error && this.cause.stack != null) {
14+
this.stack += '\nCaused by: ' + this.cause.stack
15+
}
16+
}
17+
inherits(CompositeError, Error)
18+
19+
module.exports = CompositeError

lib/utils/exponentialRetry.js

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
'use strict'
2+
3+
var inherits = require('util').inherits
4+
var retry = require('async/retry')
5+
6+
var DEFAULT_MAX_RETRIES = Infinity
7+
var DEFAULT_MAX_WAIT = Infinity
8+
var DEFAULT_EXP_SCALE = 1
9+
var DEFAULT_LIN_SCALE = 1
10+
var DEFAULT_TRANS = 0
11+
var DEFAULT_ERR_SCALE = 0
12+
var DEFAULT_ERR_TRANS = 0
13+
14+
function MaxRetriesExceededError (n, last) {
15+
Error.captureStackTrace && Error.captureStackTrace(this, this.constructor)
16+
this.message = 'Network request max retry limit reached after ' + n + ' attempts. Last error message was: ' + last.message
17+
if (this.stack && last.stack) {
18+
this.stack += '\nCaused by: ' + last.stack
19+
}
20+
}
21+
inherits(MaxRetriesExceededError, Error)
22+
23+
function exponentialRetry (opts, task, cb) {
24+
if (typeof opts === 'function') {
25+
cb = task
26+
task = opts
27+
opts = {}
28+
}
29+
opts = opts || {}
30+
var maxRetries = opts.maxRetries != null ? opts.maxRetries : DEFAULT_MAX_RETRIES
31+
var maxWait = opts.maxWait != null ? opts.maxWait : DEFAULT_MAX_WAIT
32+
var expScale = opts.expScale != null ? opts.expScale : DEFAULT_EXP_SCALE
33+
var linScale = opts.linScale != null ? opts.linScale : DEFAULT_LIN_SCALE
34+
var trans = opts.trans != null ? opts.trans : DEFAULT_TRANS
35+
var errScale = opts.errScale != null ? opts.errScale : DEFAULT_ERR_SCALE
36+
var errTrans = opts.errTrans != null ? opts.errTrans : DEFAULT_ERR_TRANS
37+
var errorFilter = opts.errorFilter
38+
39+
return retry({
40+
times: maxRetries + 1,
41+
errorFilter: errorFilter,
42+
interval: function (i) {
43+
var wait = Math.exp((i - 1) * expScale) * linScale + trans
44+
if (wait > maxWait) {
45+
wait = maxWait
46+
}
47+
var rnd = 0.5 - Math.random()
48+
wait = wait + (wait * rnd * errScale) + errTrans
49+
var res = Math.floor(wait)
50+
return res
51+
}
52+
}, task, cb)
53+
}
54+
55+
module.exports = exponentialRetry
56+
module.exports.MaxRetriesExceededError = MaxRetriesExceededError

0 commit comments

Comments
 (0)