Skip to content

Commit 62bf905

Browse files
committed
feat(observability): fix bugs found from product review + negative cases
This change adds recording of retry span annotations, catching cases in which exceptions where thrown but spans were not ended while testing out and visually confirming the results.
1 parent 0342e74 commit 62bf905

File tree

4 files changed

+146
-78
lines changed

4 files changed

+146
-78
lines changed

src/database.ts

Lines changed: 68 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -2815,6 +2815,10 @@ class Database extends common.GrpcServiceObject {
28152815
this.runStream(query, options)
28162816
.on('error', err => {
28172817
setSpanError(span, err);
2818+
console.log(
2819+
`\x1b[34mDatabase.run.error: ${err} isRecording: ${span.isRecording()}\x1b[00m`
2820+
);
2821+
span.end();
28182822
callback!(err as grpc.ServiceError, rows, stats, metadata);
28192823
})
28202824
.on('response', response => {
@@ -3060,7 +3064,7 @@ class Database extends common.GrpcServiceObject {
30603064
dataStream
30613065
.once('data', () => (dataReceived = true))
30623066
.once('error', err => {
3063-
setSpanError(span, err);
3067+
setSpanErrorAndException(span, err as Error);
30643068

30653069
if (
30663070
!dataReceived &&
@@ -3222,8 +3226,8 @@ class Database extends common.GrpcServiceObject {
32223226
span.addEvent('No session available', {
32233227
'session.id': session?.id,
32243228
});
3225-
this.runTransaction(options, runFn!);
32263229
span.end();
3230+
this.runTransaction(options, runFn!);
32273231
return;
32283232
}
32293233

@@ -3242,8 +3246,8 @@ class Database extends common.GrpcServiceObject {
32423246
}
32433247

32443248
const release = () => {
3245-
span.end();
32463249
this.pool_.release(session!);
3250+
span.end();
32473251
};
32483252

32493253
const runner = new TransactionRunner(
@@ -3253,28 +3257,34 @@ class Database extends common.GrpcServiceObject {
32533257
if (err) {
32543258
setSpanError(span, err!);
32553259
}
3256-
span.end();
32573260
runFn!(err, resp);
32583261
},
32593262
options
32603263
);
32613264

32623265
runner.run().then(release, err => {
3263-
if (err) {
3264-
setSpanError(span, err!);
3265-
}
3266+
setSpanError(span, err);
32663267

32673268
if (isSessionNotFoundError(err)) {
32683269
span.addEvent('No session available', {
32693270
'session.id': session?.id,
32703271
});
3272+
span.addEvent('Retrying');
32713273
release();
3272-
this.runTransaction(options, runFn!);
3274+
this.runTransaction(
3275+
options,
3276+
(
3277+
err: ServiceError | null,
3278+
txn: Transaction | null | undefined
3279+
) => {
3280+
runFn!(err, txn);
3281+
}
3282+
);
32733283
} else {
3274-
if (!err) {
3275-
span.addEvent('Using Session', {'session.id': session!.id});
3276-
}
3277-
setImmediate(runFn!, err);
3284+
span.addEvent('Using Session', {'session.id': session!.id});
3285+
setImmediate((err: null | ServiceError) => {
3286+
runFn!(err);
3287+
}, err);
32783288
release();
32793289
}
32803290
});
@@ -3363,46 +3373,55 @@ class Database extends common.GrpcServiceObject {
33633373

33643374
let sessionId = '';
33653375
const getSession = this.pool_.getSession.bind(this.pool_);
3366-
const span = getActiveOrNoopSpan();
3367-
// Loop to retry 'Session not found' errors.
3368-
// (and yes, we like while (true) more than for (;;) here)
3369-
// eslint-disable-next-line no-constant-condition
3370-
while (true) {
3371-
try {
3372-
const [session, transaction] = await promisify(getSession)();
3373-
transaction.requestOptions = Object.assign(
3374-
transaction.requestOptions || {},
3375-
options.requestOptions
3376-
);
3377-
if (options.optimisticLock) {
3378-
transaction.useOptimisticLock();
3379-
}
3380-
if (options.excludeTxnFromChangeStreams) {
3381-
transaction.excludeTxnFromChangeStreams();
3382-
}
3383-
sessionId = session?.id;
3384-
span.addEvent('Using Session', {'session.id': sessionId});
3385-
const runner = new AsyncTransactionRunner<T>(
3386-
session,
3387-
transaction,
3388-
runFn,
3389-
options
3390-
);
33913376

3392-
try {
3393-
return await runner.run();
3394-
} finally {
3395-
this.pool_.release(session);
3396-
}
3397-
} catch (e) {
3398-
if (!isSessionNotFoundError(e as ServiceError)) {
3399-
span.addEvent('No session available', {
3400-
'session.id': sessionId,
3401-
});
3402-
throw e;
3377+
return startTrace(
3378+
'Database.runTransactionAsync',
3379+
this._traceConfig,
3380+
span => {
3381+
// Loop to retry 'Session not found' errors.
3382+
// (and yes, we like while (true) more than for (;;) here)
3383+
// eslint-disable-next-line no-constant-condition
3384+
while (true) {
3385+
try {
3386+
const [session, transaction] = await promisify(getSession)();
3387+
transaction.requestOptions = Object.assign(
3388+
transaction.requestOptions || {},
3389+
options.requestOptions
3390+
);
3391+
if (options.optimisticLock) {
3392+
transaction.useOptimisticLock();
3393+
}
3394+
if (options.excludeTxnFromChangeStreams) {
3395+
transaction.excludeTxnFromChangeStreams();
3396+
}
3397+
sessionId = session?.id;
3398+
span.addEvent('Using Session', {'session.id': sessionId});
3399+
const runner = new AsyncTransactionRunner<T>(
3400+
session,
3401+
transaction,
3402+
runFn,
3403+
options
3404+
);
3405+
3406+
try {
3407+
const result = await runner.run();
3408+
span.end();
3409+
return result;
3410+
} finally {
3411+
this.pool_.release(session);
3412+
}
3413+
} catch (e) {
3414+
if (!isSessionNotFoundError(e as ServiceError)) {
3415+
span.addEvent('No session available', {
3416+
'session.id': sessionId,
3417+
});
3418+
setSpanErrorAndException(span, e as Error);
3419+
throw e;
3420+
}
3421+
}
34033422
}
34043423
}
3405-
}
3424+
);
34063425
}
34073426

34083427
/**

src/instrument.ts

Lines changed: 31 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,8 @@ function ensureInitialContextManagerSet() {
118118
}
119119
}
120120

121+
const debugTraces = process.env.SPANNER_DEBUG_TRACES === 'true';
122+
121123
/**
122124
* startTrace begins an active span in the current active context
123125
* and passes it back to the set callback function. Each span will
@@ -142,6 +144,10 @@ export function startTrace<T>(
142144
SPAN_NAMESPACE_PREFIX + '.' + spanNameSuffix,
143145
{kind: SpanKind.CLIENT},
144146
span => {
147+
if (debugTraces) {
148+
patchSpanEndForDebugging(span);
149+
}
150+
145151
span.setAttribute(SEMATTRS_DB_SYSTEM, 'spanner');
146152
span.setAttribute(ATTR_OTEL_SCOPE_NAME, TRACER_NAME);
147153
span.setAttribute(ATTR_OTEL_SCOPE_VERSION, TRACER_VERSION);
@@ -165,11 +171,20 @@ export function startTrace<T>(
165171
}
166172
}
167173

168-
if (config.that) {
169-
const fn = cb.bind(config.that);
170-
return fn(span);
171-
} else {
172-
return cb(span);
174+
// If at all the invoked function throws an exception,
175+
// record the exception and then end this span.
176+
try {
177+
if (config.that) {
178+
const fn = cb.bind(config.that);
179+
return fn(span);
180+
} else {
181+
return cb(span);
182+
}
183+
} catch (e) {
184+
setSpanErrorAndException(span, e as Error);
185+
span.end();
186+
// Finally re-throw the exception.
187+
throw e;
173188
}
174189
}
175190
);
@@ -289,3 +304,14 @@ class noopSpan implements Span {
289304
return this;
290305
}
291306
}
307+
308+
function patchSpanEndForDebugging(span: Span) {
309+
const origSpanEnd = span.end;
310+
const wrapSpanEnd = function (this: Span) {
311+
console.trace(`\x1b[35m${spanNameSuffix}.end()\x1b[00m`);
312+
return origSpanEnd.apply(this);
313+
};
314+
Object.defineProperty(span, 'end', {
315+
value: wrapSpanEnd,
316+
});
317+
}

src/transaction-runner.ts

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import {isSessionNotFoundError} from './session-pool';
2626
import {Database} from './database';
2727
import {google} from '../protos/protos';
2828
import IRequestOptions = google.spanner.v1.IRequestOptions;
29+
import {getActiveOrNoopSpan, setSpanErrorAndException} from './instrument';
2930

3031
// eslint-disable-next-line @typescript-eslint/no-var-requires
3132
const jsonProtos = require('../protos/protos.json');
@@ -224,6 +225,7 @@ export abstract class Runner<T> {
224225
async run(): Promise<T> {
225226
const start = Date.now();
226227
const timeout = this.options.timeout!;
228+
const span = getActiveOrNoopSpan();
227229

228230
let lastError: grpc.ServiceError;
229231

@@ -233,8 +235,18 @@ export abstract class Runner<T> {
233235
const transaction = await this.getTransaction();
234236

235237
try {
236-
return await this._run(transaction);
238+
const result = await this._run(transaction);
239+
if (this.attempts > 0) {
240+
// No add to annotate if the transaction wasn't retried.
241+
span.addEvent('Transaction Attempt Succeeded', {
242+
attempt: this.attempts + 1,
243+
});
244+
}
245+
return result;
237246
} catch (e) {
247+
span.addEvent('Transaction Attempt Failed', {
248+
attempt: this.attempts + 1,
249+
});
238250
this.session.lastError = e as grpc.ServiceError;
239251
lastError = e as grpc.ServiceError;
240252
}
@@ -243,19 +255,29 @@ export abstract class Runner<T> {
243255
// thrown here. We do this to bubble this error up to the caller who is
244256
// responsible for retrying the transaction on a different session.
245257
if (
246-
!RETRYABLE.includes(lastError.code!) &&
247-
!isRetryableInternalError(lastError)
258+
!RETRYABLE.includes(lastError!.code!) &&
259+
!isRetryableInternalError(lastError!)
248260
) {
249-
throw lastError;
261+
span.addEvent('Transaction Attempt Aborted', {
262+
attempt: this.attempts + 1,
263+
});
264+
setSpanErrorAndException(span, lastError!);
265+
throw lastError!;
250266
}
251267

252268
this.attempts += 1;
253269

254-
const delay = this.getNextDelay(lastError);
270+
const delay = this.getNextDelay(lastError!);
271+
span.addEvent('Backing off', {delay: delay, attempt: this.attempts});
255272
await new Promise(resolve => setTimeout(resolve, delay));
256273
}
257274

258-
throw new DeadlineError(lastError!);
275+
span.addEvent('Transaction Attempt Aborted due to Deadline Error', {
276+
total_attempts: this.attempts + 1,
277+
});
278+
const err = new DeadlineError(lastError!);
279+
setSpanErrorAndException(span, err);
280+
throw err;
259281
}
260282
}
261283

src/transaction.ts

Lines changed: 19 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -757,12 +757,6 @@ export class Snapshot extends EventEmitter {
757757
this.begin();
758758
}
759759
setSpanError(span, err);
760-
})
761-
.on('end', err => {
762-
if (err) {
763-
setSpanError(span, err);
764-
}
765-
span.end();
766760
});
767761

768762
if (resultStream instanceof Stream) {
@@ -1288,7 +1282,6 @@ export class Snapshot extends EventEmitter {
12881282
} catch (e) {
12891283
const errorStream = new PassThrough();
12901284
setSpanErrorAndException(span, e as Error);
1291-
span.end();
12921285
setImmediate(() => errorStream.destroy(e as Error));
12931286
return errorStream;
12941287
}
@@ -1332,12 +1325,6 @@ export class Snapshot extends EventEmitter {
13321325
) {
13331326
this.begin();
13341327
}
1335-
})
1336-
.on('end', err => {
1337-
if (err) {
1338-
setSpanError(span, err as Error);
1339-
}
1340-
span.end();
13411328
});
13421329

13431330
if (resultStream instanceof Stream) {
@@ -2112,15 +2099,29 @@ export class Transaction extends Dml {
21122099
} else if (!this._useInRunner) {
21132100
reqOpts.singleUseTransaction = this._options;
21142101
} else {
2115-
this.begin().then(() => {
2116-
this.commit(options, (err, resp) => {
2117-
if (err) {
2102+
this.begin()
2103+
.then(
2104+
() => {
2105+
this.commit(options, (err, resp) => {
2106+
if (err) {
2107+
setSpanError(span, err);
2108+
}
2109+
span.end();
2110+
callback(err, resp);
2111+
});
2112+
},
2113+
err => {
21182114
setSpanError(span, err);
2115+
callback(err);
2116+
span.end();
21192117
}
2118+
)
2119+
.catch(err => {
2120+
setSpanErrorAndException(span, err as Error);
21202121
span.end();
2121-
callback(err, resp);
2122+
// Re-throw the exception after recording it.
2123+
throw err;
21222124
});
2123-
}, callback);
21242125
return;
21252126
}
21262127

0 commit comments

Comments
 (0)