Skip to content

Commit 610d1b9

Browse files
lszinvravjotbrarjeremyprimesurbhigarg92
authored
Feat: Add Custom OpenTelemetry Exporter in for Service Metrics (#2272)
* chore: Add Custom OpenTelemetry Exporter in for Service Metrics * fix: lint errors * chore: migrate metrics service API from googleapis to @google-cloud/monitoring * fix: correct gfe latencies metric name * chore: add batch unit test and update gauge double handling --------- Co-authored-by: Ravjot Brar <83892020+ravjotbrar@users.noreply.github.com> Co-authored-by: Jeremy Parr-Pearson <94406158+jeremyprime@users.noreply.github.com> Co-authored-by: surbhigarg92 <surbhigarg.92@gmail.com>
1 parent 3c3db13 commit 610d1b9

File tree

8 files changed

+1269
-0
lines changed

8 files changed

+1269
-0
lines changed

package.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,13 +55,17 @@
5555
},
5656
"dependencies": {
5757
"@google-cloud/common": "^6.0.0",
58+
"@google-cloud/monitoring": "^5.0.0",
59+
"@google-cloud/opentelemetry-resource-util": "^2.4.0",
5860
"@google-cloud/precise-date": "^5.0.0",
5961
"@google-cloud/projectify": "^5.0.0",
6062
"@google-cloud/promisify": "^5.0.0",
63+
"@grpc/grpc-js": "^1.13.2",
6164
"@grpc/proto-loader": "^0.7.13",
6265
"@opentelemetry/api": "^1.9.0",
6366
"@opentelemetry/context-async-hooks": "^2.0.0",
6467
"@opentelemetry/core": "^2.0.0",
68+
"@opentelemetry/sdk-metrics": "^1.30.1",
6569
"@opentelemetry/semantic-conventions": "^1.30.0",
6670
"@types/big.js": "^6.2.2",
6771
"@types/stack-trace": "^0.0.33",

src/metrics/README.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Custom Metric Exporter
2+
The custom metric exporter, as defined in [spanner-metrics-exporter.ts](./spanner-metrics-exporter.ts), is designed to work in conjunction with OpenTelemetry and the Spanner client. It converts data into its protobuf equivalent and sends it to Google Cloud Monitoring.
3+
4+
## Filtering Criteria
5+
The exporter filters metrics based on the following conditions, utilizing values defined in [constants.ts](./constants.ts):
6+
7+
* Metrics with a scope set to `spanner-nodejs`.
8+
* Metrics with one of the following predefined names:
9+
* `attempt_latencies`
10+
* `attempt_count`
11+
* `operation_latencies`
12+
* `operation_count`
13+
* `gfe_latencies`
14+
* `gfe_connectivity_error_count`
15+
16+
## Service Endpoint
17+
The exporter sends metrics to the Google Cloud Monitoring [service endpoint](https://cloud.google.com/python/docs/reference/monitoring/latest/google.cloud.monitoring_v3.services.metric_service.MetricServiceClient#google_cloud_monitoring_v3_services_metric_service_MetricServiceClient_create_service_time_series), distinct from the regular client endpoint. This service endpoint operates under a different quota limit than the user endpoint and features an additional server-side filter that only permits a predefined set of metrics to pass through.
18+
19+
When introducing new service metrics, it is essential to ensure they are allowed through by the server-side filter as well.

src/metrics/constants.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
export const SPANNER_METER_NAME = 'spanner-nodejs';
16+
export const CLIENT_METRICS_PREFIX = 'spanner.googleapis.com/internal/client';
17+
export const SPANNER_RESOURCE_TYPE = 'spanner_instance_client';
18+
19+
// Monitored resource labels
20+
export const MONITORED_RES_LABEL_KEY_PROJECT = 'project_id';
21+
export const MONITORED_RES_LABEL_KEY_INSTANCE = 'instance_id';
22+
export const MONITORED_RES_LABEL_KEY_INSTANCE_CONFIG = 'instance_config';
23+
export const MONITORED_RES_LABEL_KEY_LOCATION = 'location';
24+
export const MONITORED_RES_LABEL_KEY_CLIENT_HASH = 'client_hash';
25+
export const MONITORED_RESOURCE_LABELS = new Set([
26+
MONITORED_RES_LABEL_KEY_PROJECT,
27+
MONITORED_RES_LABEL_KEY_INSTANCE,
28+
MONITORED_RES_LABEL_KEY_INSTANCE_CONFIG,
29+
MONITORED_RES_LABEL_KEY_LOCATION,
30+
MONITORED_RES_LABEL_KEY_CLIENT_HASH,
31+
]);
32+
33+
// Metric labels
34+
export const METRIC_LABEL_KEY_CLIENT_UID = 'client_uid';
35+
export const METRIC_LABEL_KEY_CLIENT_NAME = 'client_name';
36+
export const METRIC_LABEL_KEY_DATABASE = 'database';
37+
export const METRIC_LABEL_KEY_METHOD = 'method';
38+
export const METRIC_LABEL_KEY_STATUS = 'status';
39+
export const METRIC_LABELS = new Set([
40+
METRIC_LABEL_KEY_CLIENT_UID,
41+
METRIC_LABEL_KEY_CLIENT_NAME,
42+
METRIC_LABEL_KEY_DATABASE,
43+
METRIC_LABEL_KEY_METHOD,
44+
METRIC_LABEL_KEY_STATUS,
45+
]);
46+
47+
// Metric names
48+
export const METRIC_NAME_OPERATION_LATENCIES = 'operation_latencies';
49+
export const METRIC_NAME_ATTEMPT_LATENCIES = 'attempt_latencies';
50+
export const METRIC_NAME_OPERATION_COUNT = 'operation_count';
51+
export const METRIC_NAME_ATTEMPT_COUNT = 'attempt_count';
52+
export const METRIC_NAME_GFE_LATENCIES = 'gfe_latencies';
53+
export const METRIC_NAME_GFE_CONNECTIVITY_ERROR_COUNT =
54+
'gfe_connectivity_error_count';
55+
export const METRIC_NAMES = new Set([
56+
METRIC_NAME_OPERATION_LATENCIES,
57+
METRIC_NAME_ATTEMPT_LATENCIES,
58+
METRIC_NAME_GFE_LATENCIES,
59+
METRIC_NAME_OPERATION_COUNT,
60+
METRIC_NAME_ATTEMPT_COUNT,
61+
METRIC_NAME_GFE_CONNECTIVITY_ERROR_COUNT,
62+
]);

src/metrics/external-types.ts

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
import {GoogleAuth} from 'google-auth-library';
16+
17+
export interface ExporterOptions {
18+
/**
19+
* Optional authentication options for Google services.
20+
*/
21+
auth: GoogleAuth;
22+
}
23+
24+
export enum MetricKind {
25+
UNSPECIFIED = 'METRIC_KIND_UNSPECIFIED',
26+
GAUGE = 'GAUGE',
27+
DELTA = 'DELTA',
28+
CUMULATIVE = 'CUMULATIVE',
29+
}
30+
31+
/** The value type of a metric. */
32+
export enum ValueType {
33+
VALUE_TYPE_UNSPECIFIED = 'VALUE_TYPE_UNSPECIFIED',
34+
INT64 = 'INT64',
35+
DOUBLE = 'DOUBLE',
36+
DISTRIBUTION = 'DISTRIBUTION',
37+
}
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
// Copyright 2025 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
import {PushMetricExporter, ResourceMetrics} from '@opentelemetry/sdk-metrics';
16+
import {ExportResult, ExportResultCode} from '@opentelemetry/core';
17+
import {ExporterOptions} from './external-types';
18+
import {MetricServiceClient} from '@google-cloud/monitoring';
19+
import {transformResourceMetricToTimeSeriesArray} from './transform';
20+
import {status} from '@grpc/grpc-js';
21+
22+
// Stackdriver Monitoring v3 only accepts up to 200 TimeSeries per
23+
// CreateTimeSeries call.
24+
export const MAX_BATCH_EXPORT_SIZE = 200;
25+
26+
/**
27+
* Format and sends metrics information to Google Cloud Monitoring.
28+
*/
29+
export class CloudMonitoringMetricsExporter implements PushMetricExporter {
30+
private _projectId: string | void | Promise<string | void>;
31+
32+
private readonly _client: MetricServiceClient;
33+
34+
constructor({auth}: ExporterOptions) {
35+
this._client = new MetricServiceClient({auth: auth});
36+
37+
// Start this async process as early as possible. It will be
38+
// awaited on the first export because constructors are synchronous
39+
this._projectId = auth.getProjectId().catch(err => {
40+
console.error(err);
41+
});
42+
}
43+
44+
/**
45+
* Implementation for {@link PushMetricExporter.export}.
46+
* Calls the async wrapper method {@link _exportAsync} and
47+
* assures no rejected promises bubble up to the caller.
48+
*
49+
* @param metrics Metrics to be sent to the Google Cloud Monitoring backend
50+
* @param resultCallback result callback to be called on finish
51+
*/
52+
export(
53+
metrics: ResourceMetrics,
54+
resultCallback: (result: ExportResult) => void,
55+
): void {
56+
this._exportAsync(metrics).then(resultCallback, err => {
57+
console.error(err.message);
58+
resultCallback({code: ExportResultCode.FAILED, error: err});
59+
});
60+
}
61+
62+
async shutdown(): Promise<void> {}
63+
async forceFlush(): Promise<void> {}
64+
65+
/**
66+
* Asnyc wrapper for the {@link export} implementation.
67+
* Writes the current values of all exported {@link MetricRecord}s
68+
* to the Google Cloud Monitoring backend.
69+
*
70+
* @param resourceMetrics Metrics to be sent to the Google Cloud Monitoring backend
71+
*/
72+
private async _exportAsync(
73+
resourceMetrics: ResourceMetrics,
74+
): Promise<ExportResult> {
75+
if (this._projectId instanceof Promise) {
76+
this._projectId = await this._projectId;
77+
}
78+
79+
if (!this._projectId) {
80+
const error = new Error('expecting a non-blank ProjectID');
81+
console.error(error.message);
82+
return {code: ExportResultCode.FAILED, error};
83+
}
84+
85+
const timeSeriesList =
86+
transformResourceMetricToTimeSeriesArray(resourceMetrics);
87+
88+
let failure: {sendFailed: false} | {sendFailed: true; error: Error} = {
89+
sendFailed: false,
90+
};
91+
await Promise.all(
92+
this._partitionList(timeSeriesList, MAX_BATCH_EXPORT_SIZE).map(
93+
async batchedTimeSeries => this._sendTimeSeries(batchedTimeSeries),
94+
),
95+
).catch(e => {
96+
const error = e as {code: number};
97+
if (error.code === status.PERMISSION_DENIED) {
98+
console.warn(
99+
`Need monitoring metric writer permission on project ${this._projectId}. Follow https://cloud.google.com/spanner/docs/view-manage-client-side-metrics#access-client-side-metrics to set up permissions`,
100+
);
101+
}
102+
const err = asError(e);
103+
err.message = `Send TimeSeries failed: ${err.message}`;
104+
failure = {sendFailed: true, error: err};
105+
console.error(`ERROR: ${err.message}`);
106+
});
107+
108+
return failure.sendFailed
109+
? {
110+
code: ExportResultCode.FAILED,
111+
error: (failure as {sendFailed: boolean; error: Error}).error,
112+
}
113+
: {code: ExportResultCode.SUCCESS};
114+
}
115+
116+
private async _sendTimeSeries(timeSeries) {
117+
if (timeSeries.length === 0) {
118+
return Promise.resolve();
119+
}
120+
121+
// TODO: Use createServiceTimeSeries when it is available
122+
await this._client.createTimeSeries({
123+
name: `projects/${this._projectId}`,
124+
timeSeries: timeSeries,
125+
});
126+
}
127+
128+
/** Returns the minimum number of arrays of max size chunkSize, partitioned from the given array. */
129+
private _partitionList(list, chunkSize: number) {
130+
return Array.from({length: Math.ceil(list.length / chunkSize)}, (_, i) =>
131+
list.slice(i * chunkSize, (i + 1) * chunkSize),
132+
);
133+
}
134+
}
135+
136+
function asError(error: unknown): Error {
137+
return error instanceof Error ? error : new Error(String(error));
138+
}

0 commit comments

Comments
 (0)