Commit 718a3ea

feat(core): Truncate request messages in AI integrations (#17921)
Fixes: #17809

Implements message truncation logic that drops the oldest messages first until the payload fits within the 20KB limit. If a single message still exceeds the limit, its content is truncated from the end. Supports the OpenAI/Anthropic ({ role, content }) and Google GenAI ({ role, parts: [{ text }] }) message formats.
1 parent da8397b commit 718a3ea
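The strategy can be summarized in a short sketch. This is a hedged approximation only, not the actual @sentry/core implementation: the names MAX_BYTES, byteLength, cropToBytes, and truncateMessagesByBytes are hypothetical, and the byte accounting ignores JSON string escaping. Only the 20KB cap, the drop-oldest-first order, and the crop-from-the-end behavior come from the commit message.

// Hypothetical sketch of the truncation described above; names are not the real @sentry/core API.
const MAX_BYTES = 20_000; // the 20KB limit from the commit message

type ChatMessage = { role: string; content: string }; // OpenAI/Anthropic shape

// UTF-8 byte length of a string.
function byteLength(s: string): number {
  return new TextEncoder().encode(s).length;
}

// Crop a string from the end until its UTF-8 encoding fits within maxBytes.
function cropToBytes(s: string, maxBytes: number): string {
  let out = s;
  while (out.length > 0 && byteLength(out) > maxBytes) {
    out = out.slice(0, -1);
  }
  return out;
}

function truncateMessagesByBytes(messages: ChatMessage[], maxBytes: number = MAX_BYTES): ChatMessage[] {
  // Drop the oldest messages first until the serialized payload fits.
  const kept = [...messages];
  while (kept.length > 1 && byteLength(JSON.stringify(kept)) > maxBytes) {
    kept.shift();
  }
  // If the single remaining message is still too large, crop its content from the end.
  if (kept.length === 1 && byteLength(JSON.stringify(kept)) > maxBytes) {
    const overhead = byteLength(JSON.stringify([{ ...kept[0], content: '' }]));
    kept[0] = { ...kept[0], content: cropToBytes(kept[0].content, Math.max(0, maxBytes - overhead)) };
  }
  return kept;
}

Applied to the three messages used in the scenarios below (~15KB, ~15KB, ~25KB), this drops the first two and keeps a cropped prefix of the last one, which is what the test regexes assert.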

13 files changed: +691 −31 lines
dev-packages/node-integration-tests/suites/tracing/anthropic/scenario-message-truncation.mjs

Lines changed: 71 additions & 0 deletions
@@ -0,0 +1,71 @@
import { instrumentAnthropicAiClient } from '@sentry/core';
import * as Sentry from '@sentry/node';

class MockAnthropic {
  constructor(config) {
    this.apiKey = config.apiKey;
    this.baseURL = config.baseURL;

    // Create messages object with create method
    this.messages = {
      create: this._messagesCreate.bind(this),
    };
  }

  /**
   * Create a mock message
   */
  async _messagesCreate(params) {
    // Simulate processing time
    await new Promise(resolve => setTimeout(resolve, 10));

    return {
      id: 'msg-truncation-test',
      type: 'message',
      role: 'assistant',
      content: [
        {
          type: 'text',
          text: 'Response to truncated messages',
        },
      ],
      model: params.model,
      stop_reason: 'end_turn',
      stop_sequence: null,
      usage: {
        input_tokens: 10,
        output_tokens: 15,
      },
    };
  }
}

async function run() {
  await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
    const mockClient = new MockAnthropic({
      apiKey: 'mock-api-key',
    });

    const client = instrumentAnthropicAiClient(mockClient);

    // Create 3 large messages where:
    // - First 2 messages are very large (will be dropped)
    // - Last message is large but will be truncated to fit within the 20KB limit
    const largeContent1 = 'A'.repeat(15000); // ~15KB
    const largeContent2 = 'B'.repeat(15000); // ~15KB
    const largeContent3 = 'C'.repeat(25000); // ~25KB (will be truncated)

    await client.messages.create({
      model: 'claude-3-haiku-20240307',
      max_tokens: 100,
      messages: [
        { role: 'user', content: largeContent1 },
        { role: 'assistant', content: largeContent2 },
        { role: 'user', content: largeContent3 },
      ],
      temperature: 0.7,
    });
  });
}

run();

dev-packages/node-integration-tests/suites/tracing/anthropic/test.ts

Lines changed: 36 additions & 0 deletions
@@ -497,4 +497,40 @@ describe('Anthropic integration', () => {
       await createRunner().ignore('event').expect({ transaction: EXPECTED_ERROR_SPANS }).start().completed();
     });
   });
+
+  createEsmAndCjsTests(
+    __dirname,
+    'scenario-message-truncation.mjs',
+    'instrument-with-pii.mjs',
+    (createRunner, test) => {
+      test('truncates messages when they exceed byte limit - keeps only last message and crops it', async () => {
+        await createRunner()
+          .ignore('event')
+          .expect({
+            transaction: {
+              transaction: 'main',
+              spans: expect.arrayContaining([
+                expect.objectContaining({
+                  data: expect.objectContaining({
+                    'gen_ai.operation.name': 'messages',
+                    'sentry.op': 'gen_ai.messages',
+                    'sentry.origin': 'auto.ai.anthropic',
+                    'gen_ai.system': 'anthropic',
+                    'gen_ai.request.model': 'claude-3-haiku-20240307',
+                    // Messages should be present (truncation happened) and should be a JSON array
+                    'gen_ai.request.messages': expect.stringMatching(/^\[\{"role":"user","content":"C+"\}\]$/),
+                  }),
+                  description: 'messages claude-3-haiku-20240307',
+                  op: 'gen_ai.messages',
+                  origin: 'auto.ai.anthropic',
+                  status: 'ok',
+                }),
+              ]),
+            },
+          })
+          .start()
+          .completed();
+      });
+    },
+  );
 });
dev-packages/node-integration-tests/suites/tracing/google-genai/scenario-message-truncation.mjs

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
import { instrumentGoogleGenAIClient } from '@sentry/core';
import * as Sentry from '@sentry/node';

class MockGoogleGenerativeAI {
  constructor(config) {
    this.apiKey = config.apiKey;

    this.models = {
      generateContent: this._generateContent.bind(this),
    };
  }

  async _generateContent() {
    await new Promise(resolve => setTimeout(resolve, 10));

    return {
      response: {
        text: () => 'Response to truncated messages',
        usageMetadata: {
          promptTokenCount: 10,
          candidatesTokenCount: 15,
          totalTokenCount: 25,
        },
        candidates: [
          {
            content: {
              parts: [{ text: 'Response to truncated messages' }],
              role: 'model',
            },
            finishReason: 'STOP',
          },
        ],
      },
    };
  }
}

async function run() {
  await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
    const mockClient = new MockGoogleGenerativeAI({
      apiKey: 'mock-api-key',
    });

    const client = instrumentGoogleGenAIClient(mockClient);

    // Create 3 large messages where:
    // - First 2 messages are very large (will be dropped)
    // - Last message is large but will be truncated to fit within the 20KB limit
    const largeContent1 = 'A'.repeat(15000); // ~15KB
    const largeContent2 = 'B'.repeat(15000); // ~15KB
    const largeContent3 = 'C'.repeat(25000); // ~25KB (will be truncated)

    await client.models.generateContent({
      model: 'gemini-1.5-flash',
      config: {
        temperature: 0.7,
        topP: 0.9,
        maxOutputTokens: 100,
      },
      contents: [
        { role: 'user', parts: [{ text: largeContent1 }] },
        { role: 'model', parts: [{ text: largeContent2 }] },
        { role: 'user', parts: [{ text: largeContent3 }] },
      ],
    });
  });
}

run();
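For the Google GenAI shape used above ({ role, parts: [{ text }] }), the commit message says the same drop-then-crop strategy applies. A hedged sketch reusing the hypothetical MAX_BYTES, byteLength, and cropToBytes helpers from the earlier sketch; collapsing multiple parts into one before cropping is a simplifying assumption, not confirmed behavior:

type GenAiContent = { role: string; parts: { text: string }[] };

function truncateContentsByBytes(contents: GenAiContent[], maxBytes: number = MAX_BYTES): GenAiContent[] {
  // Drop the oldest entries first until the serialized payload fits.
  const kept = [...contents];
  while (kept.length > 1 && byteLength(JSON.stringify(kept)) > maxBytes) {
    kept.shift();
  }
  // If the single remaining entry is still too large, crop its text from the end.
  // Simplifying assumption: multiple parts are joined into a single part before cropping.
  if (kept.length === 1 && byteLength(JSON.stringify(kept)) > maxBytes) {
    const { role, parts } = kept[0];
    const text = parts.map(p => p.text).join('');
    const overhead = byteLength(JSON.stringify([{ role, parts: [{ text: '' }] }]));
    kept[0] = { role, parts: [{ text: cropToBytes(text, Math.max(0, maxBytes - overhead)) }] };
  }
  return kept;
}

This single-part result matches the regex asserted in the test below, which expects exactly one { role, parts: [{ text }] } entry of C's.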

dev-packages/node-integration-tests/suites/tracing/google-genai/test.ts

Lines changed: 38 additions & 0 deletions
@@ -486,4 +486,42 @@ describe('Google GenAI integration', () => {
         .completed();
     });
   });
+
+  createEsmAndCjsTests(
+    __dirname,
+    'scenario-message-truncation.mjs',
+    'instrument-with-pii.mjs',
+    (createRunner, test) => {
+      test('truncates messages when they exceed byte limit - keeps only last message and crops it', async () => {
+        await createRunner()
+          .ignore('event')
+          .expect({
+            transaction: {
+              transaction: 'main',
+              spans: expect.arrayContaining([
+                expect.objectContaining({
+                  data: expect.objectContaining({
+                    'gen_ai.operation.name': 'models',
+                    'sentry.op': 'gen_ai.models',
+                    'sentry.origin': 'auto.ai.google_genai',
+                    'gen_ai.system': 'google_genai',
+                    'gen_ai.request.model': 'gemini-1.5-flash',
+                    // Messages should be present (truncation happened) and should be a JSON array with parts
+                    'gen_ai.request.messages': expect.stringMatching(
+                      /^\[\{"role":"user","parts":\[\{"text":"C+"\}\]\}\]$/,
+                    ),
+                  }),
+                  description: 'models gemini-1.5-flash',
+                  op: 'gen_ai.models',
+                  origin: 'auto.ai.google_genai',
+                  status: 'ok',
+                }),
+              ]),
+            },
+          })
+          .start()
+          .completed();
+      });
+    },
+  );
 });
dev-packages/node-integration-tests/suites/tracing/openai/scenario-message-truncation.mjs

Lines changed: 69 additions & 0 deletions
@@ -0,0 +1,69 @@
import { instrumentOpenAiClient } from '@sentry/core';
import * as Sentry from '@sentry/node';

class MockOpenAI {
  constructor(config) {
    this.apiKey = config.apiKey;

    this.chat = {
      completions: {
        create: async params => {
          // Simulate processing time
          await new Promise(resolve => setTimeout(resolve, 10));

          return {
            id: 'chatcmpl-truncation-test',
            object: 'chat.completion',
            created: 1677652288,
            model: params.model,
            system_fingerprint: 'fp_44709d6fcb',
            choices: [
              {
                index: 0,
                message: {
                  role: 'assistant',
                  content: 'Response to truncated messages',
                },
                finish_reason: 'stop',
              },
            ],
            usage: {
              prompt_tokens: 10,
              completion_tokens: 15,
              total_tokens: 25,
            },
          };
        },
      },
    };
  }
}

async function run() {
  await Sentry.startSpan({ op: 'function', name: 'main' }, async () => {
    const mockClient = new MockOpenAI({
      apiKey: 'mock-api-key',
    });

    const client = instrumentOpenAiClient(mockClient);

    // Create 3 large messages where:
    // - First 2 messages are very large (will be dropped)
    // - Last message is large but will be truncated to fit within the 20KB limit
    const largeContent1 = 'A'.repeat(15000); // ~15KB
    const largeContent2 = 'B'.repeat(15000); // ~15KB
    const largeContent3 = 'C'.repeat(25000); // ~25KB (will be truncated)

    await client.chat.completions.create({
      model: 'gpt-3.5-turbo',
      messages: [
        { role: 'system', content: largeContent1 },
        { role: 'user', content: largeContent2 },
        { role: 'user', content: largeContent3 },
      ],
      temperature: 0.7,
    });
  });
}

run();

dev-packages/node-integration-tests/suites/tracing/openai/test.ts

Lines changed: 38 additions & 2 deletions
@@ -187,7 +187,7 @@ describe('OpenAI integration', () => {
                   'sentry.origin': 'auto.ai.openai',
                   'gen_ai.system': 'openai',
                   'gen_ai.request.model': 'gpt-3.5-turbo',
-                  'gen_ai.request.messages': '"Translate this to French: Hello"',
+                  'gen_ai.request.messages': 'Translate this to French: Hello',
                   'gen_ai.response.text': 'Response to: Translate this to French: Hello',
                   'gen_ai.response.finish_reasons': '["completed"]',
                   'gen_ai.response.model': 'gpt-3.5-turbo',
@@ -261,7 +261,7 @@ describe('OpenAI integration', () => {
                   'gen_ai.system': 'openai',
                   'gen_ai.request.model': 'gpt-4',
                   'gen_ai.request.stream': true,
-                  'gen_ai.request.messages': '"Test streaming responses API"',
+                  'gen_ai.request.messages': 'Test streaming responses API',
                   'gen_ai.response.text': 'Streaming response to: Test streaming responses APITest streaming responses API',
                   'gen_ai.response.finish_reasons': '["in_progress","completed"]',
                   'gen_ai.response.id': 'resp_stream_456',
@@ -397,4 +397,40 @@ describe('OpenAI integration', () => {
         .completed();
     });
   });
+
+  createEsmAndCjsTests(
+    __dirname,
+    'scenario-message-truncation.mjs',
+    'instrument-with-pii.mjs',
+    (createRunner, test) => {
+      test('truncates messages when they exceed byte limit - keeps only last message and crops it', async () => {
+        await createRunner()
+          .ignore('event')
+          .expect({
+            transaction: {
+              transaction: 'main',
+              spans: expect.arrayContaining([
+                expect.objectContaining({
+                  data: expect.objectContaining({
+                    'gen_ai.operation.name': 'chat',
+                    'sentry.op': 'gen_ai.chat',
+                    'sentry.origin': 'auto.ai.openai',
+                    'gen_ai.system': 'openai',
+                    'gen_ai.request.model': 'gpt-3.5-turbo',
+                    // Messages should be present (truncation happened) and should be a JSON array with a single entry
+                    'gen_ai.request.messages': expect.stringMatching(/^\[\{"role":"user","content":"C+"\}\]$/),
+                  }),
+                  description: 'chat gpt-3.5-turbo',
+                  op: 'gen_ai.chat',
+                  origin: 'auto.ai.openai',
+                  status: 'ok',
+                }),
+              ]),
+            },
+          })
+          .start()
+          .completed();
+      });
+    },
+  );
 });
