Commit f5fc6bf

test(ai): Remove flaky token count integration expect directives (#9401)
We had some "ballpark" checks for the number of tokens returned in responses. However, the number of tokens returned had a high variance based on general AI chaos and the model that was used. This PR replaces somewhat exacting token count checks with simple non-zero checks in order to reduce flake. Additionally, the change removes `toolUsePromptTokenCount` checks for the `gemini-2.0-flash` model, which doesn't support this field.
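The core change, in miniature: a hypothetical sketch using chai's expect, where the UsageMetadata interface and checkUsage helper are illustrative stand-ins (not the SDK's actual types), though the field names match the response fields the tests assert on.

    import { expect } from 'chai';

    // Illustrative stand-in for the SDK's usage metadata.
    interface UsageMetadata {
      promptTokenCount: number;
      candidatesTokenCount: number;
      totalTokenCount: number;
    }

    function checkUsage(usage: UsageMetadata): void {
      // Before (flaky): exact "ballpark" assertions such as
      //   expect(usage.promptTokenCount).to.be.closeTo(22, TOKEN_COUNT_DELTA);
      // fail whenever the model tokenizes slightly differently between runs.
      // After (stable): assert only that each count was populated at all.
      expect(usage.promptTokenCount).to.not.equal(0);
      expect(usage.candidatesTokenCount).to.not.equal(0);
      expect(usage.totalTokenCount).to.not.equal(0);
    }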
1 parent: 9101b46

2 files changed: +29 -88 lines

packages/ai/integration/chat.test.ts

Lines changed: 13 additions & 54 deletions
@@ -24,7 +24,7 @@ import {
   SafetySetting,
   getGenerativeModel
 } from '../src';
-import { testConfigs, TOKEN_COUNT_DELTA } from './constants';
+import { testConfigs } from './constants';
 
 describe('Chat Session', () => {
   testConfigs.forEach(testConfig => {
@@ -98,62 +98,21 @@ describe('Chat Session', () => {
 
       if (model.model.includes('gemini-2.5-flash')) {
         // Token counts can vary slightly in chat context
-        expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
-          17, // "What is the capital of France?" + system instruction
-          TOKEN_COUNT_DELTA + 2 // More variance for chat context
-        );
-        expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-          8, // "Paris"
-          TOKEN_COUNT_DELTA
-        );
-        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
-          49, // "What is the capital of France?" + system instruction + "Paris"
-          TOKEN_COUNT_DELTA + 3 // More variance for chat context
-        );
-        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
-          49, // "What is the capital of France?" + system instruction + "Paris"
-          TOKEN_COUNT_DELTA + 3 // More variance for chat context
-        );
-
-        expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo(
-          32, // History + "And what about Italy?" + system instruction
-          TOKEN_COUNT_DELTA + 5 // More variance for chat context with history
-        );
-        expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-          8,
-          TOKEN_COUNT_DELTA
-        );
-        expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo(
-          68,
-          TOKEN_COUNT_DELTA + 2
-        );
+        expect(response1.usageMetadata!.promptTokenCount).to.not.equal(0);
+        expect(response1.usageMetadata!.candidatesTokenCount).to.not.equal(0);
+        expect(response1.usageMetadata!.totalTokenCount).to.not.equal(0);
+        expect(response2.usageMetadata!.promptTokenCount).to.not.equal(0);
+        expect(response2.usageMetadata!.candidatesTokenCount).to.not.equal(0);
+        expect(response2.usageMetadata!.totalTokenCount).to.not.equal(0);
       } else if (model.model.includes('gemini-2.0-flash')) {
         expect(response1.usageMetadata).to.not.be.null;
         // Token counts can vary slightly in chat context
-        expect(response1.usageMetadata!.promptTokenCount).to.be.closeTo(
-          15, // "What is the capital of France?" + system instruction
-          TOKEN_COUNT_DELTA + 2 // More variance for chat context
-        );
-        expect(response1.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-          8, // "Paris"
-          TOKEN_COUNT_DELTA
-        );
-        expect(response1.usageMetadata!.totalTokenCount).to.be.closeTo(
-          23, // "What is the capital of France?" + system instruction + "Paris"
-          TOKEN_COUNT_DELTA + 3 // More variance for chat context
-        );
-        expect(response2.usageMetadata!.promptTokenCount).to.be.closeTo(
-          28, // History + "And what about Italy?" + system instruction
-          TOKEN_COUNT_DELTA + 5 // More variance for chat context with history
-        );
-        expect(response2.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-          8,
-          TOKEN_COUNT_DELTA
-        );
-        expect(response2.usageMetadata!.totalTokenCount).to.be.closeTo(
-          36,
-          TOKEN_COUNT_DELTA
-        );
+        expect(response1.usageMetadata!.promptTokenCount).to.not.equal(0);
+        expect(response1.usageMetadata!.candidatesTokenCount).to.not.equal(0);
+        expect(response1.usageMetadata!.totalTokenCount).to.not.equal(0);
+        expect(response2.usageMetadata!.promptTokenCount).to.not.equal(0);
+        expect(response2.usageMetadata!.candidatesTokenCount).to.not.equal(0);
+        expect(response2.usageMetadata!.totalTokenCount).to.not.equal(0);
       }
     });
   });

packages/ai/integration/generate-content.test.ts

Lines changed: 16 additions & 34 deletions
@@ -29,7 +29,7 @@ import {
   URLRetrievalStatus,
   getGenerativeModel
 } from '../src';
-import { testConfigs, TOKEN_COUNT_DELTA } from './constants';
+import { testConfigs } from './constants';
 
 describe('Generate Content', function () {
   this.timeout(20_000);
@@ -88,22 +88,10 @@ describe('Generate Content', function () {
       expect(response.usageMetadata).to.not.be.null;
 
       if (model.model.includes('gemini-2.5-flash')) {
-        expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
-          22,
-          TOKEN_COUNT_DELTA
-        );
-        expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-          2,
-          TOKEN_COUNT_DELTA
-        );
-        expect(response.usageMetadata!.thoughtsTokenCount).to.be.closeTo(
-          30,
-          TOKEN_COUNT_DELTA * 2
-        );
-        expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
-          55,
-          TOKEN_COUNT_DELTA * 2
-        );
+        expect(response.usageMetadata!.promptTokenCount).to.not.equal(0);
+        expect(response.usageMetadata!.candidatesTokenCount).to.not.equal(0);
+        expect(response.usageMetadata!.thoughtsTokenCount).to.not.equal(0);
+        expect(response.usageMetadata!.totalTokenCount).to.not.equal(0);
         expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
         expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(
           1
@@ -113,22 +101,13 @@ describe('Generate Content', function () {
         ).to.equal(Modality.TEXT);
         expect(
           response.usageMetadata!.promptTokensDetails![0].tokenCount
-        ).to.closeTo(22, TOKEN_COUNT_DELTA);
+        ).to.not.equal(0);
 
         // candidatesTokenDetails comes back about half the time, so let's just not test it.
       } else if (model.model.includes('gemini-2.0-flash')) {
-        expect(response.usageMetadata!.promptTokenCount).to.be.closeTo(
-          21,
-          TOKEN_COUNT_DELTA
-        );
-        expect(response.usageMetadata!.candidatesTokenCount).to.be.closeTo(
-          4,
-          TOKEN_COUNT_DELTA
-        );
-        expect(response.usageMetadata!.totalTokenCount).to.be.closeTo(
-          25,
-          TOKEN_COUNT_DELTA * 2
-        );
+        expect(response.usageMetadata!.promptTokenCount).to.not.equal(0);
+        expect(response.usageMetadata!.candidatesTokenCount).to.not.equal(0);
+        expect(response.usageMetadata!.totalTokenCount).to.not.equal(0);
         expect(response.usageMetadata!.promptTokensDetails).to.not.be.null;
         expect(response.usageMetadata!.promptTokensDetails!.length).to.equal(
           1
@@ -149,7 +128,7 @@ describe('Generate Content', function () {
         ).to.equal(Modality.TEXT);
         expect(
           response.usageMetadata!.candidatesTokensDetails![0].tokenCount
-        ).to.be.closeTo(4, TOKEN_COUNT_DELTA);
+        ).to.not.equal(0);
       }
     });
 
@@ -230,8 +209,11 @@
 
       const usageMetadata = response.usageMetadata;
       expect(usageMetadata).to.exist;
-      expect(usageMetadata?.toolUsePromptTokenCount).to.exist;
-      expect(usageMetadata?.toolUsePromptTokenCount).to.be.greaterThan(0);
+      // usageMetaData.toolUsePromptTokenCount does not exist in Gemini 2.0 flash responses.
+      if (!model.model.includes('gemini-2.0-flash')) {
+        expect(usageMetadata?.toolUsePromptTokenCount).to.exist;
+        expect(usageMetadata?.toolUsePromptTokenCount).to.be.greaterThan(0);
+      }
     });
 
     it('generateContent: url context and google search grounding', async () => {
@@ -288,7 +270,7 @@
     });
 
     const result = await model.generateContent(
-      'Recommend 3 books for beginners to read to learn more about the latest advancements in Quantum Computing.'
+      'Recommend 3 books for beginners to read to learn more about the latest advancements in Quantum Computing'
     );
     const response = result.response;
     const urlContextMetadata =
