From eb84677d7aff5c96b2751e4da20b9c49cbfe0124 Mon Sep 17 00:00:00 2001 From: Petyo Ivanov Date: Mon, 10 Nov 2025 11:18:39 +0200 Subject: [PATCH 1/2] Make genai prices auto-update in the worker --- gateway/package.json | 2 +- gateway/src/index.ts | 2 ++ gateway/src/refreshGenaiPrices.ts | 60 +++++++++++++++++++++++++++++++ package-lock.json | 8 ++--- 4 files changed, 67 insertions(+), 5 deletions(-) create mode 100644 gateway/src/refreshGenaiPrices.ts diff --git a/gateway/package.json b/gateway/package.json index acab033..41af055 100644 --- a/gateway/package.json +++ b/gateway/package.json @@ -11,7 +11,7 @@ "@opentelemetry/api": "^1.9.0", "@opentelemetry/exporter-trace-otlp-http": "^0.203.0", "@opentelemetry/resources": "^2.0.1", - "@pydantic/genai-prices": "~0.0.36", + "@pydantic/genai-prices": "~0.0.38", "@pydantic/logfire-api": "^0.9.0", "eventsource-parser": "^3.0.6", "mime-types": "^3.0.1", diff --git a/gateway/src/index.ts b/gateway/src/index.ts index 003ded3..bfb2d79 100644 --- a/gateway/src/index.ts +++ b/gateway/src/index.ts @@ -19,6 +19,7 @@ import type { KeysDb, LimitDb } from './db' import { gateway } from './gateway' import type { DefaultProviderProxy, Middleware, Next } from './providers/default' import type { RateLimiter } from './rateLimiter' +import { refreshGenaiPrices } from './refreshGenaiPrices' import type { SubFetch } from './types' import { ctHeader, ResponseError, response405, textResponse } from './utils' @@ -48,6 +49,7 @@ export async function gatewayFetch( ctx: ExecutionContext, options: GatewayOptions, ): Promise { + ctx.waitUntil(refreshGenaiPrices()) let { pathname: proxyPath, search: queryString } = url if (options.proxyPrefixLength) { proxyPath = proxyPath.slice(options.proxyPrefixLength) diff --git a/gateway/src/refreshGenaiPrices.ts b/gateway/src/refreshGenaiPrices.ts new file mode 100644 index 0000000..939a516 --- /dev/null +++ b/gateway/src/refreshGenaiPrices.ts @@ -0,0 +1,60 @@ +import { type Provider, updatePrices, waitForUpdate } from '@pydantic/genai-prices' + +// data will be refetched every 30 minutes +const PRICE_TTL = 1000 * 60 * 30 +let genaiData: Provider[] | null = null +let genaiDataTimestamp: number | null = null +let isFetching = false + +export function refreshGenaiPrices() { + updatePrices(({ setProviderData, remoteDataUrl }) => { + if (genaiDataTimestamp !== null) { + console.debug('genai prices in-memory cache found') + + if (genaiData !== null) { + setProviderData(genaiData) + } + + if (Date.now() - genaiDataTimestamp < PRICE_TTL) { + // this will be the most frequent, cheap path + console.debug('genai prices in-memory data is fresh') + return + } else { + console.debug('genai prices in-memory cache is stale, attempting to fetch remote data') + } + } + + if (isFetching) { + console.debug('genai-prices data fetch already in progress, skipping') + return + } + + console.debug('Fetching genai-prices data') + isFetching = true + + // Note: **DO NOT** await this promise + const freshDataPromise = fetch(remoteDataUrl) + .then(async (response) => { + if (!response.ok) { + console.error('Failed fetching provider data, response status %d', response.status) + return null + } + + const freshData = (await response.json()) as Provider[] + console.debug('Updated genai prices data, %d providers', freshData.length) + genaiDataTimestamp = Date.now() + genaiData = freshData + return freshData + }) + .catch((error: unknown) => { + console.error('Failed fetching provider data err: %o', error) + return null + }) + .finally(() => { + isFetching = false + }) + + setProviderData(freshDataPromise) + }) + return waitForUpdate() +} diff --git a/package-lock.json b/package-lock.json index 4a3f9e9..52d426a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -44,7 +44,7 @@ "@opentelemetry/api": "^1.9.0", "@opentelemetry/exporter-trace-otlp-http": "^0.203.0", "@opentelemetry/resources": "^2.0.1", - "@pydantic/genai-prices": "~0.0.36", + "@pydantic/genai-prices": "~0.0.38", "@pydantic/logfire-api": "^0.9.0", "eventsource-parser": "^3.0.6", "mime-types": "^3.0.1", @@ -3325,9 +3325,9 @@ "link": true }, "node_modules/@pydantic/genai-prices": { - "version": "0.0.36", - "resolved": "https://registry.npmjs.org/@pydantic/genai-prices/-/genai-prices-0.0.36.tgz", - "integrity": "sha512-+bTdZJqc909WOQRrcW8E9GOQCQzcTk/K9xlSa6MW9X7rCrVieffG+1uIsNQPnFELuod5ACmj7ZgngoU2k/om2w==", + "version": "0.0.38", + "resolved": "https://registry.npmjs.org/@pydantic/genai-prices/-/genai-prices-0.0.38.tgz", + "integrity": "sha512-Oncr9Co/Em5jgJ13V08WmhWb7Wg0xecLNt4WhJsx90peS0Jup5hoKIAtXrAOSEssOtsCcAIiH1fcG8sAEMip3Q==", "license": "MIT", "dependencies": { "yargs": "^17.7.2" From 277137747b34be34a9b8f750addb0b6e950669b0 Mon Sep 17 00:00:00 2001 From: Petyo Ivanov Date: Tue, 11 Nov 2025 11:52:56 +0200 Subject: [PATCH 2/2] Simplify in-memory storage, we don't need this --- gateway/src/refreshGenaiPrices.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/gateway/src/refreshGenaiPrices.ts b/gateway/src/refreshGenaiPrices.ts index 939a516..388a303 100644 --- a/gateway/src/refreshGenaiPrices.ts +++ b/gateway/src/refreshGenaiPrices.ts @@ -2,7 +2,6 @@ import { type Provider, updatePrices, waitForUpdate } from '@pydantic/genai-pric // data will be refetched every 30 minutes const PRICE_TTL = 1000 * 60 * 30 -let genaiData: Provider[] | null = null let genaiDataTimestamp: number | null = null let isFetching = false @@ -11,10 +10,6 @@ export function refreshGenaiPrices() { if (genaiDataTimestamp !== null) { console.debug('genai prices in-memory cache found') - if (genaiData !== null) { - setProviderData(genaiData) - } - if (Date.now() - genaiDataTimestamp < PRICE_TTL) { // this will be the most frequent, cheap path console.debug('genai prices in-memory data is fresh') @@ -43,7 +38,6 @@ export function refreshGenaiPrices() { const freshData = (await response.json()) as Provider[] console.debug('Updated genai prices data, %d providers', freshData.length) genaiDataTimestamp = Date.now() - genaiData = freshData return freshData }) .catch((error: unknown) => {