diff --git a/gateway/package.json b/gateway/package.json
index acab033..41af055 100644
--- a/gateway/package.json
+++ b/gateway/package.json
@@ -11,7 +11,7 @@
     "@opentelemetry/api": "^1.9.0",
     "@opentelemetry/exporter-trace-otlp-http": "^0.203.0",
     "@opentelemetry/resources": "^2.0.1",
-    "@pydantic/genai-prices": "~0.0.36",
+    "@pydantic/genai-prices": "~0.0.38",
     "@pydantic/logfire-api": "^0.9.0",
     "eventsource-parser": "^3.0.6",
     "mime-types": "^3.0.1",
diff --git a/gateway/src/index.ts b/gateway/src/index.ts
index 204f26a..d677a71 100644
--- a/gateway/src/index.ts
+++ b/gateway/src/index.ts
@@ -19,6 +19,7 @@ import type { KeysDb, LimitDb } from './db'
 import { gateway } from './gateway'
 import type { DefaultProviderProxy, Middleware, Next } from './providers/default'
 import type { RateLimiter } from './rateLimiter'
+import { refreshGenaiPrices } from './refreshGenaiPrices'
 import type { SubFetch } from './types'
 import { ctHeader, response405, runAfter, textResponse } from './utils'
 
@@ -48,6 +49,7 @@ export async function gatewayFetch(
   ctx: ExecutionContext,
   options: GatewayOptions,
 ): Promise {
+  ctx.waitUntil(refreshGenaiPrices())
   let { pathname: proxyPath, search: queryString } = url
   if (options.proxyPrefixLength) {
     proxyPath = proxyPath.slice(options.proxyPrefixLength)
diff --git a/gateway/src/refreshGenaiPrices.ts b/gateway/src/refreshGenaiPrices.ts
new file mode 100644
index 0000000..388a303
--- /dev/null
+++ b/gateway/src/refreshGenaiPrices.ts
@@ -0,0 +1,75 @@
+import { type Provider, updatePrices, waitForUpdate } from '@pydantic/genai-prices'
+
+/** How long fetched price data is treated as fresh: 30 minutes, in milliseconds. */
+const PRICE_TTL = 1000 * 60 * 30
+/** `Date.now()` of the last successful refresh, or `null` before the first success. */
+let genaiDataTimestamp: number | null = null
+/** True while a remote fetch is in flight, so overlapping calls do not start another one. */
+let isFetching = false
+
+/**
+ * Downloads the remote provider price data and checks that it is a list.
+ *
+ * Never rejects: every failure is logged and reported as `null`. A successful
+ * download stamps `genaiDataTimestamp`; `isFetching` is cleared on every outcome.
+ */
+async function fetchProviderData(remoteDataUrl: Parameters<typeof fetch>[0]): Promise<Provider[] | null> {
+  try {
+    const response = await fetch(remoteDataUrl)
+    if (!response.ok) {
+      console.error('Failed fetching provider data, response status %d', response.status)
+      return null
+    }
+
+    const freshData: unknown = await response.json()
+    // A 2xx status is not proof of a usable payload: reject anything that is not a
+    // list so bad data is neither handed over nor stamped as fresh for a whole TTL.
+    if (!Array.isArray(freshData)) {
+      console.error('Failed fetching provider data, unexpected payload type %s', typeof freshData)
+      return null
+    }
+
+    console.debug('Updated genai prices data, %d providers', freshData.length)
+    genaiDataTimestamp = Date.now()
+    // Only the outer shape is checked here; individual entries are not validated.
+    return freshData as Provider[]
+  } catch (error: unknown) {
+    console.error('Failed fetching provider data err: %o', error)
+    return null
+  } finally {
+    isFetching = false
+  }
+}
+
+/**
+ * Refreshes the genai-prices provider data when the in-memory copy is missing or
+ * older than `PRICE_TTL`, skipping the fetch if one is already in flight.
+ *
+ * @returns the result of `waitForUpdate()`, which `gatewayFetch` passes to `ctx.waitUntil`.
+ */
+export function refreshGenaiPrices(): ReturnType<typeof waitForUpdate> {
+  updatePrices(({ setProviderData, remoteDataUrl }) => {
+    if (genaiDataTimestamp !== null) {
+      console.debug('genai prices in-memory cache found')
+
+      if (Date.now() - genaiDataTimestamp < PRICE_TTL) {
+        // this will be the most frequent, cheap path
+        console.debug('genai prices in-memory data is fresh')
+        return
+      }
+      console.debug('genai prices in-memory cache is stale, attempting to fetch remote data')
+    }
+
+    if (isFetching) {
+      console.debug('genai-prices data fetch already in progress, skipping')
+      return
+    }
+
+    console.debug('Fetching genai-prices data')
+    isFetching = true
+
+    // Note: **DO NOT** await this promise; it is handed over still pending.
+    setProviderData(fetchProviderData(remoteDataUrl))
+  })
+  return waitForUpdate()
+}
diff --git a/package-lock.json b/package-lock.json
index 4a3f9e9..52d426a 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -44,7 +44,7 @@
         "@opentelemetry/api": "^1.9.0",
         "@opentelemetry/exporter-trace-otlp-http": "^0.203.0",
         "@opentelemetry/resources": "^2.0.1",
-        "@pydantic/genai-prices": "~0.0.36",
+        "@pydantic/genai-prices": "~0.0.38",
         "@pydantic/logfire-api": "^0.9.0",
         "eventsource-parser": "^3.0.6",
         "mime-types": "^3.0.1",
@@ -3325,9 +3325,9 @@
       "link": true
     },
     "node_modules/@pydantic/genai-prices": {
-      "version": "0.0.36",
-      "resolved": "https://registry.npmjs.org/@pydantic/genai-prices/-/genai-prices-0.0.36.tgz",
-      "integrity": "sha512-+bTdZJqc909WOQRrcW8E9GOQCQzcTk/K9xlSa6MW9X7rCrVieffG+1uIsNQPnFELuod5ACmj7ZgngoU2k/om2w==",
+      "version": "0.0.38",
+      "resolved": "https://registry.npmjs.org/@pydantic/genai-prices/-/genai-prices-0.0.38.tgz",
+      "integrity": "sha512-Oncr9Co/Em5jgJ13V08WmhWb7Wg0xecLNt4WhJsx90peS0Jup5hoKIAtXrAOSEssOtsCcAIiH1fcG8sAEMip3Q==",
       "license": "MIT",
       "dependencies": {
         "yargs": "^17.7.2"