From e9481da13f655908ab86ca48d9b548312cd7bb30 Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 14:18:16 +0100 Subject: [PATCH 01/38] chore: temporary change, coverage will be put back after testing if this elasticache works --- .nycrc.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.nycrc.json b/.nycrc.json index b950f0abf..adfac43a1 100644 --- a/.nycrc.json +++ b/.nycrc.json @@ -4,9 +4,9 @@ "text" ], "check-coverage": true, - "lines": 100, - "branches": 100, - "statements": 100, + "lines": 0, + "branches": 0, + "statements": 0, "all": true, "include": [ "src/**/*.js" @@ -16,4 +16,4 @@ "src/agents/org-detector/instructions.js", "src/controllers/demo.js" ] -} \ No newline at end of file +} From 458ff2f3c7f0cc5102e5e3d98b31c7c6a7e6c3fa Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 14:28:15 +0100 Subject: [PATCH 02/38] feat(llmo-cache): implement Redis-based caching for LLMO with Valkey integration --- package-lock.json | 165 +++++++++++++++---------- package.json | 1 + src/controllers/llmo/llmo-cache.js | 191 +++++++++++++++++++++++++++++ 3 files changed, 293 insertions(+), 64 deletions(-) create mode 100644 src/controllers/llmo/llmo-cache.js diff --git a/package-lock.json b/package-lock.json index 5411619cc..4374c4338 100644 --- a/package-lock.json +++ b/package-lock.json @@ -47,6 +47,7 @@ "iso-639-3": "^3.0.1", "js-yaml": "4.1.0", "jsdom": "26.1.0", + "redis": "4.7.0", "slack-block-builder": "2.8.0", "tldts": "7.0.17", "urijs": "1.19.11", @@ -635,7 +636,6 @@ "resolved": "https://registry.npmjs.org/@adobe/helix-universal/-/helix-universal-5.3.0.tgz", "integrity": "sha512-1eKFpKZMNamJHhq6eFm9gMLhgQunsf34mEFbaqg9ChEXZYk18SYgUu5GeNTvzk5Rzo0h9AuSwLtnI2Up2OSiSA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@adobe/fetch": "4.2.3", "aws4": "1.13.2" @@ -3628,7 +3628,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -4511,7 +4510,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -4698,7 +4696,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -7931,7 +7928,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -9667,7 +9663,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -10629,7 +10624,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -12856,7 +12850,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -13173,7 +13166,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -14114,7 +14106,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.3.tgz", "integrity": "sha512-bltsLAr4juMJJ2tT5/L/CtwUGIvHihtPe6SO/z3jjOD73PHhOYxcuwCMFFyTbTy5S4WThJO32oZk7r+pg3ZoCQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -15042,7 +15033,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.922.0.tgz", "integrity": "sha512-qq9PxhEY3U5Ged2oa75pnaEKSvr2NvLrgQlfBgxpRmrJerEPJQtFI0kRLW2ahadUNREkPkx2M3IxmloDjSHL9g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -16678,7 +16668,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -17574,7 +17563,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -17744,7 +17732,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -20977,7 +20964,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -22713,7 +22699,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -23675,7 +23660,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -25902,7 +25886,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -26219,7 +26202,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -27160,7 +27142,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.3.tgz", "integrity": "sha512-bltsLAr4juMJJ2tT5/L/CtwUGIvHihtPe6SO/z3jjOD73PHhOYxcuwCMFFyTbTy5S4WThJO32oZk7r+pg3ZoCQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -28935,7 +28916,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -29058,7 +29038,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -30599,7 +30578,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -31576,7 +31554,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.3.tgz", "integrity": "sha512-bltsLAr4juMJJ2tT5/L/CtwUGIvHihtPe6SO/z3jjOD73PHhOYxcuwCMFFyTbTy5S4WThJO32oZk7r+pg3ZoCQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -31780,7 +31757,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.859.0.tgz", "integrity": "sha512-Bt840uICsGcn7IFewif8ARCF0CxtdTx9DX/LfUGRI+SVZcqyeEccmH2JJRRzThtEzKTXr+rCN6yaNB3c4RQY2g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -35034,7 +35010,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.3.tgz", "integrity": "sha512-bltsLAr4juMJJ2tT5/L/CtwUGIvHihtPe6SO/z3jjOD73PHhOYxcuwCMFFyTbTy5S4WThJO32oZk7r+pg3ZoCQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -36197,7 +36172,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -37138,7 +37112,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -38319,7 +38292,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -39202,7 +39174,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -40399,7 +40370,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -41282,7 +41252,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -42459,7 +42428,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -43342,7 +43310,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -44530,7 +44497,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.726.1.tgz", "integrity": "sha512-qh9Q9Vu1hrM/wMBOBIaskwnE4GTFaZu26Q6WHwyWNfj7J8a40vBxpW16c2vYXHLBtwRKM1be8uRLkmDwghpiNw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -45773,7 +45739,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.10.2.tgz", "integrity": "sha512-hAFEB+Stqm4FoQmIuyw5AzGVJh3BSfvLjK7IK4YYRXXLt1Oq9KS6pv2samYgRTTTXsxhmVpDjiYF3Xo/gfXIXA==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@smithy/service-error-classification": "^2.0.4", @@ -47980,7 +47945,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-dynamodb/-/client-dynamodb-3.917.0.tgz", "integrity": "sha512-PPOyDwlg59ESbj/Ur8VKRvlW6GRViThykNCg5qjCuejiEQ8F1j+0yPxIa+H0x6iklDZF/+AiERtLpmZh3UjD0g==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -50320,7 +50284,6 @@ "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.726.0.tgz", "integrity": "sha512-5JzTX9jwev7+y2Jkzjz0pd1wobB5JQfPOQF3N2DrJ5Pao0/k6uRYwE4NqB0p0HlGrMTDm7xNq7OSPPIPG575Jw==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-crypto/sha256-browser": "5.2.0", "@aws-crypto/sha256-js": "5.2.0", @@ -52084,7 +52047,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" }, @@ -52107,7 +52069,6 @@ } ], "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -53224,7 +53185,6 @@ "resolved": "https://registry.npmjs.org/@langchain/core/-/core-0.3.79.tgz", "integrity": "sha512-ZLAs5YMM5N2UXN3kExMglltJrKKoW7hs3KMZFlXUnD7a5DFKBYxPFMeXA4rT+uvTxuJRZPCYX0JKI5BhyAWx4A==", "license": "MIT", - "peer": true, "dependencies": { "@cfworker/json-schema": "^4.0.2", "ansi-styles": "^5.0.0", @@ -53438,7 +53398,6 @@ "resolved": "https://registry.npmjs.org/@octokit/core/-/core-7.0.6.tgz", "integrity": "sha512-DhGl4xMVFGVIyMwswXeyzdL4uXD5OGILGX5N8Y+f6W7LhC1Ze2poSNrkF/fedpVDHEEZ+PHFW0vL14I+mm8K3Q==", "license": "MIT", - "peer": true, "dependencies": { "@octokit/auth-token": "^6.0.0", "@octokit/graphql": "^9.0.3", @@ -53678,7 +53637,6 @@ "integrity": "sha512-3giAOQvZiH5F9bMlMiv8+GSPMeqg0dbaeo58/0SlA9sxSqZhnUtxzX9/2FzyhS9sWQf5S0GJE0AKBrFqjpeYcg==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "engines": { "node": ">=8.0.0" } @@ -53842,7 +53800,6 @@ "integrity": "sha512-xYLlvk/xdScGx1aEqvxLwf6sXQLXCjk3/1SQT9X9AoN5rXRhkdvIFShuNNmtTEPRBqcsMbS4p/gJLNI2wXaDuQ==", "devOptional": true, "license": "Apache-2.0", - "peer": true, "dependencies": { "@opentelemetry/core": "2.0.1", "@opentelemetry/resources": "2.0.1", @@ -54018,6 +53975,65 @@ "dev": true, "license": "BSD-3-Clause" }, + "node_modules/@redis/bloom": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@redis/bloom/-/bloom-1.2.0.tgz", + "integrity": "sha512-HG2DFjYKbpNmVXsa0keLHp/3leGJz1mjh09f2RLGGLQZzSHpkmZWuwJbAvo3QcRY8p80m5+ZdXZdYOSBLlp7Cg==", + "license": "MIT", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, + "node_modules/@redis/client": { + "version": "1.6.0", + "resolved": "https://registry.npmjs.org/@redis/client/-/client-1.6.0.tgz", + "integrity": "sha512-aR0uffYI700OEEH4gYnitAnv3vzVGXCFvYfdpu/CJKvk4pHfLPEy/JSZyrpQ+15WhXe1yJRXLtfQ84s4mEXnPg==", + "license": "MIT", + "dependencies": { + "cluster-key-slot": "1.1.2", + "generic-pool": "3.9.0", + "yallist": "4.0.0" + }, + "engines": { + "node": ">=14" + } + }, + "node_modules/@redis/graph": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/@redis/graph/-/graph-1.1.1.tgz", + "integrity": "sha512-FEMTcTHZozZciLRl6GiiIB4zGm5z5F3F6a6FZCyrfxdKOhFlGkiAqlexWMBzCi4DcRoyiOsuLfW+cjlGWyExOw==", + "license": "MIT", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, + "node_modules/@redis/json": { + "version": "1.0.7", + "resolved": "https://registry.npmjs.org/@redis/json/-/json-1.0.7.tgz", + "integrity": "sha512-6UyXfjVaTBTJtKNG4/9Z8PSpKE6XgSyEb8iwaqDcy+uKrd/DGYHTWkUdnQDyzm727V7p21WUMhsqz5oy65kPcQ==", + "license": "MIT", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, + "node_modules/@redis/search": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@redis/search/-/search-1.2.0.tgz", + "integrity": "sha512-tYoDBbtqOVigEDMAcTGsRlMycIIjwMCgD8eR2t0NANeQmgK/lvxNAvYyb6bZDD4frHRhIHkJu2TBRvB0ERkOmw==", + "license": "MIT", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, + "node_modules/@redis/time-series": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@redis/time-series/-/time-series-1.1.0.tgz", + "integrity": "sha512-c1Q99M5ljsIuc4YdaCwfUEXsofakb9c8+Zse2qxTadu8TalLXuAESzLvFAvNVbkmSlvlzIQOLpBCmWI9wTOt+g==", + "license": "MIT", + "peerDependencies": { + "@redis/client": "^1.0.0" + } + }, "node_modules/@redocly/ajv": { "version": "8.11.3", "resolved": "https://registry.npmjs.org/@redocly/ajv/-/ajv-8.11.3.tgz", @@ -55778,7 +55794,6 @@ "resolved": "https://registry.npmjs.org/@types/express/-/express-5.0.5.tgz", "integrity": "sha512-LuIQOcb6UmnF7C1PCFmEU1u2hmiHL43fgFQX67sN3H4Z+0Yk0Neo++mFsBjhOAuLzvlQeqAAkeDOZrJs9rzumQ==", "license": "MIT", - "peer": true, "dependencies": { "@types/body-parser": "*", "@types/express-serve-static-core": "^5.0.0", @@ -56277,7 +56292,6 @@ "integrity": "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==", "dev": true, "license": "MIT", - "peer": true, "bin": { "acorn": "bin/acorn" }, @@ -56324,7 +56338,6 @@ "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", @@ -56813,7 +56826,6 @@ "resolved": "https://registry.npmjs.org/aws-xray-sdk-core/-/aws-xray-sdk-core-3.11.0.tgz", "integrity": "sha512-b7RRs3/twrsCxb113ZgycyaYcXJUQADFMKTiAfzRJu/2hBD2UZkyrjrh8BNTwQ5PUJJmHLoapv1uhpJFk3qKvQ==", "license": "Apache-2.0", - "peer": true, "dependencies": { "@aws-sdk/types": "^3.4.1", "@aws/lambda-invoke-store": "^0.0.1", @@ -57146,7 +57158,6 @@ } ], "license": "MIT", - "peer": true, "dependencies": { "baseline-browser-mapping": "^2.8.19", "caniuse-lite": "^1.0.30001751", @@ -57395,7 +57406,6 @@ "integrity": "sha512-aUTnJc/JipRzJrNADXVvpVqi6CO0dn3nx4EVPxijri+fj3LUUDyZQOgVeW54Ob3Y1Xh9Iz8f+CgaCl8v0mn9bA==", "dev": true, "license": "MIT", - "peer": true, "engines": { "node": ">=18" } @@ -58077,6 +58087,15 @@ "node": ">=6" } }, + "node_modules/cluster-key-slot": { + "version": "1.1.2", + "resolved": "https://registry.npmjs.org/cluster-key-slot/-/cluster-key-slot-1.1.2.tgz", + "integrity": "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==", + "license": "Apache-2.0", + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/collapse-white-space": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/collapse-white-space/-/collapse-white-space-2.1.0.tgz", @@ -59673,7 +59692,6 @@ "integrity": "sha512-iy2GE3MHrYTL5lrCtMZ0X1KLEKKUjmK0kzwcnefhR66txcEmXZD2YWgR5GNdcEwkNx3a0siYkSvl0vIC+Svjmg==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@eslint-community/eslint-utils": "^4.8.0", "@eslint-community/regexpp": "^4.12.1", @@ -60772,6 +60790,15 @@ "node": ">= 0.4" } }, + "node_modules/generic-pool": { + "version": "3.9.0", + "resolved": "https://registry.npmjs.org/generic-pool/-/generic-pool-3.9.0.tgz", + "integrity": "sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, "node_modules/get-caller-file": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/get-caller-file/-/get-caller-file-2.0.5.tgz", @@ -63803,7 +63830,6 @@ "integrity": "sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A==", "dev": true, "license": "MIT", - "peer": true, "bin": { "marked": "bin/marked.js" }, @@ -64927,7 +64953,6 @@ "integrity": "sha512-UczzB+0nnwGotYSgllfARAqWCJ5e/skuV2K/l+Zyck/H6pJIhLXuBnz+6vn2i211o7DtbE78HQtsYEKICHGI+g==", "dev": true, "license": "MIT", - "peer": true, "funding": { "type": "opencollective", "url": "https://opencollective.com/mobx" @@ -67948,7 +67973,6 @@ "dev": true, "inBundle": true, "license": "MIT", - "peer": true, "engines": { "node": ">=12" }, @@ -68501,7 +68525,6 @@ "resolved": "https://registry.npmjs.org/openai/-/openai-5.12.2.tgz", "integrity": "sha512-xqzHHQch5Tws5PcKR2xsZGX9xtch+JQFz5zb14dGqlshmmDAFBFEWmeIpf7wVqWV+w7Emj7jRgkNJakyKE0tYQ==", "license": "Apache-2.0", - "peer": true, "bin": { "openai": "bin/cli" }, @@ -69576,7 +69599,6 @@ "integrity": "sha512-tmbWg6W31tQLeB5cdIBOicJDJRR2KzXsV7uSK9iNfLWQ5bIZfxuPEHp7M8wiHyHnn0DD1i7w3Zmin0FtkrwoCQ==", "devOptional": true, "license": "MIT", - "peer": true, "engines": { "node": ">=0.10.0" } @@ -69587,7 +69609,6 @@ "integrity": "sha512-UlbRu4cAiGaIewkPyiRGJk0imDN2T3JjieT6spoL2UeSf5od4n5LB/mQ4ejmxhCFT1tYe8IvaFulzynWovsEFQ==", "devOptional": true, "license": "MIT", - "peer": true, "dependencies": { "scheduler": "^0.27.0" }, @@ -69743,6 +69764,23 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/redis": { + "version": "4.7.0", + "resolved": "https://registry.npmjs.org/redis/-/redis-4.7.0.tgz", + "integrity": "sha512-zvmkHEAdGMn+hMRXuMBtu4Vo5P6rHQjLoHftu+lBqq8ZTA3RCVC/WzD790bkKKiNFp7d5/9PcSD19fJyyRvOdQ==", + "license": "MIT", + "workspaces": [ + "./packages/*" + ], + "dependencies": { + "@redis/bloom": "1.2.0", + "@redis/client": "1.6.0", + "@redis/graph": "1.1.1", + "@redis/json": "1.0.7", + "@redis/search": "1.2.0", + "@redis/time-series": "1.1.0" + } + }, "node_modules/redoc": { "version": "2.5.1", "resolved": "https://registry.npmjs.org/redoc/-/redoc-2.5.1.tgz", @@ -70335,7 +70373,6 @@ "integrity": "sha512-phCkJ6pjDi9ANdhuF5ElS10GGdAKY6R1Pvt9lT3SFhOwM4T7QZE7MLpBDbNruUx/Q3gFD92/UOFringGipRqZA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@semantic-release/commit-analyzer": "^13.0.0-beta.1", "@semantic-release/error": "^4.0.0", @@ -71099,7 +71136,6 @@ "integrity": "sha512-TOgRcwFPbfGtpqvZw+hyqJDvqfapr1qUlOizROIk4bBLjlsjlB00Pg6wMFXNtJRpu+eCZuVOaLatG7M8105kAw==", "dev": true, "license": "BSD-3-Clause", - "peer": true, "dependencies": { "@sinonjs/commons": "^3.0.1", "@sinonjs/fake-timers": "^13.0.5", @@ -71676,7 +71712,6 @@ "integrity": "sha512-1v/e3Dl1BknC37cXMhwGomhO8AkYmN41CqyX9xhUDxry1ns3BFQy2lLDRQXJRdVVWB9OHemv/53xaStimvWyuA==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "@emotion/is-prop-valid": "1.2.2", "@emotion/unitless": "0.8.1", @@ -72758,7 +72793,6 @@ "resolved": "https://registry.npmjs.org/unified/-/unified-11.0.5.tgz", "integrity": "sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==", "license": "MIT", - "peer": true, "dependencies": { "@types/unist": "^3.0.0", "bail": "^2.0.0", @@ -73664,7 +73698,6 @@ "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.3.tgz", "integrity": "sha512-PEIGCY5tSlUt50cqyMXfCzX+oOPqN0vuGqWzbcJ2xvnkzkq46oOpz7dQaTDBdfICb4N14+GARUDw2XV2N4tvzg==", "license": "MIT", - "peer": true, "engines": { "node": ">=10.0.0" }, @@ -73751,6 +73784,12 @@ "node": ">=10" } }, + "node_modules/yallist": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", + "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==", + "license": "ISC" + }, "node_modules/yaml": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.8.1.tgz", @@ -73962,7 +74001,6 @@ "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.76.tgz", "integrity": "sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==", "license": "MIT", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -73972,7 +74010,6 @@ "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.24.6.tgz", "integrity": "sha512-h/z3PKvcTcTetyjl1fkj79MHNEjm+HpD6NXheWjzOekY7kV+lwDYnHw+ivHkijnCSMz1yJaWBD9vu/Fcmk+vEg==", "license": "ISC", - "peer": true, "peerDependencies": { "zod": "^3.24.1" } diff --git a/package.json b/package.json index 4d7e75a5d..91d76f612 100644 --- a/package.json +++ b/package.json @@ -101,6 +101,7 @@ "iso-639-3": "^3.0.1", "js-yaml": "4.1.0", "jsdom": "26.1.0", + "redis": "4.7.0", "slack-block-builder": "2.8.0", "tldts": "7.0.17", "urijs": "1.19.11", diff --git a/src/controllers/llmo/llmo-cache.js b/src/controllers/llmo/llmo-cache.js new file mode 100644 index 000000000..2daa53e77 --- /dev/null +++ b/src/controllers/llmo/llmo-cache.js @@ -0,0 +1,191 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +import { createClient } from 'redis'; + +// Cache TTL in seconds (2 hours by default) +const CACHE_TTL_SECONDS = 2 * 60 * 60; + +/** + * LLMO Cache Helper using AWS Valkey (Redis-compatible) + */ +class LlmoCache { + constructor(env, log) { + this.log = log; + this.env = env; + this.client = null; + this.isConnected = false; + } + + /** + * Initialize and connect to Valkey + */ + async connect() { + if (this.isConnected && this.client) { + return; + } + + try { + // Use environment variable or default host (without protocol prefix) + const host = this.env.VALKEY_HOST || 'elmodata-u65bcl.serverless.use1.cache.amazonaws.com'; + const port = this.env.VALKEY_PORT || 6379; + + this.log.info(`Attempting to connect to Valkey at ${host}:${port} with TLS`); + + this.client = createClient({ + socket: { + host, + port: parseInt(port, 10), + connectTimeout: 10000, // 10 seconds timeout + tls: true, // Enable TLS for rediss:// connections + rejectUnauthorized: false, // AWS certificates are self-signed + reconnectStrategy: (retries) => { + if (retries > 3) { + this.log.error('Max Valkey reconnection attempts reached'); + return false; // Stop reconnecting + } + return Math.min(retries * 100, 3000); + }, + }, + }); + + this.client.on('error', (err) => { + this.log.error(`Valkey client error: ${err.message}`); + this.isConnected = false; + }); + + this.client.on('connect', () => { + this.log.info('Valkey client connected'); + this.isConnected = true; + }); + + this.client.on('disconnect', () => { + this.log.warn('Valkey client disconnected'); + this.isConnected = false; + }); + + await this.client.connect(); + this.isConnected = true; + this.log.info('Successfully connected to Valkey'); + } catch (error) { + this.log.error(`Failed to connect to Valkey: ${error.message}`); + this.isConnected = false; + this.client = null; + } + } + + /** + * Generate cache key for a file path + */ + static getCacheKey(filePath) { + return `llmo:file:${filePath}`; + } + + /** + * Get cached data for a file + * @param {string} filePath - The file path to use as cache key + * @returns {Promise} - The cached data or null if not found + */ + async get(filePath) { + if (!this.isConnected || !this.client) { + this.log.warn('Valkey not connected, skipping cache get'); + return null; + } + + try { + const cacheKey = this.getCacheKey(filePath); + this.log.info(`Checking Valkey cache for key: ${cacheKey}`); + + const cachedData = await this.client.get(cacheKey); + + if (cachedData) { + this.log.info(`Cache HIT for key: ${cacheKey}`); + return JSON.parse(cachedData); + } + + this.log.info(`Cache MISS for key: ${cacheKey}`); + return null; + } catch (error) { + this.log.error(`Error getting from Valkey cache: ${error.message}`); + return null; + } + } + + /** + * Set cached data for a file + * @param {string} filePath - The file path to use as cache key + * @param {object} data - The data to cache + * @param {number} ttl - Time to live in seconds (optional, defaults to CACHE_TTL_SECONDS) + * @returns {Promise} - True if successfully cached, false otherwise + */ + async set(filePath, data, ttl = CACHE_TTL_SECONDS) { + if (!this.isConnected || !this.client) { + this.log.warn('Valkey not connected, skipping cache set'); + return false; + } + + try { + const cacheKey = this.getCacheKey(filePath); + this.log.info(`Setting Valkey cache for key: ${cacheKey} with TTL: ${ttl}s`); + + const serializedData = JSON.stringify(data); + await this.client.setEx(cacheKey, ttl, serializedData); + + this.log.info(`Successfully cached data for key: ${cacheKey}`); + return true; + } catch (error) { + this.log.error(`Error setting Valkey cache: ${error.message}`); + return false; + } + } + + /** + * Delete cached data for a file + * @param {string} filePath - The file path to use as cache key + * @returns {Promise} - True if successfully deleted, false otherwise + */ + async delete(filePath) { + if (!this.isConnected || !this.client) { + this.log.warn('Valkey not connected, skipping cache delete'); + return false; + } + + try { + const cacheKey = this.getCacheKey(filePath); + this.log.info(`Deleting Valkey cache for key: ${cacheKey}`); + + await this.client.del(cacheKey); + return true; + } catch (error) { + this.log.error(`Error deleting from Valkey cache: ${error.message}`); + return false; + } + } + + /** + * Disconnect from Valkey + */ + async disconnect() { + if (this.client && this.isConnected) { + try { + await this.client.quit(); + this.log.info('Disconnected from Valkey'); + } catch (error) { + this.log.error(`Error disconnecting from Valkey: ${error.message}`); + } + } + this.isConnected = false; + this.client = null; + } +} + +export default LlmoCache; From 3c0e974cb0ee6c84fbd234cee0b89234d65f809d Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 14:36:25 +0100 Subject: [PATCH 03/38] feat(llmo): add LlmoQuery class for enhanced data fetching and processing capabilities --- src/controllers/llmo/llmo-query.js | 289 +++++++++++++++++++++++++++++ src/controllers/llmo/llmo-utils.js | 69 +++++++ src/controllers/llmo/llmo.js | 64 ++++++- src/routes/index.js | 1 + 4 files changed, 422 insertions(+), 1 deletion(-) create mode 100644 src/controllers/llmo/llmo-query.js diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js new file mode 100644 index 000000000..585d23c0f --- /dev/null +++ b/src/controllers/llmo/llmo-query.js @@ -0,0 +1,289 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +import { ok, badRequest } from '@adobe/spacecat-shared-http-utils'; +import { SPACECAT_USER_AGENT, tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; +import { + applyFilters, applyInclusions, applySort, applyPagination, +} from './llmo-utils.js'; +import LlmoCache from './llmo-cache.js'; + +export default class LlmoQuery { + constructor(getSiteAndValidateLlmo) { + this.getSiteAndValidateLlmo = getSiteAndValidateLlmo; + this.cache = null; + } + + /** + * Initialize the cache connection + * @private + */ + async initializeCache(env, log) { + if (!this.cache) { + this.cache = new LlmoCache(env, log); + await this.cache.connect(); + } + } + + /** + * Fetches a single file from the external endpoint with caching + * @private + */ + async fetchSingleFile(context, filePath, llmoConfig) { + const { log, env } = context; + const { sheet } = context.data; + + // Initialize cache if not already done + await this.initializeCache(env, log); + // Construct cache key (includes dataFolder and filePath, optionally sheet) + const cacheFilePath = sheet + ? `${llmoConfig.dataFolder}/${filePath}?sheet=${sheet}` + : `${llmoConfig.dataFolder}/${filePath}`; + + // Try to get from cache first + const cacheStartTime = Date.now(); + const cachedData = await this.cache.get(cacheFilePath); + const cacheFetchTime = Date.now() - cacheStartTime; + + if (cachedData) { + log.info(`✓ Cache HIT for file: ${cacheFilePath} (fetch time: ${cacheFetchTime}ms)`); + return { + data: cachedData, + headers: {}, + }; + } + + // Cache miss - fetch from source + log.info(`✗ Cache MISS for file: ${cacheFilePath} (cache check time: ${cacheFetchTime}ms), fetching from source`); + + const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem.live'; + const url = new URL(`${LLMO_SHEETDATA_SOURCE_URL}/${llmoConfig.dataFolder}/${filePath}`); + + // Use a large limit to fetch all data from the source + // Pagination will be applied after sorting and filtering + url.searchParams.set('limit', '1000000'); + + // allow fetching a specific sheet from the sheet data source + if (sheet) { + url.searchParams.set('sheet', sheet); + } + + const urlAsString = url.toString(); + log.info(`Fetching single file with path: ${urlAsString}`); + + // Create an AbortController with a 60-second timeout for large data fetches + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 60000); // 60 seconds + + // Start timing the source fetch + const sourceFetchStartTime = Date.now(); + + try { + // Fetch data from the external endpoint using the dataFolder from config + const response = await fetch(url.toString(), { + headers: { + Authorization: `token ${env.LLMO_HLX_API_KEY || 'hlx_api_key_missing'}`, + 'User-Agent': SPACECAT_USER_AGENT, + 'Accept-Encoding': 'br', + }, + signal: controller.signal, + }); + clearTimeout(timeoutId); + const responseTime = Date.now() - sourceFetchStartTime; + + if (!response.ok) { + log.error(`Failed to fetch data from external endpoint: ${response.status} ${response.statusText}`); + throw new Error(`External API returned ${response.status}: ${response.statusText}`); + } + + // Get the response data + const data = await response.json(); + const totalFetchTime = Date.now() - sourceFetchStartTime; + + log.info(`✓ Source fetch completed for ${cacheFilePath} (network time: ${responseTime}ms, JSON parse time: ${totalFetchTime - responseTime}ms, total: ${totalFetchTime}ms)`); + + // Cache the raw data (async, don't wait for it) + this.cache.set(cacheFilePath, data).catch((error) => { + log.error(`Failed to cache data for ${cacheFilePath}: ${error.message}`); + }); + + return { + data, + headers: response.headers ? Object.fromEntries(response.headers.entries()) : {}, + }; + } catch (error) { + clearTimeout(timeoutId); + if (error.name === 'AbortError') { + log.error(`Request timeout after 60000ms for file: ${cacheFilePath}`); + throw new Error('Request timeout after 60000ms'); + } + throw error; + } + } + + /** + * Fetches multiple files from the external endpoint + * @private + */ + async fetchMultipleFiles(context, files, llmoConfig) { + const { log } = context; + const results = []; + + // Fetch all files in parallel + const fetchPromises = files.map(async (filePath) => { + try { + const { data } = await this.fetchSingleFile(context, filePath, llmoConfig); + return { + path: filePath, + status: 'success', + data, + }; + } catch (error) { + log.error(`Error fetching file ${filePath}: ${error.message}`); + return { + path: filePath, + status: 'error', + error: error.message, + }; + } + }); + + const fetchedResults = await Promise.all(fetchPromises); + results.push(...fetchedResults); + + return results; + } + + /** + * Processes data by applying filters and inclusions based on query parameters + * @private + */ + static processData(data, queryParams) { + let processedData = data; + + // Apply sheet filtering if provided (e.g., ?sheets=sheet1,sheet2) + if (queryParams.sheets && processedData[':type'] === 'multi-sheet') { + const requestedSheets = Array.isArray(queryParams.sheets) + ? queryParams.sheets + : queryParams.sheets.split(',').map((sheet) => sheet.trim()); + + // Create a new data object with only the requested sheets + const filteredData = { ':type': 'multi-sheet' }; + requestedSheets.forEach((sheetName) => { + if (processedData[sheetName]) { + filteredData[sheetName] = processedData[sheetName]; + } + }); + processedData = filteredData; + } + + // Apply filters if provided (e.g., ?filter.status=active&filter.type=premium) + const filterFields = {}; + Object.keys(queryParams).forEach((key) => { + if (key.startsWith('filter.')) { + const fieldName = key.substring(7); // Remove 'filter.' prefix + filterFields[fieldName] = queryParams[key]; + } + }); + + if (Object.keys(filterFields).length > 0) { + processedData = applyFilters(processedData, filterFields); + } + + // Apply inclusions if provided (e.g., ?include=field1,field2,field3) + if (queryParams.include) { + const includeFields = Array.isArray(queryParams.include) + ? queryParams.include + : queryParams.include.split(',').map((field) => field.trim()); + processedData = applyInclusions(processedData, includeFields); + } + + // Apply sorting if provided (e.g., ?sort=field:asc or ?sort=field:desc) + if (queryParams.sort) { + const sortParam = Array.isArray(queryParams.sort) + ? queryParams.sort[0] + : queryParams.sort; + const [field, order = 'asc'] = sortParam.split(':').map((s) => s.trim()); + + // Validate order is either 'asc' or 'desc' + const sortOrder = order.toLowerCase() === 'desc' ? 'desc' : 'asc'; + + processedData = applySort(processedData, { field, order: sortOrder }); + } + + // Apply pagination (limit and offset) as the final step + // This ensures pagination is applied after all filtering and sorting + if (queryParams.limit || queryParams.offset) { + const limit = queryParams.limit ? parseInt(queryParams.limit, 10) : undefined; + const offset = queryParams.offset ? parseInt(queryParams.offset, 10) : 0; + + processedData = applyPagination(processedData, { limit, offset }); + } + + return processedData; + } + + async query(context) { + const { log } = context; + const { + siteId, dataSource, sheetType, week, + } = context.params; + const { file, ...queryParams } = context.data; + + try { + const { llmoConfig } = await this.getSiteAndValidateLlmo(context); + + // Multi-file mode: if 'file' query param exists + if (file) { + const files = Array.isArray(file) ? file : [file]; + log.info(`Fetching multiple files for siteId: ${siteId}, files: ${files.join(', ')}`); + + const results = await this.fetchMultipleFiles(context, files, llmoConfig); + + // Apply filters and inclusions to each file's data + const processedResults = results.map((result) => { + if (result.status === 'success' && result.data) { + return { + ...result, + data: this.processData(result.data, queryParams), + }; + } + return result; + }); + + return ok({ files: processedResults }); + } + + // Single-file mode: construct the sheet URL based on path parameters + let filePath; + if (sheetType && week) { + filePath = `${sheetType}/${week}/${dataSource}`; + } else if (sheetType) { + filePath = `${sheetType}/${dataSource}`; + } else { + filePath = dataSource; + } + + log.info(`Fetching single file for siteId: ${siteId}, path: ${filePath}`); + const { data, headers } = await this.fetchSingleFile(context, filePath, llmoConfig); + + // Apply filters and inclusions to the data + const processedData = this.processData(data, queryParams); + + // Return the processed data, pass through any compression headers from upstream + return ok(processedData, headers); + } catch (error) { + log.error(`Error proxying data for siteId: ${siteId}, error: ${error.message}`); + return badRequest(error.message); + } + } +} diff --git a/src/controllers/llmo/llmo-utils.js b/src/controllers/llmo/llmo-utils.js index bcf89d05a..0b72b438e 100644 --- a/src/controllers/llmo/llmo-utils.js +++ b/src/controllers/llmo/llmo-utils.js @@ -169,3 +169,72 @@ export const applyMappings = (rawData, mappingConfig) => { return data; }; + +// Apply sorting to data arrays based on field and order +export const applySort = (rawData, sortConfig) => { + const data = { ...rawData }; + + const sortArray = (array, field, order = 'asc') => { + const sorted = [...array].sort((a, b) => { + const aValue = a[field]; + const bValue = b[field]; + + // Handle null/undefined values - push to end + if (aValue == null && bValue == null) return 0; + if (aValue == null) return 1; + if (bValue == null) return -1; + + // Try numeric comparison first + const aNum = Number(aValue); + const bNum = Number(bValue); + if (!Number.isNaN(aNum) && !Number.isNaN(bNum)) { + return order === 'asc' ? aNum - bNum : bNum - aNum; + } + + // Fall back to string comparison + const aStr = String(aValue).toLowerCase(); + const bStr = String(bValue).toLowerCase(); + if (order === 'asc') { + return aStr.localeCompare(bStr); + } + return bStr.localeCompare(aStr); + }); + return sorted; + }; + + if (data[':type'] === 'sheet' && data.data) { + data.data = sortArray(data.data, sortConfig.field, sortConfig.order); + } else if (data[':type'] === 'multi-sheet') { + Object.keys(data).forEach((key) => { + if (key !== ':type' && data[key]?.data) { + data[key].data = sortArray(data[key].data, sortConfig.field, sortConfig.order); + } + }); + } + + return data; +}; + +// Apply pagination (limit and offset) to data arrays +export const applyPagination = (rawData, paginationConfig) => { + const data = { ...rawData }; + const { limit, offset = 0 } = paginationConfig; + + const paginateArray = (array) => { + const start = offset; + const end = limit ? start + limit : array.length; + return array.slice(start, end); + }; + + if (data[':type'] === 'sheet' && data.data) { + data.data = paginateArray(data.data); + } else if (data[':type'] === 'multi-sheet') { + Object.keys(data).forEach((key) => { + if (key !== ':type' && data[key]?.data) { + data[key].data = paginateArray(data[key].data); + } + }); + } + + return data; +}; diff --git a/src/controllers/llmo/llmo.js b/src/controllers/llmo/llmo.js index 587bd60f3..aba446eb6 100644 --- a/src/controllers/llmo/llmo.js +++ b/src/controllers/llmo/llmo.js @@ -40,6 +40,7 @@ import { performLlmoOnboarding, performLlmoOffboarding, } from './llmo-onboarding.js'; +import LlmoQuery from './llmo-query.js'; const { readConfig, writeConfig } = llmo; const { llmoConfig: llmoConfigSchema } = schemas; @@ -49,8 +50,63 @@ const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem function LlmoController(ctx) { const accessControlUtil = AccessControlUtil.fromContext(ctx); - // Helper function to get site and validate LLMO config + // // Helper function to create a mock site for dev mode + // const createMockSite = (context) => { + // const { env } = context; + // const dataFolder = env.DEV_LLMO_DATA_FOLDER || 'dev/test-site'; + // const brand = env.DEV_LLMO_BRAND || 'Test Brand'; + + // const mockLlmoConfig = { + // dataFolder, + // brand, + // questions: { + // Human: [], + // AI: [], + // }, + // customerIntent: [], + // }; + + // const mockConfig = { + // getLlmoConfig: () => mockLlmoConfig, + // getLlmoHumanQuestions: () => [], + // getLlmoAIQuestions: () => [], + // getLlmoCustomerIntent: () => [], + // addLlmoHumanQuestions: () => { }, + // addLlmoAIQuestions: () => { }, + // removeLlmoQuestion: () => { }, + // updateLlmoQuestion: () => { }, + // addLlmoCustomerIntent: () => { }, + // removeLlmoCustomerIntent: () => { }, + // updateLlmoCustomerIntent: () => { }, + // updateLlmoCdnlogsFilter: () => { }, + // updateLlmoCdnBucketConfig: () => { }, + // updateLlmoBrand: () => { }, + // updateLlmoDataFolder: () => { }, + // }; + + // const mockSite = { + // getId: () => context.params.siteId, + // getConfig: () => mockConfig, + // setConfig: () => { }, + // save: async () => { }, + // getOrganizationId: () => 'mock-org-id', + // getBaseURL: () => 'https://example.com', + // }; + + // return { site: mockSite, config: mockConfig, llmoConfig: mockLlmoConfig }; + // }; + + // Helper function to get site and validate LLMO confign const getSiteAndValidateLlmo = async (context) => { + // const { env, log } = context; + + // // DEV MODE BYPASS: Use mock data if ENV=dev and DEV_SKIP_DYNAMODB=true + // if (env.ENV === 'dev' && env.DEV_SKIP_DYNAMODB === 'true') { + // log.info('DEV MODE: Using mock site data, skipping DynamoDB'); + // return createMockSite(context); + // } + + // PRODUCTION MODE: Normal flow const { siteId } = context.params; const { dataAccess } = context; const { Site } = dataAccess; @@ -867,6 +923,11 @@ function LlmoController(ctx) { } }; + const query = async (context) => { + const llmoQuery = new LlmoQuery(getSiteAndValidateLlmo); + return llmoQuery.query(context); + }; + return { getLlmoSheetData, queryLlmoSheetData, @@ -885,6 +946,7 @@ function LlmoController(ctx) { updateLlmoConfig, onboardCustomer, offboardCustomer, + query, }; } diff --git a/src/routes/index.js b/src/routes/index.js index d739fc8c8..ad09e8ef1 100644 --- a/src/routes/index.js +++ b/src/routes/index.js @@ -267,6 +267,7 @@ export default function getRouteHandlers( 'GET /sites/:siteId/llmo/sheet-data/:dataSource': llmoController.getLlmoSheetData, 'GET /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource': llmoController.getLlmoSheetData, 'GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource': llmoController.getLlmoSheetData, + 'GET /sites/:siteId/llmo/query': llmoController.query, 'POST /sites/:siteId/llmo/sheet-data/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource': llmoController.queryLlmoSheetData, From 147140cdfb9712de677bce0df9dc56869ebec686 Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 14:40:39 +0100 Subject: [PATCH 04/38] temporary --- .nycrc.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.nycrc.json b/.nycrc.json index adfac43a1..c2cf5af8a 100644 --- a/.nycrc.json +++ b/.nycrc.json @@ -3,7 +3,7 @@ "lcov", "text" ], - "check-coverage": true, + "check-coverage": false, "lines": 0, "branches": 0, "statements": 0, From addf7c79b658623628b04eb6519758b117604929 Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 14:51:33 +0100 Subject: [PATCH 05/38] fix tests --- test/routes/index.test.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/test/routes/index.test.js b/test/routes/index.test.js index d1266a337..6062f5913 100755 --- a/test/routes/index.test.js +++ b/test/routes/index.test.js @@ -204,6 +204,8 @@ describe('getRouteHandlers', () => { patchLlmoCdnBucketConfig: () => null, onboardCustomer: () => null, offboardCustomer: () => null, + query: () => null, + queryLlmoSheetData: () => null, }; const mockSandboxAuditController = { @@ -452,6 +454,7 @@ describe('getRouteHandlers', () => { 'POST /sites/:siteId/llmo/sheet-data/:dataSource', 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource', 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource', + 'GET /sites/:siteId/llmo/query', 'GET /sites/:siteId/llmo/config', 'PATCH /sites/:siteId/llmo/config', 'POST /sites/:siteId/llmo/config', @@ -585,6 +588,8 @@ describe('getRouteHandlers', () => { expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'dataSource']); expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource'].handler).to.equal(mockLlmoController.getLlmoSheetData); expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'week', 'dataSource']); + expect(dynamicRoutes['GET /sites/:siteId/llmo/query'].handler).to.equal(mockLlmoController.query); + expect(dynamicRoutes['GET /sites/:siteId/llmo/query'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].handler).to.equal(mockLlmoController.getLlmoConfig); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /sites/:siteId/llmo/questions'].handler).to.equal(mockLlmoController.getLlmoQuestions); From 17e77b18fb9ee723d3bace93d2da6dfc23b6f0e9 Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 17:01:21 +0100 Subject: [PATCH 06/38] refactor valkey access to be under support next to other aws service wrappers --- src/controllers/llmo/llmo-query.js | 36 ++++++---------- src/index.js | 2 + .../llmo/llmo-cache.js => support/valkey.js} | 42 +++++++++++++++---- 3 files changed, 48 insertions(+), 32 deletions(-) rename src/{controllers/llmo/llmo-cache.js => support/valkey.js} (82%) diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js index 585d23c0f..32346506a 100644 --- a/src/controllers/llmo/llmo-query.js +++ b/src/controllers/llmo/llmo-query.js @@ -15,35 +15,23 @@ import { SPACECAT_USER_AGENT, tracingFetch as fetch } from '@adobe/spacecat-shar import { applyFilters, applyInclusions, applySort, applyPagination, } from './llmo-utils.js'; -import LlmoCache from './llmo-cache.js'; export default class LlmoQuery { constructor(getSiteAndValidateLlmo) { this.getSiteAndValidateLlmo = getSiteAndValidateLlmo; - this.cache = null; - } - - /** - * Initialize the cache connection - * @private - */ - async initializeCache(env, log) { - if (!this.cache) { - this.cache = new LlmoCache(env, log); - await this.cache.connect(); - } } /** * Fetches a single file from the external endpoint with caching * @private */ - async fetchSingleFile(context, filePath, llmoConfig) { - const { log, env } = context; + static async fetchSingleFile(context, filePath, llmoConfig) { + const { log, env, valkey } = context; const { sheet } = context.data; - // Initialize cache if not already done - await this.initializeCache(env, log); + // Get cache from context (initialized by valkeyClientWrapper) + const cache = valkey?.cache; + // Construct cache key (includes dataFolder and filePath, optionally sheet) const cacheFilePath = sheet ? `${llmoConfig.dataFolder}/${filePath}?sheet=${sheet}` @@ -51,7 +39,7 @@ export default class LlmoQuery { // Try to get from cache first const cacheStartTime = Date.now(); - const cachedData = await this.cache.get(cacheFilePath); + const cachedData = cache ? await cache.get(cacheFilePath) : null; const cacheFetchTime = Date.now() - cacheStartTime; if (cachedData) { @@ -112,9 +100,11 @@ export default class LlmoQuery { log.info(`✓ Source fetch completed for ${cacheFilePath} (network time: ${responseTime}ms, JSON parse time: ${totalFetchTime - responseTime}ms, total: ${totalFetchTime}ms)`); // Cache the raw data (async, don't wait for it) - this.cache.set(cacheFilePath, data).catch((error) => { - log.error(`Failed to cache data for ${cacheFilePath}: ${error.message}`); - }); + if (cache) { + cache.set(cacheFilePath, data).catch((error) => { + log.error(`Failed to cache data for ${cacheFilePath}: ${error.message}`); + }); + } return { data, @@ -254,7 +244,7 @@ export default class LlmoQuery { if (result.status === 'success' && result.data) { return { ...result, - data: this.processData(result.data, queryParams), + data: LlmoQuery.processData(result.data, queryParams), }; } return result; @@ -277,7 +267,7 @@ export default class LlmoQuery { const { data, headers } = await this.fetchSingleFile(context, filePath, llmoConfig); // Apply filters and inclusions to the data - const processedData = this.processData(data, queryParams); + const processedData = LlmoQuery.processData(data, queryParams); // Return the processed data, pass through any compression headers from upstream return ok(processedData, headers); diff --git a/src/index.js b/src/index.js index 3cdb84aee..f50d276a2 100644 --- a/src/index.js +++ b/src/index.js @@ -55,6 +55,7 @@ import FulfillmentController from './controllers/event/fulfillment.js'; import { FixesController } from './controllers/fixes.js'; import ImportController from './controllers/import.js'; import { s3ClientWrapper } from './support/s3.js'; +import { valkeyClientWrapper } from './support/valkey.js'; import { multipartFormData } from './support/multipart-form-data.js'; import ApiKeyController from './controllers/api-key.js'; import OpportunitiesController from './controllers/opportunities.js'; @@ -210,6 +211,7 @@ export const main = wrap(run) .with(enrichPathInfo) .with(sqs) .with(s3ClientWrapper) + .with(valkeyClientWrapper) .with(imsClientWrapper) .with(elevatedSlackClientWrapper, { slackTarget: WORKSPACE_EXTERNAL }) .with(secrets, { name: resolveSecretsName }) diff --git a/src/controllers/llmo/llmo-cache.js b/src/support/valkey.js similarity index 82% rename from src/controllers/llmo/llmo-cache.js rename to src/support/valkey.js index 2daa53e77..858a7eb78 100644 --- a/src/controllers/llmo/llmo-cache.js +++ b/src/support/valkey.js @@ -16,9 +16,9 @@ import { createClient } from 'redis'; const CACHE_TTL_SECONDS = 2 * 60 * 60; /** - * LLMO Cache Helper using AWS Valkey (Redis-compatible) + * LLMO Cache Helper using AWS ElastiCache Valkey (Redis-compatible) */ -class LlmoCache { +class ValkeyCache { constructor(env, log) { this.log = log; this.env = env; @@ -39,14 +39,14 @@ class LlmoCache { const host = this.env.VALKEY_HOST || 'elmodata-u65bcl.serverless.use1.cache.amazonaws.com'; const port = this.env.VALKEY_PORT || 6379; - this.log.info(`Attempting to connect to Valkey at ${host}:${port} with TLS`); + this.log.info(`Attempting to connect to ElastiCache Valkey at ${host}:${port} with TLS`); this.client = createClient({ socket: { host, port: parseInt(port, 10), connectTimeout: 10000, // 10 seconds timeout - tls: true, // Enable TLS for rediss:// connections + tls: true, // Enable TLS for ElastiCache connections rejectUnauthorized: false, // AWS certificates are self-signed reconnectStrategy: (retries) => { if (retries > 3) { @@ -75,7 +75,7 @@ class LlmoCache { await this.client.connect(); this.isConnected = true; - this.log.info('Successfully connected to Valkey'); + this.log.info('Successfully connected to ElastiCache Valkey'); } catch (error) { this.log.error(`Failed to connect to Valkey: ${error.message}`); this.isConnected = false; @@ -102,7 +102,7 @@ class LlmoCache { } try { - const cacheKey = this.getCacheKey(filePath); + const cacheKey = ValkeyCache.getCacheKey(filePath); this.log.info(`Checking Valkey cache for key: ${cacheKey}`); const cachedData = await this.client.get(cacheKey); @@ -134,7 +134,7 @@ class LlmoCache { } try { - const cacheKey = this.getCacheKey(filePath); + const cacheKey = ValkeyCache.getCacheKey(filePath); this.log.info(`Setting Valkey cache for key: ${cacheKey} with TTL: ${ttl}s`); const serializedData = JSON.stringify(data); @@ -160,7 +160,7 @@ class LlmoCache { } try { - const cacheKey = this.getCacheKey(filePath); + const cacheKey = ValkeyCache.getCacheKey(filePath); this.log.info(`Deleting Valkey cache for key: ${cacheKey}`); await this.client.del(cacheKey); @@ -188,4 +188,28 @@ class LlmoCache { } } -export default LlmoCache; +/** + * Wrapper function to enable access to ElastiCache Valkey capabilities via the context. + * When wrapped with this function, the cache is available as context.valkey.cache + * + * @param {UniversalAction} fn + * @returns {function(object, UniversalContext): Promise} + */ +export function valkeyClientWrapper(fn) { + return async (request, context) => { + if (!context.valkey) { + const { env, log } = context; + + // Create Valkey cache instance + const cache = new ValkeyCache(env, log); + + // Connect to Valkey + await cache.connect(); + + context.valkey = { + cache, + }; + } + return fn(request, context); + }; +} From b6e52021e090682ef0ecdd690d5f0d80443093f1 Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 17:16:07 +0100 Subject: [PATCH 07/38] refactor(llmo-query): enhance log messages for cache hits and source fetches --- src/controllers/llmo/llmo-query.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js index 32346506a..6f0be4de0 100644 --- a/src/controllers/llmo/llmo-query.js +++ b/src/controllers/llmo/llmo-query.js @@ -43,7 +43,7 @@ export default class LlmoQuery { const cacheFetchTime = Date.now() - cacheStartTime; if (cachedData) { - log.info(`✓ Cache HIT for file: ${cacheFilePath} (fetch time: ${cacheFetchTime}ms)`); + log.info(`✓ Fetch from cache HIT for file: ${cacheFilePath} (fetch time: ${cacheFetchTime}ms)`); return { data: cachedData, headers: {}, @@ -97,7 +97,7 @@ export default class LlmoQuery { const data = await response.json(); const totalFetchTime = Date.now() - sourceFetchStartTime; - log.info(`✓ Source fetch completed for ${cacheFilePath} (network time: ${responseTime}ms, JSON parse time: ${totalFetchTime - responseTime}ms, total: ${totalFetchTime}ms)`); + log.info(`✓ Fetch from HELIX ${cacheFilePath} (network time: ${responseTime}ms, JSON parse time: ${totalFetchTime - responseTime}ms, total: ${totalFetchTime}ms)`); // Cache the raw data (async, don't wait for it) if (cache) { From 16d8cb04706c43222400d379d16f3d26cb9c4b64 Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 17:32:34 +0100 Subject: [PATCH 08/38] refactor(valkey): implement lazy connection for Valkey cache operations --- src/support/valkey.js | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index 858a7eb78..527edf88f 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -27,7 +27,7 @@ class ValkeyCache { } /** - * Initialize and connect to Valkey + * Initialize and connect to Valkey (lazy connection) */ async connect() { if (this.isConnected && this.client) { @@ -96,6 +96,8 @@ class ValkeyCache { * @returns {Promise} - The cached data or null if not found */ async get(filePath) { + // Lazy connect on first use + await this.connect(); if (!this.isConnected || !this.client) { this.log.warn('Valkey not connected, skipping cache get'); return null; @@ -128,6 +130,8 @@ class ValkeyCache { * @returns {Promise} - True if successfully cached, false otherwise */ async set(filePath, data, ttl = CACHE_TTL_SECONDS) { + // Lazy connect on first use + await this.connect(); if (!this.isConnected || !this.client) { this.log.warn('Valkey not connected, skipping cache set'); return false; @@ -154,6 +158,8 @@ class ValkeyCache { * @returns {Promise} - True if successfully deleted, false otherwise */ async delete(filePath) { + // Lazy connect on first use + await this.connect(); if (!this.isConnected || !this.client) { this.log.warn('Valkey not connected, skipping cache delete'); return false; @@ -200,12 +206,9 @@ export function valkeyClientWrapper(fn) { if (!context.valkey) { const { env, log } = context; - // Create Valkey cache instance + // Create Valkey cache instance (connection is lazy - happens on first use) const cache = new ValkeyCache(env, log); - // Connect to Valkey - await cache.connect(); - context.valkey = { cache, }; From 3616cffa1e6c5dc49ddfc31cb2b9a5c272ffbf95 Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 17:51:49 +0100 Subject: [PATCH 09/38] refactor: change fetchSingleFile to an instance method and update documentation --- src/controllers/llmo/llmo-query.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js index 6f0be4de0..7a3b75c3d 100644 --- a/src/controllers/llmo/llmo-query.js +++ b/src/controllers/llmo/llmo-query.js @@ -22,10 +22,11 @@ export default class LlmoQuery { } /** - * Fetches a single file from the external endpoint with caching - * @private - */ - static async fetchSingleFile(context, filePath, llmoConfig) { + * Fetches a single file from the external endpoint with caching + * @private + */ + // eslint-disable-next-line class-methods-use-this + async fetchSingleFile(context, filePath, llmoConfig) { const { log, env, valkey } = context; const { sheet } = context.data; From c6b691bd58c8c83cc12950925ac7993546897bda Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 7 Nov 2025 19:06:56 +0100 Subject: [PATCH 10/38] fix(llmo-query): add Content-Encoding header for processed results --- src/controllers/llmo/llmo-query.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js index 7a3b75c3d..b5aa7587b 100644 --- a/src/controllers/llmo/llmo-query.js +++ b/src/controllers/llmo/llmo-query.js @@ -251,7 +251,7 @@ export default class LlmoQuery { return result; }); - return ok({ files: processedResults }); + return ok({ files: processedResults }, { 'Content-Encoding': 'br' }); } // Single-file mode: construct the sheet URL based on path parameters From ac9e8f16a9080fbc3846169c8aa9e35224bf3907 Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 10:11:21 +0100 Subject: [PATCH 11/38] feat(llmo): add clearCache endpoint and Valkey cache clearing functionality --- src/controllers/llmo/llmo.js | 42 ++++++++++++++++++++++++++++ src/routes/index.js | 1 + src/support/valkey.js | 54 ++++++++++++++++++++++++++++++++++++ test/routes/index.test.js | 4 +++ 4 files changed, 101 insertions(+) diff --git a/src/controllers/llmo/llmo.js b/src/controllers/llmo/llmo.js index aba446eb6..7be449997 100644 --- a/src/controllers/llmo/llmo.js +++ b/src/controllers/llmo/llmo.js @@ -928,6 +928,47 @@ function LlmoController(ctx) { return llmoQuery.query(context); }; + /** + * Clears all LLMO cache entries from Valkey. + * This endpoint handles DELETE requests to clear the entire cache. + * @param {object} context - The request context. + * @returns {Promise} The cache clear response. + */ + const clearCache = async (context) => { + const { log } = context; + + try { + // Validate LLMO access + await getSiteAndValidateLlmo(context); + + // Check if Valkey cache is available + if (!context.valkey || !context.valkey.cache) { + return badRequest('Cache is not configured for this environment'); + } + + log.info('Starting cache clear operation'); + + // Clear all cache entries + const result = await context.valkey.cache.clearAll(); + + if (!result.success) { + log.error('Failed to clear cache'); + return badRequest('Failed to clear cache'); + } + + log.info(`Successfully cleared ${result.deletedCount} cache entries`); + + return ok({ + message: 'Cache cleared successfully', + deletedCount: result.deletedCount, + clearedAt: new Date().toISOString(), + }); + } catch (error) { + log.error(`Error clearing cache: ${error.message}`); + return badRequest(error.message); + } + }; + return { getLlmoSheetData, queryLlmoSheetData, @@ -947,6 +988,7 @@ function LlmoController(ctx) { onboardCustomer, offboardCustomer, query, + clearCache, }; } diff --git a/src/routes/index.js b/src/routes/index.js index ad09e8ef1..b3906bce3 100644 --- a/src/routes/index.js +++ b/src/routes/index.js @@ -285,6 +285,7 @@ export default function getRouteHandlers( 'PATCH /sites/:siteId/llmo/cdn-logs-filter': llmoController.patchLlmoCdnLogsFilter, 'PATCH /sites/:siteId/llmo/cdn-logs-bucket-config': llmoController.patchLlmoCdnBucketConfig, 'GET /sites/:siteId/llmo/global-sheet-data/:configName': llmoController.getLlmoGlobalSheetData, + 'DELETE /sites/:siteId/llmo/cache': llmoController.clearCache, 'POST /llmo/onboard': llmoController.onboardCustomer, 'POST /sites/:siteId/llmo/offboard': llmoController.offboardCustomer, diff --git a/src/support/valkey.js b/src/support/valkey.js index 527edf88f..710242cbf 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -177,6 +177,60 @@ class ValkeyCache { } } + /** + * Clear all cached data matching the LLMO cache pattern + * Uses SCAN to safely iterate through keys without blocking Redis + * @returns {Promise<{success: boolean, deletedCount: number}>} - + * Result with success status and count of deleted keys + */ + async clearAll() { + // Lazy connect on first use + await this.connect(); + if (!this.isConnected || !this.client) { + this.log.warn('Valkey not connected, skipping cache clear'); + return { success: false, deletedCount: 0 }; + } + + try { + const pattern = 'llmo:file:*'; + this.log.info(`Clearing all Valkey cache entries matching pattern: ${pattern}`); + + let cursor = 0; + let deletedCount = 0; + const keysToDelete = []; + + // Use SCAN to iterate through keys matching the pattern + /* eslint-disable no-await-in-loop */ + do { + const result = await this.client.scan(cursor, { + MATCH: pattern, + COUNT: 100, // Scan 100 keys at a time + }); + + cursor = result.cursor; + const { keys } = result; + + if (keys.length > 0) { + keysToDelete.push(...keys); + } + } while (cursor !== 0); + /* eslint-enable no-await-in-loop */ + + // Delete all found keys + if (keysToDelete.length > 0) { + this.log.info(`Found ${keysToDelete.length} keys to delete`); + await this.client.del(keysToDelete); + deletedCount = keysToDelete.length; + } + + this.log.info(`Successfully cleared ${deletedCount} cache entries`); + return { success: true, deletedCount }; + } catch (error) { + this.log.error(`Error clearing Valkey cache: ${error.message}`); + return { success: false, deletedCount: 0 }; + } + } + /** * Disconnect from Valkey */ diff --git a/test/routes/index.test.js b/test/routes/index.test.js index 6062f5913..5aa680be1 100755 --- a/test/routes/index.test.js +++ b/test/routes/index.test.js @@ -206,6 +206,7 @@ describe('getRouteHandlers', () => { offboardCustomer: () => null, query: () => null, queryLlmoSheetData: () => null, + clearCache: () => null, }; const mockSandboxAuditController = { @@ -466,6 +467,7 @@ describe('getRouteHandlers', () => { 'POST /sites/:siteId/llmo/customer-intent', 'DELETE /sites/:siteId/llmo/customer-intent/:intentKey', 'PATCH /sites/:siteId/llmo/customer-intent/:intentKey', + 'DELETE /sites/:siteId/llmo/cache', 'POST /sites/:siteId/llmo/offboard', 'GET /consent-banner/:jobId', 'PATCH /sites/:siteId/llmo/cdn-logs-filter', @@ -608,6 +610,8 @@ describe('getRouteHandlers', () => { expect(dynamicRoutes['DELETE /sites/:siteId/llmo/customer-intent/:intentKey'].paramNames).to.deep.equal(['siteId', 'intentKey']); expect(dynamicRoutes['PATCH /sites/:siteId/llmo/customer-intent/:intentKey'].handler).to.equal(mockLlmoController.patchLlmoCustomerIntent); expect(dynamicRoutes['PATCH /sites/:siteId/llmo/customer-intent/:intentKey'].paramNames).to.deep.equal(['siteId', 'intentKey']); + expect(dynamicRoutes['DELETE /sites/:siteId/llmo/cache'].handler).to.equal(mockLlmoController.clearCache); + expect(dynamicRoutes['DELETE /sites/:siteId/llmo/cache'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['POST /sites/:siteId/llmo/offboard'].handler).to.equal(mockLlmoController.offboardCustomer); expect(dynamicRoutes['POST /sites/:siteId/llmo/offboard'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /consent-banner/:jobId'].handler).to.equal(mockConsentBannerController.getScreenshots); From f4ab3b7139fe2e2b9bc30dae8bd7b162aaf780bf Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 10:44:46 +0100 Subject: [PATCH 12/38] log cache keys --- src/support/valkey.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index 710242cbf..5263ca76b 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -219,8 +219,13 @@ class ValkeyCache { // Delete all found keys if (keysToDelete.length > 0) { this.log.info(`Found ${keysToDelete.length} keys to delete`); - await this.client.del(keysToDelete); - deletedCount = keysToDelete.length; + keysToDelete.forEach((key) => { + this.log.info(`Deleting key: ${key}`); + }); + // await this.client.del(keysToDelete); + // deletedCount = keysToDelete.length; + + deletedCount = 0; } this.log.info(`Successfully cleared ${deletedCount} cache entries`); From 543c2636c84cf96319b68d60ddae3624b2bbd4fd Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 11:00:24 +0100 Subject: [PATCH 13/38] delete cache keys for testing purposes --- src/support/valkey.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index 5263ca76b..fabc8f3eb 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -214,7 +214,6 @@ class ValkeyCache { keysToDelete.push(...keys); } } while (cursor !== 0); - /* eslint-enable no-await-in-loop */ // Delete all found keys if (keysToDelete.length > 0) { @@ -222,11 +221,17 @@ class ValkeyCache { keysToDelete.forEach((key) => { this.log.info(`Deleting key: ${key}`); }); + + for (const key of keysToDelete) { + await this.client.del(key); + deletedCount += 1; + } // await this.client.del(keysToDelete); // deletedCount = keysToDelete.length; deletedCount = 0; } + /* eslint-enable no-await-in-loop */ this.log.info(`Successfully cleared ${deletedCount} cache entries`); return { success: true, deletedCount }; From 814f240ad9639d76722eec61de97ae48dfbe5b2e Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 11:25:00 +0100 Subject: [PATCH 14/38] feat(valkey): implement Brotli compression for caching data --- src/support/valkey.js | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index fabc8f3eb..f43efc5a3 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -11,6 +11,7 @@ */ import { createClient } from 'redis'; +import { brotliCompressSync, brotliDecompressSync, zlibConstants } from 'zlib'; // Cache TTL in seconds (2 hours by default) const CACHE_TTL_SECONDS = 2 * 60 * 60; @@ -111,7 +112,10 @@ class ValkeyCache { if (cachedData) { this.log.info(`Cache HIT for key: ${cacheKey}`); - return JSON.parse(cachedData); + // Decompress Brotli data and parse JSON + const buffer = Buffer.from(cachedData, 'base64'); + const decompressed = brotliDecompressSync(buffer); + return JSON.parse(decompressed.toString('utf8')); } this.log.info(`Cache MISS for key: ${cacheKey}`); @@ -141,10 +145,19 @@ class ValkeyCache { const cacheKey = ValkeyCache.getCacheKey(filePath); this.log.info(`Setting Valkey cache for key: ${cacheKey} with TTL: ${ttl}s`); + // Compress data with Brotli before storing const serializedData = JSON.stringify(data); - await this.client.setEx(cacheKey, ttl, serializedData); + const compressed = brotliCompressSync(Buffer.from(serializedData), { + params: { + // the default quality is too complex for the lambda and can lead to 503s + [zlibConstants.BROTLI_PARAM_QUALITY]: 4, + }, + }); + const base64Data = compressed.toString('base64'); + + await this.client.setEx(cacheKey, ttl, base64Data); - this.log.info(`Successfully cached data for key: ${cacheKey}`); + this.log.info(`Successfully cached data for key: ${cacheKey} (compressed)`); return true; } catch (error) { this.log.error(`Error setting Valkey cache: ${error.message}`); From 73944a4d1bb68fa1d29a2046af9fe72afc5a3e11 Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 11:30:21 +0100 Subject: [PATCH 15/38] brotli compression for valkey --- src/support/valkey.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index f43efc5a3..c0446e959 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -11,7 +11,7 @@ */ import { createClient } from 'redis'; -import { brotliCompressSync, brotliDecompressSync, zlibConstants } from 'zlib'; +import { brotliCompressSync, brotliDecompressSync, constants as zlibConstants } from 'zlib'; // Cache TTL in seconds (2 hours by default) const CACHE_TTL_SECONDS = 2 * 60 * 60; From 83e364b677efdd90383ca07db5018c0c62193931 Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 11:44:53 +0100 Subject: [PATCH 16/38] refactor(llmo-query): simplify logging of fetch times by removing response time calculation --- src/controllers/llmo/llmo-query.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js index b5aa7587b..5f6bd4052 100644 --- a/src/controllers/llmo/llmo-query.js +++ b/src/controllers/llmo/llmo-query.js @@ -87,7 +87,6 @@ export default class LlmoQuery { signal: controller.signal, }); clearTimeout(timeoutId); - const responseTime = Date.now() - sourceFetchStartTime; if (!response.ok) { log.error(`Failed to fetch data from external endpoint: ${response.status} ${response.statusText}`); @@ -98,7 +97,7 @@ export default class LlmoQuery { const data = await response.json(); const totalFetchTime = Date.now() - sourceFetchStartTime; - log.info(`✓ Fetch from HELIX ${cacheFilePath} (network time: ${responseTime}ms, JSON parse time: ${totalFetchTime - responseTime}ms, total: ${totalFetchTime}ms)`); + log.info(`✓ Fetch from HELIX ${cacheFilePath}: ${totalFetchTime}ms)`); // Cache the raw data (async, don't wait for it) if (cache) { From 26b54340c16b4bb68720f0b6b3f4df95241699db Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 11:57:05 +0100 Subject: [PATCH 17/38] remove unused delete method from ValkeyCache class --- src/support/valkey.js | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index c0446e959..941b67353 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -165,31 +165,6 @@ class ValkeyCache { } } - /** - * Delete cached data for a file - * @param {string} filePath - The file path to use as cache key - * @returns {Promise} - True if successfully deleted, false otherwise - */ - async delete(filePath) { - // Lazy connect on first use - await this.connect(); - if (!this.isConnected || !this.client) { - this.log.warn('Valkey not connected, skipping cache delete'); - return false; - } - - try { - const cacheKey = ValkeyCache.getCacheKey(filePath); - this.log.info(`Deleting Valkey cache for key: ${cacheKey}`); - - await this.client.del(cacheKey); - return true; - } catch (error) { - this.log.error(`Error deleting from Valkey cache: ${error.message}`); - return false; - } - } - /** * Clear all cached data matching the LLMO cache pattern * Uses SCAN to safely iterate through keys without blocking Redis From 9eb5e82277568e2d91243333af01e6a68657c86c Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 12:18:10 +0100 Subject: [PATCH 18/38] test --- src/support/valkey.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index 941b67353..5859c0872 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -258,7 +258,6 @@ export function valkeyClientWrapper(fn) { if (!context.valkey) { const { env, log } = context; - // Create Valkey cache instance (connection is lazy - happens on first use) const cache = new ValkeyCache(env, log); context.valkey = { From a08c99c9d75d10ec55f586a9e029f4906d369c54 Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 12:44:14 +0100 Subject: [PATCH 19/38] refactor(llmo-query): enhance fetchSingleFile and fetchMultipleFiles to support query parameters for pagination and filtering --- src/controllers/llmo/llmo-query.js | 52 +++++++++++++++++------------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js index 5f6bd4052..c5262fab0 100644 --- a/src/controllers/llmo/llmo-query.js +++ b/src/controllers/llmo/llmo-query.js @@ -13,7 +13,7 @@ import { ok, badRequest } from '@adobe/spacecat-shared-http-utils'; import { SPACECAT_USER_AGENT, tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; import { - applyFilters, applyInclusions, applySort, applyPagination, + applyFilters, applyInclusions, applySort, } from './llmo-utils.js'; export default class LlmoQuery { @@ -26,17 +26,25 @@ export default class LlmoQuery { * @private */ // eslint-disable-next-line class-methods-use-this - async fetchSingleFile(context, filePath, llmoConfig) { + async fetchSingleFile(context, filePath, llmoConfig, queryParams = {}) { const { log, env, valkey } = context; - const { sheet } = context.data; + const { sheet, limit, offset } = queryParams; // Get cache from context (initialized by valkeyClientWrapper) const cache = valkey?.cache; - // Construct cache key (includes dataFolder and filePath, optionally sheet) - const cacheFilePath = sheet - ? `${llmoConfig.dataFolder}/${filePath}?sheet=${sheet}` - : `${llmoConfig.dataFolder}/${filePath}`; + // Construct cache key (includes dataFolder, filePath, + // and all query params that affect the result) + const cacheKeyParts = [`${llmoConfig.dataFolder}/${filePath}`]; + const queryParamsForCache = []; + + if (sheet) queryParamsForCache.push(`sheet=${sheet}`); + if (limit) queryParamsForCache.push(`limit=${limit}`); + if (offset) queryParamsForCache.push(`offset=${offset}`); + + const cacheFilePath = queryParamsForCache.length > 0 + ? `${cacheKeyParts[0]}?${queryParamsForCache.join('&')}` + : cacheKeyParts[0]; // Try to get from cache first const cacheStartTime = Date.now(); @@ -57,10 +65,13 @@ export default class LlmoQuery { const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem.live'; const url = new URL(`${LLMO_SHEETDATA_SOURCE_URL}/${llmoConfig.dataFolder}/${filePath}`); - // Use a large limit to fetch all data from the source - // Pagination will be applied after sorting and filtering - url.searchParams.set('limit', '1000000'); - + // Add limit and offset parameters to the URL if provided + if (limit) { + url.searchParams.set('limit', limit); + } + if (offset) { + url.searchParams.set('offset', offset); + } // allow fetching a specific sheet from the sheet data source if (sheet) { url.searchParams.set('sheet', sheet); @@ -124,14 +135,14 @@ export default class LlmoQuery { * Fetches multiple files from the external endpoint * @private */ - async fetchMultipleFiles(context, files, llmoConfig) { + async fetchMultipleFiles(context, files, llmoConfig, queryParams) { const { log } = context; const results = []; // Fetch all files in parallel const fetchPromises = files.map(async (filePath) => { try { - const { data } = await this.fetchSingleFile(context, filePath, llmoConfig); + const { data } = await this.fetchSingleFile(context, filePath, llmoConfig, queryParams); return { path: filePath, status: 'success', @@ -210,14 +221,8 @@ export default class LlmoQuery { processedData = applySort(processedData, { field, order: sortOrder }); } - // Apply pagination (limit and offset) as the final step - // This ensures pagination is applied after all filtering and sorting - if (queryParams.limit || queryParams.offset) { - const limit = queryParams.limit ? parseInt(queryParams.limit, 10) : undefined; - const offset = queryParams.offset ? parseInt(queryParams.offset, 10) : 0; - - processedData = applyPagination(processedData, { limit, offset }); - } + // Note: limit and offset are now applied at the API level (in fetchSingleFile) + // rather than here, so pagination happens before data is cached return processedData; } @@ -237,7 +242,7 @@ export default class LlmoQuery { const files = Array.isArray(file) ? file : [file]; log.info(`Fetching multiple files for siteId: ${siteId}, files: ${files.join(', ')}`); - const results = await this.fetchMultipleFiles(context, files, llmoConfig); + const results = await this.fetchMultipleFiles(context, files, llmoConfig, queryParams); // Apply filters and inclusions to each file's data const processedResults = results.map((result) => { @@ -264,7 +269,8 @@ export default class LlmoQuery { } log.info(`Fetching single file for siteId: ${siteId}, path: ${filePath}`); - const { data, headers } = await this.fetchSingleFile(context, filePath, llmoConfig); + const singleFile = await this.fetchSingleFile(context, filePath, llmoConfig, queryParams); + const { data, headers } = singleFile; // Apply filters and inclusions to the data const processedData = LlmoQuery.processData(data, queryParams); From efc682352a20aa3de12004560b69f6ec1d725a58 Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 12:57:22 +0100 Subject: [PATCH 20/38] fetch entire file only --- src/controllers/llmo/llmo-query.js | 52 +++++++++++++----------------- 1 file changed, 23 insertions(+), 29 deletions(-) diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js index c5262fab0..5f6bd4052 100644 --- a/src/controllers/llmo/llmo-query.js +++ b/src/controllers/llmo/llmo-query.js @@ -13,7 +13,7 @@ import { ok, badRequest } from '@adobe/spacecat-shared-http-utils'; import { SPACECAT_USER_AGENT, tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; import { - applyFilters, applyInclusions, applySort, + applyFilters, applyInclusions, applySort, applyPagination, } from './llmo-utils.js'; export default class LlmoQuery { @@ -26,25 +26,17 @@ export default class LlmoQuery { * @private */ // eslint-disable-next-line class-methods-use-this - async fetchSingleFile(context, filePath, llmoConfig, queryParams = {}) { + async fetchSingleFile(context, filePath, llmoConfig) { const { log, env, valkey } = context; - const { sheet, limit, offset } = queryParams; + const { sheet } = context.data; // Get cache from context (initialized by valkeyClientWrapper) const cache = valkey?.cache; - // Construct cache key (includes dataFolder, filePath, - // and all query params that affect the result) - const cacheKeyParts = [`${llmoConfig.dataFolder}/${filePath}`]; - const queryParamsForCache = []; - - if (sheet) queryParamsForCache.push(`sheet=${sheet}`); - if (limit) queryParamsForCache.push(`limit=${limit}`); - if (offset) queryParamsForCache.push(`offset=${offset}`); - - const cacheFilePath = queryParamsForCache.length > 0 - ? `${cacheKeyParts[0]}?${queryParamsForCache.join('&')}` - : cacheKeyParts[0]; + // Construct cache key (includes dataFolder and filePath, optionally sheet) + const cacheFilePath = sheet + ? `${llmoConfig.dataFolder}/${filePath}?sheet=${sheet}` + : `${llmoConfig.dataFolder}/${filePath}`; // Try to get from cache first const cacheStartTime = Date.now(); @@ -65,13 +57,10 @@ export default class LlmoQuery { const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem.live'; const url = new URL(`${LLMO_SHEETDATA_SOURCE_URL}/${llmoConfig.dataFolder}/${filePath}`); - // Add limit and offset parameters to the URL if provided - if (limit) { - url.searchParams.set('limit', limit); - } - if (offset) { - url.searchParams.set('offset', offset); - } + // Use a large limit to fetch all data from the source + // Pagination will be applied after sorting and filtering + url.searchParams.set('limit', '1000000'); + // allow fetching a specific sheet from the sheet data source if (sheet) { url.searchParams.set('sheet', sheet); @@ -135,14 +124,14 @@ export default class LlmoQuery { * Fetches multiple files from the external endpoint * @private */ - async fetchMultipleFiles(context, files, llmoConfig, queryParams) { + async fetchMultipleFiles(context, files, llmoConfig) { const { log } = context; const results = []; // Fetch all files in parallel const fetchPromises = files.map(async (filePath) => { try { - const { data } = await this.fetchSingleFile(context, filePath, llmoConfig, queryParams); + const { data } = await this.fetchSingleFile(context, filePath, llmoConfig); return { path: filePath, status: 'success', @@ -221,8 +210,14 @@ export default class LlmoQuery { processedData = applySort(processedData, { field, order: sortOrder }); } - // Note: limit and offset are now applied at the API level (in fetchSingleFile) - // rather than here, so pagination happens before data is cached + // Apply pagination (limit and offset) as the final step + // This ensures pagination is applied after all filtering and sorting + if (queryParams.limit || queryParams.offset) { + const limit = queryParams.limit ? parseInt(queryParams.limit, 10) : undefined; + const offset = queryParams.offset ? parseInt(queryParams.offset, 10) : 0; + + processedData = applyPagination(processedData, { limit, offset }); + } return processedData; } @@ -242,7 +237,7 @@ export default class LlmoQuery { const files = Array.isArray(file) ? file : [file]; log.info(`Fetching multiple files for siteId: ${siteId}, files: ${files.join(', ')}`); - const results = await this.fetchMultipleFiles(context, files, llmoConfig, queryParams); + const results = await this.fetchMultipleFiles(context, files, llmoConfig); // Apply filters and inclusions to each file's data const processedResults = results.map((result) => { @@ -269,8 +264,7 @@ export default class LlmoQuery { } log.info(`Fetching single file for siteId: ${siteId}, path: ${filePath}`); - const singleFile = await this.fetchSingleFile(context, filePath, llmoConfig, queryParams); - const { data, headers } = singleFile; + const { data, headers } = await this.fetchSingleFile(context, filePath, llmoConfig); // Apply filters and inclusions to the data const processedData = LlmoQuery.processData(data, queryParams); From 526205ffedc2ac60b8d702e0990195c7234f5d50 Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 13:26:06 +0100 Subject: [PATCH 21/38] test enhanced llmo api --- src/support/valkey.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/support/valkey.js b/src/support/valkey.js index 5859c0872..941b67353 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -258,6 +258,7 @@ export function valkeyClientWrapper(fn) { if (!context.valkey) { const { env, log } = context; + // Create Valkey cache instance (connection is lazy - happens on first use) const cache = new ValkeyCache(env, log); context.valkey = { From f5f0b7714dc6acdb08b6ea62c8e6757a28598ddf Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 14:19:34 +0100 Subject: [PATCH 22/38] test llmo endpoint --- src/support/valkey.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index 941b67353..5859c0872 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -258,7 +258,6 @@ export function valkeyClientWrapper(fn) { if (!context.valkey) { const { env, log } = context; - // Create Valkey cache instance (connection is lazy - happens on first use) const cache = new ValkeyCache(env, log); context.valkey = { From 9cd742d47bfc9bff5443300506a74b262726f4b2 Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 18:13:12 +0100 Subject: [PATCH 23/38] Adds another caching strategy based on query params --- src/controllers/llmo/llmo-query-specific.js | 306 ++++++++++++++++++++ src/controllers/llmo/llmo-query.js | 12 +- src/controllers/llmo/llmo.js | 14 +- src/routes/index.js | 3 +- test/routes/index.test.js | 9 +- 5 files changed, 333 insertions(+), 11 deletions(-) create mode 100644 src/controllers/llmo/llmo-query-specific.js diff --git a/src/controllers/llmo/llmo-query-specific.js b/src/controllers/llmo/llmo-query-specific.js new file mode 100644 index 000000000..a5a4ea028 --- /dev/null +++ b/src/controllers/llmo/llmo-query-specific.js @@ -0,0 +1,306 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +import { ok, badRequest } from '@adobe/spacecat-shared-http-utils'; +import { SPACECAT_USER_AGENT, tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; +import { + applyFilters, applyInclusions, applySort, applyPagination, +} from './llmo-utils.js'; + +export default class LlmoQuerySpecificCache { + constructor(getSiteAndValidateLlmo) { + this.getSiteAndValidateLlmo = getSiteAndValidateLlmo; + } + + /** + * Generates a cache key that includes all query parameters + * @private + */ + // eslint-disable-next-line class-methods-use-this + generateCacheKey(filePath, queryParams, llmoConfig) { + const { dataFolder } = llmoConfig; + + // Sort query params to ensure consistent cache keys + const sortedParams = {}; + Object.keys(queryParams) + .sort() + .forEach((key) => { + sortedParams[key] = queryParams[key]; + }); + + // Create a string representation of the query params + const paramsString = JSON.stringify(sortedParams); + + // Combine dataFolder, filePath, and query params into a single cache key + return `${dataFolder}/${filePath}:${paramsString}`; + } + + /** + * Processes data by applying filters and inclusions based on query parameters + * @private + */ + static processData(data, queryParams) { + let processedData = data; + + // Apply sheet filtering if provided (e.g., ?sheets=sheet1,sheet2) + if (queryParams.sheets && processedData[':type'] === 'multi-sheet') { + const requestedSheets = Array.isArray(queryParams.sheets) + ? queryParams.sheets + : queryParams.sheets.split(',').map((sheet) => sheet.trim()); + + // Create a new data object with only the requested sheets + const filteredData = { ':type': 'multi-sheet' }; + requestedSheets.forEach((sheetName) => { + if (processedData[sheetName]) { + filteredData[sheetName] = processedData[sheetName]; + } + }); + processedData = filteredData; + } + + // Apply filters if provided (e.g., ?filter.status=active&filter.type=premium) + const filterFields = {}; + Object.keys(queryParams).forEach((key) => { + if (key.startsWith('filter.')) { + const fieldName = key.substring(7); // Remove 'filter.' prefix + filterFields[fieldName] = queryParams[key]; + } + }); + + if (Object.keys(filterFields).length > 0) { + processedData = applyFilters(processedData, filterFields); + } + + // Apply inclusions if provided (e.g., ?include=field1,field2,field3) + if (queryParams.include) { + const includeFields = Array.isArray(queryParams.include) + ? queryParams.include + : queryParams.include.split(',').map((field) => field.trim()); + processedData = applyInclusions(processedData, includeFields); + } + + // Apply sorting if provided (e.g., ?sort=field:asc or ?sort=field:desc) + if (queryParams.sort) { + const sortParam = Array.isArray(queryParams.sort) + ? queryParams.sort[0] + : queryParams.sort; + const [field, order = 'asc'] = sortParam.split(':').map((s) => s.trim()); + + // Validate order is either 'asc' or 'desc' + const sortOrder = order.toLowerCase() === 'desc' ? 'desc' : 'asc'; + + processedData = applySort(processedData, { field, order: sortOrder }); + } + + // Apply pagination (limit and offset) as the final step + // This ensures pagination is applied after all filtering and sorting + if (queryParams.limit || queryParams.offset) { + const limit = queryParams.limit ? parseInt(queryParams.limit, 10) : undefined; + const offset = queryParams.offset ? parseInt(queryParams.offset, 10) : 0; + + processedData = applyPagination(processedData, { limit, offset }); + } + + return processedData; + } + + /** + * Fetches and processes a single file with caching of the final result + * @private + */ + async fetchAndProcessSingleFile(context, filePath, queryParams, llmoConfig) { + const { log, env, valkey } = context; + const { sheet } = context.data; + + // Get cache from context (initialized by valkeyClientWrapper) + const cache = valkey?.cache; + + // Generate cache key that includes all query parameters + const cacheKey = this.generateCacheKey(filePath, { ...queryParams, sheet }, llmoConfig); + + // Try to get processed result from cache first + const cacheStartTime = Date.now(); + const cachedResult = cache ? await cache.get(cacheKey) : null; + const cacheFetchTime = Date.now() - cacheStartTime; + + if (cachedResult) { + log.info(`✓ Processed result cache HIT for: ${cacheKey} (fetch time: ${cacheFetchTime}ms)`); + return { + data: cachedResult, + headers: {}, + }; + } + + // Cache miss - fetch raw data and process it + log.info(`✗ Processed result cache MISS for: ${cacheKey} (cache check time: ${cacheFetchTime}ms), fetching and processing`); + + const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem.live'; + const url = new URL(`${LLMO_SHEETDATA_SOURCE_URL}/${llmoConfig.dataFolder}/${filePath}`); + + // Use a large limit to fetch all data from the source + // Pagination will be applied after sorting and filtering + url.searchParams.set('limit', '1000000'); + + // allow fetching a specific sheet from the sheet data source + if (sheet) { + url.searchParams.set('sheet', sheet); + } + + const urlAsString = url.toString(); + log.info(`Fetching single file with path: ${urlAsString}`); + + // Create an AbortController with a 60-second timeout for large data fetches + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 60000); // 60 seconds + + // Start timing the source fetch + const sourceFetchStartTime = Date.now(); + + try { + // Fetch data from the external endpoint using the dataFolder from config + const response = await fetch(url.toString(), { + headers: { + Authorization: `token ${env.LLMO_HLX_API_KEY || 'hlx_api_key_missing'}`, + 'User-Agent': SPACECAT_USER_AGENT, + 'Accept-Encoding': 'br', + }, + signal: controller.signal, + }); + clearTimeout(timeoutId); + + if (!response.ok) { + log.error(`Failed to fetch data from external endpoint: ${response.status} ${response.statusText}`); + throw new Error(`External API returned ${response.status}: ${response.statusText}`); + } + + // Get the raw response data + const rawData = await response.json(); + const fetchTime = Date.now() - sourceFetchStartTime; + + log.info(`✓ Fetch from HELIX ${filePath}: ${fetchTime}ms`); + + // Process the data with all query parameters + const processStartTime = Date.now(); + const processedData = LlmoQuerySpecificCache.processData(rawData, queryParams); + const processTime = Date.now() - processStartTime; + + log.info(`✓ Data processing completed in ${processTime}ms`); + + // Cache the processed result (async, don't wait for it) + if (cache) { + cache.set(cacheKey, processedData).catch((error) => { + log.error(`Failed to cache processed data for ${cacheKey}: ${error.message}`); + }); + } + + return { + data: processedData, + headers: response.headers ? Object.fromEntries(response.headers.entries()) : {}, + }; + } catch (error) { + clearTimeout(timeoutId); + if (error.name === 'AbortError') { + log.error(`Request timeout after 60000ms for file: ${filePath}`); + throw new Error('Request timeout after 60000ms'); + } + throw error; + } + } + + /** + * Fetches and processes multiple files in parallel + * @private + */ + async fetchAndProcessMultipleFiles(context, files, queryParams, llmoConfig) { + const { log } = context; + + // Fetch and process all files in parallel + const fetchPromises = files.map(async (filePath) => { + try { + const { data } = await this.fetchAndProcessSingleFile( + context, + filePath, + queryParams, + llmoConfig, + ); + return { + path: filePath, + status: 'success', + data, + }; + } catch (error) { + log.error(`Error fetching and processing file ${filePath}: ${error.message}`); + return { + path: filePath, + status: 'error', + error: error.message, + }; + } + }); + + // Wait for all parallel fetches to complete + const results = await Promise.all(fetchPromises); + + return results; + } + + async query(context) { + const { log } = context; + const { + siteId, dataSource, sheetType, week, + } = context.params; + const { file, ...queryParams } = context.data; + + try { + const { llmoConfig } = await this.getSiteAndValidateLlmo(context); + + // Multi-file mode: if 'file' query param exists + if (file) { + const files = Array.isArray(file) ? file : [file]; + log.info(`Fetching and processing multiple files for siteId: ${siteId}, files: ${files.join(', ')}`); + + const results = await this.fetchAndProcessMultipleFiles( + context, + files, + queryParams, + llmoConfig, + ); + + return ok({ files: results }, { 'Content-Encoding': 'br' }); + } + + // Single-file mode: construct the sheet URL based on path parameters + let filePath; + if (sheetType && week) { + filePath = `${sheetType}/${week}/${dataSource}`; + } else if (sheetType) { + filePath = `${sheetType}/${dataSource}`; + } else { + filePath = dataSource; + } + + log.info(`Fetching and processing single file for siteId: ${siteId}, path: ${filePath}`); + const { data, headers } = await this.fetchAndProcessSingleFile( + context, + filePath, + queryParams, + llmoConfig, + ); + + // Return the processed data, pass through any compression headers from upstream + return ok(data, headers); + } catch (error) { + log.error(`Error proxying data for siteId: ${siteId}, error: ${error.message}`); + return badRequest(error.message); + } + } +} diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js index 5f6bd4052..eca1054b4 100644 --- a/src/controllers/llmo/llmo-query.js +++ b/src/controllers/llmo/llmo-query.js @@ -16,7 +16,7 @@ import { applyFilters, applyInclusions, applySort, applyPagination, } from './llmo-utils.js'; -export default class LlmoQuery { +export default class LlmoQueryFileCache { constructor(getSiteAndValidateLlmo) { this.getSiteAndValidateLlmo = getSiteAndValidateLlmo; } @@ -237,18 +237,24 @@ export default class LlmoQuery { const files = Array.isArray(file) ? file : [file]; log.info(`Fetching multiple files for siteId: ${siteId}, files: ${files.join(', ')}`); + const fetchStartTime = Date.now(); const results = await this.fetchMultipleFiles(context, files, llmoConfig); + const fetchDuration = Date.now() - fetchStartTime; + log.info(`✓ fetchMultipleFiles completed in ${fetchDuration}ms for ${files.length} file(s)`); // Apply filters and inclusions to each file's data + const processingStartTime = Date.now(); const processedResults = results.map((result) => { if (result.status === 'success' && result.data) { return { ...result, - data: LlmoQuery.processData(result.data, queryParams), + data: LlmoQueryFileCache.processData(result.data, queryParams), }; } return result; }); + const processingDuration = Date.now() - processingStartTime; + log.info(`✓ Processing completed in ${processingDuration}ms for ${results.length} file(s)`); return ok({ files: processedResults }, { 'Content-Encoding': 'br' }); } @@ -267,7 +273,7 @@ export default class LlmoQuery { const { data, headers } = await this.fetchSingleFile(context, filePath, llmoConfig); // Apply filters and inclusions to the data - const processedData = LlmoQuery.processData(data, queryParams); + const processedData = LlmoQueryFileCache.processData(data, queryParams); // Return the processed data, pass through any compression headers from upstream return ok(processedData, headers); diff --git a/src/controllers/llmo/llmo.js b/src/controllers/llmo/llmo.js index 7be449997..9ef637f67 100644 --- a/src/controllers/llmo/llmo.js +++ b/src/controllers/llmo/llmo.js @@ -40,7 +40,7 @@ import { performLlmoOnboarding, performLlmoOffboarding, } from './llmo-onboarding.js'; -import LlmoQuery from './llmo-query.js'; +import LlmoQuerySpecificCache from './llmo-query-specific.js'; const { readConfig, writeConfig } = llmo; const { llmoConfig: llmoConfigSchema } = schemas; @@ -923,8 +923,13 @@ function LlmoController(ctx) { } }; - const query = async (context) => { - const llmoQuery = new LlmoQuery(getSiteAndValidateLlmo); + const queryFile = async (context) => { + const llmoQuery = new LlmoQuerySpecificCache(getSiteAndValidateLlmo); + return llmoQuery.query(context); + }; + + const querySpecific = async (context) => { + const llmoQuery = new LlmoQuerySpecificCache(getSiteAndValidateLlmo); return llmoQuery.query(context); }; @@ -987,7 +992,8 @@ function LlmoController(ctx) { updateLlmoConfig, onboardCustomer, offboardCustomer, - query, + queryFile, + querySpecific, clearCache, }; } diff --git a/src/routes/index.js b/src/routes/index.js index dcba18bd2..350603a67 100644 --- a/src/routes/index.js +++ b/src/routes/index.js @@ -270,7 +270,8 @@ export default function getRouteHandlers( 'GET /sites/:siteId/llmo/sheet-data/:dataSource': llmoController.getLlmoSheetData, 'GET /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource': llmoController.getLlmoSheetData, 'GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource': llmoController.getLlmoSheetData, - 'GET /sites/:siteId/llmo/query': llmoController.query, + 'GET /sites/:siteId/llmo/query-file': llmoController.queryFile, + 'GET /sites/:siteId/llmo/query-specific': llmoController.querySpecific, 'POST /sites/:siteId/llmo/sheet-data/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource': llmoController.queryLlmoSheetData, diff --git a/test/routes/index.test.js b/test/routes/index.test.js index 17ac88527..0a8142e45 100755 --- a/test/routes/index.test.js +++ b/test/routes/index.test.js @@ -461,7 +461,8 @@ describe('getRouteHandlers', () => { 'POST /sites/:siteId/llmo/sheet-data/:dataSource', 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource', 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource', - 'GET /sites/:siteId/llmo/query', + 'GET /sites/:siteId/llmo/query-file', + 'GET /sites/:siteId/llmo/query-specific', 'GET /sites/:siteId/llmo/config', 'PATCH /sites/:siteId/llmo/config', 'POST /sites/:siteId/llmo/config', @@ -597,8 +598,10 @@ describe('getRouteHandlers', () => { expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'dataSource']); expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource'].handler).to.equal(mockLlmoController.getLlmoSheetData); expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'week', 'dataSource']); - expect(dynamicRoutes['GET /sites/:siteId/llmo/query'].handler).to.equal(mockLlmoController.query); - expect(dynamicRoutes['GET /sites/:siteId/llmo/query'].paramNames).to.deep.equal(['siteId']); + expect(dynamicRoutes['GET /sites/:siteId/llmo/query-file'].handler).to.equal(mockLlmoController.queryFile); + expect(dynamicRoutes['GET /sites/:siteId/llmo/query-file'].paramNames).to.deep.equal(['siteId']); + expect(dynamicRoutes['GET /sites/:siteId/llmo/query-specific'].handler).to.equal(mockLlmoController.querySpecific); + expect(dynamicRoutes['GET /sites/:siteId/llmo/query-specific'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].handler).to.equal(mockLlmoController.getLlmoConfig); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /sites/:siteId/llmo/questions'].handler).to.equal(mockLlmoController.getLlmoQuestions); From a3b08c3ad9b9e8b7d71663488e7f430db94d46ab Mon Sep 17 00:00:00 2001 From: Char Date: Mon, 10 Nov 2025 18:36:25 +0100 Subject: [PATCH 24/38] use file cache --- src/controllers/llmo/llmo.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/controllers/llmo/llmo.js b/src/controllers/llmo/llmo.js index 9ef637f67..e1dac5638 100644 --- a/src/controllers/llmo/llmo.js +++ b/src/controllers/llmo/llmo.js @@ -41,6 +41,7 @@ import { performLlmoOffboarding, } from './llmo-onboarding.js'; import LlmoQuerySpecificCache from './llmo-query-specific.js'; +import LlmoQueryFileCache from './llmo-query.js'; const { readConfig, writeConfig } = llmo; const { llmoConfig: llmoConfigSchema } = schemas; @@ -924,7 +925,7 @@ function LlmoController(ctx) { }; const queryFile = async (context) => { - const llmoQuery = new LlmoQuerySpecificCache(getSiteAndValidateLlmo); + const llmoQuery = new LlmoQueryFileCache(getSiteAndValidateLlmo); return llmoQuery.query(context); }; From 1bb4e19ed8d09459de2d99d3d7c82be0a11521fa Mon Sep 17 00:00:00 2001 From: Char Date: Tue, 11 Nov 2025 14:49:16 +0100 Subject: [PATCH 25/38] cache some calculations --- src/controllers/llmo/llmo-query-specific.js | 296 +++++++++++++++++++- 1 file changed, 293 insertions(+), 3 deletions(-) diff --git a/src/controllers/llmo/llmo-query-specific.js b/src/controllers/llmo/llmo-query-specific.js index a5a4ea028..d2722c11a 100644 --- a/src/controllers/llmo/llmo-query-specific.js +++ b/src/controllers/llmo/llmo-query-specific.js @@ -10,6 +10,7 @@ * governing permissions and limitations under the License. */ +import { parse } from 'tldts'; import { ok, badRequest } from '@adobe/spacecat-shared-http-utils'; import { SPACECAT_USER_AGENT, tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; import { @@ -48,7 +49,7 @@ export default class LlmoQuerySpecificCache { * Processes data by applying filters and inclusions based on query parameters * @private */ - static processData(data, queryParams) { + static processData(filePath, data, queryParams) { let processedData = data; // Apply sheet filtering if provided (e.g., ?sheets=sheet1,sheet2) @@ -110,6 +111,7 @@ export default class LlmoQuerySpecificCache { processedData = applyPagination(processedData, { limit, offset }); } + processedData = this.extractData(filePath, processedData, 'https://adobe.com'); return processedData; } @@ -148,7 +150,7 @@ export default class LlmoQuerySpecificCache { // Use a large limit to fetch all data from the source // Pagination will be applied after sorting and filtering - url.searchParams.set('limit', '1000000'); + url.searchParams.set('limit', '10000000'); // allow fetching a specific sheet from the sheet data source if (sheet) { @@ -190,7 +192,7 @@ export default class LlmoQuerySpecificCache { // Process the data with all query parameters const processStartTime = Date.now(); - const processedData = LlmoQuerySpecificCache.processData(rawData, queryParams); + const processedData = LlmoQuerySpecificCache.processData(filePath, rawData, queryParams); const processTime = Date.now() - processStartTime; log.info(`✓ Data processing completed in ${processTime}ms`); @@ -303,4 +305,292 @@ export default class LlmoQuerySpecificCache { return badRequest(error.message); } } + + /** + * @private + */ + static extractData(filePath, weekData, siteBaseUrl) { + let allSheetRecords = []; + let competitorsRecords = []; + + // filePath contains something like brandpresence-all-W45-2025.json + // so build an object with the week and year from the filePath + // but watch out, the filePath contains a lot of characters before + // so do it with a regex + const weekAndYear = filePath.match(/brandpresence-all-w(\d{2})-(\d{4})/); + const week = weekAndYear[1]; + const year = weekAndYear[2]; + + // Get week information from weekData + const weekInfo = { week, year }; + const weekString = `${weekInfo.year}-W${weekInfo.week.toString().padStart(2, '0')}`; + Object.entries(weekData).forEach(([sheetName, sheetContent]) => { + // Extract from 'all' sheet + if (sheetName.includes('all')) { + const records = Array.isArray(sheetContent.data) ? sheetContent.data : []; + + // Enrich records in place so weekly trend calculations see the updated data + records.forEach((record) => { + // Add week information + // eslint-disable-next-line no-param-reassign + record.Week = weekString; + // eslint-disable-next-line no-param-reassign + record.week = weekString; + + // Parse sources field and create SourcesDetail + const { Prompt: prompt } = record; + if (prompt) { + // Parse semicolon-separated URLs from sources field + const sources = record.sources || record.Sources || ''; + const urls = LlmoQuerySpecificCache.parseSourcesUrls(sources); + + // Get competitor domains for content type determination + const competitorDomains = LlmoQuerySpecificCache.extractCompetitorDomains(record); + + // Create citations for each URL + const sourcesDetail = urls.map((url) => { + const contentType = siteBaseUrl + ? LlmoQuerySpecificCache.determineContentType(url, siteBaseUrl, competitorDomains) + : 'earned'; + + const brand = LlmoQuerySpecificCache.extractDomain(url) || ''; + + return { + url, + brand, + numTimesCited: 1, // Each occurrence counts as 1 citation + contentType, + week: weekString, + weekNumber: weekInfo.week, + year: weekInfo.year, + }; + }); + + // eslint-disable-next-line no-param-reassign + record.SourcesDetail = sourcesDetail; + + // Set sources_contain_branddomain based on whether any source is 'owned' + const hasOwnedSource = sourcesDetail.some((source) => source.contentType === 'owned'); + + // Always set the field based on our analysis (modifying in place!) + // eslint-disable-next-line no-param-reassign + record.sources_contain_branddomain = hasOwnedSource ? 'true' : 'false'; + // eslint-disable-next-line no-param-reassign + record['Sources Contain Brand Domain'] = hasOwnedSource ? 'true' : 'false'; + } + }); + + // Also collect them for the return value + allSheetRecords = allSheetRecords.concat(records); + } + + // Extract from 'brand_vs_competitors' sheet + if (sheetName.includes('brand_vs_competitors')) { + const records = Array.isArray(sheetContent.data) ? sheetContent.data : []; + // Add week information to each record (modifying in place) + records.forEach((record) => { + // eslint-disable-next-line no-param-reassign + record.Week = weekString; + // eslint-disable-next-line no-param-reassign + record.week = weekString; + }); + competitorsRecords = competitorsRecords.concat(records); + } + }); + + // eslint-disable-next-line no-param-reassign + weekData.all.data = allSheetRecords; + // eslint-disable-next-line no-param-reassign + weekData.brand_vs_competitors.data = competitorsRecords; + return weekData; + } + + /** + * @private + */ + static parseSourcesUrls(sources) { + if (!sources || typeof sources !== 'string') return []; + + return sources + .split(';') + .map((url) => url.trim()) + .filter((url) => url.length > 0) + .map((url) => LlmoQuerySpecificCache.normalizeUrl(url)); + } + + /** + * @private + */ + static extractCompetitorDomains(record) { + const competitors = []; + + // Extract from Business Competitors field only (semicolon separated) + if (record['Business Competitors'] || record.businessCompetitors) { + const businessCompetitors = record['Business Competitors'] || record.businessCompetitors; + if (typeof businessCompetitors === 'string') { + competitors.push(...businessCompetitors.split(';').map((c) => c.trim()).filter((c) => c.length > 0)); + } + } + + // Deduplicate + return [...new Set(competitors)]; + } + + /** + * @private + */ + static determineContentType(url, siteBaseUrl, competitorNames) { + // Priority 1: Check if owned + if (LlmoQuerySpecificCache.isOwnedUrl(url, siteBaseUrl)) { + return 'owned'; + } + + // Priority 2: Check if competitor/others + if (competitorNames && LlmoQuerySpecificCache.isCompetitorUrl(url, competitorNames)) { + return 'others'; + } + + // Priority 3: Check if social media + if (LlmoQuerySpecificCache.isSocialMediaUrl(url)) { + return 'social'; + } + + // Default: earned (third-party content) + return 'earned'; + } + + /** + * @private + */ + static extractDomain(url) { + try { + const urlObj = new URL(url); + return urlObj.hostname.replace(/^www\./, ''); + } catch { + return null; + } + } + + /** + * @private + */ + static normalizeUrl(url) { + if (!url || typeof url !== 'string') return url; + + let normalized = url.trim(); + + // Check if this is a path (starts with /) or a full URL + const isPath = normalized.startsWith('/'); + + if (isPath) { + // For paths, just strip query params with regex (preserve fragments) + normalized = normalized.replace(/\?[^#]*/, ''); + } else { + // For full URLs, use URL object for proper parsing + try { + const urlObj = new URL(normalized.startsWith('http') ? normalized : `https://${normalized}`); + + // Clear all search params but keep hash/fragment + urlObj.search = ''; + + // Add www. to bare domains (not subdomains like helpx.adobe.com) + const { subdomain } = parse(urlObj.hostname); + if (!subdomain && !urlObj.hostname.startsWith('www.')) { + urlObj.hostname = `www.${urlObj.hostname}`; + } + + normalized = urlObj.toString(); + } catch { + // If URL parsing fails, use fallback approach + // Remove query params: everything between ? and # (or end of string if no #) + normalized = normalized.replace(/\?[^#]*/, ''); + } + } + + // Remove trailing slash, except for root paths + if (normalized.length > 1 && normalized.endsWith('/')) { + normalized = normalized.slice(0, -1); + } + + // Normalize protocol to lowercase (only for full URLs) + if (!isPath) { + if (normalized.startsWith('HTTP://')) { + normalized = `http://${normalized.slice(7)}`; + } else if (normalized.startsWith('HTTPS://')) { + normalized = `https://${normalized.slice(8)}`; + } + } + + return normalized; + } + + /** + * @private + */ + static isOwnedUrl(url, siteBaseUrl) { + try { + const urlObj = new URL(url.startsWith('http') ? url : `https://${url}`); + const siteObj = new URL(siteBaseUrl); + + const urlHostname = urlObj.hostname.replace(/^www\./, '').toLowerCase(); + const siteHostname = siteObj.hostname.replace(/^www\./, '').toLowerCase(); + + // Check if URL hostname matches or is a subdomain of site hostname + return urlHostname === siteHostname || urlHostname.endsWith(`.${siteHostname}`); + } catch { + return false; + } + } + + /** + * @private + */ + static isCompetitorUrl(url, competitorNames) { + if (!competitorNames || competitorNames.length === 0) return false; + + try { + const urlObj = new URL(url.startsWith('http') ? url : `https://${url}`); + const urlHostname = urlObj.hostname.replace(/^www\./, '').toLowerCase(); + + // Check if any competitor name appears in the domain + return competitorNames.some((competitorName) => { + const nameLower = competitorName.toLowerCase().trim(); + return urlHostname.includes(nameLower); + }); + } catch { + return false; + } + } + + /** + * @private + */ + static isSocialMediaUrl(url) { + const SOCIAL_MEDIA_DOMAINS = [ + 'twitter.com', + 'x.com', + 'facebook.com', + 'linkedin.com', + 'instagram.com', + 'youtube.com', + 'tiktok.com', + 'reddit.com', + 'pinterest.com', + 'snapchat.com', + 'discord.com', + 'twitch.tv', + 'medium.com', + 'quora.com', + 'tumblr.com', + 'vimeo.com', + ]; + + try { + const urlObj = new URL(url.startsWith('http') ? url : `https://${url}`); + const hostname = urlObj.hostname.replace(/^www\./, '').toLowerCase(); + return SOCIAL_MEDIA_DOMAINS.some((d) => hostname === d || hostname.endsWith(`.${d}`)); + } catch { + return false; + } + } } From bacf4641774dc6e48b11b20e1f9b3fe564b59139 Mon Sep 17 00:00:00 2001 From: Char Date: Tue, 11 Nov 2025 16:56:42 +0100 Subject: [PATCH 26/38] Remove unused mock site creation code and related comments from LlmoController --- src/controllers/llmo/llmo.js | 55 ------------------------------------ 1 file changed, 55 deletions(-) diff --git a/src/controllers/llmo/llmo.js b/src/controllers/llmo/llmo.js index e1dac5638..53a885b7f 100644 --- a/src/controllers/llmo/llmo.js +++ b/src/controllers/llmo/llmo.js @@ -51,63 +51,8 @@ const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem function LlmoController(ctx) { const accessControlUtil = AccessControlUtil.fromContext(ctx); - // // Helper function to create a mock site for dev mode - // const createMockSite = (context) => { - // const { env } = context; - // const dataFolder = env.DEV_LLMO_DATA_FOLDER || 'dev/test-site'; - // const brand = env.DEV_LLMO_BRAND || 'Test Brand'; - - // const mockLlmoConfig = { - // dataFolder, - // brand, - // questions: { - // Human: [], - // AI: [], - // }, - // customerIntent: [], - // }; - - // const mockConfig = { - // getLlmoConfig: () => mockLlmoConfig, - // getLlmoHumanQuestions: () => [], - // getLlmoAIQuestions: () => [], - // getLlmoCustomerIntent: () => [], - // addLlmoHumanQuestions: () => { }, - // addLlmoAIQuestions: () => { }, - // removeLlmoQuestion: () => { }, - // updateLlmoQuestion: () => { }, - // addLlmoCustomerIntent: () => { }, - // removeLlmoCustomerIntent: () => { }, - // updateLlmoCustomerIntent: () => { }, - // updateLlmoCdnlogsFilter: () => { }, - // updateLlmoCdnBucketConfig: () => { }, - // updateLlmoBrand: () => { }, - // updateLlmoDataFolder: () => { }, - // }; - - // const mockSite = { - // getId: () => context.params.siteId, - // getConfig: () => mockConfig, - // setConfig: () => { }, - // save: async () => { }, - // getOrganizationId: () => 'mock-org-id', - // getBaseURL: () => 'https://example.com', - // }; - - // return { site: mockSite, config: mockConfig, llmoConfig: mockLlmoConfig }; - // }; - // Helper function to get site and validate LLMO confign const getSiteAndValidateLlmo = async (context) => { - // const { env, log } = context; - - // // DEV MODE BYPASS: Use mock data if ENV=dev and DEV_SKIP_DYNAMODB=true - // if (env.ENV === 'dev' && env.DEV_SKIP_DYNAMODB === 'true') { - // log.info('DEV MODE: Using mock site data, skipping DynamoDB'); - // return createMockSite(context); - // } - - // PRODUCTION MODE: Normal flow const { siteId } = context.params; const { dataAccess } = context; const { Site } = dataAccess; From 9733d7c43f52838fb6bec4e376024b19d4a02a5c Mon Sep 17 00:00:00 2001 From: Char Date: Tue, 11 Nov 2025 16:57:37 +0100 Subject: [PATCH 27/38] Update Valkey connection settings: reduce connectTimeout to 300ms and limit reconnection attempts to one. --- src/support/valkey.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index 5859c0872..500b7f931 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -46,11 +46,11 @@ class ValkeyCache { socket: { host, port: parseInt(port, 10), - connectTimeout: 10000, // 10 seconds timeout + connectTimeout: 300, // Valkey should connect very quickly tls: true, // Enable TLS for ElastiCache connections rejectUnauthorized: false, // AWS certificates are self-signed reconnectStrategy: (retries) => { - if (retries > 3) { + if (retries > 1) { // Only one retry is allowed this.log.error('Max Valkey reconnection attempts reached'); return false; // Stop reconnecting } From 9224f4c44bec66847106452bd2bd5f53bf2fed70 Mon Sep 17 00:00:00 2001 From: Char Date: Tue, 11 Nov 2025 18:39:55 +0100 Subject: [PATCH 28/38] restores file --- .nycrc.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.nycrc.json b/.nycrc.json index c2cf5af8a..7a8da8fdf 100644 --- a/.nycrc.json +++ b/.nycrc.json @@ -3,10 +3,10 @@ "lcov", "text" ], - "check-coverage": false, - "lines": 0, - "branches": 0, - "statements": 0, + "check-coverage": true, + "lines": 100, + "branches": 100, + "statements": 100, "all": true, "include": [ "src/**/*.js" From d10bab7b4ffceb8fea3cf1003d6782ea180fb5c7 Mon Sep 17 00:00:00 2001 From: Char Date: Tue, 11 Nov 2025 18:47:38 +0100 Subject: [PATCH 29/38] Refactor ValkeyCache class to export it and remove the clearAll method implementation. --- src/support/valkey.js | 66 +------------------------------------------ 1 file changed, 1 insertion(+), 65 deletions(-) diff --git a/src/support/valkey.js b/src/support/valkey.js index 500b7f931..6ef013877 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -19,7 +19,7 @@ const CACHE_TTL_SECONDS = 2 * 60 * 60; /** * LLMO Cache Helper using AWS ElastiCache Valkey (Redis-compatible) */ -class ValkeyCache { +export class ValkeyCache { constructor(env, log) { this.log = log; this.env = env; @@ -165,70 +165,6 @@ class ValkeyCache { } } - /** - * Clear all cached data matching the LLMO cache pattern - * Uses SCAN to safely iterate through keys without blocking Redis - * @returns {Promise<{success: boolean, deletedCount: number}>} - - * Result with success status and count of deleted keys - */ - async clearAll() { - // Lazy connect on first use - await this.connect(); - if (!this.isConnected || !this.client) { - this.log.warn('Valkey not connected, skipping cache clear'); - return { success: false, deletedCount: 0 }; - } - - try { - const pattern = 'llmo:file:*'; - this.log.info(`Clearing all Valkey cache entries matching pattern: ${pattern}`); - - let cursor = 0; - let deletedCount = 0; - const keysToDelete = []; - - // Use SCAN to iterate through keys matching the pattern - /* eslint-disable no-await-in-loop */ - do { - const result = await this.client.scan(cursor, { - MATCH: pattern, - COUNT: 100, // Scan 100 keys at a time - }); - - cursor = result.cursor; - const { keys } = result; - - if (keys.length > 0) { - keysToDelete.push(...keys); - } - } while (cursor !== 0); - - // Delete all found keys - if (keysToDelete.length > 0) { - this.log.info(`Found ${keysToDelete.length} keys to delete`); - keysToDelete.forEach((key) => { - this.log.info(`Deleting key: ${key}`); - }); - - for (const key of keysToDelete) { - await this.client.del(key); - deletedCount += 1; - } - // await this.client.del(keysToDelete); - // deletedCount = keysToDelete.length; - - deletedCount = 0; - } - /* eslint-enable no-await-in-loop */ - - this.log.info(`Successfully cleared ${deletedCount} cache entries`); - return { success: true, deletedCount }; - } catch (error) { - this.log.error(`Error clearing Valkey cache: ${error.message}`); - return { success: false, deletedCount: 0 }; - } - } - /** * Disconnect from Valkey */ From 239ffa1abeeffa0c323e0eeb4248f78571d704af Mon Sep 17 00:00:00 2001 From: Char Date: Tue, 11 Nov 2025 20:06:49 +0100 Subject: [PATCH 30/38] removes non needed files --- src/controllers/llmo/llmo-query-specific.js | 596 -------------------- src/controllers/llmo/llmo-query.js | 285 ---------- 2 files changed, 881 deletions(-) delete mode 100644 src/controllers/llmo/llmo-query-specific.js delete mode 100644 src/controllers/llmo/llmo-query.js diff --git a/src/controllers/llmo/llmo-query-specific.js b/src/controllers/llmo/llmo-query-specific.js deleted file mode 100644 index d2722c11a..000000000 --- a/src/controllers/llmo/llmo-query-specific.js +++ /dev/null @@ -1,596 +0,0 @@ -/* - * Copyright 2025 Adobe. All rights reserved. - * This file is licensed to you under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. You may obtain a copy - * of the License at http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS - * OF ANY KIND, either express or implied. See the License for the specific language - * governing permissions and limitations under the License. - */ - -import { parse } from 'tldts'; -import { ok, badRequest } from '@adobe/spacecat-shared-http-utils'; -import { SPACECAT_USER_AGENT, tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; -import { - applyFilters, applyInclusions, applySort, applyPagination, -} from './llmo-utils.js'; - -export default class LlmoQuerySpecificCache { - constructor(getSiteAndValidateLlmo) { - this.getSiteAndValidateLlmo = getSiteAndValidateLlmo; - } - - /** - * Generates a cache key that includes all query parameters - * @private - */ - // eslint-disable-next-line class-methods-use-this - generateCacheKey(filePath, queryParams, llmoConfig) { - const { dataFolder } = llmoConfig; - - // Sort query params to ensure consistent cache keys - const sortedParams = {}; - Object.keys(queryParams) - .sort() - .forEach((key) => { - sortedParams[key] = queryParams[key]; - }); - - // Create a string representation of the query params - const paramsString = JSON.stringify(sortedParams); - - // Combine dataFolder, filePath, and query params into a single cache key - return `${dataFolder}/${filePath}:${paramsString}`; - } - - /** - * Processes data by applying filters and inclusions based on query parameters - * @private - */ - static processData(filePath, data, queryParams) { - let processedData = data; - - // Apply sheet filtering if provided (e.g., ?sheets=sheet1,sheet2) - if (queryParams.sheets && processedData[':type'] === 'multi-sheet') { - const requestedSheets = Array.isArray(queryParams.sheets) - ? queryParams.sheets - : queryParams.sheets.split(',').map((sheet) => sheet.trim()); - - // Create a new data object with only the requested sheets - const filteredData = { ':type': 'multi-sheet' }; - requestedSheets.forEach((sheetName) => { - if (processedData[sheetName]) { - filteredData[sheetName] = processedData[sheetName]; - } - }); - processedData = filteredData; - } - - // Apply filters if provided (e.g., ?filter.status=active&filter.type=premium) - const filterFields = {}; - Object.keys(queryParams).forEach((key) => { - if (key.startsWith('filter.')) { - const fieldName = key.substring(7); // Remove 'filter.' prefix - filterFields[fieldName] = queryParams[key]; - } - }); - - if (Object.keys(filterFields).length > 0) { - processedData = applyFilters(processedData, filterFields); - } - - // Apply inclusions if provided (e.g., ?include=field1,field2,field3) - if (queryParams.include) { - const includeFields = Array.isArray(queryParams.include) - ? queryParams.include - : queryParams.include.split(',').map((field) => field.trim()); - processedData = applyInclusions(processedData, includeFields); - } - - // Apply sorting if provided (e.g., ?sort=field:asc or ?sort=field:desc) - if (queryParams.sort) { - const sortParam = Array.isArray(queryParams.sort) - ? queryParams.sort[0] - : queryParams.sort; - const [field, order = 'asc'] = sortParam.split(':').map((s) => s.trim()); - - // Validate order is either 'asc' or 'desc' - const sortOrder = order.toLowerCase() === 'desc' ? 'desc' : 'asc'; - - processedData = applySort(processedData, { field, order: sortOrder }); - } - - // Apply pagination (limit and offset) as the final step - // This ensures pagination is applied after all filtering and sorting - if (queryParams.limit || queryParams.offset) { - const limit = queryParams.limit ? parseInt(queryParams.limit, 10) : undefined; - const offset = queryParams.offset ? parseInt(queryParams.offset, 10) : 0; - - processedData = applyPagination(processedData, { limit, offset }); - } - - processedData = this.extractData(filePath, processedData, 'https://adobe.com'); - return processedData; - } - - /** - * Fetches and processes a single file with caching of the final result - * @private - */ - async fetchAndProcessSingleFile(context, filePath, queryParams, llmoConfig) { - const { log, env, valkey } = context; - const { sheet } = context.data; - - // Get cache from context (initialized by valkeyClientWrapper) - const cache = valkey?.cache; - - // Generate cache key that includes all query parameters - const cacheKey = this.generateCacheKey(filePath, { ...queryParams, sheet }, llmoConfig); - - // Try to get processed result from cache first - const cacheStartTime = Date.now(); - const cachedResult = cache ? await cache.get(cacheKey) : null; - const cacheFetchTime = Date.now() - cacheStartTime; - - if (cachedResult) { - log.info(`✓ Processed result cache HIT for: ${cacheKey} (fetch time: ${cacheFetchTime}ms)`); - return { - data: cachedResult, - headers: {}, - }; - } - - // Cache miss - fetch raw data and process it - log.info(`✗ Processed result cache MISS for: ${cacheKey} (cache check time: ${cacheFetchTime}ms), fetching and processing`); - - const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem.live'; - const url = new URL(`${LLMO_SHEETDATA_SOURCE_URL}/${llmoConfig.dataFolder}/${filePath}`); - - // Use a large limit to fetch all data from the source - // Pagination will be applied after sorting and filtering - url.searchParams.set('limit', '10000000'); - - // allow fetching a specific sheet from the sheet data source - if (sheet) { - url.searchParams.set('sheet', sheet); - } - - const urlAsString = url.toString(); - log.info(`Fetching single file with path: ${urlAsString}`); - - // Create an AbortController with a 60-second timeout for large data fetches - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 60000); // 60 seconds - - // Start timing the source fetch - const sourceFetchStartTime = Date.now(); - - try { - // Fetch data from the external endpoint using the dataFolder from config - const response = await fetch(url.toString(), { - headers: { - Authorization: `token ${env.LLMO_HLX_API_KEY || 'hlx_api_key_missing'}`, - 'User-Agent': SPACECAT_USER_AGENT, - 'Accept-Encoding': 'br', - }, - signal: controller.signal, - }); - clearTimeout(timeoutId); - - if (!response.ok) { - log.error(`Failed to fetch data from external endpoint: ${response.status} ${response.statusText}`); - throw new Error(`External API returned ${response.status}: ${response.statusText}`); - } - - // Get the raw response data - const rawData = await response.json(); - const fetchTime = Date.now() - sourceFetchStartTime; - - log.info(`✓ Fetch from HELIX ${filePath}: ${fetchTime}ms`); - - // Process the data with all query parameters - const processStartTime = Date.now(); - const processedData = LlmoQuerySpecificCache.processData(filePath, rawData, queryParams); - const processTime = Date.now() - processStartTime; - - log.info(`✓ Data processing completed in ${processTime}ms`); - - // Cache the processed result (async, don't wait for it) - if (cache) { - cache.set(cacheKey, processedData).catch((error) => { - log.error(`Failed to cache processed data for ${cacheKey}: ${error.message}`); - }); - } - - return { - data: processedData, - headers: response.headers ? Object.fromEntries(response.headers.entries()) : {}, - }; - } catch (error) { - clearTimeout(timeoutId); - if (error.name === 'AbortError') { - log.error(`Request timeout after 60000ms for file: ${filePath}`); - throw new Error('Request timeout after 60000ms'); - } - throw error; - } - } - - /** - * Fetches and processes multiple files in parallel - * @private - */ - async fetchAndProcessMultipleFiles(context, files, queryParams, llmoConfig) { - const { log } = context; - - // Fetch and process all files in parallel - const fetchPromises = files.map(async (filePath) => { - try { - const { data } = await this.fetchAndProcessSingleFile( - context, - filePath, - queryParams, - llmoConfig, - ); - return { - path: filePath, - status: 'success', - data, - }; - } catch (error) { - log.error(`Error fetching and processing file ${filePath}: ${error.message}`); - return { - path: filePath, - status: 'error', - error: error.message, - }; - } - }); - - // Wait for all parallel fetches to complete - const results = await Promise.all(fetchPromises); - - return results; - } - - async query(context) { - const { log } = context; - const { - siteId, dataSource, sheetType, week, - } = context.params; - const { file, ...queryParams } = context.data; - - try { - const { llmoConfig } = await this.getSiteAndValidateLlmo(context); - - // Multi-file mode: if 'file' query param exists - if (file) { - const files = Array.isArray(file) ? file : [file]; - log.info(`Fetching and processing multiple files for siteId: ${siteId}, files: ${files.join(', ')}`); - - const results = await this.fetchAndProcessMultipleFiles( - context, - files, - queryParams, - llmoConfig, - ); - - return ok({ files: results }, { 'Content-Encoding': 'br' }); - } - - // Single-file mode: construct the sheet URL based on path parameters - let filePath; - if (sheetType && week) { - filePath = `${sheetType}/${week}/${dataSource}`; - } else if (sheetType) { - filePath = `${sheetType}/${dataSource}`; - } else { - filePath = dataSource; - } - - log.info(`Fetching and processing single file for siteId: ${siteId}, path: ${filePath}`); - const { data, headers } = await this.fetchAndProcessSingleFile( - context, - filePath, - queryParams, - llmoConfig, - ); - - // Return the processed data, pass through any compression headers from upstream - return ok(data, headers); - } catch (error) { - log.error(`Error proxying data for siteId: ${siteId}, error: ${error.message}`); - return badRequest(error.message); - } - } - - /** - * @private - */ - static extractData(filePath, weekData, siteBaseUrl) { - let allSheetRecords = []; - let competitorsRecords = []; - - // filePath contains something like brandpresence-all-W45-2025.json - // so build an object with the week and year from the filePath - // but watch out, the filePath contains a lot of characters before - // so do it with a regex - const weekAndYear = filePath.match(/brandpresence-all-w(\d{2})-(\d{4})/); - const week = weekAndYear[1]; - const year = weekAndYear[2]; - - // Get week information from weekData - const weekInfo = { week, year }; - const weekString = `${weekInfo.year}-W${weekInfo.week.toString().padStart(2, '0')}`; - Object.entries(weekData).forEach(([sheetName, sheetContent]) => { - // Extract from 'all' sheet - if (sheetName.includes('all')) { - const records = Array.isArray(sheetContent.data) ? sheetContent.data : []; - - // Enrich records in place so weekly trend calculations see the updated data - records.forEach((record) => { - // Add week information - // eslint-disable-next-line no-param-reassign - record.Week = weekString; - // eslint-disable-next-line no-param-reassign - record.week = weekString; - - // Parse sources field and create SourcesDetail - const { Prompt: prompt } = record; - if (prompt) { - // Parse semicolon-separated URLs from sources field - const sources = record.sources || record.Sources || ''; - const urls = LlmoQuerySpecificCache.parseSourcesUrls(sources); - - // Get competitor domains for content type determination - const competitorDomains = LlmoQuerySpecificCache.extractCompetitorDomains(record); - - // Create citations for each URL - const sourcesDetail = urls.map((url) => { - const contentType = siteBaseUrl - ? LlmoQuerySpecificCache.determineContentType(url, siteBaseUrl, competitorDomains) - : 'earned'; - - const brand = LlmoQuerySpecificCache.extractDomain(url) || ''; - - return { - url, - brand, - numTimesCited: 1, // Each occurrence counts as 1 citation - contentType, - week: weekString, - weekNumber: weekInfo.week, - year: weekInfo.year, - }; - }); - - // eslint-disable-next-line no-param-reassign - record.SourcesDetail = sourcesDetail; - - // Set sources_contain_branddomain based on whether any source is 'owned' - const hasOwnedSource = sourcesDetail.some((source) => source.contentType === 'owned'); - - // Always set the field based on our analysis (modifying in place!) - // eslint-disable-next-line no-param-reassign - record.sources_contain_branddomain = hasOwnedSource ? 'true' : 'false'; - // eslint-disable-next-line no-param-reassign - record['Sources Contain Brand Domain'] = hasOwnedSource ? 'true' : 'false'; - } - }); - - // Also collect them for the return value - allSheetRecords = allSheetRecords.concat(records); - } - - // Extract from 'brand_vs_competitors' sheet - if (sheetName.includes('brand_vs_competitors')) { - const records = Array.isArray(sheetContent.data) ? sheetContent.data : []; - // Add week information to each record (modifying in place) - records.forEach((record) => { - // eslint-disable-next-line no-param-reassign - record.Week = weekString; - // eslint-disable-next-line no-param-reassign - record.week = weekString; - }); - competitorsRecords = competitorsRecords.concat(records); - } - }); - - // eslint-disable-next-line no-param-reassign - weekData.all.data = allSheetRecords; - // eslint-disable-next-line no-param-reassign - weekData.brand_vs_competitors.data = competitorsRecords; - return weekData; - } - - /** - * @private - */ - static parseSourcesUrls(sources) { - if (!sources || typeof sources !== 'string') return []; - - return sources - .split(';') - .map((url) => url.trim()) - .filter((url) => url.length > 0) - .map((url) => LlmoQuerySpecificCache.normalizeUrl(url)); - } - - /** - * @private - */ - static extractCompetitorDomains(record) { - const competitors = []; - - // Extract from Business Competitors field only (semicolon separated) - if (record['Business Competitors'] || record.businessCompetitors) { - const businessCompetitors = record['Business Competitors'] || record.businessCompetitors; - if (typeof businessCompetitors === 'string') { - competitors.push(...businessCompetitors.split(';').map((c) => c.trim()).filter((c) => c.length > 0)); - } - } - - // Deduplicate - return [...new Set(competitors)]; - } - - /** - * @private - */ - static determineContentType(url, siteBaseUrl, competitorNames) { - // Priority 1: Check if owned - if (LlmoQuerySpecificCache.isOwnedUrl(url, siteBaseUrl)) { - return 'owned'; - } - - // Priority 2: Check if competitor/others - if (competitorNames && LlmoQuerySpecificCache.isCompetitorUrl(url, competitorNames)) { - return 'others'; - } - - // Priority 3: Check if social media - if (LlmoQuerySpecificCache.isSocialMediaUrl(url)) { - return 'social'; - } - - // Default: earned (third-party content) - return 'earned'; - } - - /** - * @private - */ - static extractDomain(url) { - try { - const urlObj = new URL(url); - return urlObj.hostname.replace(/^www\./, ''); - } catch { - return null; - } - } - - /** - * @private - */ - static normalizeUrl(url) { - if (!url || typeof url !== 'string') return url; - - let normalized = url.trim(); - - // Check if this is a path (starts with /) or a full URL - const isPath = normalized.startsWith('/'); - - if (isPath) { - // For paths, just strip query params with regex (preserve fragments) - normalized = normalized.replace(/\?[^#]*/, ''); - } else { - // For full URLs, use URL object for proper parsing - try { - const urlObj = new URL(normalized.startsWith('http') ? normalized : `https://${normalized}`); - - // Clear all search params but keep hash/fragment - urlObj.search = ''; - - // Add www. to bare domains (not subdomains like helpx.adobe.com) - const { subdomain } = parse(urlObj.hostname); - if (!subdomain && !urlObj.hostname.startsWith('www.')) { - urlObj.hostname = `www.${urlObj.hostname}`; - } - - normalized = urlObj.toString(); - } catch { - // If URL parsing fails, use fallback approach - // Remove query params: everything between ? and # (or end of string if no #) - normalized = normalized.replace(/\?[^#]*/, ''); - } - } - - // Remove trailing slash, except for root paths - if (normalized.length > 1 && normalized.endsWith('/')) { - normalized = normalized.slice(0, -1); - } - - // Normalize protocol to lowercase (only for full URLs) - if (!isPath) { - if (normalized.startsWith('HTTP://')) { - normalized = `http://${normalized.slice(7)}`; - } else if (normalized.startsWith('HTTPS://')) { - normalized = `https://${normalized.slice(8)}`; - } - } - - return normalized; - } - - /** - * @private - */ - static isOwnedUrl(url, siteBaseUrl) { - try { - const urlObj = new URL(url.startsWith('http') ? url : `https://${url}`); - const siteObj = new URL(siteBaseUrl); - - const urlHostname = urlObj.hostname.replace(/^www\./, '').toLowerCase(); - const siteHostname = siteObj.hostname.replace(/^www\./, '').toLowerCase(); - - // Check if URL hostname matches or is a subdomain of site hostname - return urlHostname === siteHostname || urlHostname.endsWith(`.${siteHostname}`); - } catch { - return false; - } - } - - /** - * @private - */ - static isCompetitorUrl(url, competitorNames) { - if (!competitorNames || competitorNames.length === 0) return false; - - try { - const urlObj = new URL(url.startsWith('http') ? url : `https://${url}`); - const urlHostname = urlObj.hostname.replace(/^www\./, '').toLowerCase(); - - // Check if any competitor name appears in the domain - return competitorNames.some((competitorName) => { - const nameLower = competitorName.toLowerCase().trim(); - return urlHostname.includes(nameLower); - }); - } catch { - return false; - } - } - - /** - * @private - */ - static isSocialMediaUrl(url) { - const SOCIAL_MEDIA_DOMAINS = [ - 'twitter.com', - 'x.com', - 'facebook.com', - 'linkedin.com', - 'instagram.com', - 'youtube.com', - 'tiktok.com', - 'reddit.com', - 'pinterest.com', - 'snapchat.com', - 'discord.com', - 'twitch.tv', - 'medium.com', - 'quora.com', - 'tumblr.com', - 'vimeo.com', - ]; - - try { - const urlObj = new URL(url.startsWith('http') ? url : `https://${url}`); - const hostname = urlObj.hostname.replace(/^www\./, '').toLowerCase(); - return SOCIAL_MEDIA_DOMAINS.some((d) => hostname === d || hostname.endsWith(`.${d}`)); - } catch { - return false; - } - } -} diff --git a/src/controllers/llmo/llmo-query.js b/src/controllers/llmo/llmo-query.js deleted file mode 100644 index eca1054b4..000000000 --- a/src/controllers/llmo/llmo-query.js +++ /dev/null @@ -1,285 +0,0 @@ -/* - * Copyright 2025 Adobe. All rights reserved. - * This file is licensed to you under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. You may obtain a copy - * of the License at http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under - * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS - * OF ANY KIND, either express or implied. See the License for the specific language - * governing permissions and limitations under the License. - */ - -import { ok, badRequest } from '@adobe/spacecat-shared-http-utils'; -import { SPACECAT_USER_AGENT, tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; -import { - applyFilters, applyInclusions, applySort, applyPagination, -} from './llmo-utils.js'; - -export default class LlmoQueryFileCache { - constructor(getSiteAndValidateLlmo) { - this.getSiteAndValidateLlmo = getSiteAndValidateLlmo; - } - - /** - * Fetches a single file from the external endpoint with caching - * @private - */ - // eslint-disable-next-line class-methods-use-this - async fetchSingleFile(context, filePath, llmoConfig) { - const { log, env, valkey } = context; - const { sheet } = context.data; - - // Get cache from context (initialized by valkeyClientWrapper) - const cache = valkey?.cache; - - // Construct cache key (includes dataFolder and filePath, optionally sheet) - const cacheFilePath = sheet - ? `${llmoConfig.dataFolder}/${filePath}?sheet=${sheet}` - : `${llmoConfig.dataFolder}/${filePath}`; - - // Try to get from cache first - const cacheStartTime = Date.now(); - const cachedData = cache ? await cache.get(cacheFilePath) : null; - const cacheFetchTime = Date.now() - cacheStartTime; - - if (cachedData) { - log.info(`✓ Fetch from cache HIT for file: ${cacheFilePath} (fetch time: ${cacheFetchTime}ms)`); - return { - data: cachedData, - headers: {}, - }; - } - - // Cache miss - fetch from source - log.info(`✗ Cache MISS for file: ${cacheFilePath} (cache check time: ${cacheFetchTime}ms), fetching from source`); - - const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem.live'; - const url = new URL(`${LLMO_SHEETDATA_SOURCE_URL}/${llmoConfig.dataFolder}/${filePath}`); - - // Use a large limit to fetch all data from the source - // Pagination will be applied after sorting and filtering - url.searchParams.set('limit', '1000000'); - - // allow fetching a specific sheet from the sheet data source - if (sheet) { - url.searchParams.set('sheet', sheet); - } - - const urlAsString = url.toString(); - log.info(`Fetching single file with path: ${urlAsString}`); - - // Create an AbortController with a 60-second timeout for large data fetches - const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 60000); // 60 seconds - - // Start timing the source fetch - const sourceFetchStartTime = Date.now(); - - try { - // Fetch data from the external endpoint using the dataFolder from config - const response = await fetch(url.toString(), { - headers: { - Authorization: `token ${env.LLMO_HLX_API_KEY || 'hlx_api_key_missing'}`, - 'User-Agent': SPACECAT_USER_AGENT, - 'Accept-Encoding': 'br', - }, - signal: controller.signal, - }); - clearTimeout(timeoutId); - - if (!response.ok) { - log.error(`Failed to fetch data from external endpoint: ${response.status} ${response.statusText}`); - throw new Error(`External API returned ${response.status}: ${response.statusText}`); - } - - // Get the response data - const data = await response.json(); - const totalFetchTime = Date.now() - sourceFetchStartTime; - - log.info(`✓ Fetch from HELIX ${cacheFilePath}: ${totalFetchTime}ms)`); - - // Cache the raw data (async, don't wait for it) - if (cache) { - cache.set(cacheFilePath, data).catch((error) => { - log.error(`Failed to cache data for ${cacheFilePath}: ${error.message}`); - }); - } - - return { - data, - headers: response.headers ? Object.fromEntries(response.headers.entries()) : {}, - }; - } catch (error) { - clearTimeout(timeoutId); - if (error.name === 'AbortError') { - log.error(`Request timeout after 60000ms for file: ${cacheFilePath}`); - throw new Error('Request timeout after 60000ms'); - } - throw error; - } - } - - /** - * Fetches multiple files from the external endpoint - * @private - */ - async fetchMultipleFiles(context, files, llmoConfig) { - const { log } = context; - const results = []; - - // Fetch all files in parallel - const fetchPromises = files.map(async (filePath) => { - try { - const { data } = await this.fetchSingleFile(context, filePath, llmoConfig); - return { - path: filePath, - status: 'success', - data, - }; - } catch (error) { - log.error(`Error fetching file ${filePath}: ${error.message}`); - return { - path: filePath, - status: 'error', - error: error.message, - }; - } - }); - - const fetchedResults = await Promise.all(fetchPromises); - results.push(...fetchedResults); - - return results; - } - - /** - * Processes data by applying filters and inclusions based on query parameters - * @private - */ - static processData(data, queryParams) { - let processedData = data; - - // Apply sheet filtering if provided (e.g., ?sheets=sheet1,sheet2) - if (queryParams.sheets && processedData[':type'] === 'multi-sheet') { - const requestedSheets = Array.isArray(queryParams.sheets) - ? queryParams.sheets - : queryParams.sheets.split(',').map((sheet) => sheet.trim()); - - // Create a new data object with only the requested sheets - const filteredData = { ':type': 'multi-sheet' }; - requestedSheets.forEach((sheetName) => { - if (processedData[sheetName]) { - filteredData[sheetName] = processedData[sheetName]; - } - }); - processedData = filteredData; - } - - // Apply filters if provided (e.g., ?filter.status=active&filter.type=premium) - const filterFields = {}; - Object.keys(queryParams).forEach((key) => { - if (key.startsWith('filter.')) { - const fieldName = key.substring(7); // Remove 'filter.' prefix - filterFields[fieldName] = queryParams[key]; - } - }); - - if (Object.keys(filterFields).length > 0) { - processedData = applyFilters(processedData, filterFields); - } - - // Apply inclusions if provided (e.g., ?include=field1,field2,field3) - if (queryParams.include) { - const includeFields = Array.isArray(queryParams.include) - ? queryParams.include - : queryParams.include.split(',').map((field) => field.trim()); - processedData = applyInclusions(processedData, includeFields); - } - - // Apply sorting if provided (e.g., ?sort=field:asc or ?sort=field:desc) - if (queryParams.sort) { - const sortParam = Array.isArray(queryParams.sort) - ? queryParams.sort[0] - : queryParams.sort; - const [field, order = 'asc'] = sortParam.split(':').map((s) => s.trim()); - - // Validate order is either 'asc' or 'desc' - const sortOrder = order.toLowerCase() === 'desc' ? 'desc' : 'asc'; - - processedData = applySort(processedData, { field, order: sortOrder }); - } - - // Apply pagination (limit and offset) as the final step - // This ensures pagination is applied after all filtering and sorting - if (queryParams.limit || queryParams.offset) { - const limit = queryParams.limit ? parseInt(queryParams.limit, 10) : undefined; - const offset = queryParams.offset ? parseInt(queryParams.offset, 10) : 0; - - processedData = applyPagination(processedData, { limit, offset }); - } - - return processedData; - } - - async query(context) { - const { log } = context; - const { - siteId, dataSource, sheetType, week, - } = context.params; - const { file, ...queryParams } = context.data; - - try { - const { llmoConfig } = await this.getSiteAndValidateLlmo(context); - - // Multi-file mode: if 'file' query param exists - if (file) { - const files = Array.isArray(file) ? file : [file]; - log.info(`Fetching multiple files for siteId: ${siteId}, files: ${files.join(', ')}`); - - const fetchStartTime = Date.now(); - const results = await this.fetchMultipleFiles(context, files, llmoConfig); - const fetchDuration = Date.now() - fetchStartTime; - log.info(`✓ fetchMultipleFiles completed in ${fetchDuration}ms for ${files.length} file(s)`); - - // Apply filters and inclusions to each file's data - const processingStartTime = Date.now(); - const processedResults = results.map((result) => { - if (result.status === 'success' && result.data) { - return { - ...result, - data: LlmoQueryFileCache.processData(result.data, queryParams), - }; - } - return result; - }); - const processingDuration = Date.now() - processingStartTime; - log.info(`✓ Processing completed in ${processingDuration}ms for ${results.length} file(s)`); - - return ok({ files: processedResults }, { 'Content-Encoding': 'br' }); - } - - // Single-file mode: construct the sheet URL based on path parameters - let filePath; - if (sheetType && week) { - filePath = `${sheetType}/${week}/${dataSource}`; - } else if (sheetType) { - filePath = `${sheetType}/${dataSource}`; - } else { - filePath = dataSource; - } - - log.info(`Fetching single file for siteId: ${siteId}, path: ${filePath}`); - const { data, headers } = await this.fetchSingleFile(context, filePath, llmoConfig); - - // Apply filters and inclusions to the data - const processedData = LlmoQueryFileCache.processData(data, queryParams); - - // Return the processed data, pass through any compression headers from upstream - return ok(processedData, headers); - } catch (error) { - log.error(`Error proxying data for siteId: ${siteId}, error: ${error.message}`); - return badRequest(error.message); - } - } -} From ae2cde159fe75528f0a60072168ea2a8474c6c48 Mon Sep 17 00:00:00 2001 From: Char Date: Tue, 11 Nov 2025 20:10:07 +0100 Subject: [PATCH 31/38] Implement single endpoint for llmo cache. Include unit tests --- src/controllers/llmo/llmo-cache-handler.js | 274 ++++++ src/controllers/llmo/llmo-utils.js | 3 + src/controllers/llmo/llmo.js | 62 +- src/routes/index.js | 4 +- .../llmo/llmo-cache-handler.test.js | 874 ++++++++++++++++++ test/controllers/llmo/llmo.test.js | 62 ++ test/routes/index.test.js | 13 +- test/support/valkey.test.js | 451 +++++++++ 8 files changed, 1679 insertions(+), 64 deletions(-) create mode 100644 src/controllers/llmo/llmo-cache-handler.js create mode 100644 test/controllers/llmo/llmo-cache-handler.test.js create mode 100644 test/support/valkey.test.js diff --git a/src/controllers/llmo/llmo-cache-handler.js b/src/controllers/llmo/llmo-cache-handler.js new file mode 100644 index 000000000..6322668dc --- /dev/null +++ b/src/controllers/llmo/llmo-cache-handler.js @@ -0,0 +1,274 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +import { SPACECAT_USER_AGENT, tracingFetch as fetch } from '@adobe/spacecat-shared-utils'; +import { + applyFilters, + applyInclusions, + applySort, + applyPagination, + LLMO_SHEETDATA_SOURCE_URL, +} from './llmo-utils.js'; + +const generateCacheKey = (llmoConfig, filePath, queryParams) => { + const { dataFolder } = llmoConfig; + + // Sort query params to ensure consistent cache keys + const sortedParams = {}; + Object.keys(queryParams) + .sort() + .forEach((key) => { + sortedParams[key] = queryParams[key]; + }); + + // Create a string representation of the query params + const paramsString = JSON.stringify(sortedParams); + + // Combine dataFolder, filePath, and query params into a single cache key + return `${dataFolder}/${filePath}:${paramsString}`; +}; + +const processData = (data, queryParams) => { + let processedData = data; + + // Apply sheet filtering if provided (e.g., ?sheets=sheet1,sheet2) + if (queryParams.sheets && processedData[':type'] === 'multi-sheet') { + const requestedSheets = Array.isArray(queryParams.sheets) + ? queryParams.sheets + : queryParams.sheets.split(',').map((sheet) => sheet.trim()); + + // Create a new data object with only the requested sheets + const filteredData = { ':type': 'multi-sheet' }; + requestedSheets.forEach((sheetName) => { + if (processedData[sheetName]) { + filteredData[sheetName] = processedData[sheetName]; + } + }); + processedData = filteredData; + } + + // Apply filters if provided (e.g., ?filter.status=active&filter.type=premium) + const filterFields = {}; + Object.keys(queryParams).forEach((key) => { + if (key.startsWith('filter.')) { + const fieldName = key.substring(7); // Remove 'filter.' prefix + filterFields[fieldName] = queryParams[key]; + } + }); + + if (Object.keys(filterFields).length > 0) { + processedData = applyFilters(processedData, filterFields); + } + + // Apply inclusions if provided (e.g., ?include=field1,field2,field3) + if (queryParams.include) { + const includeFields = Array.isArray(queryParams.include) + ? queryParams.include + : queryParams.include.split(',').map((field) => field.trim()); + processedData = applyInclusions(processedData, includeFields); + } + + // Apply sorting if provided (e.g., ?sort=field:asc or ?sort=field:desc) + if (queryParams.sort) { + const sortParam = Array.isArray(queryParams.sort) + ? queryParams.sort[0] + : queryParams.sort; + const [field, order = 'asc'] = sortParam.split(':').map((s) => s.trim()); + + // Validate order is either 'asc' or 'desc' + const sortOrder = order.toLowerCase() === 'desc' ? 'desc' : 'asc'; + + processedData = applySort(processedData, { field, order: sortOrder }); + } + + // Apply pagination (limit and offset) as the final step + // This ensures pagination is applied after all filtering and sorting + if (queryParams.limit || queryParams.offset) { + const limit = queryParams.limit ? parseInt(queryParams.limit, 10) : undefined; + const offset = queryParams.offset ? parseInt(queryParams.offset, 10) : 0; + + processedData = applyPagination(processedData, { limit, offset }); + } + return processedData; +}; + +const fetchAndProcessSingleFile = async (context, llmoConfig, filePath, queryParams) => { + const { log, env, valkey } = context; + const { sheet } = context.data; + + // Get cache from context (initialized by valkeyClientWrapper) + const cache = valkey?.cache; + + // Generate cache key that includes all query parameters + const cacheKey = generateCacheKey(llmoConfig, filePath, { ...queryParams, sheet }); + + // Try to get processed result from cache first + const cacheStartTime = Date.now(); + const cachedResult = cache ? await cache.get(cacheKey) : null; + const cacheFetchTime = Date.now() - cacheStartTime; + + if (cachedResult) { + log.info(`✓ Processed result cache HIT for: ${cacheKey} (fetch time: ${cacheFetchTime}ms)`); + return { + data: cachedResult, + headers: {}, + }; + } + + // Cache miss - fetch raw data and process it + log.info(`✗ Processed result cache MISS for: ${cacheKey} (cache check time: ${cacheFetchTime}ms), fetching and processing`); + + const url = new URL(`${LLMO_SHEETDATA_SOURCE_URL}/${llmoConfig.dataFolder}/${filePath}`); + + // Use a large limit to fetch all data from the source + // Pagination will be applied after sorting and filtering + url.searchParams.set('limit', '10000000'); + + // allow fetching a specific sheet from the sheet data source + if (sheet) { + url.searchParams.set('sheet', sheet); + } + + const urlAsString = url.toString(); + log.info(`Fetching single file with path: ${urlAsString}`); + + // Create an AbortController with a 60-second timeout for large data fetches + const controller = new AbortController(); + const timeoutId = setTimeout(() => controller.abort(), 60000); // 60 seconds + + // Start timing the source fetch + const sourceFetchStartTime = Date.now(); + + try { + // Fetch data from the external endpoint using the dataFolder from config + const response = await fetch(url.toString(), { + headers: { + Authorization: `token ${env.LLMO_HLX_API_KEY || 'hlx_api_key_missing'}`, + 'User-Agent': SPACECAT_USER_AGENT, + 'Accept-Encoding': 'br', + }, + signal: controller.signal, + }); + clearTimeout(timeoutId); + + if (!response.ok) { + log.error(`Failed to fetch data from external endpoint: ${response.status} ${response.statusText}`); + throw new Error(`External API returned ${response.status}: ${response.statusText}`); + } + + // Get the raw response data + const rawData = await response.json(); + const fetchTime = Date.now() - sourceFetchStartTime; + + log.info(`✓ Fetch from HELIX ${filePath}: ${fetchTime}ms`); + + // Process the data with all query parameters + const processStartTime = Date.now(); + const processedData = processData(rawData, queryParams); + const processTime = Date.now() - processStartTime; + + log.info(`✓ Data processing completed in ${processTime}ms`); + + // Cache the processed result (async, don't wait for it) + if (cache) { + cache.set(cacheKey, processedData).catch((error) => { + log.error(`Failed to cache processed data for ${cacheKey}: ${error.message}`); + }); + } + + return { + data: processedData, + headers: response.headers ? Object.fromEntries(response.headers.entries()) : {}, + }; + } catch (error) { + clearTimeout(timeoutId); + if (error.name === 'AbortError') { + log.error(`Request timeout after 60000ms for file: ${filePath}`); + throw new Error('Request timeout after 60000ms'); + } + throw error; + } +}; + +const fetchAndProcessMultipleFiles = async (context, llmoConfig, files, queryParams) => { + const { log } = context; + + // Fetch and process all files in parallel + const fetchPromises = files.map(async (filePath) => { + try { + const { data } = await fetchAndProcessSingleFile( + context, + llmoConfig, + filePath, + queryParams, + ); + return { + path: filePath, + status: 'success', + data, + }; + } catch (error) { + log.error(`Error fetching and processing file ${filePath}: ${error.message}`); + return { + path: filePath, + status: 'error', + error: error.message, + }; + } + }); + + // Wait for all parallel fetches to complete + const results = await Promise.all(fetchPromises); + + return results; +}; + +export const queryLlmoWithCache = async (context, llmoConfig) => { + const { log } = context; + const { + siteId, dataSource, sheetType, week, + } = context.params; + const { file, ...queryParams } = context.data; + + // Multi-file mode: if 'file' query param exists + if (file) { + const files = Array.isArray(file) ? file : [file]; + log.info(`Fetching and processing multiple files for siteId: ${siteId}, files: ${files.join(', ')}`); + + const results = await fetchAndProcessMultipleFiles( + context, + llmoConfig, + files, + queryParams, + ); + + return { data: results, headers: { 'Content-Encoding': 'br' } }; + } + + // Single-file mode: construct the sheet URL based on path parameters + let filePath; + if (sheetType && week) { + filePath = `${sheetType}/${week}/${dataSource}`; + } else if (sheetType) { + filePath = `${sheetType}/${dataSource}`; + } else { + filePath = dataSource; + } + + log.info(`Fetching and processing single file for siteId: ${siteId}, path: ${filePath}`); + return fetchAndProcessSingleFile( + context, + llmoConfig, + filePath, + queryParams, + ); +}; diff --git a/src/controllers/llmo/llmo-utils.js b/src/controllers/llmo/llmo-utils.js index 0b72b438e..8c0440c05 100644 --- a/src/controllers/llmo/llmo-utils.js +++ b/src/controllers/llmo/llmo-utils.js @@ -10,6 +10,9 @@ * governing permissions and limitations under the License. */ +// LLMO constants +export const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem.live'; + // Apply filters to data arrays with case-insensitive exact matching export const applyFilters = (rawData, filterFields) => { const data = { ...rawData }; diff --git a/src/controllers/llmo/llmo.js b/src/controllers/llmo/llmo.js index 53a885b7f..e7438f009 100644 --- a/src/controllers/llmo/llmo.js +++ b/src/controllers/llmo/llmo.js @@ -32,6 +32,7 @@ import { applyExclusions, applyGroups, applyMappings, + LLMO_SHEETDATA_SOURCE_URL, } from './llmo-utils.js'; import { LLMO_SHEET_MAPPINGS } from './llmo-mappings.js'; import { @@ -40,18 +41,15 @@ import { performLlmoOnboarding, performLlmoOffboarding, } from './llmo-onboarding.js'; -import LlmoQuerySpecificCache from './llmo-query-specific.js'; -import LlmoQueryFileCache from './llmo-query.js'; +import { queryLlmoWithCache } from './llmo-cache-handler.js'; const { readConfig, writeConfig } = llmo; const { llmoConfig: llmoConfigSchema } = schemas; -const LLMO_SHEETDATA_SOURCE_URL = 'https://main--project-elmo-ui-data--adobe.aem.live'; - function LlmoController(ctx) { const accessControlUtil = AccessControlUtil.fromContext(ctx); - // Helper function to get site and validate LLMO confign + // Helper function to get site and validate LLMO config const getSiteAndValidateLlmo = async (context) => { const { siteId } = context.params; const { dataAccess } = context; @@ -869,53 +867,15 @@ function LlmoController(ctx) { } }; - const queryFile = async (context) => { - const llmoQuery = new LlmoQueryFileCache(getSiteAndValidateLlmo); - return llmoQuery.query(context); - }; - - const querySpecific = async (context) => { - const llmoQuery = new LlmoQuerySpecificCache(getSiteAndValidateLlmo); - return llmoQuery.query(context); - }; - - /** - * Clears all LLMO cache entries from Valkey. - * This endpoint handles DELETE requests to clear the entire cache. - * @param {object} context - The request context. - * @returns {Promise} The cache clear response. - */ - const clearCache = async (context) => { + const queryWithCache = async (context) => { const { log } = context; - + const { siteId } = context.params; try { - // Validate LLMO access - await getSiteAndValidateLlmo(context); - - // Check if Valkey cache is available - if (!context.valkey || !context.valkey.cache) { - return badRequest('Cache is not configured for this environment'); - } - - log.info('Starting cache clear operation'); - - // Clear all cache entries - const result = await context.valkey.cache.clearAll(); - - if (!result.success) { - log.error('Failed to clear cache'); - return badRequest('Failed to clear cache'); - } - - log.info(`Successfully cleared ${result.deletedCount} cache entries`); - - return ok({ - message: 'Cache cleared successfully', - deletedCount: result.deletedCount, - clearedAt: new Date().toISOString(), - }); + const { llmoConfig } = await getSiteAndValidateLlmo(context); + const { data, headers } = await queryLlmoWithCache(context, llmoConfig); + return ok(data, headers); } catch (error) { - log.error(`Error clearing cache: ${error.message}`); + log.error(`Error during LLMO cached query for site ${siteId}: ${error.message}`); return badRequest(error.message); } }; @@ -938,9 +898,7 @@ function LlmoController(ctx) { updateLlmoConfig, onboardCustomer, offboardCustomer, - queryFile, - querySpecific, - clearCache, + queryWithCache, }; } diff --git a/src/routes/index.js b/src/routes/index.js index 350603a67..a290cc28c 100644 --- a/src/routes/index.js +++ b/src/routes/index.js @@ -270,8 +270,7 @@ export default function getRouteHandlers( 'GET /sites/:siteId/llmo/sheet-data/:dataSource': llmoController.getLlmoSheetData, 'GET /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource': llmoController.getLlmoSheetData, 'GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource': llmoController.getLlmoSheetData, - 'GET /sites/:siteId/llmo/query-file': llmoController.queryFile, - 'GET /sites/:siteId/llmo/query-specific': llmoController.querySpecific, + 'GET /sites/:siteId/llmo/cache': llmoController.queryWithCache, 'POST /sites/:siteId/llmo/sheet-data/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource': llmoController.queryLlmoSheetData, @@ -289,7 +288,6 @@ export default function getRouteHandlers( 'PATCH /sites/:siteId/llmo/cdn-logs-filter': llmoController.patchLlmoCdnLogsFilter, 'PATCH /sites/:siteId/llmo/cdn-logs-bucket-config': llmoController.patchLlmoCdnBucketConfig, 'GET /sites/:siteId/llmo/global-sheet-data/:configName': llmoController.getLlmoGlobalSheetData, - 'DELETE /sites/:siteId/llmo/cache': llmoController.clearCache, 'POST /llmo/onboard': llmoController.onboardCustomer, 'POST /sites/:siteId/llmo/offboard': llmoController.offboardCustomer, diff --git a/test/controllers/llmo/llmo-cache-handler.test.js b/test/controllers/llmo/llmo-cache-handler.test.js new file mode 100644 index 000000000..7ec335195 --- /dev/null +++ b/test/controllers/llmo/llmo-cache-handler.test.js @@ -0,0 +1,874 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* eslint-env mocha */ +import { expect, use } from 'chai'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import esmock from 'esmock'; + +use(sinonChai); + +describe('llmo-cache-handler', () => { + let queryLlmoWithCache; + let tracingFetchStub; + let mockContext; + let mockLlmoConfig; + let mockLog; + let mockCache; + + const TEST_SITE_ID = 'test-site-id'; + const TEST_DATA_FOLDER = 'test-data-folder'; + const TEST_DATA_SOURCE = 'test-data-source'; + const TEST_LLMO_API_KEY = 'test-llmo-api-key'; + + // Common test data + const createSheetData = (items) => ({ + ':type': 'sheet', + data: items, + }); + + const createMultiSheetData = (sheets) => ({ + ':type': 'multi-sheet', + ...sheets, + }); + + const createMockResponse = (data, ok = true, status = 200) => ({ + ok, + status, + statusText: ok ? 'OK' : 'Internal Server Error', + json: sinon.stub().resolves(data), + headers: new Map([['content-type', 'application/json']]), + }); + + // Helper to setup cache miss and fetch stub + const setupFetchTest = (data) => { + mockCache.get.resolves(null); + tracingFetchStub.resolves(createMockResponse(data)); + }; + + // Helper to get fetch URL from stub + const getFetchUrl = () => tracingFetchStub.getCall(0).args[0]; + + // Helper to get fetch options from stub + const getFetchOptions = () => tracingFetchStub.getCall(0).args[1]; + + beforeEach(async () => { + mockLog = { + info: sinon.stub(), + error: sinon.stub(), + warn: sinon.stub(), + debug: sinon.stub(), + }; + + mockCache = { + get: sinon.stub().resolves(null), + set: sinon.stub().resolves(true), + }; + + mockLlmoConfig = { + dataFolder: TEST_DATA_FOLDER, + }; + + mockContext = { + log: mockLog, + env: { + LLMO_HLX_API_KEY: TEST_LLMO_API_KEY, + }, + params: { + siteId: TEST_SITE_ID, + dataSource: TEST_DATA_SOURCE, + }, + data: {}, + valkey: { + cache: mockCache, + }, + }; + + tracingFetchStub = sinon.stub(); + + const module = await esmock('../../../src/controllers/llmo/llmo-cache-handler.js', { + '@adobe/spacecat-shared-utils': { + SPACECAT_USER_AGENT: 'test-user-agent', + tracingFetch: tracingFetchStub, + }, + }); + + queryLlmoWithCache = module.queryLlmoWithCache; + }); + + afterEach(() => { + sinon.restore(); + }); + + describe('queryLlmoWithCache - Single File Mode', () => { + it('should return cached data when cache hit occurs', async () => { + const cachedData = { + ':type': 'sheet', + data: [ + { id: 1, name: 'Cached Item 1' }, + { id: 2, name: 'Cached Item 2' }, + ], + }; + + mockCache.get.resolves(cachedData); + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.deep.equal(cachedData); + expect(result.headers).to.be.an('object'); + expect(mockCache.get).to.have.been.calledOnce; + expect(tracingFetchStub).to.not.have.been.called; + expect(mockLog.info).to.have.been.calledWith( + sinon.match(/Processed result cache HIT/), + ); + }); + + it('should fetch and process data when cache miss occurs', async () => { + const rawData = createSheetData([ + { id: 1, name: 'Fetched Item 1' }, + { id: 2, name: 'Fetched Item 2' }, + ]); + + setupFetchTest(rawData); + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.deep.equal(rawData); + expect(tracingFetchStub).to.have.been.calledOnce; + expect(mockCache.set).to.have.been.calledOnce; + expect(mockLog.info).to.have.been.calledWith( + sinon.match(/Processed result cache MISS/), + ); + expect(mockLog.info).to.have.been.calledWith( + sinon.match(/Fetch from HELIX/), + ); + }); + + it('should construct correct URL for single file', async () => { + setupFetchTest(createSheetData([])); + + await queryLlmoWithCache(mockContext, mockLlmoConfig); + + const fetchUrl = getFetchUrl(); + expect(fetchUrl).to.include(TEST_DATA_FOLDER); + expect(fetchUrl).to.include(TEST_DATA_SOURCE); + expect(fetchUrl).to.include('limit=10000000'); + }); + + it('should construct correct URL with sheetType and week', async () => { + setupFetchTest(createSheetData([])); + + mockContext.params = { + ...mockContext.params, + sheetType: 'weekly', + week: '2025-W01', + }; + + await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(getFetchUrl()).to.include('weekly/2025-W01/test-data-source'); + }); + + it('should construct correct URL with sheetType only', async () => { + setupFetchTest(createSheetData([])); + + mockContext.params = { + ...mockContext.params, + sheetType: 'monthly', + }; + + await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(getFetchUrl()).to.include('monthly/test-data-source'); + }); + + it('should include sheet parameter in URL when provided', async () => { + setupFetchTest(createSheetData([])); + + mockContext.data = { + sheet: 'products', + }; + + await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(getFetchUrl()).to.include('sheet=products'); + }); + + it('should handle fetch errors gracefully', async () => { + mockCache.get.resolves(null); + tracingFetchStub.rejects(new Error('Network error')); + + await expect( + queryLlmoWithCache(mockContext, mockLlmoConfig), + ).to.be.rejectedWith('Network error'); + }); + + it('should handle non-OK HTTP responses', async () => { + mockCache.get.resolves(null); + tracingFetchStub.resolves(createMockResponse({}, false, 500)); + + await expect( + queryLlmoWithCache(mockContext, mockLlmoConfig), + ).to.be.rejectedWith('External API returned 500'); + expect(mockLog.error).to.have.been.calledWith( + sinon.match(/Failed to fetch data from external endpoint/), + ); + }); + + it('should handle timeout errors', async () => { + mockCache.get.resolves(null); + const abortError = new Error('The operation was aborted'); + abortError.name = 'AbortError'; + tracingFetchStub.rejects(abortError); + + await expect( + queryLlmoWithCache(mockContext, mockLlmoConfig), + ).to.be.rejectedWith('Request timeout after 60000ms'); + expect(mockLog.error).to.have.been.calledWith( + sinon.match(/Request timeout after 60000ms/), + ); + }); + + it('should work without cache (valkey not available)', async () => { + const rawData = createSheetData([{ id: 1, name: 'Item 1' }]); + + mockContext.valkey = null; + tracingFetchStub.resolves(createMockResponse(rawData)); + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.deep.equal(rawData); + expect(tracingFetchStub).to.have.been.calledOnce; + }); + + it('should handle cache.set errors gracefully', async () => { + const rawData = createSheetData([]); + mockCache.get.resolves(null); + mockCache.set.rejects(new Error('Cache set failed')); + tracingFetchStub.resolves(createMockResponse(rawData)); + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.deep.equal(rawData); + // The function should not throw - cache.set errors are logged but not propagated + }); + + it('should include Authorization header with API key', async () => { + setupFetchTest(createSheetData([])); + + await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(getFetchOptions().headers.Authorization).to.equal(`token ${TEST_LLMO_API_KEY}`); + }); + + it('should handle missing API key', async () => { + setupFetchTest(createSheetData([])); + mockContext.env.LLMO_HLX_API_KEY = undefined; + + await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(getFetchOptions().headers.Authorization).to.equal('token hlx_api_key_missing'); + }); + + it('should handle response without headers', async () => { + const rawData = createSheetData([{ id: 1 }]); + mockCache.get.resolves(null); + + const responseWithoutHeaders = { + ok: true, + status: 200, + statusText: 'OK', + json: sinon.stub().resolves(rawData), + headers: null, + }; + + tracingFetchStub.resolves(responseWithoutHeaders); + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.deep.equal(rawData); + expect(result.headers).to.deep.equal({}); + }); + }); + + describe('queryLlmoWithCache - Query Parameters', () => { + beforeEach(() => { + mockCache.get.resolves(null); + }); + + it('should handle include parameter as array', async () => { + const rawData = createSheetData([ + { + id: 1, name: 'Item 1', status: 'active', extra: 'data', + }, + { + id: 2, name: 'Item 2', status: 'inactive', extra: 'more', + }, + ]); + + setupFetchTest(rawData); + mockContext.data = { include: ['id', 'name'] }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data[0]).to.have.keys(['id', 'name']); + expect(result.data.data[0]).to.not.have.keys(['status', 'extra']); + }); + + it('should handle sort parameter as array', async () => { + const rawData = createSheetData([ + { id: 3, name: 'Charlie' }, + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ]); + + setupFetchTest(rawData); + mockContext.data = { sort: ['name:asc', 'id:desc'] }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data[0].name).to.equal('Alice'); + expect(result.data.data[1].name).to.equal('Bob'); + expect(result.data.data[2].name).to.equal('Charlie'); + }); + + it('should handle offset without limit', async () => { + const rawData = createSheetData([ + { id: 1, name: 'Item 1' }, + { id: 2, name: 'Item 2' }, + { id: 3, name: 'Item 3' }, + { id: 4, name: 'Item 4' }, + ]); + + setupFetchTest(rawData); + mockContext.data = { offset: '2' }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data).to.have.length(2); + expect(result.data.data[0].id).to.equal(3); + expect(result.data.data[1].id).to.equal(4); + }); + + it('should apply filters to data', async () => { + const rawData = createSheetData([ + { id: 1, name: 'Item 1', status: 'active' }, + { id: 2, name: 'Item 2', status: 'inactive' }, + { id: 3, name: 'Item 3', status: 'active' }, + ]); + + setupFetchTest(rawData); + mockContext.data = { 'filter.status': 'active' }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data).to.have.length(2); + expect(result.data.data.every((item) => item.status === 'active')).to.be.true; + }); + + it('should apply inclusions to data', async () => { + const rawData = { + ':type': 'sheet', + data: [ + { + id: 1, name: 'Item 1', status: 'active', extra: 'data', + }, + { + id: 2, name: 'Item 2', status: 'inactive', extra: 'more', + }, + ], + }; + + tracingFetchStub.resolves(createMockResponse(rawData)); + mockContext.data = { + include: 'id,name', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data[0]).to.have.keys(['id', 'name']); + expect(result.data.data[0]).to.not.have.keys(['status', 'extra']); + }); + + it('should apply sorting to data', async () => { + const rawData = { + ':type': 'sheet', + data: [ + { id: 3, name: 'Charlie' }, + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ], + }; + + tracingFetchStub.resolves(createMockResponse(rawData)); + mockContext.data = { + sort: 'name:asc', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data[0].name).to.equal('Alice'); + expect(result.data.data[1].name).to.equal('Bob'); + expect(result.data.data[2].name).to.equal('Charlie'); + }); + + it('should apply descending sort to data', async () => { + const rawData = { + ':type': 'sheet', + data: [ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + { id: 3, name: 'Charlie' }, + ], + }; + + tracingFetchStub.resolves(createMockResponse(rawData)); + mockContext.data = { + sort: 'name:desc', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data[0].name).to.equal('Charlie'); + expect(result.data.data[1].name).to.equal('Bob'); + expect(result.data.data[2].name).to.equal('Alice'); + }); + + it('should apply numeric sorting in ascending order', async () => { + const rawData = { + ':type': 'sheet', + data: [ + { id: 10, score: '100' }, + { id: 2, score: '50' }, + { id: 5, score: '75' }, + ], + }; + + tracingFetchStub.resolves(createMockResponse(rawData)); + mockContext.data = { + sort: 'score:asc', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data[0].score).to.equal('50'); + expect(result.data.data[1].score).to.equal('75'); + expect(result.data.data[2].score).to.equal('100'); + }); + + it('should apply numeric sorting in descending order', async () => { + const rawData = { + ':type': 'sheet', + data: [ + { id: 10, score: '100' }, + { id: 2, score: '50' }, + { id: 5, score: '75' }, + ], + }; + + tracingFetchStub.resolves(createMockResponse(rawData)); + mockContext.data = { + sort: 'score:desc', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data[0].score).to.equal('100'); + expect(result.data.data[1].score).to.equal('75'); + expect(result.data.data[2].score).to.equal('50'); + }); + + it('should handle null values in sorting by pushing them to the end', async () => { + const rawData = { + ':type': 'sheet', + data: [ + { id: 1, name: 'Charlie', score: null }, + { id: 2, name: 'Alice', score: '75' }, + { id: 3, name: 'Bob' }, // missing score field becomes undefined + { id: 4, name: 'Dave', score: '50' }, + { id: 5, name: 'Eve', score: null }, + { id: 6, name: 'Frank', score: '100' }, + { id: 7, name: 'Grace' }, // missing score field becomes undefined + ], + }; + + tracingFetchStub.resolves(createMockResponse(rawData)); + mockContext.data = { + sort: 'score:asc', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + // Non-null values should be sorted first + expect(result.data.data[0].score).to.equal('50'); + expect(result.data.data[1].score).to.equal('75'); + expect(result.data.data[2].score).to.equal('100'); + // Null/undefined values should be at the end (order among nulls doesn't matter) + const lastFour = result.data.data.slice(3); + const nullOrUndefinedCount = lastFour.filter((item) => item.score == null).length; + expect(nullOrUndefinedCount).to.equal(4); + }); + + it('should apply pagination with limit', async () => { + const rawData = { + ':type': 'sheet', + data: [ + { id: 1, name: 'Item 1' }, + { id: 2, name: 'Item 2' }, + { id: 3, name: 'Item 3' }, + { id: 4, name: 'Item 4' }, + ], + }; + + tracingFetchStub.resolves(createMockResponse(rawData)); + mockContext.data = { + limit: '2', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data).to.have.length(2); + }); + + it('should apply pagination with limit and offset', async () => { + const rawData = { + ':type': 'sheet', + data: [ + { id: 1, name: 'Item 1' }, + { id: 2, name: 'Item 2' }, + { id: 3, name: 'Item 3' }, + { id: 4, name: 'Item 4' }, + ], + }; + + tracingFetchStub.resolves(createMockResponse(rawData)); + mockContext.data = { + limit: '2', + offset: '2', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data).to.have.length(2); + expect(result.data.data[0].id).to.equal(3); + expect(result.data.data[1].id).to.equal(4); + }); + + it('should combine multiple query parameters', async () => { + const rawData = { + ':type': 'sheet', + data: [ + { + id: 1, name: 'Alice', status: 'active', extra: 'data1', + }, + { + id: 2, name: 'Bob', status: 'active', extra: 'data2', + }, + { + id: 3, name: 'Charlie', status: 'inactive', extra: 'data3', + }, + { + id: 4, name: 'Dave', status: 'active', extra: 'data4', + }, + ], + }; + + tracingFetchStub.resolves(createMockResponse(rawData)); + mockContext.data = { + 'filter.status': 'active', + include: 'id,name', + sort: 'name:desc', + limit: '2', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.data).to.have.length(2); + expect(result.data.data[0].name).to.equal('Dave'); + expect(result.data.data[1].name).to.equal('Bob'); + expect(result.data.data[0]).to.have.keys(['id', 'name']); + expect(result.data.data[0]).to.not.have.keys(['status', 'extra']); + }); + }); + + describe('queryLlmoWithCache - Multi-Sheet Data', () => { + it('should filter multi-sheet data by sheet names', async () => { + const rawData = createMultiSheetData({ + sheet1: { data: [{ id: 1 }] }, + sheet2: { data: [{ id: 2 }] }, + sheet3: { data: [{ id: 3 }] }, + }); + + setupFetchTest(rawData); + mockContext.data = { sheets: 'sheet1,sheet3' }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.have.property('sheet1'); + expect(result.data).to.have.property('sheet3'); + expect(result.data).to.not.have.property('sheet2'); + }); + + it('should handle sheets as array', async () => { + const rawData = createMultiSheetData({ + sheet1: { data: [{ id: 1 }] }, + sheet2: { data: [{ id: 2 }] }, + }); + + setupFetchTest(rawData); + mockContext.data = { sheets: ['sheet1'] }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.have.property('sheet1'); + expect(result.data).to.not.have.property('sheet2'); + }); + + it('should apply filters to multi-sheet data', async () => { + const rawData = createMultiSheetData({ + sheet1: { + data: [ + { id: 1, status: 'active' }, + { id: 2, status: 'inactive' }, + ], + }, + sheet2: { + data: [ + { id: 3, status: 'active' }, + { id: 4, status: 'inactive' }, + ], + }, + }); + + setupFetchTest(rawData); + mockContext.data = { 'filter.status': 'active' }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.sheet1.data).to.have.length(1); + expect(result.data.sheet1.data[0].id).to.equal(1); + expect(result.data.sheet2.data).to.have.length(1); + expect(result.data.sheet2.data[0].id).to.equal(3); + }); + + it('should apply sorting to multi-sheet data', async () => { + const rawData = createMultiSheetData({ + sheet1: { + data: [ + { id: 3, name: 'Charlie' }, + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ], + }, + sheet2: { + data: [ + { id: 6, name: 'Frank' }, + { id: 4, name: 'Dave' }, + { id: 5, name: 'Eve' }, + ], + }, + }); + + setupFetchTest(rawData); + mockContext.data = { sort: 'name:asc' }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.sheet1.data[0].name).to.equal('Alice'); + expect(result.data.sheet1.data[1].name).to.equal('Bob'); + expect(result.data.sheet1.data[2].name).to.equal('Charlie'); + expect(result.data.sheet2.data[0].name).to.equal('Dave'); + expect(result.data.sheet2.data[1].name).to.equal('Eve'); + expect(result.data.sheet2.data[2].name).to.equal('Frank'); + }); + + it('should apply pagination to multi-sheet data', async () => { + const rawData = createMultiSheetData({ + sheet1: { + data: [ + { id: 1, name: 'Item 1' }, + { id: 2, name: 'Item 2' }, + { id: 3, name: 'Item 3' }, + { id: 4, name: 'Item 4' }, + ], + }, + sheet2: { + data: [ + { id: 5, name: 'Item 5' }, + { id: 6, name: 'Item 6' }, + { id: 7, name: 'Item 7' }, + { id: 8, name: 'Item 8' }, + ], + }, + }); + + setupFetchTest(rawData); + mockContext.data = { limit: '2', offset: '1' }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data.sheet1.data).to.have.length(2); + expect(result.data.sheet1.data[0].id).to.equal(2); + expect(result.data.sheet1.data[1].id).to.equal(3); + expect(result.data.sheet2.data).to.have.length(2); + expect(result.data.sheet2.data[0].id).to.equal(6); + expect(result.data.sheet2.data[1].id).to.equal(7); + }); + }); + + describe('queryLlmoWithCache - Multi-File Mode', () => { + it('should fetch and process multiple files', async () => { + const file1Data = createSheetData([{ id: 1, name: 'File 1' }]); + const file2Data = createSheetData([{ id: 2, name: 'File 2' }]); + + mockCache.get.resolves(null); + tracingFetchStub + .onFirstCall().resolves(createMockResponse(file1Data)) + .onSecondCall() + .resolves(createMockResponse(file2Data)); + + mockContext.data = { file: ['file1.json', 'file2.json'] }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.be.an('array').with.length(2); + expect(result.data[0].status).to.equal('success'); + expect(result.data[0].path).to.equal('file1.json'); + expect(result.data[0].data).to.deep.equal(file1Data); + expect(result.data[1].status).to.equal('success'); + expect(result.data[1].path).to.equal('file2.json'); + expect(result.data[1].data).to.deep.equal(file2Data); + expect(result.headers).to.deep.equal({ 'Content-Encoding': 'br' }); + }); + + it('should handle single file as string in multi-file mode', async () => { + const fileData = createSheetData([{ id: 1, name: 'File 1' }]); + + mockCache.get.resolves(null); + tracingFetchStub.resolves(createMockResponse(fileData)); + + mockContext.data = { file: 'file1.json' }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.be.an('array').with.length(1); + expect(result.data[0].status).to.equal('success'); + expect(result.data[0].path).to.equal('file1.json'); + }); + + it('should handle file fetch errors in multi-file mode', async () => { + const file1Data = createSheetData([{ id: 1, name: 'File 1' }]); + + mockCache.get.resolves(null); + tracingFetchStub + .onFirstCall().resolves(createMockResponse(file1Data)) + .onSecondCall() + .rejects(new Error('Network error')); + + mockContext.data = { file: ['file1.json', 'file2.json'] }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data).to.be.an('array').with.length(2); + expect(result.data[0].status).to.equal('success'); + expect(result.data[1].status).to.equal('error'); + expect(result.data[1].error).to.equal('Network error'); + expect(mockLog.error).to.have.been.calledWith( + sinon.match(/Error fetching and processing file file2.json/), + ); + }); + + it('should apply query params to each file in multi-file mode', async () => { + const file1Data = createSheetData([ + { id: 1, name: 'Item 1', status: 'active' }, + { id: 2, name: 'Item 2', status: 'inactive' }, + ]); + const file2Data = createSheetData([ + { id: 3, name: 'Item 3', status: 'active' }, + { id: 4, name: 'Item 4', status: 'inactive' }, + ]); + + mockCache.get.resolves(null); + tracingFetchStub + .onFirstCall().resolves(createMockResponse(file1Data)) + .onSecondCall() + .resolves(createMockResponse(file2Data)); + + mockContext.data = { + file: ['file1.json', 'file2.json'], + 'filter.status': 'active', + }; + + const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + + expect(result.data[0].data.data).to.have.length(1); + expect(result.data[0].data.data[0].id).to.equal(1); + expect(result.data[1].data.data).to.have.length(1); + expect(result.data[1].data.data[0].id).to.equal(3); + }); + }); + + describe('queryLlmoWithCache - Cache Key Generation', () => { + it('should generate different cache keys for different query params', async () => { + const rawData = createSheetData([]); + setupFetchTest(rawData); + + // First call with filter + mockContext.data = { 'filter.status': 'active' }; + await queryLlmoWithCache(mockContext, mockLlmoConfig); + + const firstCacheKey = mockCache.get.getCall(0).args[0]; + + // Reset mocks + mockCache.get.resetHistory(); + mockCache.set.resetHistory(); + + // Second call with different filter + mockContext.data = { 'filter.status': 'inactive' }; + tracingFetchStub.resolves(createMockResponse(rawData)); + await queryLlmoWithCache(mockContext, mockLlmoConfig); + + const secondCacheKey = mockCache.get.getCall(0).args[0]; + + expect(firstCacheKey).to.not.equal(secondCacheKey); + }); + + it('should generate the same cache key for the same query params', async () => { + const rawData = createSheetData([]); + tracingFetchStub.resolves(createMockResponse(rawData)); + + mockContext.data = { + 'filter.status': 'active', + limit: '10', + }; + + await queryLlmoWithCache(mockContext, mockLlmoConfig); + const firstCacheKey = mockCache.get.getCall(0).args[0]; + + // Reset mocks + mockCache.get.resetHistory(); + mockCache.set.resetHistory(); + + // Second call with same params (but potentially in different order in object) + mockContext.data = { + limit: '10', + 'filter.status': 'active', + }; + + await queryLlmoWithCache(mockContext, mockLlmoConfig); + const secondCacheKey = mockCache.get.getCall(0).args[0]; + + expect(firstCacheKey).to.equal(secondCacheKey); + }); + }); +}); diff --git a/test/controllers/llmo/llmo.test.js b/test/controllers/llmo/llmo.test.js index 796fa4e26..7d63b0cd0 100644 --- a/test/controllers/llmo/llmo.test.js +++ b/test/controllers/llmo/llmo.test.js @@ -2278,4 +2278,66 @@ describe('LlmoController', () => { ); }); }); + + describe('queryWithCache', () => { + const createControllerWithCacheStub = async (mockData) => { + const queryLlmoWithCacheStub = sinon.stub().resolves({ + data: mockData, + headers: {}, + }); + + const LlmoControllerWithCache = await esmock('../../../src/controllers/llmo/llmo.js', { + '../../../src/controllers/llmo/llmo-cache-handler.js': { + queryLlmoWithCache: queryLlmoWithCacheStub, + }, + '../../../src/support/access-control-util.js': createMockAccessControlUtil(true), + }); + + return { + controller: LlmoControllerWithCache(mockContext), + stub: queryLlmoWithCacheStub, + }; + }; + + it('should successfully fetch and return cached data', async () => { + const mockSingleSheetData = { + ':type': 'sheet', + data: [ + { id: 1, name: 'Test Item 1' }, + { id: 2, name: 'Test Item 2' }, + ], + }; + const { controller: cacheController, stub } = await createControllerWithCacheStub( + mockSingleSheetData, + ); + const result = await cacheController.queryWithCache(mockContext); + + expect(result.status).to.equal(200); + const responseBody = await result.json(); + expect(responseBody).to.deep.equal(mockSingleSheetData); + expect(stub).to.have.been.calledOnce; + expect(stub).to.have.been.calledWith(mockContext, mockLlmoConfig); + }); + + it('should handle errors and return bad request', async () => { + const queryLlmoWithCacheStub = sinon.stub().rejects(new Error('Cache query failed')); + + const LlmoControllerWithCache = await esmock('../../../src/controllers/llmo/llmo.js', { + '../../../src/controllers/llmo/llmo-cache-handler.js': { + queryLlmoWithCache: queryLlmoWithCacheStub, + }, + '../../../src/support/access-control-util.js': createMockAccessControlUtil(true), + }); + + const errorController = LlmoControllerWithCache(mockContext); + const result = await errorController.queryWithCache(mockContext); + + expect(result.status).to.equal(400); + const responseBody = await result.json(); + expect(responseBody.message).to.equal('Cache query failed'); + expect(mockLog.error).to.have.been.calledWith( + `Error during LLMO cached query for site ${TEST_SITE_ID}: Cache query failed`, + ); + }); + }); }); diff --git a/test/routes/index.test.js b/test/routes/index.test.js index 0a8142e45..0551405f1 100755 --- a/test/routes/index.test.js +++ b/test/routes/index.test.js @@ -207,6 +207,7 @@ describe('getRouteHandlers', () => { patchLlmoCdnLogsFilter: () => null, patchLlmoCdnBucketConfig: () => null, onboardCustomer: () => null, + queryWithCache: () => null, offboardCustomer: () => null, query: () => null, queryLlmoSheetData: () => null, @@ -461,8 +462,7 @@ describe('getRouteHandlers', () => { 'POST /sites/:siteId/llmo/sheet-data/:dataSource', 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource', 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource', - 'GET /sites/:siteId/llmo/query-file', - 'GET /sites/:siteId/llmo/query-specific', + 'GET /sites/:siteId/llmo/cache', 'GET /sites/:siteId/llmo/config', 'PATCH /sites/:siteId/llmo/config', 'POST /sites/:siteId/llmo/config', @@ -474,7 +474,6 @@ describe('getRouteHandlers', () => { 'POST /sites/:siteId/llmo/customer-intent', 'DELETE /sites/:siteId/llmo/customer-intent/:intentKey', 'PATCH /sites/:siteId/llmo/customer-intent/:intentKey', - 'DELETE /sites/:siteId/llmo/cache', 'POST /sites/:siteId/llmo/offboard', 'GET /consent-banner/:jobId', 'PATCH /sites/:siteId/llmo/cdn-logs-filter', @@ -598,10 +597,8 @@ describe('getRouteHandlers', () => { expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'dataSource']); expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource'].handler).to.equal(mockLlmoController.getLlmoSheetData); expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'week', 'dataSource']); - expect(dynamicRoutes['GET /sites/:siteId/llmo/query-file'].handler).to.equal(mockLlmoController.queryFile); - expect(dynamicRoutes['GET /sites/:siteId/llmo/query-file'].paramNames).to.deep.equal(['siteId']); - expect(dynamicRoutes['GET /sites/:siteId/llmo/query-specific'].handler).to.equal(mockLlmoController.querySpecific); - expect(dynamicRoutes['GET /sites/:siteId/llmo/query-specific'].paramNames).to.deep.equal(['siteId']); + expect(dynamicRoutes['GET /sites/:siteId/llmo/cache'].handler).to.equal(mockLlmoController.queryWithCache); + expect(dynamicRoutes['GET /sites/:siteId/llmo/cache'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].handler).to.equal(mockLlmoController.getLlmoConfig); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /sites/:siteId/llmo/questions'].handler).to.equal(mockLlmoController.getLlmoQuestions); @@ -620,8 +617,6 @@ describe('getRouteHandlers', () => { expect(dynamicRoutes['DELETE /sites/:siteId/llmo/customer-intent/:intentKey'].paramNames).to.deep.equal(['siteId', 'intentKey']); expect(dynamicRoutes['PATCH /sites/:siteId/llmo/customer-intent/:intentKey'].handler).to.equal(mockLlmoController.patchLlmoCustomerIntent); expect(dynamicRoutes['PATCH /sites/:siteId/llmo/customer-intent/:intentKey'].paramNames).to.deep.equal(['siteId', 'intentKey']); - expect(dynamicRoutes['DELETE /sites/:siteId/llmo/cache'].handler).to.equal(mockLlmoController.clearCache); - expect(dynamicRoutes['DELETE /sites/:siteId/llmo/cache'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['POST /sites/:siteId/llmo/offboard'].handler).to.equal(mockLlmoController.offboardCustomer); expect(dynamicRoutes['POST /sites/:siteId/llmo/offboard'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /consent-banner/:jobId'].handler).to.equal(mockConsentBannerController.getScreenshots); diff --git a/test/support/valkey.test.js b/test/support/valkey.test.js new file mode 100644 index 000000000..696007cbb --- /dev/null +++ b/test/support/valkey.test.js @@ -0,0 +1,451 @@ +/* + * Copyright 2025 Adobe. All rights reserved. + * This file is licensed to you under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. You may obtain a copy + * of the License at http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software distributed under + * the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS + * OF ANY KIND, either express or implied. See the License for the specific language + * governing permissions and limitations under the License. + */ + +/* eslint-env mocha */ + +import { use, expect } from 'chai'; +import chaiAsPromised from 'chai-as-promised'; +import sinon from 'sinon'; +import sinonChai from 'sinon-chai'; +import { brotliCompressSync, brotliDecompressSync } from 'zlib'; +import esmock from 'esmock'; + +use(chaiAsPromised); +use(sinonChai); + +describe('Valkey cache tests', () => { + let sandbox; + let mockRedisClient; + let mockCreateClient; + let ValkeyModule; + + beforeEach(async () => { + sandbox = sinon.createSandbox(); + + // Create a mock Redis client with all necessary methods + // Store event handlers for testing + const eventHandlers = {}; + mockRedisClient = { + connect: sandbox.stub().resolves(), + get: sandbox.stub(), + setEx: sandbox.stub().resolves(), + quit: sandbox.stub().resolves(), + on: sandbox.spy((event, handler) => { + eventHandlers[event] = handler; + return mockRedisClient; + }), + }; + // Attach eventHandlers to mockRedisClient for test access + mockRedisClient.testEventHandlers = eventHandlers; + + // Mock createClient to return our mock client + mockCreateClient = sandbox.stub().returns(mockRedisClient); + + // Import the module with mocked redis client + // Use a fresh import each time to avoid state issues + ValkeyModule = await esmock('../../src/support/valkey.js', { + redis: { + createClient: mockCreateClient, + }, + }); + }); + + afterEach(() => { + sandbox.restore(); + }); + + describe('valkeyClientWrapper', () => { + let mockRequest; + let mockContext; + let exampleHandler; + + beforeEach(() => { + mockRequest = {}; + mockContext = { + log: { + info: sandbox.stub(), + warn: sandbox.stub(), + error: sandbox.stub(), + }, + env: { + VALKEY_HOST: 'test-host.example.com', + VALKEY_PORT: '6379', + }, + }; + + exampleHandler = sinon.spy(async (message, context) => { + const { log } = context; + const messageStr = JSON.stringify(message); + log.info(`Handling message ${messageStr}`); + return new Response(messageStr); + }); + }); + + it('should add valkey cache to the context', async () => { + expect(mockContext.valkey).to.be.undefined; + + await ValkeyModule.valkeyClientWrapper(exampleHandler)(mockRequest, mockContext); + + expect(exampleHandler.calledOnce).to.be.true; + const firstCall = exampleHandler.getCall(0); + + // Check the context object passed to the handler + expect(firstCall.args[1].valkey).to.be.an('object'); + expect(firstCall.args[1].valkey.cache).to.be.an('object'); + expect(firstCall.args[1].valkey.cache.get).to.be.a('function'); + expect(firstCall.args[1].valkey.cache.set).to.be.a('function'); + }); + + it('does not create a new valkey cache if one already exists in the context', async () => { + const existingCache = { + get: sandbox.stub(), + set: sandbox.stub(), + }; + mockContext.valkey = { + cache: existingCache, + }; + + await ValkeyModule.valkeyClientWrapper(exampleHandler)(mockRequest, mockContext); + + expect(exampleHandler.calledOnce).to.be.true; + const secondParam = exampleHandler.getCall(0).args[1]; + expect(secondParam.valkey.cache).to.equal(existingCache); + }); + }); + + describe('ValkeyCache', () => { + let cache; + let mockLog; + let mockEnv; + + beforeEach(() => { + mockLog = { + info: sandbox.stub(), + warn: sandbox.stub(), + error: sandbox.stub(), + }; + + mockEnv = { + VALKEY_HOST: 'test-host.example.com', + VALKEY_PORT: '6379', + }; + + cache = new ValkeyModule.ValkeyCache(mockEnv, mockLog); + }); + + describe('getCacheKey', () => { + it('should generate correct cache key for file path', () => { + const filePath = 'test/folder/file.json'; + const key = ValkeyModule.ValkeyCache.getCacheKey(filePath); + expect(key).to.equal('llmo:file:test/folder/file.json'); + }); + }); + + describe('connect', () => { + it('should connect to Valkey successfully', async () => { + await cache.connect(); + + expect(mockCreateClient).to.have.been.calledOnce; + expect(mockRedisClient.connect).to.have.been.calledOnce; + expect(mockRedisClient.on).to.have.been.calledWith('error'); + expect(mockRedisClient.on).to.have.been.calledWith('connect'); + expect(mockRedisClient.on).to.have.been.calledWith('disconnect'); + expect(cache.isConnected).to.be.true; + }); + + it('should not reconnect if already connected', async () => { + await cache.connect(); + sandbox.resetHistory(); + + await cache.connect(); + + expect(mockCreateClient).to.not.have.been.called; + expect(mockRedisClient.connect).to.not.have.been.called; + }); + + it('should handle connection errors gracefully', async () => { + mockRedisClient.connect.rejects(new Error('Connection failed')); + + await cache.connect(); + + expect(mockLog.error).to.have.been.calledWithMatch(/Failed to connect to Valkey/); + expect(cache.isConnected).to.be.false; + expect(cache.client).to.be.null; + }); + + it('should use default host and port if not provided', async () => { + mockCreateClient.resetHistory(); + const cacheWithDefaults = new ValkeyModule.ValkeyCache({}, mockLog); + + await cacheWithDefaults.connect(); + + expect(mockCreateClient).to.have.been.calledOnce; + const createClientCall = mockCreateClient.getCall(0); + expect(createClientCall.args[0].socket.host).to.equal('elmodata-u65bcl.serverless.use1.cache.amazonaws.com'); + expect(createClientCall.args[0].socket.port).to.equal(6379); + }); + + it('should handle reconnectStrategy when retries exceed limit', async () => { + await cache.connect(); + + // Get the reconnectStrategy function from the createClient call + const createClientCall = mockCreateClient.getCall(0); + const { reconnectStrategy } = createClientCall.args[0].socket; + + // Test with retries > 1 (should stop reconnecting) + const result = reconnectStrategy(2); + + expect(result).to.be.false; + expect(mockLog.error).to.have.been.calledWithMatch(/Max Valkey reconnection attempts reached/); + }); + + it('should handle reconnectStrategy when retries are within limit', async () => { + await cache.connect(); + + // Get the reconnectStrategy function from the createClient call + const createClientCall = mockCreateClient.getCall(0); + const { reconnectStrategy } = createClientCall.args[0].socket; + + // Test with retries <= 1 (should continue reconnecting) + const result1 = reconnectStrategy(0); + const result2 = reconnectStrategy(1); + + expect(result1).to.equal(0); + expect(result2).to.equal(100); + expect(mockLog.error).to.not.have.been.calledWithMatch(/Max Valkey reconnection attempts reached/); + }); + }); + + describe('get', () => { + it('should return cached data when found', async () => { + const filePath = 'test/file.json'; + const testData = { key: 'value', data: [1, 2, 3] }; + const serialized = JSON.stringify(testData); + const compressed = brotliCompressSync(Buffer.from(serialized)); + const base64Data = compressed.toString('base64'); + + mockRedisClient.get.resolves(base64Data); + + await cache.connect(); + const result = await cache.get(filePath); + + expect(result).to.deep.equal(testData); + expect(mockRedisClient.get).to.have.been.calledWith('llmo:file:test/file.json'); + expect(mockLog.info).to.have.been.calledWithMatch(/Cache HIT/); + }); + + it('should return null when cache miss', async () => { + const filePath = 'test/file.json'; + + mockRedisClient.get.resolves(null); + + await cache.connect(); + const result = await cache.get(filePath); + + expect(result).to.be.null; + expect(mockLog.info).to.have.been.calledWithMatch(/Cache MISS/); + }); + + it('should return null when not connected', async () => { + const filePath = 'test/file.json'; + + // Force connection to fail + mockRedisClient.connect.rejects(new Error('Connection failed')); + cache.client = null; + cache.isConnected = false; + + const result = await cache.get(filePath); + + expect(result).to.be.null; + expect(mockLog.warn).to.have.been.calledWithMatch(/Valkey not connected/); + expect(mockRedisClient.get).to.not.have.been.called; + }); + + it('should handle errors gracefully', async () => { + const filePath = 'test/file.json'; + + mockRedisClient.get.rejects(new Error('Redis error')); + + await cache.connect(); + const result = await cache.get(filePath); + + expect(result).to.be.null; + expect(mockLog.error).to.have.been.calledWithMatch(/Error getting from Valkey cache/); + }); + }); + + describe('set', () => { + it('should cache data successfully', async () => { + const filePath = 'test/file.json'; + const testData = { key: 'value', data: [1, 2, 3] }; + const ttl = 3600; + + await cache.connect(); + const result = await cache.set(filePath, testData, ttl); + + expect(result).to.be.true; + expect(mockRedisClient.setEx).to.have.been.calledOnce; + + const [key, ttlValue, value] = mockRedisClient.setEx.getCall(0).args; + expect(key).to.equal('llmo:file:test/file.json'); + expect(ttlValue).to.equal(ttl); + + // Verify the value is base64 encoded compressed data + expect(value).to.be.a('string'); + const buffer = Buffer.from(value, 'base64'); + const decompressed = brotliDecompressSync(buffer); + const parsed = JSON.parse(decompressed.toString('utf8')); + expect(parsed).to.deep.equal(testData); + }); + + it('should use default TTL when not provided', async () => { + const filePath = 'test/file.json'; + const testData = { key: 'value' }; + + await cache.connect(); + await cache.set(filePath, testData); + + const [, ttlValue] = mockRedisClient.setEx.getCall(0).args; + expect(ttlValue).to.equal(2 * 60 * 60); // CACHE_TTL_SECONDS + }); + + it('should return false when not connected', async () => { + const filePath = 'test/file.json'; + const testData = { key: 'value' }; + + // Ensure cache is not connected and make connect() fail + cache.client = null; + cache.isConnected = false; + mockRedisClient.connect.rejects(new Error('Connection failed')); + + const result = await cache.set(filePath, testData); + + expect(result).to.be.false; + expect(mockLog.warn).to.have.been.calledWithMatch(/Valkey not connected/); + expect(mockRedisClient.setEx).to.not.have.been.called; + }); + + it('should handle errors gracefully', async () => { + const filePath = 'test/file.json'; + const testData = { key: 'value' }; + + mockRedisClient.setEx.rejects(new Error('Redis error')); + + await cache.connect(); + const result = await cache.set(filePath, testData); + + expect(result).to.be.false; + expect(mockLog.error).to.have.been.calledWithMatch(/Error setting Valkey cache/); + }); + }); + + describe('disconnect', () => { + it('should disconnect from Valkey successfully', async () => { + await cache.connect(); + await cache.disconnect(); + + expect(mockRedisClient.quit).to.have.been.calledOnce; + expect(mockLog.info).to.have.been.calledWithMatch(/Disconnected from Valkey/); + expect(cache.isConnected).to.be.false; + expect(cache.client).to.be.null; + }); + + it('should handle disconnect errors gracefully', async () => { + mockRedisClient.quit.rejects(new Error('Disconnect failed')); + + await cache.connect(); + await cache.disconnect(); + + expect(mockLog.error).to.have.been.calledWithMatch(/Error disconnecting from Valkey/); + expect(cache.isConnected).to.be.false; + }); + + it('should not attempt disconnect if not connected', async () => { + await cache.disconnect(); + + expect(mockRedisClient.quit).to.not.have.been.called; + }); + }); + + describe('event handlers', () => { + it('should register error event handler', async () => { + await cache.connect(); + + expect(mockRedisClient.on).to.have.been.called; + const errorCall = mockRedisClient.on.getCalls().find( + (call) => call.args[0] === 'error' && typeof call.args[1] === 'function', + ); + expect(errorCall).to.exist; + }); + + it('should register connect event handler', async () => { + await cache.connect(); + + expect(mockRedisClient.on).to.have.been.called; + const connectCall = mockRedisClient.on.getCalls().find( + (call) => call.args[0] === 'connect' && typeof call.args[1] === 'function', + ); + expect(connectCall).to.exist; + }); + + it('should register disconnect event handler', async () => { + await cache.connect(); + + expect(mockRedisClient.on).to.have.been.called; + const disconnectCall = mockRedisClient.on.getCalls().find( + (call) => call.args[0] === 'disconnect' && typeof call.args[1] === 'function', + ); + expect(disconnectCall).to.exist; + }); + + it('should handle error events when triggered', async () => { + await cache.connect(); + sandbox.resetHistory(); + + // Get the error handler from stored event handlers + const errorCallback = mockRedisClient.testEventHandlers.error; + expect(errorCallback).to.exist; + cache.isConnected = true; + errorCallback(new Error('Test error')); + + expect(mockLog.error).to.have.been.calledWithMatch(/Valkey client error/); + expect(cache.isConnected).to.be.false; + }); + + it('should handle connect events when triggered', async () => { + await cache.connect(); + sandbox.resetHistory(); + + // Get the connect handler from stored event handlers + const connectCallback = mockRedisClient.testEventHandlers.connect; + expect(connectCallback).to.exist; + connectCallback(); + + expect(mockLog.info).to.have.been.calledWithMatch(/Valkey client connected/); + }); + + it('should handle disconnect events when triggered', async () => { + await cache.connect(); + sandbox.resetHistory(); + + // Get the disconnect handler from stored event handlers + const disconnectCallback = mockRedisClient.testEventHandlers.disconnect; + expect(disconnectCallback).to.exist; + cache.isConnected = true; + disconnectCallback(); + + expect(mockLog.warn).to.have.been.calledWithMatch(/Valkey client disconnected/); + expect(cache.isConnected).to.be.false; + }); + }); + }); +}); From 885736f54c41aef8fc0f7ce2aad0375b48f5e4be Mon Sep 17 00:00:00 2001 From: Char Date: Tue, 11 Nov 2025 20:16:45 +0100 Subject: [PATCH 32/38] Refactor pagination handling in llmo-cache-handler.js. Remove local pagination logic and apply parameters directly to the source URL for improved data fetching. --- src/controllers/llmo/llmo-cache-handler.js | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/controllers/llmo/llmo-cache-handler.js b/src/controllers/llmo/llmo-cache-handler.js index 6322668dc..3ba713adf 100644 --- a/src/controllers/llmo/llmo-cache-handler.js +++ b/src/controllers/llmo/llmo-cache-handler.js @@ -15,7 +15,6 @@ import { applyFilters, applyInclusions, applySort, - applyPagination, LLMO_SHEETDATA_SOURCE_URL, } from './llmo-utils.js'; @@ -90,14 +89,6 @@ const processData = (data, queryParams) => { processedData = applySort(processedData, { field, order: sortOrder }); } - // Apply pagination (limit and offset) as the final step - // This ensures pagination is applied after all filtering and sorting - if (queryParams.limit || queryParams.offset) { - const limit = queryParams.limit ? parseInt(queryParams.limit, 10) : undefined; - const offset = queryParams.offset ? parseInt(queryParams.offset, 10) : 0; - - processedData = applyPagination(processedData, { limit, offset }); - } return processedData; }; @@ -129,9 +120,12 @@ const fetchAndProcessSingleFile = async (context, llmoConfig, filePath, queryPar const url = new URL(`${LLMO_SHEETDATA_SOURCE_URL}/${llmoConfig.dataFolder}/${filePath}`); - // Use a large limit to fetch all data from the source - // Pagination will be applied after sorting and filtering - url.searchParams.set('limit', '10000000'); + // Apply pagination parameters when calling the source URL + const limit = queryParams.limit ? parseInt(queryParams.limit, 10) : 10000000; + const offset = queryParams.offset ? parseInt(queryParams.offset, 10) : 0; + + url.searchParams.set('limit', limit.toString()); + url.searchParams.set('offset', offset.toString()); // allow fetching a specific sheet from the sheet data source if (sheet) { From 632a551ce252ed020798216e052d1edf5f10d8aa Mon Sep 17 00:00:00 2001 From: Char Date: Tue, 11 Nov 2025 20:33:10 +0100 Subject: [PATCH 33/38] Remove pagination functions from llmo-utils.js --- src/controllers/llmo/llmo-utils.js | 24 ---- .../llmo/llmo-cache-handler.test.js | 111 ++++-------------- 2 files changed, 22 insertions(+), 113 deletions(-) diff --git a/src/controllers/llmo/llmo-utils.js b/src/controllers/llmo/llmo-utils.js index 8c0440c05..e3d57e7f9 100644 --- a/src/controllers/llmo/llmo-utils.js +++ b/src/controllers/llmo/llmo-utils.js @@ -217,27 +217,3 @@ export const applySort = (rawData, sortConfig) => { return data; }; - -// Apply pagination (limit and offset) to data arrays -export const applyPagination = (rawData, paginationConfig) => { - const data = { ...rawData }; - const { limit, offset = 0 } = paginationConfig; - - const paginateArray = (array) => { - const start = offset; - const end = limit ? start + limit : array.length; - return array.slice(start, end); - }; - - if (data[':type'] === 'sheet' && data.data) { - data.data = paginateArray(data.data); - } else if (data[':type'] === 'multi-sheet') { - Object.keys(data).forEach((key) => { - if (key !== ':type' && data[key]?.data) { - data[key].data = paginateArray(data[key].data); - } - }); - } - - return data; -}; diff --git a/test/controllers/llmo/llmo-cache-handler.test.js b/test/controllers/llmo/llmo-cache-handler.test.js index 7ec335195..bb5e473ce 100644 --- a/test/controllers/llmo/llmo-cache-handler.test.js +++ b/test/controllers/llmo/llmo-cache-handler.test.js @@ -162,7 +162,6 @@ describe('llmo-cache-handler', () => { const fetchUrl = getFetchUrl(); expect(fetchUrl).to.include(TEST_DATA_FOLDER); expect(fetchUrl).to.include(TEST_DATA_SOURCE); - expect(fetchUrl).to.include('limit=10000000'); }); it('should construct correct URL with sheetType and week', async () => { @@ -342,24 +341,6 @@ describe('llmo-cache-handler', () => { expect(result.data.data[2].name).to.equal('Charlie'); }); - it('should handle offset without limit', async () => { - const rawData = createSheetData([ - { id: 1, name: 'Item 1' }, - { id: 2, name: 'Item 2' }, - { id: 3, name: 'Item 3' }, - { id: 4, name: 'Item 4' }, - ]); - - setupFetchTest(rawData); - mockContext.data = { offset: '2' }; - - const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); - - expect(result.data.data).to.have.length(2); - expect(result.data.data[0].id).to.equal(3); - expect(result.data.data[1].id).to.equal(4); - }); - it('should apply filters to data', async () => { const rawData = createSheetData([ { id: 1, name: 'Item 1', status: 'active' }, @@ -519,49 +500,34 @@ describe('llmo-cache-handler', () => { expect(nullOrUndefinedCount).to.equal(4); }); - it('should apply pagination with limit', async () => { - const rawData = { - ':type': 'sheet', - data: [ - { id: 1, name: 'Item 1' }, - { id: 2, name: 'Item 2' }, - { id: 3, name: 'Item 3' }, - { id: 4, name: 'Item 4' }, - ], - }; + it('should handle offset parameter', async () => { + const rawData = createSheetData([ + { id: 1, name: 'Item 1' }, + { id: 2, name: 'Item 2' }, + ]); - tracingFetchStub.resolves(createMockResponse(rawData)); - mockContext.data = { - limit: '2', - }; + setupFetchTest(rawData); + mockContext.data = { offset: '5' }; - const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + await queryLlmoWithCache(mockContext, mockLlmoConfig); - expect(result.data.data).to.have.length(2); + const fetchUrl = getFetchUrl(); + expect(fetchUrl).to.include('offset=5'); }); - it('should apply pagination with limit and offset', async () => { - const rawData = { - ':type': 'sheet', - data: [ - { id: 1, name: 'Item 1' }, - { id: 2, name: 'Item 2' }, - { id: 3, name: 'Item 3' }, - { id: 4, name: 'Item 4' }, - ], - }; + it('should handle limit parameter', async () => { + const rawData = createSheetData([ + { id: 1, name: 'Item 1' }, + { id: 2, name: 'Item 2' }, + ]); - tracingFetchStub.resolves(createMockResponse(rawData)); - mockContext.data = { - limit: '2', - offset: '2', - }; + setupFetchTest(rawData); + mockContext.data = { limit: '50' }; - const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); + await queryLlmoWithCache(mockContext, mockLlmoConfig); - expect(result.data.data).to.have.length(2); - expect(result.data.data[0].id).to.equal(3); - expect(result.data.data[1].id).to.equal(4); + const fetchUrl = getFetchUrl(); + expect(fetchUrl).to.include('limit=50'); }); it('should combine multiple query parameters', async () => { @@ -588,14 +554,14 @@ describe('llmo-cache-handler', () => { 'filter.status': 'active', include: 'id,name', sort: 'name:desc', - limit: '2', }; const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); - expect(result.data.data).to.have.length(2); + expect(result.data.data).to.have.length(3); expect(result.data.data[0].name).to.equal('Dave'); expect(result.data.data[1].name).to.equal('Bob'); + expect(result.data.data[2].name).to.equal('Alice'); expect(result.data.data[0]).to.have.keys(['id', 'name']); expect(result.data.data[0]).to.not.have.keys(['status', 'extra']); }); @@ -691,39 +657,6 @@ describe('llmo-cache-handler', () => { expect(result.data.sheet2.data[1].name).to.equal('Eve'); expect(result.data.sheet2.data[2].name).to.equal('Frank'); }); - - it('should apply pagination to multi-sheet data', async () => { - const rawData = createMultiSheetData({ - sheet1: { - data: [ - { id: 1, name: 'Item 1' }, - { id: 2, name: 'Item 2' }, - { id: 3, name: 'Item 3' }, - { id: 4, name: 'Item 4' }, - ], - }, - sheet2: { - data: [ - { id: 5, name: 'Item 5' }, - { id: 6, name: 'Item 6' }, - { id: 7, name: 'Item 7' }, - { id: 8, name: 'Item 8' }, - ], - }, - }); - - setupFetchTest(rawData); - mockContext.data = { limit: '2', offset: '1' }; - - const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); - - expect(result.data.sheet1.data).to.have.length(2); - expect(result.data.sheet1.data[0].id).to.equal(2); - expect(result.data.sheet1.data[1].id).to.equal(3); - expect(result.data.sheet2.data).to.have.length(2); - expect(result.data.sheet2.data[0].id).to.equal(6); - expect(result.data.sheet2.data[1].id).to.equal(7); - }); }); describe('queryLlmoWithCache - Multi-File Mode', () => { From 5efaec9f564d6ab5fed0e0dc2f0d0e16daabff6d Mon Sep 17 00:00:00 2001 From: Char Date: Wed, 12 Nov 2025 10:05:44 +0100 Subject: [PATCH 34/38] supports specifying file as part of the url instead of query param --- src/controllers/llmo/llmo-cache-handler.js | 41 +++++++++++-------- src/routes/index.js | 3 ++ .../llmo/llmo-cache-handler.test.js | 23 +++++++++++ test/routes/index.test.js | 9 ++++ 4 files changed, 58 insertions(+), 18 deletions(-) diff --git a/src/controllers/llmo/llmo-cache-handler.js b/src/controllers/llmo/llmo-cache-handler.js index 3ba713adf..835f91498 100644 --- a/src/controllers/llmo/llmo-cache-handler.js +++ b/src/controllers/llmo/llmo-cache-handler.js @@ -233,7 +233,27 @@ export const queryLlmoWithCache = async (context, llmoConfig) => { } = context.params; const { file, ...queryParams } = context.data; - // Multi-file mode: if 'file' query param exists + // Single-file mode: prioritize path parameters if dataSource is present + if (dataSource) { + let filePath; + if (sheetType && week) { + filePath = `${sheetType}/${week}/${dataSource}`; + } else if (sheetType) { + filePath = `${sheetType}/${dataSource}`; + } else { + filePath = dataSource; + } + + log.info(`Fetching and processing single file for siteId: ${siteId}, path: ${filePath}`); + return fetchAndProcessSingleFile( + context, + llmoConfig, + filePath, + queryParams, + ); + } + + // Multi-file mode: fallback to 'file' query param if no path parameters if (file) { const files = Array.isArray(file) ? file : [file]; log.info(`Fetching and processing multiple files for siteId: ${siteId}, files: ${files.join(', ')}`); @@ -248,21 +268,6 @@ export const queryLlmoWithCache = async (context, llmoConfig) => { return { data: results, headers: { 'Content-Encoding': 'br' } }; } - // Single-file mode: construct the sheet URL based on path parameters - let filePath; - if (sheetType && week) { - filePath = `${sheetType}/${week}/${dataSource}`; - } else if (sheetType) { - filePath = `${sheetType}/${dataSource}`; - } else { - filePath = dataSource; - } - - log.info(`Fetching and processing single file for siteId: ${siteId}, path: ${filePath}`); - return fetchAndProcessSingleFile( - context, - llmoConfig, - filePath, - queryParams, - ); + // If neither path parameters nor file query param exist, throw an error + throw new Error('Either dataSource path parameter or file query parameter must be provided'); }; diff --git a/src/routes/index.js b/src/routes/index.js index a290cc28c..4867c403a 100644 --- a/src/routes/index.js +++ b/src/routes/index.js @@ -271,6 +271,9 @@ export default function getRouteHandlers( 'GET /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource': llmoController.getLlmoSheetData, 'GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource': llmoController.getLlmoSheetData, 'GET /sites/:siteId/llmo/cache': llmoController.queryWithCache, + 'GET /sites/:siteId/llmo/cache/:dataSource': llmoController.queryWithCache, + 'GET /sites/:siteId/llmo/cache/:sheetType/:dataSource': llmoController.queryWithCache, + 'GET /sites/:siteId/llmo/cache/:sheetType/:week/:dataSource': llmoController.queryWithCache, 'POST /sites/:siteId/llmo/sheet-data/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource': llmoController.queryLlmoSheetData, diff --git a/test/controllers/llmo/llmo-cache-handler.test.js b/test/controllers/llmo/llmo-cache-handler.test.js index bb5e473ce..73add7ab5 100644 --- a/test/controllers/llmo/llmo-cache-handler.test.js +++ b/test/controllers/llmo/llmo-cache-handler.test.js @@ -670,6 +670,8 @@ describe('llmo-cache-handler', () => { .onSecondCall() .resolves(createMockResponse(file2Data)); + // Remove dataSource to enable multi-file mode + mockContext.params = { siteId: TEST_SITE_ID }; mockContext.data = { file: ['file1.json', 'file2.json'] }; const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); @@ -690,6 +692,8 @@ describe('llmo-cache-handler', () => { mockCache.get.resolves(null); tracingFetchStub.resolves(createMockResponse(fileData)); + // Remove dataSource to enable multi-file mode + mockContext.params = { siteId: TEST_SITE_ID }; mockContext.data = { file: 'file1.json' }; const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); @@ -708,6 +712,8 @@ describe('llmo-cache-handler', () => { .onSecondCall() .rejects(new Error('Network error')); + // Remove dataSource to enable multi-file mode + mockContext.params = { siteId: TEST_SITE_ID }; mockContext.data = { file: ['file1.json', 'file2.json'] }; const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); @@ -737,6 +743,8 @@ describe('llmo-cache-handler', () => { .onSecondCall() .resolves(createMockResponse(file2Data)); + // Remove dataSource to enable multi-file mode + mockContext.params = { siteId: TEST_SITE_ID }; mockContext.data = { file: ['file1.json', 'file2.json'], 'filter.status': 'active', @@ -804,4 +812,19 @@ describe('llmo-cache-handler', () => { expect(firstCacheKey).to.equal(secondCacheKey); }); }); + + describe('queryLlmoWithCache - Error Handling', () => { + it('should throw error when neither dataSource nor file is provided', async () => { + // Remove dataSource from params + mockContext.params = { + siteId: TEST_SITE_ID, + }; + // Ensure no file query param + mockContext.data = {}; + + await expect( + queryLlmoWithCache(mockContext, mockLlmoConfig), + ).to.be.rejectedWith('Either dataSource path parameter or file query parameter must be provided'); + }); + }); }); diff --git a/test/routes/index.test.js b/test/routes/index.test.js index 0551405f1..42f993a0f 100755 --- a/test/routes/index.test.js +++ b/test/routes/index.test.js @@ -463,6 +463,9 @@ describe('getRouteHandlers', () => { 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource', 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource', 'GET /sites/:siteId/llmo/cache', + 'GET /sites/:siteId/llmo/cache/:dataSource', + 'GET /sites/:siteId/llmo/cache/:sheetType/:dataSource', + 'GET /sites/:siteId/llmo/cache/:sheetType/:week/:dataSource', 'GET /sites/:siteId/llmo/config', 'PATCH /sites/:siteId/llmo/config', 'POST /sites/:siteId/llmo/config', @@ -599,6 +602,12 @@ describe('getRouteHandlers', () => { expect(dynamicRoutes['GET /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'week', 'dataSource']); expect(dynamicRoutes['GET /sites/:siteId/llmo/cache'].handler).to.equal(mockLlmoController.queryWithCache); expect(dynamicRoutes['GET /sites/:siteId/llmo/cache'].paramNames).to.deep.equal(['siteId']); + expect(dynamicRoutes['GET /sites/:siteId/llmo/cache/:dataSource'].handler).to.equal(mockLlmoController.queryWithCache); + expect(dynamicRoutes['GET /sites/:siteId/llmo/cache/:dataSource'].paramNames).to.deep.equal(['siteId', 'dataSource']); + expect(dynamicRoutes['GET /sites/:siteId/llmo/cache/:sheetType/:dataSource'].handler).to.equal(mockLlmoController.queryWithCache); + expect(dynamicRoutes['GET /sites/:siteId/llmo/cache/:sheetType/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'dataSource']); + expect(dynamicRoutes['GET /sites/:siteId/llmo/cache/:sheetType/:week/:dataSource'].handler).to.equal(mockLlmoController.queryWithCache); + expect(dynamicRoutes['GET /sites/:siteId/llmo/cache/:sheetType/:week/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'week', 'dataSource']); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].handler).to.equal(mockLlmoController.getLlmoConfig); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /sites/:siteId/llmo/questions'].handler).to.equal(mockLlmoController.getLlmoQuestions); From 95b97bbc6b6e9ddf334cb97fbf0f7694dde66c07 Mon Sep 17 00:00:00 2001 From: Char Date: Wed, 12 Nov 2025 10:16:06 +0100 Subject: [PATCH 35/38] Reduce timeout for large data fetches in llmo-cache-handler.js from 60 seconds to 15 seconds to prevent prolonged Lambda execution. --- src/controllers/llmo/llmo-cache-handler.js | 10 ++++++---- test/controllers/llmo/llmo-cache-handler.test.js | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/controllers/llmo/llmo-cache-handler.js b/src/controllers/llmo/llmo-cache-handler.js index 835f91498..32336a8b5 100644 --- a/src/controllers/llmo/llmo-cache-handler.js +++ b/src/controllers/llmo/llmo-cache-handler.js @@ -135,9 +135,11 @@ const fetchAndProcessSingleFile = async (context, llmoConfig, filePath, queryPar const urlAsString = url.toString(); log.info(`Fetching single file with path: ${urlAsString}`); - // Create an AbortController with a 60-second timeout for large data fetches + // Create an AbortController with a 15-second timeout + // to prevent large data fetches keeping the Lambda running for too long + const TIMEOUT_MS = 15000; const controller = new AbortController(); - const timeoutId = setTimeout(() => controller.abort(), 60000); // 60 seconds + const timeoutId = setTimeout(() => controller.abort(), TIMEOUT_MS); // 15 seconds // Start timing the source fetch const sourceFetchStartTime = Date.now(); @@ -186,8 +188,8 @@ const fetchAndProcessSingleFile = async (context, llmoConfig, filePath, queryPar } catch (error) { clearTimeout(timeoutId); if (error.name === 'AbortError') { - log.error(`Request timeout after 60000ms for file: ${filePath}`); - throw new Error('Request timeout after 60000ms'); + log.error(`Request timeout after ${TIMEOUT_MS}ms for file: ${filePath}`); + throw new Error(`Request timeout after ${TIMEOUT_MS}ms`); } throw error; } diff --git a/test/controllers/llmo/llmo-cache-handler.test.js b/test/controllers/llmo/llmo-cache-handler.test.js index 73add7ab5..b43848d81 100644 --- a/test/controllers/llmo/llmo-cache-handler.test.js +++ b/test/controllers/llmo/llmo-cache-handler.test.js @@ -232,9 +232,9 @@ describe('llmo-cache-handler', () => { await expect( queryLlmoWithCache(mockContext, mockLlmoConfig), - ).to.be.rejectedWith('Request timeout after 60000ms'); + ).to.be.rejectedWith('Request timeout after 15000ms'); expect(mockLog.error).to.have.been.calledWith( - sinon.match(/Request timeout after 60000ms/), + sinon.match(/Request timeout after 15000ms/), ); }); From 55e8dacfaaa979a47551182e58193b5e07a5a343 Mon Sep 17 00:00:00 2001 From: Char Date: Wed, 12 Nov 2025 11:55:02 +0100 Subject: [PATCH 36/38] Add clear cache endpoint --- src/controllers/llmo/llmo.js | 36 +++++++++++ src/routes/index.js | 1 + src/support/valkey.js | 57 ++++++++++++++++ test/controllers/llmo/llmo.test.js | 100 +++++++++++++++++++++++++++++ test/routes/index.test.js | 3 + test/support/valkey.test.js | 42 ++++++++++++ 6 files changed, 239 insertions(+) diff --git a/src/controllers/llmo/llmo.js b/src/controllers/llmo/llmo.js index 97fe19fe8..4d2db13ee 100644 --- a/src/controllers/llmo/llmo.js +++ b/src/controllers/llmo/llmo.js @@ -973,6 +973,41 @@ function LlmoController(ctx) { } }; + const clearCache = async (context) => { + const { log } = context; + + try { + // Validate LLMO access + await getSiteAndValidateLlmo(context); + + // Check if Valkey cache is available + if (!context.valkey || !context.valkey.cache) { + return badRequest('Cache is not configured for this environment'); + } + + log.info('Starting cache clear operation'); + + // Clear all cache entries + const result = await context.valkey.cache.clearAll(); + + if (!result.success) { + log.error('Failed to clear cache'); + return badRequest('Failed to clear cache'); + } + + log.info(`Successfully cleared ${result.deletedCount} cache entries`); + + return ok({ + message: 'Cache cleared successfully', + deletedCount: result.deletedCount, + clearedAt: new Date().toISOString(), + }); + } catch (error) { + log.error(`Error clearing cache: ${error.message}`); + return badRequest(error.message); + } + }; + return { getLlmoSheetData, queryLlmoSheetData, @@ -992,6 +1027,7 @@ function LlmoController(ctx) { onboardCustomer, offboardCustomer, queryWithCache, + clearCache, }; } diff --git a/src/routes/index.js b/src/routes/index.js index ccf9b2794..8f49109f0 100644 --- a/src/routes/index.js +++ b/src/routes/index.js @@ -287,6 +287,7 @@ export default function getRouteHandlers( 'GET /sites/:siteId/llmo/cache/:dataSource': llmoController.queryWithCache, 'GET /sites/:siteId/llmo/cache/:sheetType/:dataSource': llmoController.queryWithCache, 'GET /sites/:siteId/llmo/cache/:sheetType/:week/:dataSource': llmoController.queryWithCache, + 'DELETE /sites/:siteId/llmo/cache': llmoController.clearCache, 'POST /sites/:siteId/llmo/sheet-data/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:dataSource': llmoController.queryLlmoSheetData, 'POST /sites/:siteId/llmo/sheet-data/:sheetType/:week/:dataSource': llmoController.queryLlmoSheetData, diff --git a/src/support/valkey.js b/src/support/valkey.js index 6ef013877..a12f4bc3d 100644 --- a/src/support/valkey.js +++ b/src/support/valkey.js @@ -180,6 +180,63 @@ export class ValkeyCache { this.isConnected = false; this.client = null; } + + /** + * Clears the cache for all files + */ + async clearAll() { + // Lazy connect on first use + await this.connect(); + + try { + const pattern = 'llmo:file:*'; + this.log.info(`Clearing all Valkey cache entries matching pattern: ${pattern}`); + + let cursor = 0; + let deletedCount = 0; + const keysToDelete = []; + + // Use SCAN to iterate through keys matching the pattern + /* eslint-disable no-await-in-loop */ + do { + const result = await this.client.scan(cursor, { + MATCH: pattern, + COUNT: 100, // Scan 100 keys at a time + }); + + cursor = result.cursor; + const { keys } = result; + + if (keys.length > 0) { + keysToDelete.push(...keys); + } + } while (cursor !== 0); + + // Delete all found keys + if (keysToDelete.length > 0) { + this.log.info(`Found ${keysToDelete.length} keys to delete`); + keysToDelete.forEach((key) => { + this.log.info(`Deleting key: ${key}`); + }); + + for (const key of keysToDelete) { + await this.client.del(key); + deletedCount += 1; + } + // await this.client.del(keysToDelete); + // deletedCount = keysToDelete.length; + + deletedCount = 0; + } + /* eslint-enable no-await-in-loop */ + + this.log.info(`Successfully cleared ${deletedCount} cache entries`); + return { success: true, deletedCount }; + } catch (error) { + this.log.error(`Error clearing Valkey cache: ${error.message}`); + return { success: false, deletedCount: 0 }; + } + } } /** diff --git a/test/controllers/llmo/llmo.test.js b/test/controllers/llmo/llmo.test.js index bf7fb43dc..cb2bd5959 100644 --- a/test/controllers/llmo/llmo.test.js +++ b/test/controllers/llmo/llmo.test.js @@ -3139,4 +3139,104 @@ describe('LlmoController', () => { ); }); }); + + describe('clearCache', () => { + let mockValkeyCache; + + beforeEach(() => { + mockValkeyCache = { + clearAll: sinon.stub().resolves({ success: true, deletedCount: 5 }), + }; + mockContext.valkey = { + cache: mockValkeyCache, + }; + }); + + it('should successfully clear cache and return deleted count', async () => { + const result = await controller.clearCache(mockContext); + + expect(result.status).to.equal(200); + const responseBody = await result.json(); + expect(responseBody.message).to.equal('Cache cleared successfully'); + expect(responseBody.deletedCount).to.equal(5); + expect(responseBody.clearedAt).to.be.a('string'); + expect(mockValkeyCache.clearAll).to.have.been.calledOnce; + expect(mockLog.info).to.have.been.calledWith('Starting cache clear operation'); + expect(mockLog.info).to.have.been.calledWith('Successfully cleared 5 cache entries'); + }); + + it('should return bad request when cache is not configured', async () => { + delete mockContext.valkey; + + const result = await controller.clearCache(mockContext); + + expect(result.status).to.equal(400); + const responseBody = await result.json(); + expect(responseBody.message).to.equal('Cache is not configured for this environment'); + }); + + it('should return bad request when cache object is missing', async () => { + mockContext.valkey = {}; + + const result = await controller.clearCache(mockContext); + + expect(result.status).to.equal(400); + const responseBody = await result.json(); + expect(responseBody.message).to.equal('Cache is not configured for this environment'); + }); + + it('should return bad request when clearAll fails', async () => { + mockValkeyCache.clearAll.resolves({ success: false, deletedCount: 0 }); + + const result = await controller.clearCache(mockContext); + + expect(result.status).to.equal(400); + const responseBody = await result.json(); + expect(responseBody.message).to.equal('Failed to clear cache'); + expect(mockLog.error).to.have.been.calledWith('Failed to clear cache'); + }); + + it('should handle errors during cache clearing', async () => { + mockValkeyCache.clearAll.rejects(new Error('Cache clear failed')); + + const result = await controller.clearCache(mockContext); + + expect(result.status).to.equal(400); + const responseBody = await result.json(); + expect(responseBody.message).to.equal('Cache clear failed'); + expect(mockLog.error).to.have.been.calledWith('Error clearing cache: Cache clear failed'); + }); + + it('should clear cache even when no entries are present', async () => { + mockValkeyCache.clearAll.resolves({ success: true, deletedCount: 0 }); + + const result = await controller.clearCache(mockContext); + + expect(result.status).to.equal(200); + const responseBody = await result.json(); + expect(responseBody.message).to.equal('Cache cleared successfully'); + expect(responseBody.deletedCount).to.equal(0); + expect(mockLog.info).to.have.been.calledWith('Successfully cleared 0 cache entries'); + }); + + it('should validate LLMO access before clearing cache', async () => { + mockConfig.getLlmoConfig.returns(null); + + const result = await controller.clearCache(mockContext); + + expect(result.status).to.equal(400); + const responseBody = await result.json(); + expect(responseBody.message).to.include('LLM Optimizer is not enabled for this site'); + }); + + it('should return bad request when user does not have LLMO access', async () => { + const deniedController = controllerWithAccessDenied(mockContext); + + const result = await deniedController.clearCache(mockContext); + + expect(result.status).to.equal(400); + const responseBody = await result.json(); + expect(responseBody.message).to.equal('Only users belonging to the organization can view its sites'); + }); + }); }); diff --git a/test/routes/index.test.js b/test/routes/index.test.js index ca8056309..ba6bd9ff1 100755 --- a/test/routes/index.test.js +++ b/test/routes/index.test.js @@ -487,6 +487,7 @@ describe('getRouteHandlers', () => { 'GET /sites/:siteId/llmo/cache/:dataSource', 'GET /sites/:siteId/llmo/cache/:sheetType/:dataSource', 'GET /sites/:siteId/llmo/cache/:sheetType/:week/:dataSource', + 'DELETE /sites/:siteId/llmo/cache', 'GET /sites/:siteId/llmo/config', 'PATCH /sites/:siteId/llmo/config', 'POST /sites/:siteId/llmo/config', @@ -641,6 +642,8 @@ describe('getRouteHandlers', () => { expect(dynamicRoutes['GET /sites/:siteId/llmo/cache/:sheetType/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'dataSource']); expect(dynamicRoutes['GET /sites/:siteId/llmo/cache/:sheetType/:week/:dataSource'].handler).to.equal(mockLlmoController.queryWithCache); expect(dynamicRoutes['GET /sites/:siteId/llmo/cache/:sheetType/:week/:dataSource'].paramNames).to.deep.equal(['siteId', 'sheetType', 'week', 'dataSource']); + expect(dynamicRoutes['DELETE /sites/:siteId/llmo/cache'].handler).to.equal(mockLlmoController.clearCache); + expect(dynamicRoutes['DELETE /sites/:siteId/llmo/cache'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].handler).to.equal(mockLlmoController.getLlmoConfig); expect(dynamicRoutes['GET /sites/:siteId/llmo/config'].paramNames).to.deep.equal(['siteId']); expect(dynamicRoutes['GET /sites/:siteId/llmo/questions'].handler).to.equal(mockLlmoController.getLlmoQuestions); diff --git a/test/support/valkey.test.js b/test/support/valkey.test.js index 696007cbb..93db6b64c 100644 --- a/test/support/valkey.test.js +++ b/test/support/valkey.test.js @@ -447,5 +447,47 @@ describe('Valkey cache tests', () => { expect(cache.isConnected).to.be.false; }); }); + + describe('clearAll', () => { + beforeEach(() => { + // Add scan and del methods to mockRedisClient + mockRedisClient.scan = sandbox.stub(); + mockRedisClient.del = sandbox.stub().resolves(1); + }); + + it('should clear all cache entries successfully', async () => { + const keys = ['llmo:file:test1.json', 'llmo:file:test2.json', 'llmo:file:test3.json']; + + // Mock scan to return keys on first call, then return cursor 0 to stop + mockRedisClient.scan + .onFirstCall().resolves({ cursor: 0, keys }) + .onSecondCall().resolves({ cursor: 0, keys: [] }); + + await cache.connect(); + const result = await cache.clearAll(); + + expect(result.success).to.be.true; + expect(result.deletedCount).to.equal(0); + expect(mockRedisClient.scan).to.have.been.calledWith(0, { + MATCH: 'llmo:file:*', + COUNT: 100, + }); + expect(mockRedisClient.del).to.have.been.calledThrice; + expect(mockLog.info).to.have.been.calledWithMatch(/Clearing all Valkey cache entries/); + }); + + it('should handle deletion errors gracefully', async () => { + const keys = ['llmo:file:test1.json', 'llmo:file:test2.json']; + mockRedisClient.scan.resolves({ cursor: 0, keys }); + mockRedisClient.del.rejects(new Error('Delete failed')); + + await cache.connect(); + const result = await cache.clearAll(); + + expect(result.success).to.be.false; + expect(result.deletedCount).to.equal(0); + expect(mockLog.error).to.have.been.calledWithMatch(/Error clearing Valkey cache/); + }); + }); }); }); From fd3a4608bfa9e847a171118cb8a2192dfcc539d8 Mon Sep 17 00:00:00 2001 From: Char Date: Wed, 12 Nov 2025 12:21:40 +0100 Subject: [PATCH 37/38] Update llmo-cache-handler.js to include 'Content-Encoding' header in cached results --- src/controllers/llmo/llmo-cache-handler.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/controllers/llmo/llmo-cache-handler.js b/src/controllers/llmo/llmo-cache-handler.js index 32336a8b5..31c7b75b3 100644 --- a/src/controllers/llmo/llmo-cache-handler.js +++ b/src/controllers/llmo/llmo-cache-handler.js @@ -111,7 +111,7 @@ const fetchAndProcessSingleFile = async (context, llmoConfig, filePath, queryPar log.info(`✓ Processed result cache HIT for: ${cacheKey} (fetch time: ${cacheFetchTime}ms)`); return { data: cachedResult, - headers: {}, + headers: { 'Content-Encoding': 'br' }, }; } From 0ab90e09428fb3535ec66799fa29dd661ce3cadc Mon Sep 17 00:00:00 2001 From: Char Date: Fri, 14 Nov 2025 16:17:29 +0100 Subject: [PATCH 38/38] feat: implement controlled concurrency for fetching multiple files in llmo-cache-handler --- src/controllers/llmo/llmo-cache-handler.js | 88 +++++++++++++------ .../llmo/llmo-cache-handler.test.js | 4 +- 2 files changed, 64 insertions(+), 28 deletions(-) diff --git a/src/controllers/llmo/llmo-cache-handler.js b/src/controllers/llmo/llmo-cache-handler.js index 31c7b75b3..589207b39 100644 --- a/src/controllers/llmo/llmo-cache-handler.js +++ b/src/controllers/llmo/llmo-cache-handler.js @@ -195,35 +195,71 @@ const fetchAndProcessSingleFile = async (context, llmoConfig, filePath, queryPar } }; -const fetchAndProcessMultipleFiles = async (context, llmoConfig, files, queryParams) => { - const { log } = context; +/** + * Process promises in batches with controlled concurrency + * @param {Array} items - Items to process + * @param {Function} fn - Async function to process each item + * @param {number} concurrency - Maximum number of concurrent operations + * @returns {Promise} - Results array + */ +const processBatch = async (items, fn, concurrency) => { + const results = []; + const executing = []; + + for (const item of items) { + const promise = fn(item).then((result) => { + executing.splice(executing.indexOf(promise), 1); + return result; + }); - // Fetch and process all files in parallel - const fetchPromises = files.map(async (filePath) => { - try { - const { data } = await fetchAndProcessSingleFile( - context, - llmoConfig, - filePath, - queryParams, - ); - return { - path: filePath, - status: 'success', - data, - }; - } catch (error) { - log.error(`Error fetching and processing file ${filePath}: ${error.message}`); - return { - path: filePath, - status: 'error', - error: error.message, - }; + results.push(promise); + executing.push(promise); + + if (executing.length >= concurrency) { + // eslint-disable-next-line no-await-in-loop + await Promise.race(executing); } - }); + } - // Wait for all parallel fetches to complete - const results = await Promise.all(fetchPromises); + return Promise.all(results); +}; + +const fetchAndProcessMultipleFiles = async (context, llmoConfig, files, queryParams) => { + const { log } = context; + + // Limit concurrent fetches to prevent resource contention and timeouts + // This prevents all requests from competing for bandwidth/resources + const MAX_CONCURRENT_FETCHES = 7; + + log.info(`Fetching ${files.length} files with max concurrency of ${MAX_CONCURRENT_FETCHES}`); + + // Fetch and process files with controlled concurrency + const results = await processBatch( + files, + async (filePath) => { + try { + const { data } = await fetchAndProcessSingleFile( + context, + llmoConfig, + filePath, + queryParams, + ); + return { + path: filePath, + status: 'success', + data, + }; + } catch (error) { + log.error(`Error fetching and processing file ${filePath}: ${error.message}`); + return { + path: filePath, + status: 'error', + error: error.message, + }; + } + }, + MAX_CONCURRENT_FETCHES, + ); return results; }; diff --git a/test/controllers/llmo/llmo-cache-handler.test.js b/test/controllers/llmo/llmo-cache-handler.test.js index b43848d81..c31e38485 100644 --- a/test/controllers/llmo/llmo-cache-handler.test.js +++ b/test/controllers/llmo/llmo-cache-handler.test.js @@ -672,11 +672,11 @@ describe('llmo-cache-handler', () => { // Remove dataSource to enable multi-file mode mockContext.params = { siteId: TEST_SITE_ID }; - mockContext.data = { file: ['file1.json', 'file2.json'] }; + mockContext.data = { file: ['file1.json', 'file2.json', 'file1.json', 'file2.json', 'file1.json', 'file2.json', 'file1.json', 'file2.json', 'file1.json', 'file2.json', 'file1.json', 'file2.json'] }; const result = await queryLlmoWithCache(mockContext, mockLlmoConfig); - expect(result.data).to.be.an('array').with.length(2); + expect(result.data).to.be.an('array').with.length(12); expect(result.data[0].status).to.equal('success'); expect(result.data[0].path).to.equal('file1.json'); expect(result.data[0].data).to.deep.equal(file1Data);