Skip to content

Commit 016f595

Browse files
abhijna and Abhijna Parigi authored
Meilisearch semantic search enabled (#2327)
Add hybrid search and AI chat using Meilisearch and OpenAI

- Implement hybrid semantic + keyword search with Meilisearch
- Add AI-powered chat responses using OpenAI GPT-3.5
- Integrate 'Ask AI' feature directly into search dropdown
- Add secure Netlify functions for search and chat APIs

---------

Co-authored-by: Abhijna Parigi <Abhijna@MacBook-Pro.local>
1 parent df14848 commit 016f595

File tree

12 files changed

+4605
-3883
lines changed

12 files changed

+4605
-3883
lines changed
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
name: Meilisearch scrape
2+
3+
on:
4+
workflow_dispatch:
5+
inputs:
6+
parameter:
7+
description: Run from dispatch
8+
push:
9+
branches:
10+
- main
11+
12+
jobs:
13+
scrape:
14+
runs-on: ubuntu-latest
15+
steps:
16+
- name: Check out code
17+
uses: actions/checkout@v4
18+
19+
- name: Wait for deployment
20+
run: sleep 480s
21+
shell: bash
22+
23+
- name: Run Meilisearch Cloud Crawler
24+
uses: meilisearch/actions/cloud-crawler@main
25+
with:
26+
token: ${{ secrets.MEILISEARCH_CLOUD_CRAWLER_TOKEN }}
27+
28+
- name: Restore index settings after scraping
29+
env:
30+
MEILISEARCH_HOST_URL: ${{ secrets.MEILISEARCH_HOST_URL }}
31+
MEILISEARCH_API_KEY: ${{ secrets.MEILISEARCH_API_KEY }}
32+
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
33+
run: |
34+
echo "Restoring index settings (scraper may have reset them)..."
35+
36+
# Apply base settings from repo file
37+
response1=$(curl -w "\n%{http_code}" -X PATCH \
38+
"${MEILISEARCH_HOST_URL}/indexes/semgrep_docs/settings" \
39+
-H "Authorization: Bearer ${MEILISEARCH_API_KEY}" \
40+
-H "Content-Type: application/json" \
41+
-d @meilisearch-settings.json)
42+
43+
http_code1=$(echo "$response1" | tail -n1)
44+
echo "Base settings response code: $http_code1"
45+
46+
if [ "$http_code1" != "202" ]; then
47+
echo "Failed to apply base settings"
48+
echo "$response1"
49+
exit 1
50+
fi
51+
52+
echo "Checking if OPENAI_API_KEY is set..."
53+
if [ -z "$OPENAI_API_KEY" ]; then
54+
echo "ERROR: OPENAI_API_KEY is not set in GitHub Secrets!"
55+
echo "Please add it at: https://github.com/semgrep/semgrep-docs/settings/secrets/actions"
56+
exit 1
57+
fi
58+
59+
# Apply embedder separately (requires secret)
60+
response2=$(curl -w "\n%{http_code}" -X PATCH \
61+
"${MEILISEARCH_HOST_URL}/indexes/semgrep_docs/settings" \
62+
-H "Authorization: Bearer ${MEILISEARCH_API_KEY}" \
63+
-H "Content-Type: application/json" \
64+
-d "{
65+
\"embedders\": {
66+
\"default\": {
67+
\"source\": \"openAi\",
68+
\"model\": \"text-embedding-3-small\",
69+
\"apiKey\": \"${OPENAI_API_KEY}\",
70+
\"dimensions\": 1536,
71+
\"documentTemplate\": \"{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}\"
72+
}
73+
}
74+
}")
75+
76+
http_code2=$(echo "$response2" | tail -n1)
77+
echo "Embedder settings response code: $http_code2"
78+
79+
if [ "$http_code2" != "202" ]; then
80+
echo "Failed to apply embedder settings"
81+
echo "$response2"
82+
exit 1
83+
fi
84+
85+
echo "✅ Index settings restored successfully!"
86+
echo "Waiting for embedder to process (this takes a few minutes)..."
87+
sleep 60
88+

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,4 @@ yarn-error.log*
3131

3232
# Ignore .history
3333
/.history/
34+
meili_data/

docusaurus.config.js

Lines changed: 0 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -13,27 +13,7 @@ module.exports = {
1313
organizationName: 'semgrep', // Usually your GitHub org/user name.
1414
projectName: 'semgrep', // Usually your repo name.
1515
trailingSlash: false,
16-
themes: ['@markprompt/docusaurus-theme-search'],
1716
themeConfig: {
18-
markprompt: {
19-
projectKey: 'jbhF5LligltdKaJucMjDcWcRodaVpzqE',
20-
trigger: { floating: false },
21-
systemPrompt: 'You are a kind AI who loves to help people!',
22-
model: 'gpt-4',
23-
display: 'dialog',
24-
search: {
25-
enabled: true,
26-
provider: {
27-
name: 'algolia',
28-
apiKey: 'f53612c29d04a2ff71dce6e3b2f76752',
29-
appId: 'RGEY1AKPUC',
30-
indexName: 'docs',
31-
},
32-
},
33-
chat: {
34-
assistantId: '5af10a40-7ed8-4aa1-9e7a-65d2858445af',
35-
}
36-
},
3717
docs: {
3818
sidebar: {
3919
hideable: true,
@@ -160,34 +140,6 @@ module.exports = {
160140
darkTheme: darkCodeTheme,
161141
additionalLanguages: ['java', 'ruby', 'php', 'csharp', 'rust', 'scala', 'kotlin', 'bash', 'json'],
162142
},
163-
//algolia: {
164-
// apiKey: 'f53612c29d04a2ff71dce6e3b2f76752',
165-
// indexName: 'docs',
166-
167-
// // Optional: see doc section below
168-
// contextualSearch: false,
169-
170-
// // Optional: see doc section below
171-
// appId: 'RGEY1AKPUC',
172-
173-
// // Optional: Algolia search parameters
174-
// searchParameters: {},
175-
// facetFilters: [],
176-
// //... other Algolia params
177-
// "customRanking": [
178-
// "desc(weight.page_rank)"
179-
// ],
180-
// "ranking": [
181-
// "desc(weight.page_rank)",
182-
// "custom",
183-
// "filters",
184-
// "typo",
185-
// "attribute",
186-
// "words",
187-
// "exact",
188-
// "proximity"
189-
// ]
190-
//},
191143
image: 'https://semgrep.dev/thumbnail.png',
192144
//announcementBar: {
193145
// id: 'office-hours',

meilisearch-settings.json

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
{
2+
"synonyms": {
3+
"autofix": ["autofix", "automatic fixes", "remediation", "code fixes"],
4+
"ci": ["ci", "continuous integration", "pipeline", "github actions", "gitlab ci", "automation"],
5+
"config": ["config", "configuration", "settings", "setup"],
6+
"create": ["create", "write", "build", "develop", "make", "author"],
7+
"custom": ["custom", "own", "personal", "user-defined", "bespoke"],
8+
"deployment": ["deployment", "setup", "configuration", "installation", "integration"],
9+
"findings": ["findings", "issues", "vulnerabilities", "results", "matches", "detections"],
10+
"github": ["github", "git", "version control", "repository"],
11+
"gitlab": ["gitlab", "git", "version control", "repository"],
12+
"go": ["go", "golang"],
13+
"ignore": ["ignore", "exclude", "suppress", "disable"],
14+
"java": ["java", "spring", "maven", "gradle"],
15+
"javascript": ["javascript", "js", "node.js", "nodejs", "typescript", "ts"],
16+
"metavariables": ["metavariables", "variables", "placeholders", "pattern variables"],
17+
"oss": ["oss", "open source"],
18+
"patterns": ["patterns", "rules", "expressions", "syntax patterns", "matching"],
19+
"policy": ["policy", "policies", "governance", "compliance"],
20+
"pro": ["pro", "semgrep pro", "commercial", "paid version", "enterprise"],
21+
"python": ["python", "py", "django", "flask"],
22+
"rules": ["rules", "patterns", "detectors", "checks", "rule writing", "rule creation"],
23+
"rulewriting": ["rule writing", "write rules", "create rules", "custom rules", "rule development", "rule authoring"],
24+
"saml": ["saml", "identity provider", "idp", "federation"],
25+
"sast": ["sast", "static application security testing", "code security", "security analysis"],
26+
"sca": ["sca", "supply chain", "dependencies", "vulnerabilities", "dependency scanning"],
27+
"scan": ["scan", "scanning", "analysis", "check", "run"],
28+
"scp": ["scp", "semgrep cloud platform", "semgrep app", "semgrep platform"],
29+
"secrets": ["secrets", "api keys", "tokens", "credentials", "sensitive data"],
30+
"sms": ["sms", "semgrep managed scanning", "managed scans", "cloud scanning"],
31+
"ssc": ["ssc", "semgrep supply chain", "supply chain security", "dependency security"],
32+
"sso": ["sso", "single sign-on", "single sign on", "authentication"],
33+
"taint": ["taint", "taint analysis", "data flow", "taint mode", "taint tracking"],
34+
"workflow": ["workflow", "pipeline", "automation", "ci/cd", "devops"]
35+
},
36+
"stopWords": [
37+
"what", "is", "are", "how", "to", "the", "a", "an", "do", "does", "can", "i", "my"
38+
],
39+
"searchableAttributes": [
40+
"hierarchy_lvl1",
41+
"hierarchy_lvl2",
42+
"hierarchy_lvl3",
43+
"hierarchy_lvl0",
44+
"hierarchy.lvl1",
45+
"hierarchy.lvl2",
46+
"hierarchy.lvl3",
47+
"hierarchy.lvl0",
48+
"content",
49+
"url"
50+
],
51+
"displayedAttributes": [
52+
"*"
53+
],
54+
"rankingRules": [
55+
"words",
56+
"typo",
57+
"proximity",
58+
"attribute",
59+
"sort",
60+
"exactness"
61+
],
62+
"filterableAttributes": [
63+
"type",
64+
"language",
65+
"version",
66+
"docusaurus_tag",
67+
"hierarchy_lvl0",
68+
"hierarchy_lvl1",
69+
"hierarchy_lvl2",
70+
"hierarchy.lvl0",
71+
"hierarchy.lvl1",
72+
"hierarchy.lvl2"
73+
],
74+
"sortableAttributes": [
75+
"hierarchy.lvl0",
76+
"hierarchy.lvl1",
77+
"hierarchy.lvl2"
78+
],
79+
"distinctAttribute": "url",
80+
"typoTolerance": {
81+
"enabled": true,
82+
"minWordSizeForTypos": {
83+
"oneTypo": 3,
84+
"twoTypos": 7
85+
},
86+
"disableOnWords": [],
87+
"disableOnAttributes": []
88+
},
89+
"faceting": {
90+
"maxValuesPerFacet": 100
91+
},
92+
"pagination": {
93+
"maxTotalHits": 1000
94+
}
95+
}
96+

netlify.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
[functions]
2+
directory = "netlify/functions"
3+
node_bundler = "esbuild"
4+
15
[[redirects]]
26
from = "/*"
37
to = "/docs/404.html"

0 commit comments

Comments
 (0)