Skip to content

Commit e23334c

Browse files
committed
Add LLMs txt files support to v4 docs
1 parent ffa907f commit e23334c

File tree

4 files changed

+26852
-9
lines changed

4 files changed

+26852
-9
lines changed

.github/workflows/deploy-production.yml

Lines changed: 56 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,59 @@
1-
name: Deploy Production version
1+
name: Deploy Production v4 version
22
on:
33
push:
44
branches:
5-
- main
6-
5+
- v4
76
jobs:
8-
deploy-prod:
7+
generate-llms-v4:
  # Regenerates docusaurus/static/llms-v4*.txt and pushes a commit when the
  # generated files differ from what is checked in.
  runs-on: ubuntu-latest
  permissions:
    contents: write
    actions: read
  outputs:
    files-changed: ${{ steps.check-changes.outputs.changed }}
  steps:
    - name: 📥 Checkout
      uses: actions/checkout@v4
      with:
        token: ${{ secrets.PAT_AUTO_MERGE_LLMS_TXT_TOKEN }}
        fetch-depth: 0

    - name: 🟢 Setup Node.js
      uses: actions/setup-node@v4
      with:
        node-version: '20'

    - name: 📦 Install dependencies
      working-directory: ./docusaurus
      run: npm install

    - name: 🤖 Generate LLMs v4 files
      working-directory: ./docusaurus
      run: node scripts/generate-llms-v4.js

    - name: 🔍 Check for changes
      id: check-changes
      # `git status --porcelain` is used instead of `git diff --quiet HEAD`
      # because `git diff` ignores untracked files: a freshly generated file
      # that is not yet in the repository would otherwise never be committed.
      run: |
        if [ -z "$(git status --porcelain -- docusaurus/static/llms-v4*.txt)" ]; then
          echo "changed=false" >> "$GITHUB_OUTPUT"
          echo "🔄 No changes in LLMs v4 files"
        else
          echo "changed=true" >> "$GITHUB_OUTPUT"
          echo "📝 LLMs v4 files have changes"
        fi

    - name: 📤 Commit and push changes
      if: steps.check-changes.outputs.changed == 'true'
      run: |
        git config --local user.email "noreply@github.com"
        git config --local user.name "GitHub Actions"
        git add docusaurus/static/llms-v4*.txt
        git commit -m "🤖 Update LLMs v4 files [skip ci]"
        git push
53+
54+
deploy-prod-v4:
55+
runs-on: ubuntu-latest
56+
needs: generate-llms-v4
1057
steps:
1158
# Need to get actual deployment URL and not previous one
1259
- name: Get Vercel deployment URL
@@ -19,7 +66,7 @@ jobs:
1966
vercel_target: "production"
2067
vercel_app: "documentation"
2168
vercel_project_id: ${{ secrets.VERCEL_PROJECT_ID }}
22-
69+
2370
# Waits for Vercel to finish building the docs
2471
- name: Wait for Vercel deploy to complete
2572
uses: UnlyEd/github-action-await-vercel@v1.2.14
@@ -29,12 +76,12 @@ jobs:
2976
with:
3077
deployment-url: ${{ steps.get-url-prod.outputs.preview_url }}
3178
timeout: 300 # wait 5 minutes before failing
32-
79+
3380
# Outputs the current URL and confirms ready status (for debugging purposes)
3481
- name: Output debug status
3582
run: "echo The deployment at ${{ fromJson(steps.await-vercel-prod.outputs.deploymentDetails).url }} is ${{ fromJson(steps.await-vercel-prod.outputs.deploymentDetails).readyState }}"
83+
3684
# Clears Cloudfront cache
37-
3885
- name: Invalidate CloudFront Cache
3986
uses: chetan/invalidate-cloudfront-action@v2
4087
id: invalid-cloudfront-prod
@@ -44,7 +91,7 @@ jobs:
4491
AWS_REGION: ${{ secrets.LEGACY_V4_REGION }}
4592
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
4693
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
47-
94+
4895
# Updates the Algolia Search indexes
4996
- name: Algolia crawler creation and crawl
5097
uses: algolia/algoliasearch-crawler-github-actions@v1.1.9
@@ -55,4 +102,4 @@ jobs:
55102
algolia-app-id: ${{ secrets.ALGOLIA_APP_ID }}
56103
algolia-api-key: ${{ secrets.ALGOLIA_API_KEY }}
57104
site-url: "https://docs-v4.strapi.io"
58-
crawler-name: "strapiv4Docs"
105+
crawler-name: "strapiv4Docs"
Lines changed: 230 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,230 @@
1+
const fs = require('fs-extra');
2+
const path = require('path');
3+
const matter = require('gray-matter');
4+
5+
class DocusaurusLlmsGenerator {
6+
constructor(config = {}) {
7+
this.docsDir = config.docsDir || '.';
8+
this.sidebarPath = config.sidebarPath || 'sidebars.js';
9+
this.baseUrl = config.baseUrl || 'https://docs-v4.strapi.io';
10+
this.outputDir = config.outputDir || 'static';
11+
this.siteName = config.siteName || 'Strapi v4 Documentation';
12+
}
13+
14+
async generate() {
15+
try {
16+
console.log('🔍 Extracting documentation pages...');
17+
const pages = await this.extractAllPages();
18+
console.log(`📄 ${pages.length} pages found`);
19+
20+
console.log('📝 Generating llms-v4.txt...');
21+
const llmsTxt = this.generateLlmsTxt(pages);
22+
await fs.ensureDir(this.outputDir);
23+
await fs.writeFile(path.join(this.outputDir, 'llms-v4.txt'), llmsTxt);
24+
25+
console.log('📚 Generating llms-v4-full.txt...');
26+
const llmsFullTxt = this.generateLlmsFullTxt(pages);
27+
await fs.writeFile(path.join(this.outputDir, 'llms-v4-full.txt'), llmsFullTxt);
28+
29+
console.log('✅ LLMs files successfully generated !');
30+
console.log(` - ${this.outputDir}/llms-v4.txt`);
31+
console.log(` - ${this.outputDir}/llms-v4-full.txt`);
32+
} catch (error) {
33+
console.error('❌ Error while trying to generate LLMs files:', error);
34+
throw error;
35+
}
36+
}
37+
38+
async extractAllPages() {
39+
const pages = [];
40+
41+
// Load sidebar configuration
42+
const sidebarConfig = this.loadSidebarConfig();
43+
44+
// Process every sidebar
45+
for (const [sidebarName, sidebarItems] of Object.entries(sidebarConfig)) {
46+
await this.processItems(sidebarItems, pages);
47+
}
48+
49+
// Sort pages by URL for a consistent and clear order
50+
return pages.sort((a, b) => a.url.localeCompare(b.url));
51+
}
52+
53+
loadSidebarConfig() {
54+
try {
55+
// Delete cache to reload config
56+
delete require.cache[require.resolve(path.resolve(this.sidebarPath))];
57+
return require(path.resolve(this.sidebarPath));
58+
} catch (error) {
59+
console.warn(`⚠️ Failed to load ${this.sidebarPath}, using folder scan`);
60+
return this.fallbackToDirectoryScan();
61+
}
62+
}
63+
64+
async fallbackToDirectoryScan() {
65+
// Direct scan of v4 docs folders if sidebars.js is not available
66+
const folders = ['docs/dev-docs', 'docs/user-docs', 'docs/cloud'];
67+
const allFiles = [];
68+
69+
for (const folder of folders) {
70+
if (await fs.pathExists(folder)) {
71+
const files = await this.getAllMdFiles(folder);
72+
allFiles.push(...files.map(file => file.replace('.md', '').replace('docs/', '')));
73+
}
74+
}
75+
76+
return { docs: allFiles };
77+
}
78+
79+
async getAllMdFiles(dir, prefix = '') {
80+
const files = [];
81+
const items = await fs.readdir(dir);
82+
83+
for (const item of items) {
84+
const fullPath = path.join(dir, item);
85+
const stat = await fs.stat(fullPath);
86+
87+
if (stat.isDirectory()) {
88+
const subFiles = await this.getAllMdFiles(fullPath, path.join(prefix, item));
89+
files.push(...subFiles);
90+
} else if (item.endsWith('.md')) {
91+
files.push(path.join(prefix, item));
92+
}
93+
}
94+
95+
return files;
96+
}
97+
98+
async processItems(items, pages) {
99+
if (!Array.isArray(items)) return;
100+
101+
for (const item of items) {
102+
if (typeof item === 'string') {
103+
await this.processDocPage(item, pages);
104+
} else if (item.type === 'doc') {
105+
await this.processDocPage(item.id, pages);
106+
} else if (item.type === 'category' && item.items) {
107+
await this.processItems(item.items, pages);
108+
} else if (item.type === 'link') {
109+
// Skip link items as they point to external resources
110+
continue;
111+
} else if (item.items) {
112+
// Handle groups or other structures
113+
await this.processItems(item.items, pages);
114+
}
115+
}
116+
}
117+
118+
async processDocPage(docId, pages) {
119+
const possiblePaths = [
120+
path.join(this.docsDir, `${docId}.md`),
121+
path.join(this.docsDir, `${docId}.mdx`),
122+
path.join(this.docsDir, docId, 'index.md'),
123+
path.join(this.docsDir, docId, 'index.mdx')
124+
];
125+
126+
for (const filePath of possiblePaths) {
127+
if (await fs.pathExists(filePath)) {
128+
try {
129+
const fileContent = await fs.readFile(filePath, 'utf-8');
130+
const { data: frontmatter, content } = matter(fileContent);
131+
132+
const pageUrl = this.generatePageUrl(docId);
133+
134+
pages.push({
135+
id: docId,
136+
title: frontmatter.title || this.getTitleFromContent(content) || docId,
137+
description: frontmatter.description || this.extractDescription(content),
138+
url: pageUrl,
139+
content: this.cleanContent(content),
140+
frontmatter
141+
});
142+
143+
break; // Stop once a file is found
144+
} catch (error) {
145+
console.warn(`⚠️ Error while handling file ${filePath}:`, error.message);
146+
}
147+
}
148+
}
149+
}
150+
151+
generatePageUrl(docId) {
152+
// Delete common prefixes and generate proper URL
153+
const cleanId = docId.replace(/^(docs\/|pages\/)/, '');
154+
return `${this.baseUrl}/${cleanId}`;
155+
}
156+
157+
getTitleFromContent(content) {
158+
// Extract first h1 from content
159+
const match = content.match(/^#\s+(.+)$/m);
160+
return match ? match[1].trim() : null;
161+
}
162+
163+
extractDescription(content) {
164+
// Extract first non-empty paragraph
165+
const lines = content.split('\n');
166+
for (const line of lines) {
167+
const trimmed = line.trim();
168+
if (trimmed && !trimmed.startsWith('#') && !trimmed.startsWith('---')) {
169+
return trimmed.substring(0, 150) + (trimmed.length > 150 ? '...' : '');
170+
}
171+
}
172+
return '';
173+
}
174+
175+
cleanContent(content) {
176+
return content
177+
// Delete frontmatter metadata
178+
.replace(/^---[\s\S]*?---\n/, '')
179+
// Delete React/MDX components
180+
.replace(/<[A-Z][a-zA-Z]*[^>]*>[\s\S]*?<\/[A-Z][a-zA-Z]*>/g, '')
181+
.replace(/<[A-Z][a-zA-Z]*[^>]*\/>/g, '')
182+
// Delete imports
183+
.replace(/^import\s+.*$/gm, '')
184+
// Delete exports
185+
.replace(/^export\s+.*$/gm, '')
186+
// Clean up empty lines
187+
.replace(/\n\s*\n\s*\n/g, '\n\n')
188+
.trim();
189+
}
190+
191+
generateLlmsTxt(pages) {
192+
const lines = [`# ${this.siteName}`, ''];
193+
194+
pages.forEach(page => {
195+
const description = page.description || 'No description available';
196+
lines.push(`- [${page.title}](${page.url}): ${description}`);
197+
});
198+
199+
return lines.join('\n');
200+
}
201+
202+
generateLlmsFullTxt(pages) {
203+
const sections = [];
204+
205+
pages.forEach(page => {
206+
sections.push(`# ${page.title}`);
207+
sections.push(`Source: ${page.url}`);
208+
sections.push('');
209+
sections.push(page.content);
210+
sections.push('\n\n');
211+
});
212+
213+
return sections.join('\n');
214+
}
215+
}
216+
217+
module.exports = DocusaurusLlmsGenerator;

// If script is executed directly
if (require.main === module) {
  const generator = new DocusaurusLlmsGenerator({
    docsDir: 'docs', // The docs live in docs/
    sidebarPath: 'sidebars.js', // sidebars.js is in the current working directory
    baseUrl: 'https://docs-v4.strapi.io',
    outputDir: 'static',
    siteName: 'Strapi v4 Documentation'
  });

  generator.generate().catch((error) => {
    console.error(error);
    // Signal failure to the caller (e.g. a CI step); merely logging the error
    // would let the process exit with status 0.
    process.exitCode = 1;
  });
}

0 commit comments

Comments
 (0)