1+ const fs = require ( 'fs-extra' ) ;
2+ const path = require ( 'path' ) ;
3+ const matter = require ( 'gray-matter' ) ;
4+
/**
 * Builds two plain-text exports of a Docusaurus documentation site:
 * `llms-v4.txt` (one markdown link per page with a short description) and
 * `llms-v4-full.txt` (the cleaned markdown body of every page).
 *
 * Pages are discovered through the sidebar configuration file; when that file
 * cannot be loaded, a fixed set of sub-folders of `docsDir` is scanned instead.
 */
class DocusaurusLlmsGenerator {
  /**
   * @param {object} [config]
   * @param {string} [config.docsDir='.'] Folder against which doc ids are resolved.
   * @param {string} [config.sidebarPath='sidebars.js'] Sidebar config file, resolved from the current working directory.
   * @param {string} [config.baseUrl='https://docs-v4.strapi.io'] Prefix of every generated page URL.
   * @param {string} [config.outputDir='static'] Folder receiving the generated files.
   * @param {string} [config.siteName='Strapi v4 Documentation'] Heading of `llms-v4.txt`.
   */
  constructor(config = {}) {
    // `||` is intentional: empty strings fall back to the defaults as well.
    this.docsDir = config.docsDir || '.';
    this.sidebarPath = config.sidebarPath || 'sidebars.js';
    this.baseUrl = config.baseUrl || 'https://docs-v4.strapi.io';
    this.outputDir = config.outputDir || 'static';
    this.siteName = config.siteName || 'Strapi v4 Documentation';
  }

  /**
   * Extracts every page and writes both output files into `outputDir`.
   *
   * @returns {Promise<void>}
   * @throws Re-throws any error raised during extraction or writing, after logging it.
   */
  async generate() {
    try {
      console.log('🔍 Extracting documentation pages...');
      const pages = await this.extractAllPages();
      console.log(`📄 ${pages.length} pages found`);

      console.log('📝 Generating llms-v4.txt...');
      const llmsTxt = this.generateLlmsTxt(pages);
      await fs.ensureDir(this.outputDir);
      await fs.writeFile(path.join(this.outputDir, 'llms-v4.txt'), llmsTxt);

      console.log('📚 Generating llms-v4-full.txt...');
      const llmsFullTxt = this.generateLlmsFullTxt(pages);
      await fs.writeFile(path.join(this.outputDir, 'llms-v4-full.txt'), llmsFullTxt);

      console.log('✅ LLMs files successfully generated !');
      console.log(` - ${this.outputDir}/llms-v4.txt`);
      console.log(` - ${this.outputDir}/llms-v4-full.txt`);
    } catch (error) {
      console.error('❌ Error while trying to generate LLMs files:', error);
      throw error;
    }
  }

  /**
   * Collects the pages referenced by every sidebar.
   *
   * @returns {Promise<object[]>} Pages without duplicate ids, sorted by URL.
   */
  async extractAllPages() {
    const pages = [];

    // loadSidebarConfig() hands back a promise when it falls back to the
    // directory scan, so the result must be awaited before being iterated.
    const sidebarConfig = await this.loadSidebarConfig();

    for (const sidebarItems of Object.values(sidebarConfig || {})) {
      await this.processItems(sidebarItems, pages);
    }

    // A doc can be listed in several sidebars (or as a category link and as an
    // item); keep only its first occurrence.
    const seenIds = new Set();
    const uniquePages = pages.filter((page) => {
      if (seenIds.has(page.id)) {
        return false;
      }
      seenIds.add(page.id);
      return true;
    });

    // Sort pages by URL for a consistent and clear order
    return uniquePages.sort((a, b) => a.url.localeCompare(b.url));
  }

  /**
   * Loads the sidebar configuration module, bypassing the `require` cache.
   *
   * @returns {object|Promise<object>} The exported sidebar object, or the
   *   promise returned by `fallbackToDirectoryScan()` when loading fails.
   *   Callers should `await` the result to handle both cases.
   */
  loadSidebarConfig() {
    try {
      const resolvedPath = require.resolve(path.resolve(this.sidebarPath));
      // Delete cache to reload config
      delete require.cache[resolvedPath];
      return require(resolvedPath);
    } catch (error) {
      console.warn(`⚠️ Failed to load ${this.sidebarPath}, using folder scan`, error.message);
      return this.fallbackToDirectoryScan();
    }
  }

  /**
   * Builds a minimal sidebar object by scanning the `dev-docs`, `user-docs`
   * and `cloud` sub-folders of `docsDir`.
   *
   * Ids keep their section folder (e.g. `dev-docs/intro`) and use `/`
   * separators so that `processDocPage` can resolve them against `docsDir`.
   *
   * @returns {Promise<{docs: string[]}>}
   */
  async fallbackToDirectoryScan() {
    const sections = ['dev-docs', 'user-docs', 'cloud'];
    const docIds = [];

    for (const section of sections) {
      const folder = path.join(this.docsDir, section);
      if (await fs.pathExists(folder)) {
        const files = await this.getAllMdFiles(folder, section);
        for (const file of files) {
          // Strip only the trailing extension and normalise Windows separators.
          docIds.push(file.replace(/\.mdx?$/, '').split(path.sep).join('/'));
        }
      }
    }

    return { docs: docIds };
  }

  /**
   * Recursively lists the `.md` and `.mdx` files under `dir`.
   *
   * @param {string} dir Folder to scan.
   * @param {string} [prefix=''] Path prepended to every returned entry.
   * @returns {Promise<string[]>} File paths expressed relative to `dir`, prefixed with `prefix`.
   */
  async getAllMdFiles(dir, prefix = '') {
    const files = [];
    const entries = await fs.readdir(dir);

    for (const entry of entries) {
      const fullPath = path.join(dir, entry);
      const stat = await fs.stat(fullPath);

      if (stat.isDirectory()) {
        const nested = await this.getAllMdFiles(fullPath, path.join(prefix, entry));
        files.push(...nested);
      } else if (entry.endsWith('.md') || entry.endsWith('.mdx')) {
        files.push(path.join(prefix, entry));
      }
    }

    return files;
  }

  /**
   * Walks a list of sidebar items and registers every doc it references.
   *
   * Handles string shorthands, `doc`/`ref` items, categories (including a
   * category whose `link` points at a doc) and any other item carrying
   * `items`. `link` items and malformed entries are skipped.
   *
   * @param {Array} items Sidebar items; anything that is not an array is ignored.
   * @param {object[]} pages Accumulator that receives the extracted pages.
   * @returns {Promise<void>}
   */
  async processItems(items, pages) {
    if (!Array.isArray(items)) return;

    for (const item of items) {
      if (typeof item === 'string') {
        await this.processDocPage(item, pages);
        continue;
      }

      // Skip null/primitive entries instead of crashing on `item.type`.
      if (!item || typeof item !== 'object') continue;

      // Link items point to external resources.
      if (item.type === 'link') continue;

      if (item.type === 'doc' || item.type === 'ref') {
        if (item.id) {
          await this.processDocPage(item.id, pages);
        }
        continue;
      }

      // A category can have its own landing page: link: { type: 'doc', id }.
      if (item.type === 'category' && item.link && item.link.type === 'doc' && item.link.id) {
        await this.processDocPage(item.link.id, pages);
      }

      // Categories, groups or other structures with children.
      if (item.items) {
        await this.processItems(item.items, pages);
      }
    }
  }

  /**
   * Reads the first existing file matching `docId` and appends a page entry
   * to `pages`. Candidates, in order: `<id>.md`, `<id>.mdx`, `<id>/index.md`,
   * `<id>/index.mdx`, all under `docsDir`. Nothing is added when no candidate
   * exists; a candidate that fails to load is logged and the next one is tried.
   *
   * @param {string} docId Doc id from the sidebar.
   * @param {object[]} pages Accumulator that receives the page.
   * @returns {Promise<void>}
   */
  async processDocPage(docId, pages) {
    if (typeof docId !== 'string' || docId === '') return;

    const possiblePaths = [
      path.join(this.docsDir, `${docId}.md`),
      path.join(this.docsDir, `${docId}.mdx`),
      path.join(this.docsDir, docId, 'index.md'),
      path.join(this.docsDir, docId, 'index.mdx')
    ];

    for (const filePath of possiblePaths) {
      if (!(await fs.pathExists(filePath))) continue;

      try {
        const fileContent = await fs.readFile(filePath, 'utf-8');
        const { data: frontmatter, content } = matter(fileContent);
        const cleanedContent = this.cleanContent(content);

        pages.push({
          id: docId,
          title: frontmatter.title || this.getTitleFromContent(content) || docId,
          // Use the cleaned body so an `import` line or a JSX component never
          // ends up as the page description.
          description: frontmatter.description || this.extractDescription(cleanedContent),
          url: this.generatePageUrl(docId),
          content: cleanedContent,
          frontmatter
        });

        break; // Stop once a file is found
      } catch (error) {
        console.warn(`⚠️ Error while handling file ${filePath}:`, error.message);
      }
    }
  }

  /**
   * Builds the public URL of a doc.
   *
   * @param {string} docId Doc id; a leading `docs/` or `pages/` is dropped.
   * @returns {string} `baseUrl` (without trailing slashes) + `/` + the cleaned id.
   */
  generatePageUrl(docId) {
    const cleanId = docId.replace(/^(docs\/|pages\/)/, '');
    // Avoid `//` when baseUrl was configured with a trailing slash.
    const base = this.baseUrl.replace(/\/+$/, '');
    return `${base}/${cleanId}`;
  }

  /**
   * Returns the text of the first level-1 markdown heading.
   *
   * @param {string} content Markdown source.
   * @returns {string|null} Trimmed heading text, or `null` when there is none.
   */
  getTitleFromContent(content) {
    const match = content.match(/^#\s+(.+)$/m);
    return match ? match[1].trim() : null;
  }

  /**
   * Returns the first non-empty line that is neither a heading (`#`) nor a
   * `---` separator, truncated to 150 characters followed by `...`.
   *
   * @param {string} content Markdown source.
   * @returns {string} The description, or `''` when no line qualifies.
   */
  extractDescription(content) {
    const lines = content.split('\n');
    for (const line of lines) {
      const trimmed = line.trim();
      if (trimmed && !trimmed.startsWith('#') && !trimmed.startsWith('---')) {
        return trimmed.substring(0, 150) + (trimmed.length > 150 ? '...' : '');
      }
    }
    return '';
  }

  /**
   * Strips a leading frontmatter block, capitalised (React/MDX) components,
   * `import`/`export` lines and runs of blank lines from markdown.
   *
   * @param {string} content Markdown/MDX source.
   * @returns {string} Cleaned, trimmed markdown.
   */
  cleanContent(content) {
    return content
      // Delete frontmatter metadata
      .replace(/^---[\s\S]*?---\n/, '')
      // Delete self-closing components FIRST: otherwise `<Icon />` is taken
      // for an opening tag by the next pattern, which then swallows all the
      // text up to the next closing tag.
      .replace(/<[A-Z][a-zA-Z]*[^>]*\/>/g, '')
      // Delete React/MDX components together with their children
      .replace(/<[A-Z][a-zA-Z]*[^>]*>[\s\S]*?<\/[A-Z][a-zA-Z]*>/g, '')
      // Delete imports
      .replace(/^import\s+.*$/gm, '')
      // Delete exports
      .replace(/^export\s+.*$/gm, '')
      // Clean up empty lines
      .replace(/\n\s*\n\s*\n/g, '\n\n')
      .trim();
  }

  /**
   * Renders the index file: a `# siteName` heading followed by one
   * `- [title](url): description` bullet per page.
   *
   * @param {object[]} pages Pages carrying `title`, `url` and `description`.
   * @returns {string}
   */
  generateLlmsTxt(pages) {
    const lines = [`# ${this.siteName}`, ''];

    for (const page of pages) {
      const description = page.description || 'No description available';
      lines.push(`- [${page.title}](${page.url}): ${description}`);
    }

    return lines.join('\n');
  }

  /**
   * Renders the full-content file: for each page a `# title` heading, a
   * `Source:` line, a blank line, the page content and a blank separator.
   *
   * @param {object[]} pages Pages carrying `title`, `url` and `content`.
   * @returns {string}
   */
  generateLlmsFullTxt(pages) {
    const sections = [];

    for (const page of pages) {
      sections.push(`# ${page.title}`, `Source: ${page.url}`, '', page.content, '\n\n');
    }

    return sections.join('\n');
  }
}
216+
module.exports = DocusaurusLlmsGenerator;

// If script is executed directly (not required as a module), generate the
// files with the Strapi v4 documentation settings.
if (require.main === module) {
  const generator = new DocusaurusLlmsGenerator({
    docsDir: 'docs', // The docs live in docs/
    sidebarPath: 'sidebars.js', // sidebars.js is right here (resolved from the current working directory)
    baseUrl: 'https://docs-v4.strapi.io',
    outputDir: 'static',
    siteName: 'Strapi v4 Documentation'
  });

  // generate() has already logged the error before re-throwing it; only signal
  // the failure to the calling shell/CI instead of logging it a second time
  // and exiting with status 0.
  generator.generate().catch(() => {
    process.exitCode = 1;
  });
}
0 commit comments