1+ module Docs
2+ class Threejs
3+ class CleanHtmlFilter < Filter
# Regular expressions for the square-bracket markup this filter rewrites.
# Each entry notes what its capture groups hold, in order.
PATTERNS = {
  # "[method:this name](params)" -> name, params
  method_this: /\[method:this\s+([^\]]+)\]\s*\((.*?)\)/,
  # "[method:Type name](params)" -> type, name, params
  method_return: /\[method:([^\s\]]+)\s+([^\]]+)\]\s*\((.*?)\)/,
  # "[method:Type name]" not directly followed by "(" -> type, name
  method_no_params: /\[method:([^\s\]]+)\s+([^\]]+)\](?!\()/,
  # "[property:Type name]" -> type, name
  property: /\[property:([^\]]+?)\s+([^\]]+?)\]/,
  # "[example:id text]" -> id, text
  example_link: /\[example:([^\s\]]+)\s+([^\]]+)\]/,
  # "[link:url text]" -> url, text
  external_link_text: /\[link:([^\s\]]+)\s+([^\]]+)\]/,
  # "[link:url]" -> url
  external_link: /\[link:([^\]]+)\]/,
  # "[page:path text]" -> path, text
  page_link_text: /\[page:([^\]]+?)\s+([^\]]+?)\]/,
  # "[page:path]" -> path
  page_link: /\[page:([^\]]+?)\]/,
  # "`code`" -> code
  inline_code: /`([^`]+)`/,
  # "[name]" (no captures)
  name_placeholder: /\[name\]/,
  # "[param:Type name]" -> type, name
  constructor_param: /\[param:([^\]]+?)\s+([^\]]+?)\]/
}.freeze
18+
# Runs each clean-up pass in the order listed, then returns `doc`.
def call
  %i[
    remove_unnecessary_elements
    wrap_code_blocks
    process_sections
    format_links
    add_section_structure
    format_notes
    add_heading_attributes
  ].each { |pass| send(pass) }

  doc
end
29+
30+ private
31+
# Removes every <head>, <script> and <style> element selected by `css`.
def remove_unnecessary_elements
  unwanted = css('head, script, style')
  unwanted.remove
end
35+
# Wraps each <code> element whose parent is not already a <pre> in a new
# <pre>, and tags that <pre> with data-language="javascript".
def wrap_code_blocks
  css('code').each do |code|
    unless code.parent.name == 'pre'
      code.wrap('<pre>')
      # After wrapping, the parent is the freshly created <pre>.
      code.parent['data-language'] = 'javascript'
    end
  end
end
43+
# Rewrites heading content in three passes:
#   1. removes every <h2> whose text is "Source", plus the element after it;
#   2. converts method-signature and property markup inside <h3> headings;
#   3. converts "[name]" and constructor-param markup inside <h1>/<h3>.
def process_sections
  # Handle source links
  css('h2').each do |node|
    next unless node.content.strip == 'Source'

    # The heading may be the last element, in which case there is nothing
    # after it to remove.
    node.next_element&.remove
    node.remove
  end

  # Handle method signatures and properties
  css('h3').each do |node|
    content = handle_method_signatures(node.inner_html)
    node.inner_html = handle_properties(content)
  end

  # Handle name placeholders and constructor params
  css('h1, h3').each do |node|
    content = handle_name_placeholders(node.inner_html)
    node.inner_html = format_constructor_params(content)
  end
end
68+
# Replaces method markup in +content+ with formatted signatures, trying the
# patterns in this order: :method_this, :method_return, :method_no_params.
#
# @param content [String] HTML fragment containing "[method:...]" markup
# @return [String] a new string with the matches replaced
def handle_method_signatures(content)
  with_this = content.gsub(PATTERNS[:method_this]) do
    format_method_signature('this', Regexp.last_match(1), Regexp.last_match(2))
  end

  with_returns = with_this.gsub(PATTERNS[:method_return]) do |match|
    type, name, params = Regexp.last_match.captures
    # A bare `next` would make gsub substitute nil.to_s (""), silently
    # deleting the markup; hand the matched text back untouched instead.
    next match if name.start_with?('this')

    format_method_signature(type, name, params, true)
  end

  with_returns.gsub(PATTERNS[:method_no_params]) do
    format_method_signature(Regexp.last_match(1), Regexp.last_match(2), nil, true)
  end
end
78+
# Builds the <dt> markup for one method signature.
#
# @param type_or_this [String] return type, or the literal 'this', which adds
#   a "this." prefix before the method name
# @param name [String] method name; also used verbatim as the <dt> id
# @param params_str [String, nil] comma-separated parameter list; nil renders
#   empty parentheses
# @param with_return [Boolean] append ": <type_or_this>" after the parentheses
# @return [String] HTML fragment
def format_method_signature(type_or_this, name, params_str, with_return = false)
  params = if params_str
             params_str
               .split(',')
               .map { |param| format_parameter(param.strip) }
               .join("<span class='sig-paren'>, </span>")
           end

  # Unary plus guarantees a mutable buffer for the appends below.
  html = +"<dt class='sig sig-object js' id='#{name}'>"
  html << "<span class='property'><span class='pre'>this</span></span>." if type_or_this == 'this'
  html << "<span class='sig-name descname'>#{name}</span>" \
          "<span class='sig-paren'>(</span>" \
          "#{params}" \
          "<span class='sig-paren'>)</span>"
  if with_return
    html << "<span class='sig-returns'><span class='sig-colon'>:</span> " \
            "<span class='sig-type'>#{type_or_this}</span></span>"
  end
  html << '</dt>'
end
98+
# Renders one parameter of a method signature.
#
# A parameter containing a space is split at the first run of whitespace
# into "type name"; anything else is emitted as-is.
#
# @param param [String] e.g. "Float x" or "x"
# @return [String] HTML fragment
def format_parameter(param)
  return "<span class='sig-param'>#{param}</span>" unless param.include?(' ')

  type, name = param.split(' ', 2).map(&:strip)
  "<span class='sig-param'><span class='sig-type'>#{type}</span> <span class='sig-name'>#{name}</span></span>"
end
107+
# Replaces every "[property:Type name]" in +content+ with a <dt> that shows
# the name followed by ": Type".
#
# @param content [String]
# @return [String] a new string with the matches replaced
def handle_properties(content)
  content.gsub(PATTERNS[:property]) do
    type, name = Regexp.last_match.captures
    "<dt class='sig sig-object js'>" \
      "<span class='sig-name descname'>#{name}</span>" \
      "<span class='sig-colon'>:</span> " \
      "<span class='sig-type'>#{type}</span></dt>"
  end
end
117+
# Replaces each "[name]" placeholder with the last path segment of `slug`
# (any ".html" removed), wrapped in <span class='descname'>.
#
# @param content [String]
# @return [String] a new string with the placeholders replaced
def handle_name_placeholders(content)
  content.gsub(PATTERNS[:name_placeholder]) do
    # `last` is nil when the slug has no segments; to_s keeps that from raising.
    name = slug.split('/').last.to_s.gsub('.html', '')
    "<span class='descname'>#{name}</span>"
  end
end
124+
# Replaces every "[param:Type name]" in +content+ with a typed parameter span
# whose name is rendered inside <code>.
#
# @param content [String]
# @return [String] a new string with the matches replaced
def format_constructor_params(content)
  content.gsub(PATTERNS[:constructor_param]) do
    type, name = Regexp.last_match.captures
    "<span class='sig-param'><span class='sig-type'>#{type}</span> <code class='sig-name'>#{name}</code></span>"
  end
end
131+
# Converts "[example:...]", "[link:...]" and "[page:...]" markup inside every
# element into anchors, then normalizes the href attributes of the page.
#
# The text variants ("[link:url text]", "[page:path text]") are applied
# before the bare variants so the bare patterns do not swallow them.
def format_links
  css('*').each do |node|
    html = node.inner_html
    # Every link pattern starts with "[", so skip elements that have none.
    next unless html.include?('[')

    rewritten = html
                .gsub(PATTERNS[:example_link]) do
                  create_external_link("https://threejs.org/examples/##{Regexp.last_match(1)}", Regexp.last_match(2))
                end
                .gsub(PATTERNS[:external_link_text]) do
                  create_external_link(Regexp.last_match(1), Regexp.last_match(2))
                end
                .gsub(PATTERNS[:external_link]) do
                  create_external_link(Regexp.last_match(1), Regexp.last_match(1))
                end
                .gsub(PATTERNS[:page_link_text]) do
                  create_internal_link(Regexp.last_match(1), Regexp.last_match(2))
                end
                .gsub(PATTERNS[:page_link]) do
                  create_internal_link(Regexp.last_match(1), Regexp.last_match(1))
                end

    # Only assign (and thereby reparse) when something actually changed.
    node.inner_html = rewritten unless rewritten == html
  end

  normalize_href_attributes
end
148+
# Builds an anchor with class "reference external".
#
# @param url [String] inserted verbatim as the href
# @param text [String] inserted verbatim as the link content
# @return [String] HTML fragment
def create_external_link(url, text)
  "<a class='reference external' href='#{url}'>#{text}</a>"
end
152+
# Builds an anchor with class "reference internal" whose content is wrapped
# in <code>. The path is downcased for both the href and the "js-" class.
#
# @param path [String]
# @param text [String] inserted verbatim inside the <code> element
# @return [String] HTML fragment
def create_internal_link(path, text)
  target = path.downcase
  "<a class='reference internal' href='#{target}'><code class='xref js js-#{target}'>#{text}</code></a>"
end
156+
# Rewrites the href of every anchor that does not start with "http":
# "../" segments are removed, the value is downcased, and a trailing ".html"
# is dropped. Those anchors also get the class "reference internal".
def normalize_href_attributes
  css('a[href]').each do |link|
    href = link['href']
    next if href.start_with?('http')

    # gsub with an empty replacement is what ActiveSupport's String#remove does.
    link['href'] = href.gsub('../', '').downcase.sub(/\.html\z/, '')
    link['class'] = 'reference internal'
  end
end
164+
# Groups each <h2> and the nodes that follow it (up to the next <h2>) inside
# a <div class="section">, and renames the "desc" paragraph class.
def add_section_structure
  css('h2').each do |heading|
    heading['class'] = 'section-title'
    first_sibling = heading.next_element
    # A heading with no following element is left unwrapped.
    next if first_sibling.nil?

    section = doc.document.create_element('div')
    section['class'] = 'section'
    heading.after(section)
    section.add_child(heading)

    sibling = first_sibling
    until sibling.nil? || sibling.name == 'h2'
      # Grab the successor first: add_child moves the node out of this spot.
      following = sibling.next
      section.add_child(sibling)
      sibling = following
    end
  end

  css('p.desc').each { |paragraph| paragraph['class'] = 'section-desc' }
end
186+
# Replaces each <p> whose text starts with "Note:" by an admonition block:
# a <div class="admonition note"> holding a "Note" title paragraph and a
# paragraph with the original markup minus its first "Note:".
def format_notes
  css('p').each do |paragraph|
    next unless paragraph.content.start_with?('Note:')

    owner = doc.document

    heading = owner.create_element('p')
    heading['class'] = 'first admonition-title'
    heading.content = 'Note'

    body = owner.create_element('p')
    body['class'] = 'last'
    body.inner_html = paragraph.inner_html.sub('Note:', '').strip

    admonition = owner.create_element('div')
    admonition['class'] = 'admonition note'
    admonition.add_child(heading)
    admonition.add_child(body)

    paragraph.replace(admonition)
  end
end
207+
# Gives every <h1>-<h4> an id (unless it already has one) and appends the
# "section-header" class, then runs the inline-code pass.
#
# The generated id is the stripped, downcased heading text with each run of
# non-word characters replaced by "-".
def add_heading_attributes
  css('h1, h2, h3, h4').each do |node|
    node['id'] ||= node.content.strip.downcase.gsub(/[^\w]+/, '-')
    # Join only the non-empty parts so a heading without a class does not end
    # up with a leading space in its class attribute.
    node['class'] = [node['class'].to_s, 'section-header'].reject(&:empty?).join(' ')
  end

  format_inline_code
end
217+
# Turns `backtick-quoted` text inside <p>, <li>, <dt>, <dd> and
# ".property-type" elements into <code> markup. Elements that contain a <pre>
# are skipped.
def format_inline_code
  selectors = ['p', 'li', 'dt', 'dd', '.property-type'].join(', ')
  css(selectors).each do |node|
    next if node.at_css('pre')

    html = node.inner_html
    # Without a backtick there is nothing to convert, so avoid the reparse.
    next unless html.include?('`')

    node.inner_html = html.gsub(PATTERNS[:inline_code]) do
      "<code class='docutils literal notranslate'><span class='pre'>#{Regexp.last_match(1)}</span></code>"
    end
  end
end
227+ end
228+ end
229+ end
0 commit comments