@@ -6,6 +6,7 @@ var _s = require('underscore.string');
66var htmlparser = require ( "htmlparser" ) ;
77
88var helper = require ( './helper' ) ;
9+ var format = require ( './formatter' ) ;
910
1011function htmlToText ( html , options ) {
1112 options = options || { } ;
@@ -42,151 +43,6 @@ function filterBody(dom) {
4243 return result || dom ;
4344}
4445
/**
 * Transposes a list of arrays via underscore's `zip`: the n-th entries of
 * every inner array are grouped together into the n-th output array.
 *
 * @param {Array<Array>} array - Arrays to merge; each one is passed to
 *   `_.zip` as a separate argument.
 * @returns {Array<Array>} Whatever `_.zip` returns for those arguments.
 */
function zip(array) {
  // `_.zip` is variadic, so the outer array is spread with `apply`.
  var transpose = _.zip;
  return transpose.apply(_, array);
}
48-
/**
 * Greedily wraps `text` so that no line is longer than `max` characters.
 *
 * Words are runs of non-whitespace; any run of whitespace between them is
 * collapsed to a single space (or a line break). A single word longer than
 * `max` is never split: it is placed on a line of its own.
 *
 * @param {string} text - Text to wrap; leading/trailing whitespace is ignored.
 * @param {number} max - Maximum number of characters per line.
 * @returns {string} The wrapped text, lines separated by '\n', with no
 *   leading or trailing line break.
 */
function wordwrap(text, max) {
  var source = text == null ? '' : String(text);
  var words = source.trim().split(/\s+/);
  var lines = [];
  var current = '';

  for (var i = 0; i < words.length; i++) {
    var word = words[i];
    // Splitting a blank string yields one empty token; skip it.
    if (word === '') {
      continue;
    }
    if (current === '') {
      // First word of a line is always accepted, even when it is too long,
      // so that no empty line is emitted in front of it.
      current = word;
    } else if (current.length + 1 + word.length <= max) {
      // The "+ 1" accounts for the space that joins the two words.
      current += ' ' + word;
    } else {
      lines.push(current);
      current = word;
    }
  }

  if (current !== '') {
    lines.push(current);
  }
  return lines.join('\n');
}
68-
/**
 * Renders a text node: strips surrounding whitespace from its raw content,
 * decodes HTML entities and word-wraps the result.
 *
 * @param {Object} elem - Text node; its `raw` property holds the content.
 * @param {Object} options - Conversion options; `wordwrap` is the line width
 *   handed to `wordwrap`.
 * @returns {string} The wrapped plain text.
 */
function formatText(elem, options) {
  var decoded = helper.decodeHTMLEntities(_s.strip(elem.raw));
  return wordwrap(decoded, options.wordwrap);
}
74-
/**
 * Renders a line break element as a newline followed by whatever its
 * children render to.
 *
 * @param {Object} elem - The element node being rendered.
 * @param {Function} fn - Renderer invoked as `fn(children, options)`.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} '\n' plus the rendered children.
 */
function formatBreak(elem, fn, options) {
  // A break element normally has no `children` property at all; hand the
  // renderer an empty list instead of `undefined`, as `walk` does itself.
  return '\n' + fn(elem.children || [], options);
}
78-
/**
 * Renders a paragraph: its children followed by a blank line.
 *
 * @param {Object} elem - The element node being rendered.
 * @param {Function} fn - Renderer invoked as `fn(children, options)`.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} The rendered children plus '\n\n'.
 */
function formatParagraph(elem, fn, options) {
  // An empty paragraph may have no `children` property; fall back to an
  // empty list so the renderer never receives `undefined`.
  return fn(elem.children || [], options) + '\n\n';
}
82-
/**
 * Renders a heading: its children in upper case followed by a line break.
 *
 * @param {Object} elem - The element node being rendered.
 * @param {Function} fn - Renderer invoked as `fn(children, options)`; must
 *   return a string.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} The upper-cased rendered children plus '\n'.
 */
function formatTitle(elem, fn, options) {
  // An empty heading may have no `children` property; fall back to an
  // empty list so the renderer never receives `undefined`.
  return fn(elem.children || [], options).toUpperCase() + '\n';
}
86-
/**
 * Renders an anchor as its link target, with a leading `mailto:` scheme
 * removed so that mail links show up as the bare address.
 *
 * Anchors without an `href` (for example named anchors, or anchors with no
 * attributes at all) have no target to print; they are rendered as their
 * children instead of raising a TypeError.
 *
 * @param {Object} elem - The anchor element; `attribs.href` is read if present.
 * @param {Function} fn - Renderer invoked as `fn(children, options)`; only
 *   used when the anchor has no `href`.
 * @param {Object} options - Conversion options, passed through to `fn`.
 * @returns {string} The link target, or the rendered children.
 */
function formatAnchor(elem, fn, options) {
  var href = elem.attribs && elem.attribs.href;
  if (typeof href !== 'string') {
    return fn(elem.children || [], options);
  }
  // URI schemes are case-insensitive, so "MAILTO:" is stripped as well.
  return href.replace(/^mailto:/i, '');
}
90-
/**
 * Renders a horizontal rule as a row of dashes followed by a blank line.
 *
 * @param {Object} elem - The element node (unused).
 * @param {Function} fn - Child renderer (unused).
 * @param {Object} options - Conversion options; `wordwrap` is the number of
 *   dashes to emit.
 * @returns {string} The dashes plus '\n\n'.
 */
function formatHorizontalLine(elem, fn, options) {
  var rule = _s.repeat('-', options.wordwrap);
  return rule + '\n\n';
}
94-
/**
 * Renders one list entry: `prefix` followed by the entry's children, with
 * continuation lines indented to line up behind the prefix.
 *
 * @param {string} prefix - Bullet or number marker placed before the entry.
 * @param {Object} elem - The list entry node.
 * @param {Function} fn - Renderer invoked as `fn(children, options)`; must
 *   return a string.
 * @param {Object} options - Conversion options. A shallow copy is made, so
 *   the caller's object is not modified.
 * @returns {string} The prefixed, indented entry terminated by '\n'.
 */
function formatListEntry(prefix, elem, fn, options) {
  // The prefix eats into the available width, so nested content gets a
  // correspondingly narrower wordwrap.
  var entryOptions = _.clone(options);
  entryOptions.wordwrap -= prefix.length;

  // An empty entry may have no `children` property; fall back to an empty
  // list so the renderer never receives `undefined`.
  var text = fn(elem.children || [], entryOptions);

  // Indent every continuation line by the width of the prefix.
  var indent = _s.repeat(' ', prefix.length);
  text = text.replace(/\n/g, '\n' + indent);

  return prefix + text + '\n';
}
106-
/**
 * Renders an unordered (`ul`) or ordered (`ol`) list, one entry per child,
 * followed by an empty line. Any other element name yields just '\n'.
 *
 * Unordered entries are prefixed with ' * '. Ordered entries are prefixed
 * with their 1-based number, padded so that the entry text lines up even
 * when the numbers have different widths.
 *
 * @param {Object} elem - The list element; `name` selects the list style.
 * @param {Function} fn - Renderer passed through to `formatListEntry`.
 * @param {Object} options - Conversion options, passed through.
 * @returns {string} The rendered entries plus a trailing '\n'.
 */
function formatList(elem, fn, options) {
  var result = '';

  // An empty list may have no `children` property. Whitespace-only text
  // nodes between entries are not entries: rendering them would produce
  // empty bullets and shift the numbering of ordered lists.
  var entries = _.filter(elem.children || [], function (child) {
    return !(child.type === 'text' && /^\s*$/.test(child.raw || ''));
  });

  if (elem.name === 'ul') {
    _.each(entries, function (entry) {
      result += formatListEntry(' * ', entry, fn, options);
    });
  } else if (elem.name === 'ol') {
    // Number of digits of the largest index.
    var maxLength = entries.length.toString().length;
    _.each(entries, function (entry, i) {
      var index = i + 1;
      // Pad shorter numbers so that all entry texts start in one column.
      var spacing = maxLength - index.toString().length;
      var prefix = ' ' + index + '. ' + _s.repeat(' ', spacing);
      result += formatListEntry(prefix, entry, fn, options);
    });
  }
  return result + '\n';
}
126-
/**
 * Renders a two-dimensional array of cell strings as column-aligned text.
 *
 * Every cell is trimmed, right-padded with spaces to the width of the widest
 * trimmed cell in its column, and followed by a single separating space.
 * Rows may have different lengths; a missing cell simply does not contribute
 * to the width of its column.
 *
 * @param {Array<Array<string>>} table - Rows of cell texts.
 * @returns {string} One line per row, followed by an empty line.
 */
function tableToString(table) {
  // Normalise the cells first so that widths and padding are computed from
  // exactly the text that gets printed.
  var rows = table.map(function (row) {
    return row.map(function (col) {
      return (col == null ? '' : String(col)).trim();
    });
  });

  // Widest cell per column.
  var widths = [];
  rows.forEach(function (row) {
    row.forEach(function (col, i) {
      widths[i] = Math.max(widths[i] || 0, col.length);
    });
  });

  // Build the table.
  var text = '';
  rows.forEach(function (row) {
    row.forEach(function (col, i) {
      var padding = new Array(widths[i] - col.length + 1).join(' ');
      text += col + padding + ' ';
    });
    text += '\n';
  });
  return text + '\n';
}
153-
/**
 * Renders a table element as column-aligned plain text.
 *
 * Only `tr` element children of `elem` are considered, and within them only
 * `th` and `td` elements. Header cells are rendered through `formatTitle`,
 * data cells through `fn`. A cell whose text spans several lines produces
 * several output rows. A cell with a `colspan` attribute is followed by
 * empty filler cells so that later cells stay in their column.
 *
 * @param {Object} elem - The table element.
 * @param {Function} fn - Renderer invoked as `fn(children, options)`; must
 *   return a string.
 * @param {Object} options - Conversion options, passed through.
 * @returns {string} The table as produced by `tableToString`.
 */
function formatTable(elem, fn, options) {
  var table = [];

  _.each(elem.children, function (row) {
    if (row.type !== 'tag' || row.name !== 'tr') {
      return;
    }

    // One entry per column; each entry is the list of text lines of a cell.
    var cells = [];
    _.each(row.children, function (cell) {
      if (cell.type !== 'tag') {
        return;
      }

      var text;
      if (cell.name === 'th') {
        text = formatTitle(cell, fn, options);
      } else if (cell.name === 'td') {
        // An empty cell may have no `children` property.
        text = fn(cell.children || [], options);
      } else {
        return;
      }
      cells.push(_.compact(text.split('\n')));

      // Fill colspans with empty cells, for header and data cells alike.
      // A missing or non-numeric colspan parses to NaN and adds nothing.
      var span = cell.attribs ? parseInt(cell.attribs.colspan, 10) : NaN;
      for (var i = 1; i < span; i++) {
        cells.push(['']);
      }
    });

    // Turn "lines per cell" into "cells per line"; cells with fewer lines
    // than their neighbours are padded with empty strings.
    _.each(zip(cells), function (line) {
      table.push(_.map(line, function (col) {
        return col || '';
      }));
    });
  });

  return tableToString(table);
}
189-
19046function containsTable ( attr , tables ) {
19147 if ( tables === true ) return true ;
19248
@@ -216,38 +72,40 @@ function walk(dom, options) {
21672 case 'tag' :
21773 switch ( elem . name ) {
21874 case 'a' :
219- result += formatAnchor ( elem , walk , options ) ;
75+ result += format . anchor ( elem , walk , options ) ;
22076 break ;
22177 case 'p' :
222- result += formatParagraph ( elem , walk , options ) ;
78+ result += format . paragraph ( elem , walk , options ) ;
22379 break ;
22480 case 'h1' :
22581 case 'h2' :
22682 case 'h3' :
22783 case 'h4' :
228- result += formatTitle ( elem , walk , options ) ;
84+ result += format . heading ( elem , walk , options ) ;
22985 break ;
23086 case 'br' :
231- result += formatBreak ( elem , walk , options ) ;
87+ result += format . lineBreak ( elem , walk , options ) ;
23288 break ;
23389 case 'hr' :
234- result += formatHorizontalLine ( elem , walk , options ) ;
90+ result += format . horizontalLine ( elem , walk , options ) ;
23591 break ;
23692 case 'ul' :
93+ result += format . unorderedList ( elem , walk , options ) ;
94+ break ;
23795 case 'ol' :
238- result += formatList ( elem , walk , options ) ;
96+ result += format . orderedList ( elem , walk , options ) ;
23997 break ;
24098 case 'table' :
24199 if ( containsTable ( elem . attribs , options . tables ) ) {
242- result += formatTable ( elem , walk , options ) ;
100+ result += format . table ( elem , walk , options ) ;
243101 break ;
244102 }
245103 default :
246104 result += walk ( elem . children || [ ] , options ) ;
247105 }
248106 break ;
249107 case 'text' :
250- if ( elem . raw !== '\r\n' ) result += formatText ( elem , options ) ;
108+ if ( elem . raw !== '\r\n' ) result += format . text ( elem , options ) ;
251109 break ;
252110 default :
253111 result += walk ( elem . children || [ ] , options ) ;
0 commit comments