diff --git a/README.md b/README.md index 7adf55d..12f960c 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,7 @@ You can configure the behaviour of html-to-text with the following options: * `hideLinkHrefIfSameAsText` by default links are translated the following `text` => becomes => `text [link]`. If this option is set to true and `link` and `text` are the same, `[link]` will be hidden and only `text` visible. * `ignoreHref` ignore all document links if `true`. * `ignoreImage` ignore all document images if `true`. + * `ignoreVideo` ignore all document videos if `true`. * `preserveNewlines` by default, any newlines `\n` in a block of text will be removed. If `true`, these newlines will not be removed. * `decodeOptions` defines the text decoding options given to `he.decode`. For more informations see the [he](https://github.com/mathiasbynens/he) module. * `uppercaseHeadings` by default, headings (`

`, `

`, etc) are uppercased. Set to `false` to leave headings as they are. diff --git a/bin/cli.js b/bin/cli.js index 0372178..231fc36 100755 --- a/bin/cli.js +++ b/bin/cli.js @@ -8,6 +8,7 @@ var argv = optimist .default('wordwrap', 80) .default('ignore-href', false) .default('ignore-image', false) + .default('ignore-video', false) .argv; var text = ''; @@ -25,7 +26,8 @@ process.stdin.on('end', function end() { tables: interpretTables(argv.tables), wordwrap: argv.wordwrap, ignoreHref: argv['ignore-href'], - ignoreImage: argv['ignore-image'] + ignoreImage: argv['ignore-image'], + ignoreVideo: argv['ignore-video'] }); process.stdout.write(text + '\n', 'utf-8'); }); diff --git a/example/test.html b/example/test.html index 4e8706e..ab14ea9 100644 --- a/example/test.html +++ b/example/test.html @@ -142,5 +142,19 @@

Pretty printed Source Code

console.log(text); }); + +
+

Img tag

+ + +
+

Video tag (src attribute)

+ + +
+

Video tag (source tag)

/**
 * Renders a <video> element as plain text.
 *
 * If the element itself carries a `src` attribute, that URL is emitted as
 * `[src]`. Otherwise every direct child <source> tag that has a `src`
 * contributes one `[src]` marker, in document order.
 *
 * @param {Object} elem - Parsed DOM node for the <video> tag; `attribs` and
 *   `children` are read and either may be absent.
 * @param {Object} options - Conversion options; a truthy `ignoreVideo`
 *   suppresses all output.
 * @returns {string} The concatenated `[src]` markers, or '' when videos are
 *   ignored or no source URL is present.
 */
function formatVideo(elem, options) {
  if (options.ignoreVideo) {
    return '';
  }

  var attribs = elem.attribs || {};
  if (attribs.src) {
    return '[' + attribs.src + ']';
  }

  var result = '';
  // A node without a children array is treated as having no sources.
  var children = elem.children || [];
  for (var i = 0; i < children.length; i++) {
    var child = children[i];
    if (child.type !== 'tag' || child.name.toLowerCase() !== 'source') {
      continue;
    }
    // Read through the defaulted object so a <source> lacking attributes
    // neither throws nor prints "[undefined]".
    var childAttribs = child.attribs || {};
    if (childAttribs.src) {
      result += '[' + childAttribs.src + ']';
    }
  }

  return result;
}