From 7381a2bfa5067855a8bc027cb6b2611c2bb94c73 Mon Sep 17 00:00:00 2001
From: Keanu Lee
Date: Mon, 7 Apr 2014 16:13:21 -0700
Subject: [PATCH 1/7] Added support for atomic tags; ignore tag attributes;
interpret whitespace.
---
.gitignore | 1 -
README.md | 2 +-
js/htmldiff.js | 482 +++++++++++++++++++++++++++++
package.json | 4 +-
src/htmldiff.coffee | 125 +++++++-
test/diff.spec.coffee | 14 +-
test/html_to_tokens.spec.coffee | 37 +++
test/render_operations.spec.coffee | 27 ++
8 files changed, 672 insertions(+), 20 deletions(-)
create mode 100644 js/htmldiff.js
diff --git a/.gitignore b/.gitignore
index 06f62bf..3c3629e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1 @@
-*.js
node_modules
diff --git a/README.md b/README.md
index 7da4ee1..fce14f0 100644
--- a/README.md
+++ b/README.md
@@ -1,7 +1,7 @@
# htmldiff.js
### HTML Diffing in JavaScript (ok, CoffeeScript actually.)
-[](http://travis-ci.org/tnwinc/htmldiff.js)
+[](https://travis-ci.org/keanulee/htmldiff.js)
`htmldiff.js` is a CoffeeScript port of https://github.com/myobie/htmldiff
(This one has a few more tests.)
diff --git a/js/htmldiff.js b/js/htmldiff.js
new file mode 100644
index 0000000..e4c56f0
--- /dev/null
+++ b/js/htmldiff.js
@@ -0,0 +1,482 @@
+// Generated by CoffeeScript 1.7.1
+(function() {
+ var Match, calculate_operations, consecutive_where, create_index, diff, find_match, find_matching_blocks, get_key_for_token, html_to_tokens, is_end_of_atomic_tag, is_end_of_tag, is_start_of_atomic_tag, is_start_of_tag, is_tag, is_whitespace, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, wrap;
+
+ is_end_of_tag = function(char) {
+ return char === '>';
+ };
+
+ is_start_of_tag = function(char) {
+ return char === '<';
+ };
+
+ is_whitespace = function(char) {
+ return /^\s+$/.test(char);
+ };
+
+ is_tag = function(token) {
+ return /^\s*<[^>]+>\s*$/.test(token);
+ };
+
+ isnt_tag = function(token) {
+ return !is_tag(token);
+ };
+
+
+ /*
+ * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
+ * child nodes should not be compared - the entire tag should be treated as one token.
+ *
+ * @param {string} word The characters of the current token read so far.
+ *
+ * @return {string|null} The name of the atomic tag if the word will be an atomic tag,
+ * null otherwise
+ */
+
+ is_start_of_atomic_tag = function(word) {
+ var result;
+ result = /^<(iframe|object|math|svg)/.exec(word);
+ if (result) {
+ result = result[1];
+ }
+ return result;
+ };
+
+
+ /*
+ * Checks if the current word is the end of an atomic tag (i.e. it has all the characters,
+ * except for the end bracket of the closing tag, such as "<iframe></iframe").
+ *
+ * @param {string} word The characters of the current token read so far.
+ * @param {string} tag The ending tag to look for.
+ *
+ * @return {boolean} True if the word is now a complete token (including the end tag),
+ * false otherwise.
+ */
+
+ is_end_of_atomic_tag = function(word, tag) {
+ return (word.substring(word.length - tag.length - 2)) === ("</" + tag);
+ };
+
+ Match = (function() {
+ function Match(start_in_before, start_in_after, length) {
+ this.start_in_before = start_in_before;
+ this.start_in_after = start_in_after;
+ this.length = length;
+ this.end_in_before = (this.start_in_before + this.length) - 1;
+ this.end_in_after = (this.start_in_after + this.length) - 1;
+ }
+
+ return Match;
+
+ })();
+
+
+ /*
+ * Tokenizes a string of HTML.
+ *
+ * @param {string} html The string to tokenize.
+ *
+ * @return {Array.<string>} The list of tokens.
+ */
+
+ html_to_tokens = function(html) {
+ var atomic_tag, char, current_atomic_tag, current_word, mode, words, _i, _len;
+ mode = 'char';
+ current_word = '';
+ current_atomic_tag = '';
+ words = [];
+ for (_i = 0, _len = html.length; _i < _len; _i++) {
+ char = html[_i];
+ switch (mode) {
+ case 'tag':
+ atomic_tag = is_start_of_atomic_tag(current_word);
+ if (atomic_tag) {
+ mode = 'atomic_tag';
+ current_atomic_tag = atomic_tag;
+ current_word += char;
+ } else if (is_end_of_tag(char)) {
+ current_word += '>';
+ words.push(current_word);
+ current_word = '';
+ if (is_whitespace(char)) {
+ mode = 'whitespace';
+ } else {
+ mode = 'char';
+ }
+ } else {
+ current_word += char;
+ }
+ break;
+ case 'atomic_tag':
+ if ((is_end_of_tag(char)) && (is_end_of_atomic_tag(current_word, current_atomic_tag))) {
+ current_word += '>';
+ words.push(current_word);
+ current_word = '';
+ current_atomic_tag = '';
+ mode = 'char';
+ } else {
+ current_word += char;
+ }
+ break;
+ case 'char':
+ if (is_start_of_tag(char)) {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = '<';
+ mode = 'tag';
+ } else if (/\s/.test(char)) {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = char;
+ mode = 'whitespace';
+ } else if (/[\w\d\#@]/.test(char)) {
+ current_word += char;
+ } else if (/&/.test(char)) {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = char;
+ } else {
+ current_word += char;
+ words.push(current_word);
+ current_word = '';
+ }
+ break;
+ case 'whitespace':
+ if (is_start_of_tag(char)) {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = '<';
+ mode = 'tag';
+ } else if (is_whitespace(char)) {
+ current_word += char;
+ } else {
+ if (current_word) {
+ words.push(current_word);
+ }
+ current_word = char;
+ mode = 'char';
+ }
+ break;
+ default:
+ throw new Error("Unknown mode " + mode);
+ }
+ }
+ if (current_word) {
+ words.push(current_word);
+ }
+ return words;
+ };
+
+
+ /*
+ * Creates a key that should be used to match tokens. This is useful, for example, if we want
+ * to consider two open tag tokens as equal, even if they don't have the same attributes. We
+ * use a key instead of overwriting the token because we may want to render original string
+ * without losing the attributes.
+ *
+ * @param {string} token The token to create the key for.
+ *
+ * @return {string} The identifying key that should be used to match before and after tokens.
+ */
+
+ get_key_for_token = function(token) {
+ var tag_name;
+ tag_name = /<([^\s>]+)[\s>]/.exec(token);
+ if (tag_name) {
+ return "<" + (tag_name[1].toLowerCase()) + ">";
+ }
+ if (token) {
+ return token.replace(/(\s+|&nbsp;|&#160;)/g, ' ');
+ }
+ return token;
+ };
+
+ find_match = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after) {
+ var best_match_in_after, best_match_in_before, best_match_length, index_in_after, index_in_before, locations_in_after, looking_for, match, match_length_at, new_match_length, new_match_length_at, _i, _j, _len;
+ best_match_in_before = start_in_before;
+ best_match_in_after = start_in_after;
+ best_match_length = 0;
+ match_length_at = {};
+ for (index_in_before = _i = start_in_before; start_in_before <= end_in_before ? _i < end_in_before : _i > end_in_before; index_in_before = start_in_before <= end_in_before ? ++_i : --_i) {
+ new_match_length_at = {};
+ looking_for = get_key_for_token(before_tokens[index_in_before]);
+ locations_in_after = index_of_before_locations_in_after_tokens[looking_for];
+ for (_j = 0, _len = locations_in_after.length; _j < _len; _j++) {
+ index_in_after = locations_in_after[_j];
+ if (index_in_after < start_in_after) {
+ continue;
+ }
+ if (index_in_after >= end_in_after) {
+ break;
+ }
+ if (match_length_at[index_in_after - 1] == null) {
+ match_length_at[index_in_after - 1] = 0;
+ }
+ new_match_length = match_length_at[index_in_after - 1] + 1;
+ new_match_length_at[index_in_after] = new_match_length;
+ if (new_match_length > best_match_length) {
+ best_match_in_before = index_in_before - new_match_length + 1;
+ best_match_in_after = index_in_after - new_match_length + 1;
+ best_match_length = new_match_length;
+ }
+ }
+ match_length_at = new_match_length_at;
+ }
+ if (best_match_length !== 0) {
+ match = new Match(best_match_in_before, best_match_in_after, best_match_length);
+ }
+ return match;
+ };
+
+ recursively_find_matching_blocks = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after, matching_blocks) {
+ var match;
+ match = find_match(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after);
+ if (match != null) {
+ if (start_in_before < match.start_in_before && start_in_after < match.start_in_after) {
+ recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, match.start_in_before, start_in_after, match.start_in_after, matching_blocks);
+ }
+ matching_blocks.push(match);
+ if (match.end_in_before <= end_in_before && match.end_in_after <= end_in_after) {
+ recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, match.end_in_before + 1, end_in_before, match.end_in_after + 1, end_in_after, matching_blocks);
+ }
+ }
+ return matching_blocks;
+ };
+
+
+ /*
+ * Creates an index (A.K.A. hash table) that will be used to match the list of before
+ * tokens with the list of after tokens.
+ *
+ * @param {Object} options An object with the following:
+ * - {Array.<string>} find_these The list of tokens that will be used to search.
+ * - {Array.<string>} in_these The list of tokens that will be returned.
+ *
+ * @return {Object} An index that can be used to search for tokens.
+ */
+
+ create_index = function(options) {
+ var idx, index, queries, query, results, _i, _len;
+ if (options.find_these == null) {
+ throw new Error('params must have find_these key');
+ }
+ if (options.in_these == null) {
+ throw new Error('params must have in_these key');
+ }
+ queries = options.find_these.map(function(token) {
+ return get_key_for_token(token);
+ });
+ results = options.in_these.map(function(token) {
+ return get_key_for_token(token);
+ });
+ index = {};
+ for (_i = 0, _len = queries.length; _i < _len; _i++) {
+ query = queries[_i];
+ index[query] = [];
+ idx = results.indexOf(query);
+ while (idx !== -1) {
+ index[query].push(idx);
+ idx = results.indexOf(query, idx + 1);
+ }
+ }
+ return index;
+ };
+
+ find_matching_blocks = function(before_tokens, after_tokens) {
+ var index_of_before_locations_in_after_tokens, matching_blocks;
+ matching_blocks = [];
+ index_of_before_locations_in_after_tokens = create_index({
+ find_these: before_tokens,
+ in_these: after_tokens
+ });
+ return recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, 0, before_tokens.length, 0, after_tokens.length, matching_blocks);
+ };
+
+ calculate_operations = function(before_tokens, after_tokens) {
+ var action_map, action_up_to_match_positions, index, is_single_whitespace, last_op, match, match_starts_at_current_position_in_after, match_starts_at_current_position_in_before, matches, op, operations, position_in_after, position_in_before, post_processed, _i, _j, _len, _len1;
+ if (before_tokens == null) {
+ throw new Error('before_tokens?');
+ }
+ if (after_tokens == null) {
+ throw new Error('after_tokens?');
+ }
+ position_in_before = position_in_after = 0;
+ operations = [];
+ action_map = {
+ 'false,false': 'replace',
+ 'true,false': 'insert',
+ 'false,true': 'delete',
+ 'true,true': 'none'
+ };
+ matches = find_matching_blocks(before_tokens, after_tokens);
+ matches.push(new Match(before_tokens.length, after_tokens.length, 0));
+ for (index = _i = 0, _len = matches.length; _i < _len; index = ++_i) {
+ match = matches[index];
+ match_starts_at_current_position_in_before = position_in_before === match.start_in_before;
+ match_starts_at_current_position_in_after = position_in_after === match.start_in_after;
+ action_up_to_match_positions = action_map[[match_starts_at_current_position_in_before, match_starts_at_current_position_in_after].toString()];
+ if (action_up_to_match_positions !== 'none') {
+ operations.push({
+ action: action_up_to_match_positions,
+ start_in_before: position_in_before,
+ end_in_before: (action_up_to_match_positions !== 'insert' ? match.start_in_before - 1 : void 0),
+ start_in_after: position_in_after,
+ end_in_after: (action_up_to_match_positions !== 'delete' ? match.start_in_after - 1 : void 0)
+ });
+ }
+ if (match.length !== 0) {
+ operations.push({
+ action: 'equal',
+ start_in_before: match.start_in_before,
+ end_in_before: match.end_in_before,
+ start_in_after: match.start_in_after,
+ end_in_after: match.end_in_after
+ });
+ }
+ position_in_before = match.end_in_before + 1;
+ position_in_after = match.end_in_after + 1;
+ }
+ post_processed = [];
+ last_op = {
+ action: 'none'
+ };
+ is_single_whitespace = function(op) {
+ if (op.action !== 'equal') {
+ return false;
+ }
+ if (op.end_in_before - op.start_in_before !== 0) {
+ return false;
+ }
+ return /^\s$/.test(before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9));
+ };
+ for (_j = 0, _len1 = operations.length; _j < _len1; _j++) {
+ op = operations[_j];
+ if (((is_single_whitespace(op)) && last_op.action === 'replace') || (op.action === 'replace' && last_op.action === 'replace')) {
+ last_op.end_in_before = op.end_in_before;
+ last_op.end_in_after = op.end_in_after;
+ } else {
+ post_processed.push(op);
+ last_op = op;
+ }
+ }
+ return post_processed;
+ };
+
+ consecutive_where = function(start, content, predicate) {
+ var answer, index, last_matching_index, token, _i, _len;
+ content = content.slice(start, +content.length + 1 || 9e9);
+ last_matching_index = void 0;
+ for (index = _i = 0, _len = content.length; _i < _len; index = ++_i) {
+ token = content[index];
+ answer = predicate(token);
+ if (answer === true) {
+ last_matching_index = index;
+ }
+ if (answer === false) {
+ break;
+ }
+ }
+ if (last_matching_index != null) {
+ return content.slice(0, +last_matching_index + 1 || 9e9);
+ }
+ return [];
+ };
+
+ wrap = function(tag, content) {
+ var length, non_tags, position, rendering, tags, val;
+ rendering = '';
+ position = 0;
+ length = content.length;
+ while (true) {
+ if (position >= length) {
+ break;
+ }
+ non_tags = consecutive_where(position, content, isnt_tag);
+ position += non_tags.length;
+ if (non_tags.length !== 0) {
+ val = non_tags.join('');
+ if (val.trim()) {
+ rendering += "<" + tag + ">" + val + "</" + tag + ">";
+ }
+ }
+ if (position >= length) {
+ break;
+ }
+ tags = consecutive_where(position, content, is_tag);
+ position += tags.length;
+ rendering += tags.join('');
+ }
+ return rendering;
+ };
+
+ op_map = {
+ equal: function(op, before_tokens, after_tokens) {
+ return after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9).join('');
+ },
+ insert: function(op, before_tokens, after_tokens) {
+ var val;
+ val = after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9);
+ return wrap('ins', val);
+ },
+ "delete": function(op, before_tokens, after_tokens) {
+ var val;
+ val = before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9);
+ return wrap('del', val);
+ }
+ };
+
+ op_map.replace = function(op, before_tokens, after_tokens) {
+ return (op_map["delete"](op, before_tokens, after_tokens)) + (op_map.insert(op, before_tokens, after_tokens));
+ };
+
+ render_operations = function(before_tokens, after_tokens, operations) {
+ var op, rendering, _i, _len;
+ rendering = '';
+ for (_i = 0, _len = operations.length; _i < _len; _i++) {
+ op = operations[_i];
+ rendering += op_map[op.action](op, before_tokens, after_tokens);
+ }
+ return rendering;
+ };
+
+ diff = function(before, after) {
+ var ops;
+ if (before === after) {
+ return before;
+ }
+ before = html_to_tokens(before);
+ after = html_to_tokens(after);
+ ops = calculate_operations(before, after);
+ return render_operations(before, after, ops);
+ };
+
+ diff.html_to_tokens = html_to_tokens;
+
+ diff.find_matching_blocks = find_matching_blocks;
+
+ find_matching_blocks.find_match = find_match;
+
+ find_matching_blocks.create_index = create_index;
+
+ find_matching_blocks.get_key_for_token = get_key_for_token;
+
+ diff.calculate_operations = calculate_operations;
+
+ diff.render_operations = render_operations;
+
+ if (typeof define === 'function') {
+ define([], function() {
+ return diff;
+ });
+ } else if (typeof module !== "undefined" && module !== null) {
+ module.exports = diff;
+ } else {
+ this.htmldiff = diff;
+ }
+
+}).call(this);
diff --git a/package.json b/package.json
index aaf09cc..893846a 100644
--- a/package.json
+++ b/package.json
@@ -4,8 +4,8 @@
"description": "HTML Diffing in JavaScript (CoffeeScript)",
"main": "htmldiff.js",
"scripts": {
- "test": "mocha -R min",
- "install": "coffee --compile src"
+ "test": "mocha -R min --compilers coffee:coffee-script/register",
+ "install": "coffee --output js/ --compile src/"
},
"repository": {
"type": "git",
diff --git a/src/htmldiff.coffee b/src/htmldiff.coffee
index dd5c658..ec0475c 100644
--- a/src/htmldiff.coffee
+++ b/src/htmldiff.coffee
@@ -4,20 +4,60 @@ is_whitespace = (char)-> /^\s+$/.test char
is_tag = (token)-> /^\s*<[^>]+>\s*$/.test token
isnt_tag = (token)-> not is_tag token
+###
+ * Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
+ * child nodes should not be compared - the entire tag should be treated as one token.
+ *
+ * @param {string} word The characters of the current token read so far.
+ *
+ * @return {string|null} The name of the atomic tag if the word will be an atomic tag,
+ * null otherwise
+###
+is_start_of_atomic_tag = (word)->
+ result = /^<(iframe|object|math|svg)/.exec word
+ result = result[1] if result
+ return result
+
+###
+ * Checks if the current word is the end of an atomic tag (i.e. it has all the characters,
+ * except for the end bracket of the closing tag, such as "<iframe></iframe").
+ *
+ * @param {string} word The characters of the current token read so far.
+ * @param {string} tag The ending tag to look for.
+ *
+ * @return {boolean} True if the word is now a complete token (including the end tag),
+ * false otherwise.
+###
+is_end_of_atomic_tag = (word, tag)->
+ (word.substring word.length - tag.length - 2) is "</#{tag}"
+
class Match
constructor: (@start_in_before, @start_in_after, @length)->
@end_in_before = (@start_in_before + @length) - 1
@end_in_after = (@start_in_after + @length) - 1
+###
+ * Tokenizes a string of HTML.
+ *
+ * @param {string} html The string to tokenize.
+ *
+ * @return {Array.<string>} The list of tokens.
+###
html_to_tokens = (html)->
mode = 'char'
current_word = ''
+ current_atomic_tag = ''
words = []
for char in html
switch mode
when 'tag'
- if is_end_of_tag char
+ atomic_tag = is_start_of_atomic_tag current_word
+ if atomic_tag
+ mode = 'atomic_tag'
+ current_atomic_tag = atomic_tag
+ current_word += char
+ else if is_end_of_tag char
current_word += '>'
words.push current_word
current_word = ''
@@ -27,6 +67,16 @@ html_to_tokens = (html)->
mode = 'char'
else
current_word += char
+ when 'atomic_tag'
+ if (is_end_of_tag char) \
+ and (is_end_of_atomic_tag current_word, current_atomic_tag)
+ current_word += '>'
+ words.push current_word
+ current_word = ''
+ current_atomic_tag = ''
+ mode = 'char'
+ else
+ current_word += char
when 'char'
if is_start_of_tag char
words.push current_word if current_word
@@ -36,11 +86,19 @@ html_to_tokens = (html)->
words.push current_word if current_word
current_word = char
mode = 'whitespace'
- else if /[\w\#@]+/i.test char
+ else if /[\w\d\#@]/.test char
+ # Consider '#' as part of the same word, since it might be part of an HTML escaped
+ # character (e.g. '&#160;').
current_word += char
- else
+ else if /&/.test char
+ # Consider '&' as the start of a new word, since it might be the start of an HTML
+ # escaped character (e.g. '&#160;').
words.push current_word if current_word
current_word = char
+ else
+ current_word += char
+ words.push current_word
+ current_word = ''
when 'whitespace'
if is_start_of_tag char
words.push current_word if current_word
@@ -57,6 +115,28 @@ html_to_tokens = (html)->
words.push current_word if current_word
return words
+###
+ * Creates a key that should be used to match tokens. This is useful, for example, if we want
+ * to consider two open tag tokens as equal, even if they don't have the same attributes. We
+ * use a key instead of overwriting the token because we may want to render original string
+ * without losing the attributes.
+ *
+ * @param {string} token The token to create the key for.
+ *
+ * @return {string} The identifying key that should be used to match before and after tokens.
+###
+get_key_for_token = (token)->
+ # If the token is a tag, return just the tag with no attributes since we do not compare
+ # attributes yet.
+ tag_name = /<([^\s>]+)[\s>]/.exec token
+ return "<#{tag_name[1].toLowerCase()}>" if tag_name
+
+ # If the token is text, collapse adjacent whitespace and replace non-breaking spaces with
+ # regular spaces.
+ return token.replace /(\s+|&nbsp;|&#160;)/g, ' ' if token
+
+ return token
+
find_match = (before_tokens, after_tokens,
index_of_before_locations_in_after_tokens,
start_in_before, end_in_before,
@@ -70,7 +150,7 @@ find_match = (before_tokens, after_tokens,
for index_in_before in [start_in_before...end_in_before]
new_match_length_at = {}
- looking_for = before_tokens[index_in_before]
+ looking_for = get_key_for_token before_tokens[index_in_before]
locations_in_after =
index_of_before_locations_in_after_tokens[looking_for]
@@ -128,17 +208,32 @@ recursively_find_matching_blocks = (before_tokens, after_tokens,
return matching_blocks
-create_index = (p)->
- throw new Error 'params must have find_these key' unless p.find_these?
- throw new Error 'params must have in_these key' unless p.in_these?
+###
+ * Creates an index (A.K.A. hash table) that will be used to match the list of before
+ * tokens with the list of after tokens.
+ *
+ * @param {Object} options An object with the following:
+ * - {Array.<string>} find_these The list of tokens that will be used to search.
+ * - {Array.<string>} in_these The list of tokens that will be returned.
+ *
+ * @return {Object} An index that can be used to search for tokens.
+###
+create_index = (options)->
+ throw new Error 'params must have find_these key' unless options.find_these?
+ throw new Error 'params must have in_these key' unless options.in_these?
+
+ queries = options.find_these.map (token)->
+ return get_key_for_token token
+ results = options.in_these.map (token)->
+ return get_key_for_token token
index = {}
- for token in p.find_these
- index[token] = []
- idx = p.in_these.indexOf token
+ for query in queries
+ index[query] = []
+ idx = results.indexOf query
while idx isnt -1
- index[token].push idx
- idx = p.in_these.indexOf token, idx+1
+ index[query].push idx
+ idx = results.indexOf query, idx+1
return index
@@ -240,7 +335,8 @@ wrap = (tag, content)->
non_tags = consecutive_where position, content, isnt_tag
position += non_tags.length
if non_tags.length isnt 0
- rendering += "<#{tag}>#{non_tags.join ''}</#{tag}>"
+ val = non_tags.join ''
+ rendering += "<#{tag}>#{val}</#{tag}>" if val.trim()
break if position >= length
tags = consecutive_where position, content, is_tag
@@ -251,7 +347,7 @@ wrap = (tag, content)->
op_map =
equal: (op, before_tokens, after_tokens)->
- before_tokens[op.start_in_before..op.end_in_before].join ''
+ after_tokens[op.start_in_after..op.end_in_after].join ''
insert: (op, before_tokens, after_tokens)->
val = after_tokens[op.start_in_after..op.end_in_after]
@@ -287,6 +383,7 @@ diff.html_to_tokens = html_to_tokens
diff.find_matching_blocks = find_matching_blocks
find_matching_blocks.find_match = find_match
find_matching_blocks.create_index = create_index
+find_matching_blocks.get_key_for_token = get_key_for_token
diff.calculate_operations = calculate_operations
diff.render_operations = render_operations
diff --git a/test/diff.spec.coffee b/test/diff.spec.coffee
index 68d0cc9..c0ad77c 100644
--- a/test/diff.spec.coffee
+++ b/test/diff.spec.coffee
@@ -9,9 +9,19 @@ describe 'Diff', ->
it 'should return the text', ->
(expect @res).equal 'input text'
- xdescribe 'When a letter is added', ->
+ describe 'When a letter is added', ->
beforeEach ->
@res = @cut 'input', 'input 2'
it 'should mark the new letter', ->
- (expect @res).to.equal 'input 2'
+ (expect @res).to.equal 'input 2'
+
+ describe 'Whitespace differences', ->
+ it 'should collapse adjacent whitespace', ->
+ (expect @cut 'Much \n\t spaces', 'Much spaces').to.equal 'Much spaces'
+
+ it 'should consider non-breaking spaces as equal', ->
+ (expect @cut 'Hello world', 'Hello world').to.equal 'Hello world'
+
+ it 'should consider non-breaking spaces and non-adjacent regular spaces as equal', ->
+ (expect @cut 'Hello world', 'Hello world').to.equal 'Hello world'
diff --git a/test/html_to_tokens.spec.coffee b/test/html_to_tokens.spec.coffee
index ac6c235..a770b18 100644
--- a/test/html_to_tokens.spec.coffee
+++ b/test/html_to_tokens.spec.coffee
@@ -28,3 +28,40 @@ describe 'html_to_tokens', ->
it 'should identify self closing tags as tokens', ->
(expect @cut '
hellogoodbye
')
.eql ['
', 'hello', '', 'goodbye', '
']
+
+ describe 'when encountering atomic tags', ->
+ it 'should identify an image tag as a single token', ->
+ (expect @cut '
')
+ .eql ['
', '', '', '
']
+
+ it 'should identify an iframe tag as a single token', ->
+ (expect @cut '')
+ .eql ['
', '', '
']
+
+ it 'should identify an object tag as a single token', ->
+ (expect @cut '')
+ .eql ['
', '', '
']
+
+ it 'should identify a math tag as a single token', ->
+ (expect @cut '')
+ .eql [
+ '
',
+ '',
+ '
']
+
+ it 'should identify a svg tag as a single token', ->
+ (expect @cut '')
+ .eql [
+ '
',
+ '',
+ '
']
diff --git a/test/render_operations.spec.coffee b/test/render_operations.spec.coffee
index a233315..179889a 100644
--- a/test/render_operations.spec.coffee
+++ b/test/render_operations.spec.coffee
@@ -63,3 +63,30 @@ describe 'render_operations', ->
it 'should keep the change inside the
', ->
(expect @res).to.equal '
thisI is awesome
'
+
+ describe 'empty tokens', ->
+ it 'should not be wrapped', ->
+ before = ['text']
+ after = ['text', ' ']
+
+ @res = @cut before, after
+
+ (expect @res).to.equal 'text'
+
+ describe 'tags with attributes', ->
+ it 'should treat attribute changes as equal and output the after tag', ->
+ before = ['
'
From fcf51222893da261f2d22eca88e8d8bfa1c90962 Mon Sep 17 00:00:00 2001
From: Keanu Lee
Date: Tue, 8 Apr 2014 12:27:45 -0700
Subject: [PATCH 2/7] PR comments
---
src/htmldiff.coffee | 5 +++--
test/html_to_tokens.spec.coffee | 2 +-
2 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/src/htmldiff.coffee b/src/htmldiff.coffee
index ec0475c..d746473 100644
--- a/src/htmldiff.coffee
+++ b/src/htmldiff.coffee
@@ -6,7 +6,8 @@ isnt_tag = (token)-> not is_tag token
###
* Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
- * child nodes should not be compared - the entire tag should be treated as one token.
+ * child nodes should not be compared - the entire tag should be treated as one token. This
+ * is useful for tags where it does not make sense to insert <ins> and <del> tags.
*
* @param {string} word The characters of the current token read so far.
*
@@ -118,7 +119,7 @@ html_to_tokens = (html)->
###
* Creates a key that should be used to match tokens. This is useful, for example, if we want
* to consider two open tag tokens as equal, even if they don't have the same attributes. We
- * use a key instead of overwriting the token because we may want to render original string
+ * use a key instead of overwriting the token because we may want to render the original string
* without losing the attributes.
*
* @param {string} token The token to create the key for.
diff --git a/test/html_to_tokens.spec.coffee b/test/html_to_tokens.spec.coffee
index a770b18..8455d61 100644
--- a/test/html_to_tokens.spec.coffee
+++ b/test/html_to_tokens.spec.coffee
@@ -55,7 +55,7 @@ describe 'html_to_tokens', ->
'r2',
'
']
- it 'should identify a svg tag as a single token', ->
+ it 'should identify an svg tag as a single token', ->
(expect @cut '')
From 31944823ab90b0d62950d69daff9bb0bbe996a13 Mon Sep 17 00:00:00 2001
From: Keanu Lee
Date: Thu, 10 Apr 2014 17:29:46 -0700
Subject: [PATCH 3/7] Wrap void and atomic tags with INS and DEL as well.
---
js/htmldiff.js | 37 ++++++++++++++++++++++++++----
src/htmldiff.coffee | 24 +++++++++++++++++--
test/html_to_tokens.spec.coffee | 4 ++++
test/render_operations.spec.coffee | 17 ++++++++++++++
4 files changed, 75 insertions(+), 7 deletions(-)
diff --git a/js/htmldiff.js b/js/htmldiff.js
index e4c56f0..76b01f4 100644
--- a/js/htmldiff.js
+++ b/js/htmldiff.js
@@ -1,6 +1,6 @@
// Generated by CoffeeScript 1.7.1
(function() {
- var Match, calculate_operations, consecutive_where, create_index, diff, find_match, find_matching_blocks, get_key_for_token, html_to_tokens, is_end_of_atomic_tag, is_end_of_tag, is_start_of_atomic_tag, is_start_of_tag, is_tag, is_whitespace, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, wrap;
+ var Match, calculate_operations, consecutive_where, create_index, diff, find_match, find_matching_blocks, get_key_for_token, html_to_tokens, is_end_of_atomic_tag, is_end_of_tag, is_start_of_atomic_tag, is_start_of_tag, is_tag, is_void_tag, is_whitespace, is_wrappable, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, wrap;
is_end_of_tag = function(char) {
return char === '>';
@@ -25,7 +25,8 @@
/*
* Checks if the current word is the beginning of an atomic tag. An atomic tag is one whose
- * child nodes should not be compared - the entire tag should be treated as one token.
+ * child nodes should not be compared - the entire tag should be treated as one token. This
+ * is useful for tags where it does not make sense to insert <ins> and <del> tags.
*
* @param {string} word The characters of the current token read so far.
*
@@ -35,7 +36,7 @@
is_start_of_atomic_tag = function(word) {
var result;
- result = /^<(iframe|object|math|svg)/.exec(word);
+ result = /^<(iframe|object|math|svg|script)/.exec(word);
if (result) {
result = result[1];
}
@@ -58,6 +59,32 @@
return (word.substring(word.length - tag.length - 2)) === ("</" + tag);
};
+
+ /*
+ * Checks if a tag is a void tag.
+ *
+ * @param {string} token The token to check.
+ *
+ * @return {boolean} True if the token is a void tag, false otherwise.
+ */
+
+ is_void_tag = function(token) {
+ return /^\s*<[^>]+\/>\s*$/.test(token);
+ };
+
+
+ /*
+ * Checks if a token can be wrapped inside a tag.
+ *
+ * @param {string} token The token to check.
+ *
+ * @return {boolean} True if the token can be wrapped inside a tag, false otherwise.
+ */
+
+ is_wrappable = function(token) {
+ return (isnt_tag(token)) || (is_start_of_atomic_tag(token)) || (is_void_tag(token));
+ };
+
Match = (function() {
function Match(start_in_before, start_in_after, length) {
this.start_in_before = start_in_before;
@@ -176,7 +203,7 @@
/*
* Creates a key that should be used to match tokens. This is useful, for example, if we want
* to consider two open tag tokens as equal, even if they don't have the same attributes. We
- * use a key instead of overwriting the token because we may want to render original string
+ * use a key instead of overwriting the token because we may want to render the original string
* without losing the attributes.
*
* @param {string} token The token to create the key for.
@@ -396,7 +423,7 @@
if (position >= length) {
break;
}
- non_tags = consecutive_where(position, content, isnt_tag);
+ non_tags = consecutive_where(position, content, is_wrappable);
position += non_tags.length;
if (non_tags.length !== 0) {
val = non_tags.join('');
diff --git a/src/htmldiff.coffee b/src/htmldiff.coffee
index d746473..37a84a0 100644
--- a/src/htmldiff.coffee
+++ b/src/htmldiff.coffee
@@ -15,7 +15,7 @@ isnt_tag = (token)-> not is_tag token
* null otherwise
###
is_start_of_atomic_tag = (word)->
- result = /^<(iframe|object|math|svg)/.exec word
+ result = /^<(iframe|object|math|svg|script)/.exec word
result = result[1] if result
return result
@@ -32,6 +32,26 @@ is_start_of_atomic_tag = (word)->
is_end_of_atomic_tag = (word, tag)->
(word.substring word.length - tag.length - 2) is "</#{tag}"
+###
+ * Checks if a tag is a void tag.
+ *
+ * @param {string} token The token to check.
+ *
+ * @return {boolean} True if the token is a void tag, false otherwise.
+###
+is_void_tag = (token) ->
+ /^\s*<[^>]+\/>\s*$/.test token
+
+###
+ * Checks if a token can be wrapped inside a tag.
+ *
+ * @param {string} token The token to check.
+ *
+ * @return {boolean} True if the token can be wrapped inside a tag, false otherwise.
+###
+is_wrappable = (token) ->
+ (isnt_tag token) or (is_start_of_atomic_tag token) or (is_void_tag token)
+
class Match
constructor: (@start_in_before, @start_in_after, @length)->
@end_in_before = (@start_in_before + @length) - 1
@@ -333,7 +353,7 @@ wrap = (tag, content)->
loop
break if position >= length
- non_tags = consecutive_where position, content, isnt_tag
+ non_tags = consecutive_where position, content, is_wrappable
position += non_tags.length
if non_tags.length isnt 0
val = non_tags.join ''
diff --git a/test/html_to_tokens.spec.coffee b/test/html_to_tokens.spec.coffee
index 8455d61..15139f5 100644
--- a/test/html_to_tokens.spec.coffee
+++ b/test/html_to_tokens.spec.coffee
@@ -65,3 +65,7 @@ describe 'html_to_tokens', ->
'' +
'',
'']
+
+ it 'should identify a script tag as a single token', ->
+ (expect @cut '')
+ .eql ['
'
+
+ describe 'wrappable tags', ->
+ it 'should wrap void tags', ->
+ before = ['old', ' ', 'text']
+ after = ['new', ' ', ' ', 'text']
+
+ @res = @cut before, after
+
+ (expect @res).to.equal 'oldnew text'
+
+ it 'should wrap atomic tags', ->
+ before = ['old', '', ' ', 'text']
+ after = ['new', ' ', 'text']
+
+ @res = @cut before, after
+
+ (expect @res).to.equal 'oldnew text'
From af8e6d7928a6c55c8bc6b5ffaa1901e602e32fac Mon Sep 17 00:00:00 2001
From: Keanu Lee
Date: Fri, 11 Apr 2014 14:10:24 -0700
Subject: [PATCH 4/7] Pass in class name to include in wrapper tags.
---
js/htmldiff.js | 41 +++++++++++++++++++++++++++--------------
src/htmldiff.coffee | 39 +++++++++++++++++++++++++--------------
test/diff.spec.coffee | 5 +++++
3 files changed, 57 insertions(+), 28 deletions(-)
diff --git a/js/htmldiff.js b/js/htmldiff.js
index 76b01f4..1cde3f8 100644
--- a/js/htmldiff.js
+++ b/js/htmldiff.js
@@ -414,8 +414,8 @@
return [];
};
- wrap = function(tag, content) {
- var length, non_tags, position, rendering, tags, val;
+ wrap = function(tag, content, class_name) {
+ var attrs, length, non_tags, position, rendering, tags, val;
rendering = '';
position = 0;
length = content.length;
@@ -427,8 +427,9 @@
position += non_tags.length;
if (non_tags.length !== 0) {
val = non_tags.join('');
+ attrs = class_name ? " class=\"" + class_name + "\"" : '';
if (val.trim()) {
- rendering += "<" + tag + ">" + val + "</" + tag + ">";
+ rendering += "<" + tag + attrs + ">" + val + "</" + tag + ">";
}
}
if (position >= length) {
@@ -442,36 +443,48 @@
};
op_map = {
- equal: function(op, before_tokens, after_tokens) {
+ equal: function(op, before_tokens, after_tokens, class_name) {
return after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9).join('');
},
- insert: function(op, before_tokens, after_tokens) {
+ insert: function(op, before_tokens, after_tokens, class_name) {
var val;
val = after_tokens.slice(op.start_in_after, +op.end_in_after + 1 || 9e9);
- return wrap('ins', val);
+ return wrap('ins', val, class_name);
},
- "delete": function(op, before_tokens, after_tokens) {
+ "delete": function(op, before_tokens, after_tokens, class_name) {
var val;
val = before_tokens.slice(op.start_in_before, +op.end_in_before + 1 || 9e9);
- return wrap('del', val);
+ return wrap('del', val, class_name);
}
};
- op_map.replace = function(op, before_tokens, after_tokens) {
- return (op_map["delete"](op, before_tokens, after_tokens)) + (op_map.insert(op, before_tokens, after_tokens));
+ op_map.replace = function(op, before_tokens, after_tokens, class_name) {
+ return (op_map["delete"](op, before_tokens, after_tokens, class_name)) + (op_map.insert(op, before_tokens, after_tokens, class_name));
};
- render_operations = function(before_tokens, after_tokens, operations) {
+ render_operations = function(before_tokens, after_tokens, operations, class_name) {
var op, rendering, _i, _len;
rendering = '';
for (_i = 0, _len = operations.length; _i < _len; _i++) {
op = operations[_i];
- rendering += op_map[op.action](op, before_tokens, after_tokens);
+ rendering += op_map[op.action](op, before_tokens, after_tokens, class_name);
}
return rendering;
};
- diff = function(before, after) {
+
+ /*
+ * Compares two pieces of HTML content and returns the combined content with differences
+ * wrapped in <ins> and <del> tags.
+ *
+ * @param {string} before The HTML content before the changes.
+ * @param {string} after The HTML content after the changes.
+ * @param {string} class_name (Optional) The class attribute to include in <ins> and <del> tags.
+ *
+ * @return {string} The combined HTML content with differences wrapped in <ins> and <del> tags.
+ */
+
+ diff = function(before, after, class_name) {
var ops;
if (before === after) {
return before;
@@ -479,7 +492,7 @@
before = html_to_tokens(before);
after = html_to_tokens(after);
ops = calculate_operations(before, after);
- return render_operations(before, after, ops);
+ return render_operations(before, after, ops, class_name);
};
diff.html_to_tokens = html_to_tokens;
diff --git a/src/htmldiff.coffee b/src/htmldiff.coffee
index 37a84a0..27e32e6 100644
--- a/src/htmldiff.coffee
+++ b/src/htmldiff.coffee
@@ -346,7 +346,7 @@ consecutive_where = (start, content, predicate)->
return content[0..last_matching_index] if last_matching_index?
return []
-wrap = (tag, content)->
+wrap = (tag, content, class_name)->
rendering = ''
position = 0
length = content.length
@@ -357,7 +357,8 @@ wrap = (tag, content)->
position += non_tags.length
if non_tags.length isnt 0
val = non_tags.join ''
- rendering += "<#{tag}>#{val}</#{tag}>" if val.trim()
+ attrs = if class_name then " class=\"#{class_name}\"" else ''
+ rendering += "<#{tag}#{attrs}>#{val}</#{tag}>" if val.trim()
break if position >= length
tags = consecutive_where position, content, is_tag
@@ -367,29 +368,39 @@ wrap = (tag, content)->
return rendering
op_map =
- equal: (op, before_tokens, after_tokens)->
+ equal: (op, before_tokens, after_tokens, class_name)->
after_tokens[op.start_in_after..op.end_in_after].join ''
- insert: (op, before_tokens, after_tokens)->
+ insert: (op, before_tokens, after_tokens, class_name)->
val = after_tokens[op.start_in_after..op.end_in_after]
- wrap 'ins', val
+ wrap 'ins', val, class_name
- delete: (op, before_tokens, after_tokens)->
+ delete: (op, before_tokens, after_tokens, class_name)->
val = before_tokens[op.start_in_before..op.end_in_before]
- wrap 'del', val
+ wrap 'del', val, class_name
-op_map.replace = (op, before_tokens, after_tokens)->
- (op_map.delete op, before_tokens, after_tokens) +
- (op_map.insert op, before_tokens, after_tokens)
+op_map.replace = (op, before_tokens, after_tokens, class_name)->
+ (op_map.delete op, before_tokens, after_tokens, class_name) +
+ (op_map.insert op, before_tokens, after_tokens, class_name)
-render_operations = (before_tokens, after_tokens, operations)->
+render_operations = (before_tokens, after_tokens, operations, class_name)->
rendering = ''
for op in operations
- rendering += op_map[op.action] op, before_tokens, after_tokens
+ rendering += op_map[op.action] op, before_tokens, after_tokens, class_name
return rendering
-diff = (before, after)->
+###
+ * Compares two pieces of HTML content and returns the combined content with differences
+ * wrapped in <ins> and <del> tags.
+ *
+ * @param {string} before The HTML content before the changes.
+ * @param {string} after The HTML content after the changes.
+ * @param {string} class_name (Optional) The class attribute to include in <ins> and <del> tags.
+ *
+ * @return {string} The combined HTML content with differences wrapped in <ins> and <del> tags.
+###
+diff = (before, after, class_name)->
return before if before is after
before = html_to_tokens before
@@ -397,7 +408,7 @@ diff = (before, after)->
ops = calculate_operations before, after
- render_operations before, after, ops
+ render_operations before, after, ops, class_name
diff.html_to_tokens = html_to_tokens
diff --git a/test/diff.spec.coffee b/test/diff.spec.coffee
index c0ad77c..852ff3d 100644
--- a/test/diff.spec.coffee
+++ b/test/diff.spec.coffee
@@ -25,3 +25,8 @@ describe 'Diff', ->
it 'should consider non-breaking spaces and non-adjacent regular spaces as equal', ->
(expect @cut 'Hello world', 'Hello world').to.equal 'Hello world'
+
+ describe 'When a class name is specified', ->
+ it 'should include the class in the wrapper tags', ->
+ (expect @cut 'input', 'input 2', 'diff-result').to.equal \
+ 'input<ins class="diff-result"> 2</ins>'
From f2e637531219381f230bf0705b3f9015346da353 Mon Sep 17 00:00:00 2001
From: Keanu Lee
Date: Mon, 14 Apr 2014 17:43:53 -0700
Subject: [PATCH 5/7] Fixed compilers arg in mocha options file
---
package.json | 2 +-
test/mocha.opts | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/package.json b/package.json
index 893846a..2322191 100644
--- a/package.json
+++ b/package.json
@@ -4,7 +4,7 @@
"description": "HTML Diffing in JavaScript (CoffeeScript)",
"main": "htmldiff.js",
"scripts": {
- "test": "mocha -R min --compilers coffee:coffee-script/register",
+ "test": "mocha -R min",
"install": "coffee --output js/ --compile src/"
},
"repository": {
diff --git a/test/mocha.opts b/test/mocha.opts
index b0f5199..ab28531 100644
--- a/test/mocha.opts
+++ b/test/mocha.opts
@@ -1,4 +1,4 @@
---compilers coffee:coffee-script
+--compilers coffee:coffee-script/register
--require test/config.js
--ui bdd
--reporter spec
From e78e2153f035fd63e1990ed206fde26266bba35c Mon Sep 17 00:00:00 2001
From: Keanu Lee
Date: Tue, 15 Apr 2014 11:50:49 -0700
Subject: [PATCH 6/7] More code documentation.
---
js/htmldiff.js | 167 ++++++++++++++++++++++++++++++++++++++++++++
src/htmldiff.coffee | 149 ++++++++++++++++++++++++++++++++++++++-
2 files changed, 315 insertions(+), 1 deletion(-)
diff --git a/js/htmldiff.js b/js/htmldiff.js
index 1cde3f8..40ca063 100644
--- a/js/htmldiff.js
+++ b/js/htmldiff.js
@@ -1,4 +1,34 @@
// Generated by CoffeeScript 1.7.1
+
+/*
+ * htmldiff.js is a library that compares HTML content. It creates a diff between two
+ * HTML documents by combining the two documents and wrapping the differences with
+ * <ins> and <del> tags. Here is a high-level overview of how the diff works.
+ *
+ * 1. Tokenize the before and after HTML with html_to_tokens.
+ * 2. Generate a list of operations that convert the before list of tokens to the after
+ * list of tokens with calculate_operations, which does the following:
+ * a. Find all the matching blocks of tokens between the before and after lists of
+ * tokens with find_matching_blocks. This is done by finding the single longest
+ * matching block with find_match, then recursively finding the next longest
+ * matching block that precede and follow the longest matching block with
+ * recursively_find_matching_blocks.
+ * b. Determine insertions, deletions, and replacements from the matching blocks.
+ * This is done in calculate_operations.
+ * 3. Render the list of operations by wrapping tokens with <ins> and <del> tags where
+ * appropriate with render_operations.
+ *
+ * Example usage:
+ *
+ * htmldiff = require 'htmldiff.js'
+ *
+ * htmldiff '<p>this is some text</p>', '<p>this is some more text</p>'
+ * == '<p>this is some <ins>more </ins>text</p>'
+ *
+ * htmldiff '<p>this is some text</p>', '<p>this is some more text</p>', 'diff-class'
+ * == '<p>this is some <ins class="diff-class">more </ins>text</p>'
+ */
+
(function() {
var Match, calculate_operations, consecutive_where, create_index, diff, find_match, find_matching_blocks, get_key_for_token, html_to_tokens, is_end_of_atomic_tag, is_end_of_tag, is_start_of_atomic_tag, is_start_of_tag, is_tag, is_void_tag, is_whitespace, is_wrappable, isnt_tag, op_map, recursively_find_matching_blocks, render_operations, wrap;
@@ -85,6 +115,16 @@
return (isnt_tag(token)) || (is_start_of_atomic_tag(token)) || (is_void_tag(token));
};
+
+ /*
+ * A Match stores the information of a matching block. A matching block is a list of
+ * consecutive tokens that appear in both the before and after lists of tokens.
+ *
+ * @param {number} start_in_before The index of the first token in the list of before tokens.
+ * @param {number} start_in_after The index of the first token in the list of after tokens.
+ * @param {number} length The number of consecutive matching tokens in this block.
+ */
+
Match = (function() {
function Match(start_in_before, start_in_after, length) {
this.start_in_before = start_in_before;
@@ -223,6 +263,23 @@
return token;
};
+
+ /*
+ * Finds the matching block with the most consecutive tokens within the given range in the
+ * before and after lists of tokens.
+ *
+ * @param {Array.<string>} before_tokens The before list of tokens.
+ * @param {Array.<string>} after_tokens The after list of tokens.
+ * @param {Object} index_of_before_locations_in_after_tokens The index that is used to search
+ * for tokens in the after list.
+ * @param {number} start_in_before The beginning of the range in the list of before tokens.
+ * @param {number} end_in_before The end of the range in the list of before tokens.
+ * @param {number} start_in_after The beginning of the range in the list of after tokens.
+ * @param {number} end_in_after The end of the range in the list of after tokens.
+ *
+ * @return {Match} A Match that describes the best matching block in the given range.
+ */
+
find_match = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after) {
var best_match_in_after, best_match_in_before, best_match_length, index_in_after, index_in_before, locations_in_after, looking_for, match, match_length_at, new_match_length, new_match_length_at, _i, _j, _len;
best_match_in_before = start_in_before;
@@ -260,6 +317,25 @@
return match;
};
+
+ /*
+ * Finds all the matching blocks within the given range in the before and after lists of
+ * tokens. This function is called recursively to find the next best matches that precede
+ * and follow the first best match.
+ *
+ * @param {Array.<string>} before_tokens The before list of tokens.
+ * @param {Array.<string>} after_tokens The after list of tokens.
+ * @param {Object} index_of_before_locations_in_after_tokens The index that is used to search
+ * for tokens in the after list.
+ * @param {number} start_in_before The beginning of the range in the list of before tokens.
+ * @param {number} end_in_before The end of the range in the list of before tokens.
+ * @param {number} start_in_after The beginning of the range in the list of after tokens.
+ * @param {number} end_in_after The end of the range in the list of after tokens.
+ * @param {Array.<Match>} matching_blocks The list of matching blocks found so far.
+ *
+ * @return {Array.<Match>} The list of matching blocks in this range.
+ */
+
recursively_find_matching_blocks = function(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after, matching_blocks) {
var match;
match = find_match(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, start_in_before, end_in_before, start_in_after, end_in_after);
@@ -314,6 +390,17 @@
return index;
};
+
+ /*
+ * Finds all the matching blocks in the before and after lists of tokens. This function
+ * is a wrapper for the recursive function recursively_find_matching_blocks.
+ *
+ * @param {Array.<string>} before_tokens The before list of tokens.
+ * @param {Array.<string>} after_tokens The after list of tokens.
+ *
+ * @return {Array.<Match>} The list of matching blocks.
+ */
+
find_matching_blocks = function(before_tokens, after_tokens) {
var index_of_before_locations_in_after_tokens, matching_blocks;
matching_blocks = [];
@@ -324,6 +411,25 @@
return recursively_find_matching_blocks(before_tokens, after_tokens, index_of_before_locations_in_after_tokens, 0, before_tokens.length, 0, after_tokens.length, matching_blocks);
};
+
+ /*
+ * Gets a list of operations required to transform the before list of tokens into the
+ * after list of tokens. An operation describes whether a particular list of consecutive
+ * tokens are equal, replaced, inserted, or deleted.
+ *
+ * @param {Array.<string>} before_tokens The before list of tokens.
+ * @param {Array.<string>} after_tokens The after list of tokens.
+ *
+ * @return {Array.