Skip to content
This repository was archived by the owner on Aug 5, 2024. It is now read-only.

Commit 08de57e

Browse files
committed
Fix reconstructing diff in Python3
1 parent d0a578f commit 08de57e

File tree

2 files changed: +8 −5 lines changed

python3/diff_match_patch.py

Lines changed: 6 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1174,7 +1174,8 @@ def diff_fromDelta(self, text1, delta):
1174 1174   ValueError: If invalid input.
1175 1175   """
1176 1176   diffs = []
1177      - pointer = 0 # Cursor in text1
     1177 + as_utf16 = text1.encode('utf-16-be')
     1178 + pointer = 0 # Cursor in as_utf16
1178 1179   tokens = delta.split("\t")
1179 1180   for token in tokens:
1180 1181   if token == "":
@@ -1193,8 +1194,8 @@ def diff_fromDelta(self, text1, delta):
1193 1194   raise ValueError("Invalid number in diff_fromDelta: " + param)
1194 1195   if n < 0:
1195 1196   raise ValueError("Negative number in diff_fromDelta: " + param)
1196      - text = text1[pointer : pointer + n]
1197      - pointer += n
     1197 + text = as_utf16[pointer : pointer + n * 2].decode('utf-16-be')
     1198 + pointer += n * 2
1198 1199   if token[0] == "=":
1199 1200   diffs.append((self.DIFF_EQUAL, text))
1200 1201   else:
@@ -1203,10 +1204,10 @@ def diff_fromDelta(self, text1, delta):
1203 1204   # Anything else is an error.
1204 1205   raise ValueError("Invalid diff operation in diff_fromDelta: " +
1205 1206   token[0])
1206      - if pointer != len(text1):
     1207 + if pointer != len(as_utf16):
1207 1208   raise ValueError(
1208 1209   "Delta length (%d) does not equal source text length (%d)." %
1209      - (pointer, len(text1)))
     1210 + (pointer, len(as_utf16)))
1210 1211   return diffs
1211 1212
1212 1213   # MATCH FUNCTIONS

python3/tests/diff_match_patch_test.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -449,6 +449,8 @@ def testDiffDelta(self):
449 449   delta = self.dmp.diff_toDelta(diffs)
450 450   self.assertEqual("=2\t+%F0%9F%99%8C\t=2", delta)
451 451
    452 + self.assertEqual(diffs, self.dmp.diff_fromDelta("\U0001F64B\U0001F64B", "=2\t+%F0%9F%99%8C\t=2"))
    453 +
452 454   # Verify pool of unchanged characters.
453 455   diffs = [(self.dmp.DIFF_INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")]
454 456   text2 = self.dmp.diff_text2(diffs)

0 commit comments

Comments
 (0)