Skip to content

Commit 87d6efa

Browse files
committed
Initial implementation of parser
1 parent 04cbafd commit 87d6efa

File tree

5 files changed

+196
-0
lines changed

5 files changed

+196
-0
lines changed

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2025 Jiahao
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,9 @@
11
# diffparser
2+
23
Parser for Python's difflib. Built on top of https://github.com/yebrahim/difflibparser/blob/master/difflibparser.py
4+
5+
Key changes made to the above library:
6+
7+
1. Using the generator pattern instead of the iterator pattern when iterating over diffs
8+
2. Using `@dataclass` types instead of generic dictionaries to enforce strict typing
9+
3. Using type annotations for strict typing

pyproject.toml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
[build-system]
2+
requires = ["hatchling"]
3+
build-backend = "hatchling.build"
4+
5+
[project]
6+
name = "diffparser"
7+
version = "0.0.1"
8+
authors = [{ name = "Jiahao, Woo", email = "woojiahao1234@gmail.com" }]
9+
description = "Lightweight Python package for parsing Python difflib's diff results"
10+
readme = "README.md"
11+
requires-python = ">=3.13"
12+
classifiers = [
13+
"Development Status :: 4 - Beta",
14+
"Intended Audience :: Developers",
15+
"Topic :: Software Development",
16+
"Programming Language :: Python :: 3.13",
17+
]
18+
license.file = "LICENSE"
19+
20+
[project.urls]
21+
Homepage = "https://github.com/git-mastery/diffparser"
22+
Issues = "https://github.com/git-mastery/diffparser/issues"

src/diffparser/__init__.py

Whitespace-only changes.

src/diffparser/diffparser.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
from typing import List, Generator, Any
2+
import difflib
3+
from enum import Enum
4+
from dataclasses import dataclass
5+
6+
7+
class DiffCode(Enum):
    """Classification attached to each ``Diff`` record yielded by ``DiffParser``."""

    # Default classification: the line was neither added nor removed.
    SAME = 0
    # The line carries the "+ " marker, i.e. it only exists on the right side.
    RIGHT_ONLY = 1
    # The line carries the "- " marker and no intraline change pattern follows.
    LEFT_ONLY = 2
    # A "- " line that is followed by a recognised intraline change pattern.
    CHANGED = 3
12+
13+
14+
class DiffLineCode(Enum):
    """Kind of a raw delta line, derived from its two-character prefix."""

    # Prefix "+ ".
    ADDED = 0
    # Prefix "- ".
    REMOVED = 1
    # Prefix of two spaces.
    COMMON = 2
    # Prefix "? " (the guide line that marks intraline differences).
    MISSING = 3
19+
20+
21+
class DiffLine:
22+
def __init__(self, line: str | None):
23+
self.__line = line
24+
25+
@staticmethod
26+
def parse(line: str) -> "DiffLine":
27+
return DiffLine(line)
28+
29+
@property
30+
def code(self) -> DiffLineCode | None:
31+
if self.__line is None:
32+
return None
33+
34+
match self.__line[:2]:
35+
case "+ ":
36+
return DiffLineCode.ADDED
37+
case "- ":
38+
return DiffLineCode.REMOVED
39+
case " ":
40+
return DiffLineCode.COMMON
41+
case "? ":
42+
return DiffLineCode.MISSING
43+
44+
@property
45+
def line(self) -> str | None:
46+
if self.__line is None:
47+
return None
48+
49+
return self.__line[2:]
50+
51+
52+
@dataclass
class DiffChange:
    """Intraline change found by looking ahead from a removed ("- ") line.

    Attributes:
        left: Column indices flagged in the guide line that follows the
            removed line (characters "-" or "^").
        right: Column indices flagged in the guide line that follows the
            added line (characters "+" or "^").
        newline: Text of the replacement line.
        skip_lines: How many delta lines after the removed line belong to
            this change.
    """

    left: List[int]
    right: List[int]
    newline: str
    skip_lines: int
58+
59+
60+
@dataclass
class Diff:
    """One logical entry produced by ``DiffParser.iter_diffs``.

    Attributes:
        code: How the line differs between the two inputs.
        line: Text of the delta line with its two-character marker removed.
        left_changes: Flagged column indices on the left side; only set for
            ``DiffCode.CHANGED`` entries.
        right_changes: Flagged column indices on the right side; only set
            for ``DiffCode.CHANGED`` entries.
        newline: Text of the replacement line; only set for
            ``DiffCode.CHANGED`` entries.
    """

    code: DiffCode
    line: str
    left_changes: List[int] | None = None
    right_changes: List[int] | None = None
    newline: str | None = None
67+
68+
69+
# Parser inspired by https://github.com/yebrahim/difflibparser/blob/master/difflibparser.py
70+
# Rather than relying on the iterator pattern, we're opting to use the generator pattern instead to avoid managing an
71+
# "end of diff" exception
72+
# Also included several code refactors to clean up the general code structure
73+
class DiffParser:
    """Group the output of ``difflib.ndiff`` into ``Diff`` records.

    ``left_text`` and ``right_text`` are handed to ``difflib.ndiff``
    unchanged, so they should be sequences of lines. The delta is computed
    once, in the constructor, and kept as a list.
    """

    def __init__(self, left_text, right_text):
        self.__left_text = left_text
        self.__right_text = right_text
        self.__diff = list(difflib.ndiff(self.__left_text, self.__right_text))

    def iter_diffs(self) -> Generator[Diff, Any, Any]:
        """Yield one ``Diff`` per logical entry of the delta.

        A removed line followed by a recognised intraline pattern is merged
        with the lines of that pattern into a single ``DiffCode.CHANGED``
        entry. Added lines become ``RIGHT_ONLY``, other removed lines become
        ``LEFT_ONLY``, and everything else is reported as ``SAME``.

        The cursor is local to each call, so every call walks the delta
        from the beginning.
        """
        line_no = 0
        while line_no < len(self.__diff):
            diff_line = DiffLine.parse(self.__diff[line_no])
            code = diff_line.code
            diff = Diff(code=DiffCode.SAME, line=diff_line.line)
            if code == DiffLineCode.ADDED:
                diff.code = DiffCode.RIGHT_ONLY
            elif code == DiffLineCode.REMOVED:
                change = self.__get_incremental_change(line_no)
                if change is None:
                    diff.code = DiffCode.LEFT_ONLY
                else:
                    diff.code = DiffCode.CHANGED
                    diff.left_changes = change.left
                    diff.right_changes = change.right
                    diff.newline = change.newline
                    # Advance past the lines folded into this entry.
                    # skip_lines is a relative count, not an absolute index.
                    line_no += change.skip_lines
            line_no += 1
            yield diff

    @staticmethod
    def __marked_columns(guide: str | None, markers: str) -> List[int]:
        """Return the indices in ``guide`` whose character is in ``markers``."""
        if guide is None:
            return []
        return [i for i, c in enumerate(guide) if c in markers]

    def __get_incremental_change(self, line_no: int) -> DiffChange | None:
        """Detect an intraline change starting at the removed line ``line_no``.

        Looks at a window of four delta lines, padded with empty ``DiffLine``
        objects past the end of the delta, and tries three layouts in order:

        * ``-``, ``?``, ``+``, ``?``: both sides have a guide line
        * ``-``, ``+``, ``?``: only the added line has a guide line
        * ``-``, ``?``, ``+``: only the removed line has a guide line

        Returns ``None`` when no layout matches.
        """
        lines = [
            DiffLine.parse(
                self.__diff[line_no + i] if line_no + i < len(self.__diff) else None
            )
            for i in range(4)
        ]

        # Must be tested first: pattern_c is a prefix of this layout.
        pattern_a = [
            DiffLineCode.REMOVED,
            DiffLineCode.MISSING,
            DiffLineCode.ADDED,
            DiffLineCode.MISSING,
        ]
        if self.__match_pattern(lines, pattern_a):
            return DiffChange(
                left=self.__marked_columns(lines[1].line, "-^"),
                right=self.__marked_columns(lines[3].line, "+^"),
                newline=lines[2].line,
                skip_lines=3,
            )

        pattern_b = [DiffLineCode.REMOVED, DiffLineCode.ADDED, DiffLineCode.MISSING]
        if self.__match_pattern(lines, pattern_b):
            return DiffChange(
                left=[],
                right=self.__marked_columns(lines[2].line, "+^"),
                newline=lines[1].line,
                skip_lines=2,
            )

        pattern_c = [DiffLineCode.REMOVED, DiffLineCode.MISSING, DiffLineCode.ADDED]
        if self.__match_pattern(lines, pattern_c):
            return DiffChange(
                # Keep only the columns the guide line actually flags.
                left=self.__marked_columns(lines[1].line, "-^"),
                right=[],
                # The replacement text is the added line, not the guide line.
                newline=lines[2].line,
                skip_lines=2,
            )

        return None

    def __match_pattern(
        self, diff_lines: List[DiffLine], codes: List[DiffLineCode]
    ) -> bool:
        """Return True when the leading lines carry exactly ``codes``.

        ``zip`` stops at the shorter sequence, so a three-code pattern only
        inspects the first three lines of the window.
        """
        return all(line.code == code for line, code in zip(diff_lines, codes))

0 commit comments

Comments
 (0)