|
| 1 | +from typing import List, Generator, Any |
| 2 | +import difflib |
| 3 | +from enum import Enum |
| 4 | +from dataclasses import dataclass |
| 5 | + |
| 6 | + |
class DiffCode(Enum):
    """Classification of one logical entry produced by the diff parser."""

    # The line is present, unchanged, on both sides.
    SAME = 0
    # The line exists only in the right-hand text (an ndiff "+ " line).
    RIGHT_ONLY = 1
    # The line exists only in the left-hand text (an ndiff "- " line with no
    # intra-line change information attached).
    LEFT_ONLY = 2
    # The line exists on both sides but its content differs.
    CHANGED = 3
| 12 | + |
| 13 | + |
class DiffLineCode(Enum):
    """Kind of a raw ``difflib.ndiff`` output line, derived from its prefix."""

    # "+ " prefix: line unique to the second sequence.
    ADDED = 0
    # "- " prefix: line unique to the first sequence.
    REMOVED = 1
    # "  " prefix: line common to both sequences.
    COMMON = 2
    # "? " prefix: hint line that is present in neither input sequence.
    MISSING = 3
| 19 | + |
| 20 | + |
| 21 | +class DiffLine: |
| 22 | + def __init__(self, line: str | None): |
| 23 | + self.__line = line |
| 24 | + |
| 25 | + @staticmethod |
| 26 | + def parse(line: str) -> "DiffLine": |
| 27 | + return DiffLine(line) |
| 28 | + |
| 29 | + @property |
| 30 | + def code(self) -> DiffLineCode | None: |
| 31 | + if self.__line is None: |
| 32 | + return None |
| 33 | + |
| 34 | + match self.__line[:2]: |
| 35 | + case "+ ": |
| 36 | + return DiffLineCode.ADDED |
| 37 | + case "- ": |
| 38 | + return DiffLineCode.REMOVED |
| 39 | + case " ": |
| 40 | + return DiffLineCode.COMMON |
| 41 | + case "? ": |
| 42 | + return DiffLineCode.MISSING |
| 43 | + |
| 44 | + @property |
| 45 | + def line(self) -> str | None: |
| 46 | + if self.__line is None: |
| 47 | + return None |
| 48 | + |
| 49 | + return self.__line[2:] |
| 50 | + |
| 51 | + |
@dataclass
class DiffChange:
    """Intra-line change details extracted from a group of ndiff lines.

    Attributes:
        left: Column indices flagged as changed in the left-hand line.
        right: Column indices flagged as changed in the right-hand line.
        newline: Text of the line that accompanies the change.
        skip_lines: How many ndiff lines belong to the group in addition to
            its leading "- " line.
    """

    left: List[int]
    right: List[int]
    newline: str
    skip_lines: int
| 58 | + |
| 59 | + |
@dataclass
class Diff:
    """One logical diff entry yielded by the parser.

    Attributes:
        code: How the entry relates the two texts.
        line: Text of the ndiff line with its two-character prefix removed.
        left_changes: Changed column indices on the left side; only populated
            for changed entries, otherwise ``None``.
        right_changes: Changed column indices on the right side; only
            populated for changed entries, otherwise ``None``.
        newline: Accompanying line text for changed entries, otherwise
            ``None``.
    """

    code: DiffCode
    line: str
    left_changes: List[int] | None = None
    right_changes: List[int] | None = None
    newline: str | None = None
| 67 | + |
| 68 | + |
| 69 | +# Parser inspired by https://github.com/yebrahim/difflibparser/blob/master/difflibparser.py |
| 70 | +# Rather than relying on the iterator pattern, we're opting to use the generator pattern instead to avoid managing an |
| 71 | +# "end of diff" exception |
| 72 | +# Also included several code refactors to clean up the general code structure |
class DiffParser:
    """Turn a ``difflib.ndiff`` comparison into a stream of ``Diff`` entries.

    ndiff reports an edited line as a small group of raw lines: a "- " line,
    a "+ " line and up to two "? " hint lines whose "-", "+" and "^"
    characters mark the columns that differ. The parser folds each such
    group into a single ``CHANGED`` entry and passes every other line
    through as ``SAME``, ``LEFT_ONLY`` or ``RIGHT_ONLY``.
    """

    def __init__(self, left_text, right_text):
        """Compute the ndiff of two sequences of lines.

        Args:
            left_text: The "before" lines, in any form ``difflib.ndiff``
                accepts as its first argument.
            right_text: The "after" lines, likewise.
        """
        self.__left_text = left_text
        self.__right_text = right_text
        # ndiff is lazy; materialise it so look-ahead by index is possible.
        self.__diff = list(difflib.ndiff(self.__left_text, self.__right_text))

    def iter_diffs(self) -> Generator[Diff, Any, Any]:
        """Yield one ``Diff`` per logical entry of the comparison.

        The cursor is local to each call, so the method can be called
        repeatedly and every call walks the whole diff from the start.
        """
        line_no = 0
        total = len(self.__diff)
        while line_no < total:
            diff_line = DiffLine.parse(self.__diff[line_no])
            code = diff_line.code
            diff = Diff(code=DiffCode.SAME, line=diff_line.line)
            if code == DiffLineCode.ADDED:
                diff.code = DiffCode.RIGHT_ONLY
            elif code == DiffLineCode.REMOVED:
                change = self.__get_incremental_change(line_no)
                if change is None:
                    diff.code = DiffCode.LEFT_ONLY
                else:
                    diff.code = DiffCode.CHANGED
                    diff.left_changes = change.left
                    diff.right_changes = change.right
                    diff.newline = change.newline
                    # Advance past the rest of the group. This must be a
                    # relative jump: assigning the count would rewind the
                    # cursor to the top of the diff.
                    line_no += change.skip_lines
            line_no += 1
            yield diff

    def __get_incremental_change(self, line_no: int) -> DiffChange | None:
        """Recognise an intra-line change group starting at ``line_no``.

        Args:
            line_no: Index into the ndiff output of a "- " line.

        Returns:
            A ``DiffChange`` when the lines at ``line_no`` form one of the
            known change groups, otherwise ``None``.
        """
        # Look at a window of up to four lines; positions past the end of
        # the diff become "absent" DiffLines that match no code.
        lines = [
            DiffLine.parse(
                self.__diff[line_no + i] if line_no + i < len(self.__diff) else None
            )
            for i in range(4)
        ]

        # The four-line form must be tested first because the "-", "?", "+"
        # form below is a prefix of it.
        pattern_a = [
            DiffLineCode.REMOVED,
            DiffLineCode.MISSING,
            DiffLineCode.ADDED,
            DiffLineCode.MISSING,
        ]
        if self.__match_pattern(lines, pattern_a):
            return DiffChange(
                left=self.__marked_columns(lines[1].line, "-^"),
                right=self.__marked_columns(lines[3].line, "+^"),
                newline=lines[2].line,
                skip_lines=3,
            )

        # Only the new line carries a hint: characters were purely added.
        pattern_b = [DiffLineCode.REMOVED, DiffLineCode.ADDED, DiffLineCode.MISSING]
        if self.__match_pattern(lines, pattern_b):
            return DiffChange(
                left=[],
                right=self.__marked_columns(lines[2].line, "+^"),
                newline=lines[1].line,
                skip_lines=2,
            )

        # Only the old line carries a hint: characters were purely removed.
        # The new text is the "+ " line at index 2, not the hint at index 1.
        pattern_c = [DiffLineCode.REMOVED, DiffLineCode.MISSING, DiffLineCode.ADDED]
        if self.__match_pattern(lines, pattern_c):
            return DiffChange(
                left=self.__marked_columns(lines[1].line, "-^"),
                right=[],
                newline=lines[2].line,
                skip_lines=2,
            )

        return None

    @staticmethod
    def __marked_columns(hint: str, markers: str) -> List[int]:
        """Return the column indices of ``hint`` holding one of ``markers``."""
        return [i for (i, c) in enumerate(hint) if c in markers]

    def __match_pattern(
        self, diff_lines: List[DiffLine], codes: List[DiffLineCode]
    ) -> bool:
        """Tell whether the leading ``diff_lines`` carry exactly ``codes``.

        Only as many lines as there are codes are compared, so a three-code
        pattern ignores the fourth line of the window.
        """
        return all(line.code == code for line, code in zip(diff_lines, codes))
0 commit comments