|
| 1 | +import re |
| 2 | +from collections import Counter |
| 3 | +from collections.abc import Sequence |
| 4 | +from math import gcd |
| 5 | +from typing import NamedTuple |
| 6 | + |
def get_line_indent_count(line: str):
    """Return the number of leading whitespace characters in ``line``.

    Every leading character for which ``str.isspace()`` is true is counted,
    which is the same set of characters a bare ``str.lstrip()`` removes.
    A string made only of whitespace therefore yields its full length.
    """
    indent_width = 0
    for ch in line:
        if not ch.isspace():
            break
        indent_width += 1
    return indent_width
| 9 | + |
| 10 | + |
| 11 | +class IndentationInfo(NamedTuple): |
| 12 | + """ |
| 13 | + A class to represent and manage indentation information. |
| 14 | +
|
| 15 | + This class analyzes and provides utilities for working with indentation. |
| 16 | + It detects the indentation character (space or tab), |
| 17 | + the number of characters used for each indentation level, and provides |
| 18 | + methods to adjust and normalize indentation. |
| 19 | +
|
| 20 | + Attributes: |
| 21 | + char_count (int): The number of characters used for each indentation level. |
| 22 | + char (str): The character used for indentation (' ' for space, '\t' for tab). |
| 23 | + min_indent_level (int): The minimum indentation level found in the analyzed content. |
| 24 | + consistency (bool): Whether the indentation is consistent throughout the content. |
| 25 | + message (str | None): A message describing the indentation analysis results. |
| 26 | +
|
| 27 | + Class Methods: |
| 28 | + from_content: Analyzes the indentation in the given content and creates an IndentationInfo instance. |
| 29 | +
|
| 30 | + Methods: |
| 31 | + level_difference: Calculates the difference in indentation levels. |
| 32 | + char_count_to_level: Converts a character count to an indentation level. |
| 33 | + level_to_chars: Converts an indentation level to a string of indentation characters. |
| 34 | + shift_indentation: Adjusts the indentation of a sequence of lines. |
| 35 | + apply_relative_indents: Applies relative indentation based on annotations in the content. |
| 36 | +
|
| 37 | + Note: |
| 38 | + This class is particularly useful for processing Python code with varying |
| 39 | + or inconsistent indentation, and for adjusting indentation to meet specific |
| 40 | + formatting requirements. |
| 41 | + """ |
| 42 | + char_count: int |
| 43 | + char: str |
| 44 | + min_indent_level: int |
| 45 | + consistency: bool = True |
| 46 | + message: str | None = None |
| 47 | + |
| 48 | + @classmethod |
| 49 | + def from_content[T: IndentationInfo, S: Sequence[str]](cls: T, content: str | S) -> T: |
| 50 | + """ |
| 51 | + Analyzes the indentation in the given content and creates an IndentationInfo instance. |
| 52 | +
|
| 53 | + This method examines the indentation patterns in the provided content, |
| 54 | + determines the dominant indentation character and count, and assesses |
| 55 | + the consistency of indentation throughout the content. |
| 56 | +
|
| 57 | + Args: |
| 58 | + content (str | Sequence[str]): The content to analyze. Can be a string |
| 59 | + or a sequence of strings. |
| 60 | +
|
| 61 | + Returns: |
| 62 | + IndentationInfo: An instance of IndentationInfo with the analysis results. |
| 63 | +
|
| 64 | + Note: |
| 65 | + - If no indentation is found, it assumes 4 spaces as per PEP 8. |
| 66 | + - For space indentation, it attempts to determine the most likely |
| 67 | + character count by analyzing patterns and using GCD. |
| 68 | + """ |
| 69 | + # TODO Always send str? |
| 70 | + lines = [x.lstrip() for x in content.splitlines() if x.strip()] if isinstance(content, str) else content |
| 71 | + |
| 72 | + def extract_indentation(line: str) -> str: |
| 73 | + return re.match(r'^\s*', line).group(0) |
| 74 | + |
| 75 | + indentations = [extract_indentation(line) for line in lines if line.strip()] |
| 76 | + |
| 77 | + if not indentations: |
| 78 | + return cls(4, ' ', 0, True, "No indentation found. Assuming 4 spaces (PEP 8).") |
| 79 | + |
| 80 | + indent_chars = Counter(indent[0] for indent in indentations if indent) |
| 81 | + dominant_char = ' ' if indent_chars.get(' ', 0) >= indent_chars.get('\t', 0) else '\t' |
| 82 | + |
| 83 | + indent_lengths = [len(indent) for indent in indentations] |
| 84 | + |
| 85 | + if dominant_char == '\t': |
| 86 | + char_count = 1 |
| 87 | + else: |
| 88 | + # For spaces, determine the most likely char_count |
| 89 | + space_counts = [sc for sc in indent_lengths if sc % 2 == 0 and sc > 0] |
| 90 | + if not space_counts: |
| 91 | + char_count = 2 # Default to 2 if no even space counts |
| 92 | + else: |
| 93 | + # Sort top 5 space counts and find the largest GCD |
| 94 | + sorted_counts = sorted([c[0] for c in Counter(space_counts).most_common(5)], reverse=True) |
| 95 | + char_count = sorted_counts[0] |
| 96 | + for i in range(1, len(sorted_counts)): |
| 97 | + new_gcd = gcd(char_count, sorted_counts[i]) |
| 98 | + if new_gcd <= 1: |
| 99 | + break |
| 100 | + char_count = new_gcd |
| 101 | + |
| 102 | + min_indent_chars = min(indent_lengths) if indent_lengths else 0 |
| 103 | + min_indent_level = min_indent_chars // char_count |
| 104 | + |
| 105 | + consistency = all(len(indent) % char_count == 0 for indent in indentations if indent) |
| 106 | + match dominant_char: |
| 107 | + case ' ': |
| 108 | + domcharstr = 'space' |
| 109 | + case '\t': |
| 110 | + domcharstr = 'tab' |
| 111 | + case _: |
| 112 | + domcharstr = dominant_char |
| 113 | + message = f"Found {char_count}-{domcharstr} indentation" |
| 114 | + if not consistency: |
| 115 | + message += " (inconsistent)" |
| 116 | + |
| 117 | + return cls(char_count, dominant_char, min_indent_level, consistency, message) |
| 118 | + |
| 119 | + def level_difference(self, base_indentation_count: int): |
| 120 | + return self.char_count_to_level(base_indentation_count) - self.min_indent_level |
| 121 | + |
| 122 | + def char_count_to_level(self, char_count: int) -> int: |
| 123 | + return char_count // self.char_count |
| 124 | + |
| 125 | + def level_to_chars(self, level: int) -> str: |
| 126 | + return level * self.char_count * self.char |
| 127 | + |
| 128 | + def shift_indentation(self, lines: Sequence[str], target_base_indentation_count: int) -> list[str]: |
| 129 | + """ |
| 130 | + Shifts the indentation of a sequence of lines based on a base indentation count. |
| 131 | +
|
| 132 | + This method adjusts the indentation of each non-empty line in the input sequence. |
| 133 | + It calculates the difference between the base indentation and the minimum |
| 134 | + indentation found in the content, then applies this shift to all lines. |
| 135 | +
|
| 136 | + Args: |
| 137 | + lines (Sequence[str]): A sequence of strings representing the lines to be adjusted. |
| 138 | + target_base_indentation_count (int): The base indentation count to adjust from. |
| 139 | +
|
| 140 | + Returns: |
| 141 | + list[str]: A new list of strings with adjusted indentation. |
| 142 | +
|
| 143 | + Note: |
| 144 | + - Empty lines and lines with only whitespace are preserved as-is. |
| 145 | + - The method uses the IndentationInfo of the instance to determine |
| 146 | + the indentation character and count. |
| 147 | + - This method is useful for uniformly adjusting indentation across all lines. |
| 148 | + """ |
| 149 | + raw_line_adjuster = self._shift_indentation_fun(target_base_indentation_count) |
| 150 | + # Return the transformed lines |
| 151 | + return [raw_line_adjuster(line) for line in lines] |
| 152 | + |
| 153 | + def _shift_indentation_fun(self, target_base_indentation_count: int): |
| 154 | + # Calculate the indentation difference |
| 155 | + level_difference = self.level_difference(target_base_indentation_count) |
| 156 | + |
| 157 | + def adjust_line(line: str) -> str: |
| 158 | + if not line.strip(): |
| 159 | + # Handle empty lines or lines with only whitespace |
| 160 | + return line |
| 161 | + |
| 162 | + current_indent_count = get_line_indent_count(line) |
| 163 | + current_level = self.char_count_to_level(current_indent_count) |
| 164 | + new_level = max(0, current_level + level_difference) |
| 165 | + new_indent = self.level_to_chars(new_level) |
| 166 | + |
| 167 | + return new_indent + line.lstrip() |
| 168 | + return adjust_line |
| 169 | + |
| 170 | + def apply_relative_indents[S: Sequence[str]](self, content: str | S, context_indent_count: int = 0) -> list[str]: |
| 171 | + """ |
| 172 | + Applies relative indentation based on annotations in the content. |
| 173 | +
|
| 174 | + This method processes the input content, interpreting special annotations |
| 175 | + to apply relative indentation. It uses '@' followed by a number to indicate |
| 176 | + relative indentation levels. |
| 177 | +
|
| 178 | + Args: |
| 179 | + content (str | Sequence[str]): The content to process. Can be a string |
| 180 | + or a sequence of strings. |
| 181 | + context_indent_count (int, optional): The base indentation count of the |
| 182 | + context. Defaults to 0. |
| 183 | +
|
| 184 | + Returns: |
| 185 | + list[str]: A new list of strings with normalized indentation (without the annotations) |
| 186 | +
|
| 187 | + Note: |
| 188 | + - Lines starting with '@n:' (where n is an integer) are interpreted as |
| 189 | + having a relative indentation of n levels from the context indent level. |
| 190 | + - Empty lines and lines with only whitespace are removed. |
| 191 | + - The method uses the IndentationInfo of the instance to determine |
| 192 | + the indentation character and count. |
| 193 | + - This method is particularly useful for content with varying |
| 194 | + indentation levels specified by annotations. |
| 195 | +
|
| 196 | + Raises: |
| 197 | + AssertionError: If the calculated indentation level for any line is negative. |
| 198 | + """ |
| 199 | + # TODO Always send str? |
| 200 | + lines = [line.lstrip() for line in content.splitlines() if line.strip()] if isinstance(content, str) else content |
| 201 | + |
| 202 | + context_indent_level = self.char_count_to_level(context_indent_count) |
| 203 | + for i in range(len(lines)): |
| 204 | + line = lines[i] |
| 205 | + parts = line.split(':', 1) |
| 206 | + if len(parts) == 2 and parts[0].startswith('@'): |
| 207 | + relative_indent_level = int(parts[0][1:]) |
| 208 | + absolute_indent_level = context_indent_level + relative_indent_level |
| 209 | + assert absolute_indent_level >= 0, f"Final indentation for line `{line.strip()}` cannot be negative ({absolute_indent_level})" |
| 210 | + lines[i] = self.level_to_chars(absolute_indent_level) + parts[1].lstrip() |
| 211 | + else: |
| 212 | + absolute_indent_level = context_indent_level |
| 213 | + lines[i] = self.level_to_chars(absolute_indent_level) + line.lstrip() |
| 214 | + |
| 215 | + return lines |
| 216 | + |
| 217 | + |
0 commit comments