Skip to content

Commit 99fd7e0

Browse files
committed
Refactorings
1 parent 1f1b043 commit 99fd7e0

File tree

6 files changed

+452
-350
lines changed

6 files changed

+452
-350
lines changed

src/cedarscript_editor/cedarscript_editor.py

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
import os
2-
from typing import Callable
32
from collections.abc import Sequence
3+
from typing import Callable
44

5-
from cedarscript_ast_parser import Command, CreateCommand, RmFileCommand, MvFileCommand, UpdateCommand, \
5+
from cedarscript_ast_parser import Command, RmFileCommand, MvFileCommand, UpdateCommand, \
66
SelectCommand, IdentifierFromFile, Segment, Marker, MoveClause, DeleteClause, \
77
InsertClause, ReplaceClause, EditingAction, BodyOrWhole, RegionClause, MarkerType
8-
from cedarscript_ast_parser.cedarscript_ast_parser import MarkerCompatible, RelativeMarker, RelativePositionType
8+
from cedarscript_ast_parser.cedarscript_ast_parser import MarkerCompatible, RelativeMarker, \
9+
RelativePositionType
10+
from cedarscript_editor.indentation_kit import IndentationInfo
11+
from cedarscript_editor.range_spec import IdentifierBoundaries, RangeSpec
12+
from cedarscript_editor.text_editor_kit import read_file, write_file, bow_to_search_range
913

1014
from .identifier_selector import select_finder
11-
from .python_identifier_finder import find_python_identifier
12-
from .text_editor_kit import \
13-
normalize_indent, write_file, read_file, bow_to_search_range, \
14-
IdentifierBoundaries, RangeSpec, analyze_and_adjust_indentation, analyze_and_normalize_indentation, IndentationInfo
1515

1616

1717
class CEDARScriptEditorException(Exception):
@@ -28,12 +28,13 @@ def __init__(self, command_ordinal: int, description: str):
2828
items = f"{sequence} and #{command_ordinal - 1}"
2929
if command_ordinal <= 1:
3030
note = ''
31-
plural_indicator=''
3231
previous_cmd_notes = ''
3332
else:
3433

35-
plural_indicator='s'
36-
previous_cmd_notes = f", bearing in mind the file was updated and now contains all changes expressed in command{plural_indicator} {items}"
34+
previous_cmd_notes = (
35+
f", bearing in mind the file was updated and now contains all changes expressed in "
36+
f"commands {items}"
37+
)
3738
if 'syntax' in description.casefold():
3839
probability_indicator = "most probably"
3940
else:
@@ -80,8 +81,8 @@ def apply_commands(self, commands: Sequence[Command]):
8081
match command:
8182
case UpdateCommand() as cmd:
8283
result.append(self._update_command(cmd))
83-
case CreateCommand() as cmd:
84-
result.append(self._create_command(cmd))
84+
# case CreateCommand() as cmd:
85+
# result.append(self._create_command(cmd))
8586
case RmFileCommand() as cmd:
8687
result.append(self._rm_command(cmd))
8788
case MvFileCommand() as cmd:
@@ -148,8 +149,8 @@ def _update_command(self, cmd: UpdateCommand):
148149

149150
source_info: tuple[str, str | Sequence[str]] = (file_path, src)
150151

151-
def identifier_resolver(marker: Marker):
152-
return self.find_identifier(source_info, marker)
152+
def identifier_resolver(m: Marker):
153+
return self.find_identifier(source_info, m)
153154

154155
# Set range_spec to cover the identifier
155156
search_range = restrict_search_range(action, target, identifier_resolver)
@@ -167,10 +168,9 @@ def identifier_resolver(marker: Marker):
167168
region, action, lines, search_range, identifier_resolver
168169
)
169170
content = content_range.read(lines)
170-
content = analyze_and_adjust_indentation(
171-
src_content_to_adjust=content,
172-
target_context_for_analysis=lines,
173-
base_indentation_count=dest_indent + (relindent or 0)
171+
count = dest_indent + (relindent or 0)
172+
content = IndentationInfo.from_content(lines).shift_indentation(
173+
content, count
174174
)
175175
case str() | [str(), *_] | (str(), *_):
176176
pass
@@ -191,22 +191,21 @@ def _apply_action(self, action: EditingAction, lines: Sequence[str], range_spec:
191191
saved_content = range_spec.delete(lines)
192192
# TODO Move from 'lines' to the same file or to 'other_file'
193193
dest_range = self._get_index_range(InsertClause(insert_position), lines)
194-
saved_content = analyze_and_adjust_indentation(
195-
src_content_to_adjust=saved_content,
196-
target_context_for_analysis=lines,
197-
base_indentation_count= dest_range.indent + (relindent or 0)
194+
count = dest_range.indent + (relindent or 0)
195+
saved_content = (
196+
IndentationInfo.from_content(lines).
197+
shift_indentation(saved_content, count)
198198
)
199199
dest_range.write(saved_content, lines)
200200

201201
case DeleteClause():
202202
range_spec.delete(lines)
203203

204204
case ReplaceClause() | InsertClause():
205-
content = analyze_and_normalize_indentation(
206-
src_content_to_adjust=content,
207-
target_context_for_analysis=lines,
208-
context_indent_count=range_spec.indent
205+
content = IndentationInfo.from_content(lines).apply_relative_indents(
206+
content, range_spec.indent
209207
)
208+
210209
range_spec.write(content, lines)
211210

212211
case _ as invalid:
@@ -250,7 +249,7 @@ def find_index_range_for_region(self,
250249
pass
251250
case _:
252251
# TODO transform to RangeSpec
253-
mos = self.find_identifier(lines, f'for:{region}', mos).body
252+
mos = self.find_identifier(("find_index_range_for_region", lines), mos).body
254253
index_range = mos.to_search_range(
255254
lines,
256255
search_range.start if search_range else 0,

src/cedarscript_editor/identifier_selector.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import logging
66

77
from cedarscript_editor.python_identifier_finder import find_python_identifier
8-
from cedarscript_editor.text_editor_kit import IdentifierBoundaries
8+
from cedarscript_editor.range_spec import IdentifierBoundaries
99

1010
_log = logging.getLogger(__name__)
1111

src/cedarscript_editor/indentation_kit.py

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
import re
2+
from collections import Counter
3+
from collections.abc import Sequence
4+
from math import gcd
5+
from typing import NamedTuple
6+
7+
def get_line_indent_count(line: str):
    """Return the number of leading whitespace characters in `line`."""
    without_indent = line.lstrip()
    return len(line) - len(without_indent)
9+
10+
11+
class IndentationInfo(NamedTuple):
12+
"""
13+
A class to represent and manage indentation information.
14+
15+
This class analyzes and provides utilities for working with indentation.
16+
It detects the indentation character (space or tab),
17+
the number of characters used for each indentation level, and provides
18+
methods to adjust and normalize indentation.
19+
20+
Attributes:
21+
char_count (int): The number of characters used for each indentation level.
22+
char (str): The character used for indentation (' ' for space, '\t' for tab).
23+
min_indent_level (int): The minimum indentation level found in the analyzed content.
24+
consistency (bool): Whether the indentation is consistent throughout the content.
25+
message (str | None): A message describing the indentation analysis results.
26+
27+
Class Methods:
28+
from_content: Analyzes the indentation in the given content and creates an IndentationInfo instance.
29+
30+
Methods:
31+
level_difference: Calculates the difference in indentation levels.
32+
char_count_to_level: Converts a character count to an indentation level.
33+
level_to_chars: Converts an indentation level to a string of indentation characters.
34+
shift_indentation: Adjusts the indentation of a sequence of lines.
35+
apply_relative_indents: Applies relative indentation based on annotations in the content.
36+
37+
Note:
38+
This class is particularly useful for processing Python code with varying
39+
or inconsistent indentation, and for adjusting indentation to meet specific
40+
formatting requirements.
41+
"""
42+
char_count: int
43+
char: str
44+
min_indent_level: int
45+
consistency: bool = True
46+
message: str | None = None
47+
48+
@classmethod
49+
def from_content[T: IndentationInfo, S: Sequence[str]](cls: T, content: str | S) -> T:
50+
"""
51+
Analyzes the indentation in the given content and creates an IndentationInfo instance.
52+
53+
This method examines the indentation patterns in the provided content,
54+
determines the dominant indentation character and count, and assesses
55+
the consistency of indentation throughout the content.
56+
57+
Args:
58+
content (str | Sequence[str]): The content to analyze. Can be a string
59+
or a sequence of strings.
60+
61+
Returns:
62+
IndentationInfo: An instance of IndentationInfo with the analysis results.
63+
64+
Note:
65+
- If no indentation is found, it assumes 4 spaces as per PEP 8.
66+
- For space indentation, it attempts to determine the most likely
67+
character count by analyzing patterns and using GCD.
68+
"""
69+
# TODO Always send str?
70+
lines = [x.lstrip() for x in content.splitlines() if x.strip()] if isinstance(content, str) else content
71+
72+
def extract_indentation(line: str) -> str:
73+
return re.match(r'^\s*', line).group(0)
74+
75+
indentations = [extract_indentation(line) for line in lines if line.strip()]
76+
77+
if not indentations:
78+
return cls(4, ' ', 0, True, "No indentation found. Assuming 4 spaces (PEP 8).")
79+
80+
indent_chars = Counter(indent[0] for indent in indentations if indent)
81+
dominant_char = ' ' if indent_chars.get(' ', 0) >= indent_chars.get('\t', 0) else '\t'
82+
83+
indent_lengths = [len(indent) for indent in indentations]
84+
85+
if dominant_char == '\t':
86+
char_count = 1
87+
else:
88+
# For spaces, determine the most likely char_count
89+
space_counts = [sc for sc in indent_lengths if sc % 2 == 0 and sc > 0]
90+
if not space_counts:
91+
char_count = 2 # Default to 2 if no even space counts
92+
else:
93+
# Sort top 5 space counts and find the largest GCD
94+
sorted_counts = sorted([c[0] for c in Counter(space_counts).most_common(5)], reverse=True)
95+
char_count = sorted_counts[0]
96+
for i in range(1, len(sorted_counts)):
97+
new_gcd = gcd(char_count, sorted_counts[i])
98+
if new_gcd <= 1:
99+
break
100+
char_count = new_gcd
101+
102+
min_indent_chars = min(indent_lengths) if indent_lengths else 0
103+
min_indent_level = min_indent_chars // char_count
104+
105+
consistency = all(len(indent) % char_count == 0 for indent in indentations if indent)
106+
match dominant_char:
107+
case ' ':
108+
domcharstr = 'space'
109+
case '\t':
110+
domcharstr = 'tab'
111+
case _:
112+
domcharstr = dominant_char
113+
message = f"Found {char_count}-{domcharstr} indentation"
114+
if not consistency:
115+
message += " (inconsistent)"
116+
117+
return cls(char_count, dominant_char, min_indent_level, consistency, message)
118+
119+
def level_difference(self, base_indentation_count: int):
120+
return self.char_count_to_level(base_indentation_count) - self.min_indent_level
121+
122+
def char_count_to_level(self, char_count: int) -> int:
123+
return char_count // self.char_count
124+
125+
def level_to_chars(self, level: int) -> str:
126+
return level * self.char_count * self.char
127+
128+
def shift_indentation(self, lines: Sequence[str], target_base_indentation_count: int) -> list[str]:
129+
"""
130+
Shifts the indentation of a sequence of lines based on a base indentation count.
131+
132+
This method adjusts the indentation of each non-empty line in the input sequence.
133+
It calculates the difference between the base indentation and the minimum
134+
indentation found in the content, then applies this shift to all lines.
135+
136+
Args:
137+
lines (Sequence[str]): A sequence of strings representing the lines to be adjusted.
138+
target_base_indentation_count (int): The base indentation count to adjust from.
139+
140+
Returns:
141+
list[str]: A new list of strings with adjusted indentation.
142+
143+
Note:
144+
- Empty lines and lines with only whitespace are preserved as-is.
145+
- The method uses the IndentationInfo of the instance to determine
146+
the indentation character and count.
147+
- This method is useful for uniformly adjusting indentation across all lines.
148+
"""
149+
raw_line_adjuster = self._shift_indentation_fun(target_base_indentation_count)
150+
# Return the transformed lines
151+
return [raw_line_adjuster(line) for line in lines]
152+
153+
def _shift_indentation_fun(self, target_base_indentation_count: int):
154+
# Calculate the indentation difference
155+
level_difference = self.level_difference(target_base_indentation_count)
156+
157+
def adjust_line(line: str) -> str:
158+
if not line.strip():
159+
# Handle empty lines or lines with only whitespace
160+
return line
161+
162+
current_indent_count = get_line_indent_count(line)
163+
current_level = self.char_count_to_level(current_indent_count)
164+
new_level = max(0, current_level + level_difference)
165+
new_indent = self.level_to_chars(new_level)
166+
167+
return new_indent + line.lstrip()
168+
return adjust_line
169+
170+
def apply_relative_indents[S: Sequence[str]](self, content: str | S, context_indent_count: int = 0) -> list[str]:
171+
"""
172+
Applies relative indentation based on annotations in the content.
173+
174+
This method processes the input content, interpreting special annotations
175+
to apply relative indentation. It uses '@' followed by a number to indicate
176+
relative indentation levels.
177+
178+
Args:
179+
content (str | Sequence[str]): The content to process. Can be a string
180+
or a sequence of strings.
181+
context_indent_count (int, optional): The base indentation count of the
182+
context. Defaults to 0.
183+
184+
Returns:
185+
list[str]: A new list of strings with normalized indentation (without the annotations)
186+
187+
Note:
188+
- Lines starting with '@n:' (where n is an integer) are interpreted as
189+
having a relative indentation of n levels from the context indent level.
190+
- Empty lines and lines with only whitespace are removed.
191+
- The method uses the IndentationInfo of the instance to determine
192+
the indentation character and count.
193+
- This method is particularly useful for content with varying
194+
indentation levels specified by annotations.
195+
196+
Raises:
197+
AssertionError: If the calculated indentation level for any line is negative.
198+
"""
199+
# TODO Always send str?
200+
lines = [line.lstrip() for line in content.splitlines() if line.strip()] if isinstance(content, str) else content
201+
202+
context_indent_level = self.char_count_to_level(context_indent_count)
203+
for i in range(len(lines)):
204+
line = lines[i]
205+
parts = line.split(':', 1)
206+
if len(parts) == 2 and parts[0].startswith('@'):
207+
relative_indent_level = int(parts[0][1:])
208+
absolute_indent_level = context_indent_level + relative_indent_level
209+
assert absolute_indent_level >= 0, f"Final indentation for line `{line.strip()}` cannot be negative ({absolute_indent_level})"
210+
lines[i] = self.level_to_chars(absolute_indent_level) + parts[1].lstrip()
211+
else:
212+
absolute_indent_level = context_indent_level
213+
lines[i] = self.level_to_chars(absolute_indent_level) + line.lstrip()
214+
215+
return lines
216+
217+

src/cedarscript_editor/python_identifier_finder.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
from rope.base import ast, libutils
44
from collections.abc import Sequence
55

6-
from cedarscript_editor.text_editor_kit import IdentifierBoundaries, RangeSpec, get_line_indent_count
6+
from cedarscript_editor.range_spec import IdentifierBoundaries, RangeSpec
7+
from cedarscript_editor.indentation_kit import get_line_indent_count
78

89

910
def get_by_offset(obj: Sequence, offset: int):
@@ -19,8 +20,10 @@ def find_python_identifier(root_path: str, file_name: str, source: str, marker:
1920
:param root_path:
2021
:param file_name:
2122
:param source: Source code.
22-
:param marker: Type, name and offset of the identifier to find. TODO: If `None` when there are 2 or more identifiers with the same name, raise exception.
23-
:return: IdentifierBoundaries with identifier start, body start, and end lines of the identifier or None if not found.
23+
:param marker: Type, name and offset of the identifier to find.
24+
TODO: If `None` when there are 2 or more identifiers with the same name, raise exception.
25+
:return: IdentifierBoundaries with identifier start, body start, and end lines of the identifier
26+
or None if not found.
2427
"""
2528
project = rope.base.project.Project(root_path)
2629
resource = libutils.path_to_resource(project, file_name)

0 commit comments

Comments
 (0)