Refactorings

elifarley · elifarley · commit 99fd7e0ce853 · 2024-10-11T10:38:59.000-03:00
diff --git a/src/cedarscript_editor/cedarscript_editor.py b/src/cedarscript_editor/cedarscript_editor.py
@@ -1,17 +1,17 @@
 import os
-from typing import Callable
 from collections.abc import Sequence
+from typing import Callable
 
-from cedarscript_ast_parser import Command, CreateCommand, RmFileCommand, MvFileCommand, UpdateCommand, \
+from cedarscript_ast_parser import Command, RmFileCommand, MvFileCommand, UpdateCommand, \
     SelectCommand, IdentifierFromFile, Segment, Marker, MoveClause, DeleteClause, \
     InsertClause, ReplaceClause, EditingAction, BodyOrWhole, RegionClause, MarkerType
-from cedarscript_ast_parser.cedarscript_ast_parser import MarkerCompatible, RelativeMarker, RelativePositionType
+from cedarscript_ast_parser.cedarscript_ast_parser import MarkerCompatible, RelativeMarker, \
+    RelativePositionType
+from cedarscript_editor.indentation_kit import IndentationInfo
+from cedarscript_editor.range_spec import IdentifierBoundaries, RangeSpec
+from cedarscript_editor.text_editor_kit import read_file, write_file, bow_to_search_range
 
 from .identifier_selector import select_finder
-from .python_identifier_finder import find_python_identifier
-from .text_editor_kit import \
-    normalize_indent, write_file, read_file, bow_to_search_range, \
-    IdentifierBoundaries, RangeSpec, analyze_and_adjust_indentation, analyze_and_normalize_indentation, IndentationInfo
 
 
 class CEDARScriptEditorException(Exception):
@@ -28,12 +28,13 @@ def __init__(self, command_ordinal: int, description: str):
                 items = f"{sequence} and #{command_ordinal - 1}"
         if command_ordinal <= 1:
             note = ''
-            plural_indicator=''
             previous_cmd_notes = ''
         else:
 
-            plural_indicator='s'
-            previous_cmd_notes = f", bearing in mind the file was updated and now contains all changes expressed in command{plural_indicator} {items}"
+            previous_cmd_notes = (
+                f", bearing in mind the file was updated and now contains all changes expressed in "
+                f"commands {items}"
+            )
             if 'syntax' in description.casefold():
                 probability_indicator = "most probably"
             else:
@@ -80,8 +81,8 @@ def apply_commands(self, commands: Sequence[Command]):
                 match command:
                     case UpdateCommand() as cmd:
                         result.append(self._update_command(cmd))
-                    case CreateCommand() as cmd:
-                        result.append(self._create_command(cmd))
+                    # case CreateCommand() as cmd:
+                    #     result.append(self._create_command(cmd))
                     case RmFileCommand() as cmd:
                         result.append(self._rm_command(cmd))
                     case MvFileCommand() as cmd:
@@ -148,8 +149,8 @@ def _update_command(self, cmd: UpdateCommand):
 
         source_info: tuple[str, str | Sequence[str]] = (file_path, src)
 
-        def identifier_resolver(marker: Marker):
-            return self.find_identifier(source_info, marker)
+        def identifier_resolver(m: Marker):
+            return self.find_identifier(source_info, m)
 
         # Set range_spec to cover the identifier
         search_range = restrict_search_range(action, target, identifier_resolver)
@@ -167,10 +168,9 @@ def identifier_resolver(marker: Marker):
                     region, action, lines, search_range, identifier_resolver
                 )
                 content = content_range.read(lines)
-                content = analyze_and_adjust_indentation(
-                    src_content_to_adjust=content,
-                    target_context_for_analysis=lines,
-                    base_indentation_count=dest_indent + (relindent or 0)
+                count = dest_indent + (relindent or 0)
+                content = IndentationInfo.from_content(lines).shift_indentation(
+                    content, count
                 )
             case str() | [str(), *_] | (str(), *_):
                 pass
@@ -191,22 +191,21 @@ def _apply_action(self, action: EditingAction, lines: Sequence[str], range_spec:
                 saved_content = range_spec.delete(lines)
                 # TODO Move from 'lines' to the same file or to 'other_file'
                 dest_range = self._get_index_range(InsertClause(insert_position), lines)
-                saved_content = analyze_and_adjust_indentation(
-                    src_content_to_adjust=saved_content,
-                    target_context_for_analysis=lines,
-                    base_indentation_count= dest_range.indent + (relindent or 0)
+                count = dest_range.indent + (relindent or 0)
+                saved_content = (
+                    IndentationInfo.from_content(lines).
+                    shift_indentation(saved_content, count)
                 )
                 dest_range.write(saved_content, lines)
 
             case DeleteClause():
                 range_spec.delete(lines)
 
             case ReplaceClause() | InsertClause():
-                content = analyze_and_normalize_indentation(
-                    src_content_to_adjust=content,
-                    target_context_for_analysis=lines,
-                    context_indent_count=range_spec.indent
+                content = IndentationInfo.from_content(lines).apply_relative_indents(
+                    content, range_spec.indent
                 )
+
                 range_spec.write(content, lines)
 
             case _ as invalid:
@@ -250,7 +249,7 @@ def find_index_range_for_region(self,
                                 pass
                             case _:
                                 # TODO transform to RangeSpec
-                                mos = self.find_identifier(lines, f'for:{region}', mos).body
+                                mos = self.find_identifier(("find_index_range_for_region", lines), mos).body
                 index_range = mos.to_search_range(
                     lines,
                     search_range.start if search_range else 0,
diff --git a/src/cedarscript_editor/identifier_selector.py b/src/cedarscript_editor/identifier_selector.py
@@ -5,7 +5,7 @@
 import logging
 
 from cedarscript_editor.python_identifier_finder import find_python_identifier
-from cedarscript_editor.text_editor_kit import IdentifierBoundaries
+from cedarscript_editor.range_spec import IdentifierBoundaries
 
 _log = logging.getLogger(__name__)
 
diff --git a/src/cedarscript_editor/indentation_kit.py b/src/cedarscript_editor/indentation_kit.py
@@ -0,0 +1,217 @@
+import re
+from collections import Counter
+from collections.abc import Sequence
+from math import gcd
+from typing import NamedTuple
+
+def get_line_indent_count(line: str):
+    return len(line) - len(line.lstrip())  # number of leading whitespace characters in `line`
+
+
+class IndentationInfo(NamedTuple):
+    """
+    A class to represent and manage indentation information.
+
+    This class analyzes and provides utilities for working with indentation.
+    It detects the indentation character (space or tab),
+    the number of characters used for each indentation level, and provides
+    methods to adjust and normalize indentation.
+
+    Attributes:
+        char_count (int): The number of characters used for each indentation level.
+        char (str): The character used for indentation (' ' for space, '\t' for tab).
+        min_indent_level (int): The minimum indentation level found in the analyzed content.
+        consistency (bool): Whether the indentation is consistent throughout the content.
+        message (str | None): A message describing the indentation analysis results.
+
+    Class Methods:
+        from_content: Analyzes the indentation in the given content and creates an IndentationInfo instance.
+
+    Methods:
+        level_difference: Calculates the difference in indentation levels.
+        char_count_to_level: Converts a character count to an indentation level.
+        level_to_chars: Converts an indentation level to a string of indentation characters.
+        shift_indentation: Adjusts the indentation of a sequence of lines.
+        apply_relative_indents: Applies relative indentation based on annotations in the content.
+
+    Note:
+        This class is particularly useful for processing Python code with varying
+        or inconsistent indentation, and for adjusting indentation to meet specific
+        formatting requirements.
+    """
+    char_count: int
+    char: str
+    min_indent_level: int
+    consistency: bool = True
+    message: str | None = None
+
+    @classmethod
+    def from_content[T: IndentationInfo, S: Sequence[str]](cls: T, content: str | S) -> T:
+        """
+        Analyzes the indentation in the given content and creates an IndentationInfo instance.
+
+        This method examines the indentation patterns in the provided content,
+        determines the dominant indentation character and count, and assesses
+        the consistency of indentation throughout the content.
+
+        Args:
+            content (str | Sequence[str]): The content to analyze. Can be a string
+                                           or a sequence of strings.
+
+        Returns:
+            IndentationInfo: An instance of IndentationInfo with the analysis results.
+
+        Note:
+            - If the content has no non-blank lines, it assumes 4 spaces as per PEP 8.
+            - For space indentation, the count is derived via GCD from the most common
+              even indent widths, defaulting to 2 when there are none.
+        """
+        # TODO Always send str?  NOTE: leading whitespace must be kept here - it is what gets analyzed
+        lines = [x for x in content.splitlines() if x.strip()] if isinstance(content, str) else content
+
+        def extract_indentation(line: str) -> str:
+            return re.match(r'^\s*', line).group(0)
+
+        indentations = [extract_indentation(line) for line in lines if line.strip()]
+
+        if not indentations:
+            return cls(4, ' ', 0, True, "No indentation found. Assuming 4 spaces (PEP 8).")
+
+        indent_chars = Counter(indent[0] for indent in indentations if indent)
+        dominant_char = ' ' if indent_chars.get(' ', 0) >= indent_chars.get('\t', 0) else '\t'
+
+        indent_lengths = [len(indent) for indent in indentations]
+
+        if dominant_char == '\t':
+            char_count = 1
+        else:
+            # For spaces, determine the most likely char_count
+            space_counts = [sc for sc in indent_lengths if sc % 2 == 0 and sc > 0]
+            if not space_counts:
+                char_count = 2  # Default to 2 if no even space counts
+            else:
+                # Sort top 5 space counts and find the largest GCD
+                sorted_counts = sorted([c[0] for c in Counter(space_counts).most_common(5)], reverse=True)
+                char_count = sorted_counts[0]
+                for i in range(1, len(sorted_counts)):
+                    new_gcd = gcd(char_count, sorted_counts[i])
+                    if new_gcd <= 1:
+                        break
+                    char_count = new_gcd
+
+        min_indent_chars = min(indent_lengths) if indent_lengths else 0
+        min_indent_level = min_indent_chars // char_count
+
+        consistency = all(len(indent) % char_count == 0 for indent in indentations if indent)
+        match dominant_char:
+            case ' ':
+                domcharstr = 'space'
+            case '\t':
+                domcharstr = 'tab'
+            case _:
+                domcharstr = dominant_char
+        message = f"Found {char_count}-{domcharstr} indentation"
+        if not consistency:
+            message += " (inconsistent)"
+
+        return cls(char_count, dominant_char, min_indent_level, consistency, message)
+
+    def level_difference(self, base_indentation_count: int):
+        return self.char_count_to_level(base_indentation_count) - self.min_indent_level
+
+    def char_count_to_level(self, char_count: int) -> int:
+        return char_count // self.char_count
+
+    def level_to_chars(self, level: int) -> str:
+        return level * self.char_count * self.char
+
+    def shift_indentation(self, lines: Sequence[str], target_base_indentation_count: int) -> list[str]:
+        """
+        Shifts the indentation of a sequence of lines based on a base indentation count.
+
+        This method adjusts the indentation of each non-empty line in the input sequence.
+        It calculates the difference between the base indentation and the minimum
+        indentation found in the content, then applies this shift to all lines.
+
+        Args:
+            lines (Sequence[str]): A sequence of strings representing the lines to be adjusted.
+            target_base_indentation_count (int): The base indentation count to adjust from.
+
+        Returns:
+            list[str]: A new list of strings with adjusted indentation.
+
+        Note:
+            - Empty lines and lines with only whitespace are preserved as-is.
+            - The method uses the IndentationInfo of the instance to determine
+              the indentation character and count.
+            - This method is useful for uniformly adjusting indentation across all lines.
+        """
+        raw_line_adjuster = self._shift_indentation_fun(target_base_indentation_count)
+        # Return the transformed lines
+        return [raw_line_adjuster(line) for line in lines]
+
+    def _shift_indentation_fun(self, target_base_indentation_count: int):
+        # Calculate the indentation difference
+        level_difference = self.level_difference(target_base_indentation_count)
+
+        def adjust_line(line: str) -> str:
+            if not line.strip():
+                # Handle empty lines or lines with only whitespace
+                return line
+
+            current_indent_count = get_line_indent_count(line)
+            current_level = self.char_count_to_level(current_indent_count)
+            new_level = max(0, current_level + level_difference)
+            new_indent = self.level_to_chars(new_level)
+
+            return new_indent + line.lstrip()
+        return adjust_line
+
+    def apply_relative_indents[S: Sequence[str]](self, content: str | S, context_indent_count: int = 0) -> list[str]:
+        """
+        Applies relative indentation based on annotations in the content.
+
+        This method processes the input content, interpreting special annotations
+        to apply relative indentation. It uses '@' followed by a number to indicate
+        relative indentation levels.
+
+        Args:
+            content (str | Sequence[str]): The content to process. Can be a string
+                                           or a sequence of strings.
+            context_indent_count (int, optional): The base indentation count of the
+                                                  context. Defaults to 0.
+
+        Returns:
+            list[str]: A new list of strings with normalized indentation (without the annotations)
+
+        Note:
+            - Lines starting with '@n:' (where n is an integer) are interpreted as
+              having a relative indentation of n levels from the context indent level.
+            - When content is a str, blank and whitespace-only lines are dropped.
+            - The method uses the IndentationInfo of the instance to determine
+              the indentation character and count.
+            - This method is particularly useful for content with varying
+              indentation levels specified by annotations.
+
+        Raises:
+            AssertionError: If the calculated indentation level for any line is negative.
+        """
+        # TODO Always send str?  NOTE: a sequence is copied so the caller's data is never mutated
+        lines = [line.lstrip() for line in content.splitlines() if line.strip()] if isinstance(content, str) else list(content)
+
+        context_indent_level = self.char_count_to_level(context_indent_count)
+        # Rewrite each entry of the working list in place (the list is private to this call)
+        for i, line in enumerate(lines):
+            parts = line.split(':', 1)
+            if len(parts) == 2 and parts[0].startswith('@'):
+                relative_indent_level = int(parts[0][1:])
+                absolute_indent_level = context_indent_level + relative_indent_level
+                assert absolute_indent_level >= 0, f"Final indentation for line `{line.strip()}` cannot be negative ({absolute_indent_level})"
+                lines[i] = self.level_to_chars(absolute_indent_level) + parts[1].lstrip()
+            else:
+                absolute_indent_level = context_indent_level
+                lines[i] = self.level_to_chars(absolute_indent_level) + line.lstrip()
+
+        return lines
+
+
diff --git a/src/cedarscript_editor/python_identifier_finder.py b/src/cedarscript_editor/python_identifier_finder.py
@@ -3,7 +3,8 @@
 from rope.base import ast, libutils
 from collections.abc import Sequence
 
-from cedarscript_editor.text_editor_kit import IdentifierBoundaries, RangeSpec, get_line_indent_count
+from cedarscript_editor.range_spec import IdentifierBoundaries, RangeSpec
+from cedarscript_editor.indentation_kit import get_line_indent_count
 
 
 def get_by_offset(obj: Sequence, offset: int):
@@ -19,8 +20,10 @@ def find_python_identifier(root_path: str, file_name: str, source: str, marker:
     :param root_path:
     :param file_name:
     :param source: Source code.
-    :param marker: Type, name and offset of the identifier to find. TODO: If `None` when there are 2 or more identifiers with the same name, raise exception.
-    :return: IdentifierBoundaries with identifier start, body start, and end lines of the identifier or None if not found.
+    :param marker: Type, name and offset of the identifier to find.
+    TODO: If `None` when there are 2 or more identifiers with the same name, raise exception.
+    :return: IdentifierBoundaries with identifier start, body start, and end lines of the identifier
+    or None if not found.
     """
     project = rope.base.project.Project(root_path)
     resource = libutils.path_to_resource(project, file_name)
diff --git a/src/cedarscript_editor/range_spec.py b/src/cedarscript_editor/range_spec.py
diff --git a/src/cedarscript_editor/text_editor_kit.py b/src/cedarscript_editor/text_editor_kit.py