From 7bab5c7a6756aa8cce82fe59b1aa86dde0e3fd92 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 22 Dec 2023 17:33:48 -0500
Subject: [PATCH 1/6] Disable complexity checks/warnings for the main()

Subsequent changes would only make it more complex -- it needs proper
de-spaghettification
---
 codespell_lib/_codespell.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index 6e3662a8b7..93ffad3dc4 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -1026,7 +1026,7 @@ def _script_main() -> int:
     return main(*sys.argv[1:])
 
 
-def main(*args: str) -> int:
+def main(*args: str) -> int:  # noqa: C901,PLR0915
     """Contains flow control"""
     try:
         options, parser, used_cfg_files = parse_options(args)

From 8cce2e7fa5ad65193121c861d010d2f8b74f8421 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 22 Dec 2023 13:46:25 -0500
Subject: [PATCH 2/6] RF: move finding files into an embedded function to
 centralize invocation of parse_file

---
 codespell_lib/_codespell.py | 118 +++++++++++++++++++-----------------
 1 file changed, 61 insertions(+), 57 deletions(-)

diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index 93ffad3dc4..ac4546469b 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -23,7 +23,18 @@
 import re
 import sys
 import textwrap
-from typing import Any, Dict, List, Match, Optional, Pattern, Sequence, Set, Tuple
+from typing import (
+    Any,
+    Dict,
+    Generator,
+    List,
+    Match,
+    Optional,
+    Pattern,
+    Sequence,
+    Set,
+    Tuple,
+)
 
 # autogenerated by setuptools_scm
 from ._version import __version__ as VERSION  # type: ignore  # noqa: N812
@@ -1177,65 +1188,58 @@ def main(*args: str) -> int:  # noqa: C901,PLR0915
         return EX_USAGE
 
     bad_count = 0
-    for filename in sorted(options.files):
-        # ignore hidden files
-        if is_hidden(filename, options.check_hidden):
-            continue
 
-        if os.path.isdir(filename):
-            for root, dirs, files in os.walk(filename):
-                if glob_match.match(root):  # skip (absolute) directories
-                    dirs.clear()
-                    continue
-                if is_hidden(root, options.check_hidden):  # dir itself hidden
-                    continue
-                for file_ in sorted(files):
-                    # ignore hidden files in directories
-                    if is_hidden(file_, options.check_hidden):
+    def _find_files() -> Generator[str, None, None]:
+        """Yields filename for the parsing"""
+        for filename in sorted(options.files):
+            # ignore hidden files
+            if is_hidden(filename, options.check_hidden):
+                continue
+
+            if os.path.isdir(filename):
+                for root, dirs, files in os.walk(filename):
+                    if glob_match.match(root):  # skip (absolute) directories
+                        dirs.clear()
                         continue
-                    if glob_match.match(file_):  # skip files
+                    if is_hidden(root, options.check_hidden):  # dir itself hidden
                         continue
-                    fname = os.path.join(root, file_)
-                    if glob_match.match(fname):  # skip paths
-                        continue
-                    bad_count += parse_file(
-                        fname,
-                        colors,
-                        summary,
-                        misspellings,
-                        exclude_lines,
-                        file_opener,
-                        word_regex,
-                        ignore_word_regex,
-                        uri_regex,
-                        uri_ignore_words,
-                        context,
-                        options,
-                    )
-
-                # skip (relative) directories
-                dirs[:] = [
-                    dir_
-                    for dir_ in dirs
-                    if not glob_match.match(dir_)
-                    and not is_hidden(dir_, options.check_hidden)
-                ]
-
-        elif not glob_match.match(filename):  # skip files
-            bad_count += parse_file(
-                filename,
-                colors,
-                summary,
-                misspellings,
-                exclude_lines,
-                file_opener,
-                word_regex,
-                ignore_word_regex,
-                uri_regex,
-                uri_ignore_words,
-                context,
-                options,
-            )
+                    for file_ in sorted(files):
+                        # ignore hidden files in directories
+                        if is_hidden(file_, options.check_hidden):
+                            continue
+                        if glob_match.match(file_):  # skip files
+                            continue
+                        fname = os.path.join(root, file_)
+                        if glob_match.match(fname):  # skip paths
+                            continue
+                        yield fname
+
+                    # skip (relative) directories
+                    dirs[:] = [
+                        dir_
+                        for dir_ in dirs
+                        if not glob_match.match(dir_)
+                        and not is_hidden(dir_, options.check_hidden)
+                    ]
+
+            elif not glob_match.match(filename):  # skip files
+                yield filename
+
+    for filename in _find_files():
+        bad_count += parse_file(
+            filename,
+            colors,
+            summary,
+            misspellings,
+            exclude_lines,
+            file_opener,
+            word_regex,
+            ignore_word_regex,
+            uri_regex,
+            uri_ignore_words,
+            context,
+            options,
+        )
 
     if summary:
         print("\n-------8<-------\nSUMMARY:")

From 016b730e9057a8b8b7852437d84a7f5ae8b6e17f Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 22 Dec 2023 13:51:41 -0500
Subject: [PATCH 3/6] RF: provide closure for parse_file to pass only filename

RF right away to use sum(map()) to reduce number of statements
---
 codespell_lib/_codespell.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index ac4546469b..e96d9cd14a 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -1187,8 +1187,6 @@ def main(*args: str) -> int:  # noqa: C901,PLR0915
         )
         return EX_USAGE
 
-    bad_count = 0
-
     def _find_files() -> Generator[str, None, None]:
         """Yields filename for the parsing"""
         for filename in sorted(options.files):
@@ -1225,8 +1223,9 @@ def _find_files() -> Generator[str, None, None]:
             elif not glob_match.match(filename):  # skip files
                 yield filename
 
-    for filename in _find_files():
-        bad_count += parse_file(
+    # closure to pass only relevant to the job filename
+    def _parse_file(filename: str) -> int:
+        return parse_file(
             filename,
             colors,
             summary,
@@ -1241,6 +1240,8 @@ def _find_files() -> Generator[str, None, None]:
             options,
         )
 
+    bad_count = sum(map(_parse_file, _find_files()))
+
     if summary:
         print("\n-------8<-------\nSUMMARY:")
         print(summary)

From a6081f24bf2f54befcdd395ed437d5e766b89ed8 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 22 Dec 2023 17:32:55 -0500
Subject: [PATCH 4/6] TEMP: Tried with multiprocessing -- not good since can't
 pickle embedded function

---
 codespell_lib/_codespell.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index e96d9cd14a..9cd413c9ca 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -23,6 +23,7 @@
 import re
 import sys
 import textwrap
+from multiprocessing import Pool
 from typing import (
     Any,
     Dict,
@@ -1240,7 +1241,18 @@ def _parse_file(filename: str) -> int:
             options,
         )
 
-    bad_count = sum(map(_parse_file, _find_files()))
+    njobs = os.cpu_count() or 1
+    if njobs:
+        # parse_file would be in subprocess(es)
+        with Pool(njobs) as pool:
+            results = pool.map(_parse_file, _find_files())
+            for result in results:
+                if isinstance(result, Exception):
+                    raise result
+            bad_count = sum(results)
+    else:
+        # serial
+        bad_count = sum(map(_parse_file, _find_files()))
 
     if summary:
         print("\n-------8<-------\nSUMMARY:")

From 045231e221d3f506bc8e113e6d6b05a47980812a Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 22 Dec 2023 18:42:34 -0500
Subject: [PATCH 5/6] RF: Define file level FileParser class to pass options to
 parse_file

---
 codespell_lib/_codespell.py | 78 +++++++++++++++++++++++++++++--------
 1 file changed, 61 insertions(+), 17 deletions(-)

diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index 9cd413c9ca..0fe7275244 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -1033,6 +1033,52 @@ def parse_file(
     return bad_count
 
 
+class _FileParser:
+    """A helper class to provide top level closure for parse_file()"""
+
+    def __init__(
+        self,
+        colors: TermColors,
+        summary: Optional[Summary],
+        misspellings: Dict[str, Misspelling],
+        exclude_lines: Set[str],
+        file_opener: FileOpener,
+        word_regex: Pattern[str],
+        ignore_word_regex: Optional[Pattern[str]],
+        uri_regex: Pattern[str],
+        uri_ignore_words: Set[str],
+        context: Optional[Tuple[int, int]],
+        options: argparse.Namespace,
+    ) -> None:
+        self.colors = colors
+        self.summary = summary
+        self.misspellings = misspellings
+        self.exclude_lines = exclude_lines
+        self.file_opener = file_opener
+        self.word_regex = word_regex
+        self.ignore_word_regex = ignore_word_regex
+        self.uri_regex = uri_regex
+        self.uri_ignore_words = uri_ignore_words
+        self.context = context
+        self.options = options
+
+    def __call__(self, filename: str) -> int:
+        return parse_file(
+            filename,
+            self.colors,
+            self.summary,
+            self.misspellings,
+            self.exclude_lines,
+            self.file_opener,
+            self.word_regex,
+            self.ignore_word_regex,
+            self.uri_regex,
+            self.uri_ignore_words,
+            self.context,
+            self.options,
+        )
+
+
 def _script_main() -> int:
     """Wrap to main() for setuptools."""
     return main(*sys.argv[1:])
@@ -1225,34 +1271,32 @@ def _find_files() -> Generator[str, None, None]:
                 yield filename
 
     # closure to pass only relevant to the job filename
-    def _parse_file(filename: str) -> int:
-        return parse_file(
-            filename,
-            colors,
-            summary,
-            misspellings,
-            exclude_lines,
-            file_opener,
-            word_regex,
-            ignore_word_regex,
-            uri_regex,
-            uri_ignore_words,
-            context,
-            options,
-        )
+    file_parser = _FileParser(
+        colors,
+        summary,
+        misspellings,
+        exclude_lines,
+        file_opener,
+        word_regex,
+        ignore_word_regex,
+        uri_regex,
+        uri_ignore_words,
+        context,
+        options,
+    )
 
     njobs = os.cpu_count() or 1
     if njobs:
         # parse_file would be in subprocess(es)
         with Pool(njobs) as pool:
-            results = pool.map(_parse_file, _find_files())
+            results = pool.map(file_parser, _find_files())
             for result in results:
                 if isinstance(result, Exception):
                     raise result
             bad_count = sum(results)
     else:
         # serial
-        bad_count = sum(map(_parse_file, _find_files()))
+        bad_count = sum(map(file_parser, _find_files()))
 
     if summary:
         print("\n-------8<-------\nSUMMARY:")

From abebc4f162efda3a0151412cc3a40a083b22aa15 Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Fri, 22 Dec 2023 19:28:42 -0500
Subject: [PATCH 6/6] Provide CLI option -J|--jobs to control number of jobs

---
 codespell_lib/_codespell.py | 40 +++++++++++++++++++++++++++++++++----
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/codespell_lib/_codespell.py b/codespell_lib/_codespell.py
index 0fe7275244..cc14fcc22c 100644
--- a/codespell_lib/_codespell.py
+++ b/codespell_lib/_codespell.py
@@ -467,6 +467,20 @@ def parse_options(
         "should match the to-be-excluded lines exactly",
     )
 
+    parser.add_argument(
+        "-J",
+        "--jobs",
+        action="store",
+        type=int,
+        default=0,
+        help="set number of jobs to parallelize processing - one "
+        "subprocess per file:\n"
+        "- 0: no parallelization (default)\n"
+        "- positive integer: number of sub-processes to use\n"
+        "- -1: use all available CPUs\n"
+        "Interactive mode is not compatible with parallel processing",
+    )
+
     parser.add_argument(
         "-i",
         "--interactive",
@@ -1084,7 +1098,7 @@ def _script_main() -> int:
     return main(*sys.argv[1:])
 
 
-def main(*args: str) -> int:  # noqa: C901,PLR0915
+def main(*args: str) -> int:  # noqa: C901,PLR0915,PLR0911
     """Contains flow control"""
     try:
         options, parser, used_cfg_files = parse_options(args)
@@ -1196,6 +1210,25 @@ def main(*args: str) -> int:  # noqa: C901,PLR0915
     else:
         summary = None
 
+    if options.jobs and options.interactive:
+        print(
+            "ERROR: do not enable parallelization in interactive mode",
+            file=sys.stderr,
+        )
+        # no point in calling parser.print_help() here - it would just bury the ERROR
+        return EX_USAGE
+
+    jobs = options.jobs
+    if jobs == -1:
+        jobs = os.cpu_count()
+    elif jobs < -1:
+        print(
+            f"ERROR: invalid number of jobs: {jobs}",
+            file=sys.stderr,
+        )
+        parser.print_help()
+        return EX_USAGE
+
     context = None
     if options.context is not None:
         if (options.before_context is not None) or (options.after_context is not None):
@@ -1285,10 +1318,9 @@ def _find_files() -> Generator[str, None, None]:
         options,
     )
 
-    njobs = os.cpu_count() or 1
-    if njobs:
+    if jobs:
         # parse_file would be in subprocess(es)
-        with Pool(njobs) as pool:
+        with Pool(jobs) as pool:
             results = pool.map(file_parser, _find_files())
             for result in results:
                 if isinstance(result, Exception):