From 8d07e24f0699c355a477206b554900693a611dc0 Mon Sep 17 00:00:00 2001 From: dan mackinlay Date: Wed, 17 Sep 2025 06:34:38 +0800 Subject: [PATCH 1/4] support --since flag for restricting ourselves to files changed since a specific git revision --- README.md | 24 ++++ files_to_prompt/cli.py | 120 +++++++++++++++++ tests/test_files_to_prompt.py | 241 +++++++++++++++++++++++++++++++++- 3 files changed, 378 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 06e1dad..a22ec80 100644 --- a/README.md +++ b/README.md @@ -191,6 +191,30 @@ You can mix and match paths from command line arguments and stdin: find . -mtime -1 | files-to-prompt README.md ``` +### Selecting files changed since a Git revision + +Use `--since REF` to include only files that have changed since a given Git revision +(commit SHA, tag, or branch). + +Includes: +- Tracked changes between `REF` and your current working tree. +- Untracked files (Git's ignore rules always apply). + +Notes: +- Deleted files are not included. +- Paths are repo-root-relative in this mode. +- `--ignore-gitignore` is ignored (a warning is printed). + +Other filters (`-e/--extension`, `--ignore`, `--ignore-files-only`, `--include-hidden`) still apply. +Passing paths (args or stdin) further restricts the set. + +Examples: +```bash +files-to-prompt --since HEAD +files-to-prompt --since abc123 -e py +files-to-prompt --since main src tests +``` + ### Claude XML Output Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window. 
diff --git a/files_to_prompt/cli.py b/files_to_prompt/cli.py index 7eee04f..5b801e9 100644 --- a/files_to_prompt/cli.py +++ b/files_to_prompt/cli.py @@ -1,11 +1,53 @@ import os import sys +import subprocess from fnmatch import fnmatch import click global_index = 1 + +def _run_git(args, cwd): + try: + proc = subprocess.run( + ["git", *args], + cwd=cwd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=True, + ) + return proc.stdout.strip() + except FileNotFoundError: + raise click.ClickException("git not found on PATH; --since requires Git") + except subprocess.CalledProcessError as e: + raise click.ClickException(e.stderr.strip() or e.stdout.strip() or str(e)) + + +def _git_repo_root(): + inside = _run_git(["rev-parse", "--is-inside-work-tree"], os.getcwd()) + if inside != "true": + raise click.ClickException( + "The --since option requires running inside a Git repository" + ) + return _run_git(["rev-parse", "--show-toplevel"], os.getcwd()) + + +def _git_changed_paths_since(ref, repo_root): + # tracked changes + diff_out = _run_git(["diff", "--name-only", ref, "--"], repo_root) + tracked = set(line for line in diff_out.splitlines() if line) + + # untracked (respect ignore rules always) + untracked_out = _run_git(["ls-files", "--others", "--exclude-standard"], repo_root) + untracked = set(line for line in untracked_out.splitlines() if line) + + candidates = tracked | untracked + # only existing files + return {p for p in candidates if os.path.isfile(os.path.join(repo_root, p))} + + EXT_TO_LANG = { "py": "python", "c": "c", @@ -244,6 +286,16 @@ def read_paths_from_stdin(use_null_separator): is_flag=True, help="Use NUL character as separator when reading from stdin", ) +@click.option( + "since_ref", + "--since", + metavar="REF", + help=( + "Only include files changed since this Git revision (commit/tag/branch). " + "Paths given on the command line (or stdin) further restrict results. " + "In this mode, --ignore-gitignore is ignored." 
+ ), +) @click.version_option() def cli( paths, @@ -257,6 +309,7 @@ def cli( markdown, line_numbers, null, + since_ref, ): """ Takes one or more paths to files or directories and outputs every file, @@ -302,6 +355,73 @@ def cli( # Combine paths from arguments and stdin paths = [*paths, *stdin_paths] + # Handle --since specially + if since_ref: + if ignore_gitignore: + click.echo( + click.style( + "--ignore-gitignore is ignored with --since; Git's ignore rules always apply.", + fg="yellow", + ), + err=True, + ) + repo_root = _git_repo_root() + changed = _git_changed_paths_since(since_ref, repo_root) + + # restrict to explicit paths if given + if paths: + abs_paths = [os.path.abspath(p) for p in paths] + changed = { + rel + for rel in changed + if any(os.path.join(repo_root, rel).startswith(ap) for ap in abs_paths) + } + + # apply include_hidden / ignore / extensions filters + rels = [] + for rel in sorted(changed): + if not include_hidden and any( + part.startswith(".") for part in rel.split(os.sep) + ): + continue + if extensions and not rel.endswith(tuple(extensions)): + continue + if ignore_patterns: + base = os.path.basename(rel) + if ignore_files_only: + if any(fnmatch(base, pat) for pat in ignore_patterns): + continue + else: + parts = rel.split(os.sep) + if any(fnmatch(p, pat) for pat in ignore_patterns for p in parts): + continue + rels.append(rel) + + writer = click.echo + fp = None + if output_file: + fp = open(output_file, "w", encoding="utf-8") + writer = lambda s: print(s, file=fp) + if claude_xml: + writer("") + for rel in rels: + abs_fp = os.path.join(repo_root, rel) + try: + with open(abs_fp, "r") as f: + print_path( + writer, rel, f.read(), claude_xml, markdown, line_numbers + ) + except UnicodeDecodeError: + warning_message = ( + f"Warning: Skipping file {rel} due to UnicodeDecodeError" + ) + click.echo(click.style(warning_message, fg="red"), err=True) + if claude_xml: + writer("") + if fp: + fp.close() + return + gitignore_rules = [] writer = 
click.echo fp = None diff --git a/tests/test_files_to_prompt.py b/tests/test_files_to_prompt.py index 5268995..ab01f04 100644 --- a/tests/test_files_to_prompt.py +++ b/tests/test_files_to_prompt.py @@ -1,12 +1,24 @@ import os import pytest import re +import subprocess from click.testing import CliRunner from files_to_prompt.cli import cli +def _git(cmd, cwd): + return subprocess.run( + ["git", *cmd], + cwd=cwd, + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + + def filenames_from_cxml(cxml_string): "Return set of filenames from ... tags" return set(re.findall(r"(.*?)", cxml_string)) @@ -235,7 +247,7 @@ def test_mixed_paths_with_options(tmpdir): def test_binary_file_warning(tmpdir): - runner = CliRunner(mix_stderr=False) + runner = CliRunner() with tmpdir.as_cwd(): os.makedirs("test_dir") with open("test_dir/binary_file.bin", "wb") as f: @@ -246,15 +258,17 @@ def test_binary_file_warning(tmpdir): result = runner.invoke(cli, ["test_dir"]) assert result.exit_code == 0 - stdout = result.stdout - stderr = result.stderr + # Check output and stderr (may be combined) + output = result.output or "" + stderr = getattr(result, "stderr", "") or "" + combined = output + stderr - assert "test_dir/text_file.txt" in stdout - assert "This is a text file" in stdout - assert "\ntest_dir/binary_file.bin" not in stdout + assert "test_dir/text_file.txt" in output + assert "This is a text file" in output + assert "\ntest_dir/binary_file.bin" not in output assert ( "Warning: Skipping file test_dir/binary_file.bin due to UnicodeDecodeError" - in stderr + in combined ) @@ -439,3 +453,216 @@ def test_markdown(tmpdir, option): "`````\n" ) assert expected.strip() == actual.strip() + + +def test_since_requires_git_repo(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + # Not a repo + result = runner.invoke(cli, ["--since", "HEAD"]) + assert result.exit_code != 0 + # either output or stderr can contain the message depending on click version + combined = 
(result.output or "") + (getattr(result, "stderr", "") or "") + assert "not a git repository" in combined + + +def test_since_head_changes_and_untracked(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + # base commit + with open("a.txt", "w") as f: + f.write("v1\n") + _git(["add", "a.txt"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # modify tracked + add untracked + with open("a.txt", "w") as f: + f.write("v2\n") + with open("b.txt", "w") as f: + f.write("new file\n") + + result = runner.invoke(cli, ["--since", "HEAD"]) + assert result.exit_code == 0 + # repo-root-relative paths + assert "a.txt" in result.output + assert "b.txt" in result.output + assert "v2" in result.output + assert "new file" in result.output + + +def test_since_respects_filters(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + + # Create .gitignore that ignores *.log (Git-level ignores) + with open(".gitignore", "w") as f: + f.write("*.log\n") + _git(["add", ".gitignore"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "gi", + ], + os.getcwd(), + ) + + # base commit + os.makedirs("pkg", exist_ok=True) + with open("pkg/keep.py", "w") as f: + f.write("base\n") + _git(["add", "pkg/keep.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # changes after base + with open("pkg/keep.py", "w") as f: + f.write("changed\n") + with open("note.md", "w") as f: + f.write("doc\n") + with open("tmp.log", "w") as f: + f.write("ignored by gitignore\n") + os.makedirs(".hidden_dir", exist_ok=True) + with open(".hidden_dir/x.txt", "w") as f: + f.write("hidden\n") + + # 1) extension filter: only md, hidden excluded by default, gitignored excluded by Git + result = 
runner.invoke(cli, ["--since", "HEAD", "-e", "md"]) + assert result.exit_code == 0 + out = result.output + assert "note.md" in out + assert "pkg/keep.py" not in out + assert "tmp.log" not in out + assert ".hidden_dir/x.txt" not in out + + # 2) include hidden: now hidden file appears, but gitignored (*.log) still excluded + result2 = runner.invoke(cli, ["--since", "HEAD", "--include-hidden"]) + assert result2.exit_code == 0 + out2 = result2.output + assert "pkg/keep.py" in out2 + assert "note.md" in out2 + assert ".hidden_dir/x.txt" in out2 + assert "tmp.log" not in out2 # still excluded by Git ignore rules + + # 3) tool-level ignore patterns: ignore *.md at the tool layer + result3 = runner.invoke(cli, ["--since", "HEAD", "--ignore", "*.md"]) + assert result3.exit_code == 0 + out3 = result3.output + assert "note.md" not in out3 + assert "pkg/keep.py" in out3 + + +def test_since_ignores_ignore_gitignore_flag_with_warning(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + with open(".gitignore", "w") as f: + f.write("*.log\n") + _git(["add", ".gitignore"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "gi", + ], + os.getcwd(), + ) + + # base commit + with open("a.py", "w") as f: + f.write("base\n") + _git(["add", "a.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # change tracked; create ignored untracked + with open("a.py", "w") as f: + f.write("changed\n") + with open("build.log", "w") as f: + f.write("ignored by git\n") + + result = runner.invoke(cli, ["--since", "HEAD", "--ignore-gitignore"]) + # Should warn and still exclude the .log file + # Check both output and stderr for the warning + combined = (result.output or "") + (getattr(result, "stderr", "") or "") + assert "ignored with --since" in combined + assert "a.py" in result.output + assert "build.log" 
not in result.output + + +def test_since_respects_path_restrictions(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + + os.makedirs("src", exist_ok=True) + os.makedirs("tests", exist_ok=True) + + with open("src/a.py", "w") as f: + f.write("v1\n") + _git(["add", "src/a.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + with open("src/a.py", "w") as f: + f.write("v2\n") + with open("tests/t_test.py", "w") as f: + f.write("new\n") + + # restrict to src/ only + result = runner.invoke(cli, ["--since", "HEAD", "src"]) + assert result.exit_code == 0 + out = result.output + assert "src/a.py" in out + assert "tests/t_test.py" not in out From 266ecfc4b79f29e593b1bc2939c0a61ed52a9315 Mon Sep 17 00:00:00 2001 From: dan mackinlay Date: Wed, 17 Sep 2025 07:03:15 +0800 Subject: [PATCH 2/4] support `--since-scope` --- README.md | 34 ++- files_to_prompt/cli.py | 88 +++++-- tests/test_files_to_prompt.py | 423 +++++++++++++++++++++++++++++++++- 3 files changed, 512 insertions(+), 33 deletions(-) diff --git a/README.md b/README.md index a22ec80..c91b138 100644 --- a/README.md +++ b/README.md @@ -196,25 +196,41 @@ find . -mtime -1 | files-to-prompt README.md Use `--since REF` to include only files that have changed since a given Git revision (commit SHA, tag, or branch). -Includes: -- Tracked changes between `REF` and your current working tree. -- Untracked files (Git's ignore rules always apply). - -Notes: -- Deleted files are not included. -- Paths are repo-root-relative in this mode. -- `--ignore-gitignore` is ignored (a warning is printed). - Other filters (`-e/--extension`, `--ignore`, `--ignore-files-only`, `--include-hidden`) still apply. Passing paths (args or stdin) further restricts the set. 
+#### Controlling what "changed since REF" means + +`--since REF` accepts an optional scope with `--since-scope`: + +- **working** (default): commits after `REF` + staged changes + unstaged changes, + plus untracked files (Git ignores respected). +- **committed**: only commits after `REF` (no staged/unstaged/untracked). +- **staged**: only files staged in the index relative to `REF` (no unstaged/untracked). + Examples: ```bash +# Default: everything in your working copy since REF files-to-prompt --since HEAD + +# Only what's been committed after REF +files-to-prompt --since v1.2.0 --since-scope committed + +# Only what you've staged to commit (index vs REF) +files-to-prompt --since HEAD --since-scope staged + +# Other examples with filters files-to-prompt --since abc123 -e py files-to-prompt --since main src tests ``` +Notes: +- Paths are repo-root-relative in `--since` mode. +- Deleted files are not emitted. +- Untracked files are included **only** with `--since-scope working`. +- `--ignore-gitignore` is ignored with `--since` (a warning is printed); Git's ignore rules are always used for untracked detection. +- `-e/--extension`, `--ignore`, `--ignore-files-only`, `--include-hidden`, and path arguments/stdin still apply after the changed set is computed. + ### Claude XML Output Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window. 
diff --git a/files_to_prompt/cli.py b/files_to_prompt/cli.py index 5b801e9..3603c30 100644 --- a/files_to_prompt/cli.py +++ b/files_to_prompt/cli.py @@ -34,14 +34,36 @@ def _git_repo_root(): return _run_git(["rev-parse", "--show-toplevel"], os.getcwd()) -def _git_changed_paths_since(ref, repo_root): - # tracked changes - diff_out = _run_git(["diff", "--name-only", ref, "--"], repo_root) - tracked = set(line for line in diff_out.splitlines() if line) - - # untracked (respect ignore rules always) - untracked_out = _run_git(["ls-files", "--others", "--exclude-standard"], repo_root) - untracked = set(line for line in untracked_out.splitlines() if line) +def _validate_git_ref(ref, repo_root): + try: + _run_git(["rev-parse", "--quiet", "--verify", f"{ref}^{{commit}}"], repo_root) + except click.ClickException as e: + raise click.ClickException(f"Invalid Git revision '{ref}'.") + + +def _git_changed_paths_since(ref, repo_root, scope="working"): + tracked = set() + + if scope == "working": + # tracked changes between REF and working tree + diff_out = _run_git(["diff", "--name-only", ref, "--"], repo_root) + tracked = set(line for line in diff_out.splitlines() if line) + elif scope == "committed": + # only commits after REF + diff_out = _run_git(["diff", "--name-only", f"{ref}..HEAD", "--"], repo_root) + tracked = set(line for line in diff_out.splitlines() if line) + elif scope == "staged": + # only what's staged in index relative to REF + diff_out = _run_git(["diff", "--name-only", "--cached", ref, "--"], repo_root) + tracked = set(line for line in diff_out.splitlines() if line) + + # untracked files only for working scope + untracked = set() + if scope == "working": + untracked_out = _run_git( + ["ls-files", "--others", "--exclude-standard"], repo_root + ) + untracked = set(line for line in untracked_out.splitlines() if line) candidates = tracked | untracked # only existing files @@ -296,6 +318,14 @@ def read_paths_from_stdin(use_null_separator): "In this mode, 
--ignore-gitignore is ignored." ), ) +@click.option( + "since_scope", + "--since-scope", + type=click.Choice(["working", "committed", "staged"]), + default="working", + show_default=True, + help="Which changes to include relative to REF: 'working' = commits after REF + staged + unstaged + untracked; 'committed' = commits after REF; 'staged' = index vs REF.", +) @click.version_option() def cli( paths, @@ -310,6 +340,7 @@ def cli( line_numbers, null, since_ref, + since_scope, ): """ Takes one or more paths to files or directories and outputs every file, @@ -366,7 +397,8 @@ def cli( err=True, ) repo_root = _git_repo_root() - changed = _git_changed_paths_since(since_ref, repo_root) + _validate_git_ref(since_ref, repo_root) + changed = _git_changed_paths_since(since_ref, repo_root, since_scope) # restrict to explicit paths if given if paths: @@ -374,26 +406,32 @@ def cli( changed = { rel for rel in changed - if any(os.path.join(repo_root, rel).startswith(ap) for ap in abs_paths) + if any( + os.path.commonpath([os.path.join(repo_root, rel), ap]) == ap + for ap in abs_paths + ) } # apply include_hidden / ignore / extensions filters rels = [] for rel in sorted(changed): + rel_norm = os.path.normpath(rel.replace("/", os.sep)) if not include_hidden and any( - part.startswith(".") for part in rel.split(os.sep) + part.startswith(".") for part in rel_norm.split(os.sep) ): continue if extensions and not rel.endswith(tuple(extensions)): continue if ignore_patterns: - base = os.path.basename(rel) + base = os.path.basename(rel_norm) if ignore_files_only: if any(fnmatch(base, pat) for pat in ignore_patterns): continue else: - parts = rel.split(os.sep) - if any(fnmatch(p, pat) for pat in ignore_patterns for p in parts): + parts = rel_norm.split(os.sep) + if any( + fnmatch(part, pat) for part in parts for pat in ignore_patterns + ): continue rels.append(rel) @@ -405,15 +443,23 @@ def cli( if claude_xml: writer("") for rel in rels: - abs_fp = os.path.join(repo_root, rel) try: - with 
open(abs_fp, "r") as f: - print_path( - writer, rel, f.read(), claude_xml, markdown, line_numbers - ) - except UnicodeDecodeError: + # For staged scope, read content from index; otherwise from working tree + if since_scope == "staged": + content = _run_git(["show", f":{rel}"], repo_root) + else: + abs_fp = os.path.join(repo_root, rel) + with open(abs_fp, "r") as f: + content = f.read() + + print_path(writer, rel, content, claude_xml, markdown, line_numbers) + except ( + UnicodeDecodeError, + subprocess.CalledProcessError, + click.ClickException, + ): warning_message = ( - f"Warning: Skipping file {rel} due to UnicodeDecodeError" + f"Warning: Skipping file {rel} due to Unicode/Git error" ) click.echo(click.style(warning_message, fg="red"), err=True) if claude_xml: diff --git a/tests/test_files_to_prompt.py b/tests/test_files_to_prompt.py index ab01f04..2cc4bb4 100644 --- a/tests/test_files_to_prompt.py +++ b/tests/test_files_to_prompt.py @@ -368,7 +368,6 @@ def test_line_numbers(tmpdir): @pytest.mark.parametrize( "input,extra_args", ( - ("test_dir1/file1.txt\ntest_dir2/file2.txt", []), ("test_dir1/file1.txt\ntest_dir2/file2.txt", []), ("test_dir1/file1.txt\0test_dir2/file2.txt", ["--null"]), ("test_dir1/file1.txt\0test_dir2/file2.txt", ["-0"]), @@ -463,7 +462,10 @@ def test_since_requires_git_repo(tmpdir): assert result.exit_code != 0 # either output or stderr can contain the message depending on click version combined = (result.output or "") + (getattr(result, "stderr", "") or "") - assert "not a git repository" in combined + assert ( + "requires running inside a Git repository" in combined + or "not a git repository" in combined + ) def test_since_head_changes_and_untracked(tmpdir): @@ -624,7 +626,7 @@ def test_since_ignores_ignore_gitignore_flag_with_warning(tmpdir): result = runner.invoke(cli, ["--since", "HEAD", "--ignore-gitignore"]) # Should warn and still exclude the .log file - # Check both output and stderr for the warning + # Check both output and stderr 
for the warning (click versions vary) combined = (result.output or "") + (getattr(result, "stderr", "") or "") assert "ignored with --since" in combined assert "a.py" in result.output @@ -666,3 +668,418 @@ def test_since_respects_path_restrictions(tmpdir): out = result.output assert "src/a.py" in out assert "tests/t_test.py" not in out + + +def test_since_scope_committed(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + + # Create initial commit + with open("a.py", "w") as f: + f.write("initial\n") + _git(["add", "a.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "initial", + ], + os.getcwd(), + ) + + # Create base commit after which we want to see changes + with open("b.py", "w") as f: + f.write("base\n") + _git(["add", "b.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # Get the base commit hash + base_commit = _git(["rev-parse", "HEAD"], os.getcwd()).stdout.strip() + + # Make a committed change after base + with open("c.py", "w") as f: + f.write("committed\n") + _git(["add", "c.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "committed", + ], + os.getcwd(), + ) + + # Make staged and unstaged changes + untracked file + with open("d.py", "w") as f: + f.write("staged\n") + _git(["add", "d.py"], os.getcwd()) # staged + + with open("e.py", "w") as f: + f.write("unstaged\n") # unstaged + + with open("f.py", "w") as f: + f.write("untracked\n") # untracked + + # Test committed scope: should only show c.py (committed after base) + result = runner.invoke( + cli, ["--since", base_commit, "--since-scope", "committed"] + ) + assert result.exit_code == 0 + out = result.output + assert "c.py" in out + assert "committed" in out + assert "d.py" not in out # staged, not committed + assert "e.py" not in 
out # unstaged + assert "f.py" not in out # untracked + + +def test_since_scope_staged(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + + # Create base commit + with open("a.py", "w") as f: + f.write("base\n") + _git(["add", "a.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # Stage a change + with open("a.py", "w") as f: + f.write("staged change\n") + _git(["add", "a.py"], os.getcwd()) + + # Make an unstaged change on top + with open("a.py", "w") as f: + f.write("staged change\nplus unstaged\n") + + # Add untracked file + with open("untracked.py", "w") as f: + f.write("untracked\n") + + # Test staged scope: should only show staged changes + result = runner.invoke(cli, ["--since", "HEAD", "--since-scope", "staged"]) + assert result.exit_code == 0 + out = result.output + assert "a.py" in out + assert "staged change" in out + assert "plus unstaged" not in out # unstaged part not included + assert "untracked.py" not in out # untracked not included + + +def test_since_scope_working_includes_all(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + + # Create base commit + with open("a.py", "w") as f: + f.write("base\n") + _git(["add", "a.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # Modify tracked file (unstaged) + with open("a.py", "w") as f: + f.write("modified\n") + + # Stage a new file + with open("staged.py", "w") as f: + f.write("staged content\n") + _git(["add", "staged.py"], os.getcwd()) + + # Add untracked file + with open("untracked.py", "w") as f: + f.write("untracked content\n") + + # Test working scope (default): should include all + result = runner.invoke(cli, ["--since", "HEAD", "--since-scope", "working"]) + assert result.exit_code == 0 + out = result.output + 
assert "a.py" in out + assert "modified" in out + assert "staged.py" in out + assert "staged content" in out + assert "untracked.py" in out + assert "untracked content" in out + + +def test_since_scope_default_is_working(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + + # Create base commit + with open("a.py", "w") as f: + f.write("base\n") + _git(["add", "a.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # Add untracked file + with open("untracked.py", "w") as f: + f.write("untracked\n") + + # Test that default behavior includes untracked (working scope) + result = runner.invoke(cli, ["--since", "HEAD"]) + assert result.exit_code == 0 + out = result.output + assert "untracked.py" in out + assert "untracked" in out + + +def test_since_scope_committed_excludes_staged_unstaged_untracked(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + # base commit (REF will be this HEAD) + with open("base.txt", "w") as f: + f.write("base\n") + _git(["add", "base.txt"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # create committed change after REF + with open("committed_only.py", "w") as f: + f.write("c1\n") + _git(["add", "committed_only.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "commit after ref", + ], + os.getcwd(), + ) + + # also create staged, unstaged, and untracked noise + with open("mix.txt", "w") as f: + f.write("v1\n") + _git(["add", "mix.txt"], os.getcwd()) # staged + with open("unstaged.txt", "w") as f: + f.write("u1\n") # unstaged (tracked only if added later—so keep untracked) + with open("untracked.md", "w") as f: + f.write("new\n") # untracked + + # since-scope=committed should ONLY show committed_only.py 
+ result = runner.invoke(cli, ["--since", "HEAD~1", "--since-scope", "committed"]) + assert result.exit_code == 0 + out = result.output + assert "committed_only.py" in out + assert "mix.txt" not in out + assert "unstaged.txt" not in out + assert "untracked.md" not in out + + +def test_since_scope_staged_includes_only_staged(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + # base commit + with open("a.txt", "w") as f: + f.write("v1\n") + _git(["add", "a.txt"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # staged change to a.txt + with open("a.txt", "w") as f: + f.write("v2\n") + _git(["add", "a.txt"], os.getcwd()) + + # unstaged change on top of staged (same file) — remains staged at v2, working tree at v3 + with open("a.txt", "w") as f: + f.write("v3\n") + + # untracked file + with open("note.md", "w") as f: + f.write("doc\n") + + # since-scope=staged: only a.txt should show up + result = runner.invoke(cli, ["--since", "HEAD", "--since-scope", "staged"]) + assert result.exit_code == 0 + out = result.output + assert "a.txt" in out + assert "note.md" not in out + # No other files + assert "base.txt" not in out + + +def test_since_scope_working_includes_staged_unstaged_untracked(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + # base commit + with open("tracked.py", "w") as f: + f.write("t1\n") + _git(["add", "tracked.py"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # staged change to tracked.py + with open("tracked.py", "w") as f: + f.write("t2\n") + _git(["add", "tracked.py"], os.getcwd()) + + # unstaged change to another tracked file (create + commit, then modify without staging) + with open("other.py", "w") as f: + f.write("o1\n") + _git(["add", "other.py"], os.getcwd()) + 
_git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "introduce other", + ], + os.getcwd(), + ) + with open("other.py", "w") as f: + f.write("o2\n") # unstaged change + + # untracked file + with open("readme.md", "w") as f: + f.write("u\n") + + # Default (working): should include staged (tracked.py), unstaged (other.py), and untracked (readme.md) + result = runner.invoke(cli, ["--since", "HEAD~1"]) + assert result.exit_code == 0 + out = result.output + assert "tracked.py" in out + assert "other.py" in out + assert "readme.md" in out + + +def test_since_scope_default_equals_working(tmpdir): + runner = CliRunner() + with tmpdir.as_cwd(): + _git(["init", "."], os.getcwd()) + # base commit + with open("base.txt", "w") as f: + f.write("b1\n") + _git(["add", "base.txt"], os.getcwd()) + _git( + [ + "-c", + "user.name=T", + "-c", + "user.email=t@example.com", + "commit", + "-m", + "base", + ], + os.getcwd(), + ) + + # Make one staged, one unstaged, one untracked + with open("staged.py", "w") as f: + f.write("s1\n") + _git(["add", "staged.py"], os.getcwd()) + + with open("unstaged.py", "w") as f: + f.write("u1\n") # untracked (we won't add) + # also tweak base.txt unstaged + with open("base.txt", "w") as f: + f.write("b2\n") + + # run twice: default & explicit working + res_default = runner.invoke(cli, ["--since", "HEAD"]) + res_working = runner.invoke( + cli, ["--since", "HEAD", "--since-scope", "working"] + ) + assert res_default.exit_code == 0 + assert res_working.exit_code == 0 + + # Both should contain staged.py, base.txt (unstaged now differs), and unstaged.py (untracked) + for out in (res_default.output, res_working.output): + assert "staged.py" in out + assert "base.txt" in out + assert "unstaged.py" in out From 636531a9efdde9abf7b87508f5e931a7e347eb8d Mon Sep 17 00:00:00 2001 From: dan mackinlay Date: Wed, 17 Sep 2025 07:12:58 +0800 Subject: [PATCH 3/4] credit myself --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 
1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9cf07cb..38424ad 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "files-to-prompt" version = "0.6" description = "Concatenate a directory full of files into a single prompt for use with LLMs" readme = "README.md" -authors = [{name = "Simon Willison"}] +authors = [{name = "Simon Willison"}, {name = "Dan MacKinlay"}] license = {text = "Apache-2.0"} requires-python = ">=3.8" classifiers = [ From 530d967e93410f00a8db33355bd0faa946c7f016 Mon Sep 17 00:00:00 2001 From: dan mackinlay Date: Wed, 17 Sep 2025 07:26:44 +0800 Subject: [PATCH 4/4] match existing docs style when documenting `--since`, `--since-scope` --- README.md | 63 +++++++++++++++++++++---------------------------------- 1 file changed, 24 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index c91b138..b449f36 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,29 @@ This will output the contents of every file, with each file preceded by its rela find . -name "*.py" -print0 | files-to-prompt --null ``` +- `--since <REF>`: Only include files changed since a Git revision (commit SHA, tag, or branch). + + * Default scope is **working**: commits after `REF` + staged + unstaged + untracked (Git ignores respected). + * Paths given on the command line (or via stdin) further restrict the set. + * In this mode, `--ignore-gitignore` is ignored. + + ```bash + files-to-prompt --since HEAD + files-to-prompt --since v1.2.0 -e py + files-to-prompt --since main src tests + ``` + +- `--since-scope <working|committed|staged>`: Control what "changed since REF" means (default: `working`). 
+ + * `working`: commits after `REF` + staged + unstaged + untracked + * `committed`: only commits after `REF` (no staged/unstaged/untracked) + * `staged`: only index vs `REF` (no unstaged/untracked) + + ```bash + files-to-prompt --since v1.2.0 --since-scope committed + files-to-prompt --since HEAD --since-scope staged + ``` + ### Example Suppose you have a directory structure like this: @@ -191,45 +214,7 @@ You can mix and match paths from command line arguments and stdin: find . -mtime -1 | files-to-prompt README.md ``` -### Selecting files changed since a Git revision - -Use `--since REF` to include only files that have changed since a given Git revision -(commit SHA, tag, or branch). - -Other filters (`-e/--extension`, `--ignore`, `--ignore-files-only`, `--include-hidden`) still apply. -Passing paths (args or stdin) further restricts the set. - -#### Controlling what "changed since REF" means - -`--since REF` accepts an optional scope with `--since-scope`: - -- **working** (default): commits after `REF` + staged changes + unstaged changes, - plus untracked files (Git ignores respected). -- **committed**: only commits after `REF` (no staged/unstaged/untracked). -- **staged**: only files staged in the index relative to `REF` (no unstaged/untracked). - -Examples: -```bash -# Default: everything in your working copy since REF -files-to-prompt --since HEAD - -# Only what's been committed after REF -files-to-prompt --since v1.2.0 --since-scope committed - -# Only what you've staged to commit (index vs REF) -files-to-prompt --since HEAD --since-scope staged - -# Other examples with filters -files-to-prompt --since abc123 -e py -files-to-prompt --since main src tests -``` - -Notes: -- Paths are repo-root-relative in `--since` mode. -- Deleted files are not emitted. -- Untracked files are included **only** with `--since-scope working`. -- `--ignore-gitignore` is ignored with `--since` (a warning is printed); Git's ignore rules are always used for untracked detection. 
-- `-e/--extension`, `--ignore`, `--ignore-files-only`, `--include-hidden`, and path arguments/stdin still apply after the changed set is computed. +**Notes for `--since`**: paths are repo-root-relative; deleted files are not emitted. Untracked files are included only with `--since-scope working`. Other filters (`-e/--extension`, `--ignore`, `--ignore-files-only`, `--include-hidden`) still apply. ### Claude XML Output