Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,29 @@ This will output the contents of every file, with each file preceded by its rela
find . -name "*.py" -print0 | files-to-prompt --null
```

- `--since <REF>`: Only include files changed since a Git revision (commit SHA, tag, or branch).

* Default scope is **working**: commits after `REF` + staged + unstaged + untracked (Git's ignore rules are respected).
* Paths given on the command line (or via stdin) further restrict the set.
* In this mode, `--ignore-gitignore` has no effect: Git's ignore rules always apply.

```bash
files-to-prompt --since HEAD
files-to-prompt --since v1.2.0 -e py
files-to-prompt --since main src tests
```

- `--since-scope <working|committed|staged>`: Control what "changed since REF" means (default: `working`).

* `working`: commits after `REF` + staged + unstaged + untracked
* `committed`: only commits after `REF` (no staged/unstaged/untracked)
* `staged`: only changes staged in the index relative to `REF` (no unstaged/untracked)

```bash
files-to-prompt --since v1.2.0 --since-scope committed
files-to-prompt --since HEAD --since-scope staged
```

### Example

Suppose you have a directory structure like this:
Expand Down Expand Up @@ -191,6 +214,8 @@ You can mix and match paths from command line arguments and stdin:
find . -mtime -1 | files-to-prompt README.md
```

**Notes for `--since`**: paths are repo-root-relative; deleted files are not emitted. Untracked files are included only with `--since-scope working`. Other filters (`-e/--extension`, `--ignore`, `--ignore-files-only`, `--include-hidden`) still apply.

### Claude XML Output

Anthropic has provided [specific guidelines](https://docs.anthropic.com/claude/docs/long-context-window-tips) for optimally structuring prompts to take advantage of Claude's extended context window.
Expand Down
166 changes: 166 additions & 0 deletions files_to_prompt/cli.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,75 @@
import os
import sys
import subprocess
from fnmatch import fnmatch

import click

global_index = 1


def _run_git(args, cwd):
    """Run ``git <args>`` in ``cwd`` and return its stripped standard output.

    Args:
        args: Git arguments (without the leading ``git``) as a sequence of
            strings.
        cwd: Directory in which the command is executed.

    Returns:
        The command's stdout decoded as text, with leading and trailing
        whitespace removed.

    Raises:
        click.ClickException: If launching ``git`` raises
            ``FileNotFoundError``, or if the command exits with a non-zero
            status. In the latter case the message is Git's stderr, falling
            back to its stdout and then to the exception text.
    """
    # Keep the try body down to the one call that can raise, and chain the
    # original exception so the root cause is still visible when debugging.
    try:
        proc = subprocess.run(
            ["git", *args],
            cwd=cwd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True,
        )
    except FileNotFoundError as exc:
        raise click.ClickException(
            "git not found on PATH; --since requires Git"
        ) from exc
    except subprocess.CalledProcessError as exc:
        raise click.ClickException(
            exc.stderr.strip() or exc.stdout.strip() or str(exc)
        ) from exc
    return proc.stdout.strip()


def _git_repo_root():
    """Return the top-level directory of the Git work tree for the cwd.

    Asks Git whether the current working directory is inside a work tree and,
    if so, returns the path reported by ``git rev-parse --show-toplevel``.

    Raises:
        click.ClickException: If Git does not answer ``true`` to the
            work-tree check, or if either underlying Git call fails.
    """
    here = os.getcwd()
    in_work_tree = _run_git(["rev-parse", "--is-inside-work-tree"], here)
    if in_work_tree == "true":
        return _run_git(["rev-parse", "--show-toplevel"], here)
    raise click.ClickException(
        "The --since option requires running inside a Git repository"
    )


def _validate_git_ref(ref, repo_root):
    """Check that ``ref`` resolves to a commit in the repository.

    Args:
        ref: Revision supplied by the user (commit SHA, tag, or branch).
        repo_root: Directory in which ``git rev-parse`` is run.

    Raises:
        click.ClickException: With an "Invalid Git revision" message if the
            ``rev-parse --verify`` call fails. The underlying error is kept
            as the exception's cause.
    """
    try:
        # ``^{commit}`` peels the ref to a commit, so refs that do not point
        # at a commit are rejected along with unknown names.
        _run_git(
            ["rev-parse", "--quiet", "--verify", f"{ref}^{{commit}}"], repo_root
        )
    except click.ClickException as exc:
        raise click.ClickException(f"Invalid Git revision '{ref}'.") from exc


def _split_nul_paths(output):
    """Split NUL-delimited Git output into a list of non-empty paths."""
    return [path for path in output.split("\0") if path]


def _git_changed_paths_since(ref, repo_root, scope="working"):
    """Return repo-root-relative paths of existing files changed since ``ref``.

    Args:
        ref: Git revision to compare against.
        repo_root: Repository top-level directory; Git commands run here and
            returned paths are relative to it.
        scope: Which changes count as "changed":
            ``"working"`` diffs ``ref`` against the working tree and also
            includes untracked files not excluded by Git's standard ignore
            rules; ``"committed"`` diffs ``ref..HEAD``; ``"staged"`` diffs
            the index against ``ref``. Any other value yields an empty set.

    Returns:
        A set of path strings. Only paths that currently exist as regular
        files under ``repo_root`` are included, so deleted files are dropped.

    Raises:
        click.ClickException: If an underlying Git command fails.
    """
    # ``-z`` makes Git emit NUL-terminated, unquoted paths. Without it, paths
    # with non-ASCII or special characters are C-quoted by default
    # (core.quotePath), so they would not match any real file and would be
    # silently dropped by the existence filter below.
    diff_args_by_scope = {
        "working": ["diff", "--name-only", "-z", ref, "--"],
        "committed": ["diff", "--name-only", "-z", f"{ref}..HEAD", "--"],
        "staged": ["diff", "--name-only", "-z", "--cached", ref, "--"],
    }

    candidates = set()
    diff_args = diff_args_by_scope.get(scope)
    if diff_args is not None:
        candidates.update(_split_nul_paths(_run_git(diff_args, repo_root)))

    # Untracked files are only part of the "working" scope.
    if scope == "working":
        untracked_out = _run_git(
            ["ls-files", "--others", "--exclude-standard", "-z"], repo_root
        )
        candidates.update(_split_nul_paths(untracked_out))

    # Keep only paths that still exist as files (drops deletions).
    return {
        path
        for path in candidates
        if os.path.isfile(os.path.join(repo_root, path))
    }


EXT_TO_LANG = {
"py": "python",
"c": "c",
Expand Down Expand Up @@ -244,6 +308,24 @@ def read_paths_from_stdin(use_null_separator):
is_flag=True,
help="Use NUL character as separator when reading from stdin",
)
@click.option(
"since_ref",
"--since",
metavar="REF",
help=(
"Only include files changed since this Git revision (commit/tag/branch). "
"Paths given on the command line (or stdin) further restrict results. "
"In this mode, --ignore-gitignore is ignored."
),
)
@click.option(
"since_scope",
"--since-scope",
type=click.Choice(["working", "committed", "staged"]),
default="working",
show_default=True,
help="Which changes to include relative to REF: 'working' = commits after REF + staged + unstaged + untracked; 'committed' = commits after REF; 'staged' = index vs REF.",
)
@click.version_option()
def cli(
paths,
Expand All @@ -257,6 +339,8 @@ def cli(
markdown,
line_numbers,
null,
since_ref,
since_scope,
):
"""
Takes one or more paths to files or directories and outputs every file,
Expand Down Expand Up @@ -302,6 +386,88 @@ def cli(
# Combine paths from arguments and stdin
paths = [*paths, *stdin_paths]

# Handle --since specially
if since_ref:
if ignore_gitignore:
click.echo(
click.style(
"--ignore-gitignore is ignored with --since; Git's ignore rules always apply.",
fg="yellow",
),
err=True,
)
repo_root = _git_repo_root()
_validate_git_ref(since_ref, repo_root)
changed = _git_changed_paths_since(since_ref, repo_root, since_scope)

# restrict to explicit paths if given
if paths:
abs_paths = [os.path.abspath(p) for p in paths]
changed = {
rel
for rel in changed
if any(
os.path.commonpath([os.path.join(repo_root, rel), ap]) == ap
for ap in abs_paths
)
}

# apply include_hidden / ignore / extensions filters
rels = []
for rel in sorted(changed):
rel_norm = os.path.normpath(rel.replace("/", os.sep))
if not include_hidden and any(
part.startswith(".") for part in rel_norm.split(os.sep)
):
continue
if extensions and not rel.endswith(tuple(extensions)):
continue
if ignore_patterns:
base = os.path.basename(rel_norm)
if ignore_files_only:
if any(fnmatch(base, pat) for pat in ignore_patterns):
continue
else:
parts = rel_norm.split(os.sep)
if any(
fnmatch(part, pat) for part in parts for pat in ignore_patterns
):
continue
rels.append(rel)

writer = click.echo
fp = None
if output_file:
fp = open(output_file, "w", encoding="utf-8")
writer = lambda s: print(s, file=fp)
if claude_xml:
writer("<documents>")
for rel in rels:
try:
# For staged scope, read content from index; otherwise from working tree
if since_scope == "staged":
content = _run_git(["show", f":{rel}"], repo_root)
else:
abs_fp = os.path.join(repo_root, rel)
with open(abs_fp, "r") as f:
content = f.read()

print_path(writer, rel, content, claude_xml, markdown, line_numbers)
except (
UnicodeDecodeError,
subprocess.CalledProcessError,
click.ClickException,
):
warning_message = (
f"Warning: Skipping file {rel} due to Unicode/Git error"
)
click.echo(click.style(warning_message, fg="red"), err=True)
if claude_xml:
writer("</documents>")
if fp:
fp.close()
return

gitignore_rules = []
writer = click.echo
fp = None
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "files-to-prompt"
version = "0.6"
description = "Concatenate a directory full of files into a single prompt for use with LLMs"
readme = "README.md"
authors = [{name = "Simon Willison"}]
authors = [{name = "Simon Willison"}, {name = "Dan MacKinlay"}]
license = {text = "Apache-2.0"}
requires-python = ">=3.8"
classifiers = [
Expand Down
Loading