diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index a98a1e57..fa385c6c 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -23,6 +23,13 @@ entry: check-builtin-literals language: python types: [python] +- id: catch-dotenv + name: catch dotenv files + description: blocks committing .env files. optionally creates value-sanitized .env.example. + entry: catch-dotenv + language: python + pass_filenames: true + always_run: false - id: check-case-conflict name: check for case conflicts description: checks for files that would conflict in case-insensitive filesystems. diff --git a/README.md b/README.md index 9ee16774..2a7c19c5 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,12 @@ Require literal syntax when initializing empty or zero Python builtin types. - Ignore this requirement for specific builtin types with `--ignore=type1,type2,…`. - Forbid `dict` keyword syntax with `--no-allow-dict-kwargs`. +#### `catch-dotenv` +Prevents committing `.env` files to version control and optionally generates `.env.example` files. + - Use `--create-example` to generate a `.env.example` file with variable names but no values. + - Automatically adds `.env` to `.gitignore` if not already present. + - Helps prevent accidental exposure of secrets and sensitive configuration. + #### `check-case-conflict` Check for files with names that would conflict on a case-insensitive filesystem like MacOS HFS+ or Windows FAT. 
diff --git a/pre_commit_hooks/catch_dotenv.py b/pre_commit_hooks/catch_dotenv.py
new file mode 100644
index 00000000..fedbe4c0
--- /dev/null
+++ b/pre_commit_hooks/catch_dotenv.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python
+from __future__ import annotations
+
+import argparse
+import os
+import re
+import sys
+import tempfile
+from collections.abc import Iterable
+from collections.abc import Sequence
+
+# Defaults / constants
+DEFAULT_ENV_FILE = '.env'
+DEFAULT_GITIGNORE_FILE = '.gitignore'
+DEFAULT_EXAMPLE_ENV_FILE = '.env.example'
+GITIGNORE_BANNER = '# Added by pre-commit hook to prevent committing secrets'
+
+# Matches `KEY=` / `export KEY=` and captures the variable name.
+_KEY_REGEX = re.compile(r'^\s*(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)\s*=')
+
+
+def _atomic_write(path: str, data: str) -> None:
+    """Atomically (best-effort) write text.
+
+    Writes to a same-directory temporary file then replaces the target with
+    os.replace(). This is a slight divergence from most existing hooks which
+    write directly, but here we intentionally reduce the (small) risk of
+    partially-written files because the hook may be invoked rapidly / in
+    parallel (tests exercise concurrent normalization). Keeping this helper
+    local avoids adding any dependency.
+
+    mkstemp() creates the temporary file readable / writable by the owner
+    only, so the permission bits of an existing target are copied onto it
+    first; otherwise every rewrite would silently tighten the file's mode.
+
+    Raises OSError when the data cannot be written or moved into place.
+    """
+    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(path) or '.')
+    try:
+        with os.fdopen(fd, 'w', encoding='utf-8', newline='') as tmp_f:
+            tmp_f.write(data)
+        try:
+            os.chmod(tmp_path, os.stat(path).st_mode & 0o7777)
+        except OSError:
+            pass  # no existing target (or chmod unsupported): keep as-is
+        os.replace(tmp_path, path)
+    finally:
+        # After a successful replace the temporary name is gone and this
+        # is a no-op; after a failure it removes the leftover (best-effort).
+        try:
+            os.remove(tmp_path)
+        except OSError:
+            pass
+
+
+def _read_gitignore(gitignore_file: str) -> tuple[str, list[str]]:
+    """Return ``(raw_text, lines)`` of .gitignore; both empty if missing.
+
+    Any other read problem (e.g. the path is a directory or the content is
+    not valid UTF-8) is reported on stderr and re-raised.
+    """
+    try:
+        with open(gitignore_file, encoding='utf-8') as f:
+            original_text = f.read()
+    except FileNotFoundError:
+        return '', []
+    except (OSError, UnicodeDecodeError) as exc:
+        print(
+            f"ERROR: unable to read {gitignore_file}: {exc}",
+            file=sys.stderr,
+        )
+        raise
+    return original_text, original_text.splitlines()
+
+
+def _normalize_gitignore_lines(
+    lines: list[str],
+    env_file: str,
+    banner: str,
+) -> list[str]:
+    """Return a copy of ``lines`` ending in a single banner + env entry.
+
+    Trailing blank lines and every existing banner / env entry (compared
+    after stripping whitespace) are dropped; then a blank separator (unless
+    the remaining lines already end with one), the banner and the env entry
+    are appended. The input list is left untouched.
+    """
+    end = len(lines)
+    while end and not lines[end - 1].strip():  # skip trailing blank lines
+        end -= 1
+
+    filtered = [
+        ln for ln in lines[:end] if ln.strip() not in {env_file, banner}
+    ]
+    # Blank line before the banner, also when nothing else is left.
+    if not filtered or filtered[-1].strip():
+        filtered.append('')
+    filtered.extend((banner, env_file))
+    return filtered
+
+
+def ensure_env_in_gitignore(
+    env_file: str,
+    gitignore_file: str,
+    banner: str,
+) -> bool:
+    """Ensure canonical banner + env tail in .gitignore.
+
+    Returns True only when the file content was changed. Returns False both
+    when unchanged and on IO / decoding errors (we intentionally conflate
+    for the simple hook contract; errors are still surfaced via stderr).
+    """
+    try:
+        original_content_str, lines = _read_gitignore(gitignore_file)
+    except (OSError, UnicodeDecodeError):
+        return False
+
+    filtered = _normalize_gitignore_lines(lines, env_file, banner)
+    new_content = '\n'.join(filtered) + '\n'
+
+    # A missing final newline alone is not worth a rewrite, so add it to the
+    # original before comparing.
+    normalized_original = original_content_str
+    if normalized_original and not normalized_original.endswith('\n'):
+        normalized_original += '\n'
+    if new_content == normalized_original:
+        return False
+
+    try:
+        _atomic_write(gitignore_file, new_content)
+    except OSError as exc:
+        print(
+            f"ERROR: unable to write {gitignore_file}: {exc}",
+            file=sys.stderr,
+        )
+        return False
+    return True
+
+
+def create_example_env(src_env: str, example_file: str) -> bool:
+    """Generate .env.example with unique KEY= lines (no values).
+
+    Keys are emitted once each, in order of first appearance. Returns True
+    on success; on a read, decode or write failure prints an error to
+    stderr and returns False.
+    """
+    try:
+        # utf-8-sig also accepts plain UTF-8; it only drops a leading BOM,
+        # which would otherwise keep the first key from matching.
+        with open(src_env, encoding='utf-8-sig') as f_env:
+            lines = f_env.readlines()
+    except (OSError, UnicodeDecodeError) as exc:
+        print(f"ERROR: unable to read {src_env}: {exc}", file=sys.stderr)
+        return False
+
+    # _KEY_REGEX tolerates leading whitespace and cannot match blank or
+    # comment lines; dict.fromkeys de-duplicates while keeping order.
+    keys = dict.fromkeys(
+        m.group(1) for m in map(_KEY_REGEX.match, lines) if m
+    )
+
+    header = [
+        '# Generated by catch-dotenv hook.',
+        '# Variable names only – fill in sample values as needed.',
+        '',
+    ]
+    body = [f"{k}=" for k in keys]
+    try:
+        _atomic_write(example_file, '\n'.join(header + body) + '\n')
+    except OSError as exc:  # pragma: no cover
+        print(
+            f"ERROR: unable to write '{example_file}': {exc}",
+            file=sys.stderr,
+        )
+        return False
+    return True
+
+
+def _has_env(filenames: Iterable[str], env_file: str) -> bool:
+    """Return True if any staged path refers to target env file by basename."""
+    return any(os.path.basename(name) == env_file for name in filenames)
+
+
+def _print_failure(
+    env_file: str,
+    gitignore_file: str,
+    example_created: bool,
+    gitignore_modified: bool,
+) -> None:
+    """Print the summary shown to the user when the commit is blocked."""
+    # Match typical hook output style: one short line per action.
+    print(f"Blocked committing {env_file}.")
+    if gitignore_modified:
+        print(f"Updated {gitignore_file}.")
+    if example_created:
+        print('Generated .env.example.')
+    print(f"Remove {env_file} from the commit and retry.")
+
+
+def main(argv: Sequence[str] | None = None) -> int:
+    """Hook entry-point.
+
+    Returns 0 when no given filename has the basename ``.env``. Otherwise
+    ensures ``.env`` is listed in .gitignore, optionally regenerates
+    .env.example (``--create-example``), prints a summary and returns 1.
+    """
+    parser = argparse.ArgumentParser(
+        description='Blocks committing .env files.',
+    )
+    parser.add_argument(
+        'filenames',
+        nargs='*',
+        help='Staged filenames (supplied by pre-commit).',
+    )
+    parser.add_argument(
+        '--create-example',
+        action='store_true',
+        help='Generate example env file (.env.example).',
+    )
+    args = parser.parse_args(argv)
+    env_file = DEFAULT_ENV_FILE
+    # Use current working directory as repository root (pre-commit executes
+    # hooks from the repo root).
+    repo_root = os.getcwd()
+    gitignore_file = os.path.join(repo_root, DEFAULT_GITIGNORE_FILE)
+    example_file = os.path.join(repo_root, DEFAULT_EXAMPLE_ENV_FILE)
+    env_abspath = os.path.join(repo_root, env_file)
+
+    if not _has_env(args.filenames, env_file):
+        return 0
+
+    gitignore_modified = ensure_env_in_gitignore(
+        env_file,
+        gitignore_file,
+        GITIGNORE_BANNER,
+    )
+    # Source env is always looked up relative to repo root
+    example_created = (
+        args.create_example and
+        os.path.exists(env_abspath) and
+        create_example_env(env_abspath, example_file)
+    )
+
+    _print_failure(
+        env_file,
+        gitignore_file,
+        example_created,
+        gitignore_modified,
+    )
+    return 1  # Block commit
+
+
+if __name__ == '__main__':
+    raise SystemExit(main())
diff --git a/setup.cfg b/setup.cfg
index 14f7a91c..65a319c4 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -29,6 +29,7 @@ exclude =
 
 [options.entry_points]
 console_scripts =
+    catch-dotenv = pre_commit_hooks.catch_dotenv:main
     check-added-large-files = pre_commit_hooks.check_added_large_files:main
     check-ast = pre_commit_hooks.check_ast:main
check-builtin-literals = pre_commit_hooks.check_builtin_literals:main diff --git a/testing/resources/test.env b/testing/resources/test.env new file mode 100644 index 00000000..1479aedc --- /dev/null +++ b/testing/resources/test.env @@ -0,0 +1,82 @@ +# ============================================================================= +# DUMMY SECRETS FOR DOTENV TEST +# ============================================================================= + +# Container Internal Ports (what each service listens on inside containers) +BACKEND_CONTAINER_PORT=3000 # FastAPI server internal port +FRONTEND_CONTAINER_PORT=3001 # Vite dev server internal port + +# External Access (what users/browsers connect to) +CADDY_EXTERNAL_PORT=80 # External port exposed to host system + +# URLs (how different components reference each other) +BASE_HOSTNAME=http://localhost +PUBLIC_FRONTEND_URL=${BASE_HOSTNAME}:${CADDY_EXTERNAL_PORT} +LEGACY_BACKEND_DIRECT_URL=${BASE_HOSTNAME}:${BACKEND_CONTAINER_PORT} # Deprecated: direct backend access +VITE_BROWSER_API_URL=${BASE_HOSTNAME}:${CADDY_EXTERNAL_PORT}/api # Frontend API calls through Caddy + +# Environment +NODE_ENV=development +# Supabase +SUPABASE_PROJECT_ID=979090c33e5da06f67921e70 +SUPABASE_PASSWORD=1bbad0861dbca0bad3bd58ac90fd87e1cfd13ebbbeaed730868a11fa38bf6a65 +SUPABASE_URL=https://${SUPABASE_PROJECT_ID}.supabase.co +DATABASE_URL=postgresql://postgres.${SUPABASE_PROJECT_ID}:${SUPABASE_PASSWORD}@aws-0-us-west-1.pooler.supabase.com:5432/postgres +SUPABASE_SERVICE_KEY=f37f35e070475d4003ea0973cc15ef8bd9956fd140c80d247a187f8e5b0d69d70a9555decd28ea405051bf31d1d1f949dba277f058ba7c0279359ccdeda0f0696ea803403b8ad76dbbf45c4220b45a44a66e643bf0ca575dffc69f22a57c7d6c693e4d55b5f02e8a0da192065a38b24cbed2234d005661beba6d58e3ef234e0f +SUPABASE_S3_STORAGE_ENDPOINT=${SUPABASE_URL}/storage/v1/s3 +SUPABASE_STORAGE_BUCKET=my-bucket +SUPABASE_REGION=us-west-1 +SUPABASE_S3_ACCESS_KEY_ID=323157dcde28202bda94ff4db4be5266 
+SUPABASE_S3_SECRET_ACCESS_KEY=d37c900e43e9dfb2c9998fa65aaeea703014504bbfebfddbcf286ee7197dc975 + +# Storage (aliases for compatibility) +STORAGE_URL=https://b8991834720f5477910eded7.supabase.co/storage/v1/s3 +STORAGE_BUCKET=my-bucket +STORAGE_ACCESS_KEY=FEvMws2HMGW96oBMx6Cg98pP8k3h4eki +STORAGE_SECRET_KEY=shq7peEUeYkdzuUDohoK6qx9Zpjvjq6Zz2coUDvyQARM3qk9QryKZmQqRmz4szzM +STORAGE_REGION=us-west-1 +STORAGE_SKIP_BUCKET_CHECK=true + +# Authentication +ACCESS_TOKEN_SECRET=ghp_c9d4307ceb82d06b522c1a5e37a8b5d0BMwJpgMT +REFRESH_TOKEN_SECRET=09cb1b7920aea0d2b63ae3264e27595225ca7132f92f4cc5eff6dc066957118d +JWT_ALGORITHM=HS256 + +# Mail +MAIL_FROM=noreply@example.com + +# Chrome Browser +CHROME_TOKEN=ac126eb015837628b05ff2f0f568ff46 +CHROME_PROXY_HOST=chrome +CHROME_PROXY_PORT=3002 +CHROME_PROXY_SSL=false +CHROME_HEALTH=true +CHROME_PORT=8080 + +# Test Configuration (for e2e) +TEST_HOST=${BASE_HOSTNAME} +TEST_TIMEOUT=35 +TEST_EMAIL=test@example.com +TEST_PASSWORD=changeme +POSTGRES_PORT=5432 +MINIO_PORT=9000 +REDIS_PORT=6379 + +# Database and Storage Paths +SQLITE_DB_PATH=database.db +TEST_DB_PATH=tests/testdb.duckdb +STATIC_FILES_DIR=/app/static + +# AI +OPENAI_API_KEY = "sk-proj-a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0" +COHERE_API_KEY = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0" +OR_API_KEY = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0" +AZURE_API_KEY = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" +GEMINI_API_KEY = "AIzaSyA1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r" +VERTEXAI_API_KEY = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0" +REPLICATE_API_KEY = "r8_a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9" +REPLICATE_API_TOKEN = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0" +ANTHROPIC_API_KEY = "sk-ant-a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0u1v2w3x4y5z6a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6" +INFISICAL_TOKEN = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0" +NOVITA_API_KEY = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0" +INFINITY_API_KEY = "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6q7r8s9t0" diff --git a/tests/catch_dotenv_test.py 
b/tests/catch_dotenv_test.py
new file mode 100644
index 00000000..4ad5d007
--- /dev/null
+++ b/tests/catch_dotenv_test.py
@@ -0,0 +1,550 @@
+from __future__ import annotations
+
+import os
+import re
+import shutil
+import threading
+import time
+from pathlib import Path
+
+import pytest
+
+from pre_commit_hooks.catch_dotenv import DEFAULT_ENV_FILE
+from pre_commit_hooks.catch_dotenv import DEFAULT_EXAMPLE_ENV_FILE
+from pre_commit_hooks.catch_dotenv import DEFAULT_GITIGNORE_FILE
+from pre_commit_hooks.catch_dotenv import ensure_env_in_gitignore
+from pre_commit_hooks.catch_dotenv import GITIGNORE_BANNER
+from pre_commit_hooks.catch_dotenv import main
+
+# Tests cover hook behavior: detection gating, .gitignore normalization,
+# example file generation parsing edge cases, idempotency, and preservation of
+# existing content. Each test isolates a single behavioral contract.
+
+
+@pytest.fixture()
+def env_file(tmp_path: Path) -> Path:
+    """Copy shared resource .env into tmp workspace as the canonical .env.
+
+    All tests rely on this baseline content (optionally appending extra lines
+    for edge cases) to ensure consistent parsing behavior.
+    """
+    # Find repository root by looking for .git directory
+    test_file_path = Path(__file__).resolve()
+    repo_root = test_file_path
+    while repo_root.parent != repo_root:  # Stop at filesystem root
+        if (repo_root / '.git').exists():
+            break
+        repo_root = repo_root.parent
+    else:
+        raise RuntimeError('Could not find repository root (.git directory)')
+
+    # Source file stored as test.env in repo (cannot commit a real .env in CI)
+    resource_env = repo_root / 'testing' / 'resources' / 'test.env'
+    dest = tmp_path / DEFAULT_ENV_FILE
+    shutil.copyfile(resource_env, dest)
+    return dest
+
+
+def run_hook(
+    tmp_path: Path, staged: list[str], create_example: bool = False,
+) -> int:
+    cwd = os.getcwd()
+    os.chdir(tmp_path)
+    try:
+        args = staged[:]
+        if create_example:
+            args.append('--create-example')
+        return main(args)
+    finally:
+        os.chdir(cwd)
+
+
+def test_no_env_file(tmp_path: Path, env_file: Path) -> None:
+    """Hook should no-op (return 0) if .env not staged even if it exists."""
+    (tmp_path / 'foo.txt').write_text('x')
+    assert run_hook(tmp_path, ['foo.txt']) == 0
+
+
+def test_blocks_env_and_updates_gitignore(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Staging .env triggers block (exit 1) and appends banner + env entry."""
+    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE])
+    assert ret == 1
+    gi = (tmp_path / DEFAULT_GITIGNORE_FILE).read_text().splitlines()
+    assert gi[-2] == GITIGNORE_BANNER
+    assert gi[-1] == DEFAULT_ENV_FILE
+
+
+def test_env_present_but_not_staged(tmp_path: Path, env_file: Path) -> None:
+    """Existing .env on disk but not staged should not block commit."""
+    assert run_hook(tmp_path, ['unrelated.txt']) == 0
+
+
+def test_idempotent_gitignore(tmp_path: Path, env_file: Path) -> None:
+    """Re-running after initial normalization leaves .gitignore unchanged."""
+    g = tmp_path / DEFAULT_GITIGNORE_FILE
+    g.write_text(f"{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n")
+    first = run_hook(tmp_path, [DEFAULT_ENV_FILE])
+    assert first == 1
+    content1 = g.read_text()
+    second = run_hook(tmp_path, [DEFAULT_ENV_FILE])
+    assert second == 1
+    assert g.read_text() == content1  # unchanged
+
+
+def test_gitignore_with_existing_content_preserved(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Existing entries stay intact; banner/env appended at end cleanly."""
+    g = tmp_path / DEFAULT_GITIGNORE_FILE
+    g.write_text(
+        'node_modules/\n# comment line\n',
+    )  # existing content with trailing newline
+    run_hook(tmp_path, [DEFAULT_ENV_FILE])
+    lines = g.read_text().splitlines()
+    # original content should still be at top
+    assert lines[0] == 'node_modules/'
+    assert '# comment line' in lines[1]
+    # Last two lines should be banner + env file
+    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
+
+
+def test_gitignore_duplicates_are_collapsed(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Multiple prior duplicate banner/env lines collapse to single pair."""
+    g = tmp_path / DEFAULT_GITIGNORE_FILE
+    g.write_text(
+        f"other\n{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n"
+        f"{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n\n\n",
+    )
+    run_hook(tmp_path, [DEFAULT_ENV_FILE])
+    lines = g.read_text().splitlines()
+    assert lines.count(GITIGNORE_BANNER) == 1
+    assert lines.count(DEFAULT_ENV_FILE) == 1
+    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
+
+
+def test_create_example(tmp_path: Path, env_file: Path) -> None:
+    """Example file includes discovered keys; values stripped to KEY=."""
+    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
+    assert ret == 1
+    example = (tmp_path / DEFAULT_EXAMPLE_ENV_FILE).read_text().splitlines()
+    key_lines = [ln for ln in example if ln and not ln.startswith('#')]
+    # All key lines should be KEY=
+    assert all(re.match(r'^[A-Za-z_][A-Za-z0-9_]*=$', ln) for ln in key_lines)
+    # Spot check a few known keys from resource file
+    for k in [
+        'OPENAI_API_KEY=',
+        'ACCESS_TOKEN_SECRET=',
+        'SUPABASE_SERVICE_KEY=',
+    ]:
+        assert k in key_lines
+
+
+def test_create_example_duplicate_key_variant_ignored(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Appending whitespace duplicate of existing key should not duplicate
+    in example.
+    """
+    # Create a copy of the env_file to avoid contaminating the fixture
+    modified_env = tmp_path / 'modified.env'
+    shutil.copyfile(env_file, modified_env)
+    with open(modified_env, 'a', encoding='utf-8') as f:
+        f.write('BACKEND_CONTAINER_PORT =999 # duplicate variant\n')
+
+    # Override the env file path for this test
+    original_env = tmp_path / DEFAULT_ENV_FILE
+    shutil.copyfile(modified_env, original_env)
+    run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
+    lines = (tmp_path / DEFAULT_EXAMPLE_ENV_FILE).read_text().splitlines()
+    key_lines = [ln for ln in lines if ln and not ln.startswith('#')]
+    assert key_lines.count('BACKEND_CONTAINER_PORT=') == 1
+
+
+def test_gitignore_without_trailing_newline(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Normalization works when original .gitignore lacks trailing newline."""
+    g = tmp_path / DEFAULT_GITIGNORE_FILE
+    g.write_text('existing_line')  # no newline at EOF
+    run_hook(tmp_path, [DEFAULT_ENV_FILE])
+    lines = g.read_text().splitlines()
+    assert lines[0] == 'existing_line'
+    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
+
+
+def test_ensure_env_in_gitignore_normalizes(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Direct API call collapses duplicates and produces canonical tail
+    layout.
+    """
+    g = tmp_path / DEFAULT_GITIGNORE_FILE
+    g.write_text(
+        f"{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n"
+        f"{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n\n",
+    )
+    modified = ensure_env_in_gitignore(
+        DEFAULT_ENV_FILE, str(g), GITIGNORE_BANNER,
+    )
+    assert modified is True
+    lines = g.read_text().splitlines()
+    # final two lines should be banner + env
+    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
+    # only one occurrence each
+    assert lines.count(GITIGNORE_BANNER) == 1
+    assert lines.count(DEFAULT_ENV_FILE) == 1
+
+
+def test_source_env_file_not_modified(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Hook must not alter original .env (comments and formatting stay)."""
+    original = env_file.read_text()
+    run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
+    assert env_file.read_text() == original
+
+
+def test_large_resource_env_parsing(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Generate example from resource env; assert broad key coverage &
+    format.
+    """
+    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
+    assert ret == 1
+    example_lines = (
+        (tmp_path / DEFAULT_EXAMPLE_ENV_FILE).read_text().splitlines()
+    )
+    key_lines = [ln for ln in example_lines if ln and not ln.startswith('#')]
+    assert len(key_lines) > 20
+    assert all(re.match(r'^[A-Za-z_][A-Za-z0-9_]*=$', ln) for ln in key_lines)
+    for k in [
+        'BACKEND_CONTAINER_PORT=',
+        'SUPABASE_SERVICE_KEY=',
+        'ACCESS_TOKEN_SECRET=',
+    ]:
+        assert k in key_lines
+
+
+def test_failure_message_content(
+    tmp_path: Path,
+    env_file: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Hook stdout message should contain key phrases when blocking commit."""
+    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
+    assert ret == 1
+    out = capsys.readouterr().out.strip()
+    assert 'Blocked committing' in out
+    assert DEFAULT_GITIGNORE_FILE in out  # updated path appears
+    assert 'Generated .env.example.' in out
+    assert 'Remove .env' in out
+
+
+def test_create_example_when_env_missing(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """--create-example with no .env staged or present should no-op (exit 0).
+
+    Uses env_file fixture (requirement: all tests use fixture) then removes the
+    copied .env to simulate absence.
+    """
+    env_file.unlink()
+    ret = run_hook(tmp_path, ['unrelated.txt'], create_example=True)
+    assert ret == 0
+    assert not (tmp_path / DEFAULT_EXAMPLE_ENV_FILE).exists()
+
+
+def test_gitignore_is_directory_error(
+    tmp_path: Path,
+    env_file: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """If .gitignore path is a directory, hook should print error and still
+    block.
+    """
+    gitignore_dir = tmp_path / DEFAULT_GITIGNORE_FILE
+    gitignore_dir.mkdir()
+    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE])
+    assert ret == 1  # still blocks commit
+    captured = capsys.readouterr()
+    assert 'ERROR:' in captured.err  # error now printed to stderr
+
+
+def test_env_example_overwrites_existing(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Pre-existing example file with junk should be overwritten with header
+    & keys.
+    """
+    example = tmp_path / DEFAULT_EXAMPLE_ENV_FILE
+    example.write_text('junk=1\nSHOULD_NOT_REMAIN=2\n')
+    run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
+    content = example.read_text().splitlines()
+    assert content[0].startswith('# Generated by catch-dotenv')
+    assert any(ln.startswith('BACKEND_CONTAINER_PORT=') for ln in content)
+    assert 'junk=1' not in content
+    assert 'SHOULD_NOT_REMAIN=2' not in content
+
+
+def test_large_gitignore_normalization_performance(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Very large .gitignore remains normalized quickly (functional smoke)."""
+    g = tmp_path / DEFAULT_GITIGNORE_FILE
+    # Generate many lines with scattered duplicates of banner/env
+    lines = (
+        [f"file_{i}" for i in range(3000)] +
+        [GITIGNORE_BANNER, DEFAULT_ENV_FILE] * 3
+    )
+    g.write_text('\n'.join(lines) + '\n')
+    start = time.perf_counter()  # monotonic: immune to wall-clock jumps
+    run_hook(tmp_path, [DEFAULT_ENV_FILE])
+    elapsed = time.perf_counter() - start
+    result_lines = g.read_text().splitlines()
+    assert result_lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
+    assert result_lines.count(GITIGNORE_BANNER) == 1
+    assert result_lines.count(DEFAULT_ENV_FILE) == 1
+    # Soft performance expectation: should finish fast
+    # (< 0.5s on typical dev machine)
+    assert elapsed < 0.5
+
+
+def test_concurrent_gitignore_writes(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Concurrent ensure_env_in_gitignore calls result in canonical final
+    state.
+    """
+    g = tmp_path / DEFAULT_GITIGNORE_FILE
+    # Seed with messy duplicates
+    g.write_text(f"other\n{GITIGNORE_BANNER}\n{DEFAULT_ENV_FILE}\n\n")
+
+    def worker() -> None:
+        ensure_env_in_gitignore(DEFAULT_ENV_FILE, str(g), GITIGNORE_BANNER)
+
+    threads = [threading.Thread(target=worker) for _ in range(5)]
+    for t in threads:
+        t.start()
+    for t in threads:
+        t.join()
+    lines = g.read_text().splitlines()
+    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
+    assert lines.count(GITIGNORE_BANNER) == 1
+    assert lines.count(DEFAULT_ENV_FILE) == 1
+
+
+def test_mixed_staged_files(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Staging .env with other files still blocks and only normalizes
+    gitignore once.
+    """
+    other = tmp_path / 'README.md'
+    other.write_text('hi')
+    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE, 'README.md'])
+    assert ret == 1
+    lines = (tmp_path / DEFAULT_GITIGNORE_FILE).read_text().splitlines()
+    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
+
+
+def test_already_ignored_env_with_variations(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Pre-existing ignore lines with spacing normalize to single
+    canonical pair.
+    """
+    g = tmp_path / DEFAULT_GITIGNORE_FILE
+    g.write_text(
+        f" {DEFAULT_ENV_FILE} \n{GITIGNORE_BANNER}\n"
+        f" {DEFAULT_ENV_FILE}\n",
+    )
+    run_hook(tmp_path, [DEFAULT_ENV_FILE])
+    lines = g.read_text().splitlines()
+    assert lines[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
+    assert lines.count(DEFAULT_ENV_FILE) == 1
+
+
+def test_subdirectory_invocation(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Running from a subdirectory now writes .gitignore relative to CWD
+    (simplified behavior).
+    """
+    sub = tmp_path / 'subdir'
+    sub.mkdir()
+    # simulate repository root marker
+    (tmp_path / '.git').mkdir()
+    # simulate running hook from subdir while staged path relative to repo root
+    cwd = os.getcwd()
+    os.chdir(sub)
+    try:
+        ret = main(
+            [str(Path('..') / DEFAULT_ENV_FILE)],
+        )  # staged path relative to subdir
+        gi = (sub / DEFAULT_GITIGNORE_FILE).read_text().splitlines()
+    finally:
+        os.chdir(cwd)
+    assert ret == 1
+    assert gi[-2:] == [GITIGNORE_BANNER, DEFAULT_ENV_FILE]
+
+
+def test_atomic_write_failure_gitignore(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    env_file: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Simulate os.replace failure during gitignore write to exercise error
+    path.
+    """
+    def boom(*_a: object, **_k: object) -> None:
+        raise OSError('replace-fail')
+    monkeypatch.setattr('pre_commit_hooks.catch_dotenv.os.replace', boom)
+    modified = ensure_env_in_gitignore(
+        DEFAULT_ENV_FILE,
+        str(tmp_path / DEFAULT_GITIGNORE_FILE),
+        GITIGNORE_BANNER,
+    )
+    assert modified is False
+    captured = capsys.readouterr()
+    assert 'ERROR: unable to write' in captured.err
+
+
+def test_atomic_write_failure_example(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    env_file: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Simulate os.replace failure when writing example env file."""
+    def boom(*_a: object, **_k: object) -> None:
+        raise OSError('replace-fail')
+    monkeypatch.setattr('pre_commit_hooks.catch_dotenv.os.replace', boom)
+    ok = False
+    # create_example_env requires source .env to exist; env_file fixture
+    # provides it in tmp_path root
+    cwd = os.getcwd()
+    os.chdir(tmp_path)
+    try:
+        ok = main([DEFAULT_ENV_FILE, '--create-example']) == 1
+    finally:
+        os.chdir(cwd)
+    # hook still blocks; but example creation failed -> output must not
+    # contain the exact success line the hook prints for a generated example
+    assert ok is True
+    captured = capsys.readouterr()
+    out = captured.out
+    err = captured.err
+    assert 'Generated .env.example.' not in out
+    assert 'ERROR: unable to write' in err
+
+
+def test_atomic_write_cleanup_failure(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    env_file: Path,
+) -> None:
+    """Test rare case where os.remove fails during cleanup after os.replace
+    failure.
+    """
+    def failing_remove(_path: str) -> None:
+        # Simulate os.remove failure during cleanup
+        raise OSError('remove-fail')
+
+    def failing_replace(*_a: object, **_k: object) -> None:
+        # First fail os.replace to trigger cleanup path
+        raise OSError('replace-fail')
+
+    monkeypatch.setattr(
+        'pre_commit_hooks.catch_dotenv.os.replace', failing_replace,
+    )
+    monkeypatch.setattr(
+        'pre_commit_hooks.catch_dotenv.os.remove', failing_remove,
+    )
+
+    # This should not raise an exception even if both replace and remove fail
+    modified = ensure_env_in_gitignore(
+        DEFAULT_ENV_FILE,
+        str(tmp_path / DEFAULT_GITIGNORE_FILE),
+        GITIGNORE_BANNER,
+    )
+    assert modified is False
+
+
+def test_create_example_read_error(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: Path,
+    env_file: Path,
+    capsys: pytest.CaptureFixture[str],
+) -> None:
+    """Test OSError when reading source env file for create_example."""
+    def failing_open(*_args: object, **_kwargs: object) -> None:
+        raise OSError('Permission denied')
+
+    # Mock open to fail when trying to read the env file
+    monkeypatch.setattr('builtins.open', failing_open)
+
+    from pre_commit_hooks.catch_dotenv import create_example_env
+
+    result = create_example_env(str(env_file), str(tmp_path / 'test.example'))
+    assert result is False
+
+    captured = capsys.readouterr()
+    assert 'ERROR: unable to read' in captured.err
+
+
+def test_malformed_env_lines_ignored(tmp_path: Path, env_file: Path) -> None:
+    """Test that malformed env lines that don't match regex are ignored."""
+    # Create env file with malformed lines
+    malformed_env = tmp_path / 'malformed.env'
+    malformed_content = [
+        'VALID_KEY=value',
+        'invalid-line-no-equals',
+        '# comment line',
+        '',  # empty line
+        '=INVALID_EQUALS_FIRST',
+        'ANOTHER_VALID=value2',
+        'spaces in key=invalid',
+        '123_INVALID_START=value',  # starts with number
+    ]
+    malformed_env.write_text('\n'.join(malformed_content))
+
+    # Copy to .env location
+    shutil.copyfile(malformed_env, tmp_path / DEFAULT_ENV_FILE)
+
+    # Run create-example - should only extract valid keys
+    run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
+
+    example_lines = (
+        (tmp_path / DEFAULT_EXAMPLE_ENV_FILE).read_text().splitlines()
+    )
+    key_lines = [ln for ln in example_lines if ln and not ln.startswith('#')]
+
+    # Should only have the valid keys
+    assert 'VALID_KEY=' in key_lines
+    assert 'ANOTHER_VALID=' in key_lines
+    assert len([k for k in key_lines if '=' in k]) == 2  # Only 2 valid keys
+
+
+def test_create_example_when_source_missing(
+    tmp_path: Path, env_file: Path,
+) -> None:
+    """Test --create-example when source .env doesn't exist but .env is
+    staged.
+    """
+    # Remove the source .env file but keep it in the staged files list
+    env_file.unlink()  # Remove the .env file
+
+    # Stage .env even though it doesn't exist on disk
+    ret = run_hook(tmp_path, [DEFAULT_ENV_FILE], create_example=True)
+
+    # Hook should still block commit
+    assert ret == 1
+
+    # But no example file should be created since source doesn't exist
+    assert not (tmp_path / DEFAULT_EXAMPLE_ENV_FILE).exists()