Add HTML corpus decoder and publish to GitHub Pages

cmeister2 · cmeister2 · commit 2d64770d09ce · 2025-10-22T20:46:18.000+01:00
diff --git a/.github/workflows/pages-ci.yml b/.github/workflows/pages-ci.yml
@@ -0,0 +1,33 @@
+# Runs the Playwright browser test for the HTML corpus decoder
+# (tests/browser/test_corpus_decoder.py) on pushes and pull requests to master.
+name: Playwright browser test for corpus decoder
+
+on:
+  push:
+    branches: [master]
+  pull_request:
+    branches: [master]
+  workflow_dispatch:
+
+# The job only needs to read the repository contents.
+permissions:
+  contents: read
+
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          # Quoted so the shell cannot treat [browser-tests] as a glob pattern.
+          pip install '.[browser-tests]'
+          pip install pytest pytest-playwright
+          # Only Chromium is needed (the README describes a headless Chromium
+          # run); --with-deps also installs the system libraries it requires.
+          python -m playwright install --with-deps chromium
+
+      - name: Run Playwright browser test
+        run: pytest tests/browser/test_corpus_decoder.py
diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml
@@ -0,0 +1,51 @@
+# Builds the corpus decoder page and publishes it to GitHub Pages on every
+# push to master, or when dispatched manually.
+name: Deploy decoder to GitHub Pages
+
+on:
+  push:
+    branches: [master]
+  workflow_dispatch:
+
+# Repository contents stay read-only; the pages and id-token write scopes are
+# the ones actions/deploy-pages documents as required for publishing.
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+# All runs share the "pages" group; an in-progress run is not cancelled when a
+# newer one is queued.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  # Generate the HTML and package the docs directory as the Pages artifact.
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install .
+
+      # With no --output argument the generator writes to
+      # docs/corpus-decoder/index.html, i.e. inside the directory uploaded below.
+      - name: Generate decoder HTML
+        run: python -m curl_fuzzer_tools.generate_decoder_html
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: docs
+
+  # Publish the artifact produced by the build job.
+  deploy:
+    needs: build
+    runs-on: ubuntu-latest
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
@@ -34,3 +34,6 @@ libstandaloneengine.a
 __pycache__/
 logs/
 *.egg-info/
+
+# Docs
+docs/
diff --git a/README.md b/README.md
@@ -80,6 +80,34 @@ read_corpus <path/to/file>
 ```
 This will print out a list of contents inside the file.
 
+## I want an HTML decoder for corpus files
+
+Generate a standalone HTML page that can inspect TLV corpora directly in your browser:
+
+```shell
+python -m curl_fuzzer_tools.generate_decoder_html
+```
+
+By default the generator writes to `docs/corpus-decoder/index.html`. The page is entirely client-side; it never uploads the selected file. You can open the output straight from the filesystem, for example `file:///.../docs/corpus-decoder/index.html`.
+
+**View the latest published decoder:**
+
+[curl corpus decoder (GitHub Pages)](https://curl.github.io/curl-fuzzer/corpus-decoder/index.html)
+
+GitHub Pages is deployed automatically by the `pages.yml` workflow, which regenerates the page into `docs/` whenever the `master` branch is updated. The `docs/` folder is git-ignored, so running the command above locally is only needed to preview the page, not to refresh the published site.
+
+### Optional browser smoke-test (Playwright)
+
+The Playwright regression test is opt-in so the default install stays light. If you want to run it:
+
+```shell
+pip install -e '.[browser-tests]'
+playwright install chromium
+pytest tests/browser/test_corpus_decoder.py
+```
+
+These commands exercise the generated HTML by loading a sample TLV corpus into the page in a headless Chromium run.
+
 ## I want to generate a new testcase
 
 To generate a new testcase, run
diff --git a/pyproject.toml b/pyproject.toml
@@ -22,13 +22,23 @@ classifiers = [
     "Topic :: Software Development :: Testing",
     "Typing :: Typed",
 ]
-dependencies = ["scapy (>=2.6.1,<3.0.0)"]
+dependencies = [
+    "scapy (>=2.6.1,<3.0.0)",
+    "jinja2 (>=3.1.0,<4.0.0)",
+]
+
+[project.optional-dependencies]
+browser-tests = [
+    "pytest>=8.3,<9",
+    "playwright>=1.46,<1.47",
+]
 
 [project.scripts]
 read_corpus = "curl_fuzzer_tools.read_corpus:run"
 generate_corpus = "curl_fuzzer_tools.generate_corpus:run"
 corpus_to_pcap = "curl_fuzzer_tools.corpus_to_pcap:run"
 generate_matrix = "curl_fuzzer_tools.generate_matrix:run"
+generate_decoder_html = "curl_fuzzer_tools.generate_decoder_html:run"
 
 [build-system]
 requires = ["setuptools>=61.0"]
@@ -39,6 +49,10 @@ dev = [
     "mypy==1.18.2",
     "ruff==0.14.1",
 ]
+browser-tests = [
+    "pytest>=8.3,<9",
+    "playwright>=1.46,<1.47",
+]
 
 [tool.mypy]
 warn_unused_configs = true
diff --git a/src/curl_fuzzer_tools/__init__.py b/src/curl_fuzzer_tools/__init__.py
@@ -1,6 +1,7 @@
 """Tooling for the curl-fuzzer repository."""
 
 from .logger import common_logging
+from .generate_decoder_html import generate_html
 
 # Import * imports
-__all__ = ["common_logging"]
+__all__ = ["common_logging", "generate_html"]
diff --git a/src/curl_fuzzer_tools/generate_decoder_html.py b/src/curl_fuzzer_tools/generate_decoder_html.py
@@ -0,0 +1,75 @@
+"""Generate an interactive HTML page for decoding curl corpus files."""
+
+from __future__ import annotations
+
+import argparse
+from datetime import datetime, timezone
+from collections.abc import Sequence
+from pathlib import Path
+
+from jinja2 import Environment, FileSystemLoader, select_autoescape
+
+from .corpus import BaseType
+from .logger import common_logging
+
+_TEMPLATE_NAME = "corpus_decoder.html"
+_DEFAULT_OUTPUT = Path("docs/corpus-decoder/index.html")
+
+
+def _jinja_env() -> Environment:
+    """Build the Jinja2 environment for the templates shipped beside this module.
+
+    Raises:
+        FileNotFoundError: If the sibling ``templates`` directory does not exist.
+    """
+    template_dir = Path(__file__).parent / "templates"
+    if template_dir.exists():
+        return Environment(
+            loader=FileSystemLoader(template_dir),
+            autoescape=select_autoescape(["html", "xml"]),
+            trim_blocks=True,
+            lstrip_blocks=True,
+        )
+    raise FileNotFoundError(f"Template directory not found at {template_dir}")
+
+
+def _render_html(env: Environment) -> str:
+    """Render the decoder template to an HTML string.
+
+    The template is given a UTC generation timestamp and the
+    ``BaseType.TYPEMAP`` mapping with its keys converted to strings.
+    """
+    timestamp = datetime.now(tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%SZ")
+    string_keyed_typemap = {str(k): v for k, v in BaseType.TYPEMAP.items()}
+    return env.get_template(_TEMPLATE_NAME).render(
+        generated_at=timestamp,
+        typemap=string_keyed_typemap,
+    )
+
+
+def generate_html(output: Path) -> Path:
+    """Render the decoder page and write it to ``output``.
+
+    Missing parent directories of ``output`` are created first. The file is
+    written as UTF-8 and the same ``output`` path is returned.
+    """
+    rendered = _render_html(_jinja_env())
+    output.parent.mkdir(parents=True, exist_ok=True)
+    output.write_text(rendered, encoding="utf-8")
+    return output
+
+
+def _parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
+    """Parse the command-line options for the generator.
+
+    Args:
+        argv: Argument list to parse instead of the process arguments.
+            ``None`` (the default) makes argparse read ``sys.argv[1:]``, so
+            existing callers that pass nothing behave exactly as before.
+
+    Returns:
+        Namespace whose ``output`` attribute is the target ``Path``.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument(
+        "--output",
+        type=Path,
+        default=_DEFAULT_OUTPUT,
+        help=f"Target path for the generated HTML file (default: {_DEFAULT_OUTPUT})",
+    )
+    return parser.parse_args(argv)
+
+
+def main() -> Path:
+    """Parse the command line, generate the decoder page and report its path.
+
+    Returns the path that ``generate_html`` wrote to.
+    """
+    generated_file = generate_html(_parse_args().output)
+    print(f"Generated decoder HTML at {generated_file}")
+    return generated_file
+
+
+def run() -> None:
+    """Set up logging with ``common_logging`` and then run ``main``.
+
+    This is the target of the ``generate_decoder_html`` console script and of
+    the ``__main__`` guard below; the path returned by ``main`` is discarded.
+    """
+    common_logging(__name__, __file__)
+    main()
+
+
+if __name__ == "__main__":
+    run()
diff --git a/src/curl_fuzzer_tools/templates/corpus_decoder.html b/src/curl_fuzzer_tools/templates/corpus_decoder.html
diff --git a/tests/browser/test_corpus_decoder.py b/tests/browser/test_corpus_decoder.py