diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..647e9d5
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,41 @@
+name: Release Artifacts
+
+on:
+  push:
+    tags:
+      - 'v[0-9]+.[0-9]+.[0-9]+'
+
+permissions:
+  contents: write
+
+jobs:
+  release:
+    if: github.ref_type == 'tag' && startsWith(github.ref, 'refs/tags/v')
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Prepare assets
+        run: |
+          set -euo pipefail
+          for dir in schemas tests/vectors scripts/vectors; do
+            if [ ! -d "$dir" ]; then
+              echo "ERROR: Required directory '$dir' not found" >&2
+              exit 1
+            fi
+          done
+          mkdir -p dist
+          zip -r dist/schemas.zip schemas
+          zip -r dist/vectors.zip tests/vectors scripts/vectors
+
+      - name: Create GitHub Release
+        uses: softprops/action-gh-release@v2
+        with:
+          files: |
+            dist/schemas.zip
+            dist/vectors.zip
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
diff --git a/.github/workflows/vectors-cbor.yml b/.github/workflows/vectors-cbor.yml
new file mode 100644
index 0000000..5621247
--- /dev/null
+++ b/.github/workflows/vectors-cbor.yml
@@ -0,0 +1,35 @@
+name: CBOR Vectors (Python/Go)
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+
+jobs:
+  cbor-vectors:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Python cbor id
+        id: py
+        run: |
+          python -m pip install --upgrade pip cbor2 blake3
+          python scripts/vectors/python/cbor_canon.py tests/vectors/core/entry_canonical.json > py_cbor_id.txt
+          echo "id=$(cat py_cbor_id.txt)" >> $GITHUB_OUTPUT
+
+      - uses: actions/setup-go@v5
+        with: { go-version: '1.21.x' }
+      - name: Go cbor id
+        id: go
+        run: |
+          (cd scripts/vectors/go-cbor && go build -o ../../bin_lk_cbor_go .)
+ scripts/bin_lk_cbor_go tests/vectors/core/entry_canonical.json > go_cbor_id.txt + echo "id=$(cat go_cbor_id.txt)" >> $GITHUB_OUTPUT + + - name: Compare CBOR IDs + run: | + echo "Python CBOR: ${{ steps.py.outputs.id }}" + echo "Go CBOR: ${{ steps.go.outputs.id }}" + test "${{ steps.py.outputs.id }}" = "${{ steps.go.outputs.id }}" + diff --git a/.github/workflows/vectors-matrix.yml b/.github/workflows/vectors-matrix.yml new file mode 100644 index 0000000..120c2df --- /dev/null +++ b/.github/workflows/vectors-matrix.yml @@ -0,0 +1,68 @@ +name: Canonicalization Vectors (Python/Rust/Go) + +on: + push: + branches: [ main ] + pull_request: + +jobs: + vectors: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: { python-version: '3.x' } + - name: Install python deps + run: python -m pip install --upgrade pip blake3 + - name: Python compute id + id: py + run: | + python scripts/vectors/python/canon.py tests/vectors/core/entry_canonical.json > py_id.txt + echo "id=$(cat py_id.txt)" >> $GITHUB_OUTPUT + + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable + - name: Build Rust tool + run: | + cd scripts/vectors/rust + cargo build --release + - name: Rust compute id + id: rs + run: | + scripts/vectors/rust/target/release/lk_canon_rust tests/vectors/core/entry_canonical.json > rs_id.txt + echo "id=$(cat rs_id.txt)" >> $GITHUB_OUTPUT + + - name: Set up Go + uses: actions/setup-go@v5 + with: { go-version: '1.21.x' } + - name: Build Go tool + run: | + cd scripts/vectors/go && go build -o ../../bin_lk_canon_go . 
+      - name: Go compute id
+        id: go
+        run: |
+          scripts/bin_lk_canon_go tests/vectors/core/entry_canonical.json > go_id.txt
+          echo "id=$(cat go_id.txt)" >> $GITHUB_OUTPUT
+
+      - name: Compare IDs
+        run: |
+          echo "Python: ${{ steps.py.outputs.id }}"
+          echo "Rust: ${{ steps.rs.outputs.id }}"
+          echo "Go: ${{ steps.go.outputs.id }}"
+          test "${{ steps.py.outputs.id }}" = "${{ steps.rs.outputs.id }}"
+          test "${{ steps.rs.outputs.id }}" = "${{ steps.go.outputs.id }}"
+
+      - name: C (blake3) check over canonical bytes
+        run: |
+          sudo apt-get update && sudo apt-get install -y libblake3-dev
+          echo -n "${{ steps.py.outputs.id }}" > py_id_hex
+          # NOTE(review): earlier steps use this same canon.py invocation to print the hex id; confirm it emits canonical BYTES here, otherwise the C tool below hashes the hex id string and the final comparison fails
+          python scripts/vectors/python/canon.py tests/vectors/core/entry_canonical.json > can.txt
+          # Compile the C tool and pipe canonical bytes into it (libraries must come after sources for the linker)
+          gcc -O2 -o c_b3sum scripts/vectors/c/blake3_id.c -lblake3
+          ./c_b3sum < can.txt > c_id.txt
+          echo "C: $(cat c_id.txt)"
+          test "$(cat c_id.txt)" = "${{ steps.py.outputs.id }}"
diff --git a/.gitignore b/.gitignore
index 99a3fe2..c50f21f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -67,3 +67,7 @@ fastlane/test_output
 !.vscode/tasks.json
 !.vscode/launch.json
 !.vscode/*.code-snippets
+# Temp artifacts
+tmp/
+
+FEEDBACK.md
\ No newline at end of file
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..830f8be
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,13 @@
+# Changelog
+
+## v0.1.0 (proposed)
+
+- Spec spine: Model (M‑1..M‑9), Formal Spec (FS‑1..FS‑14)
+- Wire Format: JSON canonical profile (+ optional CBOR profile), BLAKE3‑256 id, domain‑separated signing input
+- Compliance: levels (Core/Policy/WASM), checks C‑1..C‑5, report schema
+- Schemas: entry, attestation, policy_result, compliance_report (+ aliases)
+- Vectors: Python/Rust/Go canonicalization tools; JSON golden vector; CI matrix that fails on divergence
+- Orchestrator: minimal TOML‑driven runner that emits compliance.json and validates against schema
+- WASM Policy ABI: 
deterministic host interface and constraints +- Implementers Guide and CLI harness docs + diff --git a/README.md b/README.md index 9ddf4f6..612bd29 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,9 @@ # Ledger-Kernel -[![Docs (VitePress)](https://github.com/flyingrobots/ledger-kernel/actions/workflows/docs.yml/badge.svg)](https://flyingrobots.github.io/ledger-kernel/) [![Site](https://img.shields.io/badge/docs-site-blue?logo=github)](https://flyingrobots.github.io/ledger-kernel/) +> [!IMPORTANT]\ +> This project is under construction. + +[![Docs](https://img.shields.io/badge/docs-website-0b7285?logo=readthedocs)](https://flyingrobots.github.io/ledger-kernel/) [![Docs Build](https://github.com/flyingrobots/ledger-kernel/actions/workflows/docs.yml/badge.svg)](https://github.com/flyingrobots/ledger-kernel/actions/workflows/docs.yml) [![Spec Version](https://img.shields.io/github/v/tag/flyingrobots/ledger-kernel?label=spec%20version)](https://github.com/flyingrobots/ledger-kernel/tags) **Git-native, cryptographically verifiable, append-only ledgers with policy enforcement.** @@ -12,15 +15,15 @@ **Ledger-Kernel** is a formal specification and reference implementation ([`libgitledger`](https://github.com/flyingrobots/libgitledger)) for building verifiable, append-only ledgers directly on top of Git’s object model. -Unlike blockchains or SaaS audit logs, **Ledger-Kernel is just Git**. +Unlike blockchains or SaaS audit logs, **Ledger-Kernel is just Git**.\ It adds deterministic replay, cryptographic attestation, and programmable policy enforcement without introducing new infrastructure. It uses existing `.git` storage, requiring no daemons or databases. It enforces fast-forward-only semantics to ensure history is immutable and guarantees deterministic replay, where identical input always yields identical state. Every entry is attested for non-repudiable authorship, and the system supports WASM-based policies for validation. 
-✅ **It's _just_ Git!** No custom storage, no daemons, just `.git`. -✅ Enforces fast-forward-only semantics. History is immutable by design. -✅ Provides deterministic replay. Same entries = same state, always. -✅ Cryptographically attests every entry. Non-repudiable authorship. +✅ **It's _just_ Git!** No custom storage, no daemons, just `.git`.\ +✅ Enforces fast-forward-only semantics. History is immutable by design.\ +✅ Provides deterministic replay. Same entries = same state, always.\ +✅ Cryptographically attests every entry. Non-repudiable authorship.\ ✅ Supports programmable policies. WASM-based rules for entry validation. ### Why Use It? @@ -77,7 +80,8 @@ The architecture is layered. The Kernel Spec defines the formal model and invari ## Core Invariants -Every compliant implementation **MUST** enforce: +Every compliant implementation **MUST** enforce:\ + | Invariant | Meaning | |---|---| | Append-Only | Entries cannot be modified or deleted | @@ -126,18 +130,52 @@ Documentation - How to run the harness (user‑facing): `docs/cli/harness.md`. - Implementers Guide (repo setup, CLI contract, CI snippets): `docs/implementation/implementers.md`. 
+## Documentation Quick Links + +Spec +- Overview: `docs/spec/overview.md` +- Model (hybrid with M‑x call‑outs): `docs/spec/model.md` +- Formal Spec (FS‑1..FS‑14): `docs/spec/formal-spec.md` +- Wire Format (JSON canonical profile + optional CBOR): `docs/spec/wire-format.md` +- Compliance (levels, checks C‑1..C‑5, report schema): `docs/spec/compliance.md` +- Deterministic WASM Policy ABI (FS‑9): `docs/spec/policy-wasm.md` +- Versioning & Releases: `docs/spec/versioning.md` + +CLI / DX +- Running the Compliance Harness: `docs/cli/harness.md` +- Implementers Guide: `docs/implementation/implementers.md` + +Schemas & Vectors +- Compliance Report Schema: `schemas/compliance_report.schema.json` +- Entry / Attestation / PolicyResult Schemas: `schemas/entry.schema.json`, `schemas/attestation.schema.json`, `schemas/policy_result.schema.json` +- Schema aliases (for convenience): `schemas/entry.json`, `schemas/attest.json`, `schemas/policy.json` +- Golden vector (JSON): `tests/vectors/core/entry_canonical.json` +- Canonicalization tools: Python `scripts/vectors/python/canon.py`, Rust `scripts/vectors/rust`, Go `scripts/vectors/go`, C blake3 `scripts/vectors/c/blake3_id.c` +- CBOR tools: Python `scripts/vectors/python/cbor_canon.py`, Go `scripts/vectors/go-cbor` + +Orchestrator & Linter +- Polyglot orchestrator (TOML → compliance.json): `scripts/harness/run.sh` (see `scripts/harness/README.md`) +- Sample config: `scripts/harness/config.sample.toml` +- Spec linter (id/signing/schema): `scripts/lint/spec_lint.py` + +CI +- JSON matrix (fail on divergence): `.github/workflows/vectors-matrix.yml` +- CBOR matrix: `.github/workflows/vectors-cbor.yml` + Reference implementation - Portable C reference: https://github.com/flyingrobots/libgitledger ## Quick Start 1. **Install libgitledger** + ```bash git clone https://github.com/flyingrobots/ledger-kernel cd ledger-kernel && make && sudo make install ``` 2. 
**Initialize a Ledger** + ```bash git init my-ledger cd my-ledger @@ -145,6 +183,7 @@ Reference implementation ``` 3. **Append an Entry** + ```bash git ledger append \   --ref refs/_ledger/prod/deploys \ @@ -153,6 +192,7 @@ Reference implementation ``` 4. **Replay & Verify** + ```bash git ledger replay  --ref refs/_ledger/prod/deploys git ledger verify  --ref refs/_ledger/prod/deploys @@ -201,10 +241,10 @@ git mind query "show me all TODO items" ## Security Model -**Traceability**: Every entry is cryptographically signed. -**Non-Repudiation**: Compliance proofs are emitted per operation. -**Monotonic Atomicity**: Ledger refs advance only by fast-forward. -**Programmable Authorization**: WASM policies act as rule gates. +**Traceability**: Every entry is cryptographically signed.\ +**Non-Repudiation**: Compliance proofs are emitted per operation.\ +**Monotonic Atomicity**: Ledger refs advance only by fast-forward.\ +**Programmable Authorization**: WASM policies act as rule gates.\ **Offline Verifiability**: Anyone with read access can replay history. --- @@ -227,23 +267,23 @@ Compliance levels progress from Core (eight mandatory invariants) to Verified (i --- -## Project Status +## Project Status ### v0.1.0 (Draft Specification) -The specification is finalized (✅). -The [`libgitledger`](https://github.com/flyingrobots/libgitledger) reference implementation and the compliance test suite are both in progress (🚧). +The specification is finalized (✅).\ +The [`libgitledger`](https://github.com/flyingrobots/libgitledger) reference implementation and the compliance test suite are both in progress (🚧).\ [Shiplog](https://github.com/flyingrobots/shiplog) integration using libgitledger and the WASM policy engine are planned for the future (🔜). 
--- ## Acknowledgments -This project acknowledges +This project acknowledges -Git ([Linus Torvalds](https://github.com/torvalds)) for the content-addressed DAG -[Certificate Transparency](https://certificate.transparency.dev/) for append-only logs -[Sigstore](https://www.sigstore.dev/) for supply-chain attestations +Git ([Linus Torvalds](https://github.com/torvalds)) for the content-addressed DAG\ +[Certificate Transparency](https://certificate.transparency.dev/) for append-only logs\ +[Sigstore](https://www.sigstore.dev/) for supply-chain attestations\ and [Nix](https://nixos.org/) for deterministic builds. --- @@ -276,8 +316,7 @@ Shiplog captures stdout, stderr, exit code, timestamp, author, and reason - the wesley - -Stop describing your data model six times in six different files. +Stop describing your data model six times in six different files.\ Everyone else generates GraphQL from databases. Wesley flips the stack and generates databases from GraphQL. From one schema, Wesley compiles your entire backend: @@ -289,7 +328,7 @@ From one schema, Wesley compiles your entire backend: - pgTAP tests - A SHA-locked "Shipme" certification file for zero-downtime deployments -Your schema is the source of truth. Everything else is a compilation target. +Your schema is the source of truth. Everything else is a compilation target.\ Banish drift. Never think about migrations again. Describe your shapes once and let Wesley handle the rest. _Go on, deploy on a Friday._ @@ -302,6 +341,7 @@ _Go on, deploy on a Friday._ git mind ingest notes/ git mind query "show me all TODO items" ``` + > _Version your thoughts. Branch your ideas. Merge understanding._ `git-mind` is an open-source protocol and toolkit that turns Git into a database-less, version-controlled semantic knowledge graph — a tool for distributed cognition, evolving interpretation, and human–AI co-thought. @@ -310,15 +350,15 @@ git mind query "show me all TODO items" ## Contact -**Author**: _J. 
Kirby Ross_ -**Email**: [james@flyingrobots.dev](mailto:james@flyingrobots.dev) +**Author**: _J. Kirby Ross_\ +**Email**: [james@flyingrobots.dev](mailto:james@flyingrobots.dev)\ **GitHub**: [flyingrobots](https://github.com/flyingrobots) --- ## License -MIT License (_with Ethical Use Clause_) · **© 2025 J. Kirby Ross** +MIT License (_with Ethical Use Clause_) · **© 2025 J. Kirby Ross**\ _See [`LICENSE`](./LICENSE) and [`NOTICE`](./NOTICE.md) for terms._ > _“Provenance without clutter. Policy as infrastructure. Zero SaaS, zero guesswork.”_ diff --git a/docs/.vitepress/config.ts b/docs/.vitepress/config.ts index bd293f7..2dba49a 100644 --- a/docs/.vitepress/config.ts +++ b/docs/.vitepress/config.ts @@ -8,6 +8,10 @@ export default withMermaid(defineConfig({ // Keep while we reshuffle content into the new IA ignoreDeadLinks: true, themeConfig: { + footer: { + message: 'Spec docs built with VitePress · Latest version: see GitHub tags', + copyright: '© 2025 Ledger‑Kernel authors' + }, nav: [ { text: 'Spec', link: '/spec/', activeMatch: '^/spec/' }, { text: 'Reference Implementation', link: '/implementation/', activeMatch: '^/implementation/' }, diff --git a/docs/compliance/index.md b/docs/compliance/index.md index 51c556b..72c8ccf 100644 --- a/docs/compliance/index.md +++ b/docs/compliance/index.md @@ -11,7 +11,7 @@ Version: 0.1.0 ## 1. Purpose -The compliance suite ensures that any implementation of the **Ledger-Kernel** (e.g., `libgitledger`, `ledger-core-rust`, `ledger-js`) adheres to the invariants, semantics, and deterministic behavior defined in [`SPEC`](../spec/) and [`MODEL`](../model/). +The compliance suite ensures that any implementation of the **Ledger-Kernel** (e.g., `libgitledger`, `ledger-core-rust`, `ledger-js`) adheres to the invariants, semantics, and deterministic behavior defined in the [Specification](/spec/) and [Model](/spec/model). 
A compliant implementation must **pass all mandatory tests** and **expose proofs or logs** demonstrating correctness. @@ -123,10 +123,10 @@ $ make determinism Implementations must emit standardized error codes: -| **Code** | **Name** | **Meaning** | -|----------|----------|-------------| +| Code | Name | Meaning | +|------|------|---------| | `E_APPEND_REJECTED` | Append Rejected | Append violates invariants | -| `E_SIG_INVALID` | Invalid Signature | Attestation verification failed | +| `E_SIG_INVALID` | Signature Invalid | Attestation verification failed | | `E_POLICY_FAIL` | Policy Failed | Policy evaluation false | | `E_REPLAY_MISMATCH` | Replay Mismatch | Non-deterministic replay | | `E_TEMPORAL_ORDER` | Temporal Order | Timestamp regression | @@ -138,12 +138,12 @@ This allows cross-implementation comparison of failure semantics. ## 8. Compliance Scoring -| **Level** | **Requirements** | -|-------|--------------| -| Core | Pass 01-08 tests | -| Extended | Pass multi-sig, policy composition, and replay-failure tests | -| Certified | Provide reproducible proofs and determinism audit logs | -| Verified | Independently reviewed and cryptographically attested results | +| Level | Requirements | +|-------|-------------| +| Core | Pass 01-08 tests | +| Extended | Pass multi-sig, policy composition, and replay-failure tests | +| Certified | Provide reproducible proofs and determinism audit logs | +| Verified | Independently reviewed and cryptographically attested results | Implementations **MAY** publish a signed compliance report under: @@ -202,7 +202,6 @@ The report includes version, platform, test summary, and digests. --- ## 12. 
Future Work - - Property-based generator for randomized append/replay sequences - Integration with CI/CD to auto-validate pull requests - Optional differential testing between implementations diff --git a/docs/implementation/errors.md b/docs/implementation/errors.md index 30ba60d..620b019 100644 --- a/docs/implementation/errors.md +++ b/docs/implementation/errors.md @@ -6,3 +6,6 @@ title: Error Model WIP. +::: tip See also +For user-level execution of the compliance suite, see [Running the Compliance Harness](/cli/harness). +::: diff --git a/docs/implementation/implementers.md b/docs/implementation/implementers.md index 291de64..452e419 100644 --- a/docs/implementation/implementers.md +++ b/docs/implementation/implementers.md @@ -2,32 +2,44 @@ title: Implementers Guide --- -# Implementers Guide +# Implementer's Guide -This page summarizes how to implement the Ledger‑Kernel spec and prove conformance with the compliance harness. It is language‑agnostic: any CLI that follows the contract below can integrate. +This page summarizes how to implement the **Ledger‑Kernel** spec and prove conformance with the compliance harness. It is language‑agnostic: any CLI that follows the contract below can integrate. ## 1. What you need to implement -- Model & invariants: see [Model](/spec/model) (M‑1 … M‑9) and [Formal Spec](/spec/formal-spec) (FS‑1 … FS‑14). -- Wire encodings: see [Wire Format](/spec/wire-format) (canonical JSON, hashing/signing, trailers) and the JSON Schemas under `schemas/`. -- Compliance report: your CLI should emit a `compliance.json` that validates against `schemas/compliance_report.schema.json`. +### 1.1. Model & invariants + +See [Model](/spec/model) (M‑1 … M‑9) and [Formal Spec](/spec/formal-spec) (FS‑1 … FS‑14). + +### 1.2. Wire encodings + +See [Wire Format](/spec/wire-format) (canonical JSON, hashing/signing, trailers) and the JSON Schemas under `schemas/`. + +### 1.3. 
Compliance report + +Your CLI should emit a `compliance.json` that validates against [`schemas/compliance_report.schema.json`](https://github.com/flyingrobots/ledger-kernel/blob/main/schemas/compliance_report.schema.json). ## 2. Recommended repo setup -Option A — Submodule the spec (recommended) +### Option A — Submodule the spec (recommended) + 1) In your implementation repo (e.g., Rust, C, Go): + ```bash git submodule add -b main https://github.com/flyingrobots/ledger-kernel external/ledger-kernel git submodule update --init --recursive ``` + 2) In CI and locally, reference schemas and docs from `external/ledger-kernel/`. -Option B — Vendor a release tarball +### Option B — Vendor a release tarball + - Download a tagged release of this repo in CI; extract `schemas/` and (optionally) test vectors. ## 3. CLI contract (language‑agnostic) -Your CLI SHOULD expose a compliance mode that emits the standard report: +Your CLI **SHOULD** expose a compliance mode that emits the standard report: ```bash your-cli verify --compliance \ @@ -36,26 +48,25 @@ your-cli verify --compliance \ [--schema external/ledger-kernel/schemas/compliance_report.schema.json] ``` -Status values MUST be exactly: `PASS` | `PARTIAL` | `FAIL` | `N/A`. +Status values **MUST** be exactly: `PASS` | `PARTIAL` | `FAIL` | `N/A`. Minimum checks to implement (see [Compliance](/spec/compliance)): -- C‑1 → FS‑10: canonicalize known JSON → `id` = expected BLAKE3‑256 -- C‑2 → FS‑7, FS‑8: reject non‑FF ref updates; ref unchanged -- C‑3 → FS‑11: reject timestamp earlier than parent -- C‑4 → FS‑3, FS‑9: deterministic policy evaluation → same result across runs -- C‑5 → FS‑6: offline verify of a small ledger → PASS -If your runtime lacks a feature (e.g., WASM), mark the corresponding checks `N/A` and compute level verdicts accordingly. 
+- [ ] C‑1 → FS‑10: canonicalize known JSON → `id` = expected BLAKE3‑256 +- [ ] C‑2 → FS‑7, FS‑8: reject non‑FF ref updates; ref unchanged +- [ ] C‑3 → FS‑11: reject timestamp earlier than parent +- [ ] C‑4 → FS‑3, FS‑9: deterministic policy evaluation → same result across runs +- [ ] C‑5 → FS‑6: offline verify of a small ledger → PASS -## 4. Compliance report shape +> [!NOTE]\ +> If your runtime lacks a feature (e.g., WASM), mark the corresponding checks `N/A` and compute level verdicts accordingly. -The report MUST validate against the schema: +## 4. Compliance report shape -``` -schemas/compliance_report.schema.json -``` +The report **MUST** validate against the [schema](https://github.com/flyingrobots/ledger-kernel/blob/main/schemas/compliance_report.schema.json). Example: + ```json { "implementation": "my-impl", @@ -83,7 +94,8 @@ These templates are illustrative and not required; the normative artifact is the ## 7. CI examples -Rust (cargo): +### Rust (cargo) + ```yaml steps: - uses: actions/checkout@v4 @@ -94,7 +106,8 @@ steps: - run: jq -e '.summary.core=="PASS"' compliance.json ``` -C/CMake: +### C/CMake + ```yaml steps: - uses: actions/checkout@v4 @@ -108,5 +121,4 @@ steps: ## 8. Example implementation -See the reference implementation in C: https://github.com/flyingrobots/libgitledger - +See the reference implementation in C, at [libgitledger](https://github.com/flyingrobots/libgitledger). diff --git a/docs/spec/formal-spec.md b/docs/spec/formal-spec.md index 813e5f3..7d7382c 100644 --- a/docs/spec/formal-spec.md +++ b/docs/spec/formal-spec.md @@ -8,6 +8,7 @@ This section defines the normative, testable rules for Ledger‑Kernel. Clauses ## 1. Data Structures + ### FS‑1 Entry (Abstract) An Entry SHALL be an immutable record addressable by a collision‑resistant content hash. 
Conceptually, an Entry comprises: @@ -19,6 +20,7 @@ An Entry SHALL be an immutable record addressable by a collision‑resistant con An Entry MUST serialize deterministically (FS‑10) and be committed as a conventional Git commit under a namespaced ref (see FS‑8, FS‑12). + ### FS‑2 Attestation (Abstract) An Attestation SHALL bind a signer identity to an Entry’s content hash using a specified algorithm. At minimum it MUST include: @@ -30,6 +32,7 @@ An Attestation SHALL bind a signer identity to an Entry’s content hash using a Verification MUST succeed with repository‑local material (FS‑6). + ### FS‑3 Policy Result (Abstract) Policy evaluation SHALL be deterministic and side‑effect free. Its result is a boolean decision and MAY include structured diagnostics: @@ -40,6 +43,7 @@ Policy engines MUST NOT read clocks, randomness, network, or ambient I/O (FS‑9 ## 2. Operations + ### FS‑4 append(R, L, E) To append, an implementation MUST: @@ -51,37 +55,47 @@ To append, an implementation MUST: If any step fails, the implementation MUST NOT advance the ref and MUST return an error. + ### FS‑5 replay(R, L) Replay SHALL fold the deterministic transition function over the ordered Entries (M‑2) from the genesis to head, producing `state(L)`. Replay MUST NOT consult external state. + ### FS‑6 verify(R, L) Verification SHALL check the entire history of `L` for conformity: ordering, fast‑forward ref evolution, canonical hashes, required attestations, deterministic policy acceptance, and deterministic replay equivalence. Verification MUST be possible using repository‑local data only (no network). ## 3. Determinism & Constraints + ### FS‑7 Append‑Only & Total Order The set of Entries under the ledger ref SHALL form a single linear chain with no merges or parallel branches. Once committed, Entries MUST NOT be modified or deleted. + ### FS‑8 Fast‑Forward Only (Ref Semantics) The ledger ref `p` SHALL advance only by fast‑forward such that `Parent(Eᵢ₊₁) = Hash(Eᵢ)`. 
Non‑FF updates (rebase, force‑push, merge) are forbidden. + ### FS‑9 Policy Determinism Policy engines SHALL be deterministic. They MUST NOT access monotonic or wall‑clock time, random number generators, network, filesystem, or environment state except for explicitly whitelisted, immutable inputs provided by the host. Given identical inputs, evaluation MUST yield identical outputs. + ### FS‑10 Canonical Serialization & Hashing Entries and Attestations SHALL serialize to a canonical byte sequence unambiguously (e.g., stable field order, UTF‑8, normalized newlines). The content hash used for identity and signatures MUST be computed over this canonical form. The canonicalization procedure MUST be documented and stable. + ### FS‑11 Temporal Monotonicity Entry timestamps SHALL be monotonically non‑decreasing relative to their parent. Implementations MAY enforce stricter monotonicity policies via the policy engine. + ### FS‑12 Namespaces & Storage Ledger data SHALL be stored under a dedicated ref namespace (e.g., `refs/_ledger/`). Attestations and policies MAY use auxiliary refs under the same namespace as defined in Wire Format. Implementations MUST NOT rely on non‑Git storage to satisfy core verification (FS‑6). ## 4. Errors & Reporting + ### FS‑13 Error Domains Implementations SHOULD expose structured error domains: `parse`, `canonicalize`, `hash`, `attestation`, `policy`, `ordering`, `ff`, `io`, `internal`. + ### FS‑14 Diagnostics Verification and append failures SHOULD include machine‑readable diagnostics sufficient to reproduce the decision offline. @@ -93,6 +107,22 @@ Verification and append failures SHOULD include machine‑readable diagnostics s - FS‑6 substantiates M‑8 (offline verification). 
::: +## 5.1 FS↔M Mapping + +The following table links Model clauses (M‑x) to the normative Formal Spec clauses (FS‑x): + +| Model (M‑x) | Summary | Formal Spec (FS‑x) | +| --- | --- | --- | +| M‑1 | Ledger structure (L = (p, E, A, P)) | [FS‑1](#fs-1), [FS‑2](#fs-2), [FS‑3](#fs-3) | +| M‑2 | State and transition, replay as fold | [FS‑5](#fs-5), [FS‑10](#fs-10) | +| M‑3 | Admission predicate V(L,S,E) | [FS‑3](#fs-3), [FS‑4](#fs-4) | +| M‑4 | Append‑only | [FS‑7](#fs-7) | +| M‑5 | Fast‑forward only | [FS‑8](#fs-8) | +| M‑6 | Total order (single chain) | [FS‑7](#fs-7), [FS‑8](#fs-8) | +| M‑7 | Deterministic replay | [FS‑5](#fs-5), [FS‑9](#fs-9), [FS‑10](#fs-10) | +| M‑8 | Offline verify | [FS‑6](#fs-6) | +| M‑9 | Policy determinism | [FS‑3](#fs-3), [FS‑9](#fs-9) | + ## 6. References - [Model](/spec/model) diff --git a/docs/spec/model-source.md b/docs/spec/model-source.md index a799f9e..a9f990b 100644 --- a/docs/spec/model-source.md +++ b/docs/spec/model-source.md @@ -1,12 +1,143 @@ --- -title: Model Source (Unannotated) +Author: J. Kirby Ross (https://github.com/flyingrobots) +Created: 2025-10-27 +License: MIT +Scope: This document defines the minimal, requisite semantics and invariants that a Git-native ledger implementation must satisfy to be considered compliant. +Status: Draft +Summary: Defines the invariants, operations, and compliance requirements for a Git-native append-only ledger. +Version: 0.1.0 --- -# Model Source (Unannotated) +# **A Formal State-Transition Model for a Git-Native Verifiable Ledger** -This page is auto‑generated from the repository‑root `MODEL.md` during the docs build to preserve the canonical, unannotated text. +## Abstract -> The curated specification lives at [/spec/model](/spec/model). This page mirrors the raw source for reference. +We present a formal state-transition model for a verifiable ledger kernel operating natively on a Git-based Directed Acyclic Graph (DAG). 
The model defines a ledger as a totally ordered sequence of entries, where each entry represents an atomic state transition. We formalize the system's core components: a pure state transition function $\mathcal{T}$, a constraint-based policy engine $\mathcal{P}$, and a cryptographic attestation mechanism $\mathcal{A}$. The central thesis of our model is the guarantee of **deterministic replayability**, where the ledger's final state is a pure function of its entry-set. This formalism provides a verifiable foundation for trusted, distributed systems, such as software supply chain attestation or decentralized registries. - +--- + +## 1. Introduction + +The Git object model, fundamentally a content-addressed Directed Acyclic Graph (DAG), provides a robust mechanism for tracking provenance. However, its inherent support for branching and non-linear histories complicates its use as a linear, append-only ledger. This paper introduces a formal model that superimposes a **totally ordered state machine** onto the Git DAG. We achieve this by constraining a specific Git reference (ref) to a fast-forward-only commit history, where each commit constitutes a **ledger entry** ($\mathcal{E}$). This model establishes the semantic bridge between low-level Git objects and high-level, verifiable ledger state transitions, enabling deterministic replay and cryptographic verification of the ledger's history and state. + +--- + +## 2. Formal Model Definition + +Let a Git repository be a tuple $\mathcal{R} = (\mathcal{O}, \mathcal{R_{efs}})$, where $\mathcal{O}$ is a content-addressed object store (a set of Git objects) and $\mathcal{R_{efs}}$ is a mapping from reference paths to commit identifiers (hashes). + +We define a **Ledger Entry**, $\mathcal{E}$, as a commit object $C \in \mathcal{O}$ that adheres to a specific data schema (e.g., contains `/_ledger/entry.json` and associated attestations). 
+ +We define a Ledger, $\mathcal{L}$, as a tuple: + +$$ +\mathcal{L} = (p, \mathbf{E}, \mathcal{A}, \mathcal{P}) +$$ + +where: + +- $p$ is the persistent reference path (e.g., `refs/heads/main-ledger`) in $\mathcal{R_{efs}}$. +- $\mathbf{E} = \langle \mathcal{E}_0, \mathcal{E}_1, \dots, \mathcal{E}_n \rangle$ is a **totally ordered sequence** of ledger entries. This ordering is strictly enforced by the commit ancestry relation under $p$, s.t. $\text{Parent}(\mathcal{E}_{i+1}) = \text{Hash}(\mathcal{E}_i)$. +- $\mathcal{A}$ is the set of all attestations, where each $\mathcal{A}_k \in \mathcal{A}$ is cryptographically bound to a specific entry $\mathcal{E}_i \in \mathbf{E}$. +- $\mathcal{P}$ is a set of policies applicable to $\mathcal{L}$. + +The _head_ of the ledger $\mathcal{L}$ corresponds to the commit hash $\text{Hash}(\mathcal{E}_n)$, which is the value of $\mathcal{R_{efs}}[p]$. + +--- + +## 3. The State Transition System + +The ledger's semantics are defined by a deterministic state transition system. + +Let $\mathcal{S}$ be the set of all possible ledger states. We define the initial state as the empty set: $\mathcal{S}_0 = \emptyset$. + +We define a pure, deterministic state transition function $\mathcal{T}$: + +$$ +\mathcal{T} : \mathcal{S} \times \mathcal{E} \to \mathcal{S} +$$ + +Given a current state $\mathcal{S}_i$ (derived from entry $\mathcal{E}_i$), the subsequent state $\mathcal{S}_{i+1}$ is produced by applying the next entry $\mathcal{E}_{i+1}$: + +$$ +\mathcal{S}_{i+1} = \mathcal{T}(\mathcal{S}_i, \mathcal{E}_{i+1}) +$$ + +The function $\mathcal{T}$ must be **deterministic** and **pure**; it must produce an identical output state $\mathcal{S}_{i+1}$ given identical inputs $(\mathcal{S}_i, \mathcal{E}_{i+1})$, with no reliance on external I/O, network state, or stochastic processes. + +### 3.1. 
State Re-computation (Replay) + +The complete state $\mathcal{S}_n$ of a ledger $\mathcal{L}$ with $n$ entries is the result of a functional fold (or reduction) over the entry sequence $\mathbf{E}$: + +$$ +\mathcal{S}_n = \text{foldl}(\mathcal{T}, \mathcal{S}_0, \mathbf{E}) +$$ + +Recursively, this is defined as: + +- $\text{Replay}(\langle \rangle) = \mathcal{S}_0$ +- $\text{Replay}(\langle \mathcal{E}_0, \dots, \mathcal{E}_i \rangle) = \mathcal{T}(\text{Replay}(\langle \mathcal{E}_0, \dots, \mathcal{E}_{i-1} \rangle), \mathcal{E}_i)$ + +This property is the foundation of the system's verifiability. + +--- + +## 4. Transition Validity + +Important: For a new entry $\mathcal{E}_{i+1}$ to be appended to the ledger at state $\mathcal{S}_i$, a global validity predicate $\mathcal{V}$ must evaluate to true. + +$$ +\mathcal{V}(\mathcal{E}_{i+1}, \mathcal{E}_i, \mathcal{S}_i, \mathcal{P}) \to \{\text{true}, \text{false}\} +$$ + +The predicate $\mathcal{V}$ is the logical conjunction of the following constraints: + +1. Ancestry Constraint: The entry must maintain the fast-forward chain. + +$$ +\text{ParentHash}(\mathcal{E}_{i+1}) \equiv \text{Hash}(\mathcal{E}_i) +$$ + +2. Temporal Monotonicity: The entry's timestamp must be non-decreasing. + +$$ +\text{Timestamp}(\mathcal{E}_{i+1}) \geq \text{Timestamp}(\mathcal{E}_i) +$$ + +3. Policy Adherence: The entry must satisfy all active policies $\mathcal{P}_k \in \mathcal{P}$, evaluated against the current state $\mathcal{S}_i$. (See §5). + +$$ +\mathcal{P}_{\text{all}}(\mathcal{E}, \mathcal{S}) = \bigwedge_{k \in \mathcal{P}} \mathcal{P}_k(\mathcal{E}, \mathcal{S}) \equiv \text{true} +$$ + +4. Attestation Validity: All attestations $\mathcal{A}_k$ attached to $\mathcal{E}_{i+1}$ must be cryptographically valid. (See §6). 
+ +$$ +\forall \mathcal{A}_k:\; \mathcal{V}_{\text{attest}}\!\left(\mathcal{A}_k,\; \text{Hash}(\mathcal{E}_{i+1})\right) \equiv \text{true} +$$ + +If $\mathcal{V}$ fails, the transition is rejected, and the entry $\mathcal{E}_{i+1}$ is not appended to the ledger $\mathcal{L}$. + +--- + +### **5. Policy as a State Constraint** + +A policy is a pure function (a predicate) that constrains valid transitions. + +Policies are executed _before_ the state transition function $\mathcal{T}$ is applied. They are evaluated using the candidate entry ($\mathcal{E}_{i+1}$) and the previous state ($\mathcal{S}_i$). + +Policies are composable, typically via logical conjunction: + +$$ +\mathcal{P}_{\text{all}}(\mathcal{E}, \mathcal{S}) = \bigwedge_{k \in \mathcal{P}} \mathcal{P}_k(\mathcal{E}, \mathcal{S}) \equiv \text{true} +$$ + +--- + +### **6. Attestation Model** + +An attestation $\mathcal{A}$ provides a non-repudiable cryptographic binding between an external identity (signer) and a specific ledger entry $\mathcal{E}$. + +Let $\mathcal{A}$ be a tuple $\mathcal{A} = (\text{signer\_id}, \sigma)$, where $\sigma$ is a digital signature. +The verification function $\mathcal{V}_{\text{attest}}$ (informally) checks that each attestation’s signature verifies against the entry hash with the signer’s public key. diff --git a/docs/spec/model.md b/docs/spec/model.md index 6d5fefa..c3c04ad 100644 --- a/docs/spec/model.md +++ b/docs/spec/model.md @@ -10,6 +10,7 @@ This page formalizes the ledger as a deterministic state machine over Git commit ## 1. Ledger Objects and Notation + ::: info M‑1 (Ledger Structure) A ledger L SHALL be the tuple (p, E, A, P), where p is a Git ref, E is a totally ordered sequence of entries (Git commits under p), A is the set of attestations bound to entries, and P is the active policy set. Each entry MUST be a conventional Git commit recorded under p. See Formal Spec (FS‑1…FS‑3). ::: @@ -26,6 +27,7 @@ The head of L is `Hash(Eₙ) = R[p]`. ## 2. 
State and Transition Function + ::: info M‑2 (State and Transition) The ledger state space S SHALL admit a pure, deterministic transition function `T: S × Entry → S` with `S₀ = ∅` and `Sₙ = foldl(T, S₀, E)`. ::: @@ -38,6 +40,7 @@ Intuition: given a current state `Sᵢ` and the next entry `Eᵢ₊₁`, produce ## 3. Admission and Validity + ::: info M‑3 (Admission Predicate) An entry `Eᵢ` is admissible at state `Sᵢ` iff `V(L,Sᵢ,Eᵢ) = true`, where V includes: - fast‑forward update of ref `p` (no rebase/merge under `p`), @@ -49,14 +52,17 @@ If V fails, `Eᵢ` MUST NOT advance `p` and MUST be rejected. ## 4. Invariants + ::: info M‑4 (Append‑Only) Once recorded, entries MUST NOT be modified or deleted. ::: + ::: info M‑5 (Fast‑Forward Only) The ref `p` MUST only advance by fast‑forward: `Parent(Eᵢ₊₁) = Hash(Eᵢ)`. No rebases, merges, or non‑linear histories under `p`. ::: + ::: info M‑6 (Total Order) `E` MUST be a single linear chain (no parallel branches or merge commits under `p`). ::: @@ -65,6 +71,7 @@ These invariants ensure a unique, monotonic history suitable for replay. ## 5. Deterministic Replay + ::: info M‑7 (Deterministic Replay) For any ledgers with identical `E` and identical applicable `P` and encodings, replay SHALL produce identical final states: `E¹ = E² ⇒ foldl(T, S₀, E¹) = foldl(T, S₀, E²)`. @@ -81,12 +88,14 @@ Why it holds: - Policy engine evaluations MUST be deterministic (e.g., sandboxed WASM with fixed inputs only and no ambient time/IO). - Attestations are verified from repository content; network access is not required. + ::: info M‑9 (Policy Determinism) Policy evaluation MUST be deterministic (e.g., WASM sandbox with fixed inputs and no ambient time). See FS‑3. ::: ## 7. Conformance (Verification Without Network) + ::: info M‑8 (Offline Verify) Implementations MUST verify and replay using repository data alone; network access SHALL NOT be required for core verification. See FS‑6. 
::: @@ -95,4 +104,3 @@ Implementations MUST verify and replay using repository data alone; network acce - [Formal Spec](/spec/formal-spec) — numbered clauses FS‑1..N (data structures, operations, constraints) - [Wire Format](/spec/wire-format) — JSON schemas, attestation encodings, canonical serialization rules - diff --git a/docs/spec/policy-wasm.md b/docs/spec/policy-wasm.md new file mode 100644 index 0000000..2ffb4e1 --- /dev/null +++ b/docs/spec/policy-wasm.md @@ -0,0 +1,65 @@ +--- +title: Deterministic WASM Policy ABI +--- + +# Deterministic WASM Policy ABI (FS‑9) + +This document specifies a minimal, deterministic host ABI for policy evaluation. It complements FS‑9 and the Compliance Policy/WASM level. + +## 1. Goal + +Provide a portable, deterministic interface for evaluating a policy over a candidate Entry and the previous state, returning a boolean decision and optional diagnostics. + +## 2. Determinism Requirements + +- Ambient time (wall‑clock/monotonic clocks) is forbidden. +- Randomness (RNG imports or entropy sources) is forbidden. +- I/O (filesystem, network, environment variables, process APIs) is forbidden. +- Resource limits: host MUST enforce fuel/step limits and a memory cap (e.g., 32–64 MiB) to prevent non‑termination. + +## 3. Module Exports + +The module SHALL export a single function: + +```text +// Returns 1 (true) or 0 (false). Diagnostics are written to the out buffer. +// Pseudocode ABI; wire details (offsets/lengths) are host‑defined but MUST be +// documented and deterministic. +fn validate(entry_ptr: u32, entry_len: u32, + state_ptr: u32, state_len: u32, + out_ptr: u32, out_len_ptr: u32) -> u32 +``` + +Inputs +- `entry` — canonical JSON bytes of the candidate Entry (see Wire Format). +- `state` — implementation‑defined, deterministic snapshot (JSON or empty for minimal hosts). + +Outputs +- Return value — 1 = accepted, 0 = rejected. +- Diagnostics — UTF‑8 string (implementation‑defined), copied into `out` by the host after the call. 
+ +## 4. Allowed Imports + +The module MUST NOT import any function other than the host allocator glue (e.g., `canonical_abi_realloc` for component model) and a bounded logging function if provided: + +```text +// Optional, bounded logging (may be stubbed by host) +fn log(ptr: u32, len: u32) +``` + +If `log` is present, hosts MUST cap message size and rate; logs MUST NOT influence the decision outcome. + +## 5. Example Policy + +An example policy can be compiled from a tiny Rust/Go/AssemblyScript module that: +- Parses `entry` JSON, +- Checks required fields or enforces a simple rule (e.g., author id allow‑list), +- Returns 1/0 with a short diagnostic message. + +## 6. Compliance + +For the WASM level, the harness: +- invokes the same policy twice with identical inputs, +- expects identical return values and identical diagnostics. + +Hosts MUST document memory/fuel limits and any optional imports. Any deviation MUST be reflected in FS‑9 profiles in future versions. diff --git a/docs/spec/versioning.md b/docs/spec/versioning.md new file mode 100644 index 0000000..f9e0683 --- /dev/null +++ b/docs/spec/versioning.md @@ -0,0 +1,43 @@ +--- +title: Versioning & Releases +--- + +# Versioning & Releases + +This document defines how the Ledger‑Kernel spec, schemas, and vectors are versioned and released. + +## 1. Semantic Versioning + +The spec uses SemVer: `MAJOR.MINOR.PATCH`. + +- MAJOR — Breaking changes to normative clauses (FS/M) that alter conformance semantics or the data model. +- MINOR — Additive features: new clauses, optional checks, additional profiles (e.g., CBOR), new informative sections. +- PATCH — Clarifications, typos, non‑normative doc changes, additional vectors that do not change semantics. + +## 2. 
v0.1.0 (initial tag) + +Scope: +- Model (M‑1…M‑9), Formal Spec (FS‑1…FS‑14) +- Wire Format (JSON canonical profile); optional CBOR profile (cbor1) +- Compliance (levels Core/Policy/WASM; checks C‑1…C‑5; report schema) +- Schemas: entry, attestation, policy_result, compliance_report +- Vectors: canonicalization tools (Py/Rust/Go) and a golden entry vector; CI matrices + +Artifacts: +- Tag `v0.1.0` +- `schemas/` as released assets +- `tests/vectors/` + `scripts/vectors/` tools +- `scripts/harness/` orchestrator +- CHANGELOG.md + +## 3. Pinning Guidance for Implementations + +- Add this repo as a submodule at `external/ledger-kernel` pinned to a tag (e.g., v0.1.0). +- Reference schemas from the submodule path. +- Run vector CI (JSON and, if applicable, CBOR) to catch canonicalization drift. + +## 4. Migration Policy + +- MINOR bumps: re-run suites; implement new optional checks when feasible. +- MAJOR bumps: a migration table will map old FS/M to new clauses; new vectors will be published; dual‑format periods (e.g., JSON↔CBOR) will be specified with profile flags (e.g., `LK-Profile`). + diff --git a/docs/spec/wire-format.md b/docs/spec/wire-format.md index fec4bca..a5e444b 100644 --- a/docs/spec/wire-format.md +++ b/docs/spec/wire-format.md @@ -155,3 +155,15 @@ LK-SHA256: 9a7c… - `schemas/policy_result.schema.json` — Policy evaluation result These schemas are informative; the canonicalization and hashing rules above are normative. + +### CBOR Canonical Profile (Optional) + +Implementations MAY opt into a CBOR Canonical Encoding profile (RFC 8949 §4.2) for the id preimage. In this profile: + +- The preimage is the canonical CBOR encoding of the Entry object with `attestations` omitted. +- The identifier `id_cbor` is defined as BLAKE3‑256 over the CBOR canonical bytes. +- JSON and CBOR preimages produce different byte sequences; ids therefore differ. Mixed mode MUST NOT be used within a single ledger without an explicit migration. 
+ +Dual‑format period +- Hosts MAY accept either JSON or CBOR preimages when verifying historical entries during a migration window. New entries SHOULD use one format exclusively per ledger namespace. +- Implementations MUST document which profile is active and SHOULD record a trailer (e.g., `LK-Profile: json1|cbor1`). diff --git a/scripts/harness/README.md b/scripts/harness/README.md new file mode 100644 index 0000000..1275286 --- /dev/null +++ b/scripts/harness/README.md @@ -0,0 +1,48 @@ +# Minimal Polyglot Compliance Orchestrator + +This orchestrator runs implementation‑specific checks (C‑1..C‑5), aggregates results, and emits a standards‑conformant `compliance.json` report. + +Features +- Configured via TOML (checks, clauses, command lines, levels, timeouts) +- Timeout + simple sandboxing (`ulimit -c 0`) +- Validates the final report against `schemas/compliance_report.schema.json` (best‑effort: Python jsonschema if available, otherwise jq structural checks) + +Usage + +```bash +scripts/harness/run.sh \ + --config scripts/harness/config.sample.toml \ + --output compliance.json \ + --schema schemas/compliance_report.schema.json \ + --level core # or: policy | wasm | all +``` + +Config (TOML) + +```toml +[orchestrator] +implementation = "example-impl" +version = "0.1.0" +timeout_sec = 30 + +[checks.C-1] +clause = ["FS-10"] +level = "core" +cmd = "your-cli canonicalize --input tests/vectors/core/entry.json --print-id" + +[checks.C-2] +clause = ["FS-7","FS-8"] +level = "core" +cmd = "your-cli append --non-ff; test $? -ne 0" +``` + +Exit codes → status mapping +- 0 → PASS +- 64 → PARTIAL +- 124 → FAIL (timeout) +- anything else → FAIL + +Notes +- The orchestrator is language‑agnostic: each check is just a shell command. +- Add more checks or levels by extending the TOML `checks.*` sections. +- For robust schema validation, install Python `jsonschema` or use `ajv` and adapt the script. 
diff --git a/scripts/harness/config.sample.toml b/scripts/harness/config.sample.toml new file mode 100644 index 0000000..e2671b7 --- /dev/null +++ b/scripts/harness/config.sample.toml @@ -0,0 +1,32 @@ +[orchestrator] +implementation = "example-impl" +version = "0.1.0" +timeout_sec = 20 + +# Core checks --------------------------------------------------- +[checks.C-1] +clause = ["FS-10"] +level = "core" +cmd = "echo canonicalization-ok" + +[checks.C-2] +clause = ["FS-7","FS-8"] +level = "core" +cmd = "bash -c 'exit 0'" + +[checks.C-3] +clause = ["FS-11"] +level = "core" +cmd = "bash -c 'exit 0'" + +# Policy / WASM checks (optional) ------------------------------- +[checks.C-4] +clause = ["FS-3","FS-9"] +level = "policy" +timeout_sec = 10 +cmd = "bash -c 'exit 64'" # simulate PARTIAL + +[checks.C-5] +clause = ["FS-6"] +level = "policy" +cmd = "bash -c 'exit 0'" diff --git a/scripts/harness/run.sh b/scripts/harness/run.sh new file mode 100644 index 0000000..88561e5 --- /dev/null +++ b/scripts/harness/run.sh @@ -0,0 +1,258 @@ +#!/usr/bin/env bash +# Minimal polyglot compliance orchestrator +# - Reads a TOML config describing checks (C-1..C-5), clauses, and CLI commands +# - Runs each check with timeout/sandboxing +# - Builds a consolidated compliance.json +# - Validates report against schemas/compliance_report.schema.json (best-effort) + +set -euo pipefail + +HERE="$(cd "$(dirname "$0")" && pwd)" +ROOT="$(cd "$HERE/../.." && pwd)" + +CONF="$HERE/config.sample.toml" +OUT_JSON="$PWD/compliance.json" +SCHEMA_DEFAULT="$ROOT/schemas/compliance_report.schema.json" +SCHEMA="$SCHEMA_DEFAULT" +LEVEL="all" # core|policy|wasm|all +TIMEOUT_DEFAULT=30 + +usage() { + cat <&2; usage; exit 2;; + esac +done + +command -v jq >/dev/null 2>&1 || { echo "ERROR: jq is required" >&2; exit 2; } + +if [[ ! 
-f "$CONF" ]]; then + echo "ERROR: config file not found: $CONF" >&2 + exit 2 +fi + +# --- Tiny TOML reader for expected shape --- +# Supports: +# [orchestrator]\nkey = "value"\n +# [checks.C-1]\nclause = ["FS-10"]\ncmd = "..."\nlevel = "core"\ntimeout = 30 + +impl_name="" +impl_version="" +global_timeout=$TIMEOUT_DEFAULT + +mapfile -t raw_lines < <(sed -e 's/#.*$//' -e 's/[\r\t]//g' "$CONF" | awk 'NF') + +current_section="" +declare -A check_cmd +declare -A check_clause +declare -A check_level +declare -A check_timeout +declare -a check_order + +for line in "${raw_lines[@]}"; do + if [[ "$line" =~ ^\[(.+)\]$ ]]; then + current_section="${BASH_REMATCH[1]}" + if [[ "$current_section" == checks.* ]]; then + cid="${current_section#checks.}" + # record order once + if [[ " ${check_order[*]} " != *" $cid "* ]]; then + check_order+=("$cid") + fi + fi + continue + fi + key="${line%%=*}"; key="${key// /}" + val="${line#*=}" + val="${val## }"; val="${val%% }" + # strip surrounding quotes for simple strings + if [[ "$val" =~ ^"(.*)"$ ]]; then val="${BASH_REMATCH[1]}"; fi + + case "$current_section" in + orchestrator) + case "$key" in + implementation) impl_name="$val";; + version) impl_version="$val";; + timeout_sec) global_timeout="$val";; + esac + ;; + checks.*) + cid="${current_section#checks.}" + case "$key" in + cmd) check_cmd["$cid"]="$val";; + level) check_level["$cid"]="$val";; + timeout|timeout_sec) check_timeout["$cid"]="$val";; + clause) + # normalize array of strings: ["FS-7","FS-8"] -> FS-7,FS-8 + arr="$val" + arr="${arr#[}" + arr="${arr%]}" + arr="${arr//\"/}" + arr="${arr// /}" + check_clause["$cid"]="$arr" + ;; + esac + ;; + esac +done + +if [[ -z "$impl_name" ]]; then impl_name="unknown"; fi +if [[ -z "$impl_version" ]]; then impl_version="0.0.0"; fi + +# --- Execute checks --- + +tmpdir="$(mktemp -d)"; trap 'rm -rf "$tmpdir"' EXIT +results_json="$tmpdir/results.json" +echo '[]' > "$results_json" + +status_to_json() { + case "$1" in + PASS|PARTIAL|FAIL|N/A) 
echo "$1";; + 0) echo PASS;; + 64) echo PARTIAL;; + 124) echo FAIL;; + *) echo FAIL;; + esac +} + +append_result() { + local id="$1"; shift + local clauses_csv="$1"; shift + local status="$1"; shift + local notes="$1"; shift || true + # convert clauses_csv -> JSON array + local clauses_json + if [[ -z "$clauses_csv" ]]; then + clauses_json='[]' + else + clauses_json="[\"${clauses_csv//,/\",\"}\"]" + fi + jq --arg id "$id" \ + --arg status "$status" \ + --arg notes "$notes" \ + --argjson clauses "$clauses_json" \ + '. += [{id:$id, clause: clauses, status:$status} + ( $notes|length>0 ? {notes:$notes} : {} )]' "$results_json" > "$results_json.tmp" + mv "$results_json.tmp" "$results_json" +} + +run_one() { + local id="$1"; local level_hint="$2"; local cmd="$3"; local clauses_csv="$4"; local to_sec="$5" + # level filter + if [[ "$LEVEL" != "all" && "$level_hint" != "$LEVEL" ]]; then + append_result "$id" "$clauses_csv" "N/A" "skipped by --level=$LEVEL" + return + fi + local t=${to_sec:-$global_timeout} + ulimit -c 0 || true + local out err rc + out="$tmpdir/$id.out"; err="$tmpdir/$id.err" + if command -v timeout >/dev/null 2>&1; then + bash -c "timeout $t bash -c '$cmd'" >"$out" 2>"$err" || rc=$? + else + bash -c "$cmd" >"$out" 2>"$err" || rc=$? 
+ fi + rc=${rc:-0} + local status + status=$(status_to_json "$rc") + # trim notes from stderr (last line) + local note="" + if [[ -s "$err" ]]; then note="$(tail -n1 "$err" | sed 's/\r$//')"; fi + append_result "$id" "$clauses_csv" "$status" "$note" +} + +# Iterate checks in declared order +for key in "${check_order[@]}"; do + lvl="${check_level[$key]:-core}" + clauses="${check_clause[$key]:-}" + to="${check_timeout[$key]:-}" + run_one "$key" "$lvl" "${check_cmd[$key]}" "$clauses" "$to" +done + +# --- Build consolidated report --- + +utc() { date -u +"%Y-%m-%dT%H:%M:%SZ"; } + +# compute summaries per level +compute_summary_level() { + local level="$1" + # filter results that belong to this level by config lookup + local statuses=() + for id in "${!check_cmd[@]}"; do + local lvl="${check_level[$id]:-core}" + if [[ "$lvl" == "$level" ]]; then + local st + st=$(jq -r ".[] | select(.id==\"$id\") | .status" "$results_json" | tail -n1) + statuses+=("${st:-N/A}") + fi + done + if [[ ${#statuses[@]} -eq 0 ]]; then echo "N/A"; return; fi + local any_fail any_partial all_na + any_fail=0; any_partial=0; all_na=1 + for s in "${statuses[@]}"; do + [[ "$s" != "N/A" ]] && all_na=0 + [[ "$s" == "FAIL" ]] && any_fail=1 + [[ "$s" == "PARTIAL" ]] && any_partial=1 + done + if [[ $all_na -eq 1 ]]; then echo "N/A"; return; fi + if [[ $any_fail -eq 1 ]]; then echo "FAIL"; return; fi + if [[ $any_partial -eq 1 ]]; then echo "PARTIAL"; return; fi + echo "PASS" +} + +summary_core=$(compute_summary_level core) +summary_policy=$(compute_summary_level policy) +summary_wasm=$(compute_summary_level wasm) + +jq -n \ + --arg impl "$impl_name" \ + --arg ver "$impl_version" \ + --arg date "$(utc)" \ + --slurpfile results "$results_json" \ + --arg core "$summary_core" \ + --arg policy "$summary_policy" \ + --arg wasm "$summary_wasm" \ + '{implementation:$impl, version:$ver, date:$date, results:$results[0], summary:{core:$core, policy:$policy, wasm:$wasm}}' \ + > "$OUT_JSON" + +echo "Wrote report: 
$OUT_JSON" + +# --- Validate report (best-effort) --- +validate_ok=0 +if command -v python3 >/dev/null 2>&1; then + python3 - <<'PY' "$SCHEMA" "$OUT_JSON" && exit 0 || exit 1 +import json, sys +try: + import jsonschema +except Exception: + sys.exit(1) +with open(sys.argv[1]) as s: schema=json.load(s) +with open(sys.argv[2]) as f: data=json.load(f) +jsonschema.validate(data, schema) +print("Schema validation: OK") +PY + validate_ok=$? +fi +if [[ $validate_ok -ne 0 ]]; then + # fallback: structural checks via jq + jq -e '.implementation and .version and .date and (.results|type=="array") and (.summary.core and .summary.policy and .summary.wasm)' "$OUT_JSON" >/dev/null && echo "Basic structure: OK" || { echo "Basic structure: FAIL" >&2; exit 1; } +fi + +# Exit non-zero if a requested level failed +if [[ "$LEVEL" != "all" ]]; then + verdict=$(jq -r ".summary.$LEVEL" "$OUT_JSON") + [[ "$verdict" == "PASS" ]] || { echo "Requested level failed: $verdict" >&2; exit 1; } +fi + +exit 0 diff --git a/scripts/lint/spec_lint.py b/scripts/lint/spec_lint.py new file mode 100644 index 0000000..614d576 --- /dev/null +++ b/scripts/lint/spec_lint.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 +""" +Spec linter: +- recomputes id from canonical preimage (entry without attestations) +- prints expected signing input ("ledger-entry:" + id) +- validates compliance.json against schema (if provided) +""" +import sys, json, argparse +from pathlib import Path + +def canonical(v): + if isinstance(v, dict): + items = sorted(v.items(), key=lambda kv: kv[0]) + return '{' + ','.join([json.dumps(k, ensure_ascii=False)+":"+canonical(x) for k,x in items]) + '}' + if isinstance(v, list): + return '[' + ','.join([canonical(x) for x in v]) + ']' + if v is None: + return 'null' + if isinstance(v, bool): + return 'true' if v else 'false' + if isinstance(v, int): + return str(v) + if isinstance(v, float): + raise SystemExit("ERROR: floats are forbidden in canonical positions; encode as string") + if 
isinstance(v, str): + return json.dumps(v, ensure_ascii=False) + raise SystemExit(f"ERROR: unsupported type {type(v)}") + +def preimage(entry): + e = dict(entry) + e.pop('attestations', None) + return canonical(e).encode('utf-8') + +def b3_hex(data: bytes) -> str: + try: + import blake3 + except Exception: + raise SystemExit("ERROR: python module 'blake3' not installed; pip install blake3") + return blake3.blake3(data).hexdigest() + +def validate_schema(report: Path, schema: Path) -> bool: + try: + import jsonschema + except Exception: + print("WARN: jsonschema not installed; skipping schema validation", file=sys.stderr) + return False + with report.open('r', encoding='utf-8') as f: data = json.load(f) + with schema.open('r', encoding='utf-8') as s: sch = json.load(s) + jsonschema.validate(data, sch) + return True + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument('--entry', help='Entry JSON to lint') + ap.add_argument('--report', help='compliance.json to validate') + ap.add_argument('--schema', help='schema to validate report against', default='schemas/compliance_report.schema.json') + args = ap.parse_args() + + if args.entry: + data = json.loads(Path(args.entry).read_text(encoding='utf-8')) + pid = b3_hex(preimage(data)) + print(f"computed_id={pid}") + print(f"expected_signing_input=ledger-entry:{pid}") + if 'id' in data and data['id'] and data['id'] != pid: + print(f"DIFF: entry.id != computed id\n entry.id = {data['id']}\n computed_id = {pid}") + + if args.report: + ok = validate_schema(Path(args.report), Path(args.schema)) + if ok: + print("report_schema=OK") + +if __name__ == '__main__': + main() + diff --git a/scripts/tests/feedback/01_wasm_panic_handler.sh b/scripts/tests/feedback/01_wasm_panic_handler.sh new file mode 100755 index 0000000..20bdcb5 --- /dev/null +++ b/scripts/tests/feedback/01_wasm_panic_handler.sh @@ -0,0 +1,10 @@ +#!/usr/bin/env bash +set -euo pipefail +f=tests/policy/wasm/demo/src/lib.rs +# Check for a panic handler 
attribute +if ! rg -n "^#\[panic_handler\]" "$f" >/dev/null; then + echo "panic handler missing in $f" >&2 + exit 1 +fi +# Try to build (requires toolchain + target installed) +( cd tests/policy/wasm/demo && cargo build --release --target wasm32-unknown-unknown >/dev/null ) diff --git a/scripts/tests/feedback/02_release_tag_guard.sh b/scripts/tests/feedback/02_release_tag_guard.sh new file mode 100755 index 0000000..3b2dd79 --- /dev/null +++ b/scripts/tests/feedback/02_release_tag_guard.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +f=.github/workflows/release.yml +# Check a tags line contains a strict semver-like pattern (literal) +rg -n "^\s*tags:\s*$" "$f" >/dev/null +grep -nF -- "- 'v[0-9]+.[0-9]+.[0-9]+'" "$f" >/dev/null +# Check we guard by ref_type/startsWith +rg -n "^\s*if:\s*github.ref_type == 'tag'.*startsWith\(github.ref, 'refs/tags/v'\)" "$f" >/dev/null diff --git a/scripts/tests/feedback/03_release_dirs_validation.sh b/scripts/tests/feedback/03_release_dirs_validation.sh new file mode 100755 index 0000000..a9ea2e6 --- /dev/null +++ b/scripts/tests/feedback/03_release_dirs_validation.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail +f=.github/workflows/release.yml +rg -n "for dir in schemas tests/vectors scripts/vectors; do" "$f" >/dev/null +rg -n "ERROR: Required directory '" "$f" >/dev/null diff --git a/scripts/tests/feedback/04_release_eof.sh b/scripts/tests/feedback/04_release_eof.sh new file mode 100755 index 0000000..45ad633 --- /dev/null +++ b/scripts/tests/feedback/04_release_eof.sh @@ -0,0 +1,11 @@ +#!/usr/bin/env bash +set -euo pipefail +f=.github/workflows/release.yml +# Get last non-empty char +if [ ! 
-f "$f" ]; then echo "release.yml missing" >&2; exit 1; fi +last=$(tail -c 1 "$f" | od -An -t u1) +# 10 = newline; we want last char to NOT be newline +if [ "$last" = " 10" ]; then + echo "Trailing newline at EOF in $f" >&2 + exit 1 +fi diff --git a/scripts/tests/feedback/05_docs_compliance_links.sh b/scripts/tests/feedback/05_docs_compliance_links.sh new file mode 100755 index 0000000..5a7c08e --- /dev/null +++ b/scripts/tests/feedback/05_docs_compliance_links.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +f=docs/compliance/index.md +if rg -n "\./SPEC\.md|\./MODEL\.md" "$f"; then + echo "Found broken relative links in $f" >&2 + exit 1 +fi diff --git a/scripts/tests/feedback/06_docs_error_table_tabs.sh b/scripts/tests/feedback/06_docs_error_table_tabs.sh new file mode 100755 index 0000000..12cc449 --- /dev/null +++ b/scripts/tests/feedback/06_docs_error_table_tabs.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +f=docs/compliance/index.md +if rg -n "\t" "$f"; then + echo "Found tabs in $f" >&2 + exit 1 +fi diff --git a/scripts/tests/feedback/07_docs_heading_future.sh b/scripts/tests/feedback/07_docs_heading_future.sh new file mode 100755 index 0000000..522532d --- /dev/null +++ b/scripts/tests/feedback/07_docs_heading_future.sh @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +set -euo pipefail +f=docs/compliance/index.md +if rg -n "^12\. Future Work$" "$f"; then + echo "Found list-style '12. Future Work' — should be '## 12. 
Future Work'" >&2 + exit 1 +fi diff --git a/scripts/tests/feedback/08_model_source_codefence_lang.sh b/scripts/tests/feedback/08_model_source_codefence_lang.sh new file mode 100755 index 0000000..7c30c86 --- /dev/null +++ b/scripts/tests/feedback/08_model_source_codefence_lang.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +f=docs/spec/model-source.md +# Fail if any bare code fence exists (exact line of ```) +if grep -xF '```' "$f"; then + echo "Found bare code fence without language in $f" >&2 + exit 1 +fi diff --git a/scripts/tests/feedback/09_wasm_cargo_optimizations.sh b/scripts/tests/feedback/09_wasm_cargo_optimizations.sh new file mode 100755 index 0000000..14f5822 --- /dev/null +++ b/scripts/tests/feedback/09_wasm_cargo_optimizations.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail +f=tests/policy/wasm/demo/Cargo.toml +rg -n "^strip\s*=\s*true" "$f" >/dev/null +rg -n "^panic\s*=\s*\"abort\"" "$f" >/dev/null diff --git a/scripts/tests/feedback/10_wasm_makefile_errors.sh b/scripts/tests/feedback/10_wasm_makefile_errors.sh new file mode 100755 index 0000000..005ff7b --- /dev/null +++ b/scripts/tests/feedback/10_wasm_makefile_errors.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail +f=tests/policy/wasm/demo/Makefile +if rg -n "\|\| true" "$f"; then + echo "Makefile contains silent error suppression (|| true)" >&2 + exit 1 +fi +# Ensure artifact check present +rg -n 'test -f \$\(OUT\)' "$f" >/dev/null diff --git a/scripts/tests/feedback/11_wasm_readme_codefences.sh b/scripts/tests/feedback/11_wasm_readme_codefences.sh new file mode 100755 index 0000000..b9fb39a --- /dev/null +++ b/scripts/tests/feedback/11_wasm_readme_codefences.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash +set -euo pipefail +f=tests/policy/wasm/demo/README.md +# Ensure at least one bash fence and one c or text fence exist +rg -n '^```bash$' "$f" >/dev/null +rg -n '^```(c|text)$' "$f" >/dev/null diff --git a/scripts/tests/feedback/12_wasm_safety_doc.sh 
b/scripts/tests/feedback/12_wasm_safety_doc.sh new file mode 100755 index 0000000..f91d89d --- /dev/null +++ b/scripts/tests/feedback/12_wasm_safety_doc.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +f=tests/policy/wasm/demo/src/lib.rs +rg -n '^/// Safety' "$f" >/dev/null diff --git a/scripts/tests/feedback/13_ci_yaml_style.sh b/scripts/tests/feedback/13_ci_yaml_style.sh new file mode 100755 index 0000000..c437357 --- /dev/null +++ b/scripts/tests/feedback/13_ci_yaml_style.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +set -euo pipefail +for f in .github/workflows/vectors-cbor.yml .github/workflows/vectors-matrix.yml; do + [ -f "$f" ] || continue + rg -n "branches: \[ *main *\]" "$f" >/dev/null + # Allow either with: { go-version: '1.21.x' } OR no Go step + if rg -n "with: \{ *go-version: '1\.[0-9]+\.[0-9]+' *\}" "$f" -U -N >/dev/null 2>&1; then :; else :; fi +done diff --git a/scripts/tests/feedback/14_ci_c_compile_fix.sh b/scripts/tests/feedback/14_ci_c_compile_fix.sh new file mode 100755 index 0000000..b773f79 --- /dev/null +++ b/scripts/tests/feedback/14_ci_c_compile_fix.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail +f=.github/workflows/vectors-matrix.yml +# Expect an explicit compile command referencing the source and -o c_b3sum +rg -n -- "gcc .* -o c_b3sum .*scripts/vectors/c/blake3_id.c" "$f" >/dev/null diff --git a/scripts/tests/feedback/15_implementers_schema_link.sh b/scripts/tests/feedback/15_implementers_schema_link.sh new file mode 100755 index 0000000..93d2a56 --- /dev/null +++ b/scripts/tests/feedback/15_implementers_schema_link.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +f=docs/implementation/implementers.md +rg -n "https://github.com/.*/ledger-kernel/.*/schemas/compliance_report.schema.json" "$f" >/dev/null diff --git a/scripts/tests/feedback/16_policy_wasm_bullets.sh b/scripts/tests/feedback/16_policy_wasm_bullets.sh new file mode 100755 index 0000000..23fbcf4 --- /dev/null +++ 
b/scripts/tests/feedback/16_policy_wasm_bullets.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail +f=docs/spec/policy-wasm.md +# Count bullets that start with '- No ' +count=$(rg -n "^- No " "$f" || true | wc -l | tr -d ' ') +if [ "$count" -ge 3 ]; then + echo "Found repetitive '- No' bullets ($count)" >&2 + exit 1 +fi diff --git a/scripts/tests/feedback/17_wire_format_blankline.sh b/scripts/tests/feedback/17_wire_format_blankline.sh new file mode 100755 index 0000000..aac8718 --- /dev/null +++ b/scripts/tests/feedback/17_wire_format_blankline.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +set -euo pipefail +f=docs/spec/wire-format.md +line=$(rg -n "^### CBOR Canonical Profile \(Optional\)" "$f" | cut -d: -f1 | head -n1) +[ -n "$line" ] +prev=$((line-1)) +# Extract previous line +pl=$(sed -n "${prev}p" "$f") +[ -z "$pl" ] || { echo "No blank line before CBOR heading" >&2; exit 1; } diff --git a/scripts/tests/feedback/18_config_timeout_override.sh b/scripts/tests/feedback/18_config_timeout_override.sh new file mode 100755 index 0000000..6542d40 --- /dev/null +++ b/scripts/tests/feedback/18_config_timeout_override.sh @@ -0,0 +1,5 @@ +#!/usr/bin/env bash +set -euo pipefail +f=scripts/harness/config.sample.toml +rg -n "^\[checks.C-4\]" "$f" >/dev/null +rg -n "^timeout_sec\s*=\s*\d+" "$f" >/dev/null diff --git a/scripts/tests/feedback/19_harness_readme_fences.sh b/scripts/tests/feedback/19_harness_readme_fences.sh new file mode 100755 index 0000000..1031704 --- /dev/null +++ b/scripts/tests/feedback/19_harness_readme_fences.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash +set -euo pipefail +f=scripts/harness/README.md +# Check opening fences only (```lang) +ok=1 +while IFS= read -r line; do + if echo "$line" | grep -qE '^```[a-zA-Z]+'; then + ok=0; break + fi +done < "$f" +if [ $ok -ne 0 ]; then + echo "No opening fenced code block with language found in $f" >&2 + exit 1 +fi diff --git a/scripts/tests/feedback/20_harness_order_iteration.sh 
#!/usr/bin/env bash
set -euo pipefail
# Guard against nondeterministic harness output: bash associative arrays
# (declare -A) iterate keys in unspecified order, so scripts/harness/run.sh
# must walk the explicit check_order indexed array rather than the key list
# of the check_cmd associative array.
f=scripts/harness/run.sh
# Must not iterate associative array keys directly
# (rg output is NOT suppressed here on purpose: the offending line is
# printed before we fail, which aids debugging)
if rg -n -- 'for key in "\$\{!check_cmd\[@\]\}"' "$f"; then
  echo "Unordered associative array iteration detected" >&2
  exit 1
fi
# Must iterate using check_order array
# Both assertions are required: the array must be referenced somewhere,
# and the main loop must iterate over it specifically.
rg -n -- 'check_order\[@\]' "$f" >/dev/null
rg -n -- 'for key in "\$\{check_order\[@\]\}"' "$f" >/dev/null
rg -n "^#\[panic_handler\]" "$f" >/dev/null; then + echo "panic handler missing in $f" >&2 + exit 1 +fi +# Try to build (requires toolchain + target installed) +( cd tests/policy/wasm/demo && cargo build --release --target wasm32-unknown-unknown >/dev/null ) +T1 +chmod +x scripts/tests/feedback/01_wasm_panic_handler.sh + +# 2) Release workflow tag guard and if-condition +cat > scripts/tests/feedback/02_release_tag_guard.sh << 'T2' +#!/usr/bin/env bash +set -euo pipefail +f=.github/workflows/release.yml +# Check a tags line contains a strict semver-like pattern (literal) +rg -n "^\s*tags:\s*$" "$f" >/dev/null +grep -nF -- "- 'v[0-9]+.[0-9]+.[0-9]+'" "$f" >/dev/null +# Check we guard by ref_type/startsWith +rg -n "^\s*if:\s*github.ref_type == 'tag'.*startsWith\(github.ref, 'refs/tags/v'\)" "$f" >/dev/null +T2 +chmod +x scripts/tests/feedback/02_release_tag_guard.sh + +# 3) Release assets directory validation exists +cat > scripts/tests/feedback/03_release_dirs_validation.sh << 'T3' +#!/usr/bin/env bash +set -euo pipefail +f=.github/workflows/release.yml +rg -n "for dir in schemas tests/vectors scripts/vectors; do" "$f" >/dev/null +rg -n "ERROR: Required directory '" "$f" >/dev/null +T3 +chmod +x scripts/tests/feedback/03_release_dirs_validation.sh + +# 4) Release workflow EOF no trailing blank line +cat > scripts/tests/feedback/04_release_eof.sh << 'T4' +#!/usr/bin/env bash +set -euo pipefail +f=.github/workflows/release.yml +# Get last non-empty char +if [ ! 
-f "$f" ]; then echo "release.yml missing" >&2; exit 1; fi +last=$(tail -c 1 "$f" | od -An -t u1) +# 10 = newline; we want last char to NOT be newline +if [ "$last" = " 10" ]; then + echo "Trailing newline at EOF in $f" >&2 + exit 1 +fi +T4 +chmod +x scripts/tests/feedback/04_release_eof.sh + +# 5) Compliance links fixed (no ./SPEC.md or ./MODEL.md) +cat > scripts/tests/feedback/05_docs_compliance_links.sh << 'T5' +#!/usr/bin/env bash +set -euo pipefail +f=docs/compliance/index.md +if rg -n "\./SPEC\.md|\./MODEL\.md" "$f"; then + echo "Found broken relative links in $f" >&2 + exit 1 +fi +T5 +chmod +x scripts/tests/feedback/05_docs_compliance_links.sh + +# 6) Error taxonomy table has no hard tabs +cat > scripts/tests/feedback/06_docs_error_table_tabs.sh << 'T6' +#!/usr/bin/env bash +set -euo pipefail +f=docs/compliance/index.md +if rg -n "\t" "$f"; then + echo "Found tabs in $f" >&2 + exit 1 +fi +T6 +chmod +x scripts/tests/feedback/06_docs_error_table_tabs.sh + +# 7) Heading for Future Work is proper heading (no '12. Future Work' as list) +cat > scripts/tests/feedback/07_docs_heading_future.sh << 'T7' +#!/usr/bin/env bash +set -euo pipefail +f=docs/compliance/index.md +if rg -n "^12\. Future Work$" "$f"; then + echo "Found list-style '12. Future Work' — should be '## 12. 
Future Work'" >&2 + exit 1 +fi +T7 +chmod +x scripts/tests/feedback/07_docs_heading_future.sh + +# 8) Model source no bare code fences without language +cat > scripts/tests/feedback/08_model_source_codefence_lang.sh << 'T8' +#!/usr/bin/env bash +set -euo pipefail +f=docs/spec/model-source.md +# Fail if any bare code fence exists (exact line of ```) +if grep -xF '```' "$f"; then + echo "Found bare code fence without language in $f" >&2 + exit 1 +fi +T8 +chmod +x scripts/tests/feedback/08_model_source_codefence_lang.sh + +# 9) WASM Cargo.toml has size opts +cat > scripts/tests/feedback/09_wasm_cargo_optimizations.sh << 'T9' +#!/usr/bin/env bash +set -euo pipefail +f=tests/policy/wasm/demo/Cargo.toml +rg -n "^strip\s*=\s*true" "$f" >/dev/null +rg -n "^panic\s*=\s*\"abort\"" "$f" >/dev/null +T9 +chmod +x scripts/tests/feedback/09_wasm_cargo_optimizations.sh + +# 10) WASM Makefile no silent error suppression +cat > scripts/tests/feedback/10_wasm_makefile_errors.sh << 'T10' +#!/usr/bin/env bash +set -euo pipefail +f=tests/policy/wasm/demo/Makefile +if rg -n "\|\| true" "$f"; then + echo "Makefile contains silent error suppression (|| true)" >&2 + exit 1 +fi +# Ensure artifact check present +rg -n 'test -f \$\(OUT\)' "$f" >/dev/null +T10 +chmod +x scripts/tests/feedback/10_wasm_makefile_errors.sh + +# Run all +run 01 scripts/tests/feedback/01_wasm_panic_handler.sh || true +run 02 scripts/tests/feedback/02_release_tag_guard.sh || true +run 03 scripts/tests/feedback/03_release_dirs_validation.sh || true +run 04 scripts/tests/feedback/04_release_eof.sh || true +run 05 scripts/tests/feedback/05_docs_compliance_links.sh || true +run 06 scripts/tests/feedback/06_docs_error_table_tabs.sh || true +run 07 scripts/tests/feedback/07_docs_heading_future.sh || true +run 08 scripts/tests/feedback/08_model_source_codefence_lang.sh || true +run 09 scripts/tests/feedback/09_wasm_cargo_optimizations.sh || true +run 10 scripts/tests/feedback/10_wasm_makefile_errors.sh || true + +if [ $fail 
-ne 0 ]; then + echo "FAILED: $fail feedback checks failed" >&2 + exit 1 +fi diff --git a/scripts/vectors/c/blake3_id.c b/scripts/vectors/c/blake3_id.c new file mode 100644 index 0000000..0e75e70 --- /dev/null +++ b/scripts/vectors/c/blake3_id.c @@ -0,0 +1,21 @@ +#include +#include +#include +#include +#include + +int main(void) { + blake3_hasher hasher; + blake3_hasher_init(&hasher); + uint8_t buf[8192]; + size_t n; + while ((n = fread(buf, 1, sizeof buf, stdin)) > 0) { + blake3_hasher_update(&hasher, buf, n); + } + uint8_t out[32]; + blake3_hasher_finalize(&hasher, out, 32); + for (int i=0; i<32; i++) printf("%02x", out[i]); + printf("\n"); + return 0; +} + diff --git a/scripts/vectors/go-cbor/go.mod b/scripts/vectors/go-cbor/go.mod new file mode 100644 index 0000000..8669c4d --- /dev/null +++ b/scripts/vectors/go-cbor/go.mod @@ -0,0 +1,9 @@ +module lk_cbor_go + +go 1.21 + +require ( + github.com/fxamacker/cbor/v2 v2.6.0 + lukechampine.com/blake3 v1.2.1 +) + diff --git a/scripts/vectors/go-cbor/main.go b/scripts/vectors/go-cbor/main.go new file mode 100644 index 0000000..0d69629 --- /dev/null +++ b/scripts/vectors/go-cbor/main.go @@ -0,0 +1,28 @@ +package main + +import ( + "encoding/json" + "fmt" + "io/ioutil" + "log" + "os" + cbor "github.com/fxamacker/cbor/v2" + b3 "lukechampine.com/blake3" +) + +func main() { + if len(os.Args) < 2 { log.Fatalf("usage: lk_cbor_go entry.json") } + b, err := ioutil.ReadFile(os.Args[1]) + if err != nil { log.Fatal(err) } + var v map[string]interface{} + if err := json.Unmarshal(b, &v); err != nil { log.Fatal(err) } + delete(v, "attestations") + encOpts := cbor.CanonicalEncOptions() + em, err := encOpts.EncMode() + if err != nil { log.Fatal(err) } + can, err := em.Marshal(v) + if err != nil { log.Fatal(err) } + h := b3.Sum256(can) + fmt.Printf("%x\n", h[:]) +} + diff --git a/scripts/vectors/go/go.mod b/scripts/vectors/go/go.mod new file mode 100644 index 0000000..7fc4267 --- /dev/null +++ b/scripts/vectors/go/go.mod @@ -0,0 +1,6 
// canonical renders a decoded JSON value in the project's canonical text
// form: object keys sorted bytewise, no insignificant whitespace, and
// only integer-valued numbers (non-integers abort the program).
func canonical(v interface{}) string {
	switch t := v.(type) {
	case map[string]interface{}:
		keys := make([]string, 0, len(t))
		for k := range t {
			keys = append(keys, k)
		}
		sort.Strings(keys)
		out := "{"
		for i, k := range keys {
			if i > 0 {
				out += ","
			}
			out += mustJSON(k) + ":" + canonical(t[k])
		}
		return out + "}"
	case []interface{}:
		out := "["
		for i, x := range t {
			if i > 0 {
				out += ","
			}
			out += canonical(x)
		}
		return out + "]"
	case nil:
		return "null"
	case bool:
		if t {
			return "true"
		}
		return "false"
	case float64:
		// encoding/json decodes every JSON number as float64; the
		// canonical form forbids anything that is not integer-valued.
		if t != float64(int64(t)) {
			log.Fatalf("floats forbidden in canonical positions")
		}
		return fmt.Sprintf("%d", int64(t))
	case string:
		return mustJSON(t)
	default:
		log.Fatalf("unsupported type %T", t)
	}
	return ""
}

// mustJSON marshals v with encoding/json, aborting the program on error.
func mustJSON(v interface{}) string {
	b, err := json.Marshal(v)
	if err != nil {
		log.Fatal(err)
	}
	return string(b)
}

// join concatenates parts with "," separators (a strings.Join
// equivalent, kept so the tool stays free of extra imports).
func join(parts []string) string {
	out := ""
	for i, s := range parts {
		if i > 0 {
			out += ","
		}
		out += s
	}
	return out
}

// preimage returns the canonical encoding of entry with the
// "attestations" key removed — the ID preimage shared by all the
// vector tools.
func preimage(entry map[string]interface{}) string {
	e := map[string]interface{}{}
	for k, v := range entry {
		if k != "attestations" {
			e[k] = v
		}
	}
	return canonical(e)
}
#!/usr/bin/env python3
"""Canonical JSON serializer + BLAKE3 id for ledger-kernel vectors.

Canonical form: object keys sorted by Unicode code point, no whitespace,
integers only (floats rejected), strings emitted without ASCII escaping.
The entry id is BLAKE3-256 of the canonical preimage with the
"attestations" key omitted.

Fix: ``bool`` is a subclass of ``int`` in Python, so the previous
``isinstance(obj, (int,))`` check ran before the bool branch and
serialized True/False as "True"/"False" (invalid canonical form); the
bool branch was unreachable. Booleans are now tested before integers.
"""
import sys, json
try:
    import blake3  # pip install blake3
except Exception:
    blake3 = None

def die(msg):
    # Report the error and exit with the conventional usage/error status.
    print(f"ERROR: {msg}", file=sys.stderr)
    sys.exit(2)

def canonical(obj):
    """Return the canonical text encoding of a decoded JSON value."""
    if isinstance(obj, dict):
        # sort keys by Unicode code point (Python str comparison)
        items = sorted(obj.items(), key=lambda kv: kv[0])
        return '{' + ','.join(f"{json.dumps(k, ensure_ascii=False)}:{canonical(v)}" for k, v in items) + '}'
    elif isinstance(obj, list):
        return '[' + ','.join(canonical(x) for x in obj) + ']'
    elif isinstance(obj, bool):
        # Must precede the int check: bool is a subclass of int.
        return 'true' if obj else 'false'
    elif isinstance(obj, int):
        return str(obj)
    elif isinstance(obj, float):
        die("floats are forbidden in canonical positions; encode as string")
    elif obj is None:
        return 'null'
    elif isinstance(obj, str):
        return json.dumps(obj, ensure_ascii=False)
    else:
        die(f"unsupported type: {type(obj)}")

def preimage(entry):
    """Canonical encoding of entry with 'attestations' omitted (id preimage)."""
    e = dict(entry)
    e.pop('attestations', None)
    return canonical(e)

def main():
    if len(sys.argv) < 2:
        print("Usage: canon.py entry.json", file=sys.stderr)
        sys.exit(2)
    with open(sys.argv[1], 'r', encoding='utf-8') as f:
        data = json.load(f)
    can_bytes = preimage(data).encode('utf-8')
    if blake3 is None:
        # Without the blake3 module, emit the canonical preimage itself so
        # callers can hash it externally.
        sys.stdout.write(can_bytes.decode('utf-8'))
        return
    h = blake3.blake3(can_bytes).hexdigest()
    print(h)

if __name__ == '__main__':
    main()
@@ +#!/usr/bin/env python3 +import sys, json +try: + import cbor2 # pip install cbor2 +except Exception: + print("ERROR: python module 'cbor2' is required (pip install cbor2)", file=sys.stderr) + sys.exit(2) +try: + import blake3 +except Exception: + print("ERROR: python module 'blake3' is required (pip install blake3)", file=sys.stderr) + sys.exit(2) + +def main(): + if len(sys.argv) < 2: + print("Usage: cbor_canon.py entry.json", file=sys.stderr) + sys.exit(2) + with open(sys.argv[1], 'r', encoding='utf-8') as f: + entry = json.load(f) + # Omit attestations + if isinstance(entry, dict) and 'attestations' in entry: + entry = {k:v for k,v in entry.items() if k != 'attestations'} + # Canonical CBOR: cbor2.dumps(..., canonical=True) + can = cbor2.dumps(entry, canonical=True) + print(blake3.blake3(can).hexdigest()) + +if __name__ == '__main__': + main() + diff --git a/scripts/vectors/rust/Cargo.toml b/scripts/vectors/rust/Cargo.toml new file mode 100644 index 0000000..5ec32c5 --- /dev/null +++ b/scripts/vectors/rust/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "lk_canon_rust" +version = "0.1.0" +edition = "2021" + +[dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" +blake3 = "1" + diff --git a/scripts/vectors/rust/src/main.rs b/scripts/vectors/rust/src/main.rs new file mode 100644 index 0000000..0bb030e --- /dev/null +++ b/scripts/vectors/rust/src/main.rs @@ -0,0 +1,43 @@ +use serde_json::Value; +use std::env; +use std::fs; + +fn canonical(v: &Value) -> String { + match v { + Value::Object(map) => { + let mut keys: Vec<&String> = map.keys().collect(); + keys.sort(); + let parts: Vec = keys + .into_iter() + .map(|k| format!("{}:{}", serde_json::to_string(k).unwrap(), canonical(&map[k]))) + .collect(); + format!("{{{}}}", parts.join(",")) + } + Value::Array(arr) => { + let parts: Vec = arr.iter().map(canonical).collect(); + format!("[{}]", parts.join(",")) + } + Value::Null => "null".to_string(), + Value::Bool(b) => if *b { "true" } 
else { "false" }.to_string(), + Value::Number(n) => { + if let Some(i) = n.as_i64() { i.to_string() } else { panic!("floats forbidden in canonical positions") } + } + Value::String(s) => serde_json::to_string(s).unwrap(), + } +} + +fn preimage(entry: &Value) -> String { + let mut e = entry.clone(); + if let Some(obj) = e.as_object_mut() { obj.remove("attestations"); } + canonical(&e) +} + +fn main() { + let p = env::args().nth(1).expect("usage: lk_canon_rust entry.json"); + let data = fs::read_to_string(p).unwrap(); + let v: Value = serde_json::from_str(&data).unwrap(); + let can = preimage(&v); + let id = blake3::hash(can.as_bytes()); + println!("{}", id.to_hex()); +} + diff --git a/tests/policy/wasm/demo/.gitignore b/tests/policy/wasm/demo/.gitignore new file mode 100644 index 0000000..2c96eb1 --- /dev/null +++ b/tests/policy/wasm/demo/.gitignore @@ -0,0 +1,2 @@ +target/ +Cargo.lock diff --git a/tests/policy/wasm/demo/Cargo.toml b/tests/policy/wasm/demo/Cargo.toml new file mode 100644 index 0000000..ba9ab4c --- /dev/null +++ b/tests/policy/wasm/demo/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "policy_wasm_demo" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["cdylib"] + +[profile.release] +opt-level = "s" +codegen-units = 1 +debug = false +lto = true +strip = true +panic = "abort" diff --git a/tests/policy/wasm/demo/Makefile b/tests/policy/wasm/demo/Makefile new file mode 100644 index 0000000..7d2b01f --- /dev/null +++ b/tests/policy/wasm/demo/Makefile @@ -0,0 +1,16 @@ +TARGET=wasm32-unknown-unknown +NAME=policy_wasm_demo +OUT=target/$(TARGET)/release/$(NAME).wasm + +.PHONY: all build clean + +all: build + +build: + command -v rustup >/dev/null + rustup target add $(TARGET) + cargo build --release --target $(TARGET) + @test -f $(OUT) + +clean: + cargo clean diff --git a/tests/policy/wasm/demo/README.md b/tests/policy/wasm/demo/README.md new file mode 100644 index 0000000..69f9720 --- /dev/null +++ b/tests/policy/wasm/demo/README.md @@ -0,0 +1,43 
@@ +# Deterministic WASM Policy Demo + +This minimal Rust crate builds a `wasm32-unknown-unknown` module exporting a single function: + +```c +// extern "C" ABI +u32 validate(entry_ptr, entry_len, state_ptr, state_len, out_ptr, out_len_ptr) +``` + +- Returns `0` on acceptance; non-zero on rejection. +- Writes a short message (`ACCEPT` or `REJECT`) into the caller-provided buffer if non-null. +- Deterministic: no clock, RNG, network, filesystem, or host I/O. + +Acceptance rule (demo): accept iff both `entry_len` and `state_len` are even. + +## Build + +- Install the target once: + +```bash +rustup target add wasm32-unknown-unknown +``` + +- Build release artifact: + +```bash +cargo build --release --target wasm32-unknown-unknown +``` + +- The resulting module will be at: + +```text +./target/wasm32-unknown-unknown/release/policy_wasm_demo.wasm +``` + +## Host ABI Notes + +A real host would: +- pass pointers to the canonical entry bytes and current state bytes; +- provide an output buffer and read the length written at `out_len_ptr`; +- sandbox execution (fuel/step limits, memory limits) and enforce determinism. + +This crate is for conformance demos only. diff --git a/tests/policy/wasm/demo/src/lib.rs b/tests/policy/wasm/demo/src/lib.rs new file mode 100644 index 0000000..ef0a681 --- /dev/null +++ b/tests/policy/wasm/demo/src/lib.rs @@ -0,0 +1,51 @@ +#![no_std] + +// Minimal, deterministic policy demo ABI. +// +// Exported symbol: +// validate(entry_ptr, entry_len, state_ptr, state_len, out_ptr, out_len_ptr) -> u32 +// +// Semantics: +// - Returns 0 to indicate acceptance; non-zero for rejection. +// - Writes a short UTF-8 message into [out_ptr, out_ptr+*out_len_ptr) if non-null. +// - This demo accepts iff entry_len is even AND state_len is even; otherwise rejects. +// - Purely deterministic: no clock, randomness, or host I/O. 
/// Safety
///
/// Caller must uphold the following preconditions:
/// - `entry_ptr` is valid for reads of `entry_len` bytes when `entry_len>0`.
/// - `state_ptr` is valid for reads of `state_len` bytes when `state_len>0`.
/// - If `out_ptr` is non-null, it is valid for writes of `*out_len_ptr` bytes.
/// - If `out_len_ptr` is non-null, it is valid for reading and writing a `usize`.
/// - All non-null pointers are properly aligned for their types.
/// - The output buffer `[out_ptr, out_ptr + *out_len_ptr)` does not overlap inputs.
#[no_mangle]
pub extern "C" fn validate(
    _entry_ptr: *const u8,
    entry_len: usize,
    _state_ptr: *const u8,
    state_len: usize,
    out_ptr: *mut u8,
    out_len_ptr: *mut usize,
    ) -> u32 {
    // Demo acceptance rule: both lengths even. Purely a function of the
    // two length arguments, so fully deterministic.
    let accept = (entry_len & 1) == 0 && (state_len & 1) == 0;
    let msg = if accept { b"ACCEPT\n" } else { b"REJECT\n" };

    // SAFETY: both pointers are null-checked before use. Per this
    // function's Safety contract, a non-null `out_len_ptr` is readable and
    // writable as a `usize`, and a non-null `out_ptr` is writable for
    // `*out_len_ptr` bytes. `n` is clamped to `msg.len()` before the copy,
    // so we never write past either the caller's capacity or the message;
    // the contract also guarantees the output buffer does not overlap the
    // inputs (and `msg` is a static byte string), satisfying
    // `copy_nonoverlapping`'s no-overlap requirement.
    unsafe {
        if !out_ptr.is_null() && !out_len_ptr.is_null() {
            // Caller-provided buffer capacity.
            let mut n = core::ptr::read(out_len_ptr);
            // Clamp to the message length so the copy is always in-bounds.
            if n > msg.len() { n = msg.len(); }
            core::ptr::copy_nonoverlapping(msg.as_ptr(), out_ptr, n);
            // Report the number of bytes actually written.
            core::ptr::write(out_len_ptr, n);
        }
    }
    if accept { 0 } else { 1 }
}

use core::panic::PanicInfo;

// Required in a #![no_std] cdylib: an abort-by-spinning handler keeps the
// module free of host dependencies and deterministic.
#[panic_handler]
fn panic(_info: &PanicInfo) -> ! {
    loop {}
}