From fad1d080a86bf4bd0aa4a099f9af84afdb24a619 Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Sun, 2 Nov 2025 23:17:41 -0800 Subject: [PATCH 1/2] flux-validator: validate, count canonical jobspec resources Add support for validating canonical jobspecs in YAML or JSON format. The Flux Jobspec class has a function that validates canonical jobspec and throws errors with specific reasons why an input jobspec is invalid. Integrate this functionality into the flux-validator. Also add support for walking a canonical jobspec and validating the resource counts via `.resource_walk()`. Outputting the counts will provide feedback for an agent to correct a generated canonical jobspec. --- docker/flux-validator/validate.py | 46 +++++++++++++++++++++++++------ 1 file changed, 38 insertions(+), 8 deletions(-) diff --git a/docker/flux-validator/validate.py b/docker/flux-validator/validate.py index 53f6eca..128a0ae 100644 --- a/docker/flux-validator/validate.py +++ b/docker/flux-validator/validate.py @@ -2,12 +2,16 @@ import argparse import sys +import yaml +import json from rich import box from rich.console import Console from rich.padding import Padding from rich.panel import Panel +from flux.job.Jobspec import validate_jobspec + import fractale.utils as utils # This will pretty print all exceptions in rich @@ -48,10 +52,17 @@ def get_parser(): description="validate flux batch script", ) validate.add_argument("path", help="path to batch.sh to validate") + + count = subparsers.add_parser( + "count", + formatter_class=argparse.RawTextHelpFormatter, + description="count resources in flux batch script", + ) + count.add_argument("path", help="path to batch.yaml to count resources") return parser -def run_validate(): +def run_command(): parser = get_parser() if len(sys.argv) == 1: help() @@ -62,6 +73,8 @@ def run_validate(): # Here we can assume instantiated to get args if args.command == "validate": return validate(args.path) + elif args.command == "count": + return 
count_resources(args.path) raise ValueError(f"The command {args.command} is not known") @@ -69,15 +82,32 @@ def validate(path): """ Validate the path to a batch.sh or similar. """ - validator = Validator("batch") + jobspec = None content = utils.read_file(path) try: - # Setting fail fast to False means we will get ALL errors at once - validator.validate(path, fail_fast=False) - except Exception as e: - display_error(content, str(e)) - sys.exit(1) + yaml_content = yaml.safe_load(content) + json_content = json.dumps(yaml_content) + except Exception: + validator = Validator("batch") + try: + # Setting fail fast to False means we will get ALL errors at once + validator.validate(path, fail_fast=False) + except Exception as e: + display_error(content, str(e)) + sys.exit(1) + else: + jobspec = validate_jobspec(json_content) + return jobspec + + +def count_resources(path): + """ + Count the resources in the path to a batch.yaml or similar. + """ + jobspec = validate(path) + for res in jobspec[1].resource_walk(): + print(f"Type: {res[1]['type']}, count: {res[2]}") if __name__ == "__main__": - run_validate() + run_command() From 77dad4e4e4a0ef55f85ab7733e279927704ffe7b Mon Sep 17 00:00:00 2001 From: Daniel Milroy Date: Sun, 2 Nov 2025 23:23:48 -0800 Subject: [PATCH 2/2] flux-validator: canonical jobspec validation, resource count to README Add instructions for validating and counting resources in a canonical jobspec, including an example for overriding the entrypoint. 
--- docker/flux-validator/README.md | 80 +++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/docker/flux-validator/README.md b/docker/flux-validator/README.md index 8d2ad66..31b1dd2 100644 --- a/docker/flux-validator/README.md +++ b/docker/flux-validator/README.md @@ -41,3 +41,83 @@ Validation failed at directives: --noodles=2: 2 Sep 09 06:48:51.615419 UTC 2025 broker.err[0]: rc2.0: python3 /code/docker/flux-validator/validate.py validate /data/docker/flux-validator/batch-invalid.sh Exited (rc=1) 0.1s ``` + +#### Canonical jobspecs in YAML or JSON format + +##### Valid +```bash +$ docker run -it -v $(pwd):/data ghcr.io/compspec/fractale:flux-validator /data/docker/flux-validator/implicit-slot.yaml +$ echo $? +``` + +##### Invalid +```bash +$ docker run -it -v $(pwd):/data ghcr.io/compspec/fractale:flux-validator /data/docker/flux-validator/implicit-slot-invalid.yaml +Traceback (most recent call last): + File "/code/docker/flux-validator/validate.py", line 113, in <module> + run_command() + File "/code/docker/flux-validator/validate.py", line 75, in run_command + return validate(args.path) + File "/code/docker/flux-validator/validate.py", line 99, in validate + jobspec = validate_jobspec(json_content) + File "/usr/lib/python3.10/site-packages/flux/job/Jobspec.py", line 131, in validate_jobspec + jobspec = Jobspec(**jobspec_obj) + File "/usr/lib/python3.10/site-packages/flux/job/Jobspec.py", line 198, in __init__ + self._validate_resource(res) + File "/usr/lib/python3.10/site-packages/flux/job/Jobspec.py", line 306, in _validate_resource + raise ValueError("slots must have labels") +ValueError: slots must have labels +``` + +##### Validate counts +Note: the container entrypoint must be overridden to run the `count` subcommand.
+ +```bash +$ docker run --entrypoint flux -it -v $(pwd):/data ghcr.io/compspec/fractale:flux-validator start python3 /code/docker/flux-validator/validate.py count /data/docker/flux-validator/implicit-slot.yaml +Type: node, count: 1 +Type: memory, count: 256 +Type: socket, count: 2 +Type: gpu, count: 8 +Type: slot, count: 4 +Type: L3cache, count: 4 +Type: core, count: 16 +Type: pu, count: 16 +``` + +Where `implicit-slot.yaml` has the following content: +```yaml +version: 9999 +resources: + - type: node + count: 1 + with: + - type: memory + count: 256 + - type: socket + count: 2 + with: + - type: gpu + count: 4 + - type: slot + count: 2 + label: default + with: + - type: L3cache + count: 1 + with: + - type: core + count: 4 + with: + - type: pu + count: 1 + +# a comment +attributes: + system: + duration: 3600 +tasks: + - command: [ "app" ] + slot: default + count: + per_slot: 1 +``` \ No newline at end of file