Skip to content

Commit bf5a1d4

Browse files
Merge pull request rllm-org#158 from MattFisher/cleanup/cybermetric
Follow-up: CyberMetric
2 parents da8bcc5 + 1b2bd68 commit bf5a1d4

File tree

6 files changed

+55
-127936
lines changed

6 files changed

+55
-127936
lines changed

src/inspect_evals/cybermetric/cybermetric.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
"""
2-
32
CyberMetric: A Benchmark Dataset based on Retrieval-Augmented Generation for Evaluating LLMs in Cybersecurity Knowledge
43
54
Norbert Tihanyi, Mohamed Amine Ferrag, Ridhi Jain, Tamas Bisztray, Merouane Debbah
@@ -16,63 +15,94 @@
1615
1716
"""
1817

18+
import json
1919
from pathlib import Path
2020
from typing import Any
2121

22-
from datasets import Dataset # type: ignore
22+
import requests
2323
from inspect_ai import Task, task
24-
from inspect_ai.dataset import Sample, json_dataset
24+
from inspect_ai.dataset import Dataset, Sample, json_dataset
2525
from inspect_ai.scorer import choice
2626
from inspect_ai.solver import multiple_choice, system_message
27+
from platformdirs import user_cache_dir
2728

28-
DATASET_DIR = Path(__file__).parent / "datasets"
29+
GITHUB_DATASET_URL_TEMPLATE = "https://raw.githubusercontent.com/cybermetric/CyberMetric/205262cdf5022ba890e792efd176fb19d42913fa/{name}-v1.json"
2930
SYSTEM_MESSAGE = "You are a security expert who answers questions."
31+
CACHE_DIR = Path(user_cache_dir("inspect_evals")) / "cybermetric" / "data"
3032

3133

3234
@task
3335
def cybermetric_80() -> Task:
3436
"""Evaluates model on CyberMetric-80"""
35-
return create_task(80)
37+
return create_task("CyberMetric-80")
3638

3739

3840
@task
3941
def cybermetric_500() -> Task:
4042
"""Evaluates model on CyberMetric-500"""
41-
return create_task(500)
43+
return create_task("CyberMetric-500")
4244

4345

4446
@task
4547
def cybermetric_2000() -> Task:
4648
"""Evaluates model on CyberMetric-2000"""
47-
return create_task(2000)
49+
return create_task("CyberMetric-2000")
4850

4951

5052
@task
5153
def cybermetric_10000() -> Task:
5254
"""Evaluates model on CyberMetric-10000"""
53-
return create_task(10000)
55+
return create_task("CyberMetric-10000")
5456

5557

56-
def create_task(dataset_size: int) -> Task:
57-
name = f"CyberMetric-{dataset_size}"
58+
def create_task(dataset_name: str) -> Task:
5859
return Task(
59-
name=name,
60-
dataset=load_dataset(name),
60+
name=dataset_name,
61+
dataset=load_dataset(dataset_name),
6162
solver=[system_message(SYSTEM_MESSAGE), multiple_choice()],
6263
scorer=choice(),
6364
)
6465

6566

6667
def load_dataset(name: str) -> Dataset:
    """
    Load a dataset from the CyberMetric repository on GitHub.

    The json files on GitHub nest the questions under a "questions" key, which
    makes them incompatible with the `json_dataset` loader. On first use this
    function downloads the json file, extracts the questions, and writes them
    to a jsonl file in the cache directory; later calls read that jsonl file
    directly via `json_dataset`.

    Args:
        name: Dataset name as used in the upstream file name and the cache
            file names, e.g. "CyberMetric-80".

    Returns:
        The dataset produced by `json_dataset`, with each record converted by
        `record_to_sample`.
    """
    dataset_url = GITHUB_DATASET_URL_TEMPLATE.format(name=name)
    json_file = CACHE_DIR / f"{name}.json"
    jsonl_file = CACHE_DIR / f"{name}.jsonl"

    if not jsonl_file.exists():
        _ensure_data(json_file, dataset_url)

        # Read data from the json file and un-nest it. The encoding is given
        # explicitly so the result does not depend on the platform locale.
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)["questions"]

        # Write the data back out as jsonl. Write to a sibling file first and
        # rename it into place, so an interrupted run cannot leave a truncated
        # jsonl file that would satisfy the exists() check above forever after.
        partial_file = jsonl_file.with_name(jsonl_file.name + ".part")
        try:
            with partial_file.open("w", encoding="utf-8") as f:
                f.writelines(json.dumps(record) + "\n" for record in data)
            partial_file.replace(jsonl_file)
        finally:
            # No-op after a successful rename; cleans up after a failure.
            partial_file.unlink(missing_ok=True)

    dataset = json_dataset(
        json_file=str(jsonl_file),
        name=name,
        sample_fields=record_to_sample,
        auto_id=True,
    )
    return dataset
7495

7596

97+
def _ensure_data(json_file: Path, dataset_url: str) -> None:
98+
if not json_file.exists():
99+
json_file.parent.mkdir(parents=True, exist_ok=True)
100+
response = requests.get(dataset_url)
101+
response.raise_for_status()
102+
with open(json_file, "wb") as f:
103+
f.write(response.content)
104+
105+
76106
def record_to_sample(record: dict[str, Any]) -> Sample:
77107
options = ", ".join([f"{key}) {value}" for key, value in record["answers"].items()])
78108
input = f"Question: {record['question']}\nOptions: {options}\n\n"

0 commit comments

Comments
 (0)