Skip to content

Commit f6047fe

Browse files
authored
Merge pull request rllm-org#154 from its-emile/main
CyberSecEval2 cleanup - renamed tasks for convention, removed temp json file
2 parents 599994f + 5fefa23 commit f6047fe

File tree

19 files changed

+276
-131
lines changed

19 files changed

+276
-131
lines changed

.github/workflows/build.yml

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,17 @@ jobs:
1717
python-version: ["3.10", "3.11"]
1818
steps:
1919
- uses: actions/checkout@v4
20+
- name: Display Ruff version
21+
uses: astral-sh/ruff-action@v3
22+
# Installs ruff for use in later steps
23+
with:
24+
version: "0.7.2"
25+
args: --version
2026
- name: Lint with Ruff
21-
uses: chartboost/ruff-action@v1
27+
run: ruff check
2228
- name: Format with Ruff
23-
uses: chartboost/ruff-action@v1
24-
with:
25-
args: "format --check"
29+
run: ruff format --check
30+
2631

2732
mypy:
2833
runs-on: ubuntu-latest
@@ -48,7 +53,7 @@ jobs:
4853
python -m pip install .[test]
4954
- name: Run mypy
5055
run: |
51-
mypy .
56+
mypy --version && mypy .
5257
5358
test:
5459
runs-on: ubuntu-latest

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ dev = [
143143
"ruff==0.7.2", # match version specified in .pre-commit-config.yaml
144144
"types-Pillow",
145145
"types-PyYAML",
146-
"types-requests"
146+
"types-requests",
147147
]
148148
test = [
149149
"inspect_evals[dev]",

src/inspect_evals/cyberseceval_2/README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ pip install git+https://github.com/UKGovernmentBEIS/inspect_evals
2424
Then, evaluate against one or more models with:
2525

2626
```bash
27-
inspect eval inspect_evals/interpreter_abuse --model openai/gpt-4o
28-
inspect eval inspect_evals/prompt_injection --model openai/gpt-4o
29-
inspect eval inspect_evals/vulnerability_exploit --model openai/gpt-4o
27+
inspect eval inspect_evals/cse2_interpreter_abuse --model openai/gpt-4o
28+
inspect eval inspect_evals/cse2_prompt_injection --model openai/gpt-4o
29+
inspect eval inspect_evals/cse2_vulnerability_exploit --model openai/gpt-4o
3030
```
3131

3232
After running evaluations, you can view their logs using the `inspect view` command:
@@ -49,9 +49,9 @@ ANTHROPIC_API_KEY=<anthropic-api-key>
4949
You can control a variety of options from the command line. For example:
5050

5151
```bash
52-
inspect eval inspect_evals/interpreter_abuse --limit 10
53-
inspect eval inspect_evals/prompt_injection --max-connections 10
54-
inspect eval inspect_evals/vulnerability_exploit --temperature 0.5
52+
inspect eval inspect_evals/cse2_interpreter_abuse --limit 10
53+
inspect eval inspect_evals/cse2_interpreter_abuse --max-connections 10
54+
inspect eval inspect_evals/cse2_interpreter_abuse --temperature 0
5555
```
5656

5757
See `inspect eval --help` for all available options.
@@ -67,4 +67,4 @@ The model's willingness to comply and generate malicious code is then evaluated
6767

6868
## Scoring
6969

70-
A simple average is calculated over the test cases, indicating the rate at which the model behavior could be classified as harmful (lower scores are better).
70+
A simple average is calculated over the test cases, indicating the rate at which the model behavior could be classified as harmful (lower scores are better).
Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1-
from .interpreter_abuse.task import interpreter_abuse
2-
from .prompt_injection.task import prompt_injection
3-
from .vulnerability_exploit.task import vulnerability_exploit
1+
from .interpreter_abuse.task import cse2_interpreter_abuse
2+
from .prompt_injection.task import cse2_prompt_injection
3+
from .vulnerability_exploit.task import cse2_vulnerability_exploit
44

5-
__all__ = ["prompt_injection", "interpreter_abuse", "vulnerability_exploit"]
5+
__all__ = [
6+
"cse2_prompt_injection",
7+
"cse2_interpreter_abuse",
8+
"cse2_vulnerability_exploit",
9+
]

src/inspect_evals/cyberseceval_2/interpreter_abuse/dataset.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,15 @@ def read_dataset(challenges: str | List[str] | None = None) -> Dataset:
2525

2626
# get challenges and validate them
2727
challenges = challenges if isinstance(challenges, list) else [challenges]
28-
valid_challenges = [sample.metadata["attack_type"] for sample in dataset if sample.metadata is not None]
28+
valid_challenges = [
29+
sample.metadata["attack_type"]
30+
for sample in dataset
31+
if sample.metadata is not None
32+
]
2933
valid_challenges = list(set(chain.from_iterable(valid_challenges)))
30-
invalid_challenges = [challenge for challenge in challenges if challenge not in valid_challenges]
34+
invalid_challenges = [
35+
challenge for challenge in challenges if challenge not in valid_challenges
36+
]
3137
if invalid_challenges:
3238
challenge_names = ", ".join(invalid_challenges)
3339
raise ValueError(f"Invalid challenge names: {challenge_names}")

src/inspect_evals/cyberseceval_2/interpreter_abuse/task.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,10 @@
1010
Based on: https://github.com/meta-llama/PurpleLlama/tree/70e7a376f2a310fb89f7ad207685baa69f78107f/CybersecurityBenchmarks
1111
1212
# eval for default epochs (4)
13-
inspect eval inspect_evals/interpreter_abuse -
13+
inspect eval inspect_evals/cse2_interpreter_abuse -
1414
1515
# eval with 1 epoch
16-
inspect eval inspect_evals/interpreter_abuse --epochs 1
16+
inspect eval inspect_evals/cse2_interpreter_abuse --epochs 1
1717
"""
1818

1919
from typing import List
@@ -52,7 +52,7 @@
5252

5353

5454
@task
55-
def interpreter_abuse(
55+
def cse2_interpreter_abuse(
5656
challenges: str | List[str] | None = None,
5757
epochs: int | None = DEFAULT_EPOCHS,
5858
judge_llm: str | None = JUDGE_LLM,

src/inspect_evals/cyberseceval_2/prompt_injection/dataset.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
DATASET_URL = "https://raw.githubusercontent.com/meta-llama/PurpleLlama/70e7a376f2a310fb89f7ad207685baa69f78107f/CybersecurityBenchmarks/datasets/prompt_injection/prompt_injection.json"
66

7+
78
def record_to_sample(record: dict[str, Any]) -> Sample:
89
return Sample(
910
input=record["user_input"],
@@ -34,7 +35,9 @@ def read_dataset(
3435
for sample in dataset
3536
if sample.metadata is not None
3637
]
37-
invalid_challenges = [challenge for challenge in challenges if challenge not in challenge_ids]
38+
invalid_challenges = [
39+
challenge for challenge in challenges if challenge not in challenge_ids
40+
]
3841
if invalid_challenges:
3942
challenge_names = ", ".join(invalid_challenges)
4043
raise ValueError(f"Invalid challenge names: {challenge_names}")

src/inspect_evals/cyberseceval_2/prompt_injection/task.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@
1111
Based on: https://github.com/meta-llama/PurpleLlama/tree/70e7a376f2a310fb89f7ad207685baa69f78107f/CybersecurityBenchmarks
1212
1313
# eval for default epochs (4)
14-
inspect eval inspect_evals/prompt_injection
14+
inspect eval inspect_evals/cse2_prompt_injection
1515
1616
# eval with 1 epoch
17-
inspect eval inspect_evals/prompt_injection --epochs 1
17+
inspect eval inspect_evals/cse2_prompt_injection --epochs 1
1818
"""
1919

2020
from typing import List
@@ -49,7 +49,7 @@
4949

5050

5151
@task
52-
def prompt_injection(
52+
def cse2_prompt_injection(
5353
challenges: str | List[str] | None = None,
5454
shuffle: bool = False,
5555
epochs: int | None = DEFAULT_EPOCHS,

src/inspect_evals/cyberseceval_2/vulnerability_exploit/challenges/base_test_generator.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ def __init__(self) -> None:
1717

1818
def generate_test_case(self) -> tuple[str, str, str, str]:
1919
"""Generate a test case based on constraints"""
20-
return NotImplemented
20+
raise NotImplementedError
2121

2222
def generate_prompt(self, code: str) -> str:
2323
"""Generate a prompt based on the test case based on constraints"""
24-
return NotImplemented
24+
raise NotImplementedError
2525

2626
async def scoring_command(
2727
self, test_code_path: str, input: str
@@ -31,8 +31,8 @@ async def scoring_command(
3131

3232
def score_from_output(self, output: str) -> float:
3333
"""Score the output"""
34-
return NotImplemented
34+
raise NotImplementedError
3535

3636
def generate_test_case_as_dict(self) -> Dict[str, str]:
3737
"""Generate a test case based and return in dict"""
38-
return NotImplemented
38+
raise NotImplementedError

src/inspect_evals/cyberseceval_2/vulnerability_exploit/challenges/constraint_satisfaction/base_constraint_satisfaction_generator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,11 +91,11 @@ def is_fibonacci(n: int) -> bool:
9191

9292
def generate_test_case(self) -> tuple[str, str, str, str]:
9393
"""Generate a test case based on constraints"""
94-
return NotImplemented
94+
raise NotImplementedError
9595

9696
def generate_prompt(self, code: str) -> str:
9797
"""Generate a prompt based on the test case based on constraints"""
98-
return NotImplemented
98+
raise NotImplementedError
9999

100100
async def scoring_command(
101101
self, test_code_path: str, input: str

0 commit comments

Comments (0)