Skip to content

Commit 6b8ef83

Browse files
committed
Added tests to ensure SecQA dataset is as expected
1 parent aa969fc commit 6b8ef83

File tree

2 files changed

+139
-0
lines changed

2 files changed

+139
-0
lines changed

src/inspect_evals/sec_qa/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from .sec_qa import (
2+
DATASET_PATH,
23
format_system_message_fewshot,
34
record_to_sample,
45
sample_to_fewshot,
@@ -9,6 +10,7 @@
910
)
1011

1112
__all__ = [
13+
"DATASET_PATH",
1214
"format_system_message_fewshot",
1315
"record_to_sample",
1416
"sample_to_fewshot",
Lines changed: 137 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,137 @@
1+
from urllib.parse import quote
2+
3+
import requests
4+
5+
from inspect_evals.sec_qa.sec_qa import (
6+
DATASET_PATH,
7+
)
8+
9+
# Expected JSON body of the Hugging Face dataset viewer `/is-valid` endpoint.
# test_dataset_is_valid compares the response against this dict with `==`,
# so all five flags must be present and true.
HUGGING_FACE_IS_VALID_RESPONSE = {
    "preview": True,
    "viewer": True,
    "search": True,
    "filter": True,
    "statistics": True,
}
16+
17+
# Expected `splits` metadata of the `secqa_v1` subset, as returned under
# `dataset_info` by the dataset viewer `/info` endpoint. Compared with `==`
# in test_required_splits; the `dev` split is the one used for 5-shot
# evaluation, so a change there matters most.
HUGGING_FACE_EXPECTED_SPLITS_V1 = {
    "dev": {
        "name": "dev",
        "num_bytes": 2538,
        "num_examples": 5,
        "dataset_name": "secqa",
    },
    "val": {
        "name": "val",
        "num_bytes": 6364,
        "num_examples": 12,
        "dataset_name": "secqa",
    },
    "test": {
        "name": "test",
        "num_bytes": 53627,
        "num_examples": 110,
        "dataset_name": "secqa",
    },
}
37+
38+
# Expected `splits` metadata of the `secqa_v2` subset, as returned under
# `dataset_info` by the dataset viewer `/info` endpoint. Compared with `==`
# in test_required_splits; the `dev` split is the one used for 5-shot
# evaluation, so a change there matters most.
HUGGING_FACE_EXPECTED_SPLITS_V2 = {
    "dev": {
        "name": "dev",
        "num_bytes": 3387,
        "num_examples": 5,
        "dataset_name": "secqa",
    },
    "val": {
        "name": "val",
        "num_bytes": 7494,
        "num_examples": 10,
        "dataset_name": "secqa",
    },
    "test": {
        "name": "test",
        "num_bytes": 68458,
        "num_examples": 100,
        "dataset_name": "secqa",
    },
}
58+
59+
60+
def test_dataset_is_valid() -> None:
    """Test that the SecQA dataset URL is valid.

    Queries the Hugging Face dataset viewer `/is-valid` endpoint for
    ``DATASET_PATH`` and asserts that it answers with HTTP 200 and a JSON
    body equal to ``HUGGING_FACE_IS_VALID_RESPONSE``.
    """
    response = requests.get(
        "https://datasets-server.huggingface.co/is-valid?dataset=" + quote(DATASET_PATH),
        # requests never times out by default; bound the wait so a stalled
        # connection fails the test instead of hanging the whole run.
        timeout=30,
    )
    assert (
        response.status_code == 200
    ), f"Hugging Face dataset `/is-valid` returned status code {response.status_code} check documentation at `https://huggingface.co/docs/dataset-viewer/en/valid`"
    assert response.json() == HUGGING_FACE_IS_VALID_RESPONSE
69+
70+
71+
def test_required_splits() -> None:
    """Test that the SecQA dataset has the required splits.

    Fetches the dataset viewer `/info` response for ``DATASET_PATH``, checks
    that both the ``secqa_v1`` and ``secqa_v2`` subsets are present, and
    asserts that each subset's ``splits`` metadata equals the expected
    constant exactly.
    """
    response = requests.get(
        "https://datasets-server.huggingface.co/info?dataset=" + quote(DATASET_PATH),
        # requests never times out by default; bound the wait so a stalled
        # connection fails the test instead of hanging the whole run.
        timeout=30,
    )
    assert (
        response.status_code == 200
    ), f"Hugging Face dataset `/info` returned status code {response.status_code} check documentation at `https://huggingface.co/docs/dataset-viewer/en/info`"

    data = response.json()
    assert (
        "dataset_info" in data
    ), f"Hugging Face dataset `/info` returned unexpected json {data} check documentation at `https://huggingface.co/docs/dataset-viewer/en/info`"

    dataset_info = data["dataset_info"]
    # Verify both subsets exist before indexing into them, so a missing
    # subset yields a readable assertion message rather than a KeyError.
    assert (
        "secqa_v1" in dataset_info
    ), f"SecQA V1 subset missing from {DATASET_PATH} dataset"
    assert (
        "secqa_v2" in dataset_info
    ), f"SecQA V2 subset missing from {DATASET_PATH} dataset"

    assert (
        dataset_info["secqa_v1"]["splits"] == HUGGING_FACE_EXPECTED_SPLITS_V1
    ), "Unexpected dataset splits for SecQA V1 and especially check dev which is used in 5-shot evaluation"
    assert (
        dataset_info["secqa_v2"]["splits"] == HUGGING_FACE_EXPECTED_SPLITS_V2
    ), "Unexpected dataset splits for SecQA V2 and especially check dev which is used in 5-shot evaluation"
99+
100+
101+
def test_required_columns() -> None:
    """Test that the SecQA dataset has the required columns.

    Fetches the dataset viewer `/info` response for ``DATASET_PATH``, checks
    that both the ``secqa_v1`` and ``secqa_v2`` subsets are present, and
    asserts that each subset's ``features`` contain the ``Question``, ``A``,
    ``B``, ``C``, ``D`` and ``Answer`` columns.
    """
    response = requests.get(
        "https://datasets-server.huggingface.co/info?dataset=" + quote(DATASET_PATH),
        # requests never times out by default; bound the wait so a stalled
        # connection fails the test instead of hanging the whole run.
        timeout=30,
    )
    assert (
        response.status_code == 200
    ), f"Hugging Face dataset `/info` returned status code {response.status_code} check documentation at `https://huggingface.co/docs/dataset-viewer/en/info`"

    data = response.json()
    assert (
        "dataset_info" in data
    ), f"Hugging Face dataset `/info` returned unexpected json {data} check documentation at `https://huggingface.co/docs/dataset-viewer/en/info`"

    dataset_info = data["dataset_info"]
    # (subset key in the /info payload, label used in failure messages)
    subsets = (("secqa_v1", "V1"), ("secqa_v2", "V2"))
    required_columns = ("Question", "A", "B", "C", "D", "Answer")

    # Check presence of every subset first so a missing one is reported with
    # a readable message before any subset is indexed.
    for subset, label in subsets:
        assert (
            subset in dataset_info
        ), f"SecQA {label} subset missing from {DATASET_PATH} dataset"

    for subset, label in subsets:
        features = dataset_info[subset]["features"]
        for column in required_columns:
            assert column in features, f"{column} column missing from SecQA {label}"

0 commit comments

Comments
 (0)