|
| 1 | +from urllib.parse import quote |
| 2 | + |
| 3 | +import requests |
| 4 | + |
| 5 | +from inspect_evals.sec_qa.sec_qa import ( |
| 6 | + DATASET_PATH, |
| 7 | +) |
| 8 | + |
# JSON body that the Hugging Face `/is-valid` response is compared against:
# each of the listed flags must be reported as True.
HUGGING_FACE_IS_VALID_RESPONSE = dict.fromkeys(
    ("preview", "viewer", "search", "filter", "statistics"),
    True,
)
| 16 | + |
# Expected `splits` metadata for the `secqa_v1` subset, keyed by split name.
# Each row below is (split name, num_bytes, num_examples).
HUGGING_FACE_EXPECTED_SPLITS_V1 = {
    split: {
        "name": split,
        "num_bytes": num_bytes,
        "num_examples": num_examples,
        "dataset_name": "secqa",
    }
    for split, num_bytes, num_examples in (
        ("dev", 2538, 5),
        ("val", 6364, 12),
        ("test", 53627, 110),
    )
}
| 37 | + |
# Expected `splits` metadata for the `secqa_v2` subset, keyed by split name.
# Each row below is (split name, num_bytes, num_examples).
HUGGING_FACE_EXPECTED_SPLITS_V2 = {
    split: {
        "name": split,
        "num_bytes": num_bytes,
        "num_examples": num_examples,
        "dataset_name": "secqa",
    }
    for split, num_bytes, num_examples in (
        ("dev", 3387, 5),
        ("val", 7494, 10),
        ("test", 68458, 100),
    )
}
| 58 | + |
| 59 | + |
def test_dataset_is_valid():
    """Test that the SecQA dataset URL is valid.

    Requests the Hugging Face dataset viewer `/is-valid` endpoint for
    ``DATASET_PATH`` and asserts that the call returns HTTP 200 and that the
    JSON body equals ``HUGGING_FACE_IS_VALID_RESPONSE``.
    """
    response = requests.get(
        "https://datasets-server.huggingface.co/is-valid?dataset="
        + quote(DATASET_PATH),
        # requests has no default timeout; without one a stalled server would
        # hang the test run instead of failing this test.
        timeout=30,
    )
    assert response.status_code == 200, (
        f"Hugging Face dataset `/is-valid` returned status code {response.status_code} check documentation at `https://huggingface.co/docs/dataset-viewer/en/valid`"
    )
    assert response.json() == HUGGING_FACE_IS_VALID_RESPONSE
| 69 | + |
| 70 | + |
def test_required_splits():
    """Test that the SecQA dataset has the required splits.

    Requests the Hugging Face dataset viewer `/info` endpoint for
    ``DATASET_PATH``, checks that both the ``secqa_v1`` and ``secqa_v2``
    subsets are present, and then compares each subset's ``splits`` metadata
    with the expected constants.
    """
    response = requests.get(
        "https://datasets-server.huggingface.co/info?dataset="
        + quote(DATASET_PATH),
        # requests has no default timeout; without one a stalled server would
        # hang the test run instead of failing this test.
        timeout=30,
    )
    assert response.status_code == 200, (
        f"Hugging Face dataset `/info` returned status code {response.status_code} check documentation at `https://huggingface.co/docs/dataset-viewer/en/info`"
    )

    data = response.json()
    assert "dataset_info" in data, (
        f"Hugging Face dataset `/info` returned unexpected json {data} check documentation at `https://huggingface.co/docs/dataset-viewer/en/info`"
    )

    dataset_info = data["dataset_info"]
    expected_by_subset = (
        ("secqa_v1", "V1", HUGGING_FACE_EXPECTED_SPLITS_V1),
        ("secqa_v2", "V2", HUGGING_FACE_EXPECTED_SPLITS_V2),
    )

    # Confirm every subset exists before inspecting any of them, so a missing
    # subset is reported as such rather than as a KeyError.
    for subset, version, _ in expected_by_subset:
        assert subset in dataset_info, (
            f"SecQA {version} subset missing from {DATASET_PATH} dataset"
        )

    for subset, version, expected_splits in expected_by_subset:
        assert dataset_info[subset]["splits"] == expected_splits, (
            f"Unexpected dataset splits for SecQA {version} and especially check dev which is used in 5-shot evaluation"
        )
| 99 | + |
| 100 | + |
def test_required_columns():
    """Test that the SecQA dataset has the required columns.

    Requests the Hugging Face dataset viewer `/info` endpoint for
    ``DATASET_PATH``, checks that both the ``secqa_v1`` and ``secqa_v2``
    subsets are present, and then checks that each subset's ``features``
    contain the ``Question``, ``A``-``D`` and ``Answer`` columns.
    """
    response = requests.get(
        "https://datasets-server.huggingface.co/info?dataset="
        + quote(DATASET_PATH),
        # requests has no default timeout; without one a stalled server would
        # hang the test run instead of failing this test.
        timeout=30,
    )
    assert response.status_code == 200, (
        f"Hugging Face dataset `/info` returned status code {response.status_code} check documentation at `https://huggingface.co/docs/dataset-viewer/en/info`"
    )

    data = response.json()
    assert "dataset_info" in data, (
        f"Hugging Face dataset `/info` returned unexpected json {data} check documentation at `https://huggingface.co/docs/dataset-viewer/en/info`"
    )

    dataset_info = data["dataset_info"]
    subsets = (("secqa_v1", "V1"), ("secqa_v2", "V2"))
    required_columns = ("Question", "A", "B", "C", "D", "Answer")

    # Confirm every subset exists before inspecting any of them, so a missing
    # subset is reported as such rather than as a KeyError.
    for subset, version in subsets:
        assert subset in dataset_info, (
            f"SecQA {version} subset missing from {DATASET_PATH} dataset"
        )

    for subset, version in subsets:
        features = dataset_info[subset]["features"]
        for column in required_columns:
            assert column in features, (
                f"{column} column missing from SecQA {version}"
            )
0 commit comments