|
3 | 3 | from datetime import datetime |
4 | 4 | import gzip |
5 | 5 | import io |
| 6 | +import itertools |
6 | 7 | import os |
| 8 | +import string |
7 | 9 | import struct |
8 | 10 | import tarfile |
9 | 11 | import zipfile |
@@ -1163,28 +1165,13 @@ def test_categorical_writing(self, version, temp_file): |
1163 | 1165 |
|
1164 | 1166 | def test_categorical_warnings_and_errors(self, temp_file): |
1165 | 1167 | # Warning for non-string labels |
1166 | | - # Error for labels too long |
1167 | | - original = DataFrame.from_records( |
1168 | | - [["a" * 10000], ["b" * 10000], ["c" * 10000], ["d" * 10000]], |
1169 | | - columns=["Too_long"], |
1170 | | - ) |
1171 | | - |
1172 | | - original = original.astype("category") |
1173 | | - path = temp_file |
1174 | | - msg = ( |
1175 | | - "Stata value labels for a single variable must have " |
1176 | | - r"a combined length less than 32,000 characters\." |
1177 | | - ) |
1178 | | - with pytest.raises(ValueError, match=msg): |
1179 | | - original.to_stata(path) |
1180 | | - |
1181 | 1168 | original = DataFrame.from_records( |
1182 | 1169 | [["a"], ["b"], ["c"], ["d"], [1]], columns=["Too_long"] |
1183 | 1170 | ).astype("category") |
1184 | 1171 |
|
1185 | 1172 | msg = "data file created has not lost information due to duplicate labels" |
1186 | 1173 | with tm.assert_produces_warning(ValueLabelTypeMismatch, match=msg): |
1187 | | - original.to_stata(path) |
| 1174 | + original.to_stata(temp_file) |
1188 | 1175 | # should get a warning for mixed content |
1189 | 1176 |
|
1190 | 1177 | @pytest.mark.parametrize("version", [114, 117, 118, 119, None]) |
@@ -2592,3 +2579,12 @@ def test_empty_frame(temp_file): |
2592 | 2579 | df3 = read_stata(path, columns=["a"]) |
2593 | 2580 | assert "b" not in df3 |
2594 | 2581 | tm.assert_series_equal(df3.dtypes, dtypes.loc[["a"]]) |
| 2582 | + |
| 2583 | + |
| 2584 | +@pytest.mark.parametrize("version", [114, 117, 118, 119, None]) |
| 2585 | +def test_many_strl(temp_file, version): |
| 2586 | + n = 65534 |
| 2587 | + df = DataFrame(np.arange(n), columns=["col"]) |
| 2588 | + lbls = ["".join(v) for v in itertools.product(*([string.ascii_letters] * 3))] |
| 2589 | + value_labels = {"col": {i: lbls[i] for i in range(n)}} |
| 2590 | + df.to_stata(temp_file, value_labels=value_labels, version=version) |
0 commit comments