From 5793242a8e559a9e973f21fc3b21c0d050188310 Mon Sep 17 00:00:00 2001 From: Austin <504977925@qq.com> Date: Mon, 17 Nov 2025 17:57:22 +0800 Subject: [PATCH 1/8] BUG: Fix json_normalize meta validation GH#63019 --- pandas/io/json/_normalize.py | 25 +++++++++++++++++++++++++ pandas/tests/io/json/test_normalize.py | 18 ++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 7d3eefae39679..c09f6c95243e1 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -266,6 +266,28 @@ def _simple_json_normalize( return normalised_json_object +def _validate_meta(meta: list) -> None: + """ + Validate that meta parameter contains only strings. + + Parameters + ---------- + meta : list + The meta parameter to validate. + + Raises + ------ + TypeError + If meta contains non-string elements. + """ + for item in meta: + if not isinstance(item, str): + raise TypeError( + "All elements in 'meta' must be strings. " + f"Found {type(item).__name__}: {item!r}" + ) + + def json_normalize( data: dict | list[dict] | Series, record_path: str | list | None = None, @@ -426,6 +448,9 @@ def json_normalize( Returns normalized data with columns prefixed with the given string. 
""" + if meta is not None: + _validate_meta(meta) + def _pull_field( js: dict[str, Any], spec: list | str, extract_record: bool = False diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index fdbfbd004617e..df9c3eafad99a 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -903,3 +903,21 @@ def test_series_non_zero_index(self): index=[1, 2, 3], ) tm.assert_frame_equal(result, expected) + +def test_json_normalize_meta_string_validation(self): + # GH 63019 + data = [{"a": 1, 12: "meta_value", "nested": [{"b": 2}]}] + + # Test non-string meta raises TypeError consistently + with pytest.raises(TypeError, match="must be strings"): + json_normalize(data, meta=[12]) + + with pytest.raises(TypeError, match="must be strings"): + json_normalize(data, record_path=["nested"], meta=[12]) + + # Test string meta works correctly + result1 = json_normalize(data, meta=["a"]) + assert "a" in result1.columns + + result2 = json_normalize(data, record_path=["nested"], meta=["a"]) + assert "a" in result2.columns \ No newline at end of file From 210f8f8ac8ed0046cc3bb3bee6ef9383be9d1c51 Mon Sep 17 00:00:00 2001 From: Austin <504977925@qq.com> Date: Mon, 17 Nov 2025 18:08:43 +0800 Subject: [PATCH 2/8] DOC: add whatsnew entry for json_normalize fix --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e5376177d3381..4af26b7868de2 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -698,6 +698,7 @@ I/O - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. 
(:issue:`58638`) - Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`) - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`) +- Bug in :func:`pandas.json_normalize` raising ``TypeError`` when ``meta`` contained a non-string key (e.g., ``int``) and ``record_path`` was specified, which was inconsistent with the behavior when ``record_path`` was ``None`` (:issue:`63019`) Period ^^^^^^ From c1fc46260bb35a3d0b1b16cbb3ec4ebaf054f7f9 Mon Sep 17 00:00:00 2001 From: Austin <504977925@qq.com> Date: Mon, 17 Nov 2025 18:19:22 +0800 Subject: [PATCH 3/8] Doc: add entry in whatsnew --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index dad1bc61a5fcc..df3abe332b9ca 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1175,6 +1175,7 @@ I/O - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`) - Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`) +- Bug in :func:`pandas.json_normalize` raising ``TypeError`` when ``meta`` contained a non-string key (e.g., ``int``) and ``record_path`` was specified, which was inconsistent with the behavior when ``record_path`` was ``None`` (:issue:`63019`) - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. 
Now, this will fail with a ``ValueError`` (:issue:`58925`) - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`) - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) @@ -1208,7 +1209,6 @@ I/O - Bug in :meth:`read_stata` raising ``KeyError`` when input file is stored in big-endian format and contains strL data. (:issue:`58638`) - Bug in :meth:`read_stata` where extreme value integers were incorrectly interpreted as missing for format versions 111 and prior (:issue:`58130`) - Bug in :meth:`read_stata` where the missing code for double was not recognised for format versions 105 and prior (:issue:`58149`) -- Bug in :func:`pandas.json_normalize` raising ``TypeError`` when ``meta`` contained a non-string key (e.g., ``int``) and ``record_path`` was specified, which was inconsistent with the behavior when ``record_path`` was ``None`` (:issue:`63019`) - Bug in :meth:`set_option` where setting the pandas option ``display.html.use_mathjax`` to ``False`` has no effect (:issue:`59884`) - Bug in :meth:`to_csv` where ``quotechar``` is not escaped when ``escapechar`` is not None (:issue:`61407`) - Bug in :meth:`to_excel` where :class:`MultiIndex` columns would be merged to a single row when ``merge_cells=False`` is passed (:issue:`60274`) From 7be78ec3523d6724a6c1d6b2d6161b95beb941a6 Mon Sep 17 00:00:00 2001 From: Austin <504977925@qq.com> Date: Mon, 17 Nov 2025 18:33:50 +0800 Subject: [PATCH 4/8] Fix indentation for test_json_normalize_meta_string_validation --- pandas/tests/io/json/test_normalize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 0954724ff536f..26feb5dc0d916 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -928,7 +928,7 @@ def test_series_non_zero_index(self): ) tm.assert_frame_equal(result, expected) -def 
test_json_normalize_meta_string_validation(self): + def test_json_normalize_meta_string_validation(self): # GH 63019 data = [{"a": 1, 12: "meta_value", "nested": [{"b": 2}]}] @@ -944,4 +944,4 @@ def test_json_normalize_meta_string_validation(self): assert "a" in result1.columns result2 = json_normalize(data, record_path=["nested"], meta=["a"]) - assert "a" in result2.columns \ No newline at end of file + assert "a" in result2.columns From cab741aaebf656ca6710e6e1301d2c4b841b5d49 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Nov 2025 10:46:39 +0000 Subject: [PATCH 5/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- pandas/io/json/_normalize.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 70b173b702e5a..57c245e41346c 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -462,7 +462,6 @@ def json_normalize( if meta is not None: _validate_meta(meta) - def _pull_field( js: dict[str, Any], spec: list | str, extract_record: bool = False ) -> Scalar | Iterable: From ddbe74c1c67e1bb693835aa76646f89fc7a2ee56 Mon Sep 17 00:00:00 2001 From: Austin <504977925@qq.com> Date: Mon, 17 Nov 2025 19:10:46 +0800 Subject: [PATCH 6/8] Update meta validation to allow lists of strings --- pandas/io/json/_normalize.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 57c245e41346c..94d9f90630e27 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -269,7 +269,7 @@ def _simple_json_normalize( def _validate_meta(meta: list) -> None: """ - Validate that meta parameter contains only strings. + Validate that meta parameter contains only strings or lists of strings. 
Parameters ---------- @@ -279,12 +279,19 @@ def _validate_meta(meta: list) -> None: Raises ------ TypeError - If meta contains non-string elements. + If meta contains elements that are not strings or lists of strings. """ for item in meta: - if not isinstance(item, str): + if isinstance(item, list): + for subitem in item: + if not isinstance(subitem, str): + raise TypeError( + "All elements in nested meta paths must be strings. " + f"Found {type(subitem).__name__}: {subitem!r}" + ) + elif not isinstance(item, str): raise TypeError( - "All elements in 'meta' must be strings. " + "All elements in 'meta' must be strings or lists of strings. " f"Found {type(item).__name__}: {item!r}" ) From 100de6f51020aaa902fc911f36a91cc9a9e76da7 Mon Sep 17 00:00:00 2001 From: Austin <504977925@qq.com> Date: Mon, 17 Nov 2025 19:44:13 +0800 Subject: [PATCH 7/8] Modify _validate_meta to accept multiple types Updated the _validate_meta function to accept a string or a list of strings/lists of strings as input. --- pandas/io/json/_normalize.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 94d9f90630e27..53f8764ee4c82 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -267,13 +267,13 @@ def _simple_json_normalize( return normalized_json_object -def _validate_meta(meta: list) -> None: +def _validate_meta(meta: str | list[str | list[str]]) -> None: """ Validate that meta parameter contains only strings or lists of strings. Parameters ---------- - meta : list + meta : str or list of str or list of list of str The meta parameter to validate. Raises @@ -281,6 +281,8 @@ def _validate_meta(meta: list) -> None: TypeError If meta contains elements that are not strings or lists of strings.
""" + if isinstance(meta, str): + return for item in meta: if isinstance(item, list): for subitem in item: From 6906eb1101b14e1609c67c35f52efcce7371ee6f Mon Sep 17 00:00:00 2001 From: Austin <504977925@qq.com> Date: Wed, 19 Nov 2025 14:43:59 +0800 Subject: [PATCH 8/8] Remove string meta tests from json_normalize Removed tests for string meta in json_normalize. --- pandas/tests/io/json/test_normalize.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index 26feb5dc0d916..545820879e651 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -938,10 +938,3 @@ def test_json_normalize_meta_string_validation(self): with pytest.raises(TypeError, match="must be strings"): json_normalize(data, record_path=["nested"], meta=[12]) - - # Test string meta works correctly - result1 = json_normalize(data, meta=["a"]) - assert "a" in result1.columns - - result2 = json_normalize(data, record_path=["nested"], meta=["a"]) - assert "a" in result2.columns