From 4fd34357b00cfb9e15f89f45c64765648558ae32 Mon Sep 17 00:00:00 2001 From: skalwaghe-56 Date: Sat, 6 Sep 2025 16:01:02 +0530 Subject: [PATCH 1/2] =?UTF-8?q?BUG:=20json=5Fnormalize=20handles=20non?= =?UTF-8?q?=E2=80=91string=20keys=20in=20meta=20with=20record=5Fpath=20(GH?= =?UTF-8?q?#62264)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Coerce meta path elements to str when constructing meta_keys to avoid TypeError during sep.join and align with nested_to_record behavior. Add regression test in TestJSONNormalize to verify integer meta keys with record_path produce string-labeled columns and correct repetition. Closes GH#62264. --- pandas/io/json/_normalize.py | 2 +- pandas/tests/io/json/test_normalize.py | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 642408b35ba24..9c604d9cd5125 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -540,7 +540,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list: lengths = [] meta_vals: DefaultDict = defaultdict(list) - meta_keys = [sep.join(val) for val in _meta] + meta_keys = [sep.join(str(v) for v in val) for val in _meta] def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: if isinstance(data, dict): diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index cde0a7a378cff..31829eff351c3 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -569,6 +569,23 @@ def test_series_index(self, state_data): result = json_normalize(series, "counties") tm.assert_index_equal(result.index, idx.repeat([3, 2])) + def test_json_normalize_meta_int_key_with_record_path(self): + # GH#62264 + data = [{"name": "Alice", 12: 20, "purchases": [{"pid": 301}, {"pid": 302}]}] + + result = json_normalize(data, record_path=["purchases"], meta=[12, "name"]) + + 
expected = DataFrame( + { + "pid": [301, 302], + "12": np.array([20, 20], dtype=object), + "name": ["Alice", "Alice"], + }, + columns=["pid", "12", "name"], + ) + + tm.assert_frame_equal(result[["pid", "12", "name"]], expected) + class TestNestedToRecord: def test_flat_stays_flat(self): From 18d1fefeafd0cd3ca09d759c506dee42834e164c Mon Sep 17 00:00:00 2001 From: skalwaghe-56 Date: Sat, 6 Sep 2025 16:43:44 +0530 Subject: [PATCH 2/2] DOC: Update json_normalize docstring and latest whatsnewvX.X.X.rst file. --- doc/source/whatsnew/v2.3.3.rst | 1 + pandas/io/json/_normalize.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst index cbde6f52d4472..64f1dc51fd1a5 100644 --- a/doc/source/whatsnew/v2.3.3.rst +++ b/doc/source/whatsnew/v2.3.3.rst @@ -24,6 +24,7 @@ Bug fixes ^^^^^^^^^ - Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch`` with a compiled regex and custom flags (:issue:`62240`) +- Fixed bug in :func:`pandas.json_normalize` raising ``TypeError`` when non-string elements were used in ``meta`` with ``record_path``; ``meta`` path elements are now coerced to strings when forming column labels (:issue:`62264`) .. --------------------------------------------------------------------------- .. _whatsnew_233.contributors: diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 9c604d9cd5125..d396914c739af 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -292,6 +292,7 @@ def json_normalize( assumed to be an array of records. meta : list of paths (str or list of str), default None Fields to use as metadata for each record in resulting table. + Path elements are converted to strings before joining into column labels. meta_prefix : str, default None If True, prefix records with dotted path, e.g. foo.bar.field if meta is ['foo', 'bar']. 
@@ -322,6 +323,12 @@ def json_normalize( DataFrame : Two-dimensional, size-mutable, potentially heterogeneous tabular data. Series : One-dimensional ndarray with axis labels (including time series). + Notes + ----- + Column labels are constructed by joining path elements with ``sep`` + and are always strings after normalization; + non-string elements in ``meta`` paths are coerced to strings. + Examples -------- >>> data = [