diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst index cbde6f52d4472..64f1dc51fd1a5 100644 --- a/doc/source/whatsnew/v2.3.3.rst +++ b/doc/source/whatsnew/v2.3.3.rst @@ -24,6 +24,7 @@ Bug fixes ^^^^^^^^^ - Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch`` with a compiled regex and custom flags (:issue:`62240`) +- Fixed bug in :func:`pandas.json_normalize` raising ``TypeError`` when non-string elements were used in ``meta`` with ``record_path``; ``meta`` path elements are now coerced to strings when forming column labels (:issue:`62264`). .. --------------------------------------------------------------------------- .. _whatsnew_233.contributors: diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 642408b35ba24..d396914c739af 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -292,6 +292,7 @@ def json_normalize( assumed to be an array of records. meta : list of paths (str or list of str), default None Fields to use as metadata for each record in resulting table. + Path elements are converted to strings before joining into column labels. meta_prefix : str, default None If True, prefix records with dotted path, e.g. foo.bar.field if meta is ['foo', 'bar']. @@ -322,6 +323,12 @@ def json_normalize( DataFrame : Two-dimensional, size-mutable, potentially heterogeneous tabular data. Series : One-dimensional ndarray with axis labels (including time series). + Notes + ----- + Column labels are constructed by joining path elements with + ``sep`` and are always strings after normalization; + non-string elements in ``meta`` paths are coerced to strings. 
+ Examples -------- >>> data = [ @@ -540,7 +547,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list: lengths = [] meta_vals: DefaultDict = defaultdict(list) - meta_keys = [sep.join(val) for val in _meta] + meta_keys = [sep.join(str(v) for v in val) for val in _meta] def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: if isinstance(data, dict): diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index cde0a7a378cff..31829eff351c3 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -569,6 +569,23 @@ def test_series_index(self, state_data): result = json_normalize(series, "counties") tm.assert_index_equal(result.index, idx.repeat([3, 2])) + def test_json_normalize_meta_int_key_with_record_path(self): + # GH#62264 + data = [{"name": "Alice", 12: 20, "purchases": [{"pid": 301}, {"pid": 302}]}] + + result = json_normalize(data, record_path=["purchases"], meta=[12, "name"]) + + expected = DataFrame( + { + "pid": [301, 302], + "12": np.array([20, 20], dtype=object), + "name": ["Alice", "Alice"], + }, + columns=["pid", "12", "name"], + ) + + tm.assert_frame_equal(result[["pid", "12", "name"]], expected) + class TestNestedToRecord: def test_flat_stays_flat(self):