diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6b78f63f92988..b8c5c442a4d32 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1175,6 +1175,7 @@ I/O - Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits ``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`) - Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`) +- Bug in :func:`pandas.json_normalize` raising ``TypeError`` when ``meta`` contained a non-string key (e.g., ``int``) and ``record_path`` was specified, which was inconsistent with the behavior when ``record_path`` was ``None`` (:issue:`63019`) - Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.column` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`) - Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`) - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 6194e699c12a8..3dec7349271d9 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -552,7 +552,16 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list: lengths = [] meta_vals: DefaultDict = defaultdict(list) - meta_keys = [sep.join(val) for val in _meta] + meta_keys = [] + for val in _meta: + if len(val) == 1: + # Simple path: [12] -> 12 (preserves int type for consistency) + # Use the key directly, avoiding sep.join + meta_keys.append(val[0]) + else: + # Nested path: ['info', 'governor'] -> "info.governor" + # Must join, converting all parts to str to avoid TypeError + meta_keys.append(sep.join(str(x) for x in val)) def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: if isinstance(data, dict): @@ -568,9 +577,11 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None: for obj in data: recs = _pull_records(obj, path[0]) recs = [ - nested_to_record(r, sep=sep, max_level=max_level) - if isinstance(r, dict) - else r + ( + nested_to_record(r, sep=sep, max_level=max_level) + if isinstance(r, dict) + else r + ) for r in recs ] diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py index f03fd235fef85..691c1765c59f0 100644 --- a/pandas/tests/io/json/test_normalize.py +++ b/pandas/tests/io/json/test_normalize.py @@ -593,6 +593,35 @@ def test_series_index(self, state_data): result = json_normalize(series, "counties") tm.assert_index_equal(result.index, idx.repeat([3, 2])) + def test_json_normalize_int_key_with_record_path(self): + # 63019 + data = [ + { + "a": 1, + 12: "meta_value_1", + "nested": [{"b": 2, "c": 3}], + }, + { + "a": 6, + 12: "meta_value_2", + "nested": [{"b": 7, "c": 8}], + }, + ] + + result = json_normalize(data, record_path=["nested"], meta=[12, "a"]) + + expected_data = { + "b": [2, 7], + "c": [3, 8], + 12: ["meta_value_1", "meta_value_2"], + "a": [1, 6], + } + expected_columns = ["b", "c", 12, "a"] + expected = DataFrame(expected_data, columns=expected_columns) + expected["a"] = expected["a"].astype(object) + + tm.assert_frame_equal(result, expected) + class TestNestedToRecord: def test_flat_stays_flat(self):