Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1175,6 +1175,7 @@ I/O
- Fix bug in ``on_bad_lines`` callable when returning too many fields: now emits
``ParserWarning`` and truncates extra fields regardless of ``index_col`` (:issue:`61837`)
- Bug in :func:`pandas.json_normalize` inconsistently handling non-dict items in ``data`` when ``max_level`` was set. The function will now raise a ``TypeError`` if ``data`` is a list containing non-dict items (:issue:`62829`)
- Bug in :func:`pandas.json_normalize` raising ``TypeError`` when ``meta`` contained a non-string key (e.g., ``int``) and ``record_path`` was specified, which was inconsistent with the behavior when ``record_path`` was ``None`` (:issue:`63019`)
- Bug in :meth:`.DataFrame.to_json` when ``"index"`` was a value in the :attr:`DataFrame.columns` and :attr:`Index.name` was ``None``. Now, this will fail with a ``ValueError`` (:issue:`58925`)
- Bug in :meth:`.io.common.is_fsspec_url` not recognizing chained fsspec URLs (:issue:`48978`)
- Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`)
Expand Down
19 changes: 15 additions & 4 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,7 +552,16 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
lengths = []

meta_vals: DefaultDict = defaultdict(list)
meta_keys = [sep.join(val) for val in _meta]
meta_keys = []
for val in _meta:
if len(val) == 1:
# Simple path: [12] -> 12 (preserves int type for consistency)
# Use the key directly, avoiding sep.join
meta_keys.append(val[0])
else:
# Nested path: ['info', 'governor'] -> "info.governor"
# Must join, converting all parts to str to avoid TypeError
meta_keys.append(sep.join(str(x) for x in val))

def _recursive_extract(data, path, seen_meta, level: int = 0) -> None:
if isinstance(data, dict):
Expand All @@ -568,9 +577,11 @@ def _recursive_extract(data, path, seen_meta, level: int = 0) -> None:
for obj in data:
recs = _pull_records(obj, path[0])
recs = [
nested_to_record(r, sep=sep, max_level=max_level)
if isinstance(r, dict)
else r
(
nested_to_record(r, sep=sep, max_level=max_level)
if isinstance(r, dict)
else r
)
for r in recs
]

Expand Down
29 changes: 29 additions & 0 deletions pandas/tests/io/json/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,6 +593,35 @@ def test_series_index(self, state_data):
result = json_normalize(series, "counties")
tm.assert_index_equal(result.index, idx.repeat([3, 2]))

def test_json_normalize_int_key_with_record_path(self):
    # GH#63019: an int key listed in ``meta`` together with ``record_path``
    # must be carried through as a column label rather than raising.
    records = [
        {"a": a_val, 12: meta_val, "nested": [{"b": b_val, "c": c_val}]}
        for a_val, meta_val, b_val, c_val in [
            (1, "meta_value_1", 2, 3),
            (6, "meta_value_2", 7, 8),
        ]
    ]

    normalized = json_normalize(records, record_path=["nested"], meta=[12, "a"])

    expected = DataFrame(
        {
            "b": [2, 7],
            "c": [3, 8],
            12: ["meta_value_1", "meta_value_2"],
            "a": [1, 6],
        },
        columns=["b", "c", 12, "a"],
    )
    # The "a" meta column is compared as object dtype, not as inferred int64.
    expected["a"] = expected["a"].astype(object)

    tm.assert_frame_equal(normalized, expected)


class TestNestedToRecord:
def test_flat_stays_flat(self):
Expand Down
Loading