BUG: Handle non-dict items in json_normalize with max_level

parthava-adabala · parthava-adabala · commit 4748ac4edc37 · 2025-10-25T16:41:28.000-05:00
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -1105,6 +1105,7 @@ I/O
 - Bug in :meth:`DataFrame.to_string` that raised ``StopIteration`` with nested DataFrames. (:issue:`16098`)
 - Bug in :meth:`HDFStore.get` was failing to save data of dtype datetime64[s] correctly (:issue:`59004`)
 - Bug in :meth:`HDFStore.select` causing queries on categorical string columns to return unexpected results (:issue:`57608`)
+- Bug in :func:`pandas.json_normalize` raising ``AttributeError`` when ``max_level`` was set and the input data contained non-dict entries such as ``NaN`` (:issue:`62829`)
 - Bug in :meth:`MultiIndex.factorize` incorrectly raising on length-0 indexes (:issue:`57517`)
 - Bug in :meth:`read_csv` causing segmentation fault when ``encoding_errors`` is not a string. (:issue:`59059`)
 - Bug in :meth:`read_csv` for the ``c`` and ``python`` engines where parsing numbers with large exponents caused overflows. Now, numbers with large positive exponents are parsed as ``inf`` or ``-inf`` depending on the sign of the mantissa, while those with large negative exponents are parsed as ``0.0`` (:issue:`62617`, :issue:`38794`, :issue:`62740`)
diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py
@@ -117,6 +117,9 @@ def nested_to_record(
         singleton = True
     new_ds = []
     for d in ds:
+        if not isinstance(d, dict):
+            new_ds.append({})
+            continue
         new_d = copy.deepcopy(d)
         for k, v in d.items():
             # each key gets renamed with prefix
@@ -517,7 +520,7 @@ def _pull_records(js: dict[str, Any], spec: list | str) -> list:
         return DataFrame(_simple_json_normalize(data, sep=sep), index=index)
 
     if record_path is None:
-        if any([isinstance(x, dict) for x in y.values()] for y in data):
+        if any(isinstance(y, dict) for y in data):
             # naive normalization, this is idempotent for flat records
             # and potentially will inflate the data considerably for
             # deeply nested structures:
diff --git a/pandas/tests/io/json/test_normalize.py b/pandas/tests/io/json/test_normalize.py
@@ -511,6 +511,19 @@ def test_max_level_with_records_path(self, max_level, expected):
         expected_df = DataFrame(data=expected, columns=result.columns.values)
         tm.assert_equal(expected_df, result)
 
+    def test_json_normalize_max_level_with_nan(self):
+        # GH 62829 - max_level=0 raised AttributeError when the input list contained NaN
+        d = {
+            1: {"id": 10, "status": "AVAL"},
+            2: {"id": 30, "status": "AVAL", "items": {"id": 12, "size": 20}},
+            3: {"id": 50, "status": "AVAL", "items": {"id": 13, "size": 30}},
+        }
+        df = DataFrame.from_dict(d, orient="index")
+        data_list = df["items"].tolist()
+        expected = DataFrame({"id": [np.nan, 12.0, 13.0], "size": [np.nan, 20.0, 30.0]})
+        result = json_normalize(data_list, max_level=0)
+        tm.assert_frame_equal(result, expected)
+
     def test_nested_flattening_consistent(self):
         # see gh-21537
         df1 = json_normalize([{"A": {"B": 1}}])