use token in process_dataframe_hierarchy

FBruzzesi · FBruzzesi · commit 192e0a892676 · 2024-10-29T16:05:47.000+01:00
diff --git a/packages/python/plotly/plotly/express/_core.py b/packages/python/plotly/plotly/express/_core.py
@@ -1868,6 +1868,10 @@ def process_dataframe_hierarchy(args):
     discrete_aggs = []
     continuous_aggs = []
 
+    n_unique_token = nw.generate_temporary_column_name(
+        n_bytes=16, columns=[*path, count_colname]
+    )
+
     if args["color"]:
         if discrete_color:
 
@@ -1888,10 +1892,10 @@ def process_dataframe_hierarchy(args):
             # ```
             # However we cannot do that just yet, therefore a workaround is provided
             agg_f[args["color"]] = nw.col(args["color"]).max()
-            agg_f[f'{args["color"]}__plotly_n_unique__'] = (
+            agg_f[f'{args["color"]}_{n_unique_token}__'] = (
                 nw.col(args["color"])
                 .n_unique()
-                .alias(f'{args["color"]}__plotly_n_unique__')
+                .alias(f'{args["color"]}_{n_unique_token}__')
             )
         else:
             # This first needs to be multiplied by `count_colname`
@@ -1909,8 +1913,8 @@ def process_dataframe_hierarchy(args):
             # Similar trick as above
             discrete_aggs.append(col)
             agg_f[col] = nw.col(col).max()
-            agg_f[f"{col}__plotly_n_unique__"] = (
-                nw.col(col).n_unique().alias(f"{col}__plotly_n_unique__")
+            agg_f[f"{col}_{n_unique_token}__"] = (
+                nw.col(col).n_unique().alias(f"{col}_{n_unique_token}__")
             )
     # Avoid collisions with reserved names - columns in the path have been copied already
     cols = list(set(cols) - set(["labels", "parent", "id"]))
@@ -1930,12 +1934,12 @@ def post_agg(dframe: nw.LazyFrame, continuous_aggs, discrete_aggs) -> nw.LazyFra
         return dframe.with_columns(
             **{c: nw.col(c) / nw.col(count_colname) for c in continuous_aggs},
             **{
-                c: nw.when(nw.col(f"{c}__plotly_n_unique__") == 1)
+                c: nw.when(nw.col(f"{c}_{n_unique_token}__") == 1)
                 .then(nw.col(c))
                 .otherwise(nw.lit("(?)"))
                 for c in discrete_aggs
             },
-        ).drop([f"{c}__plotly_n_unique__" for c in discrete_aggs])
+        ).drop([f"{c}_{n_unique_token}__" for c in discrete_aggs])
 
     for i, level in enumerate(path):
 
@@ -1953,11 +1957,13 @@ def post_agg(dframe: nw.LazyFrame, continuous_aggs, discrete_aggs) -> nw.LazyFra
             id=nw.col(level).cast(nw.String()),
         )
         if i < len(path) - 1:
-            token = generate_unique_token(n_bytes=8, columns=df_tree.columns)
+            _concat_str_token = nw.generate_temporary_column_name(
+                n_bytes=8, columns=[*cols, "labels", "parent", "id"]
+            )
             df_tree = (
                 df_tree.with_columns(
                     **{
-                        token: nw.concat_str(
+                        _concat_str_token: nw.concat_str(
                             [
                                 nw.col(path[j]).cast(nw.String())
                                 for j in range(len(path) - 1, i, -1)
@@ -1969,14 +1975,14 @@ def post_agg(dframe: nw.LazyFrame, continuous_aggs, discrete_aggs) -> nw.LazyFra
                 .with_columns(
                     **{
                         "parent": nw.concat_str(
-                            [nw.col(token), nw.col("parent")], separator="/"
+                            [nw.col(_concat_str_token), nw.col("parent")], separator="/"
                         ),
                         "id": nw.concat_str(
-                            [nw.col(token), nw.col("id")], separator="/"
+                            [nw.col(_concat_str_token), nw.col("id")], separator="/"
                         ),
                     }
                 )
-                .drop(token)
+                .drop(_concat_str_token)
             )
 
         # strip "/" if at the end of the string, equivalent to `.str.rstrip`
diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_functions.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_functions.py
@@ -542,9 +542,7 @@ def check_label(label, fig):
     check_label("density of max of tip", fig)
 
 
-def test_timeline(request, constructor):
-    if "pyarrow_table" in str(constructor) or "polars_eager" in str(constructor):
-        request.applymarker(pytest.mark.xfail)
+def test_timeline(constructor):
 
     df = constructor(
         {
diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_hover.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_hover.py
@@ -191,10 +191,13 @@ def test_sunburst_hoverdict_color(constructor):
 
 def test_date_in_hover(request, constructor):
     if "pyarrow_table" in str(constructor) or "polars_eager" in str(constructor):
+        # fig.data[0].customdata[0][0] is a numpy.datetime64 for non-pandas
+        # input, and it loses its timezone when converted to a Python scalar
         request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(
         constructor({"date": ["2015-04-04 19:31:30+01:00"], "value": [3]})
     ).with_columns(date=nw.col("date").str.to_datetime(format="%Y-%m-%d %H:%M:%S%z"))
     fig = px.scatter(df.to_native(), x="value", y="value", hover_data=["date"])
+
     assert fig.data[0].customdata[0][0] == df.item(row=0, column="date")
diff --git a/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py b/packages/python/plotly/plotly/tests/test_optional/test_px/test_px_input.py
@@ -53,13 +53,13 @@ def test_with_index():
 
 def test_series(request, constructor):
     if "pyarrow_table" in str(constructor):
+        # By converting to native, we lose the name for pyarrow chunked_array
+        # and the assertions fail
         request.applymarker(pytest.mark.xfail)
 
     data = px.data.tips().to_dict(orient="list")
     tips = nw.from_native(constructor(data))
     before_tip = (tips.get_column("total_bill") - tips.get_column("tip")).to_native()
-    # By converting to native, we lose the name for pyarrow chunked_array and the last
-    # assertion fails
     day = tips.get_column("day").to_native()
     tips = tips.to_native()
 
@@ -74,6 +74,8 @@ def test_series(request, constructor):
 
 def test_several_dataframes(request, constructor):
     if "pyarrow_table" in str(constructor):
+        # By converting to native, we lose the name for pyarrow chunked_array
+        # and the assertions fail
         request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor(dict(x=[0, 1], y=[1, 10], z=[0.1, 0.8])))
@@ -153,6 +155,8 @@ def test_several_dataframes(request, constructor):
 
 def test_name_heuristics(request, constructor):
     if "pyarrow_table" in str(constructor):
+        # By converting to native, we lose the name for pyarrow chunked_array
+        # and the assertions fail
         request.applymarker(pytest.mark.xfail)
 
     df = nw.from_native(constructor(dict(x=[0, 1], y=[3, 4], z=[0.1, 0.2])))
@@ -482,6 +486,8 @@ def test_pass_df_columns(constructor):
 
 def test_size_column(request, constructor):
     if "pyarrow_table" in str(constructor):
+        # By converting to native, we lose the name for pyarrow chunked_array
+        # and the assertions fail
         request.applymarker(pytest.mark.xfail)
     data = px.data.tips().to_dict(orient="list")
     tips = nw.from_native(constructor(data))

Original file line number	Diff line number	Diff line change
`@@ -542,9 +542,7 @@ def check_label(label, fig):`
`542`	`542`	`    check_label("density of max of tip", fig)`
`543`	`543`
`544`	`544`
`545`		`-def test_timeline(request, constructor):`
`546`		`-    if "pyarrow_table" in str(constructor) or "polars_eager" in str(constructor):`
`547`		`-        request.applymarker(pytest.mark.xfail)`
	`545`	`+def test_timeline(constructor):`
`548`	`546`
`549`	`547`	`    df = constructor(`
`550`	`548`	`        {`