Skip to content

Commit 519cc68

Browse files
committed
better comments
1 parent bcfef68 commit 519cc68

File tree

2 files changed

+35
-18
lines changed

2 files changed

+35
-18
lines changed

packages/python/plotly/plotly/express/_core.py

Lines changed: 31 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1480,7 +1480,6 @@ def build_dataframe(args, constructor):
14801480
# data_frame is PySpark: it does not support interchange protocol and it is not
14811481
# integrated in Narwhals. We use its native method to convert it to pandas.
14821482
elif hasattr(args["data_frame"], "toPandas"):
1483-
# data_frame is PySpark:
14841483
args["data_frame"] = nw.from_native(
14851484
args["data_frame"].toPandas(), eager_only=True
14861485
)
@@ -1603,8 +1602,10 @@ def build_dataframe(args, constructor):
16031602
value_name = _escape_col_name(columns, "value", [])
16041603
var_name = _escape_col_name(columns, var_name, [])
16051604

1605+
# If the data_frame has interchange-only support levelin Narwhals, then we need to
1606+
# convert it to a full support level backend.
1607+
# Hence we convert requires Interchange to PyArrow.
16061608
if needs_interchanging:
1607-
# Interchange to PyArrow
16081609
if wide_mode:
16091610
args["data_frame"] = nw.from_native(
16101611
args["data_frame"].to_arrow(), eager_only=True
@@ -1908,25 +1909,38 @@ def process_dataframe_hierarchy(args):
19081909
n_bytes=16, columns=[*path, count_colname]
19091910
)
19101911

1912+
# In theory, for discrete columns aggregation, we should have a way to do
1913+
# `.agg(nw.col(x).unique())` in group_by and successively unpack/parse it as:
1914+
# ```
1915+
# (nw.when(nw.col(x).list.len()==1)
1916+
# .then(nw.col(x).list.first())
1917+
# .otherwise(nw.lit("(?)"))
1918+
# )
1919+
# ```
1920+
# which replicates the original pandas only codebase:
1921+
# ```
1922+
# def discrete_agg(x):
1923+
# uniques = x.unique()
1924+
# return uniques[0] if len(uniques) == 1 else "(?)"
1925+
#
1926+
# df.groupby(path[i:]).agg(...)
1927+
# ```
1928+
# However this is not possible, therefore the following workaround is provided.
1929+
# We make two aggregations for the same column:
1930+
# - take the max value
1931+
# - take the number of unique values
1932+
# Finally, after the group by statement, it is unpacked via:
1933+
# ```
1934+
# (nw.when(nw.col(col_n_unique) == 1)
1935+
# .then(nw.col(col_max_value)) # which is the unique value
1936+
# .otherwise(nw.lit("(?)"))
1937+
# )
1938+
# ```
1939+
19111940
if args["color"]:
19121941
if discrete_color:
19131942

19141943
discrete_aggs.append(args["color"])
1915-
# Hack: In theory, we should have a way to do `.agg(nw.col(x).unique())` and
1916-
# successively unpack/parse it as:
1917-
# ```
1918-
# (nw.when(nw.col(x).list.len()==1)
1919-
# .then(nw.col(x).list.first())
1920-
# .otherwise(nw.lit("(?)"))
1921-
# )
1922-
# ```
1923-
# which replicates:
1924-
# ```
1925-
# def discrete_agg(x):
1926-
# uniques = x.unique()
1927-
# return uniques[0] if len(uniques) == 1 else "(?)"
1928-
# ```
1929-
# However we cannot do that just yet, therefore a workaround is provided
19301944
agg_f[args["color"]] = nw.col(args["color"]).max()
19311945
agg_f[f'{args["color"]}_{n_unique_token}__'] = (
19321946
nw.col(args["color"])

packages/python/plotly/plotly/express/trendline_functions/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,10 @@ def _pandas(mode, trendline_options, x_raw, y, non_missing):
124124

125125
series = pd.Series(np.copy(y), index=x_raw.to_pandas())
126126

127-
# TODO: If narwhals were to support rolling, ewm and expanding then we could go around these
127+
# TODO: Narwhals Series/DataFrame do not support rolling, ewm nor expanding, therefore
128+
# it fallbacks to pandas Series independently of the original type.
129+
# Plotly issue: https://github.com/plotly/plotly.py/issues/4834
130+
# Narwhals issue: https://github.com/narwhals-dev/narwhals/issues/1254
128131
agg = getattr(series, mode) # e.g. series.rolling
129132
agg_obj = agg(**trendline_options) # e.g. series.rolling(**opts)
130133
function = getattr(agg_obj, function_name) # e.g. series.rolling(**opts).mean

0 commit comments

Comments
 (0)