@@ -321,7 +321,6 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
321321 and args ["y" ]
322322 and len (trace_data [[args ["x" ], args ["y" ]]].dropna ()) > 1
323323 ):
324-
325324 # sorting is bad but trace_specs with "trendline" have no other attrs
326325 sorted_trace_data = trace_data .sort_values (by = args ["x" ])
327326 y = sorted_trace_data [args ["y" ]].values
@@ -562,7 +561,6 @@ def set_cartesian_axis_opts(args, axis, letter, orders):
562561
563562
564563def configure_cartesian_marginal_axes (args , fig , orders ):
565-
566564 if "histogram" in [args ["marginal_x" ], args ["marginal_y" ]]:
567565 fig .layout ["barmode" ] = "overlay"
568566
@@ -1064,14 +1062,14 @@ def _escape_col_name(df_input, col_name, extra):
10641062 return col_name
10651063
10661064
def to_unindexed_series(x, name=None):
    """
    Build a fresh pd.Series from *x* (any list-like, including an existing
    pd.Series) with a default RangeIndex and, optionally, the given *name*.

    Constructing via pd.Series rather than extracting values through numpy
    keeps datetime columns intact; dropping the index ensures the values line
    up positionally in the new DataFrame being assembled. When *x* is already
    a named Series and *name* is None, the original name is preserved.
    """
    series = pd.Series(x, name=name)
    return series.reset_index(drop=True)
10751073
10761074
10771075def process_args_into_dataframe (args , wide_mode , var_name , value_name ):
@@ -1086,9 +1084,12 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
10861084 df_input = args ["data_frame" ]
10871085 df_provided = df_input is not None
10881086
1089- df_output = pd .DataFrame ()
1090- constants = dict ()
1091- ranges = list ()
1087+ # we use a dict instead of a dataframe directly so that it doesn't cause
1088+ # PerformanceWarning by pandas by repeatedly setting the columns.
1089+ # a dict is used instead of a list as the columns needs to be overwritten.
1090+ df_output = {}
1091+ constants = {}
1092+ ranges = []
10921093 wide_id_vars = set ()
10931094 reserved_names = _get_reserved_col_names (args ) if df_provided else set ()
10941095
@@ -1099,7 +1100,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
10991100 "No data were provided. Please provide data either with the `data_frame` or with the `dimensions` argument."
11001101 )
11011102 else :
1102- df_output [ df_input . columns ] = df_input [ df_input . columns ]
1103+ df_output = { col : series for col , series in df_input . items ()}
11031104
11041105 # hover_data is a dict
11051106 hover_data_is_dict = (
@@ -1140,7 +1141,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
11401141 # argument_list and field_list ready, iterate over them
11411142 # Core of the loop starts here
11421143 for i , (argument , field ) in enumerate (zip (argument_list , field_list )):
1143- length = len (df_output )
1144+ length = len (df_output [ next ( iter ( df_output ))]) if len ( df_output ) else 0
11441145 if argument is None :
11451146 continue
11461147 col_name = None
@@ -1181,11 +1182,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
11811182 % (
11821183 argument ,
11831184 len (real_argument ),
1184- str (list (df_output .columns )),
1185+ str (list (df_output .keys () )),
11851186 length ,
11861187 )
11871188 )
1188- df_output [col_name ] = to_unindexed_series (real_argument )
1189+ df_output [col_name ] = to_unindexed_series (real_argument , col_name )
11891190 elif not df_provided :
11901191 raise ValueError (
11911192 "String or int arguments are only possible when a "
@@ -1214,13 +1215,15 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
12141215 % (
12151216 field ,
12161217 len (df_input [argument ]),
1217- str (list (df_output .columns )),
1218+ str (list (df_output .keys () )),
12181219 length ,
12191220 )
12201221 )
12211222 else :
12221223 col_name = str (argument )
1223- df_output [col_name ] = to_unindexed_series (df_input [argument ])
1224+ df_output [col_name ] = to_unindexed_series (
1225+ df_input [argument ], col_name
1226+ )
12241227 # ----------------- argument is likely a column / array / list.... -------
12251228 else :
12261229 if df_provided and hasattr (argument , "name" ):
@@ -1247,9 +1250,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
12471250 "All arguments should have the same length. "
12481251 "The length of argument `%s` is %d, whereas the "
12491252 "length of previously-processed arguments %s is %d"
1250- % (field , len (argument ), str (list (df_output .columns )), length )
1253+ % (field , len (argument ), str (list (df_output .keys () )), length )
12511254 )
1252- df_output [str (col_name )] = to_unindexed_series (argument )
1255+ df_output [str (col_name )] = to_unindexed_series (argument , str ( col_name ) )
12531256
12541257 # Finally, update argument with column name now that column exists
12551258 assert col_name is not None , (
@@ -1267,12 +1270,19 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
12671270 if field_name != "wide_variable" :
12681271 wide_id_vars .add (str (col_name ))
12691272
1270- for col_name in ranges :
1271- df_output [col_name ] = range (len (df_output ))
1272-
1273- for col_name in constants :
1274- df_output [col_name ] = constants [col_name ]
1273+ length = len (df_output [next (iter (df_output ))]) if len (df_output ) else 0
1274+ df_output .update (
1275+ {col_name : pd .Series (range (length ), name = col_name ) for col_name in ranges }
1276+ )
1277+ df_output .update (
1278+ {
1279+ # constant is single value. repeat by len to avoid creating NaN on concating
1280+ col_name : pd .Series ([constants [col_name ]] * length , name = col_name )
1281+ for col_name in constants
1282+ }
1283+ )
12751284
1285+ df_output = pd .DataFrame (df_output )
12761286 return df_output , wide_id_vars
12771287
12781288
0 commit comments