@@ -321,6 +321,7 @@ def make_trace_kwargs(args, trace_spec, trace_data, mapping_labels, sizeref):
                     and args["y"]
                     and len(trace_data[[args["x"], args["y"]]].dropna()) > 1
                 ):
+
                     # sorting is bad but trace_specs with "trendline" have no other attrs
                     sorted_trace_data = trace_data.sort_values(by=args["x"])
                     y = sorted_trace_data[args["y"]].values
@@ -561,6 +562,7 @@ def set_cartesian_axis_opts(args, axis, letter, orders):


 def configure_cartesian_marginal_axes(args, fig, orders):
+
     if "histogram" in [args["marginal_x"], args["marginal_y"]]:
         fig.layout["barmode"] = "overlay"

@@ -883,8 +885,8 @@ def make_trace_spec(args, constructor, attrs, trace_patch):
 def make_trendline_spec(args, constructor):
     trace_spec = TraceSpec(
         constructor=go.Scattergl
-        if constructor == go.Scattergl
-        else go.Scatter,  # could be contour
+        if constructor == go.Scattergl  # could be contour
+        else go.Scatter,
         attrs=["trendline"],
         trace_patch=dict(mode="lines"),
         marginal=None,
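For context, a minimal sketch (not part of the commit) of how the conditional expression above resolves: only a WebGL `go.Scattergl` base constructor keeps WebGL for the trendline overlay; any other constructor, such as the `go.Contour` used by `px.density_contour` (the "could be contour" case), falls back to an SVG `go.Scatter`.

```python
# Illustrative only: the same ternary as in make_trendline_spec, pulled out
# so the fallback is easy to see. `go` is plotly.graph_objects.
import plotly.graph_objects as go


def trendline_constructor(constructor):
    # WebGL scatter keeps WebGL; contour (and anything else) gets go.Scatter.
    return go.Scattergl if constructor == go.Scattergl else go.Scatter


assert trendline_constructor(go.Scattergl) is go.Scattergl
assert trendline_constructor(go.Contour) is go.Scatter
```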
@@ -1062,25 +1064,14 @@ def _escape_col_name(df_input, col_name, extra):
     return col_name


-def to_unindexed_series(x, name=None):
+def to_unindexed_series(x):
     """
-    assuming x is list-like or even an existing pd.Series, return a new pd.DataFrame
-    with no index, without extracting the data from an existing Series via numpy, which
+    assuming x is list-like or even an existing pd.Series, return a new pd.Series with
+    no index, without extracting the data from an existing Series via numpy, which
     seems to mangle datetime columns. Stripping the index from existing pd.Series is
-    required to get things to match up right in the new DataFrame we're building.
-    It's converted to a frame so that it can be concated easily and it contains
-    `columns` attribute, so `_get_cols` can be used.
+    required to get things to match up right in the new DataFrame we're building
     """
-    return pd.Series(x, name=name).reset_index(drop=True).to_frame()
-
-
-def _get_cols(df_list):
-    """
-    get all the columns in the current df_list.
-    Since this func is called when we raise error, the func is called once.
-    So inefficiency here can be tolerated.
-    """
-    return [column for df in df_list for column in df.columns]
+    return pd.Series(x).reset_index(drop=True)


 def process_args_into_dataframe(args, wide_mode, var_name, value_name):
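A minimal sketch (illustrative sample data, not from the commit) of the behavior the restored docstring describes: rebuilding the Series directly keeps the datetime dtype, and `reset_index(drop=True)` strips the original index so values line up positionally in the DataFrame being assembled.

```python
# Illustrative only: why the Series is rebuilt rather than round-tripped
# through numpy, and why the index is dropped.
import pandas as pd


def to_unindexed_series(x):
    # same one-liner as the + side of the hunk above
    return pd.Series(x).reset_index(drop=True)


s = pd.Series(pd.to_datetime(["2023-01-01", "2023-06-01"]), index=[10, 20])
out = to_unindexed_series(s)
assert str(out.dtype) == "datetime64[ns]"  # datetime dtype preserved
assert list(out.index) == [0, 1]           # original index dropped
# by contrast, extracting via .to_numpy()/.values can lose dtype information
# for some datetime columns (e.g. tz-aware data), per the docstring above
```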
@@ -1095,11 +1086,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
     df_input = args["data_frame"]
     df_provided = df_input is not None

-    # we use append it as list to avoid performance issues in pandas
-    # when dealing with large dataframes.
-    df_outputs = []
-    constants = {}
-    ranges = []
+    df_output = pd.DataFrame()
+    constants = dict()
+    ranges = list()
     wide_id_vars = set()
     reserved_names = _get_reserved_col_names(args) if df_provided else set()

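For reference, a small sketch (column names invented) contrasting the two construction strategies this hunk switches between: the removed code collected per-argument single-column frames in a list for one `pd.concat` at the end, while the restored code assigns each processed argument into `df_output` column by column. Both produce the same frame for well-formed inputs.

```python
# Illustrative comparison only, not part of the commit.
import pandas as pd

x = [1, 2, 3]
y = [4.0, 5.0, 6.0]

# Restored approach: grow one DataFrame by column assignment.
df_output = pd.DataFrame()
df_output["x"] = pd.Series(x).reset_index(drop=True)
df_output["y"] = pd.Series(y).reset_index(drop=True)

# Removed approach: accumulate single-column frames, concat once at the end.
df_outputs = [pd.Series(x, name="x").to_frame(), pd.Series(y, name="y").to_frame()]
df_concat = pd.concat(df_outputs, axis=1)

assert df_output.equals(df_concat)
```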
@@ -1110,7 +1099,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                 "No data were provided. Please provide data either with the `data_frame` or with the `dimensions` argument."
             )
         else:
-            df_outputs.append(df_input[df_input.columns])
+            df_output[df_input.columns] = df_input[df_input.columns]

     # hover_data is a dict
     hover_data_is_dict = (
@@ -1151,7 +1140,7 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
     # argument_list and field_list ready, iterate over them
     # Core of the loop starts here
     for i, (argument, field) in enumerate(zip(argument_list, field_list)):
-        length = len(df_outputs[0]) if len(df_outputs) else 0
+        length = len(df_output)
         if argument is None:
             continue
         col_name = None
@@ -1192,11 +1181,11 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                         % (
                             argument,
                             len(real_argument),
-                            str(_get_cols(df_outputs)),
+                            str(list(df_output.columns)),
                             length,
                         )
                     )
-                df_outputs.append(to_unindexed_series(real_argument, col_name))
+                df_output[col_name] = to_unindexed_series(real_argument)
             elif not df_provided:
                 raise ValueError(
                     "String or int arguments are only possible when a "
@@ -1225,13 +1214,13 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                         % (
                             field,
                             len(df_input[argument]),
-                            str(_get_cols(df_outputs)),
+                            str(list(df_output.columns)),
                             length,
                         )
                     )
                 else:
                     col_name = str(argument)
-                    df_outputs.append(to_unindexed_series(df_input[argument], col_name))
+                    df_output[col_name] = to_unindexed_series(df_input[argument])
         # ----------------- argument is likely a column / array / list.... -------
         else:
             if df_provided and hasattr(argument, "name"):
@@ -1258,9 +1247,9 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
                     "All arguments should have the same length. "
                     "The length of argument `%s` is %d, whereas the "
                     "length of previously-processed arguments %s is %d"
-                    % (field, len(argument), str(_get_cols(df_outputs)), length)
+                    % (field, len(argument), str(list(df_output.columns)), length)
                 )
-            df_outputs.append(to_unindexed_series(argument, str(col_name)))
+            df_output[str(col_name)] = to_unindexed_series(argument)

         # Finally, update argument with column name now that column exists
         assert col_name is not None, (
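The error path touched here is the one plotly.express users hit when array-like arguments disagree in length; the message now reports the columns of `df_output` instead of going through the removed `_get_cols` helper. A hypothetical repro (not part of the diff):

```python
# Hypothetical repro: mismatched argument lengths reach the ValueError
# whose message is adjusted in the hunk above.
import plotly.express as px

try:
    px.scatter(x=[1, 2, 3], y=[10, 20])  # y is one element short
except ValueError as err:
    print(err)  # "All arguments should have the same length. ..."
```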
@@ -1278,14 +1267,12 @@ def process_args_into_dataframe(args, wide_mode, var_name, value_name):
         if field_name != "wide_variable":
             wide_id_vars.add(str(col_name))

-    length = len(df_outputs[0])
-    df_outputs.extend([pd.Series(range(length), name=col_name) for col_name in ranges])
+    for col_name in ranges:
+        df_output[col_name] = range(len(df_output))

-    df_outputs.extend(
-        [pd.Series(constants[col_name], name=col_name) for col_name in constants]
-    )
+    for col_name in constants:
+        df_output[col_name] = constants[col_name]

-    df_output = pd.concat(df_outputs, axis=1)
     return df_output, wide_id_vars

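A small sketch (column names invented) of what the restored loops do after the main argument loop: columns listed in `ranges` become 0..n-1 counters sized to the frame, and entries in `constants` broadcast a scalar down the whole column.

```python
# Illustrative only: how the two restored loops fill df_output.
import pandas as pd

df_output = pd.DataFrame({"value": [10, 20, 30]})
ranges = ["index"]            # hypothetical index-like column request
constants = {"label": "all"}  # hypothetical constant column

for col_name in ranges:
    df_output[col_name] = range(len(df_output))  # 0, 1, 2

for col_name in constants:
    df_output[col_name] = constants[col_name]    # scalar broadcasts to every row

print(df_output)
#    value  index label
# 0     10      0   all
# 1     20      1   all
# 2     30      2   all
```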