@@ -5,7 +5,7 @@ Transformers run both when transforming data before model training and when resp
55## Implementation
66
77``` python
8- def transform_spark(data, columns, args, transformed_column):
8+ def transform_spark(data, columns, args, transformed_column_name):
99 """ Transform a column in a PySpark context.
1010
1111 This function is optional (recommended for large-scale data processing).
@@ -20,12 +20,12 @@ def transform_spark(data, columns, args, transformed_column):
2020 args: A dict with the same structure as the transformer's input args
2121 containing the runtime values of the args.
2222
23- transformed_column : The name of the column containing the transformed
23+ transformed_column_name : The name of the column containing the transformed
2424 data that is to be appended to the dataframe.
2525
2626 Returns:
27- The original 'data' dataframe with an added column with the name of the
28- transformed_column arg containing the transformed data.
27+ The original 'data' dataframe with an added column named <transformed_column_name>
28+ which contains the transformed data.
2929 """
3030 pass
3131
@@ -69,9 +69,9 @@ def reverse_transform_python(transformed_value, args):
6969## Example
7070
7171``` python
72- def transform_spark(data, columns, args, transformed_column):
72+ def transform_spark(data, columns, args, transformed_column_name):
7373 return data.withColumn(
74- transformed_column, ((data[columns["num"]] - args["mean"]) / args["stddev"])
74+ transformed_column_name, ((data[columns["num"]] - args["mean"]) / args["stddev"])
7575 )
7676
7777def transform_python (sample , args ):
0 commit comments