
Commit 39e9fab

Rename transformed_column param in transform_spark()

1 parent 4a67d14

7 files changed: +22 −22 lines

cli/cmd/init.go

Lines changed: 5 additions & 5 deletions

@@ -299,7 +299,7 @@ def create_estimator(run_config, model_config):
 # arg2: FLOAT
 `,

-"implementations/transformers/transformer.py": `def transform_spark(data, columns, args, transformed_column):
+"implementations/transformers/transformer.py": `def transform_spark(data, columns, args, transformed_column_name):
     """Transform a column in a PySpark context.

     This function is optional (recommended for large-scale data processing).
@@ -314,18 +314,18 @@ def create_estimator(run_config, model_config):
     args: A dict with the same structure as the transformer's input args
         containing the runtime values of the args.

-    transformed_column: The name of the column containing the transformed
+    transformed_column_name: The name of the column containing the transformed
         data that is to be appended to the dataframe.

     Returns:
-        The original 'data' dataframe with an added column with the name of the
-        transformed_column arg containing the transformed data.
+        The original 'data' dataframe with an added column named <transformed_column_name>
+        which contains the transformed data.
     """

     ## Sample transform_spark implementation:
     #
     # return data.withColumn(
-    #     transformed_column, ((data[columns["num"]] - args["mean"]) / args["stddev"])
+    #     transformed_column_name, ((data[columns["num"]] - args["mean"]) / args["stddev"])
     # )

     pass

docs/applications/implementations/transformers.md

Lines changed: 6 additions & 6 deletions

@@ -5,7 +5,7 @@ Transformers run both when transforming data before model training and when resp
 ## Implementation

 ```python
-def transform_spark(data, columns, args, transformed_column):
+def transform_spark(data, columns, args, transformed_column_name):
     """Transform a column in a PySpark context.

     This function is optional (recommended for large-scale data processing).
@@ -20,12 +20,12 @@ def transform_spark(data, columns, args, transformed_column):
     args: A dict with the same structure as the transformer's input args
         containing the runtime values of the args.

-    transformed_column: The name of the column containing the transformed
+    transformed_column_name: The name of the column containing the transformed
         data that is to be appended to the dataframe.

     Returns:
-        The original 'data' dataframe with an added column with the name of the
-        transformed_column arg containing the transformed data.
+        The original 'data' dataframe with an added column named <transformed_column_name>
+        which contains the transformed data.
     """
     pass

@@ -69,9 +69,9 @@ def reverse_transform_python(transformed_value, args):
 ## Example

 ```python
-def transform_spark(data, columns, args, transformed_column):
+def transform_spark(data, columns, args, transformed_column_name):
     return data.withColumn(
-        transformed_column, ((data[columns["num"]] - args["mean"]) / args["stddev"])
+        transformed_column_name, ((data[columns["num"]] - args["mean"]) / args["stddev"])
     )

 def transform_python(sample, args):
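
For reference, a minimal local driver exercising the renamed signature end to end. The SparkSession setup, the "age" column, and the arg values are illustrative assumptions, not part of this commit:

```python
# Minimal local driver for the normalize-style example above (values assumed).
from pyspark.sql import SparkSession

def transform_spark(data, columns, args, transformed_column_name):
    return data.withColumn(
        transformed_column_name, ((data[columns["num"]] - args["mean"]) / args["stddev"])
    )

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(1.0,), (2.0,), (3.0,)], ["age"])

# "columns" maps the transformer's input names to dataframe columns, and
# "args" carries the runtime arg values, per the docstring in the diff above.
out = transform_spark(
    df,
    columns={"num": "age"},
    args={"mean": 2.0, "stddev": 1.0},
    transformed_column_name="age_normalized",
)
out.show()  # original columns plus the appended "age_normalized" column
```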
Lines changed: 2 additions & 2 deletions

@@ -1,9 +1,9 @@
-def transform_spark(data, columns, args, transformed_column):
+def transform_spark(data, columns, args, transformed_column_name):
     import pyspark.sql.functions as F

    distribution = args["class_distribution"]

    return data.withColumn(
-        transformed_column,
+        transformed_column_name,
        F.when(data[columns["col"]] == 0, distribution[1]).otherwise(distribution[0]),
    )
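
With the transform_spark from this diff in scope, a hedged usage sketch; the distribution values and column names are assumptions, and the per-sample class-weighting reading (each row weighted by the opposite class's share) is an interpretation, not something stated in the commit:

```python
# Illustrative call with assumed values; requires the transform_spark
# defined in the diff above to be in scope.
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(0,), (1,), (1,)], ["label"])

out = transform_spark(
    df,
    columns={"col": "label"},
    args={"class_distribution": {0: 0.25, 1: 0.75}},
    transformed_column_name="sample_weight",
)
out.show()  # label 0 -> 0.75 (share of class 1), label 1 -> 0.25
```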

pkg/transformers/bucketize.py

Lines changed: 3 additions & 3 deletions

@@ -13,15 +13,15 @@
 # limitations under the License.


-def transform_spark(data, columns, args, transformed_column):
+def transform_spark(data, columns, args, transformed_column_name):
     from pyspark.ml.feature import Bucketizer
     import pyspark.sql.functions as F

     new_b = Bucketizer(
-        splits=args["bucket_boundaries"], inputCol=columns["num"], outputCol=transformed_column
+        splits=args["bucket_boundaries"], inputCol=columns["num"], outputCol=transformed_column_name
     )
     return new_b.transform(data).withColumn(
-        transformed_column, F.col(transformed_column).cast("int")
+        transformed_column_name, F.col(transformed_column_name).cast("int")
     )
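A hedged usage sketch for the updated bucketize transformer; the boundaries, column name, and output name are assumptions for illustration:

```python
# Illustrative invocation of bucketize.transform_spark (assumed values).
from pyspark.sql import SparkSession
from bucketize import transform_spark  # assumes pkg/transformers is importable

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([(5.0,), (15.0,), (25.0,)], ["price"])

out = transform_spark(
    df,
    columns={"num": "price"},
    args={"bucket_boundaries": [0.0, 10.0, 20.0, 30.0]},
    transformed_column_name="price_bucket",
)
out.show()  # "price_bucket" holds the bucket index (0, 1, 2) cast to int
```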
pkg/transformers/index_string.py

Lines changed: 3 additions & 3 deletions

@@ -13,16 +13,16 @@
 # limitations under the License.


-def transform_spark(data, columns, args, transformed_column):
+def transform_spark(data, columns, args, transformed_column_name):
     from pyspark.ml.feature import StringIndexerModel
     import pyspark.sql.functions as F

     indexer = StringIndexerModel.from_labels(
-        args["index"], inputCol=columns["text"], outputCol=transformed_column
+        args["index"], inputCol=columns["text"], outputCol=transformed_column_name
     )

     return indexer.transform(data).withColumn(
-        transformed_column, F.col(transformed_column).cast("int")
+        transformed_column_name, F.col(transformed_column_name).cast("int")
     )
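A similar hedged sketch for index_string; the label list and column names are illustrative assumptions:

```python
# Illustrative invocation of index_string.transform_spark (assumed values).
from pyspark.sql import SparkSession
from index_string import transform_spark  # assumes pkg/transformers is importable

spark = SparkSession.builder.master("local[1]").getOrCreate()
df = spark.createDataFrame([("low",), ("high",), ("medium",)], ["tier"])

out = transform_spark(
    df,
    columns={"text": "tier"},
    args={"index": ["low", "medium", "high"]},
    transformed_column_name="tier_index",
)
out.show()  # "tier_index" maps low -> 0, medium -> 1, high -> 2 as ints
```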
pkg/transformers/normalize.py

Lines changed: 2 additions & 2 deletions

@@ -13,9 +13,9 @@
 # limitations under the License.


-def transform_spark(data, columns, args, transformed_column):
+def transform_spark(data, columns, args, transformed_column_name):
     return data.withColumn(
-        transformed_column, ((data[columns["num"]] - args["mean"]) / args["stddev"])
+        transformed_column_name, ((data[columns["num"]] - args["mean"]) / args["stddev"])
     )
pkg/workloads/lib/context.py

Lines changed: 1 addition & 1 deletion

@@ -481,7 +481,7 @@ def resource_status_key(self, resource):

 TRANSFORMER_IMPL_VALIDATION = {
     "optional": [
-        {"name": "transform_spark", "args": ["data", "columns", "args", "transformed_column"]},
+        {"name": "transform_spark", "args": ["data", "columns", "args", "transformed_column_name"]},
         {"name": "reverse_transform_python", "args": ["transformed_value", "args"]},
         {"name": "transform_python", "args": ["sample", "args"]},
     ]
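
This context.py change matters because the spec above drives argument-name validation of user implementations; a stale name here would reject correctly renamed functions. A minimal sketch of how such a spec can be enforced (the helper below is hypothetical, not this repository's actual loader):

```python
import inspect

def validate_transformer_impl(module, spec):
    # Hypothetical checker: each optional function a user implements must
    # accept exactly the argument names listed in the validation spec.
    for fn_spec in spec["optional"]:
        fn = getattr(module, fn_spec["name"], None)
        if fn is None:
            continue  # optional functions may be omitted entirely
        actual = inspect.getfullargspec(fn).args
        if actual != fn_spec["args"]:
            raise ValueError(
                "{}: expected args {}, got {}".format(
                    fn_spec["name"], fn_spec["args"], actual
                )
            )
```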
