@@ -38,6 +38,8 @@ class BaseDatasetValidator(BaseValidator, ABC):
3838 The path to the dataset file.
3939 dataset_df : pd.DataFrame, optional
4040 The dataset to validate.
41+ log_file_path : str, optional
42+ The path to the log file.
4143 """
4244
4345 def __init__ (
@@ -47,8 +49,18 @@ def __init__(
4749 dataset_config : Optional [Dict ] = None ,
4850 dataset_file_path : Optional [str ] = None ,
4951 dataset_df : Optional [pd .DataFrame ] = None ,
52+ log_file_path : Optional [str ] = None ,
5053 ):
5154 super ().__init__ (resource_display_name = "dataset" )
55+
56+ if log_file_path :
57+ bundle_file_handler = logging .FileHandler (log_file_path )
58+ bundle_formatter = logging .Formatter (
59+ "[%(asctime)s] - %(levelname)s - %(message)s"
60+ )
61+ bundle_file_handler .setFormatter (bundle_formatter )
62+ logger .addHandler (bundle_file_handler )
63+
5264 if dataset_df is not None and dataset_file_path :
5365 raise ValueError (
5466 "Both dataset_df and dataset_file_path are provided."
@@ -930,6 +942,7 @@ def get_validator(
930942 dataset_config : Optional [Dict ] = None ,
931943 dataset_file_path : Optional [str ] = None ,
932944 dataset_df : Optional [pd .DataFrame ] = None ,
945+ log_file_path : Optional [str ] = None ,
933946) -> BaseDatasetValidator :
934947 """Factory function to get the correct dataset validator for the task type.
935948
@@ -945,6 +958,8 @@ def get_validator(
945958 The path to the dataset file.
946959 dataset_df : pd.DataFrame, optional
947960 The dataset to validate.
961+ log_file_path : str, optional
962+ The path to the log file.
948963
949964 Returns
950965 -------
@@ -985,6 +1000,7 @@ def get_validator(
9851000 dataset_file_path = dataset_file_path ,
9861001 dataset_df = dataset_df ,
9871002 task_type = task_type ,
1003+ log_file_path = log_file_path ,
9881004 )
9891005 elif task_type == tasks .TaskType .TabularRegression :
9901006 return TabularRegressionDatasetValidator (
@@ -993,6 +1009,7 @@ def get_validator(
9931009 dataset_file_path = dataset_file_path ,
9941010 dataset_df = dataset_df ,
9951011 task_type = task_type ,
1012+ log_file_path = log_file_path ,
9961013 )
9971014 elif task_type == tasks .TaskType .TextClassification :
9981015 return TextClassificationDatasetValidator (
@@ -1001,6 +1018,7 @@ def get_validator(
10011018 dataset_file_path = dataset_file_path ,
10021019 dataset_df = dataset_df ,
10031020 task_type = task_type ,
1021+ log_file_path = log_file_path ,
10041022 )
10051023 elif task_type in [
10061024 tasks .TaskType .LLM ,
@@ -1015,6 +1033,7 @@ def get_validator(
10151033 dataset_file_path = dataset_file_path ,
10161034 dataset_df = dataset_df ,
10171035 task_type = task_type ,
1036+ log_file_path = log_file_path ,
10181037 )
10191038 else :
10201039 raise ValueError (f"Task type `{ task_type } ` is not supported." )
0 commit comments