@@ -49,6 +49,28 @@ def _get_last_name(email: str):
4949 return last_name
5050
5151
def _prepare_input(source_path: str | TextIO, column_renames: dict | None = None) -> pd.DataFrame:
    """Parse and prepare CSV data from `source_path` into an initial `pandas.DataFrame`.

    Args:
        source_path: Location of the CSV data (a `str` or a text stream), handed
            unchanged to `files.read_csv`.
        column_renames: Optional mapping of existing column names to new column
            names. It is applied last, after the derived columns are added and
            the rows are sorted. `None` or an empty mapping leaves the column
            names untouched.

    Returns:
        The DataFrame read from `source_path`, restricted to `TOGGL_COLUMNS`, with
        "Start time" and "Duration" converted via `_str_timedelta`, the derived
        "First name", "Last name" and "Hours" columns added, rows sorted by
        "Start date", "Start time" and "Email", and `column_renames` applied.
    """
    # NOTE(review): presumably files.read_csv forwards these keyword arguments to
    # pandas.read_csv -- confirm against the files module.
    df = files.read_csv(source_path, usecols=TOGGL_COLUMNS, parse_dates=["Start date"], cache_dates=True)

    # Both columns arrive as strings; convert them to timedeltas so they can be
    # sorted and used in arithmetic below.
    df["Start time"] = df["Start time"].apply(_str_timedelta)
    df["Duration"] = df["Duration"].apply(_str_timedelta)

    # assign First and Last name
    df["First name"] = df["Email"].apply(_get_first_name)
    df["Last name"] = df["Email"].apply(_get_last_name)

    # calculate hours as a decimal from duration timedelta
    df["Hours"] = (df["Duration"].dt.total_seconds() / 3600).round(2)

    # Sort before renaming: the sort keys use the original column names.
    df = df.sort_values(["Start date", "Start time", "Email"])

    if column_renames:
        df = df.rename(columns=column_renames)

    return df
72+
73+
5274def _str_timedelta (td : str ):
5375 """Convert a string formatted duration (e.g. 01:30) to a timedelta."""
5476 return pd .to_timedelta (pd .to_datetime (td , format = "%H:%M:%S" ).strftime ("%H:%M:%S" ))
@@ -78,14 +100,9 @@ def convert_to_harvest(
78100 if client_name is None :
79101 client_name = os .environ .get ("HARVEST_CLIENT_NAME" )
80102
81- # read CSV file, parsing dates and times
82- source = files .read_csv (source_path , usecols = TOGGL_COLUMNS , parse_dates = ["Start date" ], cache_dates = True )
83- source ["Start time" ] = source ["Start time" ].apply (_str_timedelta )
84- source ["Duration" ] = source ["Duration" ].apply (_str_timedelta )
85- source .sort_values (["Start date" , "Start time" , "Email" ], inplace = True )
86-
87- # rename columns that can be imported as-is
88- source .rename (columns = {"Project" : "Project" , "Description" : "Notes" , "Start date" : "Date" }, inplace = True )
103+ source = _prepare_input (
104+ source_path = source_path , column_renames = {"Project" : "Project" , "Description" : "Notes" , "Start date" : "Date" }
105+ )
89106
90107 # update static calculated columns
91108 source ["Client" ] = client_name
@@ -95,13 +112,6 @@ def convert_to_harvest(
95112 project_info = files .JsonFileCache ("TOGGL_PROJECT_INFO" )
96113 source ["Project" ] = source ["Project" ].apply (lambda x : project_info .get (key = x , default = x ))
97114
98- # assign First and Last name
99- source ["First name" ] = source ["Email" ].apply (_get_first_name )
100- source ["Last name" ] = source ["Email" ].apply (_get_last_name )
101-
102- # calculate hours as a decimal from duration timedelta
103- source ["Hours" ] = (source ["Duration" ].dt .total_seconds () / 3600 ).round (2 )
104-
105115 files .write_csv (output_path , source , columns = output_cols )
106116
107117
0 commit comments