Skip to content

Commit 6495d29

Browse files
committed
refactor(toggl): extract Toggl DataFrame init
1 parent 3678edd commit 6495d29

File tree

2 files changed

+57
-22
lines changed

2 files changed

+57
-22
lines changed

compiler_admin/services/toggl.py

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,28 @@ def _get_last_name(email: str):
4949
return last_name
5050

5151

52+
def _prepare_input(source_path: str | TextIO, column_renames: dict = {}) -> pd.DataFrame:
53+
"""Parse and prepare CSV data from `source_path` into an initial `pandas.DataFrame`."""
54+
df = files.read_csv(source_path, usecols=TOGGL_COLUMNS, parse_dates=["Start date"], cache_dates=True)
55+
56+
df["Start time"] = df["Start time"].apply(_str_timedelta)
57+
df["Duration"] = df["Duration"].apply(_str_timedelta)
58+
59+
# assign First and Last name
60+
df["First name"] = df["Email"].apply(_get_first_name)
61+
df["Last name"] = df["Email"].apply(_get_last_name)
62+
63+
# calculate hours as a decimal from duration timedelta
64+
df["Hours"] = (df["Duration"].dt.total_seconds() / 3600).round(2)
65+
66+
df.sort_values(["Start date", "Start time", "Email"], inplace=True)
67+
68+
if column_renames:
69+
df.rename(columns=column_renames, inplace=True)
70+
71+
return df
72+
73+
5274
def _str_timedelta(td: str) -> pd.Timedelta:
    """Convert a string formatted duration (e.g. 01:30:15) to a timedelta.

    The input must match `%H:%M:%S` (hours, minutes and seconds); it is first
    parsed as a time of day, so a string in another shape is rejected by
    `pandas.to_datetime` instead of being converted.
    """
    # round-trip through a datetime to validate and normalize the HH:MM:SS text
    # before handing it to to_timedelta
    parsed = pd.to_datetime(td, format="%H:%M:%S")
    return pd.to_timedelta(parsed.strftime("%H:%M:%S"))
@@ -78,14 +100,9 @@ def convert_to_harvest(
78100
if client_name is None:
79101
client_name = os.environ.get("HARVEST_CLIENT_NAME")
80102

81-
# read CSV file, parsing dates and times
82-
source = files.read_csv(source_path, usecols=TOGGL_COLUMNS, parse_dates=["Start date"], cache_dates=True)
83-
source["Start time"] = source["Start time"].apply(_str_timedelta)
84-
source["Duration"] = source["Duration"].apply(_str_timedelta)
85-
source.sort_values(["Start date", "Start time", "Email"], inplace=True)
86-
87-
# rename columns that can be imported as-is
88-
source.rename(columns={"Project": "Project", "Description": "Notes", "Start date": "Date"}, inplace=True)
103+
source = _prepare_input(
104+
source_path=source_path, column_renames={"Project": "Project", "Description": "Notes", "Start date": "Date"}
105+
)
89106

90107
# update static calculated columns
91108
source["Client"] = client_name
@@ -95,13 +112,6 @@ def convert_to_harvest(
95112
project_info = files.JsonFileCache("TOGGL_PROJECT_INFO")
96113
source["Project"] = source["Project"].apply(lambda x: project_info.get(key=x, default=x))
97114

98-
# assign First and Last name
99-
source["First name"] = source["Email"].apply(_get_first_name)
100-
source["Last name"] = source["Email"].apply(_get_last_name)
101-
102-
# calculate hours as a decimal from duration timedelta
103-
source["Hours"] = (source["Duration"].dt.total_seconds() / 3600).round(2)
104-
105115
files.write_csv(output_path, source, columns=output_cols)
106116

107117

tests/services/test_toggl.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,15 @@
1010
import compiler_admin.services.toggl
1111
from compiler_admin.services.toggl import (
1212
__name__ as MODULE,
13-
files,
14-
TOGGL_COLUMNS,
15-
HARVEST_COLUMNS,
1613
_get_first_name,
1714
_get_last_name,
15+
_prepare_input,
1816
_str_timedelta,
1917
convert_to_harvest,
2018
download_time_entries,
19+
TOGGL_COLUMNS,
20+
HARVEST_COLUMNS,
21+
files,
2122
)
2223

2324

@@ -137,10 +138,9 @@ def test_str_timedelta():
137138
assert result.total_seconds() == (1 * 60 * 60) + (30 * 60) + 15
138139

139140

140-
def test_convert_to_harvest_mocked(toggl_file, spy_files, mock_google_user_info):
141-
mock_google_user_info.return_value = {}
142-
143-
convert_to_harvest(toggl_file, client_name=None)
141+
@pytest.mark.usefixtures("mock_google_user_info")
142+
def test_prepare_input(toggl_file, spy_files):
143+
df = _prepare_input(toggl_file)
144144

145145
spy_files.read_csv.assert_called_once()
146146
call_args = spy_files.read_csv.call_args
@@ -149,6 +149,31 @@ def test_convert_to_harvest_mocked(toggl_file, spy_files, mock_google_user_info)
149149
assert call_args.kwargs["parse_dates"] == ["Start date"]
150150
assert call_args.kwargs["cache_dates"] is True
151151

152+
df_cols = df.columns.to_list()
153+
assert set(df_cols) <= set(TOGGL_COLUMNS) or set(TOGGL_COLUMNS) <= set(df_cols)
154+
155+
assert "First name" in df_cols
156+
assert "Last name" in df_cols
157+
assert df["Start date"].dtype.name == "datetime64[ns]"
158+
assert df["Start time"].dtype.name == "timedelta64[ns]"
159+
assert df["Duration"].dtype.name == "timedelta64[ns]"
160+
assert df["Hours"].dtype.name == "float64"
161+
162+
df = _prepare_input(toggl_file, column_renames={"Start date": "SD", "Start time": "ST", "Duration": "D"})
163+
164+
assert "Start date" not in df.columns
165+
assert "Start time" not in df.columns
166+
assert "Duration" not in df.columns
167+
assert df["SD"].dtype.name == "datetime64[ns]"
168+
assert df["ST"].dtype.name == "timedelta64[ns]"
169+
assert df["D"].dtype.name == "timedelta64[ns]"
170+
171+
172+
def test_convert_to_harvest_mocked(toggl_file, spy_files, mock_google_user_info):
173+
mock_google_user_info.return_value = {}
174+
175+
convert_to_harvest(toggl_file, client_name=None)
176+
152177
spy_files.write_csv.assert_called_once()
153178
call_args = spy_files.write_csv.call_args
154179
assert sys.stdout in call_args[0]

0 commit comments

Comments
 (0)