Skip to content

Commit 2e45fe1

Browse files
committed
feat(toggl): simple column updates/conversions
* rename columns that can be imported as-is
* add static calculated columns
1 parent 1f2a722 commit 2e45fe1

File tree

2 files changed

+100
-70
lines changed

2 files changed

+100
-70
lines changed

.env.sample

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
HARVEST_CLIENT_NAME=Client1
12
HARVEST_DATA=data/harvest-sample.csv
23
TOGGL_DATA=data/toggl-sample.csv
34
TOGGL_USER_INFO=data/toggl-user-info-sample.json

notebooks/toggl-to-harvest.ipynb

Lines changed: 99 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,73 +1,102 @@
11
{
2-
"cells": [
3-
{
4-
"cell_type": "code",
5-
"execution_count": null,
6-
"metadata": {},
7-
"outputs": [],
8-
"source": [
9-
"import os\n",
10-
"from pathlib import Path\n",
11-
"import pandas as pd\n",
12-
"\n",
13-
"\n",
14-
"def str_timedelta(td):\n",
15-
" \"\"\"\n",
16-
" Convert a string formatted duration (e.g. 01:30) to a timedelta.\n",
17-
" \"\"\"\n",
18-
" return pd.to_timedelta(pd.to_datetime(td, format=\"%H:%M:%S\").strftime(\"%H:%M:%S\"))\n",
19-
"\n",
20-
"\n",
21-
"DATA_DIR = Path(\"./data\")\n",
22-
"DATA_SOURCE = Path(os.environ.get(\"TOGGL_DATA\", \"./data/toggl-sample.csv\"))"
23-
]
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": null,
6+
"metadata": {},
7+
"outputs": [],
8+
"source": [
9+
"import os\n",
10+
"from pathlib import Path\n",
11+
"import pandas as pd\n",
12+
"\n",
13+
"\n",
14+
"DATA_DIR = Path(\"./data\")\n",
15+
"DATA_SOURCE = Path(os.environ.get(\"TOGGL_DATA\", \"./data/toggl-sample.csv\"))\n",
16+
"\n",
17+
"USER_INFO_FILE = os.environ.get(\"TOGGL_USER_INFO\")\n",
18+
"\n",
19+
"CLIENT_NAME = os.environ.get(\"HARVEST_CLIENT_NAME\")\n",
20+
"\n",
21+
"\n",
22+
"def str_timedelta(td):\n",
23+
" \"\"\"\n",
24+
" Convert a string-formatted duration (e.g. 01:30:00) to a timedelta.\n",
25+
" \"\"\"\n",
26+
" return pd.to_timedelta(pd.to_datetime(td, format=\"%H:%M:%S\").strftime(\"%H:%M:%S\"))\n",
27+
"\n"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": null,
33+
"metadata": {},
34+
"outputs": [],
35+
"source": [
36+
"# assign category dtype for efficiency on repeating text columns\n",
37+
"dtypes = {\n",
38+
" \"Email\": \"category\",\n",
39+
" \"Task\": \"category\",\n",
40+
" \"Client\": \"category\"\n",
41+
"}\n",
42+
"# skip reading the columns we don't care about for Harvest\n",
43+
"cols = list(dtypes) + [\n",
44+
" \"Start date\",\n",
45+
" \"Start time\",\n",
46+
" \"Duration\",\n",
47+
"]\n",
48+
"# read CSV file, parsing dates and times\n",
49+
"source = pd.read_csv(DATA_SOURCE, dtype=dtypes, usecols=cols, parse_dates=[\"Start date\"], cache_dates=True)\n",
50+
"source[\"Start time\"] = source[\"Start time\"].apply(str_timedelta)\n",
51+
"source[\"Duration\"] = source[\"Duration\"].apply(str_timedelta)\n",
52+
"source.sort_values([\"Start date\", \"Start time\", \"Email\"], inplace=True)\n",
53+
"source.dtypes"
54+
]
55+
},
56+
{
57+
"cell_type": "code",
58+
"execution_count": null,
59+
"metadata": {},
60+
"outputs": [],
61+
"source": [
62+
"# rename columns that can be imported as-is\n",
63+
"source.rename(columns={\"Task\": \"Project\", \"Description\": \"Notes\", \"Start date\": \"Date\"}, inplace=True)\n",
64+
"source.dtypes"
65+
]
66+
},
67+
{
68+
"cell_type": "code",
69+
"execution_count": null,
70+
"metadata": {},
71+
"outputs": [],
72+
"source": [
73+
"# update static calculated columns\n",
74+
"source[\"Client\"] = CLIENT_NAME\n",
75+
"source[\"Client\"] = source[\"Client\"].astype(\"category\")\n",
76+
"source[\"Task\"] = \"Project Consulting\"\n",
77+
"source[\"Task\"] = source[\"Task\"].astype(\"category\")"
78+
]
79+
}
80+
],
81+
"metadata": {
82+
"kernelspec": {
83+
"display_name": "Python 3",
84+
"language": "python",
85+
"name": "python3"
86+
},
87+
"language_info": {
88+
"codemirror_mode": {
89+
"name": "ipython",
90+
"version": 3
91+
},
92+
"file_extension": ".py",
93+
"mimetype": "text/x-python",
94+
"name": "python",
95+
"nbconvert_exporter": "python",
96+
"pygments_lexer": "ipython3",
97+
"version": "3.11.6"
98+
}
2499
},
25-
{
26-
"cell_type": "code",
27-
"execution_count": null,
28-
"metadata": {},
29-
"outputs": [],
30-
"source": [
31-
"# assign category dtype for efficiency on repeating text columns\n",
32-
"dtypes = {\n",
33-
" \"Email\": \"category\",\n",
34-
" \"Task\": \"category\",\n",
35-
" \"Client\": \"category\"\n",
36-
"}\n",
37-
"# skip reading the columns we don't care about for Harvest\n",
38-
"cols = list(dtypes) + [\n",
39-
" \"Start date\",\n",
40-
" \"Start time\",\n",
41-
" \"Duration\",\n",
42-
"]\n",
43-
"# read CSV file, parsing dates and times\n",
44-
"source = pd.read_csv(DATA_SOURCE, dtype=dtypes, usecols=cols, parse_dates=[\"Start date\"], cache_dates=True)\n",
45-
"source[\"Start time\"] = source[\"Start time\"].apply(str_timedelta)\n",
46-
"source[\"Duration\"] = source[\"Duration\"].apply(str_timedelta)\n",
47-
"source.sort_values([\"Start date\", \"Start time\", \"Email\"], inplace=True)\n",
48-
"source.dtypes"
49-
]
50-
}
51-
],
52-
"metadata": {
53-
"kernelspec": {
54-
"display_name": "Python 3",
55-
"language": "python",
56-
"name": "python3"
57-
},
58-
"language_info": {
59-
"codemirror_mode": {
60-
"name": "ipython",
61-
"version": 3
62-
},
63-
"file_extension": ".py",
64-
"mimetype": "text/x-python",
65-
"name": "python",
66-
"nbconvert_exporter": "python",
67-
"pygments_lexer": "ipython3",
68-
"version": "3.11.6"
69-
}
70-
},
71-
"nbformat": 4,
72-
"nbformat_minor": 2
100+
"nbformat": 4,
101+
"nbformat_minor": 2
73102
}

0 commit comments

Comments
 (0)