Skip to content

Commit a786c80

Browse files
committed
feat: data combinations on primitive types
1 parent 7bf784b commit a786c80

File tree

1 file changed

+362
-0
lines changed

1 file changed

+362
-0
lines changed
Lines changed: 362 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,362 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "721fe4af-316f-482a-97a5-e1936208891f",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"# Introduce data as list of tuples splitted by date\n",
11+
"\n",
12+
"orders_2022_02_04 = [\n",
13+
" (9423517, '2022-02-04', 9001),\n",
14+
" (4626232, '2022-02-04', 9003),\n",
15+
" (9423534, '2022-02-04', 9001)\n",
16+
"]\n",
17+
"\n",
18+
"orders_2022_02_05 = [\n",
19+
" (9423679, '2022-02-05', 9002),\n",
20+
" (4626377, '2022-02-05', 9003),\n",
21+
" (4626412, '2022-02-05', 9004)\n",
22+
"]\n",
23+
"\n",
24+
"orders_2022_02_06 = [\n",
25+
" (9423783, '2022-02-06', 9002),\n",
26+
" (4626490, '2022-02-06', 9004)\n",
27+
"]"
28+
]
29+
},
30+
{
31+
"cell_type": "code",
32+
"execution_count": 2,
33+
"id": "9fbc4405-e384-4767-8e51-5ac170d0b936",
34+
"metadata": {},
35+
"outputs": [],
36+
"source": [
37+
"# Join into a single list using `+` operator\n",
38+
"orders = orders_2022_02_04 + orders_2022_02_05 + orders_2022_02_06"
39+
]
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": 3,
44+
"id": "9650e7e8-34d8-4786-98d3-00a2f39cb55c",
45+
"metadata": {},
46+
"outputs": [
47+
{
48+
"data": {
49+
"text/plain": [
50+
"[(9423517, '2022-02-04', 9001),\n",
51+
" (4626232, '2022-02-04', 9003),\n",
52+
" (9423534, '2022-02-04', 9001),\n",
53+
" (9423679, '2022-02-05', 9002),\n",
54+
" (4626377, '2022-02-05', 9003),\n",
55+
" (4626412, '2022-02-05', 9004),\n",
56+
" (9423783, '2022-02-06', 9002),\n",
57+
" (4626490, '2022-02-06', 9004)]"
58+
]
59+
},
60+
"metadata": {},
61+
"output_type": "display_data"
62+
}
63+
],
64+
"source": [
65+
"display(orders)"
66+
]
67+
},
68+
{
69+
"cell_type": "code",
70+
"execution_count": 4,
71+
"id": "dca5fea0-132d-46c0-8f75-1cef5101be40",
72+
"metadata": {},
73+
"outputs": [],
74+
"source": [
75+
"# Dictionaries can be \"combined\" using `**` operator\n",
76+
"\n",
77+
"extra_fields_9423517 = {\n",
78+
" 'ShippingInstrustions' : {\n",
79+
" 'name' : 'John Silver',\n",
80+
" 'Phone' : [\n",
81+
" {\n",
82+
" 'type' : 'Office',\n",
83+
" 'number' : '809-123-9309'\n",
84+
" },\n",
85+
" {\n",
86+
" 'type' : 'Mobile',\n",
87+
" 'number' : '417-123-4567'\n",
88+
" }\n",
89+
" ]\n",
90+
" }\n",
91+
"}\n",
92+
"\n",
93+
"order_9423517 = {\n",
94+
" 'OrderNo': 9423517,\n",
95+
" 'Date': '2022-02-04',\n",
96+
" 'Empno': 9001\n",
97+
"}\n",
98+
"\n",
99+
"order_9423517 = {\n",
100+
" **order_9423517,\n",
101+
" **extra_fields_9423517\n",
102+
"}"
103+
]
104+
},
105+
{
106+
"cell_type": "code",
107+
"execution_count": 5,
108+
"id": "2cf0aad4-e3da-4e35-8e13-a3d204332cfd",
109+
"metadata": {},
110+
"outputs": [
111+
{
112+
"data": {
113+
"text/plain": [
114+
"{'OrderNo': 9423517,\n",
115+
" 'Date': '2022-02-04',\n",
116+
" 'Empno': 9001,\n",
117+
" 'ShippingInstrustions': {'name': 'John Silver',\n",
118+
" 'Phone': [{'type': 'Office', 'number': '809-123-9309'},\n",
119+
" {'type': 'Mobile', 'number': '417-123-4567'}]}}"
120+
]
121+
},
122+
"metadata": {},
123+
"output_type": "display_data"
124+
}
125+
],
126+
"source": [
127+
"display(order_9423517)"
128+
]
129+
},
130+
{
131+
"cell_type": "code",
132+
"execution_count": 6,
133+
"id": "f3c1d34b-7a0c-49fe-98c7-984bb12d2d96",
134+
"metadata": {},
135+
"outputs": [
136+
{
137+
"data": {
138+
"text/plain": [
139+
"[(9423517, '2022-02-04', 9001, 'Jeans', 'Rip Curl', 87.0, 1),\n",
140+
" (9423517, '2022-02-04', 9001, 'Jacket', 'The North Face', 112.0, 1),\n",
141+
" (4626232, '2022-02-04', 9003, 'Socks', 'Vans', 15.0, 1),\n",
142+
" (4626232, '2022-02-04', 9003, 'Jeans', 'Quiksilver', 82.0, 1),\n",
143+
" (9423534, '2022-02-04', 9001, 'Socks', 'DC', 10.0, 2),\n",
144+
" (9423534, '2022-02-04', 9001, 'Socks', 'Quiksilver', 12.0, 2),\n",
145+
" (9423679, '2022-02-05', 9002, 'T-shirt', 'Patagonia', 35.0, 1),\n",
146+
" (4626377, '2022-02-05', 9003, 'Hoody', 'Animal', 44.0, 1),\n",
147+
" (4626377, '2022-02-05', 9003, 'Cargo Shorts', 'Animal', 38.0, 1),\n",
148+
" (4626412, '2022-02-05', 9004, 'Shirt', 'Volcom', 78.0, 1),\n",
149+
" (9423783, '2022-02-06', 9002, 'Boxer Shorts', 'Superdry', 30.0, 2),\n",
150+
" (9423783, '2022-02-06', 9002, 'Shorts', 'Globe', 26.0, 1),\n",
151+
" (4626490, '2022-02-06', 9004, 'Cargo Shorts', 'Billabong', 54.0, 1),\n",
152+
" (4626490, '2022-02-06', 9004, 'Sweater', 'Dickies', 56.0, 1)]"
153+
]
154+
},
155+
"metadata": {},
156+
"output_type": "display_data"
157+
}
158+
],
159+
"source": [
160+
"details = [\n",
161+
" (9423517, 'Jeans', 'Rip Curl', 87.0, 1),\n",
162+
" (9423517, 'Jacket', 'The North Face', 112.0, 1),\n",
163+
" (4626232, 'Socks', 'Vans', 15.0, 1),\n",
164+
" (4626232, 'Jeans', 'Quiksilver', 82.0, 1),\n",
165+
" (9423534, 'Socks', 'DC', 10.0, 2),\n",
166+
" (9423534, 'Socks', 'Quiksilver', 12.0, 2),\n",
167+
" (9423679, 'T-shirt', 'Patagonia', 35.0, 1),\n",
168+
" (4626377, 'Hoody', 'Animal', 44.0, 1),\n",
169+
" (4626377, 'Cargo Shorts', 'Animal', 38.0, 1),\n",
170+
" (4626412, 'Shirt', 'Volcom', 78.0, 1),\n",
171+
" (9423783, 'Boxer Shorts', 'Superdry', 30.0, 2),\n",
172+
" (9423783, 'Shorts', 'Globe', 26.0, 1),\n",
173+
" (4626490, 'Cargo Shorts', 'Billabong', 54.0, 1),\n",
174+
" (4626490, 'Sweater', 'Dickies', 56.0, 1)\n",
175+
"]\n",
176+
"\n",
177+
"# Order Details must contain the tuples with matching order numbers, merge them into a single tuple, and store all the tuples in a list\n",
178+
"order_details = []\n",
179+
"\n",
180+
"for o in orders:\n",
181+
" for d in details:\n",
182+
" if o[0] == d[0]:\n",
183+
" # skip the first element as it is the order number which is already insterted by `o`\n",
184+
" order_details.append(o + d[1:])\n",
185+
"\n",
186+
"display(order_details)"
187+
]
188+
},
189+
{
190+
"cell_type": "markdown",
191+
"id": "e1423d07-ccd2-48a8-ad5e-961b827a7d63",
192+
"metadata": {},
193+
"source": [
194+
"In case we have missing \"columns\" (fields) in part of our data we might want to still join both sets but a default value must be specified at the moment of performing this join."
195+
]
196+
},
197+
{
198+
"cell_type": "code",
199+
"execution_count": 7,
200+
"id": "b44149a5-b26f-4882-b453-8ef306b9e63d",
201+
"metadata": {},
202+
"outputs": [],
203+
"source": [
204+
"# Append an item that doesnt matches order data\n",
205+
"details.append((4626592, 'Shorts', 'Protest', 48.0, 1))"
206+
]
207+
},
208+
{
209+
"cell_type": "code",
210+
"execution_count": 8,
211+
"id": "2e49208b-2387-4e1d-b896-0058bc3e097e",
212+
"metadata": {},
213+
"outputs": [
214+
{
215+
"ename": "IndexError",
216+
"evalue": "list index out of range",
217+
"output_type": "error",
218+
"traceback": [
219+
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
220+
"\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
221+
"Cell \u001b[0;32mIn[8], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Compact version of aboves loop\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m orders_details \u001b[38;5;241m=\u001b[39m [[o \u001b[38;5;28;01mfor\u001b[39;00m o \u001b[38;5;129;01min\u001b[39;00m orders \u001b[38;5;28;01mif\u001b[39;00m d[\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m==\u001b[39m o][\u001b[38;5;241m0\u001b[39m] \u001b[38;5;241m+\u001b[39m d[\u001b[38;5;241m1\u001b[39m:] \u001b[38;5;28;01mfor\u001b[39;00m d \u001b[38;5;129;01min\u001b[39;00m details]\n",
222+
"Cell \u001b[0;32mIn[8], line 2\u001b[0m, in \u001b[0;36m<listcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Compact version of aboves loop\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m orders_details \u001b[38;5;241m=\u001b[39m [\u001b[43m[\u001b[49m\u001b[43mo\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mo\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43morders\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43md\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mo\u001b[49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m+\u001b[39m d[\u001b[38;5;241m1\u001b[39m:] \u001b[38;5;28;01mfor\u001b[39;00m d \u001b[38;5;129;01min\u001b[39;00m details]\n",
223+
"\u001b[0;31mIndexError\u001b[0m: list index out of range"
224+
]
225+
}
226+
],
227+
"source": [
228+
"# Compact version of aboves loop\n",
229+
"orders_details = [[o for o in orders if d[0] == o][0] + d[1:] for d in details]"
230+
]
231+
},
232+
{
233+
"cell_type": "markdown",
234+
"id": "824f9684-034c-4350-b288-098cfba14d8b",
235+
"metadata": {},
236+
"source": [
237+
"When theres a unmatched order number the slice/range operation fails"
238+
]
239+
},
240+
{
241+
"cell_type": "code",
242+
"execution_count": 9,
243+
"id": "e1fd989c-ef73-4a85-8b40-5b9accdcc59f",
244+
"metadata": {},
245+
"outputs": [
246+
{
247+
"data": {
248+
"text/plain": [
249+
"[(9423517, '2022-02-04', 9001, 'Jeans', 'Rip Curl', 87.0, 1),\n",
250+
" (9423517, '2022-02-04', 9001, 'Jacket', 'The North Face', 112.0, 1),\n",
251+
" (4626232, '2022-02-04', 9003, 'Socks', 'Vans', 15.0, 1),\n",
252+
" (4626232, '2022-02-04', 9003, 'Jeans', 'Quiksilver', 82.0, 1),\n",
253+
" (9423534, '2022-02-04', 9001, 'Socks', 'DC', 10.0, 2),\n",
254+
" (9423534, '2022-02-04', 9001, 'Socks', 'Quiksilver', 12.0, 2),\n",
255+
" (9423679, '2022-02-05', 9002, 'T-shirt', 'Patagonia', 35.0, 1),\n",
256+
" (4626377, '2022-02-05', 9003, 'Hoody', 'Animal', 44.0, 1),\n",
257+
" (4626377, '2022-02-05', 9003, 'Cargo Shorts', 'Animal', 38.0, 1),\n",
258+
" (4626412, '2022-02-05', 9004, 'Shirt', 'Volcom', 78.0, 1),\n",
259+
" (9423783, '2022-02-06', 9002, 'Boxer Shorts', 'Superdry', 30.0, 2),\n",
260+
" (9423783, '2022-02-06', 9002, 'Shorts', 'Globe', 26.0, 1),\n",
261+
" (4626490, '2022-02-06', 9004, 'Cargo Shorts', 'Billabong', 54.0, 1),\n",
262+
" (4626490, '2022-02-06', 9004, 'Sweater', 'Dickies', 56.0, 1)]"
263+
]
264+
},
265+
"metadata": {},
266+
"output_type": "display_data"
267+
}
268+
],
269+
"source": [
270+
"# Fixes the index out of bounds issue but stills doesnt includes the new \n",
271+
"orders_details = [[o for o in orders if d[0] in o][0] + d[1:] for d in details if d[0] in [o[0] for o in orders]]\n",
272+
"display(orders_details)"
273+
]
274+
},
275+
{
276+
"cell_type": "code",
277+
"execution_count": 10,
278+
"id": "c0a67d43-1db7-458f-8d16-b1c713952145",
279+
"metadata": {},
280+
"outputs": [
281+
{
282+
"data": {
283+
"text/plain": [
284+
"[(9423517, '2022-02-04', 9001, 'Jeans', 'Rip Curl', 87.0, 1),\n",
285+
" (9423517, '2022-02-04', 9001, 'Jacket', 'The North Face', 112.0, 1),\n",
286+
" (4626232, '2022-02-04', 9003, 'Socks', 'Vans', 15.0, 1),\n",
287+
" (4626232, '2022-02-04', 9003, 'Jeans', 'Quiksilver', 82.0, 1),\n",
288+
" (9423534, '2022-02-04', 9001, 'Socks', 'DC', 10.0, 2),\n",
289+
" (9423534, '2022-02-04', 9001, 'Socks', 'Quiksilver', 12.0, 2),\n",
290+
" (9423679, '2022-02-05', 9002, 'T-shirt', 'Patagonia', 35.0, 1),\n",
291+
" (4626377, '2022-02-05', 9003, 'Hoody', 'Animal', 44.0, 1),\n",
292+
" (4626377, '2022-02-05', 9003, 'Cargo Shorts', 'Animal', 38.0, 1),\n",
293+
" (4626412, '2022-02-05', 9004, 'Shirt', 'Volcom', 78.0, 1),\n",
294+
" (9423783, '2022-02-06', 9002, 'Boxer Shorts', 'Superdry', 30.0, 2),\n",
295+
" (9423783, '2022-02-06', 9002, 'Shorts', 'Globe', 26.0, 1),\n",
296+
" (4626490, '2022-02-06', 9004, 'Cargo Shorts', 'Billabong', 54.0, 1),\n",
297+
" (4626490, '2022-02-06', 9004, 'Sweater', 'Dickies', 56.0, 1),\n",
298+
" (4626592, None, None, 'Shorts', 'Protest', 48.0, 1)]"
299+
]
300+
},
301+
"metadata": {},
302+
"output_type": "display_data"
303+
}
304+
],
305+
"source": [
306+
"# In order to include the missing order we must set a de-fault value when the orders list is missing an entry for a provided order detail\n",
307+
"order_details_right = [[o for o in orders if d[0] in o][0] + d[1:] if d[0] in [o[0] for o in orders] else (d[0], None, None) + d[1:] for d in details]\n",
308+
"display(order_details_right)"
309+
]
310+
},
311+
{
312+
"cell_type": "markdown",
313+
"id": "6ba43976-944a-4646-9cd4-bf4c0b74a5d9",
314+
"metadata": {},
315+
"source": [
316+
"Get a sum over all items"
317+
]
318+
},
319+
{
320+
"cell_type": "code",
321+
"execution_count": 11,
322+
"id": "5217b466-b184-4a54-84c6-acf882759bf6",
323+
"metadata": {},
324+
"outputs": [
325+
{
326+
"data": {
327+
"text/plain": [
328+
"779.0"
329+
]
330+
},
331+
"execution_count": 11,
332+
"metadata": {},
333+
"output_type": "execute_result"
334+
}
335+
],
336+
"source": [
337+
"sum(price * quantity for _, _, _, _, _, price, quantity in order_details_right)"
338+
]
339+
}
340+
],
341+
"metadata": {
342+
"kernelspec": {
343+
"display_name": "Python 3 (ipykernel)",
344+
"language": "python",
345+
"name": "python3"
346+
},
347+
"language_info": {
348+
"codemirror_mode": {
349+
"name": "ipython",
350+
"version": 3
351+
},
352+
"file_extension": ".py",
353+
"mimetype": "text/x-python",
354+
"name": "python",
355+
"nbconvert_exporter": "python",
356+
"pygments_lexer": "ipython3",
357+
"version": "3.9.17"
358+
}
359+
},
360+
"nbformat": 4,
361+
"nbformat_minor": 5
362+
}

0 commit comments

Comments
 (0)