From 5ccfe90c9d13411aaec57f138e1dbbf4f30bdaf8 Mon Sep 17 00:00:00 2001 From: FabianaMFZ Date: Sun, 5 May 2024 00:26:47 +0100 Subject: [PATCH 1/2] 'lab' --- lab_sql_7.ipynb | 851 ++++++++++++++++++++++++++++++++++++++++++++++++ lab_sql_7.sql | 50 +++ 2 files changed, 901 insertions(+) create mode 100644 lab_sql_7.ipynb create mode 100644 lab_sql_7.sql diff --git a/lab_sql_7.ipynb b/lab_sql_7.ipynb new file mode 100644 index 0000000..89fcde5 --- /dev/null +++ b/lab_sql_7.ipynb @@ -0,0 +1,851 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "a93ae413", + "metadata": {}, + "outputs": [], + "source": [ + "# Create a Python connection with SQL database and retrieve the results of the last two queries in sql file as dataframes.\n", + "# Hint: You can store the results from the two queries in two separate dataframes.\n", + "\n", + "# Write a function that checks if customer borrowed more or less films in the month of June as compared to May.\n", + "# Hint: For this part, you can create a join between the two dataframes created before, using the merge function available for pandas dataframes. 
" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f8e81b34", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import pymysql\n", + "from sqlalchemy import create_engine" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "1f33781a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "········\n" + ] + } + ], + "source": [ + "import getpass\n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "fea6ab84", + "metadata": {}, + "outputs": [], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/sakila'\n", + "engine = create_engine(connection_string)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "a69c861b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_update
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:53
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53
........................
16039160452005-08-23 22:25:26772142005-08-25 23:54:2612006-02-15 21:30:53
16040160462005-08-23 22:26:474364742005-08-27 18:02:4722006-02-15 21:30:53
16041160472005-08-23 22:42:4820881142005-08-25 02:48:4822006-02-15 21:30:53
16042160482005-08-23 22:43:0720191032005-08-31 21:33:0712006-02-15 21:30:53
16043160492005-08-23 22:50:1226663932005-08-30 01:01:1222006-02-15 21:30:53
\n", + "

16044 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "... ... ... ... ... \n", + "16039 16045 2005-08-23 22:25:26 772 14 \n", + "16040 16046 2005-08-23 22:26:47 4364 74 \n", + "16041 16047 2005-08-23 22:42:48 2088 114 \n", + "16042 16048 2005-08-23 22:43:07 2019 103 \n", + "16043 16049 2005-08-23 22:50:12 2666 393 \n", + "\n", + " return_date staff_id last_update \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 \n", + "... ... ... ... \n", + "16039 2005-08-25 23:54:26 1 2006-02-15 21:30:53 \n", + "16040 2005-08-27 18:02:47 2 2006-02-15 21:30:53 \n", + "16041 2005-08-25 02:48:48 2 2006-02-15 21:30:53 \n", + "16042 2005-08-31 21:33:07 1 2006-02-15 21:30:53 \n", + "16043 2005-08-30 01:01:12 2 2006-02-15 21:30:53 \n", + "\n", + "[16044 rows x 7 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data = pd.read_sql_query('select * from sakila.rental', engine)\n", + "data" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "7c124ce0", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_count
01978
11097
25067
3196
4536
.........
5155801
5165821
5175901
5185951
5195991
\n", + "

520 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_count\n", + "0 197 8\n", + "1 109 7\n", + "2 506 7\n", + "3 19 6\n", + "4 53 6\n", + ".. ... ...\n", + "515 580 1\n", + "516 582 1\n", + "517 590 1\n", + "518 595 1\n", + "519 599 1\n", + "\n", + "[520 rows x 2 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check the number of rentals for each customer for May\n", + "\n", + "# Create temporary table\n", + "create_table_query = '''\n", + "CREATE TEMPORARY TABLE sakila.rentals_may AS\n", + "SELECT \n", + " customer_id, \n", + " COUNT(rental_id) AS rental_count\n", + "FROM \n", + " sakila.rental\n", + "WHERE \n", + " MONTH(rental_date) = 5\n", + "GROUP BY \n", + " customer_id\n", + "ORDER BY \n", + " rental_count DESC\n", + "'''\n", + "\n", + "engine = create_engine(connection_string)\n", + "with engine.connect() as connection:\n", + " connection.execute(create_table_query)\n", + "\n", + "# Query the temporary table to get the data\n", + "select_query = 'SELECT * FROM sakila.rentals_may'\n", + "\n", + "rentals_may = pd.read_sql_query(select_query, engine)\n", + "rentals_may" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "b45cfe27", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_count
03111
145410
22139
32679
42959
.........
5855491
5865551
5875641
5885801
5895981
\n", + "

590 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_count\n", + "0 31 11\n", + "1 454 10\n", + "2 213 9\n", + "3 267 9\n", + "4 295 9\n", + ".. ... ...\n", + "585 549 1\n", + "586 555 1\n", + "587 564 1\n", + "588 580 1\n", + "589 598 1\n", + "\n", + "[590 rows x 2 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check the number of rentals for each customer for June\n", + "\n", + "# Create temporary table\n", + "create_table_query2 = '''\n", + "CREATE TEMPORARY TABLE sakila.rentals_june AS\n", + "SELECT \n", + " customer_id, \n", + " COUNT(rental_id) AS rental_count\n", + "FROM \n", + " sakila.rental\n", + "WHERE \n", + " MONTH(rental_date) = 6\n", + "GROUP BY \n", + " customer_id\n", + "ORDER BY \n", + " rental_count DESC\n", + "'''\n", + "\n", + "engine = create_engine(connection_string)\n", + "with engine.connect() as connection:\n", + " connection.execute(create_table_query2)\n", + "\n", + "# Query the temporary table to get the data \n", + "select_query2 = 'SELECT * FROM sakila.rentals_june'\n", + "\n", + "# Corrected variable name\n", + "rentals_june = pd.read_sql_query(select_query2, engine)\n", + "rentals_june" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "5315a6e9", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_count_mayrental_count_june
01978.08.0
11097.05.0
25067.05.0
3196.03.0
4536.05.0
............
593335NaN1.0
594370NaN1.0
595487NaN1.0
596555NaN1.0
597598NaN1.0
\n", + "

598 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_count_may rental_count_june\n", + "0 197 8.0 8.0\n", + "1 109 7.0 5.0\n", + "2 506 7.0 5.0\n", + "3 19 6.0 3.0\n", + "4 53 6.0 5.0\n", + ".. ... ... ...\n", + "593 335 NaN 1.0\n", + "594 370 NaN 1.0\n", + "595 487 NaN 1.0\n", + "596 555 NaN 1.0\n", + "597 598 NaN 1.0\n", + "\n", + "[598 rows x 3 columns]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "joined_rentals = rentals_may.merge(rentals_june, on='customer_id', suffixes=('_may', '_june'), how='outer')\n", + "joined_rentals" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "1cbbe8c5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_count_mayrental_count_junerentals_difference
01978.08.0borrowed_same_in_june
11097.05.0borrowed_less_in_june
25067.05.0borrowed_less_in_june
3196.03.0borrowed_less_in_june
4536.05.0borrowed_less_in_june
...............
5155801.01.0borrowed_same_in_june
5165821.03.0borrowed_more_in_june
5175901.05.0borrowed_more_in_june
5185951.02.0borrowed_more_in_june
5195991.04.0borrowed_more_in_june
\n", + "

512 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_count_may rental_count_june rentals_difference\n", + "0 197 8.0 8.0 borrowed_same_in_june\n", + "1 109 7.0 5.0 borrowed_less_in_june\n", + "2 506 7.0 5.0 borrowed_less_in_june\n", + "3 19 6.0 3.0 borrowed_less_in_june\n", + "4 53 6.0 5.0 borrowed_less_in_june\n", + ".. ... ... ... ...\n", + "515 580 1.0 1.0 borrowed_same_in_june\n", + "516 582 1.0 3.0 borrowed_more_in_june\n", + "517 590 1.0 5.0 borrowed_more_in_june\n", + "518 595 1.0 2.0 borrowed_more_in_june\n", + "519 599 1.0 4.0 borrowed_more_in_june\n", + "\n", + "[512 rows x 4 columns]" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Function to compare rental counts for May and June\n", + "def compare_rentals(df):\n", + " # Create a new column to indicate the comparison result\n", + " df['rentals_difference'] = df.apply(\n", + " lambda row: 'borrowed_more_in_june' if row['rental_count_june'] > row['rental_count_may'] else (\n", + " 'borrowed_less_in_june' if row['rental_count_june'] < row['rental_count_may'] else 'borrowed_same_in_june'), axis=1)\n", + " return df\n", + "\n", + "rentals_diff = compare_rentals(joined_rentals)\n", + "rentals_diff = rentals_diff.dropna()\n", + "rentals_diff" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lab_sql_7.sql b/lab_sql_7.sql new file mode 100644 index 0000000..cbf0326 --- /dev/null +++ b/lab_sql_7.sql @@ -0,0 +1,50 @@ +-- Create a table rentals_may to store the data from rental table with information for the month of May. 
+create temporary table sakila.rentals_may as
+select *
+from sakila.rental
+where month(rental_date) = 5;
+
+select *
+from sakila.rentals_may;
+
+-- Create a table rentals_june to store the data from rental table with information for the month of June.
+create temporary table sakila.rentals_june as
+select *
+from sakila.rental
+where month(rental_date) = 6;
+
+select *
+from sakila.rentals_june;
+
+-- Check the number of rentals for each customer for May.
+select customer_id, count(rental_id)
+from sakila.rentals_may
+group by customer_id
+order by count(rental_id) desc;
+
+-- Check the number of rentals for each customer for June.
+select customer_id, count(rental_id)
+from sakila.rentals_june
+group by customer_id
+order by count(rental_id) desc;
+
+-- Join temporary tables for May and June.
+
+CREATE TEMPORARY TABLE sakila.joined_rentals as
+SELECT m.customer_id, m.rental_id as rental_id_may, j.rental_id as rental_id_june
+FROM sakila.rentals_may m
+LEFT JOIN sakila.rentals_june j
+    ON m.customer_id = j.customer_id;
+
+select * from sakila.joined_rentals;
+
+CREATE TEMPORARY TABLE sakila.count_joined_rentals as
+select customer_id, count(rental_id_may) as rentals_may , count(rental_id_may) as rentals_june,
+case
+    when count(rentals_june) > count(rentals_may) then 'borrowed_more_in_june'
+    when count(rentals_june) < count(rentals_may) then 'borrowed_less_in_june'
+    when count(rentals_june) = count(rentals_may) then 'borrowed_same_in_june'
+end as rentals_difference
+from sakila.joined_rentals
+group by customer_id
+order by count(rentals_difference) desc;
\ No newline at end of file

From 98f00dd5e88fc3909038361d502c33b99e6fc7fd Mon Sep 17 00:00:00 2001
From: FabianaMFZ
Date: Sun, 5 May 2024 00:30:09 +0100
Subject: [PATCH 2/2] 'lab'

---
 lab_sql_7.sql | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/lab_sql_7.sql b/lab_sql_7.sql
index cbf0326..39c4e2a 100644
--- a/lab_sql_7.sql
+++ b/lab_sql_7.sql
@@ -36,15 +36,15 @@ FROM sakila.rentals_may m
 LEFT JOIN sakila.rentals_june j
     ON m.customer_id = j.customer_id;
 
-select * from sakila.joined_rentals;
-
-CREATE TEMPORARY TABLE sakila.count_joined_rentals as
-select customer_id, count(rental_id_may) as rentals_may , count(rental_id_may) as rentals_june,
+-- Compare how many films each customer rented in May versus June.
+-- joined_rentals has one row per (May rental, June rental) pair of a customer, so
+-- rental ids repeat; count(distinct ...) recovers the true number of rentals per month.
+-- Customers with no rentals in May are not in joined_rentals (it left-joins from May).
+select customer_id, count(distinct rental_id_may) as rentals_may, count(distinct rental_id_june) as rentals_june,
 case
-    when count(rentals_june) > count(rentals_may) then 'borrowed_more_in_june'
-    when count(rentals_june) < count(rentals_may) then 'borrowed_less_in_june'
-    when count(rentals_june) = count(rentals_may) then 'borrowed_same_in_june'
+    when count(distinct rental_id_june) > count(distinct rental_id_may) then 'borrowed_more_in_june'
+    when count(distinct rental_id_june) < count(distinct rental_id_may) then 'borrowed_less_in_june'
+    else 'borrowed_same_in_june'
 end as rentals_difference
 from sakila.joined_rentals
-group by customer_id
-order by count(rentals_difference) desc;
\ No newline at end of file
+group by customer_id;