From b1c246f643c24cec3d67b4ead33667fa202084c2 Mon Sep 17 00:00:00 2001 From: filipapietrat Date: Wed, 22 Nov 2023 13:27:43 +0000 Subject: [PATCH] [lab-sql-9]Filipa T --- .../[lab-sql-9]Filipa T-checkpoint.ipynb | 546 ++++++++++++++++++ [lab-sql-9]Filipa T.ipynb | 546 ++++++++++++++++++ [lab-sql-9]Filipa T.sql | 56 ++ 3 files changed, 1148 insertions(+) create mode 100644 .ipynb_checkpoints/[lab-sql-9]Filipa T-checkpoint.ipynb create mode 100644 [lab-sql-9]Filipa T.ipynb create mode 100644 [lab-sql-9]Filipa T.sql diff --git a/.ipynb_checkpoints/[lab-sql-9]Filipa T-checkpoint.ipynb b/.ipynb_checkpoints/[lab-sql-9]Filipa T-checkpoint.ipynb new file mode 100644 index 0000000..07e4688 --- /dev/null +++ b/.ipynb_checkpoints/[lab-sql-9]Filipa T-checkpoint.ipynb @@ -0,0 +1,546 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 56, + "id": "3367cdce", + "metadata": {}, + "outputs": [], + "source": [ + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "c6575e39", + "metadata": {}, + "outputs": [], + "source": [ + "#Create a Python connection with SQL database and retrieve the results of the last two queries (also mentioned below) as dataframes:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "1264f188", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "········\n" + ] + } + ], + "source": [ + "import getpass\n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "a503596d", + "metadata": {}, + "outputs": [], + "source": [ + "# Check the number of rentals for each customer for May" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "f5d4568b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_count
01978
15067
21097
32696
42396
.........
5154311
5163511
517101
5181361
519611
\n", + "

520 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_count\n", + "0 197 8\n", + "1 506 7\n", + "2 109 7\n", + "3 269 6\n", + "4 239 6\n", + ".. ... ...\n", + "515 431 1\n", + "516 351 1\n", + "517 10 1\n", + "518 136 1\n", + "519 61 1\n", + "\n", + "[520 rows x 2 columns]" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "engine = create_engine(connection_string)\n", + "may_data = pd.read_sql_query('select customer_id, count(rental_id) as rental_count from sakila.rentals_may group by customer_id order by rental_count desc', engine)\n", + "may_data" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "f4242747", + "metadata": {}, + "outputs": [], + "source": [ + "# Check the number of rentals for each customer for June" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "6981e1a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_count
03111
145410
23299
32959
45619
.........
5854251
5864121
5873351
588221
5891261
\n", + "

590 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_count\n", + "0 31 11\n", + "1 454 10\n", + "2 329 9\n", + "3 295 9\n", + "4 561 9\n", + ".. ... ...\n", + "585 425 1\n", + "586 412 1\n", + "587 335 1\n", + "588 22 1\n", + "589 126 1\n", + "\n", + "[590 rows x 2 columns]" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "engine = create_engine(connection_string)\n", + "june_data = pd.read_sql_query('select customer_id, count(rental_id) as rental_count from sakila.rentals_june group by customer_id order by rental_count desc', engine)\n", + "june_data" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "7e166361", + "metadata": {}, + "outputs": [], + "source": [ + "# Write a function that checks if customer borrowed more or less films in the month of June as compared to May." + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "6da36a5f", + "metadata": {}, + "outputs": [], + "source": [ + "may_data = pd.read_sql_query('select customer_id, count(rental_id) as rental_count from sakila.rentals_may group by customer_id order by rental_count desc', engine)\n", + "\n", + "# Renamaming the columns\n", + "may_data = may_data.rename(columns={'rental_count': 'rentals_may'})\n", + "\n", + "\n", + "june_data = pd.read_sql_query('select customer_id, count(rental_id) as rental_count from sakila.rentals_june group by customer_id order by rental_count desc', engine)\n", + "\n", + "# Renamaming the columns\n", + "june_data = june_data.rename(columns={'rental_count': 'rentals_june'})\n", + "\n", + "# Merging the dfs\n", + "final_data = pd.merge(may_data, june_data, on='customer_id', how='outer')\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "fbd5a870", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_mayrentals_juneresult
01978.08.0It's a tie!
15067.05.0June Wins!
21097.05.0June Wins!
32696.03.0June Wins!
42396.05.0June Wins!
...............
59363NaN1.0nan alert
594598NaN1.0nan alert
595487NaN1.0nan alert
596555NaN1.0nan alert
597335NaN1.0nan alert
\n", + "

598 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rentals_may rentals_june result\n", + "0 197 8.0 8.0 It's a tie!\n", + "1 506 7.0 5.0 June Wins!\n", + "2 109 7.0 5.0 June Wins!\n", + "3 269 6.0 3.0 June Wins!\n", + "4 239 6.0 5.0 June Wins!\n", + ".. ... ... ... ...\n", + "593 63 NaN 1.0 nan alert\n", + "594 598 NaN 1.0 nan alert\n", + "595 487 NaN 1.0 nan alert\n", + "596 555 NaN 1.0 nan alert\n", + "597 335 NaN 1.0 nan alert\n", + "\n", + "[598 rows x 4 columns]" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for index, row in final_data.iterrows():\n", + " may_rentals = row['rentals_may']\n", + " june_rentals = row['rentals_june']\n", + "\n", + " if pd.isna(may_rentals):\n", + " final_data.loc[index, 'result'] = 'nan alert'\n", + " elif may_rentals > june_rentals:\n", + " final_data.loc[index, 'result'] = 'June Wins!'\n", + " elif may_rentals < june_rentals:\n", + " final_data.loc[index, 'result'] = 'May Wins!'\n", + " else:\n", + " final_data.loc[index, 'result'] = 'It\\'s a tie!'\n", + "\n", + "final_data" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "fb77a681", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([\"It's a tie!\", 'June Wins!', 'May Wins!', 'nan alert'],\n", + " dtype=object)" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_data['result'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e10ad1ac", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/[lab-sql-9]Filipa T.ipynb b/[lab-sql-9]Filipa T.ipynb new file mode 100644 index 0000000..07e4688 --- /dev/null +++ b/[lab-sql-9]Filipa T.ipynb @@ -0,0 +1,546 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 56, + "id": "3367cdce", + "metadata": {}, + "outputs": [], + "source": [ + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "c6575e39", + "metadata": {}, + "outputs": [], + "source": [ + "#Create a Python connection with SQL database and retrieve the results of the last two queries (also mentioned below) as dataframes:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "1264f188", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "········\n" + ] + } + ], + "source": [ + "import getpass\n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "a503596d", + "metadata": {}, + "outputs": [], + "source": [ + "# Check the number of rentals for each customer for May" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "f5d4568b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_count
01978
15067
21097
32696
42396
.........
5154311
5163511
517101
5181361
519611
\n", + "

520 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_count\n", + "0 197 8\n", + "1 506 7\n", + "2 109 7\n", + "3 269 6\n", + "4 239 6\n", + ".. ... ...\n", + "515 431 1\n", + "516 351 1\n", + "517 10 1\n", + "518 136 1\n", + "519 61 1\n", + "\n", + "[520 rows x 2 columns]" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "engine = create_engine(connection_string)\n", + "may_data = pd.read_sql_query('select customer_id, count(rental_id) as rental_count from sakila.rentals_may group by customer_id order by rental_count desc', engine)\n", + "may_data" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "f4242747", + "metadata": {}, + "outputs": [], + "source": [ + "# Check the number of rentals for each customer for June" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "6981e1a8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrental_count
03111
145410
23299
32959
45619
.........
5854251
5864121
5873351
588221
5891261
\n", + "

590 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rental_count\n", + "0 31 11\n", + "1 454 10\n", + "2 329 9\n", + "3 295 9\n", + "4 561 9\n", + ".. ... ...\n", + "585 425 1\n", + "586 412 1\n", + "587 335 1\n", + "588 22 1\n", + "589 126 1\n", + "\n", + "[590 rows x 2 columns]" + ] + }, + "execution_count": 62, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "engine = create_engine(connection_string)\n", + "june_data = pd.read_sql_query('select customer_id, count(rental_id) as rental_count from sakila.rentals_june group by customer_id order by rental_count desc', engine)\n", + "june_data" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "7e166361", + "metadata": {}, + "outputs": [], + "source": [ + "# Write a function that checks if customer borrowed more or less films in the month of June as compared to May." + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "6da36a5f", + "metadata": {}, + "outputs": [], + "source": [ + "may_data = pd.read_sql_query('select customer_id, count(rental_id) as rental_count from sakila.rentals_may group by customer_id order by rental_count desc', engine)\n", + "\n", + "# Renamaming the columns\n", + "may_data = may_data.rename(columns={'rental_count': 'rentals_may'})\n", + "\n", + "\n", + "june_data = pd.read_sql_query('select customer_id, count(rental_id) as rental_count from sakila.rentals_june group by customer_id order by rental_count desc', engine)\n", + "\n", + "# Renamaming the columns\n", + "june_data = june_data.rename(columns={'rental_count': 'rentals_june'})\n", + "\n", + "# Merging the dfs\n", + "final_data = pd.merge(may_data, june_data, on='customer_id', how='outer')\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "fbd5a870", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idrentals_mayrentals_juneresult
01978.08.0It's a tie!
15067.05.0June Wins!
21097.05.0June Wins!
32696.03.0June Wins!
42396.05.0June Wins!
...............
59363NaN1.0nan alert
594598NaN1.0nan alert
595487NaN1.0nan alert
596555NaN1.0nan alert
597335NaN1.0nan alert
\n", + "

598 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " customer_id rentals_may rentals_june result\n", + "0 197 8.0 8.0 It's a tie!\n", + "1 506 7.0 5.0 June Wins!\n", + "2 109 7.0 5.0 June Wins!\n", + "3 269 6.0 3.0 June Wins!\n", + "4 239 6.0 5.0 June Wins!\n", + ".. ... ... ... ...\n", + "593 63 NaN 1.0 nan alert\n", + "594 598 NaN 1.0 nan alert\n", + "595 487 NaN 1.0 nan alert\n", + "596 555 NaN 1.0 nan alert\n", + "597 335 NaN 1.0 nan alert\n", + "\n", + "[598 rows x 4 columns]" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "for index, row in final_data.iterrows():\n", + " may_rentals = row['rentals_may']\n", + " june_rentals = row['rentals_june']\n", + "\n", + " if pd.isna(may_rentals):\n", + " final_data.loc[index, 'result'] = 'nan alert'\n", + " elif may_rentals > june_rentals:\n", + " final_data.loc[index, 'result'] = 'June Wins!'\n", + " elif may_rentals < june_rentals:\n", + " final_data.loc[index, 'result'] = 'May Wins!'\n", + " else:\n", + " final_data.loc[index, 'result'] = 'It\\'s a tie!'\n", + "\n", + "final_data" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "fb77a681", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([\"It's a tie!\", 'June Wins!', 'May Wins!', 'nan alert'],\n", + " dtype=object)" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_data['result'].unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e10ad1ac", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/[lab-sql-9]Filipa T.sql b/[lab-sql-9]Filipa T.sql new file mode 100644 index 0000000..0d229d4 --- /dev/null +++ b/[lab-sql-9]Filipa T.sql @@ -0,0 +1,56 @@ +-- In this lab we will find the customers who were active in consecutive months of May and June. Follow the steps to complete the analysis. +-- Create a table rentals_may to store the data from rental table with information for the month of May. +-- Insert values in the table rentals_may using the table rental, filtering values only for the month of May. + +create table sakila.rentals_may +select *, +date_format(rental_date, '%M') as rental_month +from sakila.rental +where (date_format(rental_date, '%M') = 'May') +; + +select * +from sakila.rentals_may +; + +-- Create a table rentals_june to store the data from rental table with information for the month of June. +-- Insert values in the table rentals_june using the table rental, filtering values only for the month of June. + +create table sakila.rentals_june +select *, +date_format(rental_date, '%M') as rental_month +from sakila.rental +where (date_format(rental_date, '%M') = 'June') +; + +select * +from sakila.rentals_june +; + +-- Check the number of rentals for each customer for May. + +select customer_id, count(rental_id) as rental_count +from sakila.rentals_may +group by customer_id +order by rental_count desc +; + +-- Check the number of rentals for each customer for June. + +select customer_id, count(rental_id) as rental_count +from sakila.rentals_june +group by customer_id +order by rental_count desc + +; +-- Create a Python connection with SQL database and retrieve the results of the last two queries (also mentioned below) as dataframes: + +-- Check the number of rentals for each customer for May + +-- Check the number of rentals for each customer for June + +-- Hint: You can store the results from the two queries in two separate dataframes. + +-- Write a function that checks if customer borrowed more or less films in the month of June as compared to May. + +-- Hint: For this part, you can create a join between the two dataframes created before, using the merge function available for pandas dataframes. Here is a link to the documentation for the merge function. \ No newline at end of file