From bb10748d0880b7443503c969294d0e547431377a Mon Sep 17 00:00:00 2001 From: TiagoQuaresmaSimoes Date: Fri, 24 Nov 2023 21:21:37 +0000 Subject: [PATCH] [lab-sql-9]Tiago --- .../[lab-sql-9]Tiago-checkpoint.ipynb | 524 ++++++++++++++++++ [lab-sql-9]Tiago.ipynb | 524 ++++++++++++++++++ [lab-sql-9]Tiago.sql | 55 ++ 3 files changed, 1103 insertions(+) create mode 100644 .ipynb_checkpoints/[lab-sql-9]Tiago-checkpoint.ipynb create mode 100644 [lab-sql-9]Tiago.ipynb create mode 100644 [lab-sql-9]Tiago.sql diff --git a/.ipynb_checkpoints/[lab-sql-9]Tiago-checkpoint.ipynb b/.ipynb_checkpoints/[lab-sql-9]Tiago-checkpoint.ipynb new file mode 100644 index 0000000..47b472b --- /dev/null +++ b/.ipynb_checkpoints/[lab-sql-9]Tiago-checkpoint.ipynb @@ -0,0 +1,524 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "db6bed8d", + "metadata": {}, + "outputs": [], + "source": [ + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "be051a50", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "········\n" + ] + } + ], + "source": [ + "import getpass \n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "61b33f78", + "metadata": {}, + "outputs": [], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "engine = create_engine(connection_string)\n", + "may = pd.read_sql_query(\"select concat(c.first_name, ' ', c.last_name) as name, count(r.rental_id) as qty \\\n", + "from sakila.rentals_may as r \\\n", + "left join sakila.customer as c \\\n", + "on c.customer_id = r.customer_id \\\n", + "group by r.customer_id \\\n", + "order by 2 desc;\", engine)\n", + "june = pd.read_sql_query(\"select concat(c.first_name, ' ', c.last_name) as name, count(r.rental_id) as qty \\\n", + "from sakila.rentals_june as r \\\n", + "left join 
sakila.customer as c \\\n", + "on c.customer_id = r.customer_id \\\n", + "group by r.customer_id \\\n", + "order by 2 desc;\", engine)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c9b480aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameqty
0SUE PETERS8
1LESLIE SEWARD7
2EDNA WEST7
3CASSANDRA WALTERS6
4MINNIE ROMERO6
.........
515JOEL FRANCISCO1
516JACK FOUST1
517DOROTHY TAYLOR1
518ANITA MORALES1
519KATHERINE RIVERA1
\n", + "

520 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " name qty\n", + "0 SUE PETERS 8\n", + "1 LESLIE SEWARD 7\n", + "2 EDNA WEST 7\n", + "3 CASSANDRA WALTERS 6\n", + "4 MINNIE ROMERO 6\n", + ".. ... ...\n", + "515 JOEL FRANCISCO 1\n", + "516 JACK FOUST 1\n", + "517 DOROTHY TAYLOR 1\n", + "518 ANITA MORALES 1\n", + "519 KATHERINE RIVERA 1\n", + "\n", + "[520 rows x 2 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "may" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c089070f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameqty
0BRENDA WRIGHT11
1ALEX GRESHAM10
2FRANK WAGGONER9
3DAISY BATES9
4IAN STILL9
.........
585FRANCIS SIKES1
586ALLEN BUTTERFIELD1
587GREGORY MAULDIN1
588LAURA RODRIGUEZ1
589ELLEN SIMPSON1
\n", + "

590 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " name qty\n", + "0 BRENDA WRIGHT 11\n", + "1 ALEX GRESHAM 10\n", + "2 FRANK WAGGONER 9\n", + "3 DAISY BATES 9\n", + "4 IAN STILL 9\n", + ".. ... ...\n", + "585 FRANCIS SIKES 1\n", + "586 ALLEN BUTTERFIELD 1\n", + "587 GREGORY MAULDIN 1\n", + "588 LAURA RODRIGUEZ 1\n", + "589 ELLEN SIMPSON 1\n", + "\n", + "[590 rows x 2 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "june" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0376b5e5", + "metadata": {}, + "outputs": [], + "source": [ + "df_all= pd.merge(may, june, how='inner', on='name')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e320119f", + "metadata": {}, + "outputs": [], + "source": [ + "df_all=df_all.rename(columns={\"qty_x\": \"may\", \"qty_y\": \"june\"})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ec229515", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namemayjune
0SUE PETERS88
1LESLIE SEWARD75
2EDNA WEST75
3CASSANDRA WALTERS63
4MINNIE ROMERO65
............
507CLAUDIA FULLER18
508JOEL FRANCISCO17
509JACK FOUST13
510DOROTHY TAYLOR15
511ANITA MORALES11
\n", + "

512 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " name may june\n", + "0 SUE PETERS 8 8\n", + "1 LESLIE SEWARD 7 5\n", + "2 EDNA WEST 7 5\n", + "3 CASSANDRA WALTERS 6 3\n", + "4 MINNIE ROMERO 6 5\n", + ".. ... ... ...\n", + "507 CLAUDIA FULLER 1 8\n", + "508 JOEL FRANCISCO 1 7\n", + "509 JACK FOUST 1 3\n", + "510 DOROTHY TAYLOR 1 5\n", + "511 ANITA MORALES 1 1\n", + "\n", + "[512 rows x 3 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b16c456e", + "metadata": {}, + "outputs": [], + "source": [ + "may= df_all[\"may\"].values.tolist()\n", + "june= df_all[\"june\"].values.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0c244302", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "May Count: 92\n", + "June Count: 358\n" + ] + } + ], + "source": [ + "\n", + "\n", + "may_count = 0\n", + "june_count = 0\n", + " \n", + "for i in range(0, len(may)):\n", + " if (may[i] > june[i]):\n", + " may_count += 1\n", + " elif (may[i] < june[i]):\n", + " june_count += 1\n", + " else: pass\n", + "\n", + "print(\"May Count:\", may_count)\n", + "print(\"June Count:\", june_count)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cea199f3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4106db87", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/[lab-sql-9]Tiago.ipynb b/[lab-sql-9]Tiago.ipynb new file mode 100644 index 0000000..47b472b --- /dev/null +++ b/[lab-sql-9]Tiago.ipynb @@ -0,0 +1,524 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "db6bed8d", + "metadata": {}, + "outputs": [], + "source": [ + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "be051a50", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "········\n" + ] + } + ], + "source": [ + "import getpass \n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "61b33f78", + "metadata": {}, + "outputs": [], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "engine = create_engine(connection_string)\n", + "may = pd.read_sql_query(\"select concat(c.first_name, ' ', c.last_name) as name, count(r.rental_id) as qty \\\n", + "from sakila.rentals_may as r \\\n", + "left join sakila.customer as c \\\n", + "on c.customer_id = r.customer_id \\\n", + "group by r.customer_id \\\n", + "order by 2 desc;\", engine)\n", + "june = pd.read_sql_query(\"select concat(c.first_name, ' ', c.last_name) as name, count(r.rental_id) as qty \\\n", + "from sakila.rentals_june as r \\\n", + "left join sakila.customer as c \\\n", + "on c.customer_id = r.customer_id \\\n", + "group by r.customer_id \\\n", + "order by 2 desc;\", engine)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "c9b480aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameqty
0SUE PETERS8
1LESLIE SEWARD7
2EDNA WEST7
3CASSANDRA WALTERS6
4MINNIE ROMERO6
.........
515JOEL FRANCISCO1
516JACK FOUST1
517DOROTHY TAYLOR1
518ANITA MORALES1
519KATHERINE RIVERA1
\n", + "

520 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " name qty\n", + "0 SUE PETERS 8\n", + "1 LESLIE SEWARD 7\n", + "2 EDNA WEST 7\n", + "3 CASSANDRA WALTERS 6\n", + "4 MINNIE ROMERO 6\n", + ".. ... ...\n", + "515 JOEL FRANCISCO 1\n", + "516 JACK FOUST 1\n", + "517 DOROTHY TAYLOR 1\n", + "518 ANITA MORALES 1\n", + "519 KATHERINE RIVERA 1\n", + "\n", + "[520 rows x 2 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "may" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "c089070f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
nameqty
0BRENDA WRIGHT11
1ALEX GRESHAM10
2FRANK WAGGONER9
3DAISY BATES9
4IAN STILL9
.........
585FRANCIS SIKES1
586ALLEN BUTTERFIELD1
587GREGORY MAULDIN1
588LAURA RODRIGUEZ1
589ELLEN SIMPSON1
\n", + "

590 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " name qty\n", + "0 BRENDA WRIGHT 11\n", + "1 ALEX GRESHAM 10\n", + "2 FRANK WAGGONER 9\n", + "3 DAISY BATES 9\n", + "4 IAN STILL 9\n", + ".. ... ...\n", + "585 FRANCIS SIKES 1\n", + "586 ALLEN BUTTERFIELD 1\n", + "587 GREGORY MAULDIN 1\n", + "588 LAURA RODRIGUEZ 1\n", + "589 ELLEN SIMPSON 1\n", + "\n", + "[590 rows x 2 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "june" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0376b5e5", + "metadata": {}, + "outputs": [], + "source": [ + "df_all= pd.merge(may, june, how='inner', on='name')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "e320119f", + "metadata": {}, + "outputs": [], + "source": [ + "df_all=df_all.rename(columns={\"qty_x\": \"may\", \"qty_y\": \"june\"})\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "ec229515", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
namemayjune
0SUE PETERS88
1LESLIE SEWARD75
2EDNA WEST75
3CASSANDRA WALTERS63
4MINNIE ROMERO65
............
507CLAUDIA FULLER18
508JOEL FRANCISCO17
509JACK FOUST13
510DOROTHY TAYLOR15
511ANITA MORALES11
\n", + "

512 rows × 3 columns

\n", + "
" + ], + "text/plain": [ + " name may june\n", + "0 SUE PETERS 8 8\n", + "1 LESLIE SEWARD 7 5\n", + "2 EDNA WEST 7 5\n", + "3 CASSANDRA WALTERS 6 3\n", + "4 MINNIE ROMERO 6 5\n", + ".. ... ... ...\n", + "507 CLAUDIA FULLER 1 8\n", + "508 JOEL FRANCISCO 1 7\n", + "509 JACK FOUST 1 3\n", + "510 DOROTHY TAYLOR 1 5\n", + "511 ANITA MORALES 1 1\n", + "\n", + "[512 rows x 3 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_all" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "b16c456e", + "metadata": {}, + "outputs": [], + "source": [ + "may= df_all[\"may\"].values.tolist()\n", + "june= df_all[\"june\"].values.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "0c244302", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "May Count: 92\n", + "June Count: 358\n" + ] + } + ], + "source": [ + "\n", + "\n", + "may_count = 0\n", + "june_count = 0\n", + " \n", + "for i in range(0, len(may)):\n", + " if (may[i] > june[i]):\n", + " may_count += 1\n", + " elif (may[i] < june[i]):\n", + " june_count += 1\n", + " else: pass\n", + "\n", + "print(\"May Count:\", may_count)\n", + "print(\"June Count:\", june_count)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cea199f3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4106db87", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/[lab-sql-9]Tiago.sql b/[lab-sql-9]Tiago.sql
new file mode 100644
index 0000000..7a64d1d
--- /dev/null
+++ b/[lab-sql-9]Tiago.sql
@@ -0,0 +1,55 @@
+-- In this lab we will find the customers who were active in consecutive months of May and June. Follow the steps to complete the analysis.
+
+-- Create a table rentals_may to store the data from rental table with information for the month of May.
+-- Insert values in the table rentals_may using the table rental, filtering values only for the month of May.
+-- The target is schema-qualified so the table is created in sakila (where the queries below read it) regardless of the default database;
+-- IF NOT EXISTS keeps a re-run from failing. The half-open range pins a single year. NOTE(review): 2005 assumed from the standard Sakila data - confirm.
+CREATE TABLE IF NOT EXISTS sakila.rentals_may AS
+SELECT *
+FROM sakila.rental
+WHERE rental_date >= '2005-05-01' AND rental_date < '2005-06-01';
+
+SELECT *
+FROM sakila.rentals_may;
+
+-- Create a table rentals_june to store the data from rental table with information for the month of June.
+-- Insert values in the table rentals_june using the table rental, filtering values only for the month of June.
+-- Same schema qualification, re-run guard and half-open date range as for May (NOTE(review): year 2005 assumed - confirm).
+CREATE TABLE IF NOT EXISTS sakila.rentals_june AS
+SELECT *
+FROM sakila.rental
+WHERE rental_date >= '2005-06-01' AND rental_date < '2005-07-01';
+
+SELECT *
+FROM sakila.rentals_june;
+
+-- Check the number of rentals for each customer for May.
+-- GROUP BY lists every non-aggregated column used in SELECT; ORDER BY names the alias and breaks ties on customer_id so the row order is deterministic.
+SELECT CONCAT(c.first_name, ' ', c.last_name) AS name, COUNT(r.rental_id) AS Qty
+FROM sakila.rentals_may AS r
+LEFT JOIN sakila.customer AS c
+    ON c.customer_id = r.customer_id
+GROUP BY r.customer_id, c.first_name, c.last_name
+ORDER BY Qty DESC, r.customer_id;
+
+-- Check the number of rentals for each customer for June.
+-- Same grouping and ordering rules as the May query, applied to the June table.
+SELECT CONCAT(c.first_name, ' ', c.last_name) AS name, COUNT(r.rental_id) AS Qty
+FROM sakila.rentals_june AS r
+LEFT JOIN sakila.customer AS c
+    ON c.customer_id = r.customer_id
+GROUP BY r.customer_id, c.first_name, c.last_name
+ORDER BY Qty DESC, r.customer_id;
+
+-- Create a Python connection with SQL database and retrieve the results of the last two queries (also mentioned below) as dataframes:
+
+-- Check the number of rentals for each customer for May
+
+-- Check the number of rentals for each customer for June
+
+
+-- Hint: You can store the results from the two queries in two separate dataframes.
+
+-- Write a function that checks if customer borrowed more or less films in the month of June as compared to May.
+
+-- Hint: For this part, you can create a join between the two dataframes created before, using the merge function available for pandas dataframes. Here is a link to the documentation for the merge function.
\ No newline at end of file