import getpass  # To get the password without showing the input
from urllib.parse import quote

import pandas as pd
import pymysql
from sqlalchemy import create_engine

# Ask for the MySQL password interactively so it is never stored in the notebook.
password = getpass.getpass()

# Percent-encode the password before embedding it in the URL. Characters such as
# '@', ':', '/' or '%' would otherwise be read as URL delimiters/escapes and the
# engine would try to connect with the wrong credentials or host.
# safe='' also encodes '/', and quote() (unlike quote_plus) never emits '+',
# so the value decodes back to the exact original password.
# NOTE(review): the default schema is 'bank' while the notebook's queries address
# SAKILA tables by qualified name; confirm 'bank' is the intended default.
connection_string = 'mysql+pymysql://root:' + quote(password, safe='') + '@localhost/bank'
engine = create_engine(connection_string)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CUSTOMER_IDRENTAL_COUNT
01978
15067
21097
32696
42396
.........
5154311
5163511
517101
5181361
519611
\n", + "

520 rows × 2 columns

\n", + "
# Both monthly reports use the same aggregation; only the source table differs.
# One row per customer with at least one rental, busiest customers first.
RENTALS_PER_CUSTOMER_SQL = (
    "SELECT CUSTOMER_ID, COUNT(RENTAL_ID) AS RENTAL_COUNT "
    "FROM SAKILA.{table} "
    "GROUP BY CUSTOMER_ID ORDER BY RENTAL_COUNT DESC"
)

query_may = RENTALS_PER_CUSTOMER_SQL.format(table="RENTALS_MAY")
query_june = RENTALS_PER_CUSTOMER_SQL.format(table="RENTALS_JUNE")

# Run the May query first, then the June query, each into its own dataframe.
df_may = pd.read_sql_query(query_may, engine)
df_june = pd.read_sql_query(query_june, engine)

# Last expression of the cell: display the May result.
df_may
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CUSTOMER_IDRENTAL_COUNT
01978
15067
21097
32696
42396
.........
5154311
5163511
517101
5181361
519611
\n", + "

520 rows × 2 columns

\n", + "
def compare_rentals(df_may, df_june, customer_id):
    """Tell whether a customer rented more, fewer or as many films in June as in May.

    Args:
        df_may: DataFrame with ``CUSTOMER_ID`` and ``RENTAL_COUNT`` columns
            holding the per-customer rental counts for May.
        df_june: DataFrame with the same two columns for June.
        customer_id: Value matched against the ``CUSTOMER_ID`` column.

    Returns:
        A human-readable message describing the comparison, or a
        "not found" message when the customer appears in neither dataframe.
    """
    may_rentals = df_may.loc[df_may['CUSTOMER_ID'] == customer_id, 'RENTAL_COUNT'].values
    june_rentals = df_june.loc[df_june['CUSTOMER_ID'] == customer_id, 'RENTAL_COUNT'].values

    # Unknown customer: no row in either month.
    if may_rentals.size == 0 and june_rentals.size == 0:
        return "Customer ID not found in one or both datasets"

    # The per-month frames only contain customers with at least one rental,
    # so a customer missing from a single month rented zero films that month
    # and must still be compared rather than reported as "not found".
    may_count = may_rentals[0] if may_rentals.size else 0
    june_count = june_rentals[0] if june_rentals.size else 0

    if may_count < june_count:
        return "Customer borrowed more films in June compared to May"
    if may_count > june_count:
        return "Customer borrowed fewer films in June compared to May"
    return "Customer borrowed the same number of films in May and June"
] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "compare_rentals(df_may, df_june, 34)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/lab-sql-9.ipynb b/lab-sql-9.ipynb new file mode 100644 index 0000000..0dd5a22 --- /dev/null +++ b/lab-sql-9.ipynb @@ -0,0 +1,388 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "716cf0f1", + "metadata": {}, + "source": [ + "Create a Python connection with SQL database and retrieve the results of the last two queries (also mentioned below) as dataframes:\n", + "\n", + "Check the number of rentals for each customer for May\n", + "\n", + "Check the number of rentals for each customer for June\n", + "\n", + "Hint: You can store the results from the two queries in two separate dataframes." 
import getpass  # To get the password without showing the input
from urllib.parse import quote

import pandas as pd
import pymysql
from sqlalchemy import create_engine

# Ask for the MySQL password interactively so it is never stored in the notebook.
password = getpass.getpass()

# Percent-encode the password before embedding it in the URL. Characters such as
# '@', ':', '/' or '%' would otherwise be read as URL delimiters/escapes and the
# engine would try to connect with the wrong credentials or host.
# safe='' also encodes '/', and quote() (unlike quote_plus) never emits '+',
# so the value decodes back to the exact original password.
# NOTE(review): the default schema is 'bank' while the notebook's queries address
# SAKILA tables by qualified name; confirm 'bank' is the intended default.
connection_string = 'mysql+pymysql://root:' + quote(password, safe='') + '@localhost/bank'
engine = create_engine(connection_string)
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CUSTOMER_IDRENTAL_COUNT
01978
15067
21097
32696
42396
.........
5154311
5163511
517101
5181361
519611
\n", + "

520 rows × 2 columns

\n", + "
# Both monthly reports use the same aggregation; only the source table differs.
# One row per customer with at least one rental, busiest customers first.
RENTALS_PER_CUSTOMER_SQL = (
    "SELECT CUSTOMER_ID, COUNT(RENTAL_ID) AS RENTAL_COUNT "
    "FROM SAKILA.{table} "
    "GROUP BY CUSTOMER_ID ORDER BY RENTAL_COUNT DESC"
)

query_may = RENTALS_PER_CUSTOMER_SQL.format(table="RENTALS_MAY")
query_june = RENTALS_PER_CUSTOMER_SQL.format(table="RENTALS_JUNE")

# Run the May query first, then the June query, each into its own dataframe.
df_may = pd.read_sql_query(query_may, engine)
df_june = pd.read_sql_query(query_june, engine)

# Last expression of the cell: display the May result.
df_may
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
CUSTOMER_IDRENTAL_COUNT
01978
15067
21097
32696
42396
.........
5154311
5163511
517101
5181361
519611
\n", + "

520 rows × 2 columns

\n", + "
def compare_rentals(df_may, df_june, customer_id):
    """Tell whether a customer rented more, fewer or as many films in June as in May.

    Args:
        df_may: DataFrame with ``CUSTOMER_ID`` and ``RENTAL_COUNT`` columns
            holding the per-customer rental counts for May.
        df_june: DataFrame with the same two columns for June.
        customer_id: Value matched against the ``CUSTOMER_ID`` column.

    Returns:
        A human-readable message describing the comparison, or a
        "not found" message when the customer appears in neither dataframe.
    """
    may_rentals = df_may.loc[df_may['CUSTOMER_ID'] == customer_id, 'RENTAL_COUNT'].values
    june_rentals = df_june.loc[df_june['CUSTOMER_ID'] == customer_id, 'RENTAL_COUNT'].values

    # Unknown customer: no row in either month.
    if may_rentals.size == 0 and june_rentals.size == 0:
        return "Customer ID not found in one or both datasets"

    # The per-month frames only contain customers with at least one rental,
    # so a customer missing from a single month rented zero films that month
    # and must still be compared rather than reported as "not found".
    may_count = may_rentals[0] if may_rentals.size else 0
    june_count = june_rentals[0] if june_rentals.size else 0

    if may_count < june_count:
        return "Customer borrowed more films in June compared to May"
    if may_count > june_count:
        return "Customer borrowed fewer films in June compared to May"
    return "Customer borrowed the same number of films in May and June"
-- In this lab we will find the customers who were active in consecutive months of May and June. Follow the steps to complete the analysis.

-- Create a table rentals_may to store the data from rental table with information for the month of May.
-- LIKE copies only the structure of RENTAL (columns and indexes), leaving the
-- new table empty. Creating it with "AS SELECT ... WHERE MONTH(...) = 5" would
-- already load the May rows, and the INSERT below would then store every
-- rental twice and double all per-customer counts.

CREATE TABLE SAKILA.RENTALS_MAY LIKE SAKILA.RENTAL;

SELECT * FROM SAKILA.RENTALS_MAY;

-- Insert values in the table rentals_may using the table rental, filtering values only for the month of May.
-- Table names are schema-qualified so the statement does not depend on the
-- session's default database.

INSERT INTO SAKILA.RENTALS_MAY
SELECT *
FROM SAKILA.RENTAL
WHERE MONTH(rental_date) = 5;

SELECT COUNT(*) FROM SAKILA.RENTALS_MAY;


-- Create a table rentals_june to store the data from rental table with information for the month of June.
-- Structure only, for the same reason as RENTALS_MAY above.

CREATE TABLE SAKILA.RENTALS_JUNE LIKE SAKILA.RENTAL;

SELECT COUNT(*) FROM SAKILA.RENTALS_JUNE;


-- Insert values in the table rentals_june using the table rental, filtering values only for the month of June.

INSERT INTO SAKILA.RENTALS_JUNE
SELECT *
FROM SAKILA.RENTAL
WHERE MONTH(rental_date) = 6;

SELECT COUNT(*) FROM SAKILA.RENTALS_JUNE;


-- Check the number of rentals for each customer for May.

SELECT CUSTOMER_ID, COUNT(RENTAL_ID) AS RENTAL_COUNT
FROM SAKILA.RENTALS_MAY
GROUP BY CUSTOMER_ID
ORDER BY RENTAL_COUNT DESC;



-- Check the number of rentals for each customer for June.

SELECT CUSTOMER_ID, COUNT(RENTAL_ID) AS RENTAL_COUNT
FROM SAKILA.RENTALS_JUNE
GROUP BY CUSTOMER_ID
ORDER BY RENTAL_COUNT DESC;