diff --git a/.ipynb_checkpoints/[lab-sql-9] Sara-checkpoint.ipynb b/.ipynb_checkpoints/[lab-sql-9] Sara-checkpoint.ipynb new file mode 100644 index 0000000..7ce74e1 --- /dev/null +++ b/.ipynb_checkpoints/[lab-sql-9] Sara-checkpoint.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5d76472d", + "metadata": {}, + "source": [ + "DA PT 2023 \n", + "Sara Ferreira da Silva" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a2d39fe9", + "metadata": {}, + "outputs": [], + "source": [ + "# Importing extension to read SQL files\n", + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb20116d", + "metadata": {}, + "outputs": [], + "source": [ + "# Importing the password extension and also the password protection so it is not visible\n", + "import getpass \n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8abbb119", + "metadata": {}, + "outputs": [], + "source": [ + "# Connecting the password to my SQL\n", + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b975e6b1", + "metadata": {}, + "outputs": [], + "source": [ + "# Running it\n", + "engine = create_engine(connection_string)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15acda6d", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: MySQL is always giving an error because I do not know its password. \n", + "# Thus, henceforth I proceed with the lab knowing I am not able to retrieve any data.\n", + "# Nevertheless, the remaining code is written on the assumption that it is correct, despite the above situation. 
\n", + "\n", + "# Obtaining the number of rentals per customer for May\n", + "rentals_may = pd.read_sql_query('SELECT customer_id, COUNT(rental_id) AS number_of_rental FROM sakila.rentals_may GROUP BY customer_id', engine)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac42f88a", + "metadata": {}, + "outputs": [], + "source": [ + "# Obtaining the number of rentals per customer for June\n", + "rentals_june = pd.read_sql_query('SELECT customer_id, COUNT(rental_id) AS number_of_rental FROM sakila.rentals_june GROUP BY customer_id', engine)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a434e3c", + "metadata": {}, + "outputs": [], + "source": [ + "# Joining both tables: the inner join on customer_id keeps customers active in both months; the suffixes tell the two count columns apart\n", + "total_rentals = pd.merge(rentals_may, rentals_june, how='inner', on='customer_id', suffixes=('_may', '_june'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "127e9016", + "metadata": {}, + "outputs": [], + "source": [ + "def compare_rentals(row):  # row: one merged customer holding the May and June rental counts\n", + "    if row['number_of_rental_may'] < row['number_of_rental_june']:\n", + "        return 'Borrowed more in June'\n", + "    elif row['number_of_rental_may'] > row['number_of_rental_june']:\n", + "        return 'Borrowed more in May'\n", + "    else:\n", + "        return 'Borrowed the same amount'\n", + "\n", + "total_rentals['rental_comparison'] = total_rentals.apply(compare_rentals, axis=1)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/[lab-sql-9] Sara.ipynb b/[lab-sql-9] Sara.ipynb new file mode 100644 index 0000000..7ce74e1 --- /dev/null +++ b/[lab-sql-9] Sara.ipynb @@ -0,0 +1,136 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5d76472d", + "metadata": {}, + "source": [ + "DA PT 2023 \n", + "Sara Ferreira da Silva" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "a2d39fe9", + "metadata": 
{}, + "outputs": [], + "source": [ + "# Importing extension to read SQL files\n", + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb20116d", + "metadata": {}, + "outputs": [], + "source": [ + "# Importing the password extension and also the password protection so it is not visible\n", + "import getpass \n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8abbb119", + "metadata": {}, + "outputs": [], + "source": [ + "# Connecting the password to my SQL\n", + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b975e6b1", + "metadata": {}, + "outputs": [], + "source": [ + "# Running it\n", + "engine = create_engine(connection_string)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15acda6d", + "metadata": {}, + "outputs": [], + "source": [ + "# Note: MySQL is always giving an error because I do not know its password. \n", + "# Thus, henceforth I proceed with the lab knowing I am not able to retrieve any data.\n", + "# Nevertheless, the remaining code is written on the assumption that it is correct, despite the above situation. 
\n", + "\n", + "# Obtaining the number of rentals per customer for May\n", + "rentals_may = pd.read_sql_query('SELECT customer_id, COUNT(rental_id) AS number_of_rental FROM sakila.rentals_may GROUP BY customer_id', engine)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ac42f88a", + "metadata": {}, + "outputs": [], + "source": [ + "# Obtaining the number of rentals per customer for June\n", + "rentals_june = pd.read_sql_query('SELECT customer_id, COUNT(rental_id) AS number_of_rental FROM sakila.rentals_june GROUP BY customer_id', engine)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a434e3c", + "metadata": {}, + "outputs": [], + "source": [ + "# Joining both tables: the inner join on customer_id keeps customers active in both months; the suffixes tell the two count columns apart\n", + "total_rentals = pd.merge(rentals_may, rentals_june, how='inner', on='customer_id', suffixes=('_may', '_june'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "127e9016", + "metadata": {}, + "outputs": [], + "source": [ + "def compare_rentals(row):  # row: one merged customer holding the May and June rental counts\n", + "    if row['number_of_rental_may'] < row['number_of_rental_june']:\n", + "        return 'Borrowed more in June'\n", + "    elif row['number_of_rental_may'] > row['number_of_rental_june']:\n", + "        return 'Borrowed more in May'\n", + "    else:\n", + "        return 'Borrowed the same amount'\n", + "\n", + "total_rentals['rental_comparison'] = total_rentals.apply(compare_rentals, axis=1)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/[lab-sql-9] Sara.sql b/[lab-sql-9] Sara.sql new file mode 100644 index 0000000..26b0a44 --- /dev/null +++ b/[lab-sql-9] Sara.sql @@ -0,0 +1,37 @@ +-- DA PT 2023 +-- Sara Ferreira da Silva + +select * from sakila.rental; -- rental_date + customer_id + rental_id +select * from sakila.customer; -- customer_id + first_name + last_name + active + +-- In this lab we will find the customers who 
were active in consecutive months of May and June. Follow the steps to complete the analysis. +-- Create a table rentals_may to store the data from rental table with information for the month of May. +-- Insert values in the table rentals_may using the table rental, filtering values only for the month of May. +-- Note: the tables below are regular (not temporary) tables, because a temporary table is visible only to the session that created it and the Python connection could not read it. +-- Create a table rentals_june to store the data from rental table with information for the month of June. +-- Insert values in the table rentals_june using the table rental, filtering values only for the month of June. + +create table if not exists sakila.rentals_may as +select * from sakila.rental +where month(rental_date) = 5; + +create table if not exists sakila.rentals_june as +select * from sakila.rental +where month(rental_date) = 6; + +-- Check the number of rentals for each customer for May. +select customer_id, count(rental_id) as number_of_rental +from sakila.rentals_may +group by customer_id; + +-- Check the number of rentals for each customer for June. +select customer_id, count(rental_id) as number_of_rental +from sakila.rentals_june +group by customer_id; + +-- Create a Python connection with SQL database and retrieve the results of the last two queries (also mentioned below) as dataframes: +-- Check the number of rentals for each customer for May +-- Check the number of rentals for each customer for June +-- Hint: You can store the results from the two queries in two separate dataframes. +-- Write a function that checks if customer borrowed more or less films in the month of June as compared to May. +-- Hint: For this part, you can create a join between the two dataframes created before, using the merge function available for pandas dataframes.