Skip to content

Commit 85c342b

Browse files
committed
feat: add exercise #1
0 parents  commit 85c342b

File tree

10 files changed

+240
-0
lines changed

10 files changed

+240
-0
lines changed

.github/dependabot.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
version: 2
2+
updates:
3+
- package-ecosystem: "pip"
4+
directory: "/"
5+
schedule:
6+
interval: "weekly"
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
name: Container Build
2+
on:
3+
- pull_request
4+
- push
5+
6+
jobs:
7+
docker:
8+
runs-on: ubuntu-latest
9+
steps:
10+
- name: Checkout
11+
uses: actions/checkout@v4
12+
13+
- name: Builds Docker Image
14+
run: docker compose build notebook
15+
16+
- name: Stops Containers
17+
run: docker compose down
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
name: Dependabot auto-merge
2+
on: pull_request
3+
4+
permissions:
5+
contents: write
6+
pull-requests: write
7+
8+
jobs:
9+
dependabot:
10+
runs-on: ubuntu-latest
11+
if: ${{ github.actor == 'dependabot[bot]' }}
12+
steps:
13+
- name: Dependabot metadata
14+
id: metadata
15+
uses: dependabot/fetch-metadata@v1
16+
with:
17+
github-token: "${{ secrets.GITHUB_TOKEN }}"
18+
- name: Enable auto-merge for Dependabot PRs
19+
if: ${{ steps.metadata.outputs.update-type == 'version-update:semver-patch' }}
20+
run: gh pr merge --auto --merge "$PR_URL"
21+
env:
22+
PR_URL: ${{github.event.pull_request.html_url}}
23+
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
.DS_Store
2+
.ipynb_checkpoints

Dockerfile

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
FROM python:3.9-slim-buster
2+
3+
RUN mkdir /app
4+
5+
WORKDIR /app
6+
7+
RUN apt-get update \
8+
&& apt-get install --yes --no-install-recommends \
9+
build-essential
10+
11+
COPY requirements.txt .
12+
13+
RUN pip install -r requirements.txt
14+
15+
COPY . .
16+
17+
EXPOSE 8888
18+
19+
ENTRYPOINT ["jupyter", "notebook", "--ip=0.0.0.0", "--no-browser", "--allow-root"]

Justfile

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
dev:
2+
docker compose up notebook
3+
4+
build:
5+
docker compose build notebook
6+
7+
cleanup:
8+
docker compose down
9+
10+
bash:
11+
docker exec -it <CONTAINER ID> bash

README.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
<div>
2+
<h1 align="center">Python for Data Science</h1>
3+
<h4 align="center">🐍 Following "Python for Data Science" Book Exercises</h4>
4+
</div>
5+
6+
## Run Locally
7+
8+
Build and run containers using `docker compose`
9+
10+
```bash
11+
docker compose up --build notebook
12+
```
13+
14+
> Using `Justfile` this is a matter of running `just build` and from
15+
> there on `just dev`
16+
17+
After working you can release resources using:
18+
19+
```bash
20+
docker compose down
21+
```
22+
23+
> A [Justfile][1] is included!
24+
25+
[1]: https://just.systems

docker-compose.yml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
version: '3'
2+
3+
services:
4+
notebook:
5+
build:
6+
context: .
7+
dockerfile: Dockerfile
8+
volumes:
9+
- .:/app
10+
ports:
11+
- 8888:8888
Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "42b7f162-35b4-4958-b57d-de78a20118ff",
6+
"metadata": {},
7+
"source": [
8+
"In order to always fetch the latest version of the data, such data is fetched from the author's repository, then converted into a Python statement and finally executed.\n",
9+
"\n",
10+
"> Note about `exec`: it is discouraged to execute arbitrary code as is done in the following block. A malicious user could post harmful\n",
11+
"> code in that GitHub file, and then that code would be executed."
12+
]
13+
},
14+
{
15+
"cell_type": "code",
16+
"execution_count": 14,
17+
"id": "abae6b96-65db-49fe-ab49-b0f955418d6d",
18+
"metadata": {},
19+
"outputs": [
20+
{
21+
"name": "stdout",
22+
"output_type": "stream",
23+
"text": [
24+
"[{'name': 'photo1.jpg', 'tags': {'food', 'coffee', 'drink', 'cup', 'breakfast', 'table', 'tableware'}}, {'name': 'photo2.jpg', 'tags': {'food', 'dish', 'vegetable', 'dinner', 'meal', 'meat', 'tableware'}}, {'name': 'photo3.jpg', 'tags': {'city', 'skyline', 'skyscraper', 'architecture', 'travel', 'building', 'cityscape'}}, {'name': 'photo4.jpg', 'tags': {'glass', 'drink', 'meal', 'grapes', 'fruit', 'juice', 'food'}}]\n"
25+
]
26+
}
27+
],
28+
"source": [
29+
"import requests\n",
30+
"\n",
31+
"LIST_OF_DICTIONARIES_JSON_URL = \"https://raw.githubusercontent.com/pythondatabook/sources/main/ch2/list_of_dicts.txt\"\n",
32+
"\n",
33+
"response = requests.get(LIST_OF_DICTIONARIES_JSON_URL)\n",
34+
"response_text = response.text\n",
35+
"response_text = 'data = ' + response_text\n",
36+
"\n",
37+
"exec(response_text)\n",
38+
"print(data)"
39+
]
40+
},
41+
{
42+
"cell_type": "markdown",
43+
"id": "9309d683-2707-4857-aee4-a2ef09806dad",
44+
"metadata": {},
45+
"source": [
46+
"Group photos with intersecting tags, saving the results in `photo_groups`."
47+
]
48+
},
49+
{
50+
"cell_type": "code",
51+
"execution_count": 35,
52+
"id": "86df43c8-d757-406c-86b9-b466111f7adb",
53+
"metadata": {},
54+
"outputs": [
55+
{
56+
"name": "stdout",
57+
"output_type": "stream",
58+
"text": [
59+
"{\n",
60+
" \"tableware_food\": [\n",
61+
" \"photo1.jpg\",\n",
62+
" \"photo2.jpg\"\n",
63+
" ],\n",
64+
" \"drink_food\": [\n",
65+
" \"photo1.jpg\",\n",
66+
" \"photo4.jpg\"\n",
67+
" ],\n",
68+
" \"meal_food\": [\n",
69+
" \"photo2.jpg\",\n",
70+
" \"photo4.jpg\"\n",
71+
" ]\n",
72+
"}\n"
73+
]
74+
}
75+
],
76+
"source": [
77+
"import json\n",
78+
"\n",
79+
"photo_groups = {}\n",
80+
"\n",
81+
"for photo_x in range(0, len(data)):\n",
82+
" for photo_y in range(photo_x+1, len(data)):\n",
83+
" intersection = data[photo_x]['tags'].intersection(data[photo_y]['tags'])\n",
84+
"\n",
85+
" if len(intersection) >= 2:\n",
86+
" intersection = list(intersection)\n",
87+
" key = '_'.join(intersection)\n",
88+
" photo_groups.setdefault(key, [data[photo_x]['name'], data[photo_y]['name']])\n",
89+
"\n",
90+
"print(json.dumps(photo_groups, indent=4))"
91+
]
92+
}
93+
],
94+
"metadata": {
95+
"kernelspec": {
96+
"display_name": "Python 3 (ipykernel)",
97+
"language": "python",
98+
"name": "python3"
99+
},
100+
"language_info": {
101+
"codemirror_mode": {
102+
"name": "ipython",
103+
"version": 3
104+
},
105+
"file_extension": ".py",
106+
"mimetype": "text/x-python",
107+
"name": "python",
108+
"nbconvert_exporter": "python",
109+
"pygments_lexer": "ipython3",
110+
"version": "3.9.17"
111+
}
112+
},
113+
"nbformat": 4,
114+
"nbformat_minor": 5
115+
}

requirements.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
h5py == 3.10.0
2+
jupyter == 1.0.0
3+
keras == 3.1.1
4+
matplotlib == 3.8.4
5+
notebook == 7.1.2
6+
numpy == 1.26.4
7+
pandas == 2.2.1
8+
pillow == 10.3.0
9+
scikit-learn == 1.4.0
10+
seaborn == 0.13.2
11+
spacy == 3.7.4

0 commit comments

Comments
 (0)