From cc0aaf69aa2fa7c2b09c5fa4b337a6330204de07 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:06:44 +0900 Subject: [PATCH 01/18] =?UTF-8?q?feat:=20=E4=BE=9D=E5=AD=98=E3=83=A9?= =?UTF-8?q?=E3=82=A4=E3=83=96=E3=83=A9=E3=83=AA=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- requirements.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/requirements.txt b/requirements.txt index 0ca03183..8dcfe44f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,6 @@ django-oauth-toolkit==1.7.1 djangorestframework==3.14.0 django-webpack-loader==1.8.1 psycopg2==2.9.3 +pandas==2.2.2 +matplotlib==3.9.1 +ruff==0.5.0 From c3d4b26873fb718ab9c1835b0501d73e74dc7adb Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:08:27 +0900 Subject: [PATCH 02/18] =?UTF-8?q?feat:=20ruff=20=E3=81=AE=E8=A8=AD?= =?UTF-8?q?=E5=AE=9A=E3=83=95=E3=82=A1=E3=82=A4=E3=83=AB=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ruff.toml | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 ruff.toml diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 00000000..be8b193f --- /dev/null +++ b/ruff.toml @@ -0,0 +1,25 @@ +line-length = 99 +indent-width = 4 + +# Assume Python 3.9 +target-version = "py39" + +[lint] +select = ["E4", "E7", "E9", "F", "B", "E", "W", "I", "PL", "DJ"] + +# Avoid enforcing line-length violations (`E501`) +ignore = ["E501", "B904", "PLW1508"] + +# Avoid trying to fix flake8-bugbear (`B`) violations and unused-import (E401). +unfixable = ["B", "E401"] + +# Ignore `E402` (import violations) in all `__init__.py` files, and in select subdirectories. +[lint.per-file-ignores] +"__init__.py" = ["E402"] +"**/{tests,docs,tools}/*" = ["E402"] + +[format] +# Use single quotes for non-triple-quoted strings. +quote-style = "single" +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" From 8b4db383b7da7180b933e4a940aafdee9a820100 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:10:17 +0900 Subject: [PATCH 03/18] =?UTF-8?q?style:=20ruff=20=E3=81=AE=E3=83=AA?= =?UTF-8?q?=E3=83=B3=E3=83=88=E3=81=A8=E3=83=95=E3=82=A9=E3=83=BC=E3=83=9E?= =?UTF-8?q?=E3=83=83=E3=83=88=E3=82=92=E9=81=A9=E7=94=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 実行コード ```sh $ ruff check --fix; ruff format ``` --- dashboard/consumption/urls.py | 1 + dashboard/dashboard/settings.py | 16 ++++++++-------- dashboard/dashboard/urls.py | 5 +++-- dashboard/dashboard/wsgi.py | 2 +- dashboard/manage.py | 8 ++++---- 5 files changed, 17 insertions(+), 15 deletions(-) diff --git a/dashboard/consumption/urls.py b/dashboard/consumption/urls.py index 0ab53ec1..94c80b39 100644 --- a/dashboard/consumption/urls.py +++ b/dashboard/consumption/urls.py @@ -1,4 +1,5 @@ from django.urls import re_path + from . import views urlpatterns = [ diff --git a/dashboard/dashboard/settings.py b/dashboard/dashboard/settings.py index 0128ded9..948d9542 100644 --- a/dashboard/dashboard/settings.py +++ b/dashboard/dashboard/settings.py @@ -25,7 +25,7 @@ # SECURITY WARNING: don't run with debug turned on in production! DEBUG = True -ALLOWED_HOSTS = ["*"] +ALLOWED_HOSTS = ['*'] # Application definition @@ -75,13 +75,13 @@ # https://docs.djangoproject.com/en/1.11/ref/settings/#databases DATABASES = { - "default": { - "ENGINE": "django.db.backends.postgresql", - "NAME": os.getenv("SMAP_DB_NAME", "smapdb"), - "USER": os.getenv("SMAP_DB_USER", "smap"), - "PASSWORD": os.getenv("SMAP_DB_PASSWORD", "smap1234"), - "HOST": os.getenv("SMAP_DB_HOST", "localhost"), - "PORT": os.getenv("SMAP_DB_PORT", 5432), + 'default': { + 'ENGINE': 'django.db.backends.postgresql', + 'NAME': os.getenv('SMAP_DB_NAME', 'smapdb'), + 'USER': os.getenv('SMAP_DB_USER', 'smap'), + 'PASSWORD': os.getenv('SMAP_DB_PASSWORD', 'smap1234'), + 'HOST': os.getenv('SMAP_DB_HOST', 'localhost'), + 'PORT': os.getenv('SMAP_DB_PORT', 5432), } } diff --git a/dashboard/dashboard/urls.py b/dashboard/dashboard/urls.py index 1d413510..d41ebb7a 100644 --- a/dashboard/dashboard/urls.py +++ b/dashboard/dashboard/urls.py @@ -13,10 +13,11 @@ 1. Import the include() function: from django.conf.urls import url, include 2. Add a URL to urlpatterns: url(r'^blog/', include('blog.urls')) """ -from django.urls import re_path, include + from django.contrib import admin +from django.urls import include, re_path urlpatterns = [ re_path(r'^admin/', admin.site.urls), - re_path(r'^', include('consumption.urls')) + re_path(r'^', include('consumption.urls')), ] diff --git a/dashboard/dashboard/wsgi.py b/dashboard/dashboard/wsgi.py index ce07ee7d..331a3f3e 100644 --- a/dashboard/dashboard/wsgi.py +++ b/dashboard/dashboard/wsgi.py @@ -11,6 +11,6 @@ from django.core.wsgi import get_wsgi_application -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dashboard.settings") +os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dashboard.settings') application = get_wsgi_application() diff --git a/dashboard/manage.py b/dashboard/manage.py index bc629b3f..abb7a313 100755 --- a/dashboard/manage.py +++ b/dashboard/manage.py @@ -2,8 +2,8 @@ import os import sys -if __name__ == "__main__": - os.environ.setdefault("DJANGO_SETTINGS_MODULE", "dashboard.settings") +if __name__ == '__main__': + os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'dashboard.settings') try: from django.core.management import execute_from_command_line except ImportError: @@ -15,8 +15,8 @@ except ImportError: raise ImportError( "Couldn't import Django. Are you sure it's installed and " - "available on your PYTHONPATH environment variable? Did you " - "forget to activate a virtual environment?" + 'available on your PYTHONPATH environment variable? Did you ' + 'forget to activate a virtual environment?' ) raise execute_from_command_line(sys.argv) From 82382adaf3d7292f0f7ccfb64c9324a53c0ff0d2 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:22:06 +0900 Subject: [PATCH 04/18] =?UTF-8?q?feat:=20=E3=83=A2=E3=83=87=E3=83=AB?= =?UTF-8?q?=E5=AE=9A=E7=BE=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dashboard/consumption/models.py | 54 ++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/dashboard/consumption/models.py b/dashboard/consumption/models.py index 1dfab760..a1e4b94e 100644 --- a/dashboard/consumption/models.py +++ b/dashboard/consumption/models.py @@ -2,5 +2,57 @@ from __future__ import unicode_literals from django.db import models +from django.utils import timezone -# Create your models here. + +class QuerySet(models.QuerySet): + """ + queryset.update()で更新日時を記録するhook + + ref: https://scrapbox.io/shimizukawa/django_bulk_update_%E6%99%82%E3%81%ABupdated_at%E3%82%92%E6%9B%B4%E6%96%B0%E3%81%99%E3%82%8B + """ + + # bulk update SQLの発行元メソッド + def update(self, **kwargs) -> int: + if 'updated_at' not in kwargs: + kwargs['updated_at'] = timezone.now() + return super().update(**kwargs) + + +class BaseModel(models.Model): + created_at = models.DateTimeField(auto_now_add=True) + updated_at = models.DateTimeField(auto_now=True, blank=True, null=True) + + # bulk_update では Model.save() が呼ばれないため、updated_at が更新されない + # この対処として、objects を差し替える + # ref: https://scrapbox.io/shimizukawa/django_bulk_update_%E6%99%82%E3%81%ABupdated_at%E3%82%92%E6%9B%B4%E6%96%B0%E3%81%99%E3%82%8B + objects = models.manager.BaseManager.from_queryset(QuerySet)() + + class Meta: + abstract = True + + +class User(BaseModel): + id = models.IntegerField(primary_key=True, help_text='ユーザID') + area = models.CharField(max_length=3, help_text='エリア') + tariff = models.CharField(max_length=3, help_text='関税') + + def __str__(self): + return f'User {self.id} - Area: {self.area} - Tariff: {self.tariff}' + + +class Consumption(models.Model): + id = models.AutoField(primary_key=True) + user = models.ForeignKey( + User, on_delete=models.PROTECT, help_text='この消費データに関連するユーザ' + ) + datetime = models.DateTimeField(help_text='消費データの日時') + consumption = models.FloatField(help_text='30分ごとのエネルギー消費量') + + class Meta: + constraints = [ + models.UniqueConstraint(fields=['user', 'datetime'], name='unique_user_datetime') + ] + + def __str__(self): + return f'User {self.user.id} - Datetime: {self.datetime} - Consumption: {self.consumption}' From fa6f17b971a7319b79b026f1a5e5eeb46f57d06b Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 12:09:15 +0900 Subject: [PATCH 05/18] =?UTF-8?q?chore:=20=E3=83=A2=E3=83=87=E3=83=AB?= =?UTF-8?q?=E3=82=92=20admin=20=E3=83=9A=E3=83=BC=E3=82=B8=E3=81=AB?= =?UTF-8?q?=E7=99=BB=E9=8C=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dashboard/consumption/admin.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dashboard/consumption/admin.py b/dashboard/consumption/admin.py index 13be29d9..81022d15 100644 --- a/dashboard/consumption/admin.py +++ b/dashboard/consumption/admin.py @@ -3,4 +3,8 @@ from django.contrib import admin +from consumption.models import Consumption, User + # Register your models here. +admin.site.register(User) +admin.site.register(Consumption) From 26de8f66f3e170296950f414dbfd5b47d37c385b Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:15:02 +0900 Subject: [PATCH 06/18] =?UTF-8?q?feat:=20=E3=83=A6=E3=83=BC=E3=82=B6?= =?UTF-8?q?=E3=83=BC=E3=82=A4=E3=83=B3=E3=83=9D=E3=83=BC=E3=83=88=E6=A9=9F?= =?UTF-8?q?=E8=83=BD=E3=81=AE=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../consumption/management/commands/import.py | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/dashboard/consumption/management/commands/import.py b/dashboard/consumption/management/commands/import.py index 9593d6a5..f57cbe17 100644 --- a/dashboard/consumption/management/commands/import.py +++ b/dashboard/consumption/management/commands/import.py @@ -1,8 +1,68 @@ +from pathlib import Path +from typing import Iterable + +import pandas as pd +from django.conf import settings from django.core.management.base import BaseCommand +from django.db import transaction + +from consumption.models import User + + +def make_user_list_to_create_and_update( + df: pd.DataFrame, existing_users: dict[int, User] +) -> Iterable[list[User]]: + """User テーブルに登録するユーザーリストと、更新するユーザーリストを作成""" + users_to_create = [] + users_to_update = [] + + for _, row in df.iterrows(): + user_id = row['id'] + if user_id in existing_users: + user = existing_users[user_id] + user.area = row['area'] + user.tariff = row['tariff'] + users_to_update.append(user) + else: + users_to_create.append(User(id=row['id'], area=row['area'], tariff=row['tariff'])) + + return users_to_create, users_to_update + + +def import_user_data(csv_file_path, batch_size=10000): + """ユーザー情報を CSV から User テーブルへインポート""" + df = pd.read_csv(csv_file_path) + + # 列名のチェック + if not all(column in df.columns for column in ['id', 'area', 'tariff']): + raise ValueError("CSV file must contain 'id', 'area', and 'tariff' columns") + + existing_users = User.objects.in_bulk(df['id'].tolist()) + users_to_create, users_to_update = make_user_list_to_create_and_update(df, existing_users) + with transaction.atomic(): + for i in range(0, len(users_to_create) - batch_size, batch_size): + if len(users_to_create) - i >= batch_size: + User.objects.bulk_create(users_to_create[i : i + batch_size]) + else: + User.objects.bulk_create(users_to_create[i:]) + + for i in range(0, len(users_to_create) - batch_size, batch_size): + if len(users_to_update) - i >= batch_size: + User.objects.bulk_update(users_to_update[i : i + batch_size], ['area', 'tariff']) + else: + User.objects.bulk_update(users_to_update[i:], ['area', 'tariff']) + class Command(BaseCommand): help = 'import data' def handle(self, *args, **options): - print("Implement me!") + data_dir = Path(settings.BASE_DIR).parent / 'data' + if not data_dir.exists(): + raise FileNotFoundError( + f'`{data_dir}` not found. Please place the directory containing the CSV files.' + ) + + import_user_data(data_dir / 'user_data.csv') + From 3ac2dd54914fdf603ffbf1114b6e47c9e691696f Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:16:58 +0900 Subject: [PATCH 07/18] =?UTF-8?q?feat:=20=E6=B6=88=E8=B2=BB=E9=87=8F?= =?UTF-8?q?=E3=82=A4=E3=83=B3=E3=83=9D=E3=83=BC=E3=83=88=E6=A9=9F=E8=83=BD?= =?UTF-8?q?=E3=81=AE=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../consumption/management/commands/import.py | 129 +++++++++++++++++- 1 file changed, 127 insertions(+), 2 deletions(-) diff --git a/dashboard/consumption/management/commands/import.py b/dashboard/consumption/management/commands/import.py index f57cbe17..0af95722 100644 --- a/dashboard/consumption/management/commands/import.py +++ b/dashboard/consumption/management/commands/import.py @@ -1,12 +1,14 @@ from pathlib import Path -from typing import Iterable +from typing import Any, Iterable import pandas as pd from django.conf import settings from django.core.management.base import BaseCommand from django.db import transaction +from django.utils.timezone import make_aware +from pandas.api.types import is_float_dtype -from consumption.models import User +from consumption.models import Consumption, User def make_user_list_to_create_and_update( @@ -53,6 +55,123 @@ def import_user_data(csv_file_path, batch_size=10000): User.objects.bulk_update(users_to_update[i:], ['area', 'tariff']) +def load_consumption_data(consumption_dir: Path) -> pd.DataFrame: + """消費量の情報を複数の CSV から取得して1つの pd.DataFrame に集約""" + + all_files = [consumption_dir / f for f in consumption_dir.glob('*.csv')] + if not all_files: + raise Exception(f'No CSV files found in {consumption_dir}') + + # 全てのCSVファイルをロード + all_dfs = [] + for file in all_files: + user_id = file.stem + try: + int(user_id) + except ValueError: + raise ValueError(f'Invalid user_id in filename: {file}') + + df = pd.read_csv(file) + df['user_id'] = int(user_id) + all_dfs.append(df) + + combined_df = pd.concat(all_dfs, ignore_index=True) + + # 列名のチェック + required_columns = {'user_id', 'datetime', 'consumption'} + if not required_columns.issubset(combined_df.columns): + raise ValueError(f'CSV file must contain columns: {required_columns}') + + # datetimeのパース + combined_df['datetime'] = pd.to_datetime(combined_df['datetime']) + # CSVデータにタイムゾーンの情報が含まれていない場合、UTCとして扱う + if combined_df['datetime'].dt.tz is None: + combined_df['datetime'] = combined_df['datetime'].apply(make_aware) + + # 重複の削除 + combined_df = combined_df.drop_duplicates(subset=['user_id', 'datetime'], keep='last') + + # consumption が float か確認 + if not is_float_dtype(combined_df['consumption']): + try: + combined_df['consumption'] = combined_df['consumption'].astype('float64') + except ValueError as e: + raise ValueError( + f'{e}. Correct the aforementioned characters in the consumption CSV to the appropriate numerical values.' + ) + return combined_df + + +def make_consumption_data_list_to_create_and_update( + combined_df: pd.DataFrame, + existing_consumptions: dict[Any, Consumption], + existing_users: dict[int, User], +) -> Iterable[list[Consumption]]: + """Consumption テーブルに登録する消費量のリストと、更新する消費量のリストを作成""" + consumption_data_to_create = [] + consumption_data_to_update = [] + + for _, row in combined_df.iterrows(): + user_id = row['user_id'] + key = (user_id, row['datetime']) + if key in existing_consumptions: + consumption_data = existing_consumptions[key] + consumption_data.consumption = row['consumption'] + consumption_data_to_update.append(consumption_data) + else: + consumption_data_to_create.append( + Consumption( + user=existing_users[user_id], + datetime=row['datetime'], + consumption=row['consumption'], + ) + ) + + return consumption_data_to_create, consumption_data_to_update + + +def import_all_consumption_data(consumption_dir: Path, batch_size=1000): + """消費量の情報を複数の CSV から Consumption テーブルへインポート""" + combined_df = load_consumption_data(consumption_dir) + + # 全ユーザIDを取得 + user_ids = combined_df['user_id'].unique().tolist() + existing_users = User.objects.in_bulk(user_ids) + + # 登録されていないユーザIDをチェック + for user_id in user_ids: + if int(user_id) not in existing_users: + raise ValueError(f'User ID {user_id} not found in database') + + # 既存の消費データを一括取得 + existing_data = Consumption.objects.filter( + user_id__in=user_ids, datetime__in=combined_df['datetime'].tolist() + ) + existing_consumptions = {(data.user.id, data.datetime): data for data in existing_data} + + consumption_data_to_create, consumption_data_to_update = ( + make_consumption_data_list_to_create_and_update( + combined_df=combined_df, + existing_consumptions=existing_consumptions, + existing_users=existing_users, + ) + ) + + with transaction.atomic(): + for i in range(0, len(consumption_data_to_create), batch_size): + if len(consumption_data_to_create) - i >= batch_size: + Consumption.objects.bulk_create(consumption_data_to_create[i : i + batch_size]) + else: + Consumption.objects.bulk_create(consumption_data_to_create[i:]) + + for i in range(0, len(consumption_data_to_update), batch_size): + if len(consumption_data_to_update) - i >= batch_size: + Consumption.objects.bulk_update( + consumption_data_to_update[i : i + batch_size], ['consumption'] + ) + else: + Consumption.objects.bulk_update(consumption_data_to_update[i:], ['consumption']) + class Command(BaseCommand): help = 'import data' @@ -66,3 +185,9 @@ def handle(self, *args, **options): import_user_data(data_dir / 'user_data.csv') + consumption_dir = data_dir / 'consumption' + if not consumption_dir.exists(): + raise FileNotFoundError( + f'`{consumption_dir}` not found. Please place the directory containing the CSV files.' + ) + import_all_consumption_data(consumption_dir) From 7c3b3c4c6472cf0f0e2dcd6227d95d84599b7208 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:26:08 +0900 Subject: [PATCH 08/18] =?UTF-8?q?feat:=20=E3=83=81=E3=83=A3=E3=83=BC?= =?UTF-8?q?=E3=83=88=E9=96=A2=E9=80=A3=E3=81=AE=E3=83=95=E3=82=A1=E3=82=A4?= =?UTF-8?q?=E3=83=AB=E3=82=92=E6=A0=BC=E7=B4=8D=E3=81=99=E3=82=8B=E3=83=87?= =?UTF-8?q?=E3=82=A3=E3=83=AC=E3=82=AF=E3=83=88=E3=83=AA=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dashboard/consumption/chart/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 dashboard/consumption/chart/__init__.py diff --git a/dashboard/consumption/chart/__init__.py b/dashboard/consumption/chart/__init__.py new file mode 100644 index 00000000..e69de29b From 8cce9e6bf4637950fd941896cd0579ead414dfb0 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 11:29:51 +0900 Subject: [PATCH 09/18] =?UTF-8?q?feat:=20=E6=97=A5=E3=81=94=E3=81=A8?= =?UTF-8?q?=E3=81=AE=E6=B6=88=E8=B2=BB=E9=87=8F=E3=81=AE=E7=B7=8F=E9=87=8F?= =?UTF-8?q?=E3=81=AE=E3=82=B0=E3=83=A9=E3=83=95=E7=94=9F=E6=88=90=E6=A9=9F?= =?UTF-8?q?=E8=83=BD=E3=82=92=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit このグラフには日ごとの - 消費量の総量 - 中央値と 10-90%-ile がプロットされる。 --- dashboard/consumption/chart/generate.py | 61 +++++++++++++++++++++ dashboard/consumption/chart/statistics.py | 67 +++++++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 dashboard/consumption/chart/generate.py create mode 100644 dashboard/consumption/chart/statistics.py diff --git a/dashboard/consumption/chart/generate.py b/dashboard/consumption/chart/generate.py new file mode 100644 index 00000000..ccbfa2d6 --- /dev/null +++ b/dashboard/consumption/chart/generate.py @@ -0,0 +1,61 @@ +import base64 +import io + +import matplotlib.pyplot as plt +import pandas as pd +from matplotlib.figure import Figure + +from consumption.chart.statistics import ( + get_daily_percentiles_for_all, + get_daily_total_consumptions_for_all, +) + + +def plot_total_consumption(df: pd.DataFrame, percentiles: pd.DataFrame) -> Figure: + fig, ax1 = plt.subplots(figsize=(10, 5)) + + ax1.plot(df['date'], df['daily_total'], label='Total Consumption', color='blue') + ax1.set_title('Daily Consumption with 10-90 Percentile and Median') + ax1.set_xlabel('Date') + ax1.set_ylabel('Total Consumption', color='blue') + ax1.tick_params(axis='y', labelcolor='blue') + ax1.grid(True) + + ax2 = ax1.twinx() + if not percentiles.empty: + ax2.fill_between( + percentiles['date'], + percentiles['p10'], + percentiles['p90'], + color='green', + alpha=0.1, + label='10-90 Percentile', + ) + ax2.plot( + percentiles['date'], percentiles['p50'], linestyle='--', label='Median', color='green' + ) + ax2.set_ylabel('Percentiles and Median', color='green') + ax2.tick_params(axis='y', labelcolor='green') + + # 凡例の統合 + lines, labels = ax1.get_legend_handles_labels() + lines2, labels2 = ax2.get_legend_handles_labels() + ax1.legend(lines + lines2, labels + labels2, loc='upper left', bbox_to_anchor=(0.1, 0.9)) + + return fig + + +def generate_daily_total_consumption_graph() -> str: + """日ごとの消費量の総量と、中央値と 10-90%-ile をプロットしたグラフを生成""" + df = get_daily_total_consumptions_for_all() + percentiles = get_daily_percentiles_for_all() + + with io.BytesIO() as buffer: + fig = plot_total_consumption(df, percentiles) + fig.savefig(buffer, format='png') + buffer.seek(0) + image_png = buffer.getvalue() + + graph = base64.b64encode(image_png).decode('utf-8') + + return graph diff --git a/dashboard/consumption/chart/statistics.py b/dashboard/consumption/chart/statistics.py new file mode 100644 index 00000000..5809c624 --- /dev/null +++ b/dashboard/consumption/chart/statistics.py @@ -0,0 +1,67 @@ +import pandas as pd +from django.db import connection +from django.db.models import Sum +from django.db.models.functions import TruncDate + +from consumption.models import Consumption + + +def get_daily_total_consumptions_for_all() -> pd.DataFrame: + """全ユーザーの日ごとの消費量の合計を集計 + + Returns + ------- + pandas.DataFrame + columns=['date', 'daily_total'] + date: 日付, + daily_total: 全ユーザーの日ごとの消費量の合計 + """ + daily_total_consumption = ( + Consumption.objects.annotate(date=TruncDate('datetime')) + .values('date') + .annotate(daily_total=Sum('consumption')) + .order_by('date') + ) + return pd.DataFrame(daily_total_consumption) + + +def get_daily_percentiles_for_all() -> pd.DataFrame: + """日ごとの消費量の 10-90%-ile と中央値を集計 + + Returns + ------- + pandas.DataFrame + columns=['date', 'p10', 'p50', 'p90'] + date: 日付, + p10: 全ユーザーの日ごとの消費量の 10%-ile, + p50: 全ユーザーの日ごとの消費量の 50%-ile (median), + p90: 全ユーザーの日ごとの消費量の 90%-ile + """ + # モデルからテーブル名を取得 + consumption_table = Consumption._meta.db_table + + # NOTE: + query = f""" + WITH daily_totals AS ( + SELECT + user_id, + DATE_TRUNC('day', datetime) AS date, + SUM(consumption) AS daily_total + FROM {consumption_table} + GROUP BY user_id, date + ) + SELECT + date, + PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY daily_total) AS p10, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY daily_total) AS p50, + PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY daily_total) AS p90 + FROM daily_totals + GROUP BY date + ORDER BY date; + """ + + with connection.cursor() as cursor: + cursor.execute(query) + rows = cursor.fetchall() + + return pd.DataFrame(rows, columns=['date', 'p10', 'p50', 'p90']) From ebed81b6f5aeda33c3d41adb420e9d4dc95a87b7 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 12:13:49 +0900 Subject: [PATCH 10/18] =?UTF-8?q?feat:=20=E3=82=A8=E3=83=AA=E3=82=A2?= =?UTF-8?q?=E5=88=A5=E3=81=AE=E6=97=A5=E3=81=94=E3=81=A8=E3=81=AE=E6=B6=88?= =?UTF-8?q?=E8=B2=BB=E9=87=8F=E3=81=AE=E7=B7=8F=E9=87=8F=E3=81=AE=E3=82=B0?= =?UTF-8?q?=E3=83=A9=E3=83=95=E7=94=9F=E6=88=90=E6=A9=9F=E8=83=BD=E3=82=92?= =?UTF-8?q?=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit このグラフにはエリア別に日ごとの - 消費量の総量 - 中央値と 10-90%-ile がプロットされる。 --- dashboard/consumption/chart/generate.py | 64 +++++++++++++++++++++ dashboard/consumption/chart/statistics.py | 70 ++++++++++++++++++++++- 2 files changed, 133 insertions(+), 1 deletion(-) diff --git a/dashboard/consumption/chart/generate.py b/dashboard/consumption/chart/generate.py index ccbfa2d6..3c2d885c 100644 --- a/dashboard/consumption/chart/generate.py +++ b/dashboard/consumption/chart/generate.py @@ -6,6 +6,8 @@ from matplotlib.figure import Figure from consumption.chart.statistics import ( + get_area_daily_percentiles, + get_area_daily_total_consumptions, get_daily_percentiles_for_all, get_daily_total_consumptions_for_all, ) @@ -45,6 +47,53 @@ def plot_total_consumption(df: pd.DataFrame, percentiles: pd.DataFrame) -> Figur return fig +def plot_area_consumption(area_totals: pd.DataFrame, area_percentiles: pd.DataFrame) -> Figure: + fig, ax = plt.subplots(figsize=(10, 5)) + + colors = ['red', 'cyan', 'green', 'blue'] + color_index = 0 + ax2 = ax.twinx() + for area in area_totals['area'].unique(): + area_data_totals = area_totals[area_totals['area'] == area] + area_data_percentiles = area_percentiles[area_percentiles['area'] == area] + + ax.plot( + area_data_totals['date'], + area_data_totals['daily_total'], + label=f'{area} Total Consumption', + color=colors[color_index], + ) + ax2.fill_between( + area_data_percentiles['date'], + area_data_percentiles['p10'], + area_data_percentiles['p90'], + alpha=0.1, + label=f'{area} 10-90 Percentile', + color=colors[color_index], + ) + ax2.plot( + area_data_percentiles['date'], + area_data_percentiles['p50'], + linestyle='--', + label=f'{area} Median', + color=colors[color_index], + ) + + color_index = (color_index + 1) % len(colors) + + ax.set_title('Daily Consumption with 10-90 Percentile and Median by Area') + ax.set_xlabel('Date') + ax.set_ylabel('Total Consumption') + ax.grid(True) + + ax2.set_ylabel('Percentiles and Median') + lines, labels = ax.get_legend_handles_labels() + lines2, labels2 = ax2.get_legend_handles_labels() + ax.legend(lines + lines2, labels + labels2, loc='upper left', bbox_to_anchor=(0.1, 0.9)) + + return fig + + def generate_daily_total_consumption_graph() -> str: """日ごとの消費量の総量と、中央値と 10-90%-ile をプロットしたグラフを生成""" df = get_daily_total_consumptions_for_all() @@ -58,4 +107,19 @@ def generate_daily_total_consumption_graph() -> str: graph = base64.b64encode(image_png).decode('utf-8') + +def generate_daily_total_consumption_graph_by_area() -> str: + """エリア別に、日ごとの消費量の総量と、中央値と 10-90%-ile をプロットしたグラフを生成""" + df = get_area_daily_total_consumptions() + percentiles = get_area_daily_percentiles() + + with io.BytesIO() as buffer: + fig = plot_area_consumption(df, percentiles) + fig.savefig(buffer, format='png') + buffer.seek(0) + image_png = buffer.getvalue() + + graph = base64.b64encode(image_png).decode('utf-8') + return graph + return graph diff --git a/dashboard/consumption/chart/statistics.py b/dashboard/consumption/chart/statistics.py index 5809c624..75a4d385 100644 --- a/dashboard/consumption/chart/statistics.py +++ b/dashboard/consumption/chart/statistics.py @@ -3,7 +3,7 @@ from django.db.models import Sum from django.db.models.functions import TruncDate -from consumption.models import Consumption +from consumption.models import Consumption, User def get_daily_total_consumptions_for_all() -> pd.DataFrame: @@ -65,3 +65,71 @@ def get_daily_percentiles_for_all() -> pd.DataFrame: rows = cursor.fetchall() return pd.DataFrame(rows, columns=['date', 'p10', 'p50', 'p90']) + + +def get_area_daily_total_consumptions() -> pd.DataFrame: + """エリア別に日ごとの消費量の合計を集計 + + Returns + ------- + pandas.DataFrame + columns=['area', date', 'daily_total'] + area: エリア名 + date: 日付, + daily_total: 全ユーザーの日ごとの消費量の合計 + """ + area_daily_totals = ( + Consumption.objects.select_related('user') + .annotate(date=TruncDate('datetime')) + .values('user__area', 'date') + .annotate(daily_total=Sum('consumption')) + .order_by('user__area', 'date') + ) + df = pd.DataFrame(area_daily_totals) + df.rename(columns={'user__area': 'area'}, inplace=True) + return df + + +def get_area_daily_percentiles() -> pd.DataFrame: + """エリア別に日ごとの消費量の 10-90%-ile と中央値を集計 + + Returns + ------- + pandas.DataFrame + columns=['area', 'date', 'p10', 'p50', 'p90'] + area: エリア名, + date: 日付, + p10: 全ユーザーの日ごとの消費量の 10%-ile, + p50: 全ユーザーの日ごとの消費量の 50%-ile (median), + p90: 全ユーザーの日ごとの消費量の 90%-ile + """ + # モデルからテーブル名を取得 + consumption_table = Consumption._meta.db_table + user_table = User._meta.db_table + + query = f""" + WITH daily_totals AS ( + SELECT + area, + DATE_TRUNC('day', datetime) AS date, + SUM(consumption) AS daily_total + FROM {consumption_table} AS c + INNER JOIN {user_table} AS u ON c.user_id = u.id + GROUP BY area, user_id, date + ) + SELECT + area, + date, + PERCENTILE_CONT(0.1) WITHIN GROUP (ORDER BY daily_total) AS p10, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY daily_total) AS p50, + PERCENTILE_CONT(0.9) WITHIN GROUP (ORDER BY daily_total) AS p90 + FROM daily_totals + GROUP BY area, date + ORDER BY area, date; + """ + + with connection.cursor() as cursor: + cursor.execute(query) + rows = cursor.fetchall() + + return pd.DataFrame(rows, columns=['area', 'date', 'p10', 'p50', 'p90']) From 17cf970b6597bd8f1f2012864fe1a6d65f71a92d Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 12:16:27 +0900 Subject: [PATCH 11/18] =?UTF-8?q?feat:=20=E7=89=B9=E5=AE=9A=E3=83=A6?= =?UTF-8?q?=E3=83=BC=E3=82=B6=E3=83=BC=E3=81=AE=E6=97=A5=E3=81=94=E3=81=A8?= =?UTF-8?q?=E3=81=AE=E6=B6=88=E8=B2=BB=E9=87=8F=E3=81=AE=E7=B7=8F=E9=87=8F?= =?UTF-8?q?=E3=81=AE=E3=82=B0=E3=83=A9=E3=83=95=E7=94=9F=E6=88=90=E6=A9=9F?= =?UTF-8?q?=E8=83=BD=E3=82=92=E5=AE=9F=E8=A3=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit このグラフには特定ユーザーの日ごとの消費量の総量と、 そのユーザーが属するエリアの日ごとの消費量の中央値がプロットされる。 --- dashboard/consumption/chart/generate.py | 38 +++++++++++++ dashboard/consumption/chart/statistics.py | 69 +++++++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/dashboard/consumption/chart/generate.py b/dashboard/consumption/chart/generate.py index 3c2d885c..64cbb760 100644 --- a/dashboard/consumption/chart/generate.py +++ b/dashboard/consumption/chart/generate.py @@ -10,6 +10,8 @@ get_area_daily_total_consumptions, get_daily_percentiles_for_all, get_daily_total_consumptions_for_all, + get_user_area_daily_consumption_median, + get_user_daily_total_consumptions, ) @@ -94,6 +96,28 @@ def plot_area_consumption(area_totals: pd.DataFrame, area_percentiles: pd.DataFr return fig +def plot_user_and_area_consumption( + user_df: pd.DataFrame, area_df: pd.DataFrame, user_id: int +) -> Figure: + fig, ax = plt.subplots(figsize=(10, 5)) + ax.plot( + user_df['date'], user_df['daily_total'], label=f'User {user_id} Consumption', color='blue' + ) + ax.plot( + area_df['date'], + area_df['p50'], + label='Area Median Consumption', + color='red', + linestyle='--', + ) + ax.set_xlabel('Date') + ax.set_ylabel('Total Consumption') + ax.grid(True) + ax.legend(loc='upper left') + + return fig + + def generate_daily_total_consumption_graph() -> str: """日ごとの消費量の総量と、中央値と 10-90%-ile をプロットしたグラフを生成""" df = get_daily_total_consumptions_for_all() @@ -106,6 +130,7 @@ def generate_daily_total_consumption_graph() -> str: image_png = buffer.getvalue() graph = base64.b64encode(image_png).decode('utf-8') + return graph def generate_daily_total_consumption_graph_by_area() -> str: @@ -122,4 +147,17 @@ def generate_daily_total_consumption_graph_by_area() -> str: graph = base64.b64encode(image_png).decode('utf-8') return graph + +def generate_user_consumption_graph(user_id: int) -> str: + """ユーザーごとの日ごとの消費量の総量と、エリアの中央値をプロットしたグラフを生成""" + user_df = get_user_daily_total_consumptions(user_id) + area_df = get_user_area_daily_consumption_median(user_id) + + with io.BytesIO() as buffer: + fig = plot_user_and_area_consumption(user_df, area_df, user_id) + fig.savefig(buffer, format='png') + buffer.seek(0) + image_png1 = buffer.getvalue() + + graph = base64.b64encode(image_png1).decode('utf-8') return graph diff --git a/dashboard/consumption/chart/statistics.py b/dashboard/consumption/chart/statistics.py index 75a4d385..d00f9a49 100644 --- a/dashboard/consumption/chart/statistics.py +++ b/dashboard/consumption/chart/statistics.py @@ -133,3 +133,72 @@ def get_area_daily_percentiles() -> pd.DataFrame: rows = cursor.fetchall() return pd.DataFrame(rows, columns=['area', 'date', 'p10', 'p50', 'p90']) + + +def get_user_daily_total_consumptions(user_id: int) -> pd.DataFrame: + """特定ユーザーの日ごとの消費量の合計を集計 + + Params + ------ + user_id : int + 対象ユーザのID + + Returns + ------- + pandas.DataFrame + columns=['date', 'daily_total'] + date: 日付, + daily_total: ユーザーの日ごとの消費量の合計 + """ + user_daily_totals = ( + Consumption.objects.filter(user_id=user_id) + .annotate(date=TruncDate('datetime')) + .values('date') + .annotate(daily_total=Sum('consumption')) + .order_by('date') + ) + return pd.DataFrame(user_daily_totals) + + +def get_user_area_daily_consumption_median(user_id: int) -> pd.DataFrame: + """特定ユーザが属するエリアの日ごとの消費量の中央値を集計 + + Params + ------ + user_id : int + 対象ユーザのID + + Returns + ------- + pandas.DataFrame + columns=['date', 'p10', 'p50', 'p90'] + date: 日付, + p50: 全ユーザーの日ごとの消費量の 50%-ile (median) + """ + # モデルからテーブル名を取得 + consumption_table = Consumption._meta.db_table + user_table = User._meta.db_table + + query = f""" + WITH daily_totals AS ( + SELECT + DATE_TRUNC('day', datetime) AS date, + SUM(consumption) AS daily_total + FROM {consumption_table} AS c + INNER JOIN {user_table} AS u ON c.user_id = u.id + WHERE u.area = (SELECT area FROM {user_table} WHERE id = %s) + GROUP BY user_id, date + ) + SELECT + date, + PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY daily_total) AS p50 + FROM daily_totals + GROUP BY date + ORDER BY date; + """ + + with connection.cursor() as cursor: + cursor.execute(query, [user_id]) + area_rows = cursor.fetchall() + + return pd.DataFrame(area_rows, columns=['date', 'p50']) From 1a94626c5093829356e2eff41e7e458fc364b546 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 12:18:05 +0900 Subject: [PATCH 12/18] =?UTF-8?q?tests:=20statistics.py=20=E3=81=AE?= =?UTF-8?q?=E3=81=9F=E3=82=81=E3=81=AE=E3=83=86=E3=82=B9=E3=83=88=E3=82=92?= =?UTF-8?q?=E4=BD=9C=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dashboard/consumption/tests.py | 215 ++++++++++++++++++++++++++++++++- 1 file changed, 214 insertions(+), 1 deletion(-) diff --git a/dashboard/consumption/tests.py b/dashboard/consumption/tests.py index 5982e6bc..7dc19d5a 100644 --- a/dashboard/consumption/tests.py +++ b/dashboard/consumption/tests.py @@ -1,6 +1,219 @@ # -*- coding: utf-8 -*- from __future__ import unicode_literals +import numpy as np +import pandas as pd from django.test import TestCase +from django.utils import timezone -# Create your tests here. +from consumption.chart.statistics import ( + get_area_daily_percentiles, + get_area_daily_total_consumptions, + get_daily_percentiles_for_all, + get_daily_total_consumptions_for_all, + get_user_area_daily_consumption_median, + get_user_daily_total_consumptions, +) +from consumption.models import Consumption, User + + +class StatisticsTests(TestCase): + def setUp(self): + # ユーザーを作成 + self.users = [ + User.objects.create(id=1, area='a1', tariff='t1'), + User.objects.create(id=2, area='a2', tariff='t3'), + User.objects.create(id=3, area='a1', tariff='t1'), + User.objects.create(id=4, area='a2', tariff='t3'), + ] + + # 基準時刻を作成 + std_time = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0) + + # 消費データの値をまとめる配列 + self.consumption_values = [] + + # 3日分のデータを30分おきに作成 + for day in range(3): + for half_hour in range(48): + datetime = ( + std_time + - timezone.timedelta(days=day) + + timezone.timedelta(minutes=30 * half_hour) + ) + for i, user in enumerate(self.users): + self.consumption_values.append((user, datetime, 10.0 * (i + 1) + half_hour)) + + # 消費データを作成 + for user, datetime, consumption in self.consumption_values: + Consumption.objects.create(user=user, datetime=datetime, consumption=consumption) + + def test_get_daily_total_consumptions_for_all(self): + df = get_daily_total_consumptions_for_all() + + # 消費データを日付ごとに集計 + consumption_by_date = {} + for _, datetime, consumption in self.consumption_values: + date = datetime.astimezone(timezone.get_default_timezone()).date() + if date not in consumption_by_date: + consumption_by_date[date] = 0 + consumption_by_date[date] += consumption + + # 期待されるデータフレームを作成 + expected_data = [ + {'date': date, 'daily_total': total} + for date, total in sorted(consumption_by_date.items()) + ] + expected_df = pd.DataFrame(expected_data) + + # 期待されるデータフレームと関数の結果を比較 + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_daily_percentiles_for_all(self): + df = get_daily_percentiles_for_all() + + # 日付ごとの消費データをユーザーごとに集計 + consumption_by_date_user = {} + for user, datetime, consumption in self.consumption_values: + date = datetime.astimezone(timezone.get_default_timezone()).date() + if date not in consumption_by_date_user: + consumption_by_date_user[date] = {} + if user.id not in consumption_by_date_user[date]: + consumption_by_date_user[date][user.id] = 0 + consumption_by_date_user[date][user.id] += consumption + + # 期待されるデータフレームを作成 + expected_data = [] + for date, user_consumptions in consumption_by_date_user.items(): + daily_totals = list(user_consumptions.values()) + p10 = np.percentile(daily_totals, 10) + p50 = np.percentile(daily_totals, 50) + p90 = np.percentile(daily_totals, 90) + expected_data.append({'date': date, 'p10': p10, 'p50': p50, 'p90': p90}) + + expected_df = pd.DataFrame(expected_data).sort_values('date').reset_index(drop=True) + expected_df['date'] = pd.to_datetime(expected_df['date']).dt.tz_localize( + timezone.get_default_timezone() + ) + + # 期待されるデータフレームと関数の結果を比較 + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_area_daily_total_consumptions(self): + df = get_area_daily_total_consumptions() + + # エリアごと、日付ごとの消費データを集計 + consumption_by_area_date = {} + for user, datetime, consumption in self.consumption_values: + date = datetime.astimezone(timezone.get_default_timezone()).date() + area = user.area + if area not in consumption_by_area_date: + consumption_by_area_date[area] = {} + if date not in consumption_by_area_date[area]: + consumption_by_area_date[area][date] = 0 + consumption_by_area_date[area][date] += consumption + + # 期待されるデータフレームを作成 + expected_data = [] + for area, date_consumptions in consumption_by_area_date.items(): + for date, total in date_consumptions.items(): + expected_data.append({'area': area, 'date': date, 'daily_total': total}) + + expected_df = ( + pd.DataFrame(expected_data).sort_values(['area', 'date']).reset_index(drop=True) + ) + + # 期待されるデータフレームと関数の結果を比較 + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_area_daily_percentiles(self): + df = get_area_daily_percentiles() + + # エリアごと、日付ごとの消費データをユーザーごとに集計 + consumption_by_area_date_user = {} + for user, datetime, consumption in self.consumption_values: + date = datetime.astimezone(timezone.get_default_timezone()).date() + area = user.area + if area not in consumption_by_area_date_user: + consumption_by_area_date_user[area] = {} + if date not in consumption_by_area_date_user[area]: + consumption_by_area_date_user[area][date] = {} + if user.id not in consumption_by_area_date_user[area][date]: + consumption_by_area_date_user[area][date][user.id] = 0 + consumption_by_area_date_user[area][date][user.id] += consumption + + # 期待されるデータフレームを作成 + expected_data = [] + for area, date_consumptions in consumption_by_area_date_user.items(): + for date, user_consumptions in date_consumptions.items(): + daily_totals = list(user_consumptions.values()) + p10 = np.percentile(daily_totals, 10) + p50 = np.percentile(daily_totals, 50) + p90 = np.percentile(daily_totals, 90) + expected_data.append( + {'area': area, 'date': date, 'p10': p10, 'p50': p50, 'p90': p90} + ) + + expected_df = ( + pd.DataFrame(expected_data).sort_values(['area', 'date']).reset_index(drop=True) + ) + expected_df['date'] = pd.to_datetime(expected_df['date']).dt.tz_localize( + timezone.get_default_timezone() + ) + + # 期待されるデータフレームと関数の結果を比較 + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_user_daily_total_consumptions(self): + user_id = self.users[0].id + df = get_user_daily_total_consumptions(user_id) + + # 特定ユーザーの日付ごとの消費データを集計 + consumption_by_date = {} + for user, datetime, consumption in self.consumption_values: + if user.id == user_id: + date = datetime.astimezone(timezone.get_default_timezone()).date() + if date not in consumption_by_date: + consumption_by_date[date] = 0 + consumption_by_date[date] += consumption + + # 期待されるデータフレームを作成 + expected_data = [ + {'date': date, 'daily_total': total} + for date, total in sorted(consumption_by_date.items()) + ] + expected_df = pd.DataFrame(expected_data) + + # 期待されるデータフレームと関数の結果を比較 + pd.testing.assert_frame_equal(df, expected_df) + + def test_get_user_area_daily_consumption_median(self): + user_id = self.users[0].id + df = get_user_area_daily_consumption_median(user_id) + + # 特定ユーザーが属するエリアの日付ごとの消費データをユーザーごとに集計 + user_area = self.users[0].area + consumption_by_date_user = {} + for user, datetime, consumption in self.consumption_values: + if user.area == user_area: + date = datetime.astimezone(timezone.get_default_timezone()).date() + if date not in consumption_by_date_user: + consumption_by_date_user[date] = {} + if user.id not in consumption_by_date_user[date]: + consumption_by_date_user[date][user.id] = 0 + consumption_by_date_user[date][user.id] += consumption + + # 期待されるデータフレームを作成 + expected_data = [] + for date, user_consumptions in consumption_by_date_user.items(): + daily_totals = list(user_consumptions.values()) + p50 = np.percentile(daily_totals, 50) + expected_data.append({'date': date, 'p50': p50}) + + expected_df = pd.DataFrame(expected_data).sort_values('date').reset_index(drop=True) + expected_df['date'] = pd.to_datetime(expected_df['date']).dt.tz_localize( + timezone.get_default_timezone() + ) + + # 期待されるデータフレームと関数の結果を比較 + pd.testing.assert_frame_equal(df, expected_df) From facb59db6ac3dbf08119c982ab9fafaa87875556 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 12:21:18 +0900 Subject: [PATCH 13/18] =?UTF-8?q?feat:=20layout.html=20=E3=82=92=E6=9B=B4?= =?UTF-8?q?=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../templates/consumption/layout.html | 60 ++++++++++++++++++- 1 file changed, 57 insertions(+), 3 deletions(-) diff --git a/dashboard/consumption/templates/consumption/layout.html b/dashboard/consumption/templates/consumption/layout.html index 4aa4edee..85c90a5f 100644 --- a/dashboard/consumption/templates/consumption/layout.html +++ b/dashboard/consumption/templates/consumption/layout.html @@ -1,8 +1,62 @@ + + + + Document + + + - - {% block content %}{% endblock content %} + + + + +
+
+
+ + +
+ +
    + {% for u_id in user_ids %} +
  • + {{ u_id }} +
  • + {% endfor %} +
+
+ + +
+ {% block content %}{% endblock content %} +
+ +
+
+
- \ No newline at end of file + + From 49cd8238706d16f24a0000a9e90b6a4f897bff46 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 12:23:17 +0900 Subject: [PATCH 14/18] =?UTF-8?q?feat:=20detail=20=E3=83=9A=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=81=AE=20URL=20=E3=82=92=E8=BF=BD=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dashboard/consumption/urls.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dashboard/consumption/urls.py b/dashboard/consumption/urls.py index 94c80b39..2ff9578e 100644 --- a/dashboard/consumption/urls.py +++ b/dashboard/consumption/urls.py @@ -5,5 +5,5 @@ urlpatterns = [ re_path(r'^$', views.summary), re_path(r'^summary/', views.summary), - re_path(r'^detail/', views.detail), + re_path(r'^detail/(?P\d+)/$', views.detail, name='detail'), ] From f984b3639806604778816bbed743e6d6a75a13bb Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 12:22:43 +0900 Subject: [PATCH 15/18] =?UTF-8?q?feat:=20summary=20=E3=83=9A=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=82=92=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../consumption/templates/consumption/summary.html | 9 +++++---- dashboard/consumption/views.py | 14 ++++++++++---- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/dashboard/consumption/templates/consumption/summary.html b/dashboard/consumption/templates/consumption/summary.html index f4d0dece..b1e2e57c 100644 --- a/dashboard/consumption/templates/consumption/summary.html +++ b/dashboard/consumption/templates/consumption/summary.html @@ -1,7 +1,8 @@ {% extends 'consumption/layout.html' %} {% block content %} - -

{{message}}

- -{% endblock %} \ No newline at end of file +

Total Consumption Chart

+Total Consumption Chart +

Area Consumption Chart

+Area Consumption Chart +{% endblock %} diff --git a/dashboard/consumption/views.py b/dashboard/consumption/views.py index 17827c03..6e028a72 100644 --- a/dashboard/consumption/views.py +++ b/dashboard/consumption/views.py @@ -3,13 +3,19 @@ from django.shortcuts import render -# Create your views here. +from consumption.chart.generate import ( + generate_daily_total_consumption_graph, + generate_daily_total_consumption_graph_by_area, +) +from consumption.models import User def summary(request): - context = { - 'message': 'Hello!', - } + graph = generate_daily_total_consumption_graph() + graph_by_area = generate_daily_total_consumption_graph_by_area() + user_ids = list(User.objects.values_list('id', flat=True).order_by('id')) + + context = {'graph': graph, 'graph_by_area': graph_by_area, 'user_ids': user_ids} return render(request, 'consumption/summary.html', context) From ba3998f6a41f349fba17ede2b313538f49f920a4 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 12:23:46 +0900 Subject: [PATCH 16/18] =?UTF-8?q?feat:=20details=20=E3=83=9A=E3=83=BC?= =?UTF-8?q?=E3=82=B8=E3=82=92=E6=9B=B4=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../consumption/templates/consumption/detail.html | 12 ++++++++++-- dashboard/consumption/views.py | 10 +++++++--- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/dashboard/consumption/templates/consumption/detail.html b/dashboard/consumption/templates/consumption/detail.html index 22a36845..2fcd1558 100644 --- a/dashboard/consumption/templates/consumption/detail.html +++ b/dashboard/consumption/templates/consumption/detail.html @@ -1,5 +1,13 @@ {% extends 'consumption/layout.html' %} -{% block content %} +{% block subtitle %} +Details for {{ user_info }} +{% endblock %} -{% endblock %} \ No newline at end of file +{% block content %} +

User {{ user_id }} Total Consumption Chart

+

+ {{ user_info }} +

+Total Consumption Chart +{% endblock %} diff --git a/dashboard/consumption/views.py b/dashboard/consumption/views.py index 6e028a72..993cd7fb 100644 --- a/dashboard/consumption/views.py +++ b/dashboard/consumption/views.py @@ -6,6 +6,7 @@ from consumption.chart.generate import ( generate_daily_total_consumption_graph, generate_daily_total_consumption_graph_by_area, + generate_user_consumption_graph, ) from consumption.models import User @@ -19,7 +20,10 @@ def summary(request): return render(request, 'consumption/summary.html', context) -def detail(request): - context = { - } +def detail(request, user_id: int): + user_ids = list(User.objects.values_list('id', flat=True).order_by('id')) + user_info = User.objects.get(id=user_id) + graph = generate_user_consumption_graph(user_id) + + context = {'graph': graph, 'user_id': user_id, 'user_ids': user_ids, 'user_info': user_info} return render(request, 'consumption/detail.html', context) From 5b690ba38d4f9ce4376e2265298b5ea1e588cc79 Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 13:53:36 +0900 Subject: [PATCH 17/18] docs: add REPORT.md --- REPORT.md | 70 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/REPORT.md b/REPORT.md index e69de29b..3b62b84e 100644 --- a/REPORT.md +++ b/REPORT.md @@ -0,0 +1,70 @@ + +# 作業を確認したブラウザ + + User-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36 + +# 技術的な決定 + +## フロントエンドとバックエンドの分離 + +バックエンド処理はグラフ描画処理であり、それらは全て `chart` ディレクトリ配下にまとめた。 +フロントエンド処理は `views.py` にまとめ、`chart` 配下の機能を呼び出す形にすることでバックエンドと分離している。 + + +## モデル定義 + +Consumption テーブルのフィールド `consumption` の型は `FloatField` とした。 +理由は処理速度を優先するためである。 + +本アプリケーションでは数値の厳密さは重要ではないため、 +処理速度を落としてまで `DecimalField` を選択する必要は無いと考えられる。 + + +## インポート処理 + +データ量が多いため、バッチ処理でレコードの追加・更新を行うようにしている。 + +### 問題 + +時間の都合上、データベースに登録しようとしているCSVデータと同じ主キーのレコードが存在する場合、 +それらに対して一律で上書きするようにした。 +データベースへの負荷を考えると、登録前に各レコードのフィールドまでチェックして、 +フィールドが異なる場合のみ更新するようにしたほうが良い。 + +また、テストを書いてデータベース登録前の前処理が期待通り実装できているかチェックしたかったが、 +こちらも時間の都合上見送った。 + + +## データ取得 + +極力データベース側で処理を行い、処理後の結果をアプリケーションで取得するようにした。 +理由は、トラフィックの占有を防ぐためである。 + +例えば、消費量 (consumption) のデータ数は `ユーザ数 x 日数 x 48個/日` であり、テストデータだけでも50万個近くにのぼる。 +消費量の統計量を計算しようとしたとき、アプリケーション側でこれを行うと消費量の全データを取得する必要があり、トラフィックを占有してしまうことが予想される。 + +そのため、データベース側で先に処理を行い、処理後の結果を取得するようにした。 + +データベース側の処理負荷について考えると、 +今回のアプリケーションは大人数で使用するものでは無いため、 +負荷については問題にならないと判断した。 + +### 問題 + +データベース側で統計量 (中央値、10-90%-ile) の計算をするために生のSQLを発行している。 +保守性や脆弱性を考えると極力ORMの機能を活用すべきだと考えられる。 + +時間の都合上実装できなかったが、 +Aggregate を継承することで実装している例も見つかったため、Django の機能だけで実装することは可能かもしれない。 +https://gist.github.com/mekicha/b3d5e61683d5a6af642e4549eed95994 + + +## summary ページ + +ユーザーのリストアップはユーザー ID のみにした。 +理由は、トラフィックの占有を防ぐためと、描画の高速化のためである。 + +仮にページネーションと非同期処理を組み合わせて一部のユーザー情報だけを表示する方針であれば、 +ユーザーの全情報をテーブルで表示しても問題ないと考えられる。 + +しかし、今回は時間の都合上そこまで実装できなかったため、ユーザーIDのみを表示する方針とした。 From 307fad6074e3c21d77f7792c02dda554a6fe1c0a Mon Sep 17 00:00:00 2001 From: naoking158 <29372455+naoking158@users.noreply.github.com> Date: Tue, 16 Jul 2024 21:54:56 +0900 Subject: [PATCH 18/18] =?UTF-8?q?fix:=20=E3=82=A4=E3=83=B3=E3=83=9D?= =?UTF-8?q?=E3=83=BC=E3=83=88=E6=A9=9F=E8=83=BD=E3=81=AE=E3=83=90=E3=83=83?= =?UTF-8?q?=E3=83=81=E5=87=A6=E7=90=86=E3=81=8C=E3=82=A4=E3=83=B3=E3=83=87?= =?UTF-8?q?=E3=83=83=E3=82=AF=E3=82=B9=E3=82=92=E6=AD=A3=E3=81=97=E3=81=8F?= =?UTF-8?q?=E6=89=B1=E3=81=88=E3=81=A6=E3=81=84=E3=81=AA=E3=81=8B=E3=81=A3?= =?UTF-8?q?=E3=81=9F=E3=81=AE=E3=81=A7=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../consumption/management/commands/import.py | 55 ++++++++++--------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/dashboard/consumption/management/commands/import.py b/dashboard/consumption/management/commands/import.py index 0af95722..36941d46 100644 --- a/dashboard/consumption/management/commands/import.py +++ b/dashboard/consumption/management/commands/import.py @@ -31,7 +31,7 @@ def make_user_list_to_create_and_update( return users_to_create, users_to_update -def import_user_data(csv_file_path, batch_size=10000): +def import_user_data(csv_file_path, batch_size=1000): """ユーザー情報を CSV から User テーブルへインポート""" df = pd.read_csv(csv_file_path) @@ -41,18 +41,21 @@ def import_user_data(csv_file_path, batch_size=10000): existing_users = User.objects.in_bulk(df['id'].tolist()) users_to_create, users_to_update = make_user_list_to_create_and_update(df, existing_users) - with transaction.atomic(): - for i in range(0, len(users_to_create) - batch_size, batch_size): - if len(users_to_create) - i >= batch_size: - User.objects.bulk_create(users_to_create[i : i + batch_size]) - else: - User.objects.bulk_create(users_to_create[i:]) - for i in range(0, len(users_to_create) - batch_size, batch_size): - if len(users_to_update) - i >= batch_size: - User.objects.bulk_update(users_to_update[i : i + batch_size], ['area', 'tariff']) - else: - User.objects.bulk_update(users_to_update[i:], ['area', 'tariff']) + with transaction.atomic(): + for i in range(len(users_to_create) // batch_size + 1): + # IndexError が発生しないように処理をスキップ + if i * batch_size == len(users_to_create): + continue + User.objects.bulk_create(users_to_create[i * batch_size : (i + 1) * batch_size]) + + for i in range(len(users_to_update) // batch_size + 1): + # IndexError が発生しないように処理をスキップ + if i * batch_size == len(users_to_update): + continue + User.objects.bulk_update( + users_to_update[i * batch_size : (i + 1) * batch_size], ['area', 'tariff'] + ) def load_consumption_data(consumption_dir: Path) -> pd.DataFrame: @@ -158,19 +161,21 @@ def import_all_consumption_data(consumption_dir: Path, batch_size=1000): ) with transaction.atomic(): - for i in range(0, len(consumption_data_to_create), batch_size): - if len(consumption_data_to_create) - i >= batch_size: - Consumption.objects.bulk_create(consumption_data_to_create[i : i + batch_size]) - else: - Consumption.objects.bulk_create(consumption_data_to_create[i:]) - - for i in range(0, len(consumption_data_to_update), batch_size): - if len(consumption_data_to_update) - i >= batch_size: - Consumption.objects.bulk_update( - consumption_data_to_update[i : i + batch_size], ['consumption'] - ) - else: - Consumption.objects.bulk_update(consumption_data_to_update[i:], ['consumption']) + for i in range(len(consumption_data_to_create) // batch_size + 1): + # IndexError が発生しないように処理をスキップ + if i * batch_size == len(consumption_data_to_create): + continue + Consumption.objects.bulk_create( + consumption_data_to_create[i * batch_size : (i + 1) * batch_size] + ) + + for i in range(len(consumption_data_to_update) // batch_size + 1): + # IndexError が発生しないように処理をスキップ + if i * batch_size == len(consumption_data_to_update): + continue + Consumption.objects.bulk_update( + consumption_data_to_update[i * batch_size : (i + 1) * batch_size], ['consumption'] + ) class Command(BaseCommand):