camenergydatalab · naoking158 · Jul 16, 2024 · Jul 16, 2024 · Jul 16, 2024 · Jul 16, 2024
diff --git a/REPORT.md b/REPORT.md
@@ -0,0 +1,70 @@
+
+# 作業を確認したブラウザ
+
+  User-agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36
+
+# 技術的な決定
+
+## フロントエンドとバックエンドの分離
+
+バックエンド処理はグラフ描画処理であり、それらは全て `chart` ディレクトリ配下にまとめた。
+フロントエンド処理は `views.py` にまとめ、`chart` 配下の機能を呼び出す形にすることでバックエンドと分離している。
+
+
+## モデル定義
+
+Consumption テーブルのフィールド `consumption` の型は `FloatField` とした。
+理由は処理速度を優先するためである。
+
+本アプリケーションでは数値の厳密さは重要ではないため、
+処理速度を落としてまで `DecimalField` を選択する必要は無いと考えられる。
+
+
+## インポート処理
+
+データ量が多いため、バッチ処理でレコードの追加・更新を行うようにしている。
+
+### 問題
+
+時間の都合上、データベースに登録しようとしているCSVデータと同じ主キーのレコードが存在する場合、
+それらに対して一律で上書きするようにした。
+データベースへの負荷を考えると、登録前に各レコードのフィールドまでチェックして、
+フィールドが異なる場合のみ更新するようにしたほうが良い。
+
+また、テストを書いてデータベース登録前の前処理が期待通り実装できているかチェックしたかったが、
+こちらも時間の都合上見送った。
+
+
+## データ取得
+
+極力データベース側で処理を行い、処理後の結果をアプリケーションで取得するようにした。
+理由は、トラフィックの占有を防ぐためである。
+
+例えば、消費量 (consumption) のデータ数は `ユーザー数 x 日数 x 48個/日` であり、テストデータだけでも50万個近くにのぼる。
+消費量の統計量を計算しようとしたとき、アプリケーション側でこれを行うと消費量の全データを取得する必要があり、トラフィックを占有してしまうことが予想される。
+
+そのため、データベース側で先に処理を行い、処理後の結果を取得するようにした。
+
+データベース側の処理負荷について考えると、
+今回のアプリケーションは大人数で使用するものでは無いため、
+負荷については問題にならないと判断した。
+
+### 問題
+
+データベース側で統計量 (中央値、10-90%-ile) の計算をするために生のSQLを発行している。
+保守性や、SQLインジェクションなどの脆弱性を生むリスクを考えると、極力ORMの機能を活用すべきだと考えられる。
+
+時間の都合上実装できなかったが、
+Aggregate を継承することで実装している例も見つかったため、Django の機能だけで実装することは可能かもしれない。
+https://gist.github.com/mekicha/b3d5e61683d5a6af642e4549eed95994
+
+
+## summary ページ
+
+ユーザーのリストアップはユーザー ID のみにした。
+理由は、トラフィックの占有を防ぐためと、描画の高速化のためである。
+
+仮にページネーションと非同期処理を組み合わせて一部のユーザー情報だけを表示する方針であれば、
+ユーザーの全情報をテーブルで表示しても問題ないと考えられる。
+
+しかし、今回は時間の都合上そこまで実装できなかったため、ユーザーIDのみを表示する方針とした。
diff --git a/dashboard/consumption/admin.py b/dashboard/consumption/admin.py
@@ -3,4 +3,8 @@
 
 from django.contrib import admin
 
+from consumption.models import Consumption, User
+
 # Register your models here.
+admin.site.register(User)
+admin.site.register(Consumption)
diff --git a/dashboard/consumption/chart/__init__.py b/dashboard/consumption/chart/__init__.py
diff --git a/dashboard/consumption/chart/generate.py b/dashboard/consumption/chart/generate.py
@@ -0,0 +1,163 @@
+import base64
+import io
+
+import matplotlib.pyplot as plt
+import pandas as pd
+from matplotlib.figure import Figure
+
+from consumption.chart.statistics import (
+    get_area_daily_percentiles,
+    get_area_daily_total_consumptions,
+    get_daily_percentiles_for_all,
+    get_daily_total_consumptions_for_all,
+    get_user_area_daily_consumption_median,
+    get_user_daily_total_consumptions,
+)
+
+
+def plot_total_consumption(df: pd.DataFrame, percentiles: pd.DataFrame) -> Figure:
+    fig, ax1 = plt.subplots(figsize=(10, 5))
+
+    ax1.plot(df['date'], df['daily_total'], label='Total Consumption', color='blue')
+    ax1.set_title('Daily Consumption with 10-90 Percentile and Median')
+    ax1.set_xlabel('Date')
+    ax1.set_ylabel('Total Consumption', color='blue')
+    ax1.tick_params(axis='y', labelcolor='blue')
+    ax1.grid(True)
+
+    ax2 = ax1.twinx()
+    if not percentiles.empty:
+        ax2.fill_between(
+            percentiles['date'],
+            percentiles['p10'],
+            percentiles['p90'],
+            color='green',
+            alpha=0.1,
+            label='10-90 Percentile',
+        )
+        ax2.plot(
+            percentiles['date'], percentiles['p50'], linestyle='--', label='Median', color='green'
+        )
+    ax2.set_ylabel('Percentiles and Median', color='green')
+    ax2.tick_params(axis='y', labelcolor='green')
+
+    # 凡例の統合
+    lines, labels = ax1.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    ax1.legend(lines + lines2, labels + labels2, loc='upper left', bbox_to_anchor=(0.1, 0.9))
+
+    return fig
+
+
+def plot_area_consumption(area_totals: pd.DataFrame, area_percentiles: pd.DataFrame) -> Figure:
+    fig, ax = plt.subplots(figsize=(10, 5))
+
+    colors = ['red', 'cyan', 'green', 'blue']
+    color_index = 0
+    ax2 = ax.twinx()
+    for area in area_totals['area'].unique():
+        area_data_totals = area_totals[area_totals['area'] == area]
+        area_data_percentiles = area_percentiles[area_percentiles['area'] == area]
+
+        ax.plot(
+            area_data_totals['date'],
+            area_data_totals['daily_total'],
+            label=f'{area} Total Consumption',
+            color=colors[color_index],
+        )
+        ax2.fill_between(
+            area_data_percentiles['date'],
+            area_data_percentiles['p10'],
+            area_data_percentiles['p90'],
+            alpha=0.1,
+            label=f'{area} 10-90 Percentile',
+            color=colors[color_index],
+        )
+        ax2.plot(
+            area_data_percentiles['date'],
+            area_data_percentiles['p50'],
+            linestyle='--',
+            label=f'{area} Median',
+            color=colors[color_index],
+        )
+
+        color_index = (color_index + 1) % len(colors)
+
+    ax.set_title('Daily Consumption with 10-90 Percentile and Median by Area')
+    ax.set_xlabel('Date')
+    ax.set_ylabel('Total Consumption')
+    ax.grid(True)
+
+    ax2.set_ylabel('Percentiles and Median')
+    lines, labels = ax.get_legend_handles_labels()
+    lines2, labels2 = ax2.get_legend_handles_labels()
+    ax.legend(lines + lines2, labels + labels2, loc='upper left', bbox_to_anchor=(0.1, 0.9))
+
+    return fig
+
+
+def plot_user_and_area_consumption(
+    user_df: pd.DataFrame, area_df: pd.DataFrame, user_id: int
+) -> Figure:
+    fig, ax = plt.subplots(figsize=(10, 5))
+    ax.plot(
+        user_df['date'], user_df['daily_total'], label=f'User {user_id} Consumption', color='blue'
+    )
+    ax.plot(
+        area_df['date'],
+        area_df['p50'],
+        label='Area Median Consumption',
+        color='red',
+        linestyle='--',
+    )
+    ax.set_xlabel('Date')
+    ax.set_ylabel('Total Consumption')
+    ax.grid(True)
+    ax.legend(loc='upper left')
+
+    return fig
+
+
+def generate_daily_total_consumption_graph() -> str:
+    """日ごとの消費量の総量と、中央値と 10-90%-ile をプロットしたグラフを生成"""
+    df = get_daily_total_consumptions_for_all()
+    percentiles = get_daily_percentiles_for_all()
+
+    with io.BytesIO() as buffer:
+        fig = plot_total_consumption(df, percentiles)
+        fig.savefig(buffer, format='png')
+        buffer.seek(0)
+        image_png = buffer.getvalue()
+
+    graph = base64.b64encode(image_png).decode('utf-8')
+    return graph
+
+
+def generate_daily_total_consumption_graph_by_area() -> str:
+    """エリア別に、日ごとの消費量の総量と、中央値と 10-90%-ile をプロットしたグラフを生成"""
+    df = get_area_daily_total_consumptions()
+    percentiles = get_area_daily_percentiles()
+
+    with io.BytesIO() as buffer:
+        fig = plot_area_consumption(df, percentiles)
+        fig.savefig(buffer, format='png')
+        buffer.seek(0)
+        image_png = buffer.getvalue()
+
+    graph = base64.b64encode(image_png).decode('utf-8')
+    return graph
+
+
+def generate_user_consumption_graph(user_id: int) -> str:
+    """ユーザーごとの日ごとの消費量の総量と、エリアの中央値をプロットしたグラフを生成"""
+    user_df = get_user_daily_total_consumptions(user_id)
+    area_df = get_user_area_daily_consumption_median(user_id)
+
+    with io.BytesIO() as buffer:
+        fig = plot_user_and_area_consumption(user_df, area_df, user_id)
+        fig.savefig(buffer, format='png')
+        buffer.seek(0)
+        image_png1 = buffer.getvalue()
+
+    graph = base64.b64encode(image_png1).decode('utf-8')
+    return graph