Skip to content

Commit 610e1ae

Browse files
pesap authored and
staadecker committed
Merge pull request #103 from staadecker/plots
Plot improvements
2 parents 688e88e + 470ab64 commit 610e1ae

File tree

26 files changed

+1580
-491
lines changed

26 files changed

+1580
-491
lines changed

database/2021-07-29_create_low_hydro_scenario.py

Lines changed: 53 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,9 @@
2222
new_start_year = 2020
2323
new_end_year = 2050
2424
new_scenario_id = 24
25-
new_scenario_name = "Lowest year (2015) repeated. Using EIA and AMPL Canada and Mex data."
25+
new_scenario_name = (
26+
"Lowest year (2015) repeated. Using EIA and AMPL Canada and Mex data."
27+
)
2628
new_scenario_description = "Lowest year (2015) repeated from 2020 to 2050, based on data from id 21 (EIA + AMPL Canada & Mex)."
2729

2830

@@ -35,44 +37,66 @@ def main():
3537
f"""
3638
SELECT DISTINCT generation_plant_id FROM hydro_historical_monthly_capacity_factors
3739
WHERE hydro_simple_scenario_id={all_plants_scenario};
38-
""")
39-
hydro_plants = pd.DataFrame(db_cursor.fetchall(), columns=["generation_plant_id"])["generation_plant_id"]
40+
"""
41+
)
42+
hydro_plants = pd.DataFrame(db_cursor.fetchall(), columns=["generation_plant_id"])[
43+
"generation_plant_id"
44+
]
4045

4146
# 2. Get all the hydro flow data for the worst year
4247
db_cursor.execute(
4348
f"""
4449
SELECT generation_plant_id, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
4550
WHERE hydro_simple_scenario_id={raw_data_scenario} and year={worst_year};
46-
""")
47-
worst_year_data = pd.DataFrame(db_cursor.fetchall(),
48-
columns=["generation_plant_id", "month", "hydro_min_flow_mw", "hydro_avg_flow_mw"])
51+
"""
52+
)
53+
worst_year_data = pd.DataFrame(
54+
db_cursor.fetchall(),
55+
columns=[
56+
"generation_plant_id",
57+
"month",
58+
"hydro_min_flow_mw",
59+
"hydro_avg_flow_mw",
60+
],
61+
)
4962

5063
# 3. Identify plants where data is missing
51-
missing_hydro_plants = hydro_plants[~hydro_plants.isin(worst_year_data["generation_plant_id"])].values
64+
missing_hydro_plants = hydro_plants[
65+
~hydro_plants.isin(worst_year_data["generation_plant_id"])
66+
].values
5267

5368
# 4. For each missing plant get the data for all the years
5469
db_cursor.execute(
5570
f"""
5671
SELECT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
5772
WHERE hydro_simple_scenario_id={raw_data_scenario} and generation_plant_id in ({",".join(missing_hydro_plants.astype(str))});
58-
""")
59-
missing_plants_data = pd.DataFrame(db_cursor.fetchall(),
60-
columns=["generation_plant_id", "year", "month", "hydro_min_flow_mw",
61-
"hydro_avg_flow_mw"])
73+
"""
74+
)
75+
missing_plants_data = pd.DataFrame(
76+
db_cursor.fetchall(),
77+
columns=[
78+
"generation_plant_id",
79+
"year",
80+
"month",
81+
"hydro_min_flow_mw",
82+
"hydro_avg_flow_mw",
83+
],
84+
)
6285

6386
# 5. Pick the year with the least flow
6487
# Aggregate by year
65-
missing_data_by_year = missing_plants_data.groupby(["generation_plant_id", "year"], as_index=False)[
66-
"hydro_avg_flow_mw"].mean()
88+
missing_data_by_year = missing_plants_data.groupby(
89+
["generation_plant_id", "year"], as_index=False
90+
)["hydro_avg_flow_mw"].mean()
6791
# Select years where the flow is at its lowest
68-
year_to_use = \
69-
missing_data_by_year.loc[missing_data_by_year.groupby("generation_plant_id")["hydro_avg_flow_mw"].idxmin()][
70-
["generation_plant_id", "year"]]
92+
year_to_use = missing_data_by_year.loc[
93+
missing_data_by_year.groupby("generation_plant_id")[
94+
"hydro_avg_flow_mw"
95+
].idxmin()
96+
][["generation_plant_id", "year"]]
7197
# Essentially filter missing_plants_data to only include keys from the right table, aka plants and years that are lowest
7298
missing_plants_data = missing_plants_data.merge(
73-
year_to_use,
74-
on=["generation_plant_id", "year"],
75-
how="right"
99+
year_to_use, on=["generation_plant_id", "year"], how="right"
76100
).drop("year", axis=1)
77101

78102
# 6. Add the missing data to our worst year data and verify we have data for all the plants
@@ -81,14 +105,13 @@ def main():
81105

82106
# 7. Cross join the series with all the years from 2020 to 2050
83107
years = pd.Series(range(new_start_year, new_end_year + 1), name="year")
84-
worst_year_data = worst_year_data.merge(
85-
years,
86-
how="cross"
87-
)
108+
worst_year_data = worst_year_data.merge(years, how="cross")
88109
worst_year_data["hydro_simple_scenario_id"] = new_scenario_id
89110

90111
# 8. Complete some data checks
91-
assert len(worst_year_data) == 12 * (new_end_year - new_start_year + 1) * len(hydro_plants)
112+
assert len(worst_year_data) == 12 * (new_end_year - new_start_year + 1) * len(
113+
hydro_plants
114+
)
92115

93116
# 9. Add data to database
94117
print(f"hydro_simple_scenario: {new_scenario_id}")
@@ -99,7 +122,9 @@ def main():
99122
print(f"To year: {new_end_year}")
100123
print(f"Example data:\n{worst_year_data.head()}")
101124

102-
if not query_yes_no("\nAre you sure you want to add this data to the database?", default="no"):
125+
if not query_yes_no(
126+
"\nAre you sure you want to add this data to the database?", default="no"
127+
):
103128
raise SystemExit
104129

105130
db_cursor.execute(
@@ -110,9 +135,10 @@ def main():
110135
n = len(worst_year_data)
111136
start_time = time.time()
112137
for i, r in enumerate(worst_year_data.itertuples(index=False)):
113-
if i !=0 and i % 1000 == 0:
138+
if i != 0 and i % 1000 == 0:
114139
print(
115-
f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}")
140+
f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}"
141+
)
116142
db_cursor.execute(
117143
f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) "
118144
f"VALUES ({r.hydro_simple_scenario_id},{r.generation_plant_id},{r.year},{r.month},{r.hydro_min_flow_mw},{r.hydro_avg_flow_mw})"

database/2021-07-30_create_no_hydro_scenario.py

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@
1717

1818
new_scenario_id = 25
1919
new_scenario_name = "No Hydro"
20-
new_scenario_description = "All average flows are zero effectively removing all hydro generation from the model." \
21-
" Represents as an extreme edge case of no hydro generation."
20+
new_scenario_description = (
21+
"All average flows are zero effectively removing all hydro generation from the model."
22+
" Represents as an extreme edge case of no hydro generation."
23+
)
2224

2325

2426
def main():
@@ -30,9 +32,18 @@ def main():
3032
f"""
3133
SELECT DISTINCT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
3234
WHERE hydro_simple_scenario_id={all_plants_scenario};
33-
""")
34-
df = pd.DataFrame(db_cursor.fetchall(),
35-
columns=["generation_plant_id", "year", "month", "hydro_min_flow_mw", "hydro_avg_flow_mw"])
35+
"""
36+
)
37+
df = pd.DataFrame(
38+
db_cursor.fetchall(),
39+
columns=[
40+
"generation_plant_id",
41+
"year",
42+
"month",
43+
"hydro_min_flow_mw",
44+
"hydro_avg_flow_mw",
45+
],
46+
)
3647

3748
# 2. Set all the flows to zero and set the scenario id
3849
df["hydro_min_flow_mw"] = 0
@@ -46,7 +57,9 @@ def main():
4657
print(f"Num hydro plants: {df.generation_plant_id.nunique()}")
4758
print(f"Example data:\n{df.head()}")
4859

49-
if not query_yes_no("\nAre you sure you want to add this data to the database?", default="no"):
60+
if not query_yes_no(
61+
"\nAre you sure you want to add this data to the database?", default="no"
62+
):
5063
raise SystemExit
5164

5265
db_cursor.execute(
@@ -59,7 +72,8 @@ def main():
5972
for i, r in enumerate(df.itertuples(index=False)):
6073
if i != 0 and i % 1000 == 0:
6174
print(
62-
f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}")
75+
f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}"
76+
)
6377
db_cursor.execute(
6478
f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) "
6579
f"VALUES ({r.hydro_simple_scenario_id},{r.generation_plant_id},{r.year},{r.month},{r.hydro_min_flow_mw},{r.hydro_avg_flow_mw})"

database/2021-08-02_create_half_hydro_scenario.py

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,18 @@ def main():
2929
f"""
3030
SELECT DISTINCT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
3131
WHERE hydro_simple_scenario_id={all_plants_scenario};
32-
""")
33-
df = pd.DataFrame(db_cursor.fetchall(),
34-
columns=["generation_plant_id", "year", "month", "hydro_min_flow_mw", "hydro_avg_flow_mw"])
32+
"""
33+
)
34+
df = pd.DataFrame(
35+
db_cursor.fetchall(),
36+
columns=[
37+
"generation_plant_id",
38+
"year",
39+
"month",
40+
"hydro_min_flow_mw",
41+
"hydro_avg_flow_mw",
42+
],
43+
)
3544

3645
# 2. Halve the average flows and set the scenario id
3746
df["hydro_avg_flow_mw"] /= 2
@@ -45,7 +54,9 @@ def main():
4554
print(f"Num hydro plants: {df.generation_plant_id.nunique()}")
4655
print(f"Example data:\n{df.head()}")
4756

48-
if not query_yes_no("\nAre you sure you want to add this data to the database?", default="no"):
57+
if not query_yes_no(
58+
"\nAre you sure you want to add this data to the database?", default="no"
59+
):
4960
raise SystemExit
5061

5162
db_cursor.execute(
@@ -58,7 +69,8 @@ def main():
5869
for i, r in enumerate(df.itertuples(index=False)):
5970
if i != 0 and i % 1000 == 0:
6071
print(
61-
f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}")
72+
f"{i}/{n} inserts completed. Estimated time remaining {format_seconds((n - i) * (time.time() - start_time) / i)}"
73+
)
6274
db_cursor.execute(
6375
f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) "
6476
f"VALUES ({r.hydro_simple_scenario_id},{r.generation_plant_id},{r.year},{r.month},{r.hydro_min_flow_mw},{r.hydro_avg_flow_mw})"

0 commit comments

Comments
 (0)