
Commit 17fe68b

pesap authored and staadecker committed
Merge pull request #78 from staadecker/get_inputs_post
Improve how get_inputs post-processing is handled
2 parents 6cd18ba + 2255bfe

10 files changed: +271 -216 lines changed


docs/Database.md

Lines changed: 1 addition & 1 deletion

@@ -21,7 +21,7 @@ their strong points. DBVisualizer can also create a graph of all the relationships
 tables.

 Further, it is often useful to read the comments on tables (PGAdmin: right-click table -> Properties)
-as they sometimes give details on the tables role. Finally, if the table is used in [`get_inputs.py`](/switch_model/wecc/get_inputs.py)
+as they sometimes give details on the tables role. Finally, if the table is used in [`get_inputs.py`](/switch_model/wecc/get_inputs/get_inputs.py)
 one can discover what it does by looking at how get_inputs.py uses the table to generate the SWITCH inputs.

 ## Connecting to the database
Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+"""
+Script to retrieve the input data from the switch-wecc database and apply post-processing steps.
+"""
+import argparse
+import os
+
+from switch_model.utilities import query_yes_no, StepTimer
+from switch_model.wecc.get_inputs.get_inputs import query_db
+from switch_model.wecc.get_inputs.register_post_process import run_post_process
+from switch_model.wecc.utilities import load_config
+from switch_model.wecc.get_inputs.post_process_steps import *
+
+
+def main():
+    timer = StepTimer()
+
+    # Create command line tool, just provides help information
+    parser = argparse.ArgumentParser(
+        description="Write SWITCH input files from database tables.",
+        epilog="""
+        This tool will populate the inputs folder with the data from the PostgreSQL database.
+        config.yaml specifies the scenario parameters.
+        The environment variable DB_URL specifies the url to connect to the database. """,
+    )
+    parser.add_argument("--skip-cf", default=False, action='store_true',
+                        help="Skip creation variable_capacity_factors.csv. Useful when debugging and one doesn't"
+                             "want to wait for the command.")
+    parser.add_argument("--post-only", default=False, action='store_true',
+                        help="Only run the post solve functions (don't query db)")
+    parser.add_argument("--overwrite", default=False, action='store_true',
+                        help="Overwrite previous input files without prompting to confirm.")
+    args = parser.parse_args()  # Makes switch get_inputs --help works
+
+    # Load values from config.yaml
+    full_config = load_config()
+    switch_to_input_dir(full_config, overwrite=args.overwrite)
+
+    if not args.post_only:
+        query_db(full_config, skip_cf=args.skip_cf)
+    run_post_process()
+    print(f"\nScript took {timer.step_time_as_str()} seconds to build input tables.")
+
+
+def switch_to_input_dir(config, overwrite):
+    inputs_dir = config["inputs_dir"]
+
+    # Create inputs_dir if it doesn't exist
+    if not os.path.exists(inputs_dir):
+        os.makedirs(inputs_dir)
+        print("Inputs directory created.")
+    else:
+        if not overwrite and not query_yes_no(
+                "Inputs directory already exists. Allow contents to be overwritten?"
+        ):
+            raise SystemExit("User cancelled run.")
+
+    os.chdir(inputs_dir)
+    return inputs_dir
+
+
+if __name__ == "__main__":
+    main()
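
This new entry script separates querying the database (query_db) from post-processing the generated CSVs (run_post_process), which is what makes the --post-only flag possible. The register_post_process/run_post_process pair is added elsewhere in this commit but not shown in this excerpt; a minimal sketch of how such a decorator-based registry could work (the registry list and exact signatures here are assumptions, not the committed code):

    # Hypothetical sketch of register_post_process.py, for orientation only;
    # the actual module in this commit may differ.
    _registered_steps = []

    def register_post_process(func):
        """Decorator: record a post-processing step so it can be run later."""
        _registered_steps.append(func)
        return func

    def run_post_process():
        """Run every registered step, in registration order."""
        for step in _registered_steps:
            step()

Because main() switches into inputs_dir before calling run_post_process(), each registered step can simply read and rewrite the generated CSV files in the current working directory.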

switch_model/wecc/get_inputs.py renamed to switch_model/wecc/get_inputs/get_inputs.py

Lines changed: 12 additions & 215 deletions
@@ -9,33 +9,27 @@
 """

 # Standard packages
-import argparse
-import os
-import shutil
+import warnings
 from typing import Iterable, List

 # Switch packages
-from switch_model.utilities import query_yes_no, StepTimer
-from switch_model.wecc.utilities import load_config, connect
-
-# Third-party packages
-import pandas as pd
-
+from switch_model.wecc.utilities import connect
+from switch_model.version import __version__

 def write_csv_from_query(cursor, fname: str, headers: List[str], query: str):
     """Create CSV file from cursor."""
-    print(f"{fname}.csv... ", flush=True)
+    print(f"\t{fname}.csv... ", flush=True)
     cursor.execute(query)
     data = cursor.fetchall()
     write_csv(data, fname, headers, log=False)
     if not data:
-        print("Warning: File is empty.")
+        warnings.warn(f"File {fname} is empty.")


 def write_csv(data: Iterable[List], fname, headers: List[str], log=True):
     """Create CSV file from Iterable."""
     if log:
-        print(f"{fname}.csv... ", flush=True)
+        print(f"\t{fname}.csv... ", flush=True)
     with open(fname + ".csv", "w") as f:
         f.write(",".join(headers) + "\n")
         for row in data:
@@ -72,50 +66,6 @@ def write_csv(data: Iterable[List], fname, headers: List[str], log=True):
 ]


-def switch_to_input_dir(config):
-    inputs_dir = config["inputs_dir"]
-
-    # Create inputs_dir if it doesn't exist
-    if not os.path.exists(inputs_dir):
-        os.makedirs(inputs_dir)
-        print("Inputs directory created.")
-    else:
-        if not query_yes_no(
-                "Inputs directory already exists. Allow contents to be overwritten?"
-        ):
-            raise Exception("User cancelled run.")
-
-    os.chdir(inputs_dir)
-    return inputs_dir
-
-
-def main():
-    timer = StepTimer()
-
-    # Create command line tool, just provides help information
-    parser = argparse.ArgumentParser(
-        description="Write SWITCH input files from database tables.",
-        epilog="""
-        This tool will populate the inputs folder with the data from the PostgreSQL database.
-        config.yaml specifies the scenario parameters.
-        The environment variable DB_URL specifies the url to connect to the database. """,
-    )
-    parser.add_argument("--skip-cf", default=False, action='store_true',
-                        help="Skip creation variable_capacity_factors.csv. Useful when debugging and one doesn't"
-                             "want to wait for the command.")
-    parser.add_argument("--post-only", default=False, action='store_true',
-                        help="Only run the post solve functions (don't query db)")
-    args = parser.parse_args()  # Makes switch get_inputs --help works
-
-    # Load values from config.yaml
-    full_config = load_config()
-    switch_to_input_dir(full_config)
-
-    if not args.post_only:
-        query_db(full_config, skip_cf=args.skip_cf)
-    post_process()
-    print(f"\nScript took {timer.step_time_as_str()} seconds to build input tables.")
-
-
 def query_db(full_config, skip_cf):
     config = full_config["get_inputs"]
     scenario_id = config["scenario_id"]
@@ -124,7 +74,7 @@ def query_db(full_config, skip_cf):
     db_conn = connect()
     db_cursor = db_conn.cursor()

-    print(f"\nStarting to copy data from the database to the input files.")
+    print("Copying data from the database to the input files...")

     scenario_params = [
         "name",
@@ -185,23 +135,23 @@
     generation_plant_technologies_scenario_id = s_details[19]
     variable_o_m_cost_scenario_id = s_details[20]

-    print(f"Scenario: {scenario_id}: {name}.\n")
+    print(f"Scenario: {scenario_id}: {name}.")

     # Write general scenario parameters into a documentation file
+    print("\tscenario_params.txt...")
     colnames = [desc[0] for desc in db_cursor.description]
     with open("scenario_params.txt", "w") as f:
         f.write(f"Scenario id: {scenario_id}\n")
         f.write(f"Scenario name: {name}\n")
         f.write(f"Scenario notes: {description}\n")
         for i, col in enumerate(colnames):
             f.write(f"{col}: {s_details[i]}\n")
-    print("scenario_params.txt...")

     ########################################################
     # Which input specification are we writing against?
+    print("\tswitch_inputs_version.txt...")
     with open("switch_inputs_version.txt", "w") as f:
-        f.write("2.0.5\n")
-    print("switch_inputs_version.txt...")
+        f.write(f"{__version__}\n")

     ########################################################
     # TIMESCALES
@@ -991,160 +941,7 @@ def planning_reserves(db_cursor, time_sample_id, hydro_simple_scenario_id):


 def create_modules_txt():
-    print("modules.txt...")
+    print("\tmodules.txt...")
     with open("modules.txt", "w") as f:
         for module in modules:
             f.write(module + "\n")
-
-
-def post_process():
-    fix_prebuild_conflict_bug()
-    # Graphing post process
-    graph_config = os.path.join(os.path.dirname(__file__), "graph_config")
-    print("graph_tech_colors.csv...")
-    shutil.copy(os.path.join(graph_config, "graph_tech_colors.csv"), "graph_tech_colors.csv")
-    print("graph_tech_types.csv...")
-    shutil.copy(os.path.join(graph_config, "graph_tech_types.csv"), "graph_tech_types.csv")
-    create_graph_timestamp_map()
-    replace_plants_in_zone_all()
-
-
-def fix_prebuild_conflict_bug():
-    """
-    This post-processing step is necessary to pass the no_predetermined_bld_yr_vs_period_conflict BuildCheck.
-    Basically we are moving all the 2020 predetermined build years to 2019 to avoid a conflict with the 2020 period.
-    See generators.core.build.py for details.
-    """
-    print("Shifting 2020 prebuilds to 2019...")
-    periods = pd.read_csv("periods.csv", index_col=False)
-    if 2020 not in periods["INVESTMENT_PERIOD"].values:
-        return
-
-    # Read two files that need modification
-    gen_build_costs = pd.read_csv("gen_build_costs.csv", index_col=False)
-    gen_build_predetermined = pd.read_csv("gen_build_predetermined.csv", index_col=False)
-    # Save their size
-    rows_prior = gen_build_costs.size, gen_build_predetermined.size
-    # Save columns of gen_build_costs
-    gen_build_costs_col = gen_build_costs.columns
-    # Merge to know which rows are prebuild
-    gen_build_costs = gen_build_costs.merge(
-        gen_build_predetermined,
-        on=["GENERATION_PROJECT", "build_year"],
-        how='left'
-    )
-
-    # If row is prebuild and in 2020, replace it with 2019
-    gen_build_costs.loc[
-        (~gen_build_costs["gen_predetermined_cap"].isna()) & (gen_build_costs["build_year"] == 2020),
-        "build_year"] = 2019
-    # If row is in 2020 replace it with 2019
-    gen_build_predetermined.loc[gen_build_predetermined["build_year"] == 2020, "build_year"] = 2019
-    # Go back to original column set
-    gen_build_costs = gen_build_costs[gen_build_costs_col]
-
-    # Ensure the size is still the same
-    rows_post = gen_build_costs.size, gen_build_predetermined.size
-    assert rows_post == rows_prior
-
-    # Write the files back out
-    gen_build_costs.to_csv("gen_build_costs.csv", index=False)
-    gen_build_predetermined.to_csv("gen_build_predetermined.csv", index=False)
-
-
-def create_graph_timestamp_map():
-    print("graph_timestamp_map.csv...")
-    timepoints = pd.read_csv("timepoints.csv", index_col=False)
-    timeseries = pd.read_csv("timeseries.csv", index_col=False)
-
-    timepoints = timepoints.merge(
-        timeseries,
-        how='left',
-        left_on='timeseries',
-        right_on='TIMESERIES',
-        validate="many_to_one"
-    )
-
-    timepoints["time_column"] = timepoints["timeseries"].apply(lambda c: c.partition("-")[2])
-
-    timestamp_map = timepoints[["timestamp", "ts_period", "time_column"]]
-    timestamp_map.columns = ["timestamp", "time_row", "time_column"]
-    timestamp_map.to_csv("graph_timestamp_map.csv", index=False)
-
-
-def replace_plants_in_zone_all():
-    """
-    This post-process step replaces all the generation projects that have a load called
-    _ALL_ZONES with a generation project for each load zone.
-    """
-    print("Replacing _ALL_ZONES plants with a plant in each zone...")
-
-    # Read load_zones.csv
-    load_zones = pd.read_csv("load_zones.csv", index_col=False)
-    load_zones["dbid_suffix"] = "_" + load_zones["zone_dbid"].astype(str)
-    num_zones = len(load_zones)
-
-    def replace_rows(plants_to_copy, filename, df=None, plants_col="GENERATION_PROJECT", load_column=None):
-        # If the df does not already exist, read the file
-        if df is None:
-            df = pd.read_csv(filename, index_col=False)
-
-        # Save the columns for later use
-        df_col = df.columns
-        df_rows = len(df)
-
-        # Force the plants_col to string type to allow concating
-        df = df.astype({plants_col: str})
-
-        # Extract the rows that need copying
-        should_copy = df[plants_col].isin(plants_to_copy)
-        rows_to_copy = df[should_copy]
-        # Filter out the plants that need replacing from our data frame
-        df = df[~should_copy]
-        # replacement is the cross join of the plants that need replacement
-        # with the load zones. The cross join is done by joining over a column called
-        # key that is always 1.
-        replacement = rows_to_copy.assign(key=1).merge(
-            load_zones.assign(key=1),
-            on='key',
-        )
-
-        replacement[plants_col] = replacement[plants_col] + replacement["dbid_suffix"]
-
-        if load_column is not None:
-            # Set gen_load_zone to be the LOAD_ZONE column
-            replacement[load_column] = replacement["LOAD_ZONE"]
-
-        # Keep the same columns as originally
-        replacement = replacement[df_col]
-
-        # Add the replacement plants to our dataframe
-        df = df.append(replacement)
-
-        assert len(df) == df_rows + len(rows_to_copy) * (num_zones - 1)
-
-        df.to_csv(filename, index=False)
-
-    plants = pd.read_csv("generation_projects_info.csv", index_col=False)
-    # Find the plants that need replacing
-    to_replace = plants[plants["gen_load_zone"] == "_ALL_ZONES"]
-    # If no plant needs replacing end there
-    if to_replace.empty:
-        return
-    # If to_replace has variable capacity factors we raise exceptions
-    # since the variabale capacity factors won't be the same across zones
-    if not all(to_replace["gen_is_variable"] == 0):
-        raise Exception("generation_projects_info.csv contains variable plants "
-                        "with load zone _ALL_ZONES. This is not allowed since "
-                        "copying variable capacity factors to all "
-                        "zones is not implemented (and likely unwanted).")
-
-    plants_to_replace = to_replace["GENERATION_PROJECT"]
-    replace_rows(plants_to_replace, "generation_projects_info.csv", load_column="gen_load_zone")
-    replace_rows(plants_to_replace, "gen_build_costs.csv")
-    replace_rows(plants_to_replace, "gen_build_predetermined.csv")
-
-
-if __name__ == "__main__":
-    main()
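
The deleted replace_rows helper builds a cross join by merging on a constant key column, the standard pandas idiom from before merge(how="cross") was added in pandas 1.2. A self-contained sketch of the idiom, using made-up plant and zone data:

    import pandas as pd

    # Toy stand-ins for generation_projects_info.csv and load_zones.csv.
    plants = pd.DataFrame({"GENERATION_PROJECT": ["P1"], "gen_tech": ["CCGT"]})
    zones = pd.DataFrame({"LOAD_ZONE": ["CA_PGE_BAY", "WA_W"], "dbid_suffix": ["_10", "_11"]})

    # Merging on a column that is always 1 pairs every plant with every zone.
    cross = plants.assign(key=1).merge(zones.assign(key=1), on="key").drop(columns="key")
    # Suffix each copy's id with the zone dbid, as replace_rows does.
    cross["GENERATION_PROJECT"] = cross["GENERATION_PROJECT"] + cross["dbid_suffix"]
    print(cross)  # two rows: P1_10 in CA_PGE_BAY, P1_11 in WA_W

Note these post-processing functions are removed here rather than rewritten: presumably they move into the new post_process_steps package, whose files are among the 10 changed but not shown in this excerpt.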
Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+"""
+This file ensures that all python files in this folder are imported so that
+their @register_post_process function handlers are noticed.
+"""
+from pathlib import Path
+
+# Import all the files in this directory that end in .py and don't start with an underscore
+__all__ = [f.stem for f in Path(__file__).parent.glob("[!_]*.py")]
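
This __init__.py is what the entry script's `from switch_model.wecc.get_inputs.post_process_steps import *` relies on: a wildcard import loads every module named in __all__, and importing a module executes its decorators, registering each step as a side effect. A hypothetical step module illustrating the flow (the file and function names are invented, and the decorator is assumed to be applied bare, without arguments):

    # post_process_steps/example_step.py (hypothetical)
    from switch_model.wecc.get_inputs.register_post_process import register_post_process

    @register_post_process
    def example_step():
        # Runs after query_db, with the inputs directory as the CWD,
        # so it can read and rewrite the generated CSV files in place.
        print("\texample_step...")

With this layout, adding a new post-processing step means dropping a new .py file into post_process_steps/, with no edits to the driver script.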
