2222new_start_year = 2020
2323new_end_year = 2050
2424new_scenario_id = 24
25- new_scenario_name = "Lowest year (2015) repeated. Using EIA and AMPL Canada and Mex data."
25+ new_scenario_name = (
26+ "Lowest year (2015) repeated. Using EIA and AMPL Canada and Mex data."
27+ )
2628new_scenario_description = "Lowest year (2015) repeated from 2020 to 2050, based on data from id 21 (EIA + AMPL Canada & Mex)."
2729
2830
@@ -35,44 +37,66 @@ def main():
3537 f"""
3638 SELECT DISTINCT generation_plant_id FROM hydro_historical_monthly_capacity_factors
3739 WHERE hydro_simple_scenario_id={ all_plants_scenario } ;
38- """ )
39- hydro_plants = pd .DataFrame (db_cursor .fetchall (), columns = ["generation_plant_id" ])["generation_plant_id" ]
40+ """
41+ )
42+ hydro_plants = pd .DataFrame (db_cursor .fetchall (), columns = ["generation_plant_id" ])[
43+ "generation_plant_id"
44+ ]
4045
4146 # 2. Get all the hydro flow data for the worst year
4247 db_cursor .execute (
4348 f"""
4449 SELECT generation_plant_id, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
4550 WHERE hydro_simple_scenario_id={ raw_data_scenario } and year={ worst_year } ;
46- """ )
47- worst_year_data = pd .DataFrame (db_cursor .fetchall (),
48- columns = ["generation_plant_id" , "month" , "hydro_min_flow_mw" , "hydro_avg_flow_mw" ])
51+ """
52+ )
53+ worst_year_data = pd .DataFrame (
54+ db_cursor .fetchall (),
55+ columns = [
56+ "generation_plant_id" ,
57+ "month" ,
58+ "hydro_min_flow_mw" ,
59+ "hydro_avg_flow_mw" ,
60+ ],
61+ )
4962
5063 # 3. Identify plants where data is missing
51- missing_hydro_plants = hydro_plants [~ hydro_plants .isin (worst_year_data ["generation_plant_id" ])].values
64+ missing_hydro_plants = hydro_plants [
65+ ~ hydro_plants .isin (worst_year_data ["generation_plant_id" ])
66+ ].values
5267
5368 # 4. For each missing plant get the data for all the years
5469 db_cursor .execute (
5570 f"""
5671 SELECT generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw FROM hydro_historical_monthly_capacity_factors
5772 WHERE hydro_simple_scenario_id={ raw_data_scenario } and generation_plant_id in ({ "," .join (missing_hydro_plants .astype (str ))} );
58- """ )
59- missing_plants_data = pd .DataFrame (db_cursor .fetchall (),
60- columns = ["generation_plant_id" , "year" , "month" , "hydro_min_flow_mw" ,
61- "hydro_avg_flow_mw" ])
73+ """
74+ )
75+ missing_plants_data = pd .DataFrame (
76+ db_cursor .fetchall (),
77+ columns = [
78+ "generation_plant_id" ,
79+ "year" ,
80+ "month" ,
81+ "hydro_min_flow_mw" ,
82+ "hydro_avg_flow_mw" ,
83+ ],
84+ )
6285
6386 # 5. Pick the year with the least flow
6487 # Aggregate by year
65- missing_data_by_year = missing_plants_data .groupby (["generation_plant_id" , "year" ], as_index = False )[
66- "hydro_avg_flow_mw" ].mean ()
88+ missing_data_by_year = missing_plants_data .groupby (
89+ ["generation_plant_id" , "year" ], as_index = False
90+ )["hydro_avg_flow_mw" ].mean ()
6791 # Select years where the flow is at its lowest
68- year_to_use = \
69- missing_data_by_year .loc [missing_data_by_year .groupby ("generation_plant_id" )["hydro_avg_flow_mw" ].idxmin ()][
70- ["generation_plant_id" , "year" ]]
92+ year_to_use = missing_data_by_year .loc [
93+ missing_data_by_year .groupby ("generation_plant_id" )[
94+ "hydro_avg_flow_mw"
95+ ].idxmin ()
96+ ][["generation_plant_id" , "year" ]]
7197 # Keep only the rows of missing_plants_data whose (generation_plant_id, year) pair appears in year_to_use (the right table), i.e. each plant's lowest-flow year
7298 missing_plants_data = missing_plants_data .merge (
73- year_to_use ,
74- on = ["generation_plant_id" , "year" ],
75- how = "right"
99+ year_to_use , on = ["generation_plant_id" , "year" ], how = "right"
76100 ).drop ("year" , axis = 1 )
77101
78102 # 6. Add the missing data to our worst year data and verify we have data for all the plants
@@ -81,14 +105,13 @@ def main():
81105
82106 # 7. Cross join the series with every year from new_start_year to new_end_year (inclusive)
83107 years = pd .Series (range (new_start_year , new_end_year + 1 ), name = "year" )
84- worst_year_data = worst_year_data .merge (
85- years ,
86- how = "cross"
87- )
108+ worst_year_data = worst_year_data .merge (years , how = "cross" )
88109 worst_year_data ["hydro_simple_scenario_id" ] = new_scenario_id
89110
90111 # 8. Complete some data checks
91- assert len (worst_year_data ) == 12 * (new_end_year - new_start_year + 1 ) * len (hydro_plants )
112+ assert len (worst_year_data ) == 12 * (new_end_year - new_start_year + 1 ) * len (
113+ hydro_plants
114+ )
92115
93116 # 9. Add data to database
94117 print (f"hydro_simple_scenario: { new_scenario_id } " )
@@ -99,7 +122,9 @@ def main():
99122 print (f"To year: { new_end_year } " )
100123 print (f"Example data:\n { worst_year_data .head ()} " )
101124
102- if not query_yes_no ("\n Are you sure you want to add this data to the database?" , default = "no" ):
125+ if not query_yes_no (
126+ "\n Are you sure you want to add this data to the database?" , default = "no"
127+ ):
103128 raise SystemExit
104129
105130 db_cursor .execute (
@@ -110,9 +135,10 @@ def main():
110135 n = len (worst_year_data )
111136 start_time = time .time ()
112137 for i , r in enumerate (worst_year_data .itertuples (index = False )):
113- if i != 0 and i % 1000 == 0 :
138+ if i != 0 and i % 1000 == 0 :
114139 print (
115- f"{ i } /{ n } inserts completed. Estimated time remaining { format_seconds ((n - i ) * (time .time () - start_time ) / i )} " )
140+ f"{ i } /{ n } inserts completed. Estimated time remaining { format_seconds ((n - i ) * (time .time () - start_time ) / i )} "
141+ )
116142 db_cursor .execute (
117143 f"INSERT INTO hydro_historical_monthly_capacity_factors(hydro_simple_scenario_id, generation_plant_id, year, month, hydro_min_flow_mw, hydro_avg_flow_mw) "
118144 f"VALUES ({ r .hydro_simple_scenario_id } ,{ r .generation_plant_id } ,{ r .year } ,{ r .month } ,{ r .hydro_min_flow_mw } ,{ r .hydro_avg_flow_mw } )"
0 commit comments