Coverage for src/prepare_times_nz/stage_4/electricity/fixed_plant_adjustments.py: 37%
158 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-16 23:05 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-16 23:05 +0000
1"""
2Create renewable-availability adjustments for plants with fixed commissioning dates.
4This script identifies fixed-build plants from the stage-3 genstack output, combines
5them with any user-supplied installation-month assumptions, and defaults missing
6months to July. It then converts each installation month into seasonal shares for the
7commissioning year, maps each plant to the matching renewable-availability wildcard
8used in the stage-4 Veda AF table, and joins those seasonal shares onto the existing
9renewable availability timeslices.
11The final output represents a partial-year commissioning adjustment. For the
12commissioning year, each timeslice availability value is scaled by the share of the
13relevant season that the plant is assumed to be online. A second set of rows is then
14created for the following year using the unscaled availability values, so Veda
15processing can extrapolate the plant as fully available after commissioning.
17IMPORTANT NOTE: the method to adjust timeslice-specific AFs for plants in specific years
18only works if these plants are not vintaged. They should not be vintaged anyway,
19as they represent single plants.
20"""
22from __future__ import annotations
24import pandas as pd
25from prepare_times_nz.utilities.data_in_out import _save_data
26from prepare_times_nz.utilities.filepaths import ASSUMPTIONS, STAGE_3_DATA, STAGE_4_DATA
# Stage-3 generation stack; the fixed-commissioning plant list is read from here.
GENSTACK_FILE = STAGE_3_DATA / "electricity" / "genstack.csv"

# User-supplied installation months for fixed plants.
FIXED_INSTALL_MONTHS_FILE = (
    ASSUMPTIONS / "electricity_generation" / "future_techs" / "FixedInstallMonths.csv"
)

# Stage-4 renewable availability folder: the existing availability table is read
# from it and the fixed-plant adjustment table is written back into it.
OUTPUT_LOCATION = STAGE_4_DATA / "scen_ren_af"
RENEWABLE_AVAILABILITY_FILE = OUTPUT_LOCATION / "renewable_availability.csv"

# Installation month assumed when no assumption is supplied for a plant (7 = July).
DEFAULT_INSTALL_MONTH = 7
# Calendar months (1 = January) that make up each season code. Summer wraps
# around the year end: December, January and February.
_season_months = {
    "SUM": (12, 1, 2),
    "FAL": (3, 4, 5),
    "WIN": (6, 7, 8),
    "SPR": (9, 10, 11),
}

# Month number -> season code, keyed in January..December order.
month_season_map = dict(
    sorted(
        (month, season)
        for season, months in _season_months.items()
        for month in months
    )
)
def get_fixed_plant_dates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Extract the distinct fixed-commissioning plants from a genstack table.

    Rows whose ``CommissioningType`` equals ``"Fixed"`` are kept, reduced to the
    plant metadata columns, and de-duplicated. ``CommissioningYear`` is returned
    as an integer column named ``Year``.

    Rows with a missing ``Plant``, ``TechName`` or ``CommissioningYear`` are
    dropped. The result is sorted by ``Year`` then ``TechName`` with a fresh
    index.

    Raises:
        ValueError: if any required column is absent from ``df``, or if a
            commissioning year cannot be parsed as a number.
    """
    plant_columns = ["Plant", "TechName", "Tech_TIMES", "Island"]
    needed = {*plant_columns, "CommissioningType", "CommissioningYear"}
    absent = sorted(needed - set(df.columns))
    if absent:
        raise ValueError(
            f"Missing required columns for fixed plant dates: {', '.join(absent)}"
        )

    is_fixed = df["CommissioningType"] == "Fixed"
    plants = (
        df.loc[is_fixed, [*plant_columns, "CommissioningYear"]]
        .dropna(subset=["Plant", "TechName", "CommissioningYear"])
        .rename(columns={"CommissioningYear": "Year"})
        # errors="raise" makes an unparseable year fail loudly.
        .assign(Year=lambda t: pd.to_numeric(t["Year"], errors="raise").astype(int))
    )

    # The genstack repeats each plant many times; collapse to distinct rows.
    distinct = plants.drop_duplicates()
    return distinct.sort_values(["Year", "TechName"], ignore_index=True)
def read_fixed_plant_dates(filepath=GENSTACK_FILE) -> pd.DataFrame:
    """
    Load a genstack CSV and reduce it to the fixed-build plant table.

    ``filepath`` defaults to the stage-3 genstack location; the loaded table is
    handed straight to ``get_fixed_plant_dates``.
    """

    return get_fixed_plant_dates(pd.read_csv(filepath))
def get_season_shares(month: int) -> dict[str, float]:
    """
    Return, per season, the fraction of its months from ``month`` to December.

    Seasons come from the module-level ``month_season_map``. A plant installed
    in ``month`` is treated as online for that month and every later month of
    the same calendar year.

    Example:
    - month 1: every season has share 1.0
    - month 6: winter and spring are 1.0, summer is 1/3 (December only),
      autumn is 0.0

    Raises:
        ValueError: if ``month`` is null or falls outside 1..12.
    """

    if pd.isna(month):
        raise ValueError("Month cannot be null.")

    month = int(month)
    if not 1 <= month <= 12:
        raise ValueError(f"Month must be between 1 and 12. Received: {month}")

    result = {}
    for season in ("SUM", "FAL", "WIN", "SPR"):
        calendar_months = [
            m for m, mapped_season in month_season_map.items() if mapped_season == season
        ]
        # Months of this season at or after the install month count as online.
        online = sum(1 for m in calendar_months if m >= month)
        result[season] = online / len(calendar_months)

    return result
def read_fixed_install_month_assumptions(
    filepath=FIXED_INSTALL_MONTHS_FILE,
) -> pd.DataFrame:
    """
    Read user assumptions for fixed-plant installation months.

    Only the ``PlantName`` and ``InstallMonth`` columns are returned. Plant names
    are whitespace-stripped; rows whose name is missing or blank are dropped.
    When a plant is listed more than once, the last entry wins.

    ``InstallMonth`` is converted to a number; values that cannot be parsed
    become missing rather than raising.

    Raises:
        ValueError: if ``PlantName`` or ``InstallMonth`` is absent from the file.
    """

    df = pd.read_csv(filepath)
    required_columns = {"PlantName", "InstallMonth"}
    missing_columns = required_columns.difference(df.columns)
    if missing_columns:
        missing_str = ", ".join(sorted(missing_columns))
        raise ValueError(
            f"Missing required columns for fixed install month assumptions: {missing_str}"
        )

    out = df.loc[:, ["PlantName", "InstallMonth"]].copy()

    raw_names = out["PlantName"]
    stripped_names = raw_names.astype(str).str.strip()
    # astype(str) can turn a missing name into the literal text "nan", which
    # would survive the blank-name filter. Mask on the ORIGINAL missingness as
    # well as on empty strings so both kinds of blank row are dropped.
    is_blank = raw_names.isna() | (stripped_names == "")
    out["PlantName"] = stripped_names.mask(is_blank, pd.NA)

    out["InstallMonth"] = pd.to_numeric(out["InstallMonth"], errors="coerce")
    out = out.dropna(subset=["PlantName"])
    out = out.drop_duplicates(subset=["PlantName"], keep="last")

    return out
def validate_fixed_install_month_names(
    fixed_plants: pd.DataFrame, install_months: pd.DataFrame
) -> None:
    """
    Check that every assumption row names a known fixed-build plant.

    Names in ``install_months["PlantName"]`` are compared against
    ``fixed_plants["Plant"]``.

    Raises:
        ValueError: listing, in sorted order, every assumption name that is not
            in the fixed plant list.
    """

    known_plants = set(fixed_plants["Plant"])
    unrecognised = sorted(
        name for name in set(install_months["PlantName"]) if name not in known_plants
    )
    if not unrecognised:
        return

    raise ValueError(
        "Fixed install month assumptions include plants not found in the "
        f"existing fixed install list: {', '.join(unrecognised)}"
    )
def assign_install_months(
    fixed_plants: pd.DataFrame,
    install_months: pd.DataFrame,
    default_month: int = DEFAULT_INSTALL_MONTH,
) -> pd.DataFrame:
    """
    Attach an integer ``InstallMonth`` to every fixed plant.

    Assumption names are validated first. Plants are then left-joined to the
    assumptions on ``Plant`` == ``PlantName``; plants with no assumption (or a
    missing month) receive ``default_month``. The helper ``PlantName`` column is
    removed from the result.

    Raises:
        ValueError: if an assumption names an unknown plant, or if any resulting
            month falls outside 1..12.
    """

    validate_fixed_install_month_names(fixed_plants, install_months)

    merged = pd.merge(
        fixed_plants,
        install_months,
        how="left",
        left_on="Plant",
        right_on="PlantName",
    ).drop(columns=["PlantName"])
    merged["InstallMonth"] = merged["InstallMonth"].fillna(default_month).astype(int)

    too_small = merged["InstallMonth"] < 1
    too_large = merged["InstallMonth"] > 12
    out_of_range = merged.loc[too_small | too_large, ["Plant", "InstallMonth"]]
    if len(out_of_range) > 0:
        details = ", ".join(
            f"{plant}={install_month}"
            for plant, install_month in zip(
                out_of_range["Plant"], out_of_range["InstallMonth"]
            )
        )
        raise ValueError(f"Install months must be between 1 and 12: {details}")

    return merged
def create_renewable_availability_wildcard(tech_code: str) -> str:
    """
    Build the process wildcard string for a technology code.

    The general form is ``ELC_<tech_code>_*``. The ``Geo`` pattern is returned
    with an extra ``-ELC_GeoCHP_*`` term appended (presumably an exclusion so
    the geothermal pattern does not also pick up GeoCHP processes; confirm
    against the Veda AF table conventions).
    """

    special_cases = {"ELC_Geo_*": "ELC_Geo_*, -ELC_GeoCHP_*"}
    pattern = f"ELC_{tech_code}_*"
    return special_cases.get(pattern, pattern)
def add_renewable_availability_wildcards(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with a ``Pset_PN`` wildcard column.

    Each ``Tech_TIMES`` code is converted with
    ``create_renewable_availability_wildcard``; the input frame is not modified.
    """

    wildcards = df["Tech_TIMES"].map(create_renewable_availability_wildcard)
    return df.assign(Pset_PN=wildcards)
def expand_fixed_plants_to_season_shares(df: pd.DataFrame) -> pd.DataFrame:
    """
    Expand one row per plant to one row per plant-season share.

    For every input row, ``get_season_shares(InstallMonth)`` supplies one share
    per season, and each becomes an output row.

    Output columns: ``TechName``, ``TechCode`` (from ``Tech_TIMES``),
    ``Pset_PN``, ``Year``, ``Region`` (from ``Island``), ``Season``, ``Share``.
    Rows are sorted by ``Year``, ``Region``, ``TechName``, ``Season``.

    An input with no rows yields an empty frame that still carries the output
    columns, instead of failing while sorting a column-less frame.
    """

    output_columns = [
        "TechName",
        "TechCode",
        "Pset_PN",
        "Year",
        "Region",
        "Season",
        "Share",
    ]

    rows = []
    for row in df.itertuples():
        season_shares = get_season_shares(row.InstallMonth)
        for season, share in season_shares.items():
            rows.append(
                {
                    "TechName": row.TechName,
                    "TechCode": row.Tech_TIMES,
                    "Pset_PN": row.Pset_PN,
                    "Year": row.Year,
                    "Region": row.Island,
                    "Season": season,
                    "Share": share,
                }
            )

    # Passing the columns explicitly keeps the schema when `rows` is empty, so
    # the sort below (and downstream merges) still find their key columns.
    out = pd.DataFrame(rows, columns=output_columns)
    out = out.sort_values(["Year", "Region", "TechName", "Season"], ignore_index=True)
    return out
def get_fixed_plant_season_shares(
    genstack_filepath=GENSTACK_FILE,
    assumptions_filepath=FIXED_INSTALL_MONTHS_FILE,
    default_month: int = DEFAULT_INSTALL_MONTH,
) -> pd.DataFrame:
    """
    Produce the plant-by-season share table for every fixed-build plant.

    Pipeline: read fixed plants from the genstack, read install-month
    assumptions, assign a month to each plant (falling back to
    ``default_month``), add the availability wildcard, then expand to one row
    per plant and season.

    Output columns:
    - TechName
    - TechCode
    - Pset_PN
    - Year
    - Region
    - Season
    - Share
    """

    plants = read_fixed_plant_dates(genstack_filepath)
    month_assumptions = read_fixed_install_month_assumptions(assumptions_filepath)

    plants_with_months = assign_install_months(
        plants, month_assumptions, default_month=default_month
    )
    plants_with_wildcards = add_renewable_availability_wildcards(plants_with_months)

    return expand_fixed_plants_to_season_shares(plants_with_wildcards)
def read_renewable_availability(filepath=RENEWABLE_AVAILABILITY_FILE) -> pd.DataFrame:
    """
    Load the existing stage-4 renewable availability table.

    The table is returned unchanged once the columns needed by the fixed-plant
    adjustment have been confirmed present: ``TimeSlice``, ``LimType``,
    ``Attribute``, ``NI``, ``SI``, ``Pset_PN`` and ``Year``.

    Raises:
        ValueError: naming, in sorted order, any of those columns that is absent.
    """

    table = pd.read_csv(filepath)

    expected = ("TimeSlice", "LimType", "Attribute", "NI", "SI", "Pset_PN", "Year")
    absent = sorted(column for column in expected if column not in table.columns)
    if absent:
        raise ValueError(
            "Missing required columns for renewable availability data: "
            + ", ".join(absent)
        )

    return table
def extract_season_from_timeslice(timeslice: str) -> str:
    """
    Return the season code at the start of a timeslice label.

    The label is converted to text and stripped of surrounding whitespace, and
    its first three characters are taken, e.g. ``WIN-WK-P`` gives ``WIN``.

    Raises:
        ValueError: if ``timeslice`` is null, or the prefix is not one of
            ``SUM``, ``FAL``, ``WIN``, ``SPR``.
    """

    if pd.isna(timeslice):
        raise ValueError("TimeSlice cannot be null when deriving Season.")

    prefix = str(timeslice).strip()[:3]
    if prefix in ("SUM", "FAL", "WIN", "SPR"):
        return prefix

    raise ValueError(f"Unrecognised season in TimeSlice: {timeslice}")
def prepare_renewable_availability_for_join(df: pd.DataFrame) -> pd.DataFrame:
    """
    Shape the renewable availability table for joining to plant-season rows.

    Rows with ``Year`` <= 0 are discarded (the original comment calls these
    "interps"), a ``Season`` column is derived from ``TimeSlice``, and ``Year``
    is dropped. The input frame is not modified.
    """

    # Keep only rows that carry an explicit (positive) year.
    explicit_years = df[df["Year"] > 0].copy()
    # Season is the join key shared with the plant-season share table.
    explicit_years["Season"] = explicit_years["TimeSlice"].map(
        extract_season_from_timeslice
    )
    # Year is no longer needed once the filter has been applied.
    return explicit_years.drop(columns="Year")
def validate_fixed_plant_wildcards_against_renewable_availability(
    fixed_plants: pd.DataFrame, renewable_availability: pd.DataFrame
) -> None:
    """
    Check that every plant wildcard appears in the renewable availability table.

    Non-null ``Pset_PN`` values in ``fixed_plants`` are compared against the
    non-null ``Pset_PN`` values in ``renewable_availability``.

    Raises:
        ValueError: listing, in sorted order, each plant wildcard that has no
            counterpart in the availability table.
    """

    known = set(renewable_availability["Pset_PN"].dropna())
    requested = set(fixed_plants["Pset_PN"].dropna())
    absent = sorted(requested - known)
    if not absent:
        return

    raise ValueError(
        "Generated fixed-plant wildcards not found in renewable availability "
        f"data: {', '.join(absent)}"
    )
def join_fixed_plants_to_renewable_availability(
    fixed_plants: pd.DataFrame, renewable_availability: pd.DataFrame
) -> pd.DataFrame:
    """
    Attach renewable availability timeslice rows to each plant-season row.

    The availability table is first passed through
    ``prepare_renewable_availability_for_join`` and then left-joined onto
    ``fixed_plants`` using ``Pset_PN`` and ``Season``. The result is sorted by
    ``Year``, ``Region``, ``TechName``, ``Season``, ``TimeSlice``.

    Raises:
        ValueError: if any plant-season row finds no availability row.
    """

    availability = prepare_renewable_availability_for_join(renewable_availability)

    # NOTE(review): validate="many_to_many" performs no uniqueness check. If
    # the availability table holds several positive years for one wildcard,
    # each timeslice would be repeated here. Confirm against the input data.
    joined = pd.merge(
        fixed_plants,
        availability,
        how="left",
        on=["Pset_PN", "Season"],
        validate="many_to_many",
    )

    # A left join leaves TimeSlice null wherever no availability row matched.
    unmatched = joined.loc[
        joined["TimeSlice"].isna(), ["TechName", "Pset_PN", "Season"]
    ].drop_duplicates()
    if len(unmatched) > 0:
        details = ", ".join(
            f"{tech_name} ({wildcard}, {season})"
            for tech_name, wildcard, season in unmatched.itertuples(
                index=False, name=None
            )
        )
        raise ValueError(
            "Fixed plant season rows could not be matched to renewable "
            f"availability data: {details}"
        )

    sort_keys = ["Year", "Region", "TechName", "Season", "TimeSlice"]
    return joined.sort_values(sort_keys, ignore_index=True)
def format_fixed_plant_adjustment_output(df: pd.DataFrame) -> pd.DataFrame:
    """
    Turn joined plant/availability rows into the final adjustment table.

    Each row's ``Value`` is taken from the ``NI`` or ``SI`` column according to
    its ``Region``. Two copies of the rows are then stacked:

    - the commissioning year, with ``Value`` multiplied by ``Share``;
    - the following year (``Year`` + 1), with the unscaled ``Value``.

    The helper columns ``NI``, ``SI``, ``Pset_PN``, ``TechCode``, ``Season`` and
    ``Share`` are removed, and rows are sorted by ``TechName``, ``Year``,
    ``Region``, ``TimeSlice``.

    Raises:
        ValueError: if any row ends up without a ``Value`` (its region is
            neither ``NI`` nor ``SI``, or the selected column is null).
    """

    full = df.copy()
    full["Value"] = pd.NA
    # Pick the availability column that matches each row's island.
    for island in ("NI", "SI"):
        on_island = full["Region"] == island
        full.loc[on_island, "Value"] = full.loc[on_island, island]

    unresolved = full.loc[full["Value"].isna(), ["TechName", "Region"]]
    unresolved = unresolved.drop_duplicates()
    if len(unresolved) > 0:
        details = ", ".join(
            f"{tech_name} ({region})"
            for tech_name, region in zip(unresolved["TechName"], unresolved["Region"])
        )
        raise ValueError(
            "Fixed plant rows include unsupported regions for renewable "
            f"availability values: {details}"
        )

    # Commissioning year: availability scaled by the share of each season online.
    commissioning_year = full.assign(Value=full["Value"] * full["Share"])
    # Following year: full availability, to be carried forward downstream.
    following_year = full.assign(Year=full["Year"] + 1)

    combined = pd.concat([commissioning_year, following_year], ignore_index=True)
    combined = combined.drop(
        columns=["NI", "SI", "Pset_PN", "TechCode", "Season", "Share"]
    )

    sort_keys = ["TechName", "Year", "Region", "TimeSlice"]
    return combined.sort_values(sort_keys, ignore_index=True)
def main() -> pd.DataFrame:
    """
    Script entry point: build and save the fixed-plant availability adjustments.

    Steps: build the plant-season share table, read the renewable availability
    table, confirm every plant wildcard exists in it, join the two, format the
    result, and pass it to ``_save_data`` under the name
    ``renewable_availability_fixed_adjustments.csv`` in ``OUTPUT_LOCATION``.

    Returns:
        The formatted adjustment table that was handed to ``_save_data``, as the
        return annotation promises.
    """

    fixed_plants = get_fixed_plant_season_shares()
    renewable_availability = read_renewable_availability()
    validate_fixed_plant_wildcards_against_renewable_availability(
        fixed_plants, renewable_availability
    )
    joined = join_fixed_plants_to_renewable_availability(
        fixed_plants, renewable_availability
    )
    out = format_fixed_plant_adjustment_output(joined)
    _save_data(
        out,
        "renewable_availability_fixed_adjustments.csv",
        "Renewable availability fixed adjustments",
        OUTPUT_LOCATION,
    )
    # Return the table so the declared `-> pd.DataFrame` holds; callers that
    # ignore the return value are unaffected.
    return out


if __name__ == "__main__":
    main()