Coverage for src/prepare_times_nz/stage_4/electricity/fixed_plant_adjustments.py: 37%

158 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-16 23:05 +0000

1""" 

2Create renewable-availability adjustments for plants with fixed commissioning dates. 

3 

4This script identifies fixed-build plants from the stage-3 genstack output, combines 

5them with any user-supplied installation-month assumptions, and defaults missing 

6months to July. It then converts each installation month into seasonal shares for the 

7commissioning year, maps each plant to the matching renewable-availability wildcard 

8used in the stage-4 Veda AF table, and joins those seasonal shares onto the existing 

9renewable availability timeslices. 

10 

11The final output represents a partial-year commissioning adjustment. For the 

12commissioning year, each timeslice availability value is scaled by the share of the 

13relevant season that the plant is assumed to be online. A second set of rows is then 

14created for the following year using the unscaled availability values, so Veda 

15processing can extrapolate the plant as fully available after commissioning. 

16 

17IMPORTANT NOTE: the method to adjust timeslice-specific AFs for plants in specific years 

18only works if these plants are not vintaged. They should not be vintaged anyway, 

19as they represent single plants. 

20""" 

21 

22from __future__ import annotations 

23 

24import pandas as pd 

25from prepare_times_nz.utilities.data_in_out import _save_data 

26from prepare_times_nz.utilities.filepaths import ASSUMPTIONS, STAGE_3_DATA, STAGE_4_DATA 

27 

# Stage-3 generation stack; the fixed-commissioning plant list is read from it.
GENSTACK_FILE = STAGE_3_DATA / "electricity" / "genstack.csv"

# User assumptions giving an installation month per fixed plant.
FIXED_INSTALL_MONTHS_FILE = (
    ASSUMPTIONS / "electricity_generation" / "future_techs" / "FixedInstallMonths.csv"
)

# The existing availability table is read from, and the adjustment table is
# written to, the same stage-4 folder.
OUTPUT_LOCATION = STAGE_4_DATA / "scen_ren_af"
RENEWABLE_AVAILABILITY_FILE = OUTPUT_LOCATION / "renewable_availability.csv"

# Month assumed for plants without a user-supplied install month (7 = July).
DEFAULT_INSTALL_MONTH = 7

37 

# Calendar month (1-12) -> season code. Summer wraps around the year end:
# January and February open the list and December closes it.
month_season_map = dict(
    zip(
        range(1, 13),
        ["SUM"] * 2 + ["FAL"] * 3 + ["WIN"] * 3 + ["SPR"] * 3 + ["SUM"],
    )
)

52 

53 

def get_fixed_plant_dates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Extract the distinct fixed-commissioning plants from a genstack table.

    Rows whose ``CommissioningType`` is ``"Fixed"`` are kept, reduced to the
    plant metadata columns, and de-duplicated (the genstack table repeats
    each plant across variables, years and scenarios).

    Returns a frame with columns ``Plant``, ``TechName``, ``Tech_TIMES``,
    ``Island`` and ``Year`` (the integer commissioning year), sorted by
    ``Year`` then ``TechName``.

    Raises ``ValueError`` if any required input column is absent, or if a
    commissioning year cannot be parsed as a number.
    """

    needed = (
        "Plant",
        "TechName",
        "Tech_TIMES",
        "Island",
        "CommissioningType",
        "CommissioningYear",
    )
    absent = sorted(col for col in needed if col not in df.columns)
    if absent:
        raise ValueError(
            f"Missing required columns for fixed plant dates: {', '.join(absent)}"
        )

    is_fixed = df["CommissioningType"] == "Fixed"
    kept = ["Plant", "TechName", "Tech_TIMES", "Island", "CommissioningYear"]
    plants = (
        df.loc[is_fixed, kept]
        # Rows without a plant, tech name or year cannot be dated.
        .dropna(subset=["Plant", "TechName", "CommissioningYear"])
        .rename(columns={"CommissioningYear": "Year"})
    )
    plants["Year"] = pd.to_numeric(plants["Year"], errors="raise").astype(int)

    distinct = plants.drop_duplicates()
    return distinct.sort_values(["Year", "TechName"], ignore_index=True)

86 

87 

def read_fixed_plant_dates(filepath=GENSTACK_FILE) -> pd.DataFrame:
    """Load the genstack CSV at ``filepath`` and extract its fixed-build plants."""

    genstack = pd.read_csv(filepath)
    fixed_plants = get_fixed_plant_dates(genstack)
    return fixed_plants

93 

94 

def get_season_shares(month: int) -> dict[str, float]:
    """
    Return, per season, the fraction of its months that fall on or after ``month``.

    The plant is treated as online from ``month`` through December of the
    commissioning year, so each share is (season months in that window) /
    (season months in the whole year).

    Example:
    - month 1: every season has share 1
    - month 6: winter and spring are 1, summer is 1/3 (December only), autumn is 0

    Raises ``ValueError`` when ``month`` is null or outside 1-12.
    """

    if pd.isna(month):
        raise ValueError("Month cannot be null.")

    month = int(month)
    if not 1 <= month <= 12:
        raise ValueError(f"Month must be between 1 and 12. Received: {month}")

    # Season label of every month in the online window, and of every month
    # in the calendar; the share is the ratio of the two counts.
    online_labels = [month_season_map[m] for m in range(month, 13)]
    calendar_labels = list(month_season_map.values())

    return {
        season: online_labels.count(season) / calendar_labels.count(season)
        for season in ("SUM", "FAL", "WIN", "SPR")
    }

125 

126 

def read_fixed_install_month_assumptions(
    filepath=FIXED_INSTALL_MONTHS_FILE,
) -> pd.DataFrame:
    """
    Read user assumptions for fixed-plant installation months.

    Returns a frame with columns ``PlantName`` (stripped of surrounding
    whitespace) and ``InstallMonth`` (numeric; unparseable entries become
    NaN). Rows with a missing or blank plant name are dropped, and when a
    plant is listed more than once the last entry wins.

    Raises ``ValueError`` if either required column is absent from the file.
    """

    df = pd.read_csv(filepath)
    required_columns = {"PlantName", "InstallMonth"}
    missing_columns = required_columns.difference(df.columns)
    if missing_columns:
        missing_str = ", ".join(sorted(missing_columns))
        raise ValueError(
            f"Missing required columns for fixed install month assumptions: {missing_str}"
        )

    out = df.loc[:, ["PlantName", "InstallMonth"]].copy()

    # Record missing names BEFORE casting to str: the cast can turn a missing
    # value into the literal text "nan", which would then survive dropna()
    # and be reported later as an unknown plant called "nan".
    name_missing = out["PlantName"].isna()
    names = out["PlantName"].astype(str).str.strip()
    out["PlantName"] = names.mask(name_missing | (names == ""), pd.NA)

    out["InstallMonth"] = pd.to_numeric(out["InstallMonth"], errors="coerce")
    out = out.dropna(subset=["PlantName"])
    out = out.drop_duplicates(subset=["PlantName"], keep="last")

    return out

151 

152 

def validate_fixed_install_month_names(
    fixed_plants: pd.DataFrame, install_months: pd.DataFrame
) -> None:
    """
    Check that every assumed plant name is a known fixed-build plant.

    Compares ``install_months["PlantName"]`` against ``fixed_plants["Plant"]``
    and raises ``ValueError`` listing (alphabetically) any names that do not
    appear among the fixed plants. Returns ``None`` when all names match.
    """

    known = set(fixed_plants["Plant"])
    unrecognised = sorted(set(install_months["PlantName"]) - known)
    if not unrecognised:
        return

    raise ValueError(
        "Fixed install month assumptions include plants not found in the "
        f"existing fixed install list: {', '.join(unrecognised)}"
    )

170 

171 

def assign_install_months(
    fixed_plants: pd.DataFrame,
    install_months: pd.DataFrame,
    default_month: int = DEFAULT_INSTALL_MONTH,
) -> pd.DataFrame:
    """
    Attach an integer ``InstallMonth`` to every fixed plant.

    Plants are matched to the assumptions on ``Plant`` == ``PlantName``;
    plants with no (or a null) assumed month receive ``default_month``.

    Raises ``ValueError`` if the assumptions name an unknown plant, or if any
    resulting month lies outside 1-12.
    """

    validate_fixed_install_month_names(fixed_plants, install_months)

    merged = fixed_plants.merge(
        install_months, how="left", left_on="Plant", right_on="PlantName"
    ).drop(columns=["PlantName"])
    merged["InstallMonth"] = merged["InstallMonth"].fillna(default_month).astype(int)

    too_low = merged["InstallMonth"] < 1
    too_high = merged["InstallMonth"] > 12
    out_of_range = merged.loc[too_low | too_high, ["Plant", "InstallMonth"]]
    if out_of_range.empty:
        return merged

    details = ", ".join(
        f"{plant}={install_month}"
        for plant, install_month in zip(
            out_of_range["Plant"], out_of_range["InstallMonth"]
        )
    )
    raise ValueError(f"Install months must be between 1 and 12: {details}")

202 

203 

def create_renewable_availability_wildcard(tech_code: str) -> str:
    """
    Build the ``Pset_PN`` process wildcard for a technology code.

    The general form is ``ELC_<tech_code>_*``. The ``Geo`` code instead maps
    to a combined pattern that also carries an explicit ``GeoCHP`` exclusion.
    """

    pattern = f"ELC_{tech_code}_*"
    # NOTE(review): presumably this string must equal the Pset_PN used for
    # geothermal in the renewable availability table - confirm against it.
    return "ELC_Geo_*, -ELC_GeoCHP_*" if pattern == "ELC_Geo_*" else pattern

215 

216 

def add_renewable_availability_wildcards(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with a ``Pset_PN`` wildcard column.

    The wildcard is derived row by row from the ``Tech_TIMES`` code; the
    input frame is left unmodified.
    """

    wildcards = df["Tech_TIMES"].map(create_renewable_availability_wildcard)
    return df.assign(Pset_PN=wildcards)

225 

226 

def expand_fixed_plants_to_season_shares(df: pd.DataFrame) -> pd.DataFrame:
    """
    Expand one row per plant to one row per plant-season share.

    Each input row (which must provide ``TechName``, ``Tech_TIMES``,
    ``Pset_PN``, ``Year``, ``Island`` and ``InstallMonth``) yields four output
    rows, one per season, carrying the share returned by
    ``get_season_shares`` for that plant's install month.

    Returns a frame with columns ``TechName``, ``TechCode``, ``Pset_PN``,
    ``Year``, ``Region``, ``Season`` and ``Share``, sorted by ``Year``,
    ``Region``, ``TechName`` and ``Season``. An empty input yields an empty
    frame that still has these columns.
    """

    columns = ["TechName", "TechCode", "Pset_PN", "Year", "Region", "Season", "Share"]

    rows = [
        {
            "TechName": row.TechName,
            "TechCode": row.Tech_TIMES,
            "Pset_PN": row.Pset_PN,
            "Year": row.Year,
            "Region": row.Island,
            "Season": season,
            "Share": share,
        }
        for row in df.itertuples()
        for season, share in get_season_shares(row.InstallMonth).items()
    ]

    # Name the columns explicitly: with no fixed plants the row list is empty,
    # and a column-less frame would make the sort below fail with a KeyError.
    out = pd.DataFrame(rows, columns=columns)
    out = out.sort_values(["Year", "Region", "TechName", "Season"], ignore_index=True)
    return out

251 

252 

def get_fixed_plant_season_shares(
    genstack_filepath=GENSTACK_FILE,
    assumptions_filepath=FIXED_INSTALL_MONTHS_FILE,
    default_month: int = DEFAULT_INSTALL_MONTH,
) -> pd.DataFrame:
    """
    Build the season-share table for all fixed-build plants.

    Reads the fixed plants and the install-month assumptions, fills missing
    months with ``default_month``, adds the availability wildcard, and
    expands each plant into one row per season.

    Output columns:
    - TechName
    - TechCode
    - Pset_PN
    - Year
    - Region
    - Season
    - Share
    """

    plants = assign_install_months(
        read_fixed_plant_dates(genstack_filepath),
        read_fixed_install_month_assumptions(assumptions_filepath),
        default_month=default_month,
    )
    plants_with_wildcards = add_renewable_availability_wildcards(plants)
    return expand_fixed_plants_to_season_shares(plants_with_wildcards)

277 

278 

def read_renewable_availability(filepath=RENEWABLE_AVAILABILITY_FILE) -> pd.DataFrame:
    """
    Load the existing stage-4 renewable availability table.

    The file is returned unchanged once it is confirmed to contain the
    columns ``TimeSlice``, ``LimType``, ``Attribute``, ``NI``, ``SI``,
    ``Pset_PN`` and ``Year``; otherwise ``ValueError`` names the absent ones.
    """

    availability = pd.read_csv(filepath)

    expected = ("TimeSlice", "LimType", "Attribute", "NI", "SI", "Pset_PN", "Year")
    absent = sorted(set(expected) - set(availability.columns))
    if absent:
        raise ValueError(
            "Missing required columns for renewable availability data: "
            + ", ".join(absent)
        )

    return availability

306 

307 

def extract_season_from_timeslice(timeslice: str) -> str:
    """
    Return the season code that prefixes a timeslice label.

    The first three characters of the whitespace-stripped label are taken,
    e.g. ``WIN-WK-P`` gives ``WIN``. Raises ``ValueError`` when the label is
    null or the prefix is not one of ``SUM``, ``FAL``, ``WIN``, ``SPR``.
    """

    if pd.isna(timeslice):
        raise ValueError("TimeSlice cannot be null when deriving Season.")

    prefix = str(timeslice).strip()[:3]
    if prefix in ("SUM", "FAL", "WIN", "SPR"):
        return prefix

    raise ValueError(f"Unrecognised season in TimeSlice: {timeslice}")

322 

323 

def prepare_renewable_availability_for_join(df: pd.DataFrame) -> pd.DataFrame:
    """
    Reduce the availability table to explicit-year rows keyed by season.

    Rows with ``Year`` <= 0 (the interpolation entries) are removed, a
    ``Season`` column is derived from ``TimeSlice``, and ``Year`` is dropped.
    The input frame is not modified.
    """

    # Keep only rows that carry a real year; the rest are interpolation rows.
    explicit = df.loc[df["Year"] > 0].copy()
    explicit["Season"] = explicit["TimeSlice"].map(extract_season_from_timeslice)
    return explicit.drop(columns="Year")

337 

338 

def validate_fixed_plant_wildcards_against_renewable_availability(
    fixed_plants: pd.DataFrame, renewable_availability: pd.DataFrame
) -> None:
    """
    Check that every fixed-plant wildcard occurs in the availability table.

    Non-null ``Pset_PN`` values of ``fixed_plants`` are compared with those of
    ``renewable_availability``; any that are absent from the latter are
    reported (alphabetically) in a ``ValueError``.
    """

    available = set(renewable_availability["Pset_PN"].dropna())
    generated = set(fixed_plants["Pset_PN"].dropna())

    absent = sorted(generated - available)
    if absent:
        raise ValueError(
            "Generated fixed-plant wildcards not found in renewable availability "
            f"data: {', '.join(absent)}"
        )

356 

357 

def join_fixed_plants_to_renewable_availability(
    fixed_plants: pd.DataFrame, renewable_availability: pd.DataFrame
) -> pd.DataFrame:
    """
    Attach availability timeslice rows to each fixed-plant season share.

    The availability table is first reduced to explicit-year, season-keyed
    rows, then left-joined onto ``fixed_plants`` by ``Pset_PN`` and
    ``Season``. The result is sorted by ``Year``, ``Region``, ``TechName``,
    ``Season`` and ``TimeSlice``.

    Raises ``ValueError`` listing every plant/wildcard/season combination
    that found no availability row.
    """

    availability = prepare_renewable_availability_for_join(renewable_availability)

    joined = pd.merge(
        fixed_plants,
        availability,
        how="left",
        on=["Pset_PN", "Season"],
        validate="many_to_many",
    )

    # A left-join row with no TimeSlice means nothing matched on the right.
    unmatched = joined.loc[
        joined["TimeSlice"].isna(), ["TechName", "Pset_PN", "Season"]
    ].drop_duplicates()
    if unmatched.empty:
        return joined.sort_values(
            ["Year", "Region", "TechName", "Season", "TimeSlice"], ignore_index=True
        )

    details = ", ".join(
        f"{tech_name} ({wildcard}, {season})"
        for tech_name, wildcard, season in unmatched.itertuples(index=False, name=None)
    )
    raise ValueError(
        "Fixed plant season rows could not be matched to renewable "
        f"availability data: {details}"
    )

392 

393 

def format_fixed_plant_adjustment_output(df: pd.DataFrame) -> pd.DataFrame:
    """
    Turn the joined plant/availability rows into the final adjustment table.

    Each row's ``Value`` is taken from the ``NI`` or ``SI`` column according
    to its ``Region``. Two copies of the rows are then stacked:

    - the commissioning year, with ``Value`` multiplied by ``Share``;
    - the following year (``Year`` + 1), with ``Value`` left unscaled so
      later processing can carry full availability forward.

    The helper columns ``NI``, ``SI``, ``Pset_PN``, ``TechCode``, ``Season``
    and ``Share`` are removed and the result is sorted by ``TechName``,
    ``Year``, ``Region`` and ``TimeSlice``.

    Raises ``ValueError`` for rows that end up without a ``Value`` (e.g. a
    region other than ``NI``/``SI``).
    """

    base = df.copy()
    base["Value"] = pd.NA
    for region in ("NI", "SI"):
        in_region = base["Region"] == region
        base.loc[in_region, "Value"] = base.loc[in_region, region]

    unresolved = base.loc[base["Value"].isna(), ["TechName", "Region"]]
    unresolved = unresolved.drop_duplicates()
    if not unresolved.empty:
        labels = [f"{row.TechName} ({row.Region})" for row in unresolved.itertuples()]
        raise ValueError(
            "Fixed plant rows include unsupported regions for renewable "
            f"availability values: {', '.join(labels)}"
        )

    # Partial-year availability in the commissioning year ...
    commissioning_year = base.assign(Value=base["Value"] * base["Share"])
    # ... and the unscaled value from the next year onwards.
    following_year = base.assign(Year=base["Year"] + 1)

    stacked = pd.concat([commissioning_year, following_year], ignore_index=True)
    stacked = stacked.drop(
        columns=["NI", "SI", "Pset_PN", "TechCode", "Season", "Share"]
    )
    return stacked.sort_values(
        ["TechName", "Year", "Region", "TimeSlice"], ignore_index=True
    )

433 

434 

def main() -> pd.DataFrame:
    """
    Script entry point.

    Builds the fixed-plant season shares, checks their wildcards against the
    existing renewable availability table, joins the two, formats the
    adjustment rows, and saves them as
    ``renewable_availability_fixed_adjustments.csv`` in ``OUTPUT_LOCATION``.

    Returns the saved adjustment table.
    """

    fixed_plants = get_fixed_plant_season_shares()
    renewable_availability = read_renewable_availability()
    # Fail early, with a wildcard-level message, before attempting the join.
    validate_fixed_plant_wildcards_against_renewable_availability(
        fixed_plants, renewable_availability
    )
    joined = join_fixed_plants_to_renewable_availability(
        fixed_plants, renewable_availability
    )
    out = format_fixed_plant_adjustment_output(joined)
    _save_data(
        out,
        "renewable_availability_fixed_adjustments.csv",
        "Renewable availability fixed adjustments",
        OUTPUT_LOCATION,
    )
    # The signature promises a DataFrame; previously nothing was returned.
    return out


if __name__ == "__main__":
    main()