Coverage for src/prepare_times_nz/stage_4/electricity/fixed_plant

1"""

Create renewable-availability adjustments for plants with fixed commissioning dates.

This script identifies fixed-build plants from the stage-3 genstack output, combines
them with any user-supplied installation-month assumptions, and defaults missing
months to July. It then converts each installation month into seasonal shares for the
commissioning year, maps each plant to the matching renewable-availability wildcard
used in the stage-4 Veda AF table, and joins those seasonal shares onto the existing
renewable availability timeslices.

The final output represents a partial-year commissioning adjustment. For the
commissioning year, each timeslice availability value is scaled by the share of the
relevant season that the plant is assumed to be online. A second set of rows is then
created for the following year using the unscaled availability values, so Veda
processing can extrapolate the plant as fully available after commissioning.

IMPORTANT NOTE: the method to adjust timeslice-specific AFs for plants in specific years
only works if these plants are not vintaged. They should not be vintaged anyway,
as they represent single plants.

20"""

22from __future__ import annotations

24import pandas as pd

25from prepare_times_nz.utilities.data_in_out import _save_data

26from prepare_times_nz.utilities.filepaths import ASSUMPTIONS, STAGE_3_DATA, STAGE_4_DATA

# Month used when the assumptions file gives no installation month for a plant.
DEFAULT_INSTALL_MONTH = 7

# Stage-4 folder that holds the renewable availability table and receives
# the adjustment output written by this script.
OUTPUT_LOCATION = STAGE_4_DATA / "scen_ren_af"
RENEWABLE_AVAILABILITY_FILE = OUTPUT_LOCATION / "renewable_availability.csv"

# Stage-3 generation stack, the source of the fixed-build plant list.
GENSTACK_FILE = STAGE_3_DATA / "electricity" / "genstack.csv"

# User-supplied installation months for fixed-build plants.
FIXED_INSTALL_MONTHS_FILE = (
    ASSUMPTIONS / "electricity_generation" / "future_techs" / "FixedInstallMonths.csv"
)

# Calendar month (1-12) -> season code. Seasons are listed in month order,
# January first; December belongs to summer along with January and February.
month_season_map = dict(
    enumerate(
        (
            "SUM",
            "SUM",
            "FAL",
            "FAL",
            "FAL",
            "WIN",
            "WIN",
            "WIN",
            "SPR",
            "SPR",
            "SPR",
            "SUM",
        ),
        start=1,
    )
)

def get_fixed_plant_dates(df: pd.DataFrame) -> pd.DataFrame:
    """
    Collapse a genstack table to one row per fixed-build plant.

    Keeps only rows whose ``CommissioningType`` is ``"Fixed"``, retains the
    plant metadata columns, drops rows with a missing plant, tech name or
    commissioning year, renames ``CommissioningYear`` to ``Year`` (as int),
    removes duplicate rows, and sorts by ``Year`` then ``TechName``.

    Raises:
        ValueError: if any required input column is absent.
    """
    needed = {
        "Plant",
        "TechName",
        "Tech_TIMES",
        "Island",
        "CommissioningType",
        "CommissioningYear",
    }
    absent = needed - set(df.columns)
    if absent:
        missing_str = ", ".join(sorted(absent))
        raise ValueError(
            f"Missing required columns for fixed plant dates: {missing_str}"
        )

    metadata_columns = ["Plant", "TechName", "Tech_TIMES", "Island", "CommissioningYear"]
    is_fixed = df["CommissioningType"] == "Fixed"

    plants = (
        df.loc[is_fixed, metadata_columns]
        .dropna(subset=["Plant", "TechName", "CommissioningYear"])
        .rename(columns={"CommissioningYear": "Year"})
        .assign(Year=lambda t: pd.to_numeric(t["Year"], errors="raise").astype(int))
        .drop_duplicates()
    )
    return plants.sort_values(["Year", "TechName"], ignore_index=True)

def read_fixed_plant_dates(filepath=GENSTACK_FILE) -> pd.DataFrame:
    """Load the genstack CSV at ``filepath`` and reduce it to fixed-build plants."""
    return get_fixed_plant_dates(pd.read_csv(filepath))

def get_season_shares(month: int) -> dict[str, float]:
    """
    Return, per season, the fraction of its months at or after ``month``.

    The plant is treated as online from the install month through December
    of the commissioning year. Keys are always ``SUM``, ``FAL``, ``WIN``,
    ``SPR`` in that order.

    Example:
    - month 1: every season has share 1.0
    - month 6: WIN and SPR are 1.0, SUM is 1/3 (December only), FAL is 0.0

    Raises:
        ValueError: if ``month`` is null or outside 1-12.
    """
    if pd.isna(month):
        raise ValueError("Month cannot be null.")

    month = int(month)
    if not 1 <= month <= 12:
        raise ValueError(f"Month must be between 1 and 12. Received: {month}")

    shares = {}
    for season in ("SUM", "FAL", "WIN", "SPR"):
        months_in_season = 0
        months_online = 0
        for calendar_month, mapped_season in month_season_map.items():
            if mapped_season != season:
                continue
            months_in_season += 1
            # Online from the install month to the end of the year.
            if calendar_month >= month:
                months_online += 1
        shares[season] = months_online / months_in_season

    return shares

125

126

def read_fixed_install_month_assumptions(
    filepath=FIXED_INSTALL_MONTHS_FILE,
) -> pd.DataFrame:
    """
    Read user assumptions for fixed-plant installation months.

    Returns a frame with columns ``PlantName`` (whitespace-stripped) and
    ``InstallMonth`` (numeric; values that cannot be parsed become NaN).
    Rows with a missing or blank plant name are dropped, and when a plant
    name appears more than once the last row wins.

    Raises:
        ValueError: if ``PlantName`` or ``InstallMonth`` is absent from the file.
    """

    df = pd.read_csv(filepath)
    required_columns = {"PlantName", "InstallMonth"}
    missing_columns = required_columns.difference(df.columns)
    if missing_columns:
        missing_str = ", ".join(sorted(missing_columns))
        raise ValueError(
            f"Missing required columns for fixed install month assumptions: {missing_str}"
        )

    out = df.loc[:, ["PlantName", "InstallMonth"]].copy()

    # Casting straight to str would turn a missing name (NaN) into the literal
    # text "nan", which survives dropna and later fails validation as an
    # unknown plant. Remember which names were present before casting.
    raw_names = out["PlantName"].astype(object)
    stripped = raw_names.astype(str).str.strip()
    has_name = raw_names.notna() & (stripped != "")
    out["PlantName"] = stripped.where(has_name, pd.NA)

    out["InstallMonth"] = pd.to_numeric(out["InstallMonth"], errors="coerce")
    out = out.dropna(subset=["PlantName"])
    out = out.drop_duplicates(subset=["PlantName"], keep="last")

    return out

151

152

def validate_fixed_install_month_names(
    fixed_plants: pd.DataFrame, install_months: pd.DataFrame
) -> None:
    """
    Check that every assumption row names a known fixed-build plant.

    Compares ``install_months["PlantName"]`` against ``fixed_plants["Plant"]``.

    Raises:
        ValueError: listing, in sorted order, every name with no matching plant.
    """
    known_plants = set(fixed_plants["Plant"])
    unrecognised = sorted(
        name for name in set(install_months["PlantName"]) if name not in known_plants
    )
    if not unrecognised:
        return

    unknown_str = ", ".join(unrecognised)
    raise ValueError(
        "Fixed install month assumptions include plants not found in the "
        f"existing fixed install list: {unknown_str}"
    )

170

171

def assign_install_months(
    fixed_plants: pd.DataFrame,
    install_months: pd.DataFrame,
    default_month: int = DEFAULT_INSTALL_MONTH,
) -> pd.DataFrame:
    """
    Attach an integer ``InstallMonth`` to every fixed plant.

    Assumption names are validated first, then left-joined onto the plants
    by ``Plant`` == ``PlantName``. Plants with no assumption (or a null
    month) receive ``default_month``.

    Raises:
        ValueError: if an assumption names an unknown plant, or if any
            resulting month lies outside 1-12.
    """
    validate_fixed_install_month_names(fixed_plants, install_months)

    merged = pd.merge(
        fixed_plants,
        install_months,
        how="left",
        left_on="Plant",
        right_on="PlantName",
    ).drop(columns=["PlantName"])
    merged["InstallMonth"] = merged["InstallMonth"].fillna(default_month).astype(int)

    out_of_range = (merged["InstallMonth"] < 1) | (merged["InstallMonth"] > 12)
    offenders = merged.loc[out_of_range, ["Plant", "InstallMonth"]]
    if not offenders.empty:
        invalid_str = ", ".join(
            f"{row.Plant}={row.InstallMonth}" for row in offenders.itertuples()
        )
        raise ValueError(f"Install months must be between 1 and 12: {invalid_str}")

    return merged

202

203

def create_renewable_availability_wildcard(tech_code: str) -> str:
    """
    Build the process-set wildcard ``ELC_<tech_code>_*`` for a tech code.

    The geothermal wildcard is returned with an extra exclusion term so it
    does not also match the ``ELC_GeoCHP_*`` processes.
    """
    pattern = "ELC_{}_*".format(tech_code)
    return "ELC_Geo_*, -ELC_GeoCHP_*" if pattern == "ELC_Geo_*" else pattern

215

216

def add_renewable_availability_wildcards(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with a ``Pset_PN`` wildcard column.

    The wildcard is derived from each row's ``Tech_TIMES`` code; the input
    frame is left unmodified.
    """
    wildcards = df["Tech_TIMES"].map(create_renewable_availability_wildcard)
    return df.assign(Pset_PN=wildcards)

225

226

def expand_fixed_plants_to_season_shares(df: pd.DataFrame) -> pd.DataFrame:
    """
    Expand one row per plant to one row per plant-season share.

    Expects columns ``TechName``, ``Tech_TIMES``, ``Pset_PN``, ``Year``,
    ``Island`` and ``InstallMonth``. Returns columns ``TechName``,
    ``TechCode``, ``Pset_PN``, ``Year``, ``Region``, ``Season`` and ``Share``,
    sorted by ``Year``, ``Region``, ``TechName``, ``Season``.

    An input with no rows yields an empty frame that still has the output
    columns, so downstream steps work when there are no fixed-build plants.
    """
    output_columns = [
        "TechName",
        "TechCode",
        "Pset_PN",
        "Year",
        "Region",
        "Season",
        "Share",
    ]

    rows = []
    for row in df.itertuples(index=False):
        season_shares = get_season_shares(row.InstallMonth)
        rows.extend(
            {
                "TechName": row.TechName,
                "TechCode": row.Tech_TIMES,
                "Pset_PN": row.Pset_PN,
                "Year": row.Year,
                "Region": row.Island,
                "Season": season,
                "Share": share,
            }
            for season, share in season_shares.items()
        )

    # Passing columns explicitly keeps the schema when ``rows`` is empty;
    # otherwise the sort below would fail on the missing "Year" column.
    out = pd.DataFrame(rows, columns=output_columns)
    return out.sort_values(["Year", "Region", "TechName", "Season"], ignore_index=True)

251

252

def get_fixed_plant_season_shares(
    genstack_filepath=GENSTACK_FILE,
    assumptions_filepath=FIXED_INSTALL_MONTHS_FILE,
    default_month: int = DEFAULT_INSTALL_MONTH,
) -> pd.DataFrame:
    """
    Produce the plant-by-season share table for every fixed-build plant.

    Reads the fixed plants and the install-month assumptions, assigns a
    month to each plant (``default_month`` where none is supplied), adds the
    renewable-availability wildcard, and expands to one row per season.

    Output columns:
    - TechName
    - TechCode
    - Pset_PN
    - Year
    - Region
    - Season
    - Share
    """
    plants_with_months = assign_install_months(
        read_fixed_plant_dates(genstack_filepath),
        read_fixed_install_month_assumptions(assumptions_filepath),
        default_month=default_month,
    )
    plants_with_wildcards = add_renewable_availability_wildcards(plants_with_months)
    return expand_fixed_plants_to_season_shares(plants_with_wildcards)

277

278

def read_renewable_availability(filepath=RENEWABLE_AVAILABILITY_FILE) -> pd.DataFrame:
    """
    Load the stage-4 renewable availability CSV and check its schema.

    The table is returned exactly as read; it is the base data that the
    fixed-plant seasonal shares are later joined onto.

    Raises:
        ValueError: if any of ``TimeSlice``, ``LimType``, ``Attribute``,
            ``NI``, ``SI``, ``Pset_PN`` or ``Year`` is absent.
    """
    table = pd.read_csv(filepath)

    expected = ("TimeSlice", "LimType", "Attribute", "NI", "SI", "Pset_PN", "Year")
    absent = sorted(set(expected) - set(table.columns))
    if absent:
        missing_str = ", ".join(absent)
        raise ValueError(
            "Missing required columns for renewable availability data: "
            f"{missing_str}"
        )

    return table

306

307

def extract_season_from_timeslice(timeslice: str) -> str:
    """
    Return the season code at the start of a timeslice label.

    The label is stripped of surrounding whitespace and its first three
    characters are taken, e.g. ``WIN-WK-P`` gives ``WIN``.

    Raises:
        ValueError: if the label is null or its prefix is not one of
            ``SUM``, ``FAL``, ``WIN``, ``SPR``.
    """
    if pd.isna(timeslice):
        raise ValueError("TimeSlice cannot be null when deriving Season.")

    prefix = str(timeslice).strip()[:3]
    if prefix in {"SUM", "FAL", "WIN", "SPR"}:
        return prefix

    raise ValueError(f"Unrecognised season in TimeSlice: {timeslice}")

322

323

def prepare_renewable_availability_for_join(df: pd.DataFrame) -> pd.DataFrame:
    """
    Shape the renewable availability table for joining on season.

    Keeps only rows whose ``Year`` is greater than zero, adds a ``Season``
    column derived from ``TimeSlice``, and drops ``Year``. The input frame
    is not modified.
    """
    # Rows with Year <= 0 are discarded (the original code labels them "interps").
    explicit_rows = df.loc[df["Year"] > 0].copy()
    explicit_rows["Season"] = explicit_rows["TimeSlice"].map(
        extract_season_from_timeslice
    )
    # Year is dropped here; the join that follows matches on wildcard and season only.
    return explicit_rows.drop(columns="Year")

337

338

def validate_fixed_plant_wildcards_against_renewable_availability(
    fixed_plants: pd.DataFrame, renewable_availability: pd.DataFrame
) -> None:
    """
    Check that every fixed-plant wildcard appears in the availability table.

    Null ``Pset_PN`` values on either side are ignored.

    Raises:
        ValueError: listing, in sorted order, each wildcard with no match.
    """
    known = set(renewable_availability["Pset_PN"].dropna())
    wanted = set(fixed_plants["Pset_PN"].dropna())

    absent = sorted(wildcard for wildcard in wanted if wildcard not in known)
    if not absent:
        return

    missing_str = ", ".join(absent)
    raise ValueError(
        "Generated fixed-plant wildcards not found in renewable availability "
        f"data: {missing_str}"
    )

356

357

def join_fixed_plants_to_renewable_availability(
    fixed_plants: pd.DataFrame, renewable_availability: pd.DataFrame
) -> pd.DataFrame:
    """
    Attach availability timeslice rows to each plant-season share row.

    The availability table is first prepared (season key added, ``Year``
    removed) and then left-joined on ``Pset_PN`` and ``Season``. The result
    is sorted by ``Year``, ``Region``, ``TechName``, ``Season``, ``TimeSlice``.

    Raises:
        ValueError: if any plant-season row finds no availability row.
    """
    availability = prepare_renewable_availability_for_join(renewable_availability)

    joined = pd.merge(
        fixed_plants,
        availability,
        how="left",
        on=["Pset_PN", "Season"],
        validate="many_to_many",
    )

    # A null TimeSlice after the left join marks a share row with no match.
    unmatched = joined.loc[
        joined["TimeSlice"].isna(), ["TechName", "Pset_PN", "Season"]
    ].drop_duplicates()
    if not unmatched.empty:
        missing_str = ", ".join(
            f"{row.TechName} ({row.Pset_PN}, {row.Season})"
            for row in unmatched.itertuples()
        )
        raise ValueError(
            "Fixed plant season rows could not be matched to renewable "
            f"availability data: {missing_str}"
        )

    sort_keys = ["Year", "Region", "TechName", "Season", "TimeSlice"]
    return joined.sort_values(sort_keys, ignore_index=True)

392

393

def format_fixed_plant_adjustment_output(df: pd.DataFrame) -> pd.DataFrame:
    """
    Turn joined plant/availability rows into the final adjustment table.

    Each row's ``Value`` is taken from the ``NI`` or ``SI`` column matching
    its ``Region``. Two copies of the rows are then stacked:

    - the commissioning year, with ``Value`` multiplied by ``Share``;
    - the following year (``Year`` + 1), with ``Value`` unscaled.

    The helper columns ``NI``, ``SI``, ``Pset_PN``, ``TechCode``, ``Season``
    and ``Share`` are dropped and the result is sorted by ``TechName``,
    ``Year``, ``Region``, ``TimeSlice``.

    Raises:
        ValueError: if any row ends up without a ``Value`` (reported as an
            unsupported region).
    """
    base = df.copy()
    base["Value"] = pd.NA
    for region in ("NI", "SI"):
        in_region = base["Region"] == region
        base.loc[in_region, "Value"] = base.loc[in_region, region]

    unresolved = base.loc[base["Value"].isna(), ["TechName", "Region"]]
    unresolved = unresolved.drop_duplicates()
    if not unresolved.empty:
        invalid_str = ", ".join(
            f"{row.TechName} ({row.Region})" for row in unresolved.itertuples()
        )
        raise ValueError(
            "Fixed plant rows include unsupported regions for renewable "
            f"availability values: {invalid_str}"
        )

    # Commissioning year: availability scaled by the share of the season online.
    commissioning_year = base.assign(Value=base["Value"] * base["Share"])
    # Year after commissioning: full, unscaled availability.
    following_year = base.assign(Year=base["Year"] + 1)

    stacked = pd.concat([commissioning_year, following_year], ignore_index=True)
    stacked = stacked.drop(
        columns=["NI", "SI", "Pset_PN", "TechCode", "Season", "Share"]
    )

    sort_keys = ["TechName", "Year", "Region", "TimeSlice"]
    return stacked.sort_values(sort_keys, ignore_index=True)

433

434

def main() -> pd.DataFrame:
    """
    Script entry point.

    Builds the fixed-plant season shares, checks their wildcards against
    the renewable availability table, joins the two, formats the
    adjustment rows, and hands them to ``_save_data`` under the file name
    ``renewable_availability_fixed_adjustments.csv`` with ``OUTPUT_LOCATION``.

    Returns:
        The formatted adjustment table that was passed to ``_save_data``.
    """

    fixed_plants = get_fixed_plant_season_shares()
    renewable_availability = read_renewable_availability()
    validate_fixed_plant_wildcards_against_renewable_availability(
        fixed_plants, renewable_availability
    )
    joined = join_fixed_plants_to_renewable_availability(
        fixed_plants, renewable_availability
    )
    out = format_fixed_plant_adjustment_output(joined)
    _save_data(
        out,
        "renewable_availability_fixed_adjustments.csv",
        "Renewable availability fixed adjustments",
        OUTPUT_LOCATION,
    )
    # The signature promises a DataFrame; previously the function returned None.
    return out


if __name__ == "__main__":
    main()

Coverage for src/prepare_times_nz/stage_4/electricity/fixed_plant_adjustments.py: 37%

158 statements