Coverage for scripts/stage_3_scenarios/electricity/solar_run_hourly_profiles.py: 51%

218 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-16 23:05 +0000

1""" 

2Run PVWatts hourly solar profiles for the configured NIWA scenarios. 

3""" 

4 

5from __future__ import annotations 

6 

7import csv 

8import json 

9import re 

10from datetime import datetime, timedelta, timezone 

11from typing import Any 

12from zoneinfo import ZoneInfo 

13 

14import pandas as pd 

15from prepare_times_nz.stage_0.stage_0_settings import BASE_YEAR 

16from prepare_times_nz.utilities.filepaths import ASSUMPTIONS, STAGE_3_DATA 

17from prepare_times_nz.utilities.timeslices import create_timeslices 

18 

19# pylint: disable=wrong-import-order 

20from PySAM import Pvwattsv8 

21 

22SOLAR_SCENARIOS_FILE = ( 

23 ASSUMPTIONS / "electricity_generation/renewable_curves/SolarPvScenarios.csv" 

24) 

25EXCLUDED_SOLAR_TECHS = {"SolarFixed"} 

26# note that these are techs where we have data but not any existing 

27# techs. 

28# We remove it so it doesn't build into wem_wcm and similar files from 

29# the renewable curves 

30# If we use this tech later just remove the filter and data ready to go 

31 

32OUTPUT_ROOT = STAGE_3_DATA / "electricity/solar_af" 

33PREPARED_EPW_DIR = OUTPUT_ROOT / "prepared_epw" 

34HOURLY_DIR = OUTPUT_ROOT / "hourly" 

35METADATA_DIR = OUTPUT_ROOT / "metadata" 

36 

37EPW_FILENAME_PATTERN = re.compile(r"^TMY3_NZ_(?P<zone>[A-Z]{2})\.epw$") 

38 

39ZONE_CODE_TO_REGION = { 

40 "AK": "Auckland", 

41 "BP": "Bay of Plenty", 

42 "CC": "Christchurch", 

43 "DN": "Dunedin", 

44 "EC": "East Coast", 

45 "HN": "Hamilton", 

46 "IN": "Invercargill", 

47 "MW": "Manawatu", 

48 "NL": "Northland", 

49 "NM": "Nelson-Marlborough", 

50 "NP": "New Plymouth", 

51 "OC": "Central Otago", 

52 "QL": "Queenstown-Lakes", 

53 "RR": "Rotorua", 

54 "TP": "Taupo", 

55 "WC": "West Coast", 

56 "WI": "Wairarapa", 

57 "WN": "Wellington", 

58} 

59 

60ZONE_CODE_TO_ISLAND = { 

61 "AK": "NI", 

62 "BP": "NI", 

63 "CC": "SI", 

64 "DN": "SI", 

65 "EC": "NI", 

66 "HN": "NI", 

67 "IN": "SI", 

68 "MW": "NI", 

69 "NL": "NI", 

70 "NM": "SI", 

71 "NP": "NI", 

72 "OC": "SI", 

73 "QL": "SI", 

74 "RR": "NI", 

75 "TP": "NI", 

76 "WC": "SI", 

77 "WI": "NI", 

78 "WN": "NI", 

79} 

80 

81ZONE_ORDER = [ 

82 "NL", 

83 "AK", 

84 "HN", 

85 "BP", 

86 "RR", 

87 "TP", 

88 "NP", 

89 "EC", 

90 "MW", 

91 "WI", 

92 "WN", 

93 "NM", 

94 "WC", 

95 "CC", 

96 "QL", 

97 "OC", 

98 "DN", 

99 "IN", 

100] 

101 

102ARRAY_TYPE_MAP = { 

103 "fixed_open_rack": 0, 

104 "fixed_roof_mount": 1, 

105 "single_axis_tracking": 2, 

106 "single_axis_backtracking": 3, 

107 "two_axis_tracking": 4, 

108} 

109 

110MODULE_TYPE_MAP = { 

111 "standard": 0, 

112 "premium": 1, 

113 "thin_film": 2, 

114} 

115 

116EPW_STANDARD_TZ = timezone(timedelta(hours=12), name="NZST_FIXED") 

117NZ_WALLCLOCK_TZ = ZoneInfo("Pacific/Auckland") 

118 

119 

120def ensure_output_dir(path): 

121 """ 

122 Create an output directory with predictable permissions. 

123 """ 

124 path.mkdir(parents=True, exist_ok=True) 

125 path.chmod(0o755) 

126 return path 

127 

128 

129def read_epw_rows(epw_path): 

130 """ 

131 Read an EPW file using the encodings present in the NIWA datasets. 

132 """ 

133 last_error = None 

134 for encoding in ("utf-8-sig", "latin-1"): 

135 try: 

136 with epw_path.open("r", encoding=encoding, newline="") as handle: 

137 return list(csv.reader(handle)) 

138 except UnicodeDecodeError as exc: 

139 last_error = exc 

140 

141 if last_error is not None: 

142 raise last_error 

143 

144 raise ValueError(f"Could not read EPW rows from {epw_path}") 

145 

146 

147def parse_epw_time_index(epw_path): 

148 """ 

149 Read the year/month/day/hour/minute tuple for each EPW data row. 

150 """ 

151 rows = read_epw_rows(epw_path)[8:] 

152 time_index = [] 

153 for row_num, row in enumerate(rows, start=9): 

154 if len(row) < 5: 

155 raise ValueError( 

156 f"EPW data row {row_num} has fewer than 5 columns in {epw_path}" 

157 ) 

158 hour = int(row[3]) 

159 minute = int(row[4]) 

160 if minute not in {0, 60}: 

161 raise ValueError( 

162 f"Unsupported EPW minute value {minute} at row {row_num} in {epw_path}. " 

163 "The workflow expects NIWA EPW minute values of 0 or 60." 

164 ) 

165 if not 1 <= hour <= 24: 

166 raise ValueError( 

167 f"Unsupported EPW hour value {hour} at row {row_num} in {epw_path}. " 

168 "The workflow expects EPW-standard 01..24 hour fields." 

169 ) 

170 

171 time_index.append((int(row[0]), int(row[1]), int(row[2]), hour, minute)) 

172 

173 if len(time_index) != 8760: 

174 raise ValueError( 

175 f"Expected 8760 EPW rows in {epw_path}, found {len(time_index)}" 

176 ) 

177 

178 return time_index 

179 

180 

181def parse_epw_data_period_metadata(epw_path) -> dict[str, Any]: 

182 """ 

183 Read the shared TMY calendar metadata from the EPW header. 

184 """ 

185 rows = read_epw_rows(epw_path) 

186 if len(rows) < 8: 

187 raise ValueError(f"Expected at least 8 EPW header rows in {epw_path}") 

188 

189 data_periods = rows[7] 

190 if len(data_periods) < 7 or data_periods[0] != "DATA PERIODS": 

191 raise ValueError(f"Malformed DATA PERIODS header row in {epw_path}") 

192 

193 metadata = { 

194 "calendar_label": data_periods[3].strip(), 

195 "start_weekday": data_periods[4].strip(), 

196 } 

197 

198 # NIWA EPW headers may use either a numeric day count in column 7 or 

199 # MBIE TMY3-style start/end dates in columns 6 and 7. 

200 try: 

201 metadata["days_in_year"] = int(data_periods[6]) 

202 except ValueError: 

203 metadata["start_date"] = data_periods[5].strip() 

204 metadata["end_date"] = data_periods[6].strip() 

205 metadata["days_in_year"] = 365 

206 

207 return metadata 

208 

209 

210def validate_base_year_calendar(): 

211 """ 

212 Fail loudly until leap-year handling is implemented for solar timeslices. 

213 """ 

214 if pd.Timestamp(year=BASE_YEAR, month=1, day=1).is_leap_year: 

215 raise ValueError( 

216 "Solar availability-factor workflow currently assumes a 365-day " 

217 f"calendar, but BASE_YEAR {BASE_YEAR} is a leap year. Revisit leap-year " 

218 "handling before regenerating solar availability factors." 

219 ) 

220 

221 

222def validate_epw_calendar_assumptions(epw_path): 

223 """ 

224 Fail loudly if an EPW file implies leap-year calendar handling. 

225 """ 

226 metadata = parse_epw_data_period_metadata(epw_path) 

227 if metadata["days_in_year"] != 365: 

228 raise ValueError( 

229 "Solar availability-factor workflow currently assumes a 365-day " 

230 f"calendar, but {epw_path.name} declares {metadata['days_in_year']} " 

231 "days in its DATA PERIODS header. Revisit leap-year handling before " 

232 "regenerating solar availability factors." 

233 ) 

234 

235 

236def get_canonical_epw_index(epw_path): 

237 """ 

238 Return the shared month/day/hour/minute sequence for one EPW file. 

239 """ 

240 raw_index = parse_epw_time_index(epw_path) 

241 validate_epw_calendar_assumptions(epw_path) 

242 return [(month, day, hour, minute) for _, month, day, hour, minute in raw_index] 

243 

244 

245def convert_epw_standard_time_to_wallclock(month: int, day: int, hour: int) -> datetime: 

246 """ 

247 Convert a fixed NZST EPW interval-start hour to NZ wall-clock time. 

248 """ 

249 standard_time = datetime( 

250 BASE_YEAR, 

251 month, 

252 day, 

253 hour, 

254 tzinfo=EPW_STANDARD_TZ, 

255 ) 

256 return standard_time.astimezone(NZ_WALLCLOCK_TZ) 

257 

258 

259def format_time_index_rows(canonical_index): 

260 """ 

261 Build hourly rows using the configured model base year calendar. 

262 """ 

263 rows = [] 

264 for hour_index, (month, day, hour, minute) in enumerate(canonical_index, start=1): 

265 interval_start_hour = (hour - 1) % 24 

266 trading_date = pd.Timestamp(year=BASE_YEAR, month=month, day=day) 

267 wall_clock = convert_epw_standard_time_to_wallclock( 

268 month=month, 

269 day=day, 

270 hour=interval_start_hour, 

271 ) 

272 rows.append( 

273 { 

274 "hour_of_year": hour_index, 

275 "Trading_Date": trading_date, 

276 "Year": BASE_YEAR, 

277 "Month": month, 

278 "Day": day, 

279 "EPWHour": hour, 

280 "Hour": interval_start_hour, 

281 "Minute": minute, 

282 "WallClock_DateTime": pd.Timestamp(wall_clock), 

283 "WallClock_Date": pd.Timestamp(wall_clock.date()), 

284 "WallClock_Year": wall_clock.year, 

285 "WallClock_Month": wall_clock.month, 

286 "WallClock_Day": wall_clock.day, 

287 "WallClock_Hour": wall_clock.hour, 

288 "WallClock_UtcOffsetHours": ( 

289 wall_clock.utcoffset().total_seconds() / 3600 

290 ), 

291 } 

292 ) 

293 return rows 

294 

295 

296def load_solar_scenarios() -> list[dict[str, Any]]: 

297 """ 

298 Load and normalize the configured solar archetype parameters. 

299 """ 

300 df = pd.read_csv(SOLAR_SCENARIOS_FILE) 

301 df = df[~df["Tech_TIMES"].isin(EXCLUDED_SOLAR_TECHS)].copy() 

302 scenarios = [] 

303 

304 for row in df.to_dict(orient="records"): 

305 scenario = dict(row) 

306 scenario["ArrayTypeCode"] = ARRAY_TYPE_MAP[scenario["ArrayType"]] 

307 scenario["ModuleTypeCode"] = MODULE_TYPE_MAP[scenario["ModuleType"]] 

308 scenario["UseWeatherFileAlbedo"] = str( 

309 scenario["UseWeatherFileAlbedo"] 

310 ).lower() in { 

311 "1", 

312 "true", 

313 "yes", 

314 } 

315 scenarios.append(scenario) 

316 

317 return scenarios 

318 

319 

320def discover_epw_files(epw_dir): 

321 """ 

322 Find one prepared EPW file for each expected NIWA climate zone. 

323 """ 

324 discovered = {} 

325 unexpected = [] 

326 for path in sorted(epw_dir.glob("*.epw")): 

327 match = EPW_FILENAME_PATTERN.match(path.name) 

328 if not match: 

329 unexpected.append(path.name) 

330 continue 

331 

332 zone = match.group("zone") 

333 if zone in discovered: 

334 raise ValueError( 

335 f"Duplicate EPW files found for zone {zone}: " 

336 f"{discovered[zone].name} and {path.name}" 

337 ) 

338 discovered[zone] = path 

339 

340 if unexpected: 

341 raise ValueError( 

342 "Unsupported EPW filenames in prepared bundle: " f"{', '.join(unexpected)}" 

343 ) 

344 

345 missing = [zone for zone in ZONE_ORDER if zone not in discovered] 

346 if missing: 

347 raise FileNotFoundError(f"Missing EPW files for zones: {', '.join(missing)}") 

348 

349 return discovered 

350 

351 

352def build_time_index(epw_files): 

353 """ 

354 Build the shared hourly index using the model base-year calendar. 

355 

356 All NIWA files should agree on month/day/hour/minute ordering. The EPW row 

357 year and header calendar metadata are ignored for timeslice construction. 

358 """ 

359 validate_base_year_calendar() 

360 

361 canonical = None 

362 canonical_zone = None 

363 for zone in ZONE_ORDER: 

364 index = get_canonical_epw_index(epw_files[zone]) 

365 if canonical is None: 

366 canonical = index 

367 canonical_zone = zone 

368 elif canonical != index: 

369 raise ValueError(f"Time index mismatch between {canonical_zone} and {zone}") 

370 

371 return pd.DataFrame(format_time_index_rows(canonical)) 

372 

373 

374def build_model(epw_path, scenario): 

375 """ 

376 Build a PVWatts model for one scenario-zone pair. 

377 """ 

378 model = Pvwattsv8.default("PVWattsNone") # pylint: disable=c-extension-no-member 

379 model.SolarResource.solar_resource_file = str(epw_path.resolve()) 

380 model.Lifetime.system_use_lifetime_output = 0 

381 

382 model.SystemDesign.system_capacity = float(scenario["SystemCapacityKW"]) 

383 model.SystemDesign.array_type = int(scenario["ArrayTypeCode"]) 

384 model.SystemDesign.tilt = float(scenario["TiltDeg"]) 

385 model.SystemDesign.azimuth = float(scenario["AzimuthDeg"]) 

386 model.SystemDesign.module_type = int(scenario["ModuleTypeCode"]) 

387 model.SystemDesign.dc_ac_ratio = float(scenario["DcAcRatio"]) 

388 model.SystemDesign.inv_eff = float(scenario["InvEffPercent"]) 

389 model.SystemDesign.losses = float(scenario["LossesPercent"]) 

390 model.SystemDesign.bifaciality = float(scenario["Bifaciality"]) 

391 model.SystemDesign.gcr = float(scenario["Gcr"]) 

392 model.SystemDesign.en_snowloss = 0 

393 

394 use_weather_file_albedo = 1.0 if scenario["UseWeatherFileAlbedo"] else 0.0 

395 model.SolarResource.use_wf_albedo = use_weather_file_albedo 

396 if not use_weather_file_albedo: 

397 albedo = float(scenario["Albedo"]) 

398 model.SolarResource.albedo = tuple(albedo for _ in range(12)) 

399 

400 model.Shading.shading_en_azal = 0 

401 model.Shading.shading_en_diff = 0 

402 model.Shading.shading_en_mxh = 0 

403 model.Shading.shading_en_string_option = 0 

404 model.Shading.shading_en_timestep = 0 

405 

406 return model 

407 

408 

409def _json_default(value: Any): 

410 """ 

411 Convert pandas / numpy scalar values to plain Python types for JSON output. 

412 """ 

413 if hasattr(value, "item"): 

414 return value.item() 

415 raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable") 

416 

417 

418def collect_zone_hourly_rows(scenario_name, zone, time_index, generation): 

419 """ 

420 Collect long-format hourly rows for a single zone. 

421 """ 

422 region = ZONE_CODE_TO_REGION[zone] 

423 island = ZONE_CODE_TO_ISLAND[zone] 

424 

425 rows = [] 

426 for time_row, generation_kw_per_kw in zip( 

427 time_index.itertuples(index=False), generation 

428 ): 

429 rows.append( 

430 { 

431 "Scenario": scenario_name, 

432 "Tech_TIMES": scenario_name, 

433 "ZoneCode": zone, 

434 "Region": region, 

435 "Island": island, 

436 "Trading_Date": time_row.Trading_Date, 

437 "Year": time_row.Year, 

438 "Month": time_row.Month, 

439 "Day": time_row.Day, 

440 "Hour": time_row.Hour, 

441 "EPWHour": time_row.EPWHour, 

442 "Minute": time_row.Minute, 

443 "WallClock_DateTime": time_row.WallClock_DateTime, 

444 "WallClock_Date": time_row.WallClock_Date, 

445 "WallClock_Year": time_row.WallClock_Year, 

446 "WallClock_Month": time_row.WallClock_Month, 

447 "WallClock_Day": time_row.WallClock_Day, 

448 "WallClock_Hour": time_row.WallClock_Hour, 

449 "WallClock_UtcOffsetHours": time_row.WallClock_UtcOffsetHours, 

450 "generation_kw_per_kw": generation_kw_per_kw, 

451 } 

452 ) 

453 

454 return rows 

455 

456 

457def run_scenario_hourly_profiles(scenario, epw_files, time_index): 

458 """ 

459 Run one solar archetype across all NIWA zones. 

460 """ 

461 scenario_name = scenario["Tech_TIMES"] 

462 wide = time_index.copy() 

463 long_rows = [] 

464 zone_results = [] 

465 

466 for zone in ZONE_ORDER: 

467 region = ZONE_CODE_TO_REGION[zone] 

468 island = ZONE_CODE_TO_ISLAND[zone] 

469 model = build_model(epw_files[zone], scenario) 

470 model.execute() 

471 generation = [float(value) for value in model.Outputs.gen] 

472 

473 if len(generation) != 8760: 

474 raise ValueError( 

475 f"Expected 8760 PVWatts outputs for {scenario_name}/{zone}, " 

476 f"got {len(generation)}" 

477 ) 

478 

479 wide[zone] = generation 

480 zone_results.append( 

481 { 

482 "Tech_TIMES": scenario_name, 

483 "ZoneCode": zone, 

484 "Region": region, 

485 "Island": island, 

486 "EPWFile": epw_files[zone].name, 

487 "AnnualEnergyKWhPerKWDC": float(sum(generation)), 

488 "CapacityFactorPercent": float(model.Outputs.capacity_factor), 

489 "KWhPerKW": float(model.Outputs.kwh_per_kw), 

490 } 

491 ) 

492 long_rows.extend( 

493 collect_zone_hourly_rows( 

494 scenario_name=scenario_name, 

495 zone=zone, 

496 time_index=time_index, 

497 generation=generation, 

498 ) 

499 ) 

500 

501 long_df = pd.DataFrame(long_rows) 

502 long_df = create_timeslices( 

503 long_df, 

504 date_col="WallClock_Date", 

505 hour_col="WallClock_Hour", 

506 ) 

507 return wide, long_df, zone_results 

508 

509 

510def save_scenario_outputs(scenario_name, scenario, wide, long_df, zone_results): 

511 """ 

512 Save scenario outputs and metadata. 

513 """ 

514 wide_path = HOURLY_DIR / f"{scenario_name}_hourly_by_zone.csv" 

515 long_path = HOURLY_DIR / f"{scenario_name}_hourly_long.csv" 

516 metadata_path = METADATA_DIR / f"{scenario_name}_run_metadata.json" 

517 

518 wide.to_csv(wide_path, index=False) 

519 long_df.to_csv(long_path, index=False) 

520 

521 with metadata_path.open("w", encoding="utf-8") as handle: 

522 json.dump( 

523 { 

524 "Tech_TIMES": scenario_name, 

525 "Settings": scenario, 

526 "ZoneResults": zone_results, 

527 "Outputs": { 

528 "HourlyByZoneCsv": str(wide_path), 

529 "HourlyLongCsv": str(long_path), 

530 }, 

531 }, 

532 handle, 

533 indent=2, 

534 default=_json_default, 

535 ) 

536 

537 

538def run_hourly_profiles(): 

539 """ 

540 Run PVWatts across all configured solar archetypes and NIWA zones. 

541 """ 

542 scenarios = load_solar_scenarios() 

543 epw_files = discover_epw_files(PREPARED_EPW_DIR) 

544 time_index = build_time_index(epw_files) 

545 

546 ensure_output_dir(HOURLY_DIR) 

547 ensure_output_dir(METADATA_DIR) 

548 

549 combined_rows = [] 

550 metadata_rows = [] 

551 

552 for scenario in scenarios: 

553 scenario_name = scenario["Tech_TIMES"] 

554 wide, long_df, zone_results = run_scenario_hourly_profiles( 

555 scenario=scenario, 

556 epw_files=epw_files, 

557 time_index=time_index, 

558 ) 

559 save_scenario_outputs( 

560 scenario_name=scenario_name, 

561 scenario=scenario, 

562 wide=wide, 

563 long_df=long_df, 

564 zone_results=zone_results, 

565 ) 

566 combined_rows.append(long_df) 

567 metadata_rows.extend(zone_results) 

568 

569 all_hourly = pd.concat(combined_rows, ignore_index=True) 

570 all_hourly.to_csv(HOURLY_DIR / "all_scenarios_hourly_long.csv", index=False) 

571 pd.DataFrame(metadata_rows).to_csv( 

572 METADATA_DIR / "all_scenarios_zone_summary.csv", index=False 

573 ) 

574 

575 

576if __name__ == "__main__": 

577 run_hourly_profiles()