Coverage for scripts / stage_3_scenarios / electricity / solar_run_hourly_profiles.py: 44%

196 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-01 22:14 +0000

1""" 

2Run PVWatts hourly solar profiles for the configured NIWA scenarios. 

3""" 

4 

5from __future__ import annotations 

6 

7import csv 

8import json 

9from datetime import datetime, timedelta, timezone 

10from typing import Any 

11from zoneinfo import ZoneInfo 

12 

13import pandas as pd 

14from prepare_times_nz.stage_0.stage_0_settings import BASE_YEAR 

15from prepare_times_nz.utilities.filepaths import ASSUMPTIONS, STAGE_3_DATA 

16from prepare_times_nz.utilities.timeslices import create_timeslices 

17 

18# pylint: disable=wrong-import-order 

19from PySAM import Pvwattsv8 

20 

# Input CSV describing each solar PV archetype (one row per TIMES tech).
SOLAR_SCENARIOS_FILE = (
    ASSUMPTIONS / "electricity_generation/renewable_curves/SolarPvScenarios.csv"
)

# Techs for which we have prepared data but no existing TIMES technology.
# They are filtered out so they do not build into wem_wcm and similar files
# derived from the renewable curves. To use such a tech later, simply remove
# it from this set -- the data is ready to go.
EXCLUDED_SOLAR_TECHS = {"SolarFixed"}

# Output directory layout under the stage-3 electricity data folder.
OUTPUT_ROOT = STAGE_3_DATA / "electricity/solar_af"
PREPARED_EPW_DIR = OUTPUT_ROOT / "prepared_epw"
HOURLY_DIR = OUTPUT_ROOT / "hourly"
METADATA_DIR = OUTPUT_ROOT / "metadata"

# Filename pattern for the prepared NIWA TMY weather files, keyed by zone code.
EXPECTED_PATTERN = "TMY_NZ_{zone}.epw"

37 

# Human-readable region name for each two-letter NIWA climate-zone code.
ZONE_CODE_TO_REGION = {
    "AK": "Auckland",
    "BP": "Bay of Plenty",
    "CC": "Christchurch",
    "DN": "Dunedin",
    "EC": "East Coast",
    "HN": "Hamilton",
    "IN": "Invercargill",
    "MW": "Manawatu",
    "NL": "Northland",
    "NM": "Nelson-Marlborough",
    "NP": "New Plymouth",
    "OC": "Central Otago",
    "QL": "Queenstown-Lakes",
    "RR": "Rotorua",
    "TP": "Taupo",
    "WC": "West Coast",
    "WI": "Wairarapa",
    "WN": "Wellington",
}

58 

# Island ("NI" = North Island, "SI" = South Island) for each NIWA zone code.
ZONE_CODE_TO_ISLAND = {
    "AK": "NI",
    "BP": "NI",
    "CC": "SI",
    "DN": "SI",
    "EC": "NI",
    "HN": "NI",
    "IN": "SI",
    "MW": "NI",
    "NL": "NI",
    "NM": "SI",
    "NP": "NI",
    "OC": "SI",
    "QL": "SI",
    "RR": "NI",
    "TP": "NI",
    "WC": "SI",
    "WI": "NI",
    "WN": "NI",
}

79 

# Canonical processing/output order for zones (roughly north to south).
ZONE_ORDER = [
    "NL",
    "AK",
    "HN",
    "BP",
    "RR",
    "TP",
    "NP",
    "EC",
    "MW",
    "WI",
    "WN",
    "NM",
    "WC",
    "CC",
    "QL",
    "OC",
    "DN",
    "IN",
]

100 

# Scenario "ArrayType" strings mapped to PVWatts SystemDesign.array_type codes.
ARRAY_TYPE_MAP = {
    "fixed_open_rack": 0,
    "fixed_roof_mount": 1,
    "single_axis_tracking": 2,
    "single_axis_backtracking": 3,
    "two_axis_tracking": 4,
}

# Scenario "ModuleType" strings mapped to PVWatts SystemDesign.module_type codes.
MODULE_TYPE_MAP = {
    "standard": 0,
    "premium": 1,
    "thin_film": 2,
}

# EPW data is recorded in fixed NZ standard time (UTC+12, no DST) ...
EPW_STANDARD_TZ = timezone(timedelta(hours=12), name="NZST_FIXED")
# ... while wall-clock outputs use Pacific/Auckland (DST-aware).
NZ_WALLCLOCK_TZ = ZoneInfo("Pacific/Auckland")

117 

118 

def ensure_output_dir(path):
    """
    Create ``path`` (including parents) and normalise its permissions.

    The directory mode is forced to 0o755 so outputs are world-readable
    regardless of the process umask. Returns ``path`` for chaining.
    """
    path.mkdir(exist_ok=True, parents=True)
    path.chmod(0o755)
    return path

126 

127 

def read_epw_rows(epw_path):
    """
    Load every row of an EPW file as a list of CSV records.

    NIWA files are either UTF-8 (possibly with a BOM) or Latin-1, so each
    encoding is attempted in turn and the first successful decode wins.
    If every attempt fails with a decode error, the last error is re-raised.
    """
    decode_error = None
    for codec in ("utf-8-sig", "latin-1"):
        try:
            with epw_path.open("r", encoding=codec, newline="") as stream:
                return list(csv.reader(stream))
        except UnicodeDecodeError as err:
            decode_error = err

    if decode_error is not None:
        raise decode_error

    raise ValueError(f"Could not read EPW rows from {epw_path}")

144 

145 

def parse_epw_time_index(epw_path):
    """
    Extract the (year, month, day, hour, minute) tuple from every EPW data row.

    The first 8 rows are the EPW header and are skipped. Raises ValueError if
    a data row has fewer than 5 columns or the file is not exactly 8760 rows.
    """
    data_rows = read_epw_rows(epw_path)[8:]
    time_index = []
    for row_num, fields in enumerate(data_rows, start=9):
        if len(fields) < 5:
            raise ValueError(
                f"EPW data row {row_num} has fewer than 5 columns in {epw_path}"
            )
        time_index.append(tuple(int(fields[col]) for col in range(5)))

    if len(time_index) != 8760:
        raise ValueError(
            f"Expected 8760 EPW rows in {epw_path}, found {len(time_index)}"
        )

    return time_index

167 

168 

def parse_epw_data_period_metadata(epw_path) -> dict[str, Any]:
    """
    Pull the shared TMY calendar fields out of the EPW "DATA PERIODS" header.

    Raises ValueError if the header is too short or the 8th row is not a
    well-formed DATA PERIODS record.
    """
    rows = read_epw_rows(epw_path)
    if len(rows) < 8:
        raise ValueError(f"Expected at least 8 EPW header rows in {epw_path}")

    header = rows[7]
    if len(header) < 7 or header[0] != "DATA PERIODS":
        raise ValueError(f"Malformed DATA PERIODS header row in {epw_path}")

    return {
        "calendar_label": header[3].strip(),
        "start_weekday": header[4].strip(),
        "days_in_year": int(header[6]),
    }

186 

187 

def validate_base_year_calendar():
    """
    Raise if BASE_YEAR is a leap year.

    Leap-year handling is not yet implemented for the solar timeslice
    workflow, so a leap base year must fail loudly rather than silently
    produce a 365-day profile.
    """
    first_of_january = pd.Timestamp(year=BASE_YEAR, month=1, day=1)
    if first_of_january.is_leap_year:
        raise ValueError(
            "Solar availability-factor workflow currently assumes a 365-day "
            f"calendar, but BASE_YEAR {BASE_YEAR} is a leap year. Revisit leap-year "
            "handling before regenerating solar availability factors."
        )

198 

199 

def validate_epw_calendar_assumptions(epw_path):
    """
    Raise if an EPW file declares anything other than a 365-day calendar.

    Mirrors validate_base_year_calendar: leap-year handling is unsupported,
    so a 366-day DATA PERIODS declaration must fail loudly.
    """
    days = parse_epw_data_period_metadata(epw_path)["days_in_year"]
    if days != 365:
        raise ValueError(
            "Solar availability-factor workflow currently assumes a 365-day "
            f"calendar, but {epw_path.name} declares {days} "
            "days in its DATA PERIODS header. Revisit leap-year handling before "
            "regenerating solar availability factors."
        )

212 

213 

def get_canonical_epw_index(epw_path):
    """
    Return the (month, day, hour, minute) sequence for one EPW file.

    The per-row EPW year is dropped because timeslices are built on the
    configured model base year, not on the TMY source years.
    """
    full_index = parse_epw_time_index(epw_path)
    validate_epw_calendar_assumptions(epw_path)
    return [stamp[1:] for stamp in full_index]

221 

222 

def convert_epw_standard_time_to_wallclock(month: int, day: int, hour: int) -> datetime:
    """
    Map a fixed-offset NZST interval-start hour onto NZ wall-clock time.

    EPW data uses fixed UTC+12 standard time; the returned datetime carries
    the Pacific/Auckland zone so daylight-saving shifts are reflected.
    """
    fixed_offset_time = datetime(
        BASE_YEAR,
        month,
        day,
        hour,
        tzinfo=EPW_STANDARD_TZ,
    )
    return fixed_offset_time.astimezone(NZ_WALLCLOCK_TZ)

235 

236 

def format_time_index_rows(canonical_index):
    """
    Expand the canonical EPW index into one record per hour of the base year.

    Each record carries both the EPW convention (hours 1-24, hour-ending) and
    the interval-start convention (hours 0-23), plus the DST-aware wall-clock
    breakdown of the same instant.
    """
    records = []
    for hour_of_year, (month, day, epw_hour, minute) in enumerate(
        canonical_index, start=1
    ):
        # EPW hours run 1-24 and label the *end* of the interval.
        start_hour = (epw_hour - 1) % 24
        wall_clock = convert_epw_standard_time_to_wallclock(
            month=month,
            day=day,
            hour=start_hour,
        )
        record = {
            "hour_of_year": hour_of_year,
            "Trading_Date": pd.Timestamp(year=BASE_YEAR, month=month, day=day),
            "Year": BASE_YEAR,
            "Month": month,
            "Day": day,
            "EPWHour": epw_hour,
            "Hour": start_hour,
            "Minute": minute,
            "WallClock_DateTime": pd.Timestamp(wall_clock),
            "WallClock_Date": pd.Timestamp(wall_clock.date()),
            "WallClock_Year": wall_clock.year,
            "WallClock_Month": wall_clock.month,
            "WallClock_Day": wall_clock.day,
            "WallClock_Hour": wall_clock.hour,
            "WallClock_UtcOffsetHours": (
                wall_clock.utcoffset().total_seconds() / 3600
            ),
        }
        records.append(record)
    return records

272 

273 

def load_solar_scenarios() -> list[dict[str, Any]]:
    """
    Read the solar archetype CSV and normalise each record for PVWatts.

    Excluded techs are dropped, string array/module types are translated to
    their PVWatts integer codes, and the albedo flag is coerced to bool.
    """
    table = pd.read_csv(SOLAR_SCENARIOS_FILE)
    table = table[~table["Tech_TIMES"].isin(EXCLUDED_SOLAR_TECHS)].copy()

    normalized = []
    for record in table.to_dict(orient="records"):
        entry = dict(record)
        entry["ArrayTypeCode"] = ARRAY_TYPE_MAP[entry["ArrayType"]]
        entry["ModuleTypeCode"] = MODULE_TYPE_MAP[entry["ModuleType"]]
        flag_text = str(entry["UseWeatherFileAlbedo"]).lower()
        entry["UseWeatherFileAlbedo"] = flag_text in {"1", "true", "yes"}
        normalized.append(entry)

    return normalized

296 

297 

def discover_epw_files(epw_dir):
    """
    Map each NIWA zone code to its prepared EPW file under ``epw_dir``.

    Raises FileNotFoundError listing every zone whose expected file is absent.
    """
    found = {}
    for zone in ZONE_ORDER:
        candidate = epw_dir / EXPECTED_PATTERN.format(zone=zone)
        if candidate.exists():
            found[zone] = candidate

    absent = [zone for zone in ZONE_ORDER if zone not in found]
    if absent:
        raise FileNotFoundError(f"Missing EPW files for zones: {', '.join(absent)}")

    return found

313 

314 

def build_time_index(epw_files):
    """
    Build the shared hourly DataFrame on the model base-year calendar.

    Every NIWA file must agree on month/day/hour/minute ordering; the EPW row
    year and header calendar metadata are ignored for timeslice construction.
    Raises ValueError if any zone's index disagrees with the first zone read.
    """
    validate_base_year_calendar()

    reference = None
    reference_zone = None
    for zone in ZONE_ORDER:
        zone_index = get_canonical_epw_index(epw_files[zone])
        if reference is None:
            reference = zone_index
            reference_zone = zone
        elif zone_index != reference:
            raise ValueError(f"Time index mismatch between {reference_zone} and {zone}")

    return pd.DataFrame(format_time_index_rows(reference))

335 

336 

def build_model(epw_path, scenario):
    """
    Configure a PVWatts v8 model for one archetype at one NIWA zone.

    The scenario dict supplies the system design parameters (capacity, tilt,
    azimuth, losses, etc.); the EPW file supplies the weather resource.
    Lifetime output, snow loss, and all shading options are disabled.
    """
    model = Pvwattsv8.default("PVWattsNone")  # pylint: disable=c-extension-no-member
    model.SolarResource.solar_resource_file = str(epw_path.resolve())
    model.Lifetime.system_use_lifetime_output = 0

    design = model.SystemDesign
    design.system_capacity = float(scenario["SystemCapacityKW"])
    design.array_type = int(scenario["ArrayTypeCode"])
    design.tilt = float(scenario["TiltDeg"])
    design.azimuth = float(scenario["AzimuthDeg"])
    design.module_type = int(scenario["ModuleTypeCode"])
    design.dc_ac_ratio = float(scenario["DcAcRatio"])
    design.inv_eff = float(scenario["InvEffPercent"])
    design.losses = float(scenario["LossesPercent"])
    design.bifaciality = float(scenario["Bifaciality"])
    design.gcr = float(scenario["Gcr"])
    design.en_snowloss = 0

    # Albedo either comes from the weather file or is a fixed monthly constant.
    wf_albedo_flag = 1.0 if scenario["UseWeatherFileAlbedo"] else 0.0
    model.SolarResource.use_wf_albedo = wf_albedo_flag
    if not wf_albedo_flag:
        model.SolarResource.albedo = (float(scenario["Albedo"]),) * 12

    # Disable every shading option.
    shading = model.Shading
    shading.shading_en_azal = 0
    shading.shading_en_diff = 0
    shading.shading_en_mxh = 0
    shading.shading_en_string_option = 0
    shading.shading_en_timestep = 0

    return model

370 

371 

372def _json_default(value: Any): 

373 """ 

374 Convert pandas / numpy scalar values to plain Python types for JSON output. 

375 """ 

376 if hasattr(value, "item"): 

377 return value.item() 

378 raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable") 

379 

380 

def collect_zone_hourly_rows(scenario_name, zone, time_index, generation):
    """
    Build long-format hourly records pairing the shared time index with one
    zone's generation series.

    Each record repeats the scenario/zone identifiers alongside the calendar
    and wall-clock columns of the time index row it came from.
    """
    region = ZONE_CODE_TO_REGION[zone]
    island = ZONE_CODE_TO_ISLAND[zone]

    return [
        {
            "Scenario": scenario_name,
            "Tech_TIMES": scenario_name,
            "ZoneCode": zone,
            "Region": region,
            "Island": island,
            "Trading_Date": stamp.Trading_Date,
            "Year": stamp.Year,
            "Month": stamp.Month,
            "Day": stamp.Day,
            "Hour": stamp.Hour,
            "EPWHour": stamp.EPWHour,
            "Minute": stamp.Minute,
            "WallClock_DateTime": stamp.WallClock_DateTime,
            "WallClock_Date": stamp.WallClock_Date,
            "WallClock_Year": stamp.WallClock_Year,
            "WallClock_Month": stamp.WallClock_Month,
            "WallClock_Day": stamp.WallClock_Day,
            "WallClock_Hour": stamp.WallClock_Hour,
            "WallClock_UtcOffsetHours": stamp.WallClock_UtcOffsetHours,
            "generation_kw_per_kw": output_kw,
        }
        for stamp, output_kw in zip(time_index.itertuples(index=False), generation)
    ]

418 

419 

def run_scenario_hourly_profiles(scenario, epw_files, time_index):
    """
    Run one solar archetype through PVWatts for every NIWA zone.

    Returns a tuple of (wide DataFrame with one generation column per zone,
    long DataFrame with timeslices attached, list of per-zone summary dicts).
    Raises ValueError if PVWatts does not return exactly 8760 hourly values.
    """
    scenario_name = scenario["Tech_TIMES"]
    wide = time_index.copy()
    long_rows = []
    zone_results = []

    for zone in ZONE_ORDER:
        model = build_model(epw_files[zone], scenario)
        model.execute()
        generation = [float(value) for value in model.Outputs.gen]

        if len(generation) != 8760:
            raise ValueError(
                f"Expected 8760 PVWatts outputs for {scenario_name}/{zone}, "
                f"got {len(generation)}"
            )

        wide[zone] = generation
        zone_results.append(
            {
                "Tech_TIMES": scenario_name,
                "ZoneCode": zone,
                "Region": ZONE_CODE_TO_REGION[zone],
                "Island": ZONE_CODE_TO_ISLAND[zone],
                "EPWFile": epw_files[zone].name,
                "AnnualEnergyKWhPerKWDC": float(sum(generation)),
                "CapacityFactorPercent": float(model.Outputs.capacity_factor),
                "KWhPerKW": float(model.Outputs.kwh_per_kw),
            }
        )
        long_rows.extend(
            collect_zone_hourly_rows(
                scenario_name=scenario_name,
                zone=zone,
                time_index=time_index,
                generation=generation,
            )
        )

    # Attach TIMES timeslice labels using the DST-aware wall-clock columns.
    long_df = create_timeslices(
        pd.DataFrame(long_rows),
        date_col="WallClock_Date",
        hour_col="WallClock_Hour",
    )
    return wide, long_df, zone_results

471 

472 

def save_scenario_outputs(scenario_name, scenario, wide, long_df, zone_results):
    """
    Write one scenario's wide/long hourly CSVs plus a JSON metadata record.

    The metadata captures the input settings, per-zone results, and the paths
    of the CSVs it accompanies.
    """
    wide_path = HOURLY_DIR / f"{scenario_name}_hourly_by_zone.csv"
    long_path = HOURLY_DIR / f"{scenario_name}_hourly_long.csv"
    metadata_path = METADATA_DIR / f"{scenario_name}_run_metadata.json"

    wide.to_csv(wide_path, index=False)
    long_df.to_csv(long_path, index=False)

    metadata = {
        "Tech_TIMES": scenario_name,
        "Settings": scenario,
        "ZoneResults": zone_results,
        "Outputs": {
            "HourlyByZoneCsv": str(wide_path),
            "HourlyLongCsv": str(long_path),
        },
    }
    with metadata_path.open("w", encoding="utf-8") as handle:
        json.dump(metadata, handle, indent=2, default=_json_default)

499 

500 

def run_hourly_profiles():
    """
    Run PVWatts across all configured solar archetypes and NIWA zones.

    Writes per-scenario hourly CSVs and metadata, then a combined long-format
    CSV across all scenarios plus a per-zone summary table.
    """
    scenarios = load_solar_scenarios()
    epw_files = discover_epw_files(PREPARED_EPW_DIR)
    time_index = build_time_index(epw_files)

    for directory in (HOURLY_DIR, METADATA_DIR):
        ensure_output_dir(directory)

    hourly_frames = []
    summary_records = []

    for scenario in scenarios:
        name = scenario["Tech_TIMES"]
        wide, long_df, zone_results = run_scenario_hourly_profiles(
            scenario=scenario,
            epw_files=epw_files,
            time_index=time_index,
        )
        save_scenario_outputs(
            scenario_name=name,
            scenario=scenario,
            wide=wide,
            long_df=long_df,
            zone_results=zone_results,
        )
        hourly_frames.append(long_df)
        summary_records.extend(zone_results)

    combined = pd.concat(hourly_frames, ignore_index=True)
    combined.to_csv(HOURLY_DIR / "all_scenarios_hourly_long.csv", index=False)
    pd.DataFrame(summary_records).to_csv(
        METADATA_DIR / "all_scenarios_zone_summary.csv", index=False
    )

537 

538 

# Script entry point: run the full workflow only when executed directly.
if __name__ == "__main__":
    run_hourly_profiles()