Coverage for scripts/stage_3_scenarios/electricity/solar_run_hourly

1"""

2Run PVWatts hourly solar profiles for the configured NIWA scenarios.

3"""

5from __future__ import annotations

7import csv

8import json

9import re

10from datetime import datetime, timedelta, timezone

11from typing import Any

12from zoneinfo import ZoneInfo

14import pandas as pd

15from prepare_times_nz.stage_0.stage_0_settings import BASE_YEAR

16from prepare_times_nz.utilities.filepaths import ASSUMPTIONS, STAGE_3_DATA

17from prepare_times_nz.utilities.timeslices import create_timeslices

19# pylint: disable=wrong-import-order

20from PySAM import Pvwattsv8

22SOLAR_SCENARIOS_FILE = (

23 ASSUMPTIONS / "electricity_generation/renewable_curves/SolarPvScenarios.csv"

24)

25EXCLUDED_SOLAR_TECHS = {"SolarFixed"}

26# note that these are techs where we have data but not any existing

27# techs.

28# We remove it so it doesn't build into wem_wcm and similar files from

29# the renewable curves

30# If we use this tech later just remove the filter and data ready to go

32OUTPUT_ROOT = STAGE_3_DATA / "electricity/solar_af"

33PREPARED_EPW_DIR = OUTPUT_ROOT / "prepared_epw"

34HOURLY_DIR = OUTPUT_ROOT / "hourly"

35METADATA_DIR = OUTPUT_ROOT / "metadata"

37EPW_FILENAME_PATTERN = re.compile(r"^TMY3_NZ_(?P<zone>[A-Z]{2})\.epw$")

39ZONE_CODE_TO_REGION = {

40 "AK": "Auckland",

41 "BP": "Bay of Plenty",

42 "CC": "Christchurch",

43 "DN": "Dunedin",

44 "EC": "East Coast",

45 "HN": "Hamilton",

46 "IN": "Invercargill",

47 "MW": "Manawatu",

48 "NL": "Northland",

49 "NM": "Nelson-Marlborough",

50 "NP": "New Plymouth",

51 "OC": "Central Otago",

52 "QL": "Queenstown-Lakes",

53 "RR": "Rotorua",

54 "TP": "Taupo",

55 "WC": "West Coast",

56 "WI": "Wairarapa",

57 "WN": "Wellington",

58}

60ZONE_CODE_TO_ISLAND = {

61 "AK": "NI",

62 "BP": "NI",

63 "CC": "SI",

64 "DN": "SI",

65 "EC": "NI",

66 "HN": "NI",

67 "IN": "SI",

68 "MW": "NI",

69 "NL": "NI",

70 "NM": "SI",

71 "NP": "NI",

72 "OC": "SI",

73 "QL": "SI",

74 "RR": "NI",

75 "TP": "NI",

76 "WC": "SI",

77 "WI": "NI",

78 "WN": "NI",

79}

81ZONE_ORDER = [

82 "NL",

83 "AK",

84 "HN",

85 "BP",

86 "RR",

87 "TP",

88 "NP",

89 "EC",

90 "MW",

91 "WI",

92 "WN",

93 "NM",

94 "WC",

95 "CC",

96 "QL",

97 "OC",

98 "DN",

99 "IN",

100]

101

102ARRAY_TYPE_MAP = {

103 "fixed_open_rack": 0,

104 "fixed_roof_mount": 1,

105 "single_axis_tracking": 2,

106 "single_axis_backtracking": 3,

107 "two_axis_tracking": 4,

108}

109

110MODULE_TYPE_MAP = {

111 "standard": 0,

112 "premium": 1,

113 "thin_film": 2,

114}

115

116EPW_STANDARD_TZ = timezone(timedelta(hours=12), name="NZST_FIXED")

117NZ_WALLCLOCK_TZ = ZoneInfo("Pacific/Auckland")

118

119

120def ensure_output_dir(path):

121 """

122 Create an output directory with predictable permissions.

123 """

124 path.mkdir(parents=True, exist_ok=True)

125 path.chmod(0o755)

126 return path

127

128

129def read_epw_rows(epw_path):

130 """

131 Read an EPW file using the encodings present in the NIWA datasets.

132 """

133 last_error = None

134 for encoding in ("utf-8-sig", "latin-1"):

135 try:

136 with epw_path.open("r", encoding=encoding, newline="") as handle:

137 return list(csv.reader(handle))

138 except UnicodeDecodeError as exc:

139 last_error = exc

140

141 if last_error is not None:

142 raise last_error

143

144 raise ValueError(f"Could not read EPW rows from {epw_path}")

145

146

147def parse_epw_time_index(epw_path):

148 """

149 Read the year/month/day/hour/minute tuple for each EPW data row.

150 """

151 rows = read_epw_rows(epw_path)[8:]

152 time_index = []

153 for row_num, row in enumerate(rows, start=9):

154 if len(row) < 5:

155 raise ValueError(

156 f"EPW data row {row_num} has fewer than 5 columns in {epw_path}"

157 )

158 hour = int(row[3])

159 minute = int(row[4])

160 if minute not in {0, 60}:

161 raise ValueError(

162 f"Unsupported EPW minute value {minute} at row {row_num} in {epw_path}. "

163 "The workflow expects NIWA EPW minute values of 0 or 60."

164 )

165 if not 1 <= hour <= 24:

166 raise ValueError(

167 f"Unsupported EPW hour value {hour} at row {row_num} in {epw_path}. "

168 "The workflow expects EPW-standard 01..24 hour fields."

169 )

170

171 time_index.append((int(row[0]), int(row[1]), int(row[2]), hour, minute))

172

173 if len(time_index) != 8760:

174 raise ValueError(

175 f"Expected 8760 EPW rows in {epw_path}, found {len(time_index)}"

176 )

177

178 return time_index

179

180

181def parse_epw_data_period_metadata(epw_path) -> dict[str, Any]:

182 """

183 Read the shared TMY calendar metadata from the EPW header.

184 """

185 rows = read_epw_rows(epw_path)

186 if len(rows) < 8:

187 raise ValueError(f"Expected at least 8 EPW header rows in {epw_path}")

188

189 data_periods = rows[7]

190 if len(data_periods) < 7 or data_periods[0] != "DATA PERIODS":

191 raise ValueError(f"Malformed DATA PERIODS header row in {epw_path}")

192

193 metadata = {

194 "calendar_label": data_periods[3].strip(),

195 "start_weekday": data_periods[4].strip(),

196 }

197

198 # NIWA EPW headers may use either a numeric day count in column 7 or

199 # MBIE TMY3-style start/end dates in columns 6 and 7.

200 try:

201 metadata["days_in_year"] = int(data_periods[6])

202 except ValueError:

203 metadata["start_date"] = data_periods[5].strip()

204 metadata["end_date"] = data_periods[6].strip()

205 metadata["days_in_year"] = 365

206

207 return metadata

208

209

210def validate_base_year_calendar():

211 """

212 Fail loudly until leap-year handling is implemented for solar timeslices.

213 """

214 if pd.Timestamp(year=BASE_YEAR, month=1, day=1).is_leap_year:

215 raise ValueError(

216 "Solar availability-factor workflow currently assumes a 365-day "

217 f"calendar, but BASE_YEAR {BASE_YEAR} is a leap year. Revisit leap-year "

218 "handling before regenerating solar availability factors."

219 )

220

221

222def validate_epw_calendar_assumptions(epw_path):

223 """

224 Fail loudly if an EPW file implies leap-year calendar handling.

225 """

226 metadata = parse_epw_data_period_metadata(epw_path)

227 if metadata["days_in_year"] != 365:

228 raise ValueError(

229 "Solar availability-factor workflow currently assumes a 365-day "

230 f"calendar, but {epw_path.name} declares {metadata['days_in_year']} "

231 "days in its DATA PERIODS header. Revisit leap-year handling before "

232 "regenerating solar availability factors."

233 )

234

235

236def get_canonical_epw_index(epw_path):

237 """

238 Return the shared month/day/hour/minute sequence for one EPW file.

239 """

240 raw_index = parse_epw_time_index(epw_path)

241 validate_epw_calendar_assumptions(epw_path)

242 return [(month, day, hour, minute) for _, month, day, hour, minute in raw_index]

243

244

245def convert_epw_standard_time_to_wallclock(month: int, day: int, hour: int) -> datetime:

246 """

247 Convert a fixed NZST EPW interval-start hour to NZ wall-clock time.

248 """

249 standard_time = datetime(

250 BASE_YEAR,

251 month,

252 day,

253 hour,

254 tzinfo=EPW_STANDARD_TZ,

255 )

256 return standard_time.astimezone(NZ_WALLCLOCK_TZ)

257

258

259def format_time_index_rows(canonical_index):

260 """

261 Build hourly rows using the configured model base year calendar.

262 """

263 rows = []

264 for hour_index, (month, day, hour, minute) in enumerate(canonical_index, start=1):

265 interval_start_hour = (hour - 1) % 24

266 trading_date = pd.Timestamp(year=BASE_YEAR, month=month, day=day)

267 wall_clock = convert_epw_standard_time_to_wallclock(

268 month=month,

269 day=day,

270 hour=interval_start_hour,

271 )

272 rows.append(

273 {

274 "hour_of_year": hour_index,

275 "Trading_Date": trading_date,

276 "Year": BASE_YEAR,

277 "Month": month,

278 "Day": day,

279 "EPWHour": hour,

280 "Hour": interval_start_hour,

281 "Minute": minute,

282 "WallClock_DateTime": pd.Timestamp(wall_clock),

283 "WallClock_Date": pd.Timestamp(wall_clock.date()),

284 "WallClock_Year": wall_clock.year,

285 "WallClock_Month": wall_clock.month,

286 "WallClock_Day": wall_clock.day,

287 "WallClock_Hour": wall_clock.hour,

288 "WallClock_UtcOffsetHours": (

289 wall_clock.utcoffset().total_seconds() / 3600

290 ),

291 }

292 )

293 return rows

294

295

296def load_solar_scenarios() -> list[dict[str, Any]]:

297 """

298 Load and normalize the configured solar archetype parameters.

299 """

300 df = pd.read_csv(SOLAR_SCENARIOS_FILE)

301 df = df[~df["Tech_TIMES"].isin(EXCLUDED_SOLAR_TECHS)].copy()

302 scenarios = []

303

304 for row in df.to_dict(orient="records"):

305 scenario = dict(row)

306 scenario["ArrayTypeCode"] = ARRAY_TYPE_MAP[scenario["ArrayType"]]

307 scenario["ModuleTypeCode"] = MODULE_TYPE_MAP[scenario["ModuleType"]]

308 scenario["UseWeatherFileAlbedo"] = str(

309 scenario["UseWeatherFileAlbedo"]

310 ).lower() in {

311 "1",

312 "true",

313 "yes",

314 }

315 scenarios.append(scenario)

316

317 return scenarios

318

319

320def discover_epw_files(epw_dir):

321 """

322 Find one prepared EPW file for each expected NIWA climate zone.

323 """

324 discovered = {}

325 unexpected = []

326 for path in sorted(epw_dir.glob("*.epw")):

327 match = EPW_FILENAME_PATTERN.match(path.name)

328 if not match:

329 unexpected.append(path.name)

330 continue

331

332 zone = match.group("zone")

333 if zone in discovered:

334 raise ValueError(

335 f"Duplicate EPW files found for zone {zone}: "

336 f"{discovered[zone].name} and {path.name}"

337 )

338 discovered[zone] = path

339

340 if unexpected:

341 raise ValueError(

342 "Unsupported EPW filenames in prepared bundle: " f"{', '.join(unexpected)}"

343 )

344

345 missing = [zone for zone in ZONE_ORDER if zone not in discovered]

346 if missing:

347 raise FileNotFoundError(f"Missing EPW files for zones: {', '.join(missing)}")

348

349 return discovered

350

351

352def build_time_index(epw_files):

353 """

354 Build the shared hourly index using the model base-year calendar.

355

356 All NIWA files should agree on month/day/hour/minute ordering. The EPW row

357 year and header calendar metadata are ignored for timeslice construction.

358 """

359 validate_base_year_calendar()

360

361 canonical = None

362 canonical_zone = None

363 for zone in ZONE_ORDER:

364 index = get_canonical_epw_index(epw_files[zone])

365 if canonical is None:

366 canonical = index

367 canonical_zone = zone

368 elif canonical != index:

369 raise ValueError(f"Time index mismatch between {canonical_zone} and {zone}")

370

371 return pd.DataFrame(format_time_index_rows(canonical))

372

373

374def build_model(epw_path, scenario):

375 """

376 Build a PVWatts model for one scenario-zone pair.

377 """

378 model = Pvwattsv8.default("PVWattsNone") # pylint: disable=c-extension-no-member

379 model.SolarResource.solar_resource_file = str(epw_path.resolve())

380 model.Lifetime.system_use_lifetime_output = 0

381

382 model.SystemDesign.system_capacity = float(scenario["SystemCapacityKW"])

383 model.SystemDesign.array_type = int(scenario["ArrayTypeCode"])

384 model.SystemDesign.tilt = float(scenario["TiltDeg"])

385 model.SystemDesign.azimuth = float(scenario["AzimuthDeg"])

386 model.SystemDesign.module_type = int(scenario["ModuleTypeCode"])

387 model.SystemDesign.dc_ac_ratio = float(scenario["DcAcRatio"])

388 model.SystemDesign.inv_eff = float(scenario["InvEffPercent"])

389 model.SystemDesign.losses = float(scenario["LossesPercent"])

390 model.SystemDesign.bifaciality = float(scenario["Bifaciality"])

391 model.SystemDesign.gcr = float(scenario["Gcr"])

392 model.SystemDesign.en_snowloss = 0

393

394 use_weather_file_albedo = 1.0 if scenario["UseWeatherFileAlbedo"] else 0.0

395 model.SolarResource.use_wf_albedo = use_weather_file_albedo

396 if not use_weather_file_albedo:

397 albedo = float(scenario["Albedo"])

398 model.SolarResource.albedo = tuple(albedo for _ in range(12))

399

400 model.Shading.shading_en_azal = 0

401 model.Shading.shading_en_diff = 0

402 model.Shading.shading_en_mxh = 0

403 model.Shading.shading_en_string_option = 0

404 model.Shading.shading_en_timestep = 0

405

406 return model

407

408

409def _json_default(value: Any):

410 """

411 Convert pandas / numpy scalar values to plain Python types for JSON output.

412 """

413 if hasattr(value, "item"):

414 return value.item()

415 raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

416

417

418def collect_zone_hourly_rows(scenario_name, zone, time_index, generation):

419 """

420 Collect long-format hourly rows for a single zone.

421 """

422 region = ZONE_CODE_TO_REGION[zone]

423 island = ZONE_CODE_TO_ISLAND[zone]

424

425 rows = []

426 for time_row, generation_kw_per_kw in zip(

427 time_index.itertuples(index=False), generation

428 ):

429 rows.append(

430 {

431 "Scenario": scenario_name,

432 "Tech_TIMES": scenario_name,

433 "ZoneCode": zone,

434 "Region": region,

435 "Island": island,

436 "Trading_Date": time_row.Trading_Date,

437 "Year": time_row.Year,

438 "Month": time_row.Month,

439 "Day": time_row.Day,

440 "Hour": time_row.Hour,

441 "EPWHour": time_row.EPWHour,

442 "Minute": time_row.Minute,

443 "WallClock_DateTime": time_row.WallClock_DateTime,

444 "WallClock_Date": time_row.WallClock_Date,

445 "WallClock_Year": time_row.WallClock_Year,

446 "WallClock_Month": time_row.WallClock_Month,

447 "WallClock_Day": time_row.WallClock_Day,

448 "WallClock_Hour": time_row.WallClock_Hour,

449 "WallClock_UtcOffsetHours": time_row.WallClock_UtcOffsetHours,

450 "generation_kw_per_kw": generation_kw_per_kw,

451 }

452 )

453

454 return rows

455

456

457def run_scenario_hourly_profiles(scenario, epw_files, time_index):

458 """

459 Run one solar archetype across all NIWA zones.

460 """

461 scenario_name = scenario["Tech_TIMES"]

462 wide = time_index.copy()

463 long_rows = []

464 zone_results = []

465

466 for zone in ZONE_ORDER:

467 region = ZONE_CODE_TO_REGION[zone]

468 island = ZONE_CODE_TO_ISLAND[zone]

469 model = build_model(epw_files[zone], scenario)

470 model.execute()

471 generation = [float(value) for value in model.Outputs.gen]

472

473 if len(generation) != 8760:

474 raise ValueError(

475 f"Expected 8760 PVWatts outputs for {scenario_name}/{zone}, "

476 f"got {len(generation)}"

477 )

478

479 wide[zone] = generation

480 zone_results.append(

481 {

482 "Tech_TIMES": scenario_name,

483 "ZoneCode": zone,

484 "Region": region,

485 "Island": island,

486 "EPWFile": epw_files[zone].name,

487 "AnnualEnergyKWhPerKWDC": float(sum(generation)),

488 "CapacityFactorPercent": float(model.Outputs.capacity_factor),

489 "KWhPerKW": float(model.Outputs.kwh_per_kw),

490 }

491 )

492 long_rows.extend(

493 collect_zone_hourly_rows(

494 scenario_name=scenario_name,

495 zone=zone,

496 time_index=time_index,

497 generation=generation,

498 )

499 )

500

501 long_df = pd.DataFrame(long_rows)

502 long_df = create_timeslices(

503 long_df,

504 date_col="WallClock_Date",

505 hour_col="WallClock_Hour",

506 )

507 return wide, long_df, zone_results

508

509

510def save_scenario_outputs(scenario_name, scenario, wide, long_df, zone_results):

511 """

512 Save scenario outputs and metadata.

513 """

514 wide_path = HOURLY_DIR / f"{scenario_name}_hourly_by_zone.csv"

515 long_path = HOURLY_DIR / f"{scenario_name}_hourly_long.csv"

516 metadata_path = METADATA_DIR / f"{scenario_name}_run_metadata.json"

517

518 wide.to_csv(wide_path, index=False)

519 long_df.to_csv(long_path, index=False)

520

521 with metadata_path.open("w", encoding="utf-8") as handle:

522 json.dump(

523 {

524 "Tech_TIMES": scenario_name,

525 "Settings": scenario,

526 "ZoneResults": zone_results,

527 "Outputs": {

528 "HourlyByZoneCsv": str(wide_path),

529 "HourlyLongCsv": str(long_path),

530 },

531 },

532 handle,

533 indent=2,

534 default=_json_default,

535 )

536

537

538def run_hourly_profiles():

539 """

540 Run PVWatts across all configured solar archetypes and NIWA zones.

541 """

542 scenarios = load_solar_scenarios()

543 epw_files = discover_epw_files(PREPARED_EPW_DIR)

544 time_index = build_time_index(epw_files)

545

546 ensure_output_dir(HOURLY_DIR)

547 ensure_output_dir(METADATA_DIR)

548

549 combined_rows = []

550 metadata_rows = []

551

552 for scenario in scenarios:

553 scenario_name = scenario["Tech_TIMES"]

554 wide, long_df, zone_results = run_scenario_hourly_profiles(

555 scenario=scenario,

556 epw_files=epw_files,

557 time_index=time_index,

558 )

559 save_scenario_outputs(

560 scenario_name=scenario_name,

561 scenario=scenario,

562 wide=wide,

563 long_df=long_df,

564 zone_results=zone_results,

565 )

566 combined_rows.append(long_df)

567 metadata_rows.extend(zone_results)

568

569 all_hourly = pd.concat(combined_rows, ignore_index=True)

570 all_hourly.to_csv(HOURLY_DIR / "all_scenarios_hourly_long.csv", index=False)

571 pd.DataFrame(metadata_rows).to_csv(

572 METADATA_DIR / "all_scenarios_zone_summary.csv", index=False

573 )

574

575

576if __name__ == "__main__":

577 run_hourly_profiles()

Coverage for scripts/stage_3_scenarios/electricity/solar_run_hourly_profiles.py: 51%

218 statements