Coverage for scripts/stage_3_scenarios/electricity/solar_run_hourly_profiles.py: 51%
218 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-16 23:05 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-16 23:05 +0000
1"""
2Run PVWatts hourly solar profiles for the configured NIWA scenarios.
3"""
5from __future__ import annotations
7import csv
8import json
9import re
10from datetime import datetime, timedelta, timezone
11from typing import Any
12from zoneinfo import ZoneInfo
14import pandas as pd
15from prepare_times_nz.stage_0.stage_0_settings import BASE_YEAR
16from prepare_times_nz.utilities.filepaths import ASSUMPTIONS, STAGE_3_DATA
17from prepare_times_nz.utilities.timeslices import create_timeslices
19# pylint: disable=wrong-import-order
20from PySAM import Pvwattsv8
22SOLAR_SCENARIOS_FILE = (
23 ASSUMPTIONS / "electricity_generation/renewable_curves/SolarPvScenarios.csv"
24)
25EXCLUDED_SOLAR_TECHS = {"SolarFixed"}
26# note that these are techs where we have data but not any existing
27# techs.
28# We remove it so it doesn't build into wem_wcm and similar files from
29# the renewable curves
30# If we use this tech later just remove the filter and data ready to go
32OUTPUT_ROOT = STAGE_3_DATA / "electricity/solar_af"
33PREPARED_EPW_DIR = OUTPUT_ROOT / "prepared_epw"
34HOURLY_DIR = OUTPUT_ROOT / "hourly"
35METADATA_DIR = OUTPUT_ROOT / "metadata"
37EPW_FILENAME_PATTERN = re.compile(r"^TMY3_NZ_(?P<zone>[A-Z]{2})\.epw$")
39ZONE_CODE_TO_REGION = {
40 "AK": "Auckland",
41 "BP": "Bay of Plenty",
42 "CC": "Christchurch",
43 "DN": "Dunedin",
44 "EC": "East Coast",
45 "HN": "Hamilton",
46 "IN": "Invercargill",
47 "MW": "Manawatu",
48 "NL": "Northland",
49 "NM": "Nelson-Marlborough",
50 "NP": "New Plymouth",
51 "OC": "Central Otago",
52 "QL": "Queenstown-Lakes",
53 "RR": "Rotorua",
54 "TP": "Taupo",
55 "WC": "West Coast",
56 "WI": "Wairarapa",
57 "WN": "Wellington",
58}
60ZONE_CODE_TO_ISLAND = {
61 "AK": "NI",
62 "BP": "NI",
63 "CC": "SI",
64 "DN": "SI",
65 "EC": "NI",
66 "HN": "NI",
67 "IN": "SI",
68 "MW": "NI",
69 "NL": "NI",
70 "NM": "SI",
71 "NP": "NI",
72 "OC": "SI",
73 "QL": "SI",
74 "RR": "NI",
75 "TP": "NI",
76 "WC": "SI",
77 "WI": "NI",
78 "WN": "NI",
79}
81ZONE_ORDER = [
82 "NL",
83 "AK",
84 "HN",
85 "BP",
86 "RR",
87 "TP",
88 "NP",
89 "EC",
90 "MW",
91 "WI",
92 "WN",
93 "NM",
94 "WC",
95 "CC",
96 "QL",
97 "OC",
98 "DN",
99 "IN",
100]
102ARRAY_TYPE_MAP = {
103 "fixed_open_rack": 0,
104 "fixed_roof_mount": 1,
105 "single_axis_tracking": 2,
106 "single_axis_backtracking": 3,
107 "two_axis_tracking": 4,
108}
110MODULE_TYPE_MAP = {
111 "standard": 0,
112 "premium": 1,
113 "thin_film": 2,
114}
116EPW_STANDARD_TZ = timezone(timedelta(hours=12), name="NZST_FIXED")
117NZ_WALLCLOCK_TZ = ZoneInfo("Pacific/Auckland")
120def ensure_output_dir(path):
121 """
122 Create an output directory with predictable permissions.
123 """
124 path.mkdir(parents=True, exist_ok=True)
125 path.chmod(0o755)
126 return path
129def read_epw_rows(epw_path):
130 """
131 Read an EPW file using the encodings present in the NIWA datasets.
132 """
133 last_error = None
134 for encoding in ("utf-8-sig", "latin-1"):
135 try:
136 with epw_path.open("r", encoding=encoding, newline="") as handle:
137 return list(csv.reader(handle))
138 except UnicodeDecodeError as exc:
139 last_error = exc
141 if last_error is not None:
142 raise last_error
144 raise ValueError(f"Could not read EPW rows from {epw_path}")
147def parse_epw_time_index(epw_path):
148 """
149 Read the year/month/day/hour/minute tuple for each EPW data row.
150 """
151 rows = read_epw_rows(epw_path)[8:]
152 time_index = []
153 for row_num, row in enumerate(rows, start=9):
154 if len(row) < 5:
155 raise ValueError(
156 f"EPW data row {row_num} has fewer than 5 columns in {epw_path}"
157 )
158 hour = int(row[3])
159 minute = int(row[4])
160 if minute not in {0, 60}:
161 raise ValueError(
162 f"Unsupported EPW minute value {minute} at row {row_num} in {epw_path}. "
163 "The workflow expects NIWA EPW minute values of 0 or 60."
164 )
165 if not 1 <= hour <= 24:
166 raise ValueError(
167 f"Unsupported EPW hour value {hour} at row {row_num} in {epw_path}. "
168 "The workflow expects EPW-standard 01..24 hour fields."
169 )
171 time_index.append((int(row[0]), int(row[1]), int(row[2]), hour, minute))
173 if len(time_index) != 8760:
174 raise ValueError(
175 f"Expected 8760 EPW rows in {epw_path}, found {len(time_index)}"
176 )
178 return time_index
181def parse_epw_data_period_metadata(epw_path) -> dict[str, Any]:
182 """
183 Read the shared TMY calendar metadata from the EPW header.
184 """
185 rows = read_epw_rows(epw_path)
186 if len(rows) < 8:
187 raise ValueError(f"Expected at least 8 EPW header rows in {epw_path}")
189 data_periods = rows[7]
190 if len(data_periods) < 7 or data_periods[0] != "DATA PERIODS":
191 raise ValueError(f"Malformed DATA PERIODS header row in {epw_path}")
193 metadata = {
194 "calendar_label": data_periods[3].strip(),
195 "start_weekday": data_periods[4].strip(),
196 }
198 # NIWA EPW headers may use either a numeric day count in column 7 or
199 # MBIE TMY3-style start/end dates in columns 6 and 7.
200 try:
201 metadata["days_in_year"] = int(data_periods[6])
202 except ValueError:
203 metadata["start_date"] = data_periods[5].strip()
204 metadata["end_date"] = data_periods[6].strip()
205 metadata["days_in_year"] = 365
207 return metadata
210def validate_base_year_calendar():
211 """
212 Fail loudly until leap-year handling is implemented for solar timeslices.
213 """
214 if pd.Timestamp(year=BASE_YEAR, month=1, day=1).is_leap_year:
215 raise ValueError(
216 "Solar availability-factor workflow currently assumes a 365-day "
217 f"calendar, but BASE_YEAR {BASE_YEAR} is a leap year. Revisit leap-year "
218 "handling before regenerating solar availability factors."
219 )
222def validate_epw_calendar_assumptions(epw_path):
223 """
224 Fail loudly if an EPW file implies leap-year calendar handling.
225 """
226 metadata = parse_epw_data_period_metadata(epw_path)
227 if metadata["days_in_year"] != 365:
228 raise ValueError(
229 "Solar availability-factor workflow currently assumes a 365-day "
230 f"calendar, but {epw_path.name} declares {metadata['days_in_year']} "
231 "days in its DATA PERIODS header. Revisit leap-year handling before "
232 "regenerating solar availability factors."
233 )
236def get_canonical_epw_index(epw_path):
237 """
238 Return the shared month/day/hour/minute sequence for one EPW file.
239 """
240 raw_index = parse_epw_time_index(epw_path)
241 validate_epw_calendar_assumptions(epw_path)
242 return [(month, day, hour, minute) for _, month, day, hour, minute in raw_index]
245def convert_epw_standard_time_to_wallclock(month: int, day: int, hour: int) -> datetime:
246 """
247 Convert a fixed NZST EPW interval-start hour to NZ wall-clock time.
248 """
249 standard_time = datetime(
250 BASE_YEAR,
251 month,
252 day,
253 hour,
254 tzinfo=EPW_STANDARD_TZ,
255 )
256 return standard_time.astimezone(NZ_WALLCLOCK_TZ)
259def format_time_index_rows(canonical_index):
260 """
261 Build hourly rows using the configured model base year calendar.
262 """
263 rows = []
264 for hour_index, (month, day, hour, minute) in enumerate(canonical_index, start=1):
265 interval_start_hour = (hour - 1) % 24
266 trading_date = pd.Timestamp(year=BASE_YEAR, month=month, day=day)
267 wall_clock = convert_epw_standard_time_to_wallclock(
268 month=month,
269 day=day,
270 hour=interval_start_hour,
271 )
272 rows.append(
273 {
274 "hour_of_year": hour_index,
275 "Trading_Date": trading_date,
276 "Year": BASE_YEAR,
277 "Month": month,
278 "Day": day,
279 "EPWHour": hour,
280 "Hour": interval_start_hour,
281 "Minute": minute,
282 "WallClock_DateTime": pd.Timestamp(wall_clock),
283 "WallClock_Date": pd.Timestamp(wall_clock.date()),
284 "WallClock_Year": wall_clock.year,
285 "WallClock_Month": wall_clock.month,
286 "WallClock_Day": wall_clock.day,
287 "WallClock_Hour": wall_clock.hour,
288 "WallClock_UtcOffsetHours": (
289 wall_clock.utcoffset().total_seconds() / 3600
290 ),
291 }
292 )
293 return rows
296def load_solar_scenarios() -> list[dict[str, Any]]:
297 """
298 Load and normalize the configured solar archetype parameters.
299 """
300 df = pd.read_csv(SOLAR_SCENARIOS_FILE)
301 df = df[~df["Tech_TIMES"].isin(EXCLUDED_SOLAR_TECHS)].copy()
302 scenarios = []
304 for row in df.to_dict(orient="records"):
305 scenario = dict(row)
306 scenario["ArrayTypeCode"] = ARRAY_TYPE_MAP[scenario["ArrayType"]]
307 scenario["ModuleTypeCode"] = MODULE_TYPE_MAP[scenario["ModuleType"]]
308 scenario["UseWeatherFileAlbedo"] = str(
309 scenario["UseWeatherFileAlbedo"]
310 ).lower() in {
311 "1",
312 "true",
313 "yes",
314 }
315 scenarios.append(scenario)
317 return scenarios
320def discover_epw_files(epw_dir):
321 """
322 Find one prepared EPW file for each expected NIWA climate zone.
323 """
324 discovered = {}
325 unexpected = []
326 for path in sorted(epw_dir.glob("*.epw")):
327 match = EPW_FILENAME_PATTERN.match(path.name)
328 if not match:
329 unexpected.append(path.name)
330 continue
332 zone = match.group("zone")
333 if zone in discovered:
334 raise ValueError(
335 f"Duplicate EPW files found for zone {zone}: "
336 f"{discovered[zone].name} and {path.name}"
337 )
338 discovered[zone] = path
340 if unexpected:
341 raise ValueError(
342 "Unsupported EPW filenames in prepared bundle: " f"{', '.join(unexpected)}"
343 )
345 missing = [zone for zone in ZONE_ORDER if zone not in discovered]
346 if missing:
347 raise FileNotFoundError(f"Missing EPW files for zones: {', '.join(missing)}")
349 return discovered
352def build_time_index(epw_files):
353 """
354 Build the shared hourly index using the model base-year calendar.
356 All NIWA files should agree on month/day/hour/minute ordering. The EPW row
357 year and header calendar metadata are ignored for timeslice construction.
358 """
359 validate_base_year_calendar()
361 canonical = None
362 canonical_zone = None
363 for zone in ZONE_ORDER:
364 index = get_canonical_epw_index(epw_files[zone])
365 if canonical is None:
366 canonical = index
367 canonical_zone = zone
368 elif canonical != index:
369 raise ValueError(f"Time index mismatch between {canonical_zone} and {zone}")
371 return pd.DataFrame(format_time_index_rows(canonical))
374def build_model(epw_path, scenario):
375 """
376 Build a PVWatts model for one scenario-zone pair.
377 """
378 model = Pvwattsv8.default("PVWattsNone") # pylint: disable=c-extension-no-member
379 model.SolarResource.solar_resource_file = str(epw_path.resolve())
380 model.Lifetime.system_use_lifetime_output = 0
382 model.SystemDesign.system_capacity = float(scenario["SystemCapacityKW"])
383 model.SystemDesign.array_type = int(scenario["ArrayTypeCode"])
384 model.SystemDesign.tilt = float(scenario["TiltDeg"])
385 model.SystemDesign.azimuth = float(scenario["AzimuthDeg"])
386 model.SystemDesign.module_type = int(scenario["ModuleTypeCode"])
387 model.SystemDesign.dc_ac_ratio = float(scenario["DcAcRatio"])
388 model.SystemDesign.inv_eff = float(scenario["InvEffPercent"])
389 model.SystemDesign.losses = float(scenario["LossesPercent"])
390 model.SystemDesign.bifaciality = float(scenario["Bifaciality"])
391 model.SystemDesign.gcr = float(scenario["Gcr"])
392 model.SystemDesign.en_snowloss = 0
394 use_weather_file_albedo = 1.0 if scenario["UseWeatherFileAlbedo"] else 0.0
395 model.SolarResource.use_wf_albedo = use_weather_file_albedo
396 if not use_weather_file_albedo:
397 albedo = float(scenario["Albedo"])
398 model.SolarResource.albedo = tuple(albedo for _ in range(12))
400 model.Shading.shading_en_azal = 0
401 model.Shading.shading_en_diff = 0
402 model.Shading.shading_en_mxh = 0
403 model.Shading.shading_en_string_option = 0
404 model.Shading.shading_en_timestep = 0
406 return model
409def _json_default(value: Any):
410 """
411 Convert pandas / numpy scalar values to plain Python types for JSON output.
412 """
413 if hasattr(value, "item"):
414 return value.item()
415 raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
418def collect_zone_hourly_rows(scenario_name, zone, time_index, generation):
419 """
420 Collect long-format hourly rows for a single zone.
421 """
422 region = ZONE_CODE_TO_REGION[zone]
423 island = ZONE_CODE_TO_ISLAND[zone]
425 rows = []
426 for time_row, generation_kw_per_kw in zip(
427 time_index.itertuples(index=False), generation
428 ):
429 rows.append(
430 {
431 "Scenario": scenario_name,
432 "Tech_TIMES": scenario_name,
433 "ZoneCode": zone,
434 "Region": region,
435 "Island": island,
436 "Trading_Date": time_row.Trading_Date,
437 "Year": time_row.Year,
438 "Month": time_row.Month,
439 "Day": time_row.Day,
440 "Hour": time_row.Hour,
441 "EPWHour": time_row.EPWHour,
442 "Minute": time_row.Minute,
443 "WallClock_DateTime": time_row.WallClock_DateTime,
444 "WallClock_Date": time_row.WallClock_Date,
445 "WallClock_Year": time_row.WallClock_Year,
446 "WallClock_Month": time_row.WallClock_Month,
447 "WallClock_Day": time_row.WallClock_Day,
448 "WallClock_Hour": time_row.WallClock_Hour,
449 "WallClock_UtcOffsetHours": time_row.WallClock_UtcOffsetHours,
450 "generation_kw_per_kw": generation_kw_per_kw,
451 }
452 )
454 return rows
457def run_scenario_hourly_profiles(scenario, epw_files, time_index):
458 """
459 Run one solar archetype across all NIWA zones.
460 """
461 scenario_name = scenario["Tech_TIMES"]
462 wide = time_index.copy()
463 long_rows = []
464 zone_results = []
466 for zone in ZONE_ORDER:
467 region = ZONE_CODE_TO_REGION[zone]
468 island = ZONE_CODE_TO_ISLAND[zone]
469 model = build_model(epw_files[zone], scenario)
470 model.execute()
471 generation = [float(value) for value in model.Outputs.gen]
473 if len(generation) != 8760:
474 raise ValueError(
475 f"Expected 8760 PVWatts outputs for {scenario_name}/{zone}, "
476 f"got {len(generation)}"
477 )
479 wide[zone] = generation
480 zone_results.append(
481 {
482 "Tech_TIMES": scenario_name,
483 "ZoneCode": zone,
484 "Region": region,
485 "Island": island,
486 "EPWFile": epw_files[zone].name,
487 "AnnualEnergyKWhPerKWDC": float(sum(generation)),
488 "CapacityFactorPercent": float(model.Outputs.capacity_factor),
489 "KWhPerKW": float(model.Outputs.kwh_per_kw),
490 }
491 )
492 long_rows.extend(
493 collect_zone_hourly_rows(
494 scenario_name=scenario_name,
495 zone=zone,
496 time_index=time_index,
497 generation=generation,
498 )
499 )
501 long_df = pd.DataFrame(long_rows)
502 long_df = create_timeslices(
503 long_df,
504 date_col="WallClock_Date",
505 hour_col="WallClock_Hour",
506 )
507 return wide, long_df, zone_results
510def save_scenario_outputs(scenario_name, scenario, wide, long_df, zone_results):
511 """
512 Save scenario outputs and metadata.
513 """
514 wide_path = HOURLY_DIR / f"{scenario_name}_hourly_by_zone.csv"
515 long_path = HOURLY_DIR / f"{scenario_name}_hourly_long.csv"
516 metadata_path = METADATA_DIR / f"{scenario_name}_run_metadata.json"
518 wide.to_csv(wide_path, index=False)
519 long_df.to_csv(long_path, index=False)
521 with metadata_path.open("w", encoding="utf-8") as handle:
522 json.dump(
523 {
524 "Tech_TIMES": scenario_name,
525 "Settings": scenario,
526 "ZoneResults": zone_results,
527 "Outputs": {
528 "HourlyByZoneCsv": str(wide_path),
529 "HourlyLongCsv": str(long_path),
530 },
531 },
532 handle,
533 indent=2,
534 default=_json_default,
535 )
538def run_hourly_profiles():
539 """
540 Run PVWatts across all configured solar archetypes and NIWA zones.
541 """
542 scenarios = load_solar_scenarios()
543 epw_files = discover_epw_files(PREPARED_EPW_DIR)
544 time_index = build_time_index(epw_files)
546 ensure_output_dir(HOURLY_DIR)
547 ensure_output_dir(METADATA_DIR)
549 combined_rows = []
550 metadata_rows = []
552 for scenario in scenarios:
553 scenario_name = scenario["Tech_TIMES"]
554 wide, long_df, zone_results = run_scenario_hourly_profiles(
555 scenario=scenario,
556 epw_files=epw_files,
557 time_index=time_index,
558 )
559 save_scenario_outputs(
560 scenario_name=scenario_name,
561 scenario=scenario,
562 wide=wide,
563 long_df=long_df,
564 zone_results=zone_results,
565 )
566 combined_rows.append(long_df)
567 metadata_rows.extend(zone_results)
569 all_hourly = pd.concat(combined_rows, ignore_index=True)
570 all_hourly.to_csv(HOURLY_DIR / "all_scenarios_hourly_long.csv", index=False)
571 pd.DataFrame(metadata_rows).to_csv(
572 METADATA_DIR / "all_scenarios_zone_summary.csv", index=False
573 )
576if __name__ == "__main__":
577 run_hourly_profiles()