Coverage for scripts / stage_3_scenarios / electricity / solar_run_hourly_profiles.py: 44%
196 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-01 22:14 +0000
1"""
2Run PVWatts hourly solar profiles for the configured NIWA scenarios.
3"""
5from __future__ import annotations
7import csv
8import json
9from datetime import datetime, timedelta, timezone
10from typing import Any
11from zoneinfo import ZoneInfo
13import pandas as pd
14from prepare_times_nz.stage_0.stage_0_settings import BASE_YEAR
15from prepare_times_nz.utilities.filepaths import ASSUMPTIONS, STAGE_3_DATA
16from prepare_times_nz.utilities.timeslices import create_timeslices
18# pylint: disable=wrong-import-order
19from PySAM import Pvwattsv8
# Configured PV archetype parameters: one row per Tech_TIMES scenario.
SOLAR_SCENARIOS_FILE = (
    ASSUMPTIONS / "electricity_generation/renewable_curves/SolarPvScenarios.csv"
)
EXCLUDED_SOLAR_TECHS = {"SolarFixed"}
# note that these are techs where we have data but not any existing
# techs.
# We remove it so it doesn't build into wem_wcm and similar files from
# the renewable curves
# If we use this tech later just remove the filter and data ready to go

# Output directory layout under the stage-3 data tree.
OUTPUT_ROOT = STAGE_3_DATA / "electricity/solar_af"
PREPARED_EPW_DIR = OUTPUT_ROOT / "prepared_epw"
HOURLY_DIR = OUTPUT_ROOT / "hourly"
METADATA_DIR = OUTPUT_ROOT / "metadata"

# Filename pattern for one prepared NIWA TMY weather file per climate zone.
EXPECTED_PATTERN = "TMY_NZ_{zone}.epw"

# NIWA climate-zone code -> human-readable region name.
ZONE_CODE_TO_REGION = {
    "AK": "Auckland",
    "BP": "Bay of Plenty",
    "CC": "Christchurch",
    "DN": "Dunedin",
    "EC": "East Coast",
    "HN": "Hamilton",
    "IN": "Invercargill",
    "MW": "Manawatu",
    "NL": "Northland",
    "NM": "Nelson-Marlborough",
    "NP": "New Plymouth",
    "OC": "Central Otago",
    "QL": "Queenstown-Lakes",
    "RR": "Rotorua",
    "TP": "Taupo",
    "WC": "West Coast",
    "WI": "Wairarapa",
    "WN": "Wellington",
}

# NIWA climate-zone code -> island (NI = North Island, SI = South Island).
ZONE_CODE_TO_ISLAND = {
    "AK": "NI",
    "BP": "NI",
    "CC": "SI",
    "DN": "SI",
    "EC": "NI",
    "HN": "NI",
    "IN": "SI",
    "MW": "NI",
    "NL": "NI",
    "NM": "SI",
    "NP": "NI",
    "OC": "SI",
    "QL": "SI",
    "RR": "NI",
    "TP": "NI",
    "WC": "SI",
    "WI": "NI",
    "WN": "NI",
}

# Canonical zone ordering used for iteration and output column order.
ZONE_ORDER = [
    "NL",
    "AK",
    "HN",
    "BP",
    "RR",
    "TP",
    "NP",
    "EC",
    "MW",
    "WI",
    "WN",
    "NM",
    "WC",
    "CC",
    "QL",
    "OC",
    "DN",
    "IN",
]

# Scenario CSV "ArrayType" label -> PVWatts array_type integer code.
ARRAY_TYPE_MAP = {
    "fixed_open_rack": 0,
    "fixed_roof_mount": 1,
    "single_axis_tracking": 2,
    "single_axis_backtracking": 3,
    "two_axis_tracking": 4,
}

# Scenario CSV "ModuleType" label -> PVWatts module_type integer code.
MODULE_TYPE_MAP = {
    "standard": 0,
    "premium": 1,
    "thin_film": 2,
}

# EPW data is recorded in fixed NZ standard time (UTC+12, no DST);
# NZ_WALLCLOCK_TZ carries the DST-aware local clock for conversion.
EPW_STANDARD_TZ = timezone(timedelta(hours=12), name="NZST_FIXED")
NZ_WALLCLOCK_TZ = ZoneInfo("Pacific/Auckland")
def ensure_output_dir(path):
    """
    Make sure *path* exists as a directory with 0o755 permissions.

    Parents are created as needed and an existing directory is reused.
    Returns the same path object for convenient chaining.
    """
    path.mkdir(parents=True, exist_ok=True)
    path.chmod(0o755)
    return path
def read_epw_rows(epw_path):
    """
    Load an EPW file as a list of CSV rows.

    The encodings seen in the NIWA datasets are tried in order
    (UTF-8 with optional BOM, then Latin-1); the first successful
    decode wins.  If every attempt fails with a decode error, the
    last such error is re-raised.
    """
    decode_error = None
    for candidate_encoding in ("utf-8-sig", "latin-1"):
        try:
            with epw_path.open("r", encoding=candidate_encoding, newline="") as stream:
                return list(csv.reader(stream))
        except UnicodeDecodeError as exc:
            decode_error = exc

    if decode_error is not None:
        raise decode_error

    # Defensive: only reachable if the loop neither returned nor errored.
    raise ValueError(f"Could not read EPW rows from {epw_path}")
def parse_epw_time_index(epw_path):
    """
    Extract the (year, month, day, hour, minute) tuple from every data row.

    The first eight rows of an EPW file are header metadata and are skipped.

    Raises ValueError when a data row has fewer than five columns or the
    file does not contain exactly 8760 hourly records.
    """
    data_rows = read_epw_rows(epw_path)[8:]
    stamps = []
    for row_number, row in enumerate(data_rows, start=9):
        if len(row) < 5:
            raise ValueError(
                f"EPW data row {row_number} has fewer than 5 columns in {epw_path}"
            )
        stamps.append(tuple(int(field) for field in row[:5]))

    if len(stamps) != 8760:
        raise ValueError(
            f"Expected 8760 EPW rows in {epw_path}, found {len(stamps)}"
        )

    return stamps
def parse_epw_data_period_metadata(epw_path) -> dict[str, Any]:
    """
    Pull the shared TMY calendar metadata out of the EPW "DATA PERIODS" row.

    Returns a dict with the calendar label, the weekday the year starts on,
    and the declared number of days in the year.

    Raises ValueError when the header is too short or the eighth row is not
    a well-formed DATA PERIODS record.
    """
    rows = read_epw_rows(epw_path)
    if len(rows) < 8:
        raise ValueError(f"Expected at least 8 EPW header rows in {epw_path}")

    header = rows[7]
    if not (len(header) >= 7 and header[0] == "DATA PERIODS"):
        raise ValueError(f"Malformed DATA PERIODS header row in {epw_path}")

    return {
        "calendar_label": header[3].strip(),
        "start_weekday": header[4].strip(),
        "days_in_year": int(header[6]),
    }
def validate_base_year_calendar():
    """
    Guard against leap-year base years until leap handling is implemented.

    Raises ValueError if BASE_YEAR is a leap year, because the solar
    timeslice workflow currently assumes a 365-day calendar.
    """
    jan_first = pd.Timestamp(year=BASE_YEAR, month=1, day=1)
    if not jan_first.is_leap_year:
        return
    raise ValueError(
        "Solar availability-factor workflow currently assumes a 365-day "
        f"calendar, but BASE_YEAR {BASE_YEAR} is a leap year. Revisit leap-year "
        "handling before regenerating solar availability factors."
    )
def validate_epw_calendar_assumptions(epw_path):
    """
    Reject EPW files that declare anything other than a 365-day year.

    Raises ValueError when the DATA PERIODS header declares a different
    day count, since leap-year handling is not yet implemented.
    """
    declared_days = parse_epw_data_period_metadata(epw_path)["days_in_year"]
    if declared_days == 365:
        return
    raise ValueError(
        "Solar availability-factor workflow currently assumes a 365-day "
        f"calendar, but {epw_path.name} declares {declared_days} "
        "days in its DATA PERIODS header. Revisit leap-year handling before "
        "regenerating solar availability factors."
    )
def get_canonical_epw_index(epw_path):
    """
    Return the (month, day, hour, minute) sequence for one EPW file.

    The per-row EPW year is dropped; the file's calendar assumptions are
    validated before the index is returned.
    """
    full_index = parse_epw_time_index(epw_path)
    validate_epw_calendar_assumptions(epw_path)
    return [stamp[1:] for stamp in full_index]
def convert_epw_standard_time_to_wallclock(month: int, day: int, hour: int) -> datetime:
    """
    Map an interval-start hour in fixed NZST (UTC+12) onto NZ wall-clock time.

    The result carries the Pacific/Auckland zone, so daylight-saving shifts
    show up in the converted hour and UTC offset.
    """
    fixed_offset_time = datetime(BASE_YEAR, month, day, hour, tzinfo=EPW_STANDARD_TZ)
    return fixed_offset_time.astimezone(NZ_WALLCLOCK_TZ)
def format_time_index_rows(canonical_index):
    """
    Expand the canonical EPW index into one row per hour of the base year.

    Each row carries both the fixed-NZST EPW fields and the corresponding
    NZ wall-clock fields (which reflect daylight-saving shifts).
    """
    rows = []
    for position, (month, day, epw_hour, minute) in enumerate(canonical_index, start=1):
        # EPW hours run 1..24; shift to 0..23 interval-start hours.
        start_hour = (epw_hour - 1) % 24
        wall_clock = convert_epw_standard_time_to_wallclock(
            month=month,
            day=day,
            hour=start_hour,
        )
        row = {
            "hour_of_year": position,
            "Trading_Date": pd.Timestamp(year=BASE_YEAR, month=month, day=day),
            "Year": BASE_YEAR,
            "Month": month,
            "Day": day,
            "EPWHour": epw_hour,
            "Hour": start_hour,
            "Minute": minute,
            "WallClock_DateTime": pd.Timestamp(wall_clock),
            "WallClock_Date": pd.Timestamp(wall_clock.date()),
            "WallClock_Year": wall_clock.year,
            "WallClock_Month": wall_clock.month,
            "WallClock_Day": wall_clock.day,
            "WallClock_Hour": wall_clock.hour,
            "WallClock_UtcOffsetHours": wall_clock.utcoffset().total_seconds() / 3600,
        }
        rows.append(row)
    return rows
def load_solar_scenarios() -> list[dict[str, Any]]:
    """
    Read the solar archetype CSV and normalize the PVWatts inputs.

    Excluded techs are dropped, array/module type labels are mapped to
    their PVWatts integer codes, and the weather-file-albedo flag is
    coerced to a bool (accepting "1"/"true"/"yes", case-insensitive).
    """
    raw = pd.read_csv(SOLAR_SCENARIOS_FILE)
    keep_mask = ~raw["Tech_TIMES"].isin(EXCLUDED_SOLAR_TECHS)
    records = raw[keep_mask].copy().to_dict(orient="records")

    truthy = {"1", "true", "yes"}
    normalized = []
    for record in records:
        scenario = dict(record)
        scenario["ArrayTypeCode"] = ARRAY_TYPE_MAP[scenario["ArrayType"]]
        scenario["ModuleTypeCode"] = MODULE_TYPE_MAP[scenario["ModuleType"]]
        scenario["UseWeatherFileAlbedo"] = (
            str(scenario["UseWeatherFileAlbedo"]).lower() in truthy
        )
        normalized.append(scenario)

    return normalized
def discover_epw_files(epw_dir):
    """
    Locate the prepared EPW file for every expected NIWA climate zone.

    Returns a zone-code -> path mapping.  Raises FileNotFoundError listing
    every zone whose file is absent.
    """
    candidates = {
        zone: epw_dir / EXPECTED_PATTERN.format(zone=zone) for zone in ZONE_ORDER
    }
    discovered = {zone: path for zone, path in candidates.items() if path.exists()}

    missing = [zone for zone in ZONE_ORDER if zone not in discovered]
    if missing:
        raise FileNotFoundError(f"Missing EPW files for zones: {', '.join(missing)}")

    return discovered
def build_time_index(epw_files):
    """
    Build the shared hourly index using the model base-year calendar.

    All NIWA files must agree on month/day/hour/minute ordering; the EPW
    row year and header calendar metadata play no part in timeslice
    construction.  Raises ValueError on any inter-zone mismatch.
    """
    validate_base_year_calendar()

    reference = None
    reference_zone = None
    for zone in ZONE_ORDER:
        zone_index = get_canonical_epw_index(epw_files[zone])
        if reference is None:
            reference, reference_zone = zone_index, zone
        elif zone_index != reference:
            raise ValueError(f"Time index mismatch between {reference_zone} and {zone}")

    return pd.DataFrame(format_time_index_rows(reference))
def build_model(epw_path, scenario):
    """
    Configure a PVWatts v8 model for one scenario/zone pair.

    Snow loss and all shading inputs are disabled.  Albedo is taken from
    the weather file unless the scenario supplies a fixed value, in which
    case the same value is used for all twelve months.
    """
    model = Pvwattsv8.default("PVWattsNone")  # pylint: disable=c-extension-no-member
    model.SolarResource.solar_resource_file = str(epw_path.resolve())
    model.Lifetime.system_use_lifetime_output = 0

    design = model.SystemDesign
    design.system_capacity = float(scenario["SystemCapacityKW"])
    design.array_type = int(scenario["ArrayTypeCode"])
    design.tilt = float(scenario["TiltDeg"])
    design.azimuth = float(scenario["AzimuthDeg"])
    design.module_type = int(scenario["ModuleTypeCode"])
    design.dc_ac_ratio = float(scenario["DcAcRatio"])
    design.inv_eff = float(scenario["InvEffPercent"])
    design.losses = float(scenario["LossesPercent"])
    design.bifaciality = float(scenario["Bifaciality"])
    design.gcr = float(scenario["Gcr"])
    design.en_snowloss = 0

    use_wf_albedo = 1.0 if scenario["UseWeatherFileAlbedo"] else 0.0
    model.SolarResource.use_wf_albedo = use_wf_albedo
    if not use_wf_albedo:
        # Twelve identical monthly albedo values.
        model.SolarResource.albedo = (float(scenario["Albedo"]),) * 12

    shading = model.Shading
    shading.shading_en_azal = 0
    shading.shading_en_diff = 0
    shading.shading_en_mxh = 0
    shading.shading_en_string_option = 0
    shading.shading_en_timestep = 0

    return model
def _json_default(value: Any):
    """
    JSON fallback serializer used by json.dump's `default` hook.

    Unwraps pandas / numpy scalars to plain Python values via their .item()
    method; anything else is rejected with TypeError, as json.dump expects.
    """
    if hasattr(value, "item"):
        return value.item()
    type_name = type(value).__name__
    raise TypeError(f"Object of type {type_name} is not JSON serializable")
def collect_zone_hourly_rows(scenario_name, zone, time_index, generation):
    """
    Build long-format hourly records for one zone of one scenario.

    Pairs each row of the shared time index with the matching PVWatts
    generation value (kW output per kW DC installed).
    """
    region = ZONE_CODE_TO_REGION[zone]
    island = ZONE_CODE_TO_ISLAND[zone]

    time_rows = time_index.itertuples(index=False)
    return [
        {
            "Scenario": scenario_name,
            "Tech_TIMES": scenario_name,
            "ZoneCode": zone,
            "Region": region,
            "Island": island,
            "Trading_Date": stamp.Trading_Date,
            "Year": stamp.Year,
            "Month": stamp.Month,
            "Day": stamp.Day,
            "Hour": stamp.Hour,
            "EPWHour": stamp.EPWHour,
            "Minute": stamp.Minute,
            "WallClock_DateTime": stamp.WallClock_DateTime,
            "WallClock_Date": stamp.WallClock_Date,
            "WallClock_Year": stamp.WallClock_Year,
            "WallClock_Month": stamp.WallClock_Month,
            "WallClock_Day": stamp.WallClock_Day,
            "WallClock_Hour": stamp.WallClock_Hour,
            "WallClock_UtcOffsetHours": stamp.WallClock_UtcOffsetHours,
            "generation_kw_per_kw": output,
        }
        for stamp, output in zip(time_rows, generation)
    ]
def run_scenario_hourly_profiles(scenario, epw_files, time_index):
    """
    Execute PVWatts for one solar archetype in every NIWA zone.

    Returns a wide frame (one generation column per zone code), a long
    frame with timeslices attached, and a list of per-zone summary dicts.

    Raises ValueError if PVWatts does not return exactly 8760 outputs.
    """
    tech_name = scenario["Tech_TIMES"]
    wide = time_index.copy()
    long_rows = []
    summaries = []

    for zone in ZONE_ORDER:
        model = build_model(epw_files[zone], scenario)
        model.execute()
        hourly_gen = [float(value) for value in model.Outputs.gen]

        if len(hourly_gen) != 8760:
            raise ValueError(
                f"Expected 8760 PVWatts outputs for {tech_name}/{zone}, "
                f"got {len(hourly_gen)}"
            )

        wide[zone] = hourly_gen
        summaries.append(
            {
                "Tech_TIMES": tech_name,
                "ZoneCode": zone,
                "Region": ZONE_CODE_TO_REGION[zone],
                "Island": ZONE_CODE_TO_ISLAND[zone],
                "EPWFile": epw_files[zone].name,
                "AnnualEnergyKWhPerKWDC": float(sum(hourly_gen)),
                "CapacityFactorPercent": float(model.Outputs.capacity_factor),
                "KWhPerKW": float(model.Outputs.kwh_per_kw),
            }
        )
        long_rows.extend(
            collect_zone_hourly_rows(
                scenario_name=tech_name,
                zone=zone,
                time_index=time_index,
                generation=hourly_gen,
            )
        )

    long_df = create_timeslices(
        pd.DataFrame(long_rows),
        date_col="WallClock_Date",
        hour_col="WallClock_Hour",
    )
    return wide, long_df, summaries
def save_scenario_outputs(scenario_name, scenario, wide, long_df, zone_results):
    """
    Write the wide/long hourly CSVs plus a JSON metadata record.

    The metadata file records the scenario settings, per-zone results, and
    the paths of the CSVs written for this scenario.
    """
    wide_path = HOURLY_DIR / f"{scenario_name}_hourly_by_zone.csv"
    long_path = HOURLY_DIR / f"{scenario_name}_hourly_long.csv"
    metadata_path = METADATA_DIR / f"{scenario_name}_run_metadata.json"

    wide.to_csv(wide_path, index=False)
    long_df.to_csv(long_path, index=False)

    metadata = {
        "Tech_TIMES": scenario_name,
        "Settings": scenario,
        "ZoneResults": zone_results,
        "Outputs": {
            "HourlyByZoneCsv": str(wide_path),
            "HourlyLongCsv": str(long_path),
        },
    }
    with metadata_path.open("w", encoding="utf-8") as handle:
        json.dump(metadata, handle, indent=2, default=_json_default)
def run_hourly_profiles():
    """
    Produce hourly PVWatts profiles for every configured solar archetype.

    Writes per-scenario wide/long CSVs and metadata, then combined
    long-format and zone-summary CSVs under the solar_af output tree.
    """
    scenarios = load_solar_scenarios()
    epw_files = discover_epw_files(PREPARED_EPW_DIR)
    time_index = build_time_index(epw_files)

    ensure_output_dir(HOURLY_DIR)
    ensure_output_dir(METADATA_DIR)

    all_long_frames = []
    all_zone_summaries = []

    for scenario in scenarios:
        tech_name = scenario["Tech_TIMES"]
        wide, long_df, zone_results = run_scenario_hourly_profiles(
            scenario=scenario,
            epw_files=epw_files,
            time_index=time_index,
        )
        save_scenario_outputs(
            scenario_name=tech_name,
            scenario=scenario,
            wide=wide,
            long_df=long_df,
            zone_results=zone_results,
        )
        all_long_frames.append(long_df)
        all_zone_summaries.extend(zone_results)

    combined = pd.concat(all_long_frames, ignore_index=True)
    combined.to_csv(HOURLY_DIR / "all_scenarios_hourly_long.csv", index=False)
    summary_frame = pd.DataFrame(all_zone_summaries)
    summary_frame.to_csv(METADATA_DIR / "all_scenarios_zone_summary.csv", index=False)
# Script entry point: run the full PVWatts workflow when executed directly.
if __name__ == "__main__":
    run_hourly_profiles()