Coverage for scripts/stage_3_scenarios/electricity/solar_prepare_epw.py: 85%
111 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-16 23:05 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-16 23:05 +0000
1"""
2Prepare NIWA EPW files for the solar availability-factor workflow.
3"""
5from __future__ import annotations
7import csv
8import json
9import re
10import shutil
11import tarfile
12from pathlib import Path
14from prepare_times_nz.utilities.filepaths import DATA_RAW, STAGE_3_DATA
# Directory under the raw-data root that holds the NIWA source archive.
NIWA_DATA_DIR = DATA_RAW / "external_data/niwa"

# Stage-3 output locations used by this script.
OUTPUT_ROOT = STAGE_3_DATA / "electricity/solar_af"
PREPARED_EPW_DIR = OUTPUT_ROOT / "prepared_epw"
# Marker file touched by prepare_epw_files() once the bundle has validated.
PREPARED_EPW_SENTINEL = PREPARED_EPW_DIR / ".prepared"
METADATA_DIR = OUTPUT_ROOT / "metadata"

# The 18 zone codes that validate_prepared_bundle() requires: every one must
# be present and no other zone may appear.
EXPECTED_ZONES = {
    "AK",
    "BP",
    "CC",
    "DN",
    "EC",
    "HN",
    "IN",
    "MW",
    "NL",
    "NM",
    "NP",
    "OC",
    "QL",
    "RR",
    "TP",
    "WC",
    "WI",
    "WN",
}

# Accepted EPW filename shapes; each pattern must expose a named group "zone",
# which extract_zone_code() returns.
EPW_FILENAME_PATTERNS = (re.compile(r"^TMY3_NZ_(?P<zone>[A-Z]{2})\.epw$"),)

# (dataset label, source type, path) triple unpacked by resolve_epw_source().
SOURCE_CANDIDATE = ("TMY3", "archive", NIWA_DATA_DIR / "tmy3_epw.tar.gz")
def ensure_output_dir(path):
    """
    Make sure ``path`` exists as a directory and hand it back.

    Missing parent directories are created too, an already existing
    directory is accepted, and the mode is set to ``0o755`` on every call.
    """
    path.mkdir(exist_ok=True, parents=True)
    path.chmod(0o755)
    return path
def extract_zone_code(filename: str) -> str | None:
    """
    Look up the zone code embedded in an EPW filename.

    Each pattern in ``EPW_FILENAME_PATTERNS`` is tried in order. The ``zone``
    group of the first pattern that matches is returned; ``None`` is returned
    when no pattern matches.
    """
    hits = (regex.match(filename) for regex in EPW_FILENAME_PATTERNS)
    first_hit = next((hit for hit in hits if hit), None)
    return first_hit.group("zone") if first_hit else None
def resolve_epw_source():
    """
    Describe the NIWA EPW bundle configured in ``SOURCE_CANDIDATE``.

    Returns a dict with the keys ``dataset``, ``source_type`` and
    ``source_path``. Raises ``FileNotFoundError`` when the configured path
    does not exist.
    """
    dataset, source_type, source_path = SOURCE_CANDIDATE
    if not source_path.exists():
        raise FileNotFoundError(f"No NIWA EPW source found. Checked: {source_path}.")
    return dict(dataset=dataset, source_type=source_type, source_path=source_path)
def read_epw_rows(epw_path):
    """
    Parse an EPW file as CSV and return all of its rows as lists of strings.

    The file is decoded as UTF-8 (a leading BOM is dropped); if that fails
    with ``UnicodeDecodeError`` the whole file is re-read as Latin-1.
    """

    def _load(encoding):
        # newline="" leaves line-ending handling to the csv module.
        with epw_path.open("r", encoding=encoding, newline="") as stream:
            return list(csv.reader(stream))

    try:
        return _load("utf-8-sig")
    except UnicodeDecodeError:
        return _load("latin-1")
def validate_epw_file(path: Path):
    """
    Validate filename and hour-field conventions for a NIWA EPW file.

    Checks, in order:

    * the filename yields a zone code via ``extract_zone_code``;
    * after skipping the first eight rows, exactly 8760 data rows remain;
    * every data row has at least five columns, its fifth column (minute) is
      ``"0"`` or ``"60"``, and its fourth column (hour) is an integer in
      ``1..24``.

    Returns the zone code taken from the filename.

    Raises ``ValueError`` describing the first violated rule. Row numbers in
    the messages are 1-based positions in the parsed file.
    """
    zone = extract_zone_code(path.name)
    if zone is None:
        raise ValueError(
            f"Unsupported NIWA EPW filename {path.name}. Expected TMY3_NZ_<zone>.epw."
        )

    rows = read_epw_rows(path)
    data_rows = rows[8:]
    if len(data_rows) != 8760:
        raise ValueError(f"Expected 8760 EPW rows in {path}, found {len(data_rows)}")

    # start=9 because the eight skipped rows occupy positions 1..8.
    for row_num, row in enumerate(data_rows, start=9):
        if len(row) < 5:
            raise ValueError(
                f"EPW data row {row_num} has fewer than 5 columns in {path}"
            )

        hour = row[3].strip()
        minute = row[4].strip()
        if minute not in {"0", "60"}:
            raise ValueError(
                f"Unsupported EPW minute value {minute!r} at row {row_num} in {path}. "
                "The workflow expects NIWA EPW minute values of 0 or 60."
            )
        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits (which can appear when a file is decoded
        # as Latin-1) that int() rejects, which would surface as a bare int()
        # error without the row number or path.
        if not (hour.isdecimal() and 1 <= int(hour) <= 24):
            raise ValueError(
                f"Unsupported EPW hour value {hour!r} at row {row_num} in {path}. "
                "The workflow now validates EPW-standard 01..24 hours instead of "
                "normalizing 00..23 values."
            )

    return zone
134def _archive_members(archive_path):
135 """
136 Return the EPW members from the NIWA archive.
137 """
138 with tarfile.open(archive_path, "r:gz") as handle:
139 members = [member for member in handle.getmembers() if member.isfile()]
140 epw_members = [member for member in members if member.name.endswith(".epw")]
142 if not epw_members:
143 raise FileNotFoundError(f"No .epw files found in archive {archive_path}.")
145 return sorted(epw_members, key=lambda member: member.name)
def clear_prepared_epw_dir(output_dir: Path):
    """
    Delete every ``*.epw`` entry directly inside ``output_dir``.

    Only names matching ``*.epw`` at the top level are removed; other files
    and anything in subdirectories are left alone.
    """
    epw_glob = "*.epw"
    for stale_file in output_dir.glob(epw_glob):
        stale_file.unlink()
def copy_epw_bundle(source, output_dir):
    """
    Extract the NIWA EPW bundle described by ``source`` into ``output_dir``.

    ``source`` is a mapping with ``source_type`` and ``source_path`` keys (the
    shape returned by ``resolve_epw_source``). Only the ``"archive"`` source
    type is supported: every member reported by ``_archive_members`` is
    written to ``output_dir`` under the last component of its member name, so
    any directory structure inside the archive is flattened.

    Returns ``(count, paths)``: the number of files written and the list of
    target paths in the order they were written.

    Raises ``ValueError`` for an unsupported source type and
    ``FileNotFoundError`` when a member cannot be opened for extraction.
    """
    source_type = source["source_type"]
    source_path = source["source_path"]
    if source_type != "archive":
        raise ValueError(f"Unsupported EPW source type {source_type!r}.")

    copied_paths = []
    with tarfile.open(source_path, "r:gz") as handle:
        for member in _archive_members(source_path):
            extracted = handle.extractfile(member)
            if extracted is None:
                raise FileNotFoundError(
                    f"Could not extract {member.name} from {source_path}."
                )
            # Keep only the final path component so nothing is written
            # outside output_dir.
            target_path = output_dir / member.name.rsplit("/", maxsplit=1)[-1]
            # Close the member stream as soon as it has been copied instead of
            # leaving it to the garbage collector.
            with extracted, target_path.open("wb") as output_handle:
                shutil.copyfileobj(extracted, output_handle)
            copied_paths.append(target_path)
    return len(copied_paths), copied_paths
def validate_prepared_bundle(epw_paths: list[Path]):
    """
    Validate that the prepared bundle contains exactly the supported 18 zones.

    Each path is checked with ``validate_epw_file`` (in sorted path order) and
    its zone code recorded. The set of zones found must equal
    ``EXPECTED_ZONES``.

    Returns a dict mapping zone code to EPW file name.

    Raises ``ValueError`` when two files map to the same zone, when an
    expected zone is missing, or when an unexpected zone is present. Errors
    raised by ``validate_epw_file`` propagate unchanged.
    """
    zone_to_file = {}
    for epw_path in sorted(epw_paths):
        zone = validate_epw_file(epw_path)
        existing = zone_to_file.get(zone)
        if existing is not None:
            # The mapping stores file names (str), not Path objects, so the
            # earlier file name is formatted directly.
            raise ValueError(
                f"Duplicate EPW files for zone {zone}: {existing} and {epw_path.name}"
            )
        zone_to_file[zone] = epw_path.name

    missing = sorted(EXPECTED_ZONES.difference(zone_to_file))
    if missing:
        raise ValueError(f"Missing EPW files for zones: {', '.join(missing)}")

    extra = sorted(set(zone_to_file).difference(EXPECTED_ZONES))
    if extra:
        raise ValueError(f"Unexpected EPW zones: {', '.join(extra)}")

    return zone_to_file
def prepare_epw_files():
    """
    Copy NIWA EPWs into stage-3 storage and validate them in place.

    Steps: resolve the source bundle, make sure the prepared-EPW directory
    exists, drop any sentinel and ``*.epw`` files left by an earlier run,
    extract the bundle, validate it, touch the sentinel, and write
    ``prepare_epw_summary.json`` into the metadata directory.

    Any exception from resolving, copying or validating propagates; in that
    case the sentinel is not (re)created.
    """
    source = resolve_epw_source()
    output_dir = ensure_output_dir(PREPARED_EPW_DIR)

    # Remove the marker from any previous successful run before rebuilding, so
    # a run that fails below cannot leave a stale sentinel next to a partial
    # or invalid bundle.
    # NOTE(review): presumably downstream steps read the sentinel as "bundle
    # is valid" - confirm with its consumers.
    PREPARED_EPW_SENTINEL.unlink(missing_ok=True)
    clear_prepared_epw_dir(output_dir)

    copied, copied_paths = copy_epw_bundle(source, output_dir)
    zone_to_file = validate_prepared_bundle(copied_paths)

    # Only reached when validation passed.
    PREPARED_EPW_SENTINEL.touch()

    ensure_output_dir(METADATA_DIR)
    summary = {
        "dataset": source["dataset"],
        "source_type": source["source_type"],
        "source_path": str(source["source_path"]),
        "copied_files": copied,
        "prepared_files": sorted(path.name for path in copied_paths),
        "zones": [
            {"ZoneCode": zone, "EPWFile": zone_to_file[zone]}
            for zone in sorted(zone_to_file)
        ],
    }
    with (METADATA_DIR / "prepare_epw_summary.json").open(
        "w", encoding="utf-8"
    ) as handle:
        json.dump(summary, handle, indent=2)
# Run the full prepare step when the module is executed as a script.
if __name__ == "__main__":
    prepare_epw_files()