Coverage for scripts / stage_3_scenarios / electricity / solar_prepare_epw.py: 63%
97 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-01 22:14 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-01 22:14 +0000
1"""
2Prepare NIWA EPW files for the solar availability-factor workflow.
3"""
5from __future__ import annotations
7import csv
8import json
9import shutil
10import tarfile
12from prepare_times_nz.utilities.filepaths import DATA_RAW, STAGE_3_DATA
# Candidate locations of the NIWA EPW bundle under the raw-data tree.
# resolve_epw_source() checks the unpacked directory first and only then the
# archive, which is opened as a gzip-compressed tar file.
PREFERRED_EPW_DIR = DATA_RAW / "external_data/niwa/tmy2_epw"
PREFERRED_EPW_ARCHIVE = DATA_RAW / "external_data/niwa/tmy2_epw.tar.gz"

# Stage-3 output locations for the solar availability-factor workflow.
OUTPUT_ROOT = STAGE_3_DATA / "electricity/solar_af"
# Destination directory for the copied (and, where needed, normalized) EPWs.
PREPARED_EPW_DIR = OUTPUT_ROOT / "prepared_epw"
# Marker file touched by prepare_epw_files() once the bundle has been copied.
PREPARED_EPW_SENTINEL = PREPARED_EPW_DIR / ".prepared"
# Directory that receives prepare_epw_summary.json.
METADATA_DIR = OUTPUT_ROOT / "metadata"
def ensure_output_dir(path):
    """
    Make sure ``path`` exists as a directory with mode 0o755.

    Missing parent directories are created as well, and an already existing
    directory is accepted. The permission bits are applied on every call.
    The same ``path`` object is returned so the call can be used inline.
    """
    directory_mode = 0o755
    path.mkdir(exist_ok=True, parents=True)
    path.chmod(directory_mode)
    return path
def resolve_epw_source():
    """
    Decide where the NIWA EPW bundle should be read from.

    The unpacked directory wins when it exists and holds at least one
    ``*.epw`` file; otherwise the archive is used when it exists.

    Returns a dict with ``source_type`` (``"directory"`` or ``"archive"``)
    and ``source_path``. Raises FileNotFoundError when neither is available.
    """
    directory_usable = PREFERRED_EPW_DIR.exists() and any(
        PREFERRED_EPW_DIR.glob("*.epw")
    )
    if directory_usable:
        return {"source_type": "directory", "source_path": PREFERRED_EPW_DIR}

    if not PREFERRED_EPW_ARCHIVE.exists():
        raise FileNotFoundError(
            "No NIWA EPW source found. Expected either "
            f"{PREFERRED_EPW_DIR} or {PREFERRED_EPW_ARCHIVE}."
        )

    return {"source_type": "archive", "source_path": PREFERRED_EPW_ARCHIVE}
def read_epw_rows(epw_path):
    """
    Parse an EPW file as CSV and return its rows as a list of lists.

    The file is decoded as UTF-8 (a leading BOM is dropped); if that fails
    with a UnicodeDecodeError the whole file is re-read as Latin-1.
    """

    def parse_with(encoding):
        # newline="" leaves line-ending handling to the csv module.
        with epw_path.open("r", encoding=encoding, newline="") as stream:
            return list(csv.reader(stream))

    try:
        return parse_with("utf-8-sig")
    except UnicodeDecodeError:
        return parse_with("latin-1")
def normalize_epw(path):
    """
    Shift EPW hour fields from 0..23 to 1..24 when a file uses 0-based hours.

    The file is read with ``read_epw_rows``; the first eight rows are skipped
    as header records. A file is treated as 0-based when any remaining row
    has minute ``60`` and an hour equal to zero. The zero hour is compared
    numerically, so both ``"00"`` and ``"0"`` are recognised. In such a file,
    every minute-60 row whose hour is in 0..23 has its hour incremented and
    written back zero-padded to two digits.

    The file is rewritten in place (UTF-8, ``\\n`` line endings) only when at
    least one row changed.

    Returns True when the file was rewritten, False otherwise.
    """
    rows = read_epw_rows(path)
    data_rows = rows[8:]  # same row objects as in ``rows``; edits show up there

    def end_of_hour_value(row):
        """Return the integer hour of a minute-60 row, or None otherwise."""
        if len(row) < 5 or row[4].strip() != "60":
            return None
        hour_text = row[3].strip()
        # isdecimal() only accepts characters int() can parse; isdigit()
        # also accepts e.g. superscript digits, which make int() raise.
        return int(hour_text) if hour_text.isdecimal() else None

    # Hour zero never appears in a 1..24 file, so one such row is enough to
    # identify 0-based notation regardless of zero padding.
    if not any(end_of_hour_value(row) == 0 for row in data_rows):
        return False

    changed = False
    for row in data_rows:
        hour = end_of_hour_value(row)
        if hour is None or not 0 <= hour <= 23:
            continue
        shifted = str(hour + 1).zfill(2)
        if shifted != row[3]:
            row[3] = shifted
            changed = True

    if changed:
        with path.open("w", encoding="utf-8", newline="") as handle:
            csv.writer(handle, lineterminator="\n").writerows(rows)

    return changed
def _archive_members(archive_path):
    """
    List the ``.epw`` file entries of a gzip-compressed tar archive.

    Only regular-file members whose name ends in ``.epw`` are kept. The
    result is ordered by member name. Raises FileNotFoundError when the
    archive contains no such member.
    """
    with tarfile.open(archive_path, "r:gz") as archive:
        epw_entries = [
            entry
            for entry in archive.getmembers()
            if entry.isfile() and entry.name.endswith(".epw")
        ]

    if not epw_entries:
        raise FileNotFoundError(f"No .epw files found in archive {archive_path}.")

    epw_entries.sort(key=lambda entry: entry.name)
    return epw_entries
def copy_epw_bundle(source, output_dir):
    """
    Place every EPW file from ``source`` into ``output_dir`` and normalize it.

    ``source`` is a dict with ``source_type`` and ``source_path``. For a
    ``"directory"`` source the top-level ``*.epw`` files are copied; for an
    ``"archive"`` source each EPW member is written under its base name, so
    any directory structure inside the archive is flattened. Each file is
    passed to ``normalize_epw`` right after it has been written.

    Returns ``(copied, normalized)`` file counts. Raises ValueError for any
    other source type and FileNotFoundError when an archive member cannot be
    read.
    """
    kind = source["source_type"]
    origin = source["source_path"]
    if kind not in ("directory", "archive"):
        raise ValueError(f"Unsupported EPW source type {kind!r}.")

    copied_count = 0
    normalized_count = 0

    if kind == "directory":
        for original in sorted(origin.glob("*.epw")):
            destination = output_dir / original.name
            shutil.copy2(original, destination)
            copied_count += 1
            normalized_count += 1 if normalize_epw(destination) else 0
        return copied_count, normalized_count

    with tarfile.open(origin, "r:gz") as archive:
        for entry in _archive_members(origin):
            stream = archive.extractfile(entry)
            if stream is None:
                raise FileNotFoundError(
                    f"Could not extract {entry.name} from {origin}."
                )
            # Keep only the final path component of the member name.
            destination = output_dir / entry.name.rpartition("/")[2]
            with destination.open("wb") as sink:
                shutil.copyfileobj(stream, sink)
            copied_count += 1
            normalized_count += 1 if normalize_epw(destination) else 0
    return copied_count, normalized_count
def prepare_epw_files():
    """
    Stage the NIWA EPW bundle in stage-3 storage and record what was done.

    Resolves the source, copies and normalizes the files into
    PREPARED_EPW_DIR, touches the sentinel file, and writes
    ``prepare_epw_summary.json`` into METADATA_DIR with the source details
    and the copied/normalized file counts.
    """
    source = resolve_epw_source()
    prepared_dir = ensure_output_dir(PREPARED_EPW_DIR)
    copied_count, normalized_count = copy_epw_bundle(source, prepared_dir)

    # The sentinel is only touched once the whole bundle has been copied.
    PREPARED_EPW_SENTINEL.touch()

    summary = {
        "source_type": source["source_type"],
        "source_path": str(source["source_path"]),
        "copied_files": copied_count,
        "normalized_files": normalized_count,
    }

    ensure_output_dir(METADATA_DIR)
    summary_path = METADATA_DIR / "prepare_epw_summary.json"
    with summary_path.open("w", encoding="utf-8") as stream:
        json.dump(summary, stream, indent=2)
# Run the EPW preparation step when this module is executed as a script.
if __name__ == "__main__":
    prepare_epw_files()