Coverage for scripts/stage_3_scenarios/electricity/solar_prepare_epw.py: 85%

111 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-16 23:05 +0000

1""" 

2Prepare NIWA EPW files for the solar availability-factor workflow. 

3""" 

4 

5from __future__ import annotations 

6 

7import csv 

8import json 

9import re 

10import shutil 

11import tarfile 

12from pathlib import Path 

13 

14from prepare_times_nz.utilities.filepaths import DATA_RAW, STAGE_3_DATA 

15 

# Location of the NIWA inputs inside the raw-data tree.
NIWA_DATA_DIR = DATA_RAW / "external_data/niwa"

# Stage-3 output locations used by this script.
OUTPUT_ROOT = STAGE_3_DATA / "electricity/solar_af"
PREPARED_EPW_DIR = OUTPUT_ROOT / "prepared_epw"
# Marker file touched by prepare_epw_files() once the bundle has validated.
PREPARED_EPW_SENTINEL = PREPARED_EPW_DIR / ".prepared"
METADATA_DIR = OUTPUT_ROOT / "metadata"

# The 18 two-letter zone codes a prepared bundle must contain, no more, no less.
EXPECTED_ZONES = {
    "AK", "BP", "CC", "DN", "EC", "HN",
    "IN", "MW", "NL", "NM", "NP", "OC",
    "QL", "RR", "TP", "WC", "WI", "WN",
}

# Accepted EPW filenames; the named group ``zone`` captures the zone code.
EPW_FILENAME_PATTERNS = (re.compile(r"^TMY3_NZ_(?P<zone>[A-Z]{2})\.epw$"),)

# (dataset label, source type, path) of the single supported EPW source.
SOURCE_CANDIDATE = ("TMY3", "archive", NIWA_DATA_DIR / "tmy3_epw.tar.gz")

47 

48 

def ensure_output_dir(path):
    """
    Make sure ``path`` exists as a directory with mode 0o755 and return it.

    Missing parent directories are created too. An already existing
    directory is accepted and simply has its mode reset.
    """
    path.mkdir(exist_ok=True, parents=True)
    path.chmod(0o755)
    return path

56 

57 

def extract_zone_code(filename: str) -> str | None:
    """
    Look up the zone code embedded in an EPW filename.

    The filename is matched against each entry of EPW_FILENAME_PATTERNS in
    order. The ``zone`` group of the first match is returned, or None when
    no pattern matches.
    """
    attempts = (pattern.match(filename) for pattern in EPW_FILENAME_PATTERNS)
    return next((hit.group("zone") for hit in attempts if hit), None)

67 

68 

def resolve_epw_source():
    """
    Describe the NIWA EPW source named by SOURCE_CANDIDATE.

    Returns a dict with the keys ``dataset``, ``source_type`` and
    ``source_path``. Raises FileNotFoundError when the candidate path does
    not exist.
    """
    dataset, source_type, source_path = SOURCE_CANDIDATE
    if not source_path.exists():
        raise FileNotFoundError("No NIWA EPW source found. Checked: " f"{source_path}.")

    return dict(
        dataset=dataset,
        source_type=source_type,
        source_path=source_path,
    )

82 

83 

def read_epw_rows(epw_path):
    """
    Parse an EPW file as CSV and return all of its rows as lists of strings.

    The file is decoded as UTF-8 (a leading BOM is dropped) first. If that
    fails with UnicodeDecodeError the whole file is re-read as latin-1.
    """

    def _load(encoding):
        # newline="" leaves line-ending handling to the csv module.
        with epw_path.open("r", encoding=encoding, newline="") as stream:
            return list(csv.reader(stream))

    try:
        return _load("utf-8-sig")
    except UnicodeDecodeError:
        return _load("latin-1")

94 

95 

def validate_epw_file(path: Path):
    """
    Check one EPW file and return its zone code.

    The checks run in this order and each failure raises ValueError:

    * the filename must yield a zone code via extract_zone_code();
    * after skipping the first 8 rows (presumably the EPW header block)
      exactly 8760 rows must remain;
    * each remaining row needs at least 5 columns, a minute field (column 5)
      of "0" or "60", and an hour field (column 4) that is an integer
      in 1..24.
    """
    zone = extract_zone_code(path.name)
    if zone is None:
        raise ValueError(
            f"Unsupported NIWA EPW filename {path.name}. Expected TMY3_NZ_<zone>.epw."
        )

    header_row_count = 8
    data_rows = read_epw_rows(path)[header_row_count:]
    if len(data_rows) != 8760:
        raise ValueError(f"Expected 8760 EPW rows in {path}, found {len(data_rows)}")

    # Reported row numbers are 1-based positions in the file, so the first
    # data row is row 9.
    for offset, fields in enumerate(data_rows):
        row_num = offset + header_row_count + 1
        if len(fields) < 5:
            raise ValueError(
                f"EPW data row {row_num} has fewer than 5 columns in {path}"
            )

        hour = fields[3].strip()
        minute = fields[4].strip()

        if minute not in ("0", "60"):
            raise ValueError(
                f"Unsupported EPW minute value {minute!r} at row {row_num} in {path}. "
                "The workflow expects NIWA EPW minute values of 0 or 60."
            )

        hour_is_valid = hour.isdigit() and 1 <= int(hour) <= 24
        if not hour_is_valid:
            raise ValueError(
                f"Unsupported EPW hour value {hour!r} at row {row_num} in {path}. "
                "The workflow now validates EPW-standard 01..24 hours instead of "
                "normalizing 00..23 values."
            )

    return zone

132 

133 

134def _archive_members(archive_path): 

135 """ 

136 Return the EPW members from the NIWA archive. 

137 """ 

138 with tarfile.open(archive_path, "r:gz") as handle: 

139 members = [member for member in handle.getmembers() if member.isfile()] 

140 epw_members = [member for member in members if member.name.endswith(".epw")] 

141 

142 if not epw_members: 

143 raise FileNotFoundError(f"No .epw files found in archive {archive_path}.") 

144 

145 return sorted(epw_members, key=lambda member: member.name) 

146 

147 

def clear_prepared_epw_dir(output_dir: Path):
    """
    Delete every ``*.epw`` entry directly inside ``output_dir``.

    Other files and anything in subdirectories are left untouched.
    """
    for stale_file in output_dir.glob("*.epw"):
        stale_file.unlink()

154 

155 

def copy_epw_bundle(source, output_dir):
    """
    Extract the EPW members of the source archive into ``output_dir``.

    ``source`` is a mapping with at least ``source_type`` and ``source_path``.
    Only the ``"archive"`` source type (a gzip-compressed tar) is supported.
    Each member is written under its base name, so any directory structure
    inside the archive is flattened.

    Returns a ``(count, paths)`` tuple: the number of files written and the
    list of target paths in extraction order.

    Raises ValueError for an unsupported source type and FileNotFoundError
    when a member cannot be read from the archive.
    """
    source_type = source["source_type"]
    source_path = source["source_path"]

    if source_type != "archive":
        raise ValueError(f"Unsupported EPW source type {source_type!r}.")

    copied_paths = []
    with tarfile.open(source_path, "r:gz") as handle:
        for member in _archive_members(source_path):
            extracted = handle.extractfile(member)
            if extracted is None:
                raise FileNotFoundError(
                    f"Could not extract {member.name} from {source_path}."
                )
            # Keep only the final path component of the member name.
            target_path = output_dir / member.name.rsplit("/", maxsplit=1)[-1]
            # Close the member stream as soon as it has been copied instead
            # of leaving it open until garbage collection.
            with extracted, target_path.open("wb") as output_handle:
                shutil.copyfileobj(extracted, output_handle)
            copied_paths.append(target_path)

    return len(copied_paths), copied_paths

181 

182 

def validate_prepared_bundle(epw_paths: list[Path]):
    """
    Validate that the prepared bundle contains exactly the expected zones.

    Every path is checked with validate_epw_file(), which also yields its
    zone code. The set of zones found must equal EXPECTED_ZONES.

    Returns a dict mapping zone code to EPW filename (a string, not a Path).

    Raises ValueError when two entries resolve to the same zone, when an
    expected zone is missing, or when an unexpected zone is present. Errors
    raised by validate_epw_file() propagate unchanged.
    """
    zone_to_file = {}
    for epw_path in sorted(epw_paths):
        zone = validate_epw_file(epw_path)
        existing = zone_to_file.get(zone)
        if existing is not None:
            # The mapping stores filenames as plain strings, so ``existing``
            # is formatted directly; it has no ``.name`` attribute.
            raise ValueError(
                f"Duplicate EPW files for zone {zone}: {existing} and {epw_path.name}"
            )
        zone_to_file[zone] = epw_path.name

    missing = sorted(EXPECTED_ZONES.difference(zone_to_file))
    if missing:
        raise ValueError(f"Missing EPW files for zones: {', '.join(missing)}")

    extra = sorted(set(zone_to_file).difference(EXPECTED_ZONES))
    if extra:
        raise ValueError(f"Unexpected EPW zones: {', '.join(extra)}")

    return zone_to_file

206 

207 

def prepare_epw_files():
    """
    Copy NIWA EPWs into stage-3 storage and validate them in place.

    Steps, in order:

    1. locate the source bundle (resolve_epw_source);
    2. create PREPARED_EPW_DIR, then remove any sentinel and ``*.epw`` files
       left by a previous run;
    3. extract the bundle and validate it (validate_prepared_bundle);
    4. touch PREPARED_EPW_SENTINEL and write
       ``prepare_epw_summary.json`` into METADATA_DIR.

    Any exception from the helpers propagates. In that case the sentinel is
    absent, so a failed run cannot be mistaken for a prepared bundle.
    """
    source = resolve_epw_source()
    output_dir = ensure_output_dir(PREPARED_EPW_DIR)

    # Remove the marker of an earlier successful run before touching the
    # directory. Otherwise a failure below would leave a stale ".prepared"
    # next to a cleared or only partially written bundle.
    PREPARED_EPW_SENTINEL.unlink(missing_ok=True)
    clear_prepared_epw_dir(output_dir)

    copied, copied_paths = copy_epw_bundle(source, output_dir)
    zone_to_file = validate_prepared_bundle(copied_paths)

    PREPARED_EPW_SENTINEL.touch()
    ensure_output_dir(METADATA_DIR)
    with (METADATA_DIR / "prepare_epw_summary.json").open(
        "w", encoding="utf-8"
    ) as handle:
        json.dump(
            {
                "dataset": source["dataset"],
                "source_type": source["source_type"],
                "source_path": str(source["source_path"]),
                "copied_files": copied,
                "prepared_files": sorted(path.name for path in copied_paths),
                "zones": [
                    {"ZoneCode": zone, "EPWFile": zone_to_file[zone]}
                    for zone in sorted(zone_to_file)
                ],
            },
            handle,
            indent=2,
        )

238 

239 

240if __name__ == "__main__": 

241 prepare_epw_files()