Coverage for scripts / stage_3_scenarios / electricity / solar_prepare_epw.py: 63%

97 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-01 22:14 +0000

1""" 

2Prepare NIWA EPW files for the solar availability-factor workflow. 

3""" 

4 

5from __future__ import annotations 

6 

7import csv 

8import json 

9import shutil 

10import tarfile 

11 

12from prepare_times_nz.utilities.filepaths import DATA_RAW, STAGE_3_DATA 

13 

# Preferred input: a directory of loose NIWA ``.epw`` files. It is checked
# first by resolve_epw_source().
PREFERRED_EPW_DIR = DATA_RAW / "external_data/niwa/tmy2_epw"
# Fallback input: the same bundle as a gzip-compressed tar archive.
PREFERRED_EPW_ARCHIVE = DATA_RAW / "external_data/niwa/tmy2_epw.tar.gz"

# Root of everything this workflow writes into stage-3 storage.
OUTPUT_ROOT = STAGE_3_DATA / "electricity/solar_af"
# Destination of the copied (and, where needed, normalized) EPW files.
PREPARED_EPW_DIR = OUTPUT_ROOT / "prepared_epw"
# Marker file touched by prepare_epw_files() once the bundle has been copied.
PREPARED_EPW_SENTINEL = PREPARED_EPW_DIR / ".prepared"
# Destination of the JSON run summary written by prepare_epw_files().
METADATA_DIR = OUTPUT_ROOT / "metadata"

21 

22 

def ensure_output_dir(path):
    """
    Make sure ``path`` exists as a directory with mode ``0o755``.

    Missing parent directories are created too, and a directory that is
    already present is accepted as-is. The same ``path`` object is returned so
    the call can be used inline.
    """
    directory = path
    directory.mkdir(exist_ok=True, parents=True)
    # The mode is applied in a separate step so the final permissions are
    # fixed regardless of how the directory was (or already had been) created.
    directory.chmod(0o755)
    return directory

30 

31 

def resolve_epw_source():
    """
    Decide where the NIWA EPW bundle should be read from.

    The loose-file directory wins when it exists and holds at least one
    ``*.epw`` file; otherwise the archive is used when it exists. The result is
    a dict with the keys ``source_type`` (``"directory"`` or ``"archive"``)
    and ``source_path``.

    Raises FileNotFoundError when neither source is available.
    """
    directory_usable = PREFERRED_EPW_DIR.exists() and any(
        PREFERRED_EPW_DIR.glob("*.epw")
    )
    if directory_usable:
        return dict(source_type="directory", source_path=PREFERRED_EPW_DIR)

    if not PREFERRED_EPW_ARCHIVE.exists():
        raise FileNotFoundError(
            "No NIWA EPW source found. Expected either "
            f"{PREFERRED_EPW_DIR} or {PREFERRED_EPW_ARCHIVE}."
        )

    return dict(source_type="archive", source_path=PREFERRED_EPW_ARCHIVE)

52 

53 

def read_epw_rows(epw_path):
    """
    Parse an EPW file into a list of CSV rows (each row a list of strings).

    The file is first decoded as UTF-8, dropping a leading byte-order mark if
    present. If that raises a decoding error, the file is read again as
    Latin-1.
    """

    def _parse(encoding):
        # newline="" lets the csv module handle line endings itself.
        with epw_path.open("r", encoding=encoding, newline="") as stream:
            return list(csv.reader(stream))

    try:
        parsed = _parse("utf-8-sig")
    except UnicodeDecodeError:
        parsed = _parse("latin-1")
    return parsed

64 

65 

def normalize_epw(path):
    """
    Rewrite an EPW file in place when its hour column is 0-based (0..23).

    Only rows after the first eight (skipped here as the header block) are
    inspected. A file is considered 0-based when at least one of those rows
    has minute ``60`` and an hour equal to zero, whether written ``"00"`` or
    ``"0"``. In such a file every minute-60 row whose hour is 0..23 gets the
    hour replaced by ``hour + 1``, zero-padded to two digits; all other rows
    are left untouched. The file is then written back as UTF-8 with ``\\n``
    line endings.

    Returns True when the file was rewritten and False when it was left alone.
    """
    rows = read_epw_rows(path)

    def _minute_60_hour(row):
        """Return the integer hour of a minute-60 record, or None."""
        if len(row) < 5 or row[4].strip() != "60":
            return None
        hour = row[3].strip()
        # str.isdigit() alone also accepts characters such as "²" that int()
        # rejects, so restrict the check to ASCII digits before converting.
        if not (hour.isascii() and hour.isdigit()):
            return None
        return int(hour)

    candidates = [(row, _minute_60_hour(row)) for row in rows[8:]]

    # An hour of zero can only occur in 0..23 notation; compare numerically so
    # unpadded files ("0") are detected as well as padded ones ("00").
    if not any(hour == 0 for _, hour in candidates):
        return False

    for row, hour in candidates:
        if hour is not None and hour <= 23:
            row[3] = str(hour + 1).zfill(2)

    # At least the hour-zero row was changed, so the file is always rewritten.
    with path.open("w", encoding="utf-8", newline="") as handle:
        csv.writer(handle, lineterminator="\n").writerows(rows)

    return True

98 

99 

def _archive_members(archive_path):
    """
    List the ``.epw`` entries of the gzip-compressed tar archive.

    Only regular-file members whose name ends in ``.epw`` are kept. They are
    returned as ``TarInfo`` objects ordered by member name.

    Raises FileNotFoundError when the archive holds no such member.
    """
    with tarfile.open(archive_path, "r:gz") as archive:
        selected = [
            info
            for info in archive.getmembers()
            if info.isfile() and info.name.endswith(".epw")
        ]

    if selected:
        selected.sort(key=lambda info: info.name)
        return selected

    raise FileNotFoundError(f"No .epw files found in archive {archive_path}.")

112 

113 

def copy_epw_bundle(source, output_dir):
    """
    Materialize every EPW file of ``source`` inside ``output_dir``.

    ``source`` is a dict with the keys ``source_type`` and ``source_path``.
    For a ``"directory"`` source the ``*.epw`` files are copied with
    ``shutil.copy2``. For an ``"archive"`` source each ``.epw`` member is
    extracted under its base name, dropping any folder part of the member
    name. Each written file is then passed to ``normalize_epw``.

    Returns a ``(copied, normalized)`` tuple: the number of files written and
    how many of them ``normalize_epw`` reported as changed.

    Raises ValueError for an unknown source type and FileNotFoundError when an
    archive member cannot be extracted.
    """
    source_type = source["source_type"]
    source_path = source["source_path"]

    if source_type not in ("directory", "archive"):
        raise ValueError(f"Unsupported EPW source type {source_type!r}.")

    # One entry per file written; the entry records whether it was normalized.
    outcomes = []

    if source_type == "directory":
        for epw_path in sorted(source_path.glob("*.epw")):
            destination = output_dir / epw_path.name
            shutil.copy2(epw_path, destination)
            outcomes.append(normalize_epw(destination))
    else:
        with tarfile.open(source_path, "r:gz") as handle:
            for member in _archive_members(source_path):
                extracted = handle.extractfile(member)
                if extracted is None:
                    raise FileNotFoundError(
                        f"Could not extract {member.name} from {source_path}."
                    )
                # Keep only the part after the last "/" of the member name.
                destination = output_dir / member.name.rpartition("/")[2]
                with destination.open("wb") as output_handle:
                    shutil.copyfileobj(extracted, output_handle)
                outcomes.append(normalize_epw(destination))

    return len(outcomes), sum(1 for was_normalized in outcomes if was_normalized)

150 

151 

def prepare_epw_files():
    """
    Run the whole EPW preparation step.

    Resolves the NIWA source, copies or extracts its EPW files into
    ``PREPARED_EPW_DIR`` (normalizing them in place), touches the sentinel
    file, and writes ``prepare_epw_summary.json`` into ``METADATA_DIR`` with
    the source used and the copied/normalized file counts.
    """
    source = resolve_epw_source()
    copied, normalized = copy_epw_bundle(
        source, ensure_output_dir(PREPARED_EPW_DIR)
    )

    # The sentinel is touched only after the whole bundle has been copied.
    PREPARED_EPW_SENTINEL.touch()

    summary = {
        "source_type": source["source_type"],
        "source_path": str(source["source_path"]),
        "copied_files": copied,
        "normalized_files": normalized,
    }
    ensure_output_dir(METADATA_DIR)
    summary_path = METADATA_DIR / "prepare_epw_summary.json"
    with summary_path.open("w", encoding="utf-8") as handle:
        handle.write(json.dumps(summary, indent=2))

175 

176 

# Run the preparation step only when this module is executed as a script.
if __name__ == "__main__":
    prepare_epw_files()