Coverage for moptipy/evaluation/progress.py: 68%

222 statements


1""" 

2Objects embodying the progress of a run over time. 

3 

4An instance of :class:`Progress` holds one :attr:`~Progress.time` vector and 

5an objective value (:attr:`~Progress.f`) vector. The time dimension (stored in 

6:attr:`~Progress.time_unit`) can either be in FEs or in milliseconds and the 

7objective value dimension (stored in :attr:`~Progress.f_name`) can be raw 

8objective values, standardized objective values, or normalized objective 

9values. 

10The two vectors together thus describe how a run of an optimization algorithm 

11improves the objective value over time. 

12""" 

from dataclasses import dataclass
from math import inf, isfinite
from typing import Any, Callable, Final, Generator, Iterable

import numpy as np
from pycommons.io.console import logger
from pycommons.io.csv import COMMENT_START, CSV_SEPARATOR
from pycommons.io.path import Path
from pycommons.strings.string_conv import num_to_str, str_to_num
from pycommons.types import type_error

from moptipy.api.logging import (
    KEY_ALGORITHM,
    KEY_GOAL_F,
    KEY_INSTANCE,
    KEY_RAND_SEED,
    PROGRESS_CURRENT_F,
    PROGRESS_FES,
    PROGRESS_TIME_MILLIS,
    SECTION_PROGRESS,
)
from moptipy.evaluation.base import (
    F_NAME_RAW,
    F_NAME_SCALED,
    KEY_ENCODING,
    KEY_OBJECTIVE_FUNCTION,
    TIME_UNIT_FES,
    TIME_UNIT_MILLIS,
    PerRunData,
    check_f_name,
    check_time_unit,
)
from moptipy.evaluation.log_parser import SetupAndStateParser
from moptipy.utils.logger import KEY_VALUE_SEPARATOR
from moptipy.utils.nputils import is_all_finite, is_np_float, is_np_int


@dataclass(frozen=True, init=False, order=False, eq=False)
class Progress(PerRunData):
    """An immutable record of progress information over a single run."""

    #: The time axis data.
    time: np.ndarray

    #: The unit of the time axis.
    time_unit: str

    #: The objective value data.
    f: np.ndarray

    #: The name of the objective value axis.
    f_name: str

    #: The standard value of the objective dimension.
    #: If :attr:`f_name` is `F_NAME_SCALED` or `F_NAME_NORMALIZED`,
    #: then this value has been used to normalize the data.
    f_standard: int | float | None

    def __init__(self,
                 algorithm: str,
                 instance: str,
                 objective: str,
                 encoding: str | None,
                 rand_seed: int,
                 time: np.ndarray,
                 time_unit: str,
                 f: np.ndarray,
                 f_name: str,
                 f_standard: int | float | None = None,
                 only_improvements: bool = True):
        """
        Create a consistent instance of :class:`Progress`.

        :param algorithm: the algorithm name
        :param instance: the instance name
        :param objective: the name of the objective function
        :param encoding: the name of the encoding that was used, if any, or
            `None` if no encoding was used
        :param rand_seed: the random seed
        :param time: the time axis data
        :param time_unit: the unit of the time axis
        :param f: the objective value axis data
        :param f_name: the name of the objective value axis data
        :param f_standard: the value used to standardize the objective
            value dimension
        :param only_improvements: enforce that f-values should be
            improving and time values increasing
        """

        super().__init__(algorithm, instance, objective, encoding, rand_seed)

        if not isinstance(time, np.ndarray):
            raise type_error(time, "time data", np.ndarray)
        time.flags.writeable = False
        if len(time.shape) != 1:
            raise ValueError("time array must be one-dimensional, but "
                             f"has shape {time.shape}.")
        if not is_np_int(time.dtype):
            raise TypeError("time data must be integer-valued, "
                            f"but has type {time.dtype}.")
        tl = time.size
        if tl <= 0:
            raise ValueError("time data must not be empty.")
        if tl > 1:
            if only_improvements:
                if np.any(time[1:] <= time[:-1]):
                    raise ValueError("time data must be strictly "
                                     f"increasing, but encountered {time}.")
            elif np.any(time[1:] < time[:-1]):
                raise ValueError("time data must be monotonically "
                                 f"increasing, but encountered {time}.")

        object.__setattr__(self, "time", time)
        object.__setattr__(self, "time_unit", check_time_unit(time_unit))

        mintime = 1 if time_unit == TIME_UNIT_FES else 0
        if np.any(time < mintime):
            raise ValueError(f"No time value can be less than {mintime} if"
                             f" time unit is {time_unit}.")

        if not isinstance(f, np.ndarray):
            raise type_error(f, "f data", np.ndarray)
        f.flags.writeable = False
        if len(f.shape) != 1:
            raise ValueError(
                f"f array must be one-dimensional, but has shape {f.shape}.")
        if is_np_float(f.dtype):
            if not is_all_finite(f):
                raise ValueError("f must be all finite.")
        elif not is_np_int(f.dtype):
            raise TypeError("f data must be integer- or float-valued, but"
                            f" encountered an {type(f)} of {f.dtype}.")
        fl = f.size
        if fl <= 0:
            raise ValueError("f data must not be empty.")
        if fl != tl:
            raise ValueError(f"Length {fl} of f data and length {tl} of "
                             "time data must be the same.")
        if not isinstance(only_improvements, bool):
            raise type_error(only_improvements, "only_improvements", bool)
        if only_improvements and (fl > 1):
            if np.any(f[1:-1] >= f[:-2]):
                raise ValueError(
                    "f data must be strictly decreasing, with "
                    "only the last entry being permitted as an exception.")
            if f[-1] > f[-2]:
                raise ValueError(f"last f-value ({f[-1]}) cannot be greater"
                                 f" than second-to-last ({f[-2]}).")
        object.__setattr__(self, "f", f)
        object.__setattr__(self, "f_name", check_f_name(f_name))

        if (f_name != F_NAME_RAW) and (f_standard is None):
            raise ValueError(f"If f_name is {f_name}, "
                             "then f_standard cannot be None.")
        if f_standard is not None:
            if isinstance(f_standard, float):
                if not isfinite(f_standard):
                    raise ValueError(f"f_standard cannot be {f_standard}.")
            elif not isinstance(f_standard, int):
                raise type_error(f_standard, "f_standard", (int, float))
        object.__setattr__(self, "f_standard", f_standard)

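# A minimal usage sketch (illustrative values only; the algorithm, instance,
# and objective names below are hypothetical, not part of this module):
#
#     import numpy as np
#     prg = Progress(algorithm="rls", instance="example", objective="f",
#                    encoding=None, rand_seed=1,
#                    time=np.array([1, 5, 20]), time_unit=TIME_UNIT_FES,
#                    f=np.array([100, 50, 10]), f_name=F_NAME_RAW)
#     # prg.time and prg.f are then read-only, strictly improving vectors.
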

def to_csv(progress: Progress, file: str,
           put_header: bool = True) -> str:
    """
    Store a :class:`Progress` record in a CSV file.

    :param progress: the progress record to store
    :param file: the file to generate
    :param put_header: should we put a header with meta-data?
    :return: the fully resolved file name
    """
    if not isinstance(progress, Progress):
        raise type_error(progress, "progress", Progress)
    if not isinstance(put_header, bool):
        raise type_error(put_header, "put_header", bool)
    path: Final[Path] = Path(file)
    logger(f"Writing progress object to CSV file {path!r}.")
    path.ensure_parent_dir_exists()

    with path.open_for_write() as out:
        sep: Final[str] = CSV_SEPARATOR
        write: Final[Callable[[str], int]] = out.write
        if put_header:
            kv: Final[str] = KEY_VALUE_SEPARATOR
            cmt: Final[str] = COMMENT_START
            write(f"{cmt} {KEY_ALGORITHM}{kv}{progress.algorithm}\n")
            write(f"{cmt} {KEY_INSTANCE}{kv}{progress.instance}\n")
            write(f"{cmt} {KEY_OBJECTIVE_FUNCTION}{kv}{progress.objective}\n")
            if progress.encoding is not None:
                write(f"{cmt} {KEY_ENCODING}{kv}{progress.encoding}\n")
            write(f"{cmt} {KEY_RAND_SEED}{kv}{hex(progress.rand_seed)}\n")
            if progress.f_standard is not None:
                write(f"{cmt} {KEY_GOAL_F}{kv}{progress.f_standard}\n")
        write(f"{progress.time_unit}{sep}{progress.f_name}\n")
        for i, t in enumerate(progress.time):
            write(f"{t}{sep}{num_to_str(progress.f[i])}\n")

    logger(f"Done writing progress object to CSV file {path!r}.")

    path.enforce_file()
    return path

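# A usage sketch (hypothetical output path; `prg` is a Progress record as
# constructed in the sketch above):
#
#     out_file = to_csv(prg, "results/rls_example_0x1.csv")
#     # The file then holds commented meta-data rows (algorithm, instance,
#     # seed, ...), a header row naming the time unit and f-name, and one
#     # time/f row per logged point.
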

class __InnerLogParser(SetupAndStateParser[Progress]):
    """The internal log parser class."""

    def __init__(self, time_unit: str, f_name: str,
                 f_standard: dict[str, int | float] | None,
                 only_improvements: bool,
                 path_filter: Callable[[Path], bool] | None) -> None:
        """
        Create the internal log parser.

        :param time_unit: the time unit
        :param f_name: the objective name
        :param f_standard: a dictionary mapping instances to standard values
        :param only_improvements: enforce that f-values should be improving
            and time values increasing
        :param path_filter: the path filter
        """
        super().__init__(path_filter)
        self.__time_unit = check_time_unit(time_unit)
        self.__f_name = check_f_name(f_name)
        self.__last_fe: int | None = None
        self.__t_collector: Final[list[int]] = []
        self.__f_collector: Final[list[int | float]] = []
        if not isinstance(only_improvements, bool):
            raise type_error(only_improvements, "only_improvements", bool)
        self.__only_improvements = only_improvements
        if (f_standard is not None) and (not isinstance(f_standard, dict)):
            raise type_error(f_standard, "f_standard", dict)
        self.__f_standard: Final[dict[str, int | float] | None] \
            = f_standard
        self.__state: int = 0

    def _parse_file(self, file: Path) -> Progress | None:
        super()._parse_file(file)
        if self.__state != 2:
            raise ValueError(
                "Illegal state, log file must have a "
                f"{SECTION_PROGRESS!r} section.")
        if not self.__f_collector:
            raise ValueError("f-collector cannot be empty.")
        if not self.__t_collector:
            raise ValueError("time-collector cannot be empty.")
        self.__state = 0

        f_standard: int | float | None = None
        if (self.__f_standard is not None) and \
                (self.instance in self.__f_standard):
            f_standard = self.__f_standard[self.instance]
        if f_standard is None:
            f_standard = self.goal_f
        if (self.__f_name != F_NAME_RAW) and (f_standard is None):
            raise ValueError(f"f_standard cannot be {f_standard} if f_name "
                             f"is {self.__f_name}.")
        tt = self.total_time_millis if (
            self.__time_unit == TIME_UNIT_MILLIS) else self.total_fes
        if tt < self.__t_collector[-1]:
            raise ValueError(
                f"Last time value {tt} is inconsistent with the last"
                f" recorded time value {self.__t_collector[-1]}.")
        if self.__last_fe < self.total_fes:
            if tt > self.__t_collector[-1]:
                self.__t_collector.append(tt)
                self.__f_collector.append(self.__f_collector[-1])
        elif self.__last_fe > self.total_fes:
            raise ValueError(
                f"Last FE {self.__last_fe} inconsistent with total number"
                f" {self.total_fes} of FEs.")

        ff: np.ndarray
        if self.__f_name == F_NAME_RAW:
            ff = np.array(self.__f_collector)
        elif self.__f_name == F_NAME_SCALED:
            ff = np.array([f / f_standard for f in self.__f_collector])
        else:
            ff = np.array([(f - f_standard) / f_standard
                           for f in self.__f_collector])
        self.__f_collector.clear()

        return Progress(self.algorithm,
                        self.instance,
                        self.objective,
                        self.encoding,
                        self.rand_seed,
                        np.array(self.__t_collector),
                        self.__time_unit,
                        ff,
                        self.__f_name,
                        f_standard,
                        self.__only_improvements)

    def _end_parse_file(self, file: Path) -> None:
        """Clean up."""
        self.__t_collector.clear()
        self.__last_fe = None
        super()._end_parse_file(file)

    def _start_section(self, title: str) -> bool:
        if title == SECTION_PROGRESS:
            if self.__state != 0:
                raise ValueError(f"Already did section {title}.")
            self.__state = 1
            return True
        return super()._start_section(title)

    def _needs_more_lines(self) -> bool:
        return (self.__state < 2) or super()._needs_more_lines()

    def _lines(self, lines: list[str]) -> bool:
        if not isinstance(lines, list):
            raise type_error(lines, "lines", list)
        if self.__state != 1:
            return super()._lines(lines)
        n_rows = len(lines)
        if n_rows < 2:
            raise ValueError("lines must contain at least two elements,"
                             f" but contains {n_rows}.")

        columns = [c.strip() for c in lines[0].split(CSV_SEPARATOR)]
        n_cols = len(columns)
        if n_cols < 3:
            raise ValueError("There must be at least three columns, "
                             f"but found {n_cols} in {lines[0]!r}.")

        time_col_name: str = PROGRESS_TIME_MILLIS if \
            self.__time_unit == TIME_UNIT_MILLIS else PROGRESS_FES
        time_col_idx: int = -1
        f_col_idx: int = -1
        fe_col_idx: int = -1
        for idx, col in enumerate(columns):  # find the columns we need
            if col == PROGRESS_FES:
                fe_col_idx = idx
            if col == time_col_name:
                if time_col_idx >= 0:
                    raise ValueError(f"Time column {time_col_name} "
                                     "appears twice.")
                time_col_idx = idx
            elif col == PROGRESS_CURRENT_F:
                if f_col_idx >= 0:
                    raise ValueError(
                        f"F column {PROGRESS_CURRENT_F} "
                        "appears twice.")
                f_col_idx = idx
        # defensive check: fail fast instead of silently indexing with -1
        if (fe_col_idx < 0) or (time_col_idx < 0) or (f_col_idx < 0):
            raise ValueError(
                f"Required columns missing in header {lines[0]!r}.")

        def aa(splt):  # noqa
            return splt[time_col_idx], splt[f_col_idx]

        time: Iterable[int]
        f: Iterable[Any]
        time, f = zip(*[[c.strip()
                         for c in aa(line.split(CSV_SEPARATOR))]
                        for line in lines[1:]], strict=True)
        time = [int(t) for t in time]
        f = [str_to_num(v) for v in f]
        if self.__only_improvements:
            biggest_t: int = -1
            best_f: int | float = inf
            for idx, t in enumerate(time):
                v = f[idx]
                if t > biggest_t:
                    if biggest_t >= 0:
                        self.__t_collector.append(biggest_t)
                        self.__f_collector.append(best_f)
                    best_f = v
                    biggest_t = t
                elif v < best_f:
                    best_f = v
            if biggest_t >= 0:
                self.__t_collector.append(biggest_t)
                self.__f_collector.append(best_f)
        else:
            self.__t_collector.extend(time)
            self.__f_collector.extend(f)

        self.__last_fe = int((lines[-1].split(CSV_SEPARATOR))[fe_col_idx])
        if self.__last_fe <= 0:
            raise ValueError(f"Last FE cannot be {self.__last_fe}.")

        self.__state = 2
        return self._needs_more_lines()


def from_logs(path: str,
              time_unit: str = TIME_UNIT_FES,
              f_name: str = F_NAME_RAW,
              f_standard: dict[str, int | float] | None = None,
              only_improvements: bool = True,
              path_filter: Callable[[Path], bool] | None = None) \
        -> Generator[Progress, None, None]:
    """
    Parse a given path and yield all the progress data found.

    If `path` identifies a file with suffix `.txt`, then this file is parsed
    and the appropriate :class:`Progress` record is created. If `path`
    identifies a directory, then this directory is parsed recursively and
    one record is yielded for each log file found.

    :param path: the path to parse
    :param time_unit: the time unit
    :param f_name: the objective name
    :param f_standard: a dictionary mapping instances to standard values
    :param only_improvements: enforce that f-values should be improving and
        time values increasing
    :param path_filter: a function to filter paths
    :returns: a generator yielding one :class:`Progress` record per log file
    """
    return __InnerLogParser(time_unit, f_name, f_standard,
                            only_improvements, path_filter).parse(path)
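
# A usage sketch (hypothetical directory; assumes moptipy log files live
# under "results/"):
#
#     for prg in from_logs("results", time_unit=TIME_UNIT_FES,
#                          f_name=F_NAME_RAW):
#         print(prg.algorithm, prg.instance, prg.f[-1])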