Coverage for moptipy/evaluation/progress.py: 68% (222 statements)

1"""
2Objects embodying the progress of a run over time.
4An instance of :class:`Progress` holds one :attr:`~Progress.time` vector and
5an objective value (:attr:`~Progress.f`) vector. The time dimension (stored in
6:attr:`~Progress.time_unit`) can either be in FEs or in milliseconds and the
7objective value dimension (stored in :attr:`~Progress.f_name`) can be raw
8objective values, standardized objective values, or normalized objective
9values.
10The two vectors together thus describe how a run of an optimization algorithm
11improves the objective value over time.
12"""
from dataclasses import dataclass
from math import inf, isfinite
from typing import Any, Callable, Final, Generator, Iterable

import numpy as np
from pycommons.io.console import logger
from pycommons.io.csv import COMMENT_START, CSV_SEPARATOR
from pycommons.io.path import Path
from pycommons.strings.string_conv import num_to_str, str_to_num
from pycommons.types import type_error

from moptipy.api.logging import (
    KEY_ALGORITHM,
    KEY_GOAL_F,
    KEY_INSTANCE,
    KEY_RAND_SEED,
    PROGRESS_CURRENT_F,
    PROGRESS_FES,
    PROGRESS_TIME_MILLIS,
    SECTION_PROGRESS,
)
from moptipy.evaluation.base import (
    F_NAME_RAW,
    F_NAME_SCALED,
    KEY_ENCODING,
    KEY_OBJECTIVE_FUNCTION,
    TIME_UNIT_FES,
    TIME_UNIT_MILLIS,
    PerRunData,
    check_f_name,
    check_time_unit,
)
from moptipy.evaluation.log_parser import SetupAndStateParser
from moptipy.utils.logger import KEY_VALUE_SEPARATOR
from moptipy.utils.nputils import is_all_finite, is_np_float, is_np_int


@dataclass(frozen=True, init=False, order=False, eq=False)
class Progress(PerRunData):
    """An immutable record of progress information over a single run."""

    #: The time axis data.
    time: np.ndarray

    #: The unit of the time axis.
    time_unit: str

    #: The objective value data.
    f: np.ndarray

    #: The name of the objective value axis.
    f_name: str

    #: The standard value of the objective dimension.
    #: If :attr:`f_name` is `F_NAME_SCALED` or `F_NAME_NORMALIZED`,
    #: then this value has been used to normalize the data.
    f_standard: int | float | None

    def __init__(self,
                 algorithm: str,
                 instance: str,
                 objective: str,
                 encoding: str | None,
                 rand_seed: int,
                 time: np.ndarray,
                 time_unit: str,
                 f: np.ndarray,
                 f_name: str,
                 f_standard: int | float | None = None,
                 only_improvements: bool = True):
        """
        Create a consistent instance of :class:`Progress`.

        :param algorithm: the algorithm name
        :param instance: the instance name
        :param objective: the name of the objective function
        :param encoding: the name of the encoding that was used, if any, or
            `None` if no encoding was used
        :param rand_seed: the random seed
        :param time: the time axis data
        :param time_unit: the unit of the time axis
        :param f: the objective value axis data
        :param f_name: the name of the objective value axis data
        :param f_standard: the value used to standardize the objective
            value dimension
        :param only_improvements: enforce that f-values should be
            improving and time values increasing
        """
        super().__init__(algorithm, instance, objective, encoding, rand_seed)

        if not isinstance(time, np.ndarray):
            raise type_error(time, "time data", np.ndarray)
        time.flags.writeable = False
        if len(time.shape) != 1:
            raise ValueError("time array must be one-dimensional, but "
                             f"has shape {time.shape}.")
        if not is_np_int(time.dtype):
            raise TypeError("time data must be integer-valued, "
                            f"but has type {time.dtype}.")
        tl = time.size
        if tl <= 0:
            raise ValueError("time data must not be empty.")
        if tl > 1:
            # With only_improvements, every time stamp must be strictly
            # larger than its predecessor; otherwise, stamps may repeat
            # but must never decrease.
            if only_improvements:
                if np.any(time[1:] <= time[:-1]):
                    raise ValueError("time data must be strictly "
                                     f"increasing, but encountered {time}.")
            elif np.any(time[1:] < time[:-1]):
                raise ValueError("time data must be monotonically "
                                 f"increasing, but encountered {time}.")

        object.__setattr__(self, "time", time)
        object.__setattr__(self, "time_unit", check_time_unit(time_unit))

        # FEs are counted from 1, while a millisecond clock may start at 0.
        mintime = 1 if time_unit == TIME_UNIT_FES else 0
        if np.any(time < mintime):
            raise ValueError(f"No time value can be less than {mintime} if"
                             f" time unit is {time_unit}.")

        if not isinstance(f, np.ndarray):
            raise type_error(f, "f data", np.ndarray)
        f.flags.writeable = False
        if len(f.shape) != 1:
            raise ValueError(
                f"f array must be one-dimensional, but has shape {f.shape}.")
        if is_np_float(f.dtype):
            if not is_all_finite(f):
                raise ValueError("f must be all finite.")
        elif not is_np_int(f.dtype):
            raise TypeError("f data must be integer- or float-valued, but"
                            f" encountered an {type(f)} of {f.dtype}.")
        fl = f.size
        if fl <= 0:
            raise ValueError("f data must not be empty.")
        if fl != tl:
            raise ValueError(f"Length {fl} of f data and length {tl} of "
                             "time data must be the same.")
        if not isinstance(only_improvements, bool):
            raise type_error(only_improvements, "only_improvements", bool)
        if only_improvements and (fl > 1):
            # All f-values except the last one must be strictly decreasing;
            # the last one may repeat the second-to-last value, e.g., if the
            # run terminated without a final improvement.
            if np.any(f[1:-1] >= f[:-2]):
                raise ValueError(
                    "f data must be strictly decreasing, with "
                    "only the last entry being permitted as an exception.")
            if f[-1] > f[-2]:
                raise ValueError(f"last f-value ({f[-1]}) cannot be greater"
                                 f" than second-to-last ({f[-2]}).")
        object.__setattr__(self, "f", f)
        object.__setattr__(self, "f_name", check_f_name(f_name))

        # A standard value is required whenever f was scaled or normalized.
        if (f_name != F_NAME_RAW) and (f_standard is None):
            raise ValueError(f"If f_name is {f_name}, "
                             f"then f_standard cannot be {f_standard}.")
        if f_standard is not None:
            if isinstance(f_standard, float):
                if not isfinite(f_standard):
                    raise ValueError(f"f_standard cannot be {f_standard}.")
            elif not isinstance(f_standard, int):
                raise type_error(f_standard, "f_standard", (int, float))
        object.__setattr__(self, "f_standard", f_standard)
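

# A minimal construction sketch (illustrative only; the algorithm and
# instance names below are made up). Assuming the imports at the top of
# this module, a raw-objective progress record could be created via:
#
#     p = Progress(algorithm="rls", instance="demo", objective="f",
#                  encoding=None, rand_seed=1,
#                  time=np.array([1, 5, 9]), time_unit=TIME_UNIT_FES,
#                  f=np.array([10, 7, 3]), f_name=F_NAME_RAW)
#
# With the default only_improvements=True, the constructor enforces that
# time is strictly increasing and f is strictly decreasing (only the last
# entry may repeat), so the two vectors always form a valid progress curve.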


def to_csv(progress: Progress, file: str,
           put_header: bool = True) -> Path:
    """
    Store a :class:`Progress` record in a CSV file.

    :param progress: the progress record to store
    :param file: the file to generate
    :param put_header: should we put a header with meta-data?
    :return: the fully resolved file name
    """
    if not isinstance(progress, Progress):
        raise type_error(progress, "progress", Progress)
    if not isinstance(put_header, bool):
        raise type_error(put_header, "put_header", bool)
    path: Final[Path] = Path(file)
    logger(f"Writing progress object to CSV file {path!r}.")
    path.ensure_parent_dir_exists()

    with path.open_for_write() as out:
        sep: Final[str] = CSV_SEPARATOR
        write: Final[Callable[[str], int]] = out.write
        if put_header:
            kv: Final[str] = KEY_VALUE_SEPARATOR
            cmt: Final[str] = COMMENT_START
            write(f"{cmt} {KEY_ALGORITHM}{kv}{progress.algorithm}\n")
            write(f"{cmt} {KEY_INSTANCE}{kv}{progress.instance}\n")
            write(f"{cmt} {KEY_OBJECTIVE_FUNCTION}{kv}"
                  f"{progress.objective}\n")
            if progress.encoding is not None:
                write(f"{cmt} {KEY_ENCODING}{kv}{progress.encoding}\n")
            write(f"{cmt} {KEY_RAND_SEED}{kv}{hex(progress.rand_seed)}\n")
            if progress.f_standard is not None:
                write(f"{cmt} {KEY_GOAL_F}{kv}{progress.f_standard}\n")
        write(f"{progress.time_unit}{sep}{progress.f_name}\n")
        for i, t in enumerate(progress.time):
            write(f"{t}{sep}{num_to_str(progress.f[i])}\n")

    logger(f"Done writing progress object to CSV file {path!r}.")
    path.enforce_file()
    return path
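

# Usage sketch (illustrative; `p` is a Progress record like the one
# sketched above, and "progress.txt" is a hypothetical output path):
#
#     out_path = to_csv(p, "progress.txt", put_header=True)
#
# With put_header=True, the run's meta-data (algorithm, instance,
# objective, seed, ...) is emitted as comment lines before the two-column
# time/f table; the returned path is fully resolved and verified to exist.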


class __InnerLogParser(SetupAndStateParser[Progress]):
    """The internal log parser class."""

    def __init__(self, time_unit: str, f_name: str,
                 f_standard: dict[str, int | float] | None,
                 only_improvements: bool,
                 path_filter: Callable[[Path], bool] | None) -> None:
        """
        Create the internal log parser.

        :param time_unit: the time unit
        :param f_name: the objective name
        :param f_standard: a dictionary mapping instances to standard values
        :param only_improvements: enforce that f-values should be improving
            and time values increasing
        :param path_filter: the path filter
        """
        super().__init__(path_filter)
        self.__time_unit = check_time_unit(time_unit)
        self.__f_name = check_f_name(f_name)
        self.__last_fe: int | None = None
        self.__t_collector: Final[list[int]] = []
        self.__f_collector: Final[list[int | float]] = []
        if not isinstance(only_improvements, bool):
            raise type_error(only_improvements, "only_improvements", bool)
        self.__only_improvements = only_improvements
        if (f_standard is not None) and (not isinstance(f_standard, dict)):
            raise type_error(f_standard, "f_standard", dict)
        self.__f_standard: Final[dict[str, int | float] | None] \
            = f_standard
        self.__state: int = 0
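
    # Parser state: 0 = before the progress section, 1 = inside it,
    # 2 = progress section fully consumed. _parse_file requires state 2
    # and resets it to 0 for the next file.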

    def _parse_file(self, file: Path) -> Progress | None:
        super()._parse_file(file)
        if self.__state != 2:
            raise ValueError(
                "Illegal state, log file must have a "
                f"{SECTION_PROGRESS!r} section.")
        if not self.__f_collector:
            raise ValueError("f-collector cannot be empty.")
        if not self.__t_collector:
            raise ValueError("time-collector cannot be empty.")
        self.__state = 0

        # Determine the standard value for this instance: an explicit
        # mapping takes precedence, the goal objective value from the log
        # serves as fallback.
        f_standard: int | float | None = None
        if (self.__f_standard is not None) and \
                (self.instance in self.__f_standard):
            f_standard = self.__f_standard[self.instance]
        if f_standard is None:
            f_standard = self.goal_f
        if (self.__f_name != F_NAME_RAW) and (f_standard is None):
            raise ValueError(f"f_standard cannot be {f_standard} if f_name "
                             f"is {self.__f_name}.")
        tt = self.total_time_millis \
            if (self.__time_unit == TIME_UNIT_MILLIS) else self.total_fes
        if tt < self.__t_collector[-1]:
            raise ValueError(
                f"Last time value {tt} is inconsistent with the last "
                f"recorded time value {self.__t_collector[-1]}.")
        if self.__last_fe < self.total_fes:
            # The run continued after the last improvement: repeat the
            # final f-value at the total consumed time to close the curve.
            if tt > self.__t_collector[-1]:
                self.__t_collector.append(tt)
                self.__f_collector.append(self.__f_collector[-1])
        elif self.__last_fe > self.total_fes:
            raise ValueError(
                f"Last FE {self.__last_fe} inconsistent with total number "
                f"{self.total_fes} of FEs.")

        ff: np.ndarray
        if self.__f_name == F_NAME_RAW:
            ff = np.array(self.__f_collector)
        elif self.__f_name == F_NAME_SCALED:
            ff = np.array([f / f_standard for f in self.__f_collector])
        else:  # F_NAME_NORMALIZED
            ff = np.array([(f - f_standard) / f_standard
                           for f in self.__f_collector])
        self.__f_collector.clear()

        return Progress(self.algorithm,
                        self.instance,
                        self.objective,
                        self.encoding,
                        self.rand_seed,
                        np.array(self.__t_collector),
                        self.__time_unit,
                        ff,
                        self.__f_name,
                        f_standard,
                        self.__only_improvements)

    def _end_parse_file(self, file: Path) -> None:
        """Clean up."""
        self.__t_collector.clear()
        self.__last_fe = None
        super()._end_parse_file(file)

    def _start_section(self, title: str) -> bool:
        if title == SECTION_PROGRESS:
            if self.__state != 0:
                raise ValueError(f"Already did section {title}.")
            self.__state = 1
            return True
        return super()._start_section(title)

    def _needs_more_lines(self) -> bool:
        return (self.__state < 2) or super()._needs_more_lines()

    def _lines(self, lines: list[str]) -> bool:
        if not isinstance(lines, list):
            raise type_error(lines, "lines", list)
        if self.__state != 1:
            return super()._lines(lines)
        n_rows = len(lines)
        if n_rows < 2:
            raise ValueError("lines must contain at least two elements, "
                             f"but contains {n_rows}.")

        columns = [c.strip() for c in lines[0].split(CSV_SEPARATOR)]
        n_cols = len(columns)
        if n_cols < 3:
            raise ValueError("There must be at least three columns, "
                             f"but found {n_cols} in {lines[0]!r}.")

        # Locate the FE, time, and f columns in the header row.
        time_col_name: str = PROGRESS_TIME_MILLIS if \
            self.__time_unit == TIME_UNIT_MILLIS else PROGRESS_FES
        time_col_idx: int = -1
        f_col_idx: int = -1
        fe_col_idx: int = -1
        for idx, col in enumerate(columns):
            if col == PROGRESS_FES:
                fe_col_idx = idx
            if col == time_col_name:
                if time_col_idx >= 0:
                    raise ValueError(f"Time column {time_col_name} "
                                     "appears twice.")
                time_col_idx = idx
            elif col == PROGRESS_CURRENT_F:
                if f_col_idx >= 0:
                    raise ValueError(
                        f"F column {PROGRESS_CURRENT_F} "
                        "appears twice.")
                f_col_idx = idx

        # Helper: pick the time and f cells from a split CSV row.
        def aa(splt):  # noqa
            return splt[time_col_idx], splt[f_col_idx]

        time: Iterable[int]
        f: Iterable[Any]
        time, f = zip(*[[c.strip()
                         for c in aa(line.split(CSV_SEPARATOR))]
                        for line in lines[1:]], strict=True)
        time = [int(t) for t in time]
        f = [str_to_num(v) for v in f]
        if self.__only_improvements:
            # Compress the data: keep, for each time stamp, only the best
            # (smallest) f-value seen up to and including that time.
            biggest_t: int = -1
            best_f: int | float = inf
            for idx, t in enumerate(time):
                v = f[idx]
                if t > biggest_t:
                    if biggest_t >= 0:
                        self.__t_collector.append(biggest_t)
                        self.__f_collector.append(best_f)
                    best_f = v
                    biggest_t = t
                elif v < best_f:
                    best_f = v
            if biggest_t >= 0:
                self.__t_collector.append(biggest_t)
                self.__f_collector.append(best_f)
        else:
            self.__t_collector.extend(time)
            self.__f_collector.extend(f)

        self.__last_fe = int((lines[-1].split(CSV_SEPARATOR))[fe_col_idx])
        if self.__last_fe <= 0:
            raise ValueError(f"Last FE cannot be {self.__last_fe}.")

        self.__state = 2
        return self._needs_more_lines()
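
    # Note: the SECTION_PROGRESS body consumed by _lines above is a small
    # CSV block: a header row naming the columns (among them PROGRESS_FES,
    # PROGRESS_TIME_MILLIS, and PROGRESS_CURRENT_F), followed by one row
    # per logged improvement, with cells joined by CSV_SEPARATOR. _lines
    # locates the time and f columns by name and feeds their values into
    # the collectors, which _parse_file then turns into a Progress record.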


def from_logs(path: str,
              time_unit: str = TIME_UNIT_FES,
              f_name: str = F_NAME_RAW,
              f_standard: dict[str, int | float] | None = None,
              only_improvements: bool = True,
              path_filter: Callable[[Path], bool] | None = None) \
        -> Generator[Progress, None, None]:
    """
    Parse a given path and yield all progress data found.

    If `path` identifies a file with suffix `.txt`, then this file is
    parsed and the appropriate :class:`Progress` record is created. If
    `path` identifies a directory, then this directory is parsed
    recursively and one record is yielded for each log file found.

    :param path: the path to parse
    :param time_unit: the time unit
    :param f_name: the objective name
    :param f_standard: a dictionary mapping instances to standard values
    :param only_improvements: enforce that f-values should be improving and
        time values increasing
    :param path_filter: a function to filter paths
    :returns: the :class:`Progress` records found
    """
    return __InnerLogParser(time_unit, f_name, f_standard,
                            only_improvements, path_filter).parse(path)
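

# Usage sketch (illustrative; "results" is a hypothetical directory of
# moptipy log files):
#
#     for prog in from_logs("results", time_unit=TIME_UNIT_FES,
#                           f_name=F_NAME_RAW):
#         print(prog.algorithm, prog.instance, prog.time[-1], prog.f[-1])
#
# Each log file yields one Progress record; the final entries of time and
# f correspond to the end-of-run state.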