Coverage for moptipy / evaluation / log_parser.py: 79%
283 statements
coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
1"""
2Parsers for structured log data produced by the `moptipy` experiment API.
4The `moptipy` :class:`~moptipy.api.execution.Execution` and experiment-running
5facility (:func:`~moptipy.api.experiment.run_experiment`) uses the class
6:class:`~moptipy.utils.logger.Logger` from module :mod:`~moptipy.utils.logger`
7to produce log files complying with
8https://thomasweise.github.io/moptipy/#log-files.
10Here we provide a skeleton for parsing such log files in form of the class
11:class:`~LogParser`. It works similar to SAX-XML parsing in that the data
12is read is from files and methods that consume the data are invoked. By
13overwriting these methods, we can do useful things with the data.
15For example in module :mod:`~moptipy.evaluation.end_results`, the method
16:meth:`~moptipy.evaluation.end_results.from_logs` can load
17:class:`~moptipy.evaluation.end_results.EndResult` records from the logs
18and the method :meth:`~moptipy.evaluation.progress.from_logs` in
19module :mod:`~moptipy.evaluation.progress` reads the whole
20:class:`~moptipy.evaluation.progress.Progress` that the algorithms make
21over time.
22"""

from contextlib import suppress
from math import inf, isfinite, isinf
from typing import Callable, Final, TypeVar

from pycommons.io.csv import COMMENT_START, SCOPE_SEPARATOR
from pycommons.io.parser import Parser
from pycommons.io.path import Path
from pycommons.strings.string_conv import str_to_num
from pycommons.types import check_to_int_range, type_error

from moptipy.api.logging import (
    ERROR_SECTION_PREFIX,
    FILE_SUFFIX,
    KEY_BEST_F,
    KEY_GOAL_F,
    KEY_LAST_IMPROVEMENT_FE,
    KEY_LAST_IMPROVEMENT_TIME_MILLIS,
    KEY_MAX_FES,
    KEY_MAX_TIME_MILLIS,
    KEY_NAME,
    KEY_RAND_SEED,
    KEY_TOTAL_FES,
    KEY_TOTAL_TIME_MILLIS,
    SCOPE_ALGORITHM,
    SCOPE_ENCODING,
    SCOPE_OBJECTIVE_FUNCTION,
    SCOPE_PROCESS,
    SECTION_FINAL_STATE,
    SECTION_SETUP,
)
from moptipy.evaluation._utils import _check_max_time_millis
from moptipy.utils.logger import (
    SECTION_END,
    SECTION_START,
    parse_key_values,
)
from moptipy.utils.nputils import rand_seed_check
from moptipy.utils.strings import (
    PART_SEPARATOR,
    sanitize_name,
)

#: the maximum FEs of a black-box process
_FULL_KEY_MAX_FES: Final[str] = \
    f"{SCOPE_PROCESS}{SCOPE_SEPARATOR}{KEY_MAX_FES}"
#: the maximum runtime in milliseconds of a black-box process
_FULL_KEY_MAX_TIME_MILLIS: Final[str] = \
    f"{SCOPE_PROCESS}{SCOPE_SEPARATOR}{KEY_MAX_TIME_MILLIS}"
#: the goal objective value of a black-box process
_FULL_KEY_GOAL_F: Final[str] = f"{SCOPE_PROCESS}{SCOPE_SEPARATOR}{KEY_GOAL_F}"
#: the random seed
_FULL_KEY_RAND_SEED: Final[str] = \
    f"{SCOPE_PROCESS}{SCOPE_SEPARATOR}{KEY_RAND_SEED}"
#: the full algorithm name key
_FULL_KEY_ALGORITHM: Final[str] = \
    f"{SCOPE_ALGORITHM}{SCOPE_SEPARATOR}{KEY_NAME}"
#: the full objective function name key
_FULL_KEY_OBJECTIVE: Final[str] = \
    f"{SCOPE_OBJECTIVE_FUNCTION}{SCOPE_SEPARATOR}{KEY_NAME}"
#: the full encoding name key
_FULL_KEY_ENCODING: Final[str] = \
    f"{SCOPE_ENCODING}{SCOPE_SEPARATOR}{KEY_NAME}"


def _true(_) -> bool:
    """
    Get `True` as return value, always.

    :retval `True`: always
    """
    return True


#: the type variable for data to be read from the directories
T = TypeVar("T")


class LogParser[T](Parser[T]):
    """
    A log parser can parse a log file and separate the sections.

    The log parser is designed to load data from text files generated
    by :class:`~moptipy.utils.logger.FileLogger`. It can also recursively
    parse directories.
    """

    def __init__(self, path_filter: Callable[[Path], bool] | None = None):
        """
        Initialize the log parser.

        :param path_filter: a filter allowing us to skip paths or files. If
            this :class:`Callable` returns `True`, the file or directory is
            considered for parsing. If it returns `False`, it is skipped.
        """
        if path_filter is None:
            path_filter = _true
        elif not callable(path_filter):
            raise type_error(path_filter, "path_filter", call=True)
        #: the current depth in terms of directories
        self.__depth: int = 0
        #: the path filter
        self.__path_filter: Final[Callable[[Path], bool]] = path_filter

    def _should_list_directory(self, directory: Path) -> tuple[bool, bool]:
        """
        Decide whether to enter a directory to parse all files inside.

        :param directory: the path of the directory
        :return: a tuple with two `True` values if all the sub-directories
            and files inside the directory should be processed, two `False`
            values if this directory should be skipped and parsing should
            continue with the next sibling directory
        """
        should: Final[bool] = self.__path_filter(directory)
        return should, should

    def _should_parse_file(self, file: Path) -> bool:
        """
        Decide whether to start parsing a file.

        :param file: the file path
        :return: `True` if the file should be parsed, `False` if it should
            be skipped (in which case
            :meth:`~moptipy.evaluation.log_parser.LogParser._parse_file` is
            not invoked for it)
        """
        return file.endswith(FILE_SUFFIX) and self.__path_filter(file)

    # noinspection PyMethodMayBeStatic
    def _start_section(self, title: str) -> bool:
        """
        Start a section.

        If this method returns `True`, then all the lines of text of the
        section `title` will be read and together passed to
        :meth:`~moptipy.evaluation.log_parser.LogParser._lines`.
        If this method returns `False`, then the section will be skipped
        and we fast-forward to the next section, if any.

        :param title: the section title
        :return: `True` if the section data should be loaded and passed to
            :meth:`_lines`, `False` if the section can be skipped. In that
            case, we will fast-forward to the next
            :meth:`~moptipy.evaluation.log_parser.LogParser._start_section`.
        """
        if not title:
            raise ValueError(f"Title cannot be empty, but is {title!r}.")
        if title.startswith(ERROR_SECTION_PREFIX):
            raise ValueError(f"Encountered error section {title!r}.")
        return False

    # noinspection PyUnusedLocal
    # noinspection PyMethodMayBeStatic
    def _lines(self, lines: list[str]) -> bool:
        """
        Consume all the lines from a section.

        This method receives the complete text of a section, where all lines
        are separated and put into one list `lines`. Each line is stripped
        of whitespace and comments; empty lines are omitted.
        If this method returns `True`, we will continue parsing the file and
        move to the next section, if any, or directly to the end of the file
        parsing process.

        :param lines: the lines to consume
        :return: `True` if further parsing is necessary and the next section
            should be fed to
            :meth:`~moptipy.evaluation.log_parser.LogParser._start_section`,
            `False` if the parsing process can be terminated
        """
        del lines
        return True
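
    # A minimal sketch (illustrative only, not part of moptipy) of how a
    # subclass would use the two hooks above. The subclass name and the
    # section title "PROGRESS" are assumptions made for the example:
    #
    #     class _SectionCollector(LogParser[list[str]]):
    #         def __init__(self) -> None:
    #             super().__init__()
    #             self.collected: list[str] = []
    #
    #         def _start_section(self, title: str) -> bool:
    #             super()._start_section(title)   # keeps the error checks
    #             return title == "PROGRESS"      # only load this section
    #
    #         def _lines(self, lines: list[str]) -> bool:
    #             self.collected.extend(lines)
    #             return False                    # no further sections needed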

    def _parse_file(self, file: Path) -> T | None:  # pylint: disable=R1711
        """
        Parse the contents of a file.

        :param file: the file to parse
        :return: the result of parsing, if any (this base implementation
            always returns `None`)
        """
        lines: list[str] = []
        buffer: list[str] = []
        state: int = 0
        wants_section: bool = False
        sec_end: str = ""
        section: str = ""
        sect_start: Final[str] = SECTION_START
        sect_end: Final[str] = SECTION_END
        cmt_chr: Final[str] = COMMENT_START

        index: int = 0
        with file.open_for_read() as handle:
            while True:

                # get the next line
                if index >= len(buffer):
                    try:
                        buffer = handle.readlines(128)
                    except Exception as be:
                        raise ValueError(
                            f"Error when reading lines from file {file!r} "
                            f"while in section {section!r}."
                            if state == 1 else
                            "Error when reading lines from file "
                            f"{file!r}.") from be
                    if (buffer is None) or (len(buffer) <= 0):
                        break
                    index = 0

                orig_cur = buffer[index]
                index += 1

                # strip next line from comments and white space
                cur = orig_cur.strip()
                if len(cur) <= 0:
                    continue

                i = cur.find(cmt_chr)
                if i >= 0:
                    cur = cur[:i].strip()
                    if len(cur) <= 0:
                        continue

                if state in {0, 2}:
                    if not cur.startswith(sect_start):
                        raise ValueError("Line should start with "
                                         f"{sect_start!r} but is "
                                         f"{orig_cur!r} in file {file!r}.")
                    section = cur[len(sect_start):]
                    if len(section) <= 0:
                        raise ValueError(
                            "Section title cannot be empty in "
                            f"{file!r}, but encountered {orig_cur!r}.")
                    state = 1
                    sec_end = sect_end + section
                    wants_section = self._start_section(section)
                elif state == 1:
                    if cur == sec_end:
                        state = 2
                        if wants_section:
                            try:
                                do_next = self._lines(lines)
                            except Exception as be:
                                raise ValueError(
                                    "Error when processing section "
                                    f"{section!r} in file {file!r}.") \
                                    from be
                            lines.clear()
                            if not do_next:
                                break
                    elif wants_section:
                        lines.append(cur)

        if state == 0:
            raise ValueError(f"Log file {file!r} contains no section.")
        if state == 1:
            raise ValueError(f"Log file {file!r} ended before "
                             f"encountering {sec_end!r}.")
        return None  # pylint: disable=R1711
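
# Note on the reader loop in LogParser._parse_file above: the local variable
# `state` is a small state machine. 0 means that no section has been seen
# yet, 1 means that we are inside a section and collect lines until the
# matching end marker, and 2 means that a section was just closed, so only
# the start of a new section is accepted next.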

#: the start for random seeds
_SEED_START: Final[str] = f"{PART_SEPARATOR}0x"


class ExperimentParser[T](LogParser[T]):
    """A log parser following our pre-defined experiment structure."""

    def __init__(self, path_filter: Callable[[Path], bool] | None = None):
        """
        Initialize the experiment parser.

        :param path_filter: a filter allowing us to skip paths or files. If
            this :class:`Callable` returns `True`, the file or directory is
            considered for parsing. If it returns `False`, it is skipped.
        """
        super().__init__(path_filter)

        #: The name of the algorithm to which the current log file belongs.
        self.algorithm: str | None = None
        #: The name of the instance to which the current log file belongs.
        self.instance: str | None = None
        #: The random seed of the current log file.
        self.rand_seed: int | None = None
        #: the file basename
        self.__file_base_name: str | None = None

    def _start_parse_file(self, file: Path) -> None:
        """
        Start parsing the file.

        This function sets up best guesses about the instance name, the
        algorithm name, and the random seed based on the file name.

        :param file: the file to parse
        """
        super()._start_parse_file(file)
        inst_name_suggestion: str | None = None
        algo_name_suggestion: str | None = None
        with suppress(Exception):
            inst_dir: Final[Path] = file.up()
            inst_name_suggestion = inst_dir.basename()
            if sanitize_name(inst_name_suggestion) != inst_name_suggestion:
                inst_name_suggestion = None
            else:
                algo_dir: Final[Path] = inst_dir.up()
                algo_name_suggestion = algo_dir.basename()
                if sanitize_name(algo_name_suggestion) \
                        != algo_name_suggestion:
                    algo_name_suggestion = None

        fbn: Final[str] = file.basename()
        self.__file_base_name = fbn

        seed_start: int = fbn.rfind(_SEED_START)
        seed_end: int = str.__len__(fbn) - len(FILE_SUFFIX)
        if (seed_start > 0) and (seed_end > (seed_start + 3)):
            try:
                self.rand_seed = rand_seed_check(int(
                    fbn[seed_start + 3:seed_end], base=16))
            except Exception:  # noqa
                seed_start = -1
        if (seed_start > 0) and (inst_name_suggestion is not None):
            if algo_name_suggestion is not None:
                start: str = (f"{algo_name_suggestion}{PART_SEPARATOR}"
                              f"{inst_name_suggestion}{_SEED_START}")
                if fbn.casefold().startswith(start.casefold()):
                    self.instance = inst_name_suggestion
                    self.algorithm = algo_name_suggestion
            else:
                start = f"{PART_SEPARATOR}{inst_name_suggestion}{_SEED_START}"
                if start.casefold() in fbn.casefold():
                    self.instance = inst_name_suggestion
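
    # For orientation (a reading of the code above, not a normative spec):
    # the name guesses assume a directory layout of roughly
    #
    #     <algorithm>/<instance>/<algorithm><SEP><instance><SEP>0x<hex seed><FILE_SUFFIX>
    #
    # where <SEP> stands for PART_SEPARATOR from moptipy.utils.strings and
    # <FILE_SUFFIX> comes from moptipy.api.logging.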

    def _parse_file(self, file: Path) -> T | None:
        """
        Parse the file contents.

        :param file: the file to parse
        :returns: the result of the superclass parsing routine
        """
        res: Final[T | None] = super()._parse_file(file)

        if (self.algorithm is not None) and (self.rand_seed is not None) and (
                self.instance is None):
            bn: Final[str] = self.__file_base_name
            alcf: str = f"{self.algorithm.casefold()}{PART_SEPARATOR}"
            if bn.casefold().startswith(alcf):
                inst_end: Final[int] = bn.rfind(_SEED_START)
                anl: Final[int] = str.__len__(alcf)
                if inst_end > anl:
                    inst: str = bn[anl:inst_end]
                    if sanitize_name(inst) == inst:
                        self.instance = inst
            if self.instance is None:
                self.instance = "unknown"

        return res

    def _end_parse_file(self, file: Path) -> None:
        """
        Finalize parsing a file.

        :param file: the file
        """
        self.rand_seed = None
        self.algorithm = None
        self.instance = None
        self.__file_base_name = None
        super()._end_parse_file(file)


class SetupAndStateParser[T](ExperimentParser[T]):
    """
    A log parser which loads and processes the basic data from the logs.

    This parser processes the `SETUP` and `STATE` sections of a log file and
    stores the performance-related information in member variables.
    """

    def __init__(self, path_filter: Callable[[Path], bool] | None = None):
        """
        Create the basic data parser.

        :param path_filter: a filter allowing us to skip paths or files. If
            this :class:`Callable` returns `True`, the file or directory is
            considered for parsing. If it returns `False`, it is skipped.
        """
        super().__init__(path_filter)
        #: the total number of consumed objective function evaluations (FEs)
        self.total_fes: int | None = None
        #: the total consumed runtime in milliseconds
        self.total_time_millis: int | None = None
        #: the best objective function value encountered
        self.best_f: int | float | None = None
        #: the objective function evaluation (FE index) at which the last
        #: improvement happened
        self.last_improvement_fe: int | None = None
        #: the time step when the last improvement happened, in milliseconds
        self.last_improvement_time_millis: int | None = None
        #: the goal objective value, if any
        self.goal_f: int | float | None = None
        #: the maximum permitted number of objective function evaluations,
        #: if any
        self.max_fes: int | None = None
        #: the maximum runtime limit in milliseconds, if any
        self.max_time_millis: int | None = None
        #: The name of the objective to which the current log file belongs.
        self.objective: str | None = None
        #: The name of the encoding to which the current log file belongs.
        self.encoding: str | None = None
        #: the internal state, an OR mask: 1=after setup section, 2=after
        #: state section, 4=in setup section, 8=in state section
        self.__state: int = 0

    def _should_parse_file(self, file: Path) -> bool:
        """
        Decide whether to parse the file identified by `file`.

        :param file: the path identifying the file
        :returns: `True` if the file should be parsed, `False` otherwise
        """
        if not super()._should_parse_file(file):
            return False
        if self.__state != 0:
            raise ValueError(f"Illegal state when trying to parse {file}.")
        return True

    def _parse_file(self, file: Path) -> T | None:
        """
        Parse the file.

        :param file: the file
        :returns: the parsed object
        """
        res: Final[T | None] = super()._parse_file(file)
        if self.__state != 3:
            raise ValueError(
                "Illegal state, log file must have both a "
                f"{SECTION_FINAL_STATE!r} and a "
                f"{SECTION_SETUP!r} section.")
        if self.rand_seed is None:
            raise ValueError("rand_seed is missing.")
        if self.algorithm is None:
            raise ValueError("algorithm is missing.")
        if self.instance is None:
            raise ValueError("instance is missing.")
        if self.objective is None:
            raise ValueError("objective is missing.")
        if self.total_fes is None:
            raise ValueError("total_fes is missing.")
        if self.total_time_millis is None:
            raise ValueError("total_time_millis is missing.")
        if self.best_f is None:
            raise ValueError("best_f is missing.")
        if self.last_improvement_fe is None:
            raise ValueError("last_improvement_fe is missing.")
        if self.last_improvement_time_millis is None:
            raise ValueError("last_improvement_time_millis is missing.")
        return res

    def _end_parse_file(self, file: Path) -> None:
        """
        Finalize the state *after* parsing.

        :param file: the file to parse
        """
        self.total_fes = None
        self.total_time_millis = None
        self.best_f = None
        self.last_improvement_fe = None
        self.last_improvement_time_millis = None
        self.goal_f = None
        self.max_fes = None
        self.max_time_millis = None
        self.objective = None
        self.encoding = None
        self.__state = 0
        return super()._end_parse_file(file)

    def _needs_more_lines(self) -> bool:
        """
        Check whether we need to process more lines.

        You can override this method if your parser parses additional log
        sections. Your overriding method should return `True` if sections
        other than `STATE` and `SETUP` still need to be parsed and return
        `super()._needs_more_lines()` otherwise.

        :returns: `True` if more data needs to be processed, `False` otherwise
        """
        return self.__state != 3

    def _lines(self, lines: list[str]) -> bool:
        """
        Process the lines loaded from a section.

        If you process more sections, you should override this method. Your
        overriding method can then parse the data if you are in the right
        section. It should end with `return super()._lines(lines)`.

        :param lines: the lines that have been loaded
        :returns: `True` if parsing should be continued, `False` otherwise
        """
        if (self.__state & 4) != 0:
            self._setup_section(parse_key_values(lines))
        elif (self.__state & 8) != 0:
            self._state_section(lines)
        return self._needs_more_lines()
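
    # Illustrative sketch of the override pattern described in the two
    # docstrings above. The subclass and the section title "PROGRESS" are
    # assumptions made for the example, not part of moptipy:
    #
    #     class _WithProgress(SetupAndStateParser[T]):
    #         def __init__(self) -> None:
    #             super().__init__()
    #             self.progress: list[str] | None = None
    #             self.__in_progress: bool = False
    #
    #         def _start_section(self, title: str) -> bool:
    #             if title == "PROGRESS":
    #                 self.__in_progress = True
    #                 return True            # load this section, too
    #             return super()._start_section(title)
    #
    #         def _lines(self, lines: list[str]) -> bool:
    #             if self.__in_progress:
    #                 self.__in_progress = False
    #                 self.progress = list(lines)
    #                 return self._needs_more_lines()
    #             return super()._lines(lines)
    #
    #         def _needs_more_lines(self) -> bool:
    #             return (self.progress is None) or \
    #                 super()._needs_more_lines()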

    def _start_section(self, title: str) -> bool:
        """
        Begin a section.

        :param title: the section title
        :returns: `True` if the text of the section should be processed,
            `False` otherwise
        """
        super()._start_section(title)
        if title == SECTION_SETUP:
            if (self.__state & 1) != 0:
                raise ValueError(f"Already did section {title!r}.")
            self.__state |= 4
            return True
        if title == SECTION_FINAL_STATE:
            if (self.__state & 2) != 0:
                raise ValueError(f"Already did section {title!r}.")
            self.__state |= 8
            return True
        return False

    def _setup_section(self, data: dict[str, str]) -> None:
        """
        Parse the data from the `setup` section.

        :param data: the parsed data
        """
        self.goal_f = None
        if _FULL_KEY_GOAL_F in data:
            goal_f = data[_FULL_KEY_GOAL_F]
            g: Final[int | float] = str_to_num(goal_f)
            if isfinite(g):
                self.goal_f = g
            elif not (isinf(g) and (g >= inf)):
                raise ValueError(
                    f"invalid goal f {goal_f}, which renders to {g}")

        if _FULL_KEY_MAX_FES in data:
            self.max_fes = check_to_int_range(
                data[_FULL_KEY_MAX_FES], _FULL_KEY_MAX_FES, 1,
                1_000_000_000_000_000)
        if _FULL_KEY_MAX_TIME_MILLIS in data:
            self.max_time_millis = check_to_int_range(
                data[_FULL_KEY_MAX_TIME_MILLIS], _FULL_KEY_MAX_TIME_MILLIS, 1,
                1_000_000_000_000)
        if _FULL_KEY_ALGORITHM in data:
            a = data[_FULL_KEY_ALGORITHM]
            if self.algorithm is None:
                self.algorithm = a
            elif a != self.algorithm:
                # this error may occur under windows due to case-insensitive
                # file names
                if a.casefold() == self.algorithm.casefold():
                    self.algorithm = a  # rely on name from log file
                else:  # ok, case was not the issue - raise error
                    raise ValueError(
                        f"algorithm name from file name is {self.algorithm!r}"
                        f", but key {_FULL_KEY_ALGORITHM!r} gives {a!r}.")
        else:
            raise ValueError(f"key {_FULL_KEY_ALGORITHM!r} missing in file!")

        if _FULL_KEY_OBJECTIVE in data:
            self.objective = data[_FULL_KEY_OBJECTIVE]
        else:
            raise ValueError(f"key {_FULL_KEY_OBJECTIVE!r} missing in file!")

        self.encoding = data.get(_FULL_KEY_ENCODING)

        seed_check = rand_seed_check(int(data[_FULL_KEY_RAND_SEED]))
        if self.rand_seed is None:
            self.rand_seed = seed_check
        elif seed_check != self.rand_seed:
            raise ValueError(
                f"Found seed {seed_check} in log file, but file name "
                f"indicates seed {self.rand_seed}.")

        self.__state = (self.__state | 1) & (~4)
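
    # Illustrative only (all values hypothetical): the mapping handed to
    # _setup_section is produced by parse_key_values from the SETUP section
    # and is keyed by the full-key constants defined at the top of this
    # module, roughly like
    #
    #     {_FULL_KEY_ALGORITHM: "rls", _FULL_KEY_OBJECTIVE: "f",
    #      _FULL_KEY_RAND_SEED: "6500929885905480466",
    #      _FULL_KEY_MAX_FES: "1000000"}
    #
    # where every value arrives as a string and is converted above.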

    def _state_section(self, lines: list[str]) -> None:
        """
        Process the data of the final state section.

        :param lines: the lines of that section
        """
        data: Final[dict[str, str]] = parse_key_values(lines)

        self.total_fes = check_to_int_range(
            data[KEY_TOTAL_FES], KEY_TOTAL_FES, 1,
            1_000_000_000_000_000 if self.max_fes is None else self.max_fes)
        self.total_time_millis = check_to_int_range(
            data[KEY_TOTAL_TIME_MILLIS], KEY_TOTAL_TIME_MILLIS, 0,
            1_000_000_000_000 if self.max_time_millis is None else
            ((1_000_000 + self.max_time_millis) * 1_000))
        if self.max_time_millis is not None:
            _check_max_time_millis(self.max_time_millis, self.total_fes,
                                   self.total_time_millis)
        self.best_f = str_to_num(data[KEY_BEST_F])
        if not isfinite(self.best_f):
            raise ValueError(f"infinite best f detected: {self.best_f}")
        self.last_improvement_fe = check_to_int_range(
            data[KEY_LAST_IMPROVEMENT_FE], KEY_LAST_IMPROVEMENT_FE, 1,
            self.total_fes)
        self.last_improvement_time_millis = check_to_int_range(
            data[KEY_LAST_IMPROVEMENT_TIME_MILLIS],
            KEY_LAST_IMPROVEMENT_TIME_MILLIS, 0, self.total_time_millis)
        self.__state = (self.__state | 2) & (~8)
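

# A minimal end-to-end sketch (hypothetical, not part of moptipy): a concrete
# parser built on SetupAndStateParser that reduces each log file to a small
# tuple. The directory-walking entry point is inherited from the pycommons
# Parser base class and is not shown here.
#
#     class _BestFParser(SetupAndStateParser[tuple[str, str, int | float]]):
#         def _parse_file(self, file: Path) -> tuple[str, str, int | float]:
#             super()._parse_file(file)  # fills in and validates the fields
#             return self.algorithm, self.instance, self.best_f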