Coverage for moptipy / evaluation / log_parser.py: 79%
283 statements
coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
1"""
2Parsers for structured log data produced by the `moptipy` experiment API.
4The `moptipy` :class:`~moptipy.api.execution.Execution` and experiment-running
5facility (:func:`~moptipy.api.experiment.run_experiment`) uses the class
6:class:`~moptipy.utils.logger.Logger` from module :mod:`~moptipy.utils.logger`
7to produce log files complying with
8https://thomasweise.github.io/moptipy/#log-files.
10Here we provide a skeleton for parsing such log files in form of the class
11:class:`~LogParser`. It works similar to SAX-XML parsing in that the data
12is read is from files and methods that consume the data are invoked. By
13overwriting these methods, we can do useful things with the data.
15For example in module :mod:`~moptipy.evaluation.end_results`, the method
16:meth:`~moptipy.evaluation.end_results.from_logs` can load
17:class:`~moptipy.evaluation.end_results.EndResult` records from the logs
18and the method :meth:`~moptipy.evaluation.progress.from_logs` in
19module :mod:`~moptipy.evaluation.progress` reads the whole
20:class:`~moptipy.evaluation.progress.Progress` that the algorithms make
21over time.
22"""

from contextlib import suppress
from math import inf, isfinite, isinf
from typing import Callable, Final, TypeVar

from pycommons.io.csv import COMMENT_START, SCOPE_SEPARATOR
from pycommons.io.parser import Parser
from pycommons.io.path import Path
from pycommons.strings.string_conv import str_to_num
from pycommons.types import check_to_int_range, type_error

from moptipy.api.logging import (
    ERROR_SECTION_PREFIX,
    FILE_SUFFIX,
    KEY_BEST_F,
    KEY_GOAL_F,
    KEY_LAST_IMPROVEMENT_FE,
    KEY_LAST_IMPROVEMENT_TIME_MILLIS,
    KEY_MAX_FES,
    KEY_MAX_TIME_MILLIS,
    KEY_NAME,
    KEY_RAND_SEED,
    KEY_TOTAL_FES,
    KEY_TOTAL_TIME_MILLIS,
    SCOPE_ALGORITHM,
    SCOPE_ENCODING,
    SCOPE_OBJECTIVE_FUNCTION,
    SCOPE_PROCESS,
    SECTION_FINAL_STATE,
    SECTION_SETUP,
)
from moptipy.evaluation._utils import _check_max_time_millis
from moptipy.utils.logger import (
    SECTION_END,
    SECTION_START,
    parse_key_values,
)
from moptipy.utils.nputils import rand_seed_check
from moptipy.utils.strings import (
    PART_SEPARATOR,
    sanitize_name,
)

#: the maximum FEs of a black-box process
_FULL_KEY_MAX_FES: Final[str] = \
    f"{SCOPE_PROCESS}{SCOPE_SEPARATOR}{KEY_MAX_FES}"
#: the maximum runtime in milliseconds of a black-box process
_FULL_KEY_MAX_TIME_MILLIS: Final[str] = \
    f"{SCOPE_PROCESS}{SCOPE_SEPARATOR}{KEY_MAX_TIME_MILLIS}"
#: the goal objective value of a black-box process
_FULL_KEY_GOAL_F: Final[str] = f"{SCOPE_PROCESS}{SCOPE_SEPARATOR}{KEY_GOAL_F}"
#: the random seed
_FULL_KEY_RAND_SEED: Final[str] = \
    f"{SCOPE_PROCESS}{SCOPE_SEPARATOR}{KEY_RAND_SEED}"
#: the full algorithm name key
_FULL_KEY_ALGORITHM: Final[str] = \
    f"{SCOPE_ALGORITHM}{SCOPE_SEPARATOR}{KEY_NAME}"
#: the full objective function name key
_FULL_KEY_OBJECTIVE: Final[str] = \
    f"{SCOPE_OBJECTIVE_FUNCTION}{SCOPE_SEPARATOR}{KEY_NAME}"
#: the full encoding name key
_FULL_KEY_ENCODING: Final[str] = \
    f"{SCOPE_ENCODING}{SCOPE_SEPARATOR}{KEY_NAME}"


def _true(_) -> bool:
    """
    Get `True` as return value, always.

    :retval `True`: always
    """
    return True


#: the type variable for data to be read from the directories
T = TypeVar("T")


class LogParser[T](Parser[T]):
    """
    A log parser can parse a log file and separate the sections.

    The log parser is designed to load data from text files generated
    by :class:`~moptipy.utils.logger.FileLogger`. It can also recursively
    parse directories.
    """

    def __init__(self, path_filter: Callable[[Path], bool] | None = None):
        """
        Initialize the log parser.

        :param path_filter: a filter allowing us to skip paths or files. If
            this :class:`Callable` returns `True`, the file or directory is
            considered for parsing. If it returns `False`, it is skipped.
        """
        if path_filter is None:
            path_filter = _true
        elif not callable(path_filter):
            raise type_error(path_filter, "path_filter", call=True)
        #: the current depth in terms of directories
        self.__depth: int = 0
        #: the path filter
        self.__path_filter: Final[Callable[[Path], bool]] = path_filter

    def _should_list_directory(self, directory: Path) -> tuple[bool, bool]:
        """
        Decide whether to enter a directory to parse all files inside.

        :param directory: the path of the directory
        :return: a tuple with two `True` values if all the sub-directories
            and files inside the directory should be processed, two `False`
            values if this directory should be skipped and parsing should
            continue with the next sibling directory
        """
        should: Final[bool] = self.__path_filter(directory)
        return should, should

    def _should_parse_file(self, file: Path) -> bool:
        """
        Decide whether to start parsing a file.

        :param file: the file path
        :return: `True` if the file should be parsed, `False` if it should
            be skipped (in which case
            :meth:`~moptipy.evaluation.log_parser.LogParser._parse_file` is
            not invoked for it)
        """
        return file.endswith(FILE_SUFFIX) and self.__path_filter(file)

    # noinspection PyMethodMayBeStatic
    def _start_section(self, title: str) -> bool:
        """
        Start a section.

        If this method returns `True`, then all the lines of text of the
        section `title` will be read and together passed to
        :meth:`~moptipy.evaluation.log_parser.LogParser._lines`.
        If this method returns `False`, then the section will be skipped
        and we fast-forward to the next section, if any.

        :param title: the section title
        :return: `True` if the section data should be loaded and passed to
            :meth:`_lines`, `False` if the section can be skipped. In that
            case, we will fast-forward to the next
            :meth:`~moptipy.evaluation.log_parser.LogParser._start_section`.
        """
        if not title:
            raise ValueError(f"Title cannot be empty, but is {title!r}.")
        if title.startswith(ERROR_SECTION_PREFIX):
            raise ValueError(f"Encountered error section {title!r}.")
        return False

    # noinspection PyUnusedLocal
    # noinspection PyMethodMayBeStatic
    def _lines(self, lines: list[str]) -> bool:
        """
        Consume all the lines from a section.

        This method receives the complete text of a section, where all lines
        are separated and put into one list `lines`. Each line is stripped
        of whitespace and comments; empty lines are omitted.
        If this method returns `True`, we will continue parsing the file and
        move to the next section, if any, or directly to the end of the file
        parsing process.

        :param lines: the lines to consume
        :return: `True` if further parsing is necessary and the next section
            should be fed to
            :meth:`~moptipy.evaluation.log_parser.LogParser._start_section`,
            `False` if the parsing process can be terminated
        """
        del lines
        return True
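
    # A minimal sketch (illustrative only, not part of moptipy) of how a
    # subclass would use the two hooks above. The subclass name and the
    # section title "PROGRESS" are assumptions made for the example:
    #
    #     class _SectionCollector(LogParser[list[str]]):
    #         def __init__(self) -> None:
    #             super().__init__()
    #             self.collected: list[str] = []
    #
    #         def _start_section(self, title: str) -> bool:
    #             super()._start_section(title)   # keeps the error checks
    #             return title == "PROGRESS"      # only load this section
    #
    #         def _lines(self, lines: list[str]) -> bool:
    #             self.collected.extend(lines)
    #             return False                    # no further sections needed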

    def _parse_file(self, file: Path) -> T | None:  # pylint: disable=R1711
        """
        Parse the contents of a file.

        :param file: the file to parse
        :return: the result of parsing, if any (this base implementation
            always returns `None`)
        """
        lines: list[str] = []
        buffer: list[str] = []
        state: int = 0
        wants_section: bool = False
        sec_end: str = ""
        section: str = ""
        sect_start: Final[str] = SECTION_START
        sect_end: Final[str] = SECTION_END
        cmt_chr: Final[str] = COMMENT_START

        index: int = 0
        with file.open_for_read() as handle:
            while True:

                # get the next line
                if index >= len(buffer):
                    try:
                        buffer = handle.readlines(128)
                    except Exception as be:
                        raise ValueError(
                            f"Error when reading lines from file {file!r} "
                            f"while in section {section!r}."
                            if state == 1 else
                            "Error when reading lines from file "
                            f"{file!r}.") from be
                    if (buffer is None) or (len(buffer) <= 0):
                        break
                    index = 0

                orig_cur = buffer[index]
                index += 1

                # strip next line from comments and white space
                cur = orig_cur.strip()
                if len(cur) <= 0:
                    continue

                i = cur.find(cmt_chr)
                if i >= 0:
                    cur = cur[:i].strip()
                    if len(cur) <= 0:
                        continue

                if state in {0, 2}:
                    if not cur.startswith(sect_start):
                        raise ValueError("Line should start with "
                                         f"{sect_start!r} but is "
                                         f"{orig_cur!r} in file {file!r}.")
                    section = cur[len(sect_start):]
                    if len(section) <= 0:
                        raise ValueError(
                            "Section title cannot be empty in "
                            f"{file!r}, but encountered {orig_cur!r}.")
                    state = 1
                    sec_end = sect_end + section
                    wants_section = self._start_section(section)
                elif state == 1:
                    if cur == sec_end:
                        state = 2
                        if wants_section:
                            try:
                                do_next = self._lines(lines)
                            except Exception as be:
                                raise ValueError(
                                    "Error when processing section "
                                    f"{section!r} in file {file!r}.") \
                                    from be
                            lines.clear()
                            if not do_next:
                                break
                    elif wants_section:
                        lines.append(cur)

        if state == 0:
            raise ValueError(f"Log file {file!r} contains no section.")
        if state == 1:
            raise ValueError(f"Log file {file!r} ended before "
                             f"encountering {sec_end!r}.")
        return None  # pylint: disable=R1711
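
# Note on the reader loop in LogParser._parse_file above: the local variable
# `state` is a small state machine. 0 means that no section has been seen
# yet, 1 means that we are inside a section and collect lines until the
# matching end marker, and 2 means that a section was just closed, so only
# the start of a new section is accepted next.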

#: the start for random seeds
_SEED_START: Final[str] = f"{PART_SEPARATOR}0x"


class ExperimentParser[T](LogParser[T]):
    """A log parser following our pre-defined experiment structure."""

    def __init__(self, path_filter: Callable[[Path], bool] | None = None):
        """
        Initialize the experiment parser.

        :param path_filter: a filter allowing us to skip paths or files. If
            this :class:`Callable` returns `True`, the file or directory is
            considered for parsing. If it returns `False`, it is skipped.
        """
        super().__init__(path_filter)

        #: The name of the algorithm to which the current log file belongs.
        self.algorithm: str | None = None
        #: The name of the instance to which the current log file belongs.
        self.instance: str | None = None
        #: The random seed of the current log file.
        self.rand_seed: int | None = None
        #: the file basename
        self.__file_base_name: str | None = None

    def _start_parse_file(self, file: Path) -> None:
        """
        Start parsing the file.

        This function sets up best guesses about the instance name, the
        algorithm name, and the random seed based on the file name.

        :param file: the file to parse
        """
        super()._start_parse_file(file)
        inst_name_suggestion: str | None = None
        algo_name_suggestion: str | None = None
        with suppress(Exception):
            inst_dir: Final[Path] = file.up()
            inst_name_suggestion = inst_dir.basename()
            if sanitize_name(inst_name_suggestion) != inst_name_suggestion:
                inst_name_suggestion = None
            else:
                algo_dir: Final[Path] = inst_dir.up()
                algo_name_suggestion = algo_dir.basename()
                if sanitize_name(algo_name_suggestion) \
                        != algo_name_suggestion:
                    algo_name_suggestion = None

        fbn: Final[str] = file.basename()
        self.__file_base_name = fbn

        seed_start: int = fbn.rfind(_SEED_START)
        seed_end: int = str.__len__(fbn) - len(FILE_SUFFIX)
        if (seed_start > 0) and (seed_end > (seed_start + 3)):
            try:
                self.rand_seed = rand_seed_check(int(
                    fbn[seed_start + 3:seed_end], base=16))
            except Exception:  # noqa
                seed_start = -1
        if (seed_start > 0) and (inst_name_suggestion is not None):
            if algo_name_suggestion is not None:
                start: str = (f"{algo_name_suggestion}{PART_SEPARATOR}"
                              f"{inst_name_suggestion}{_SEED_START}")
                if fbn.casefold().startswith(start.casefold()):
                    self.instance = inst_name_suggestion
                    self.algorithm = algo_name_suggestion
            else:
                start = f"{PART_SEPARATOR}{inst_name_suggestion}{_SEED_START}"
                if start.casefold() in fbn.casefold():
                    self.instance = inst_name_suggestion
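
    # For orientation (a reading of the code above, not a normative spec):
    # the name guesses assume a directory layout of roughly
    #
    #     <algorithm>/<instance>/<algorithm><SEP><instance><SEP>0x<hex seed><FILE_SUFFIX>
    #
    # where <SEP> stands for PART_SEPARATOR from moptipy.utils.strings and
    # <FILE_SUFFIX> comes from moptipy.api.logging.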

    def _parse_file(self, file: Path) -> T | None:
        """
        Parse the file contents.

        :param file: the file to parse
        :returns: the result of the superclass parsing routine
        """
        res: Final[T | None] = super()._parse_file(file)

        if (self.algorithm is not None) and (self.rand_seed is not None) and (
                self.instance is None):
            bn: Final[str] = self.__file_base_name
            alcf: str = f"{self.algorithm.casefold()}{PART_SEPARATOR}"
            if bn.casefold().startswith(alcf):
                inst_end: Final[int] = bn.rfind(_SEED_START)
                anl: Final[int] = str.__len__(alcf)
                if inst_end > anl:
                    inst: str = bn[anl:inst_end]
                    if sanitize_name(inst) == inst:
                        self.instance = inst
            if self.instance is None:
                self.instance = "unknown"

        return res

    def _end_parse_file(self, file: Path) -> None:
        """
        Finalize parsing a file.

        :param file: the file
        """
        self.rand_seed = None
        self.algorithm = None
        self.instance = None
        self.__file_base_name = None
        super()._end_parse_file(file)


class SetupAndStateParser[T](ExperimentParser[T]):
    """
    A log parser which loads and processes the basic data from the logs.

    This parser processes the `SETUP` and `STATE` sections of a log file and
    stores the performance-related information in member variables.
    """

    def __init__(self, path_filter: Callable[[Path], bool] | None = None):
        """
        Create the basic data parser.

        :param path_filter: a filter allowing us to skip paths or files. If
            this :class:`Callable` returns `True`, the file or directory is
            considered for parsing. If it returns `False`, it is skipped.
        """
        super().__init__(path_filter)
        #: the total number of consumed objective function evaluations (FEs)
        self.total_fes: int | None = None
        #: the total consumed runtime in milliseconds
        self.total_time_millis: int | None = None
        #: the best objective function value encountered
        self.best_f: int | float | None = None
        #: the objective function evaluation (FE index) at which the last
        #: improvement happened
        self.last_improvement_fe: int | None = None
        #: the time step when the last improvement happened, in milliseconds
        self.last_improvement_time_millis: int | None = None
        #: the goal objective value, if any
        self.goal_f: int | float | None = None
        #: the maximum permitted number of objective function evaluations,
        #: if any
        self.max_fes: int | None = None
        #: the maximum runtime limit in milliseconds, if any
        self.max_time_millis: int | None = None
        #: The name of the objective to which the current log file belongs.
        self.objective: str | None = None
        #: The name of the encoding to which the current log file belongs.
        self.encoding: str | None = None
        #: the internal state, an OR mask: 1=after setup section, 2=after
        #: state section, 4=in setup section, 8=in state section
        self.__state: int = 0

    def _should_parse_file(self, file: Path) -> bool:
        """
        Decide whether to parse the file identified by `file`.

        :param file: the path identifying the file
        :returns: `True` if the file should be parsed, `False` otherwise
        """
        if not super()._should_parse_file(file):
            return False
        if self.__state != 0:
            raise ValueError(f"Illegal state when trying to parse {file}.")
        return True

    def _parse_file(self, file: Path) -> T | None:
        """
        Parse the file.

        :param file: the file
        :returns: the parsed object
        """
        res: Final[T | None] = super()._parse_file(file)
        if self.__state != 3:
            raise ValueError(
                "Illegal state, log file must have both a "
                f"{SECTION_FINAL_STATE!r} and a "
                f"{SECTION_SETUP!r} section.")
        if self.rand_seed is None:
            raise ValueError("rand_seed is missing.")
        if self.algorithm is None:
            raise ValueError("algorithm is missing.")
        if self.instance is None:
            raise ValueError("instance is missing.")
        if self.objective is None:
            raise ValueError("objective is missing.")
        if self.total_fes is None:
            raise ValueError("total_fes is missing.")
        if self.total_time_millis is None:
            raise ValueError("total_time_millis is missing.")
        if self.best_f is None:
            raise ValueError("best_f is missing.")
        if self.last_improvement_fe is None:
            raise ValueError("last_improvement_fe is missing.")
        if self.last_improvement_time_millis is None:
            raise ValueError("last_improvement_time_millis is missing.")
        return res

    def _end_parse_file(self, file: Path) -> None:
        """
        Finalize the state *after* parsing.

        :param file: the file to parse
        """
        self.total_fes = None
        self.total_time_millis = None
        self.best_f = None
        self.last_improvement_fe = None
        self.last_improvement_time_millis = None
        self.goal_f = None
        self.max_fes = None
        self.max_time_millis = None
        self.objective = None
        self.encoding = None
        self.__state = 0
        return super()._end_parse_file(file)

    def _needs_more_lines(self) -> bool:
        """
        Check whether we need to process more lines.

        You can override this method if your parser parses additional log
        sections. Your overriding method should return `True` if sections
        other than `STATE` and `SETUP` still need to be parsed and return
        `super()._needs_more_lines()` otherwise.

        :returns: `True` if more data needs to be processed, `False` otherwise
        """
        return self.__state != 3

    def _lines(self, lines: list[str]) -> bool:
        """
        Process the lines loaded from a section.

        If you process more sections, you should override this method. Your
        overriding method can then parse the data if you are in the right
        section. It should end with `return super()._lines(lines)`.

        :param lines: the lines that have been loaded
        :returns: `True` if parsing should be continued, `False` otherwise
        """
        if (self.__state & 4) != 0:
            self._setup_section(parse_key_values(lines))
        elif (self.__state & 8) != 0:
            self._state_section(lines)
        return self._needs_more_lines()
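
    # Illustrative sketch of the override pattern described in the two
    # docstrings above. The subclass and the section title "PROGRESS" are
    # assumptions made for the example, not part of moptipy:
    #
    #     class _WithProgress(SetupAndStateParser[T]):
    #         def __init__(self) -> None:
    #             super().__init__()
    #             self.progress: list[str] | None = None
    #             self.__in_progress: bool = False
    #
    #         def _start_section(self, title: str) -> bool:
    #             if title == "PROGRESS":
    #                 self.__in_progress = True
    #                 return True            # load this section, too
    #             return super()._start_section(title)
    #
    #         def _lines(self, lines: list[str]) -> bool:
    #             if self.__in_progress:
    #                 self.__in_progress = False
    #                 self.progress = list(lines)
    #                 return self._needs_more_lines()
    #             return super()._lines(lines)
    #
    #         def _needs_more_lines(self) -> bool:
    #             return (self.progress is None) or \
    #                 super()._needs_more_lines()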

    def _start_section(self, title: str) -> bool:
        """
        Begin a section.

        :param title: the section title
        :returns: `True` if the text of the section should be processed,
            `False` otherwise
        """
        super()._start_section(title)
        if title == SECTION_SETUP:
            if (self.__state & 1) != 0:
                raise ValueError(f"Already did section {title!r}.")
            self.__state |= 4
            return True
        if title == SECTION_FINAL_STATE:
            if (self.__state & 2) != 0:
                raise ValueError(f"Already did section {title!r}.")
            self.__state |= 8
            return True
        return False

    def _setup_section(self, data: dict[str, str]) -> None:
        """
        Parse the data from the `setup` section.

        :param data: the parsed data
        """
        self.goal_f = None
        if _FULL_KEY_GOAL_F in data:
            goal_f = data[_FULL_KEY_GOAL_F]
            g: Final[int | float] = str_to_num(goal_f)
            if isfinite(g):
                self.goal_f = g
            elif not (isinf(g) and (g >= inf)):
                raise ValueError(
                    f"invalid goal f {goal_f}, which renders to {g}")

        if _FULL_KEY_MAX_FES in data:
            self.max_fes = check_to_int_range(
                data[_FULL_KEY_MAX_FES], _FULL_KEY_MAX_FES, 1,
                1_000_000_000_000_000)
        if _FULL_KEY_MAX_TIME_MILLIS in data:
            self.max_time_millis = check_to_int_range(
                data[_FULL_KEY_MAX_TIME_MILLIS], _FULL_KEY_MAX_TIME_MILLIS, 1,
                1_000_000_000_000)
        if _FULL_KEY_ALGORITHM in data:
            a = data[_FULL_KEY_ALGORITHM]
            if self.algorithm is None:
                self.algorithm = a
            elif a != self.algorithm:
                # this error may occur under windows due to case-insensitive
                # file names
                if a.casefold() == self.algorithm.casefold():
                    self.algorithm = a  # rely on name from log file
                else:  # ok, case was not the issue - raise error
                    raise ValueError(
                        f"algorithm name from file name is {self.algorithm!r}"
                        f", but key {_FULL_KEY_ALGORITHM!r} gives {a!r}.")
        else:
            raise ValueError(f"key {_FULL_KEY_ALGORITHM!r} missing in file!")

        if _FULL_KEY_OBJECTIVE in data:
            self.objective = data[_FULL_KEY_OBJECTIVE]
        else:
            raise ValueError(f"key {_FULL_KEY_OBJECTIVE!r} missing in file!")

        self.encoding = data.get(_FULL_KEY_ENCODING)

        seed_check = rand_seed_check(int(data[_FULL_KEY_RAND_SEED]))
        if self.rand_seed is None:
            self.rand_seed = seed_check
        elif seed_check != self.rand_seed:
            raise ValueError(
                f"Found seed {seed_check} in log file, but file name "
                f"indicates seed {self.rand_seed}.")

        self.__state = (self.__state | 1) & (~4)
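
    # Illustrative only (all values hypothetical): the mapping handed to
    # _setup_section is produced by parse_key_values from the SETUP section
    # and is keyed by the full-key constants defined at the top of this
    # module, roughly like
    #
    #     {_FULL_KEY_ALGORITHM: "rls", _FULL_KEY_OBJECTIVE: "f",
    #      _FULL_KEY_RAND_SEED: "6500929885905480466",
    #      _FULL_KEY_MAX_FES: "1000000"}
    #
    # where every value arrives as a string and is converted above.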

    def _state_section(self, lines: list[str]) -> None:
        """
        Process the data of the final state section.

        :param lines: the lines of that section
        """
        data: Final[dict[str, str]] = parse_key_values(lines)

        self.total_fes = check_to_int_range(
            data[KEY_TOTAL_FES], KEY_TOTAL_FES, 1,
            1_000_000_000_000_000 if self.max_fes is None else self.max_fes)
        self.total_time_millis = check_to_int_range(
            data[KEY_TOTAL_TIME_MILLIS], KEY_TOTAL_TIME_MILLIS, 0,
            1_000_000_000_000 if self.max_time_millis is None else
            ((1_000_000 + self.max_time_millis) * 1_000))
        if self.max_time_millis is not None:
            _check_max_time_millis(self.max_time_millis, self.total_fes,
                                   self.total_time_millis)
        self.best_f = str_to_num(data[KEY_BEST_F])
        if not isfinite(self.best_f):
            raise ValueError(f"infinite best f detected: {self.best_f}")
        self.last_improvement_fe = check_to_int_range(
            data[KEY_LAST_IMPROVEMENT_FE], KEY_LAST_IMPROVEMENT_FE, 1,
            self.total_fes)
        self.last_improvement_time_millis = check_to_int_range(
            data[KEY_LAST_IMPROVEMENT_TIME_MILLIS],
            KEY_LAST_IMPROVEMENT_TIME_MILLIS, 0, self.total_time_millis)
        self.__state = (self.__state | 2) & (~8)
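

# A minimal end-to-end sketch (hypothetical, not part of moptipy): a concrete
# parser built on SetupAndStateParser that reduces each log file to a small
# tuple. The directory-walking entry point is inherited from the pycommons
# Parser base class and is not shown here.
#
#     class _BestFParser(SetupAndStateParser[tuple[str, str, int | float]]):
#         def _parse_file(self, file: Path) -> tuple[str, str, int | float]:
#             super()._parse_file(file)  # fills in and validates the fields
#             return self.algorithm, self.instance, self.best_f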