Coverage for moptipy/evaluation/end

"""
Records for end results as well as their parsing and serialization.

When doing experiments with `moptipy`, you apply algorithm setups to problem
instances. For each `setup x instance` combination, you may conduct a series
of repetitions (so-called runs) with different random seeds. Each single run
of an algorithm setup on a problem instance can produce a separate log file.
From each log file, we can load a :class:`EndResult` instance, which
represents, well, the end result of the run, i.e., information such as the
best solution quality reached, when it was reached, and the termination
criterion. These end result records then can be the basis for, e.g., computing
summary statistics via :mod:`~moptipy.evaluation.end_statistics` or for
plotting the end result distribution via
:mod:`~moptipy.evaluation.plot_end_results`.
"""

16import argparse

17from dataclasses import dataclass

18from math import inf, isfinite

19from typing import Any, Callable, Final, Generator, Iterable, TypeVar, cast

21from pycommons.ds.sequences import reiterable

22from pycommons.io.console import logger

23from pycommons.io.csv import (

24 CSV_SEPARATOR,

25 SCOPE_SEPARATOR,

26 csv_column,

27 csv_column_or_none,

28 csv_scope,

29 csv_str_or_none,

30 csv_val_or_none,

31 pycommons_footer_bottom_comments,

32)

33from pycommons.io.csv import CsvReader as CsvReaderBase

34from pycommons.io.csv import CsvWriter as CsvWriterBase

35from pycommons.io.path import Path, file_path, write_lines

36from pycommons.strings.string_conv import (

37 int_or_none_to_str,

38 num_or_none_to_str,

39 num_to_str,

40 str_to_num,

41)

42from pycommons.types import (

43 check_int_range,

44 check_to_int_range,

45 type_error,

46)

48from moptipy.api.logging import (

49 KEY_ALGORITHM,

50 KEY_BEST_F,

51 KEY_GOAL_F,

52 KEY_INSTANCE,

53 KEY_LAST_IMPROVEMENT_FE,

54 KEY_LAST_IMPROVEMENT_TIME_MILLIS,

55 KEY_MAX_FES,

56 KEY_MAX_TIME_MILLIS,

57 KEY_RAND_SEED,

58 KEY_TOTAL_FES,

59 KEY_TOTAL_TIME_MILLIS,

60 PROGRESS_CURRENT_F,

61 PROGRESS_FES,

62 PROGRESS_TIME_MILLIS,

63 SECTION_PROGRESS,

64)

65from moptipy.evaluation._utils import (

66 _check_max_time_millis,

67)

68from moptipy.evaluation.base import (

69 DESC_ALGORITHM,

70 DESC_ENCODING,

71 DESC_INSTANCE,

72 DESC_OBJECTIVE_FUNCTION,

73 F_NAME_NORMALIZED,

74 F_NAME_RAW,

75 F_NAME_SCALED,

76 KEY_ENCODING,

77 KEY_OBJECTIVE_FUNCTION,

78 PerRunData,

79 motipy_footer_bottom_comments,

80)

81from moptipy.evaluation.log_parser import SetupAndStateParser

82from moptipy.utils.help import moptipy_argparser

83from moptipy.utils.math import try_float_div, try_int, try_int_div

# The following constants are human-readable descriptions of the columns of
# the end result CSV format. They are emitted as footer comments by the CSV
# writer, each one prefixed with "<column name>: ".

#: a description of the random seed
DESC_RAND_SEED: Final[str] = (
    "the value of the seed of the random number generator used in the run. "
    f"Random seeds are in 0..{((1 << (8 * 8)) - 1)} and the random "
    "number generators are those from numpy.")
#: the description of best-F
DESC_BEST_F: Final[str] = (
    "the best (smallest) objective value ever encountered during the run ("
    "regardless whether the algorithm later forgot it again or not).")
#: the description of the last improvement FE
DESC_LAST_IMPROVEMENT_FE: Final[str] = (
    "the objective function evaluation (FE) when the last improving move took"
    " place. 1 FE corresponds to the construction and evaluation "
    "of one solution. The first FE has index 1. With 'last "
    "improving move' we mean the last time when a solution was "
    "discovered that was better than all previous solutions. This "
    "time / FE index is the one when the solution with objective "
    f"value {KEY_BEST_F} was discovered.")
#: the description of the last improvement time milliseconds
DESC_LAST_IMPROVEMENT_TIME_MILLIS: Final[str] = (
    "the clock time in milliseconds after the begin of the run when "
    "the last improving search move took place.")
#: the description of the total FEs
DESC_TOTAL_FES: Final[str] = (
    "the total number of objective function evaluations (FEs) that were "
    "performed during the run.")
#: the total consumed time in milliseconds
DESC_TOTAL_TIME_MILLIS: Final[str] = (
    "the clock time in milliseconds that has passed between the begin of the "
    "run and the end of the run.")
#: the description of the goal objective value
DESC_GOAL_F: Final[str] = (
    "the goal objective value. A run will stop as soon as a solution was "
    "discovered which has an objective value less than or equal to "
    f"{KEY_GOAL_F}. In other words, as soon as {KEY_BEST_F} reaches or dips "
    f"under {KEY_GOAL_F}, the algorithm will stop. If {KEY_GOAL_F} is not "
    "reached, the run will continue until other budget limits are exhausted. "
    "If a lower bound for the objective function is known, this is often used"
    " as a goal objective value. If no goal objective value is specified, "
    "this field is empty.")
#: a description of the budget as the maximum objective function evaluation
DESC_MAX_FES: Final[str] = (
    "the maximum number of permissible FEs per run. As soon as this limit is "
    f"reached, the run will stop. In other words, {KEY_TOTAL_FES} will never "
    f"be more than {KEY_MAX_FES}. A run may stop earlier if some other "
    "termination criterion is reached, but never later.")
#: a description of the budget in terms of maximum runtime
DESC_MAX_TIME_MILLIS: Final[str] = (
    "the maximum number of milliseconds of clock time that a run is permitted"
    " to use as computational budget before being terminated. This limit is "
    "more of a soft limit, as we cannot physically stop a run at arbitrary "
    "points without causing mayhem. Thus, it may be that some runs consume "
    "slightly more runtime than this limit. But the rule is that the "
    "algorithm gets told to stop (via should_terminate() becoming True) as "
    f"soon as this time has elapsed. But generally, {KEY_TOTAL_TIME_MILLIS}<="
    f"{KEY_MAX_TIME_MILLIS} approximately holds.")

141

142

@dataclass(frozen=True, init=False, order=False, eq=False)
class EndResult(PerRunData):
    """
    The immutable outcome of a single run of an algorithm on an instance.

    Besides the fields inherited from :class:`PerRunData` (which are set by
    the super constructor), an end result stores the best objective value,
    the time and FE index of the last improvement, the totally consumed
    time and FEs, and the optional goal and budget limits of the run.
    """

    #: The best objective value encountered.
    best_f: int | float

    #: The index of the function evaluation when best_f was reached.
    last_improvement_fe: int

    #: The time when best_f was reached.
    last_improvement_time_millis: int

    #: The total number of performed FEs.
    total_fes: int

    #: The total time consumed by the run.
    total_time_millis: int

    #: The goal objective value if provided
    goal_f: int | float | None

    #: The (optional) maximum permitted FEs.
    max_fes: int | None

    #: The (optional) maximum runtime.
    max_time_millis: int | None

    def __init__(self,
                 algorithm: str,
                 instance: str,
                 objective: str,
                 encoding: str | None,
                 rand_seed: int,
                 best_f: int | float,
                 last_improvement_fe: int,
                 last_improvement_time_millis: int,
                 total_fes: int,
                 total_time_millis: int,
                 goal_f: int | float | None,
                 max_fes: int | None,
                 max_time_millis: int | None):
        """
        Create a validated, consistent :class:`EndResult` record.

        :param algorithm: the algorithm name
        :param instance: the instance name
        :param objective: the name of the objective function
        :param encoding: the name of the encoding that was used, if any, or
            `None` if no encoding was used
        :param rand_seed: the random seed
        :param best_f: the best reached objective value
        :param last_improvement_fe: the FE when `best_f` was reached, must
            be at least `1`
        :param last_improvement_time_millis: the time when `best_f` was
            reached, must be at least `0`
        :param total_fes: the total FEs, must not be less than
            `last_improvement_fe`
        :param total_time_millis: the total runtime, must not be less than
            `last_improvement_time_millis`
        :param goal_f: the goal objective value, if provided; negative
            infinity is stored as `None`
        :param max_fes: the optional maximum FEs, must not be less than
            `total_fes`
        :param max_time_millis: the optional maximum runtime

        :raises TypeError: if any parameter has a wrong type
        :raises ValueError: if the parameter values are inconsistent
        """
        super().__init__(algorithm, instance, objective, encoding, rand_seed)

        # The dataclass is frozen, so all fields have to be stored by
        # going through `object.__setattr__` directly.
        store = object.__setattr__

        store(self, "best_f", try_int(best_f))

        # The improvement step/time form the lower bounds for the totals.
        checked_li_fe = check_int_range(
            last_improvement_fe, "last_improvement_fe",
            1, 1_000_000_000_000_000)
        store(self, "last_improvement_fe", checked_li_fe)

        checked_li_ms = check_int_range(
            last_improvement_time_millis, "last_improvement_time_millis",
            0, 100_000_000_000)
        store(self, "last_improvement_time_millis", checked_li_ms)

        checked_total_fes = check_int_range(
            total_fes, "total_fes", last_improvement_fe,
            1_000_000_000_000_000)
        store(self, "total_fes", checked_total_fes)

        checked_total_ms = check_int_range(
            total_time_millis, "total_time_millis",
            last_improvement_time_millis, 100_000_000_000)
        store(self, "total_time_millis", checked_total_ms)

        # A goal of negative infinity carries no information: drop it.
        if goal_f is not None:
            if goal_f <= -inf:
                goal_f = None
            else:
                goal_f = try_int(goal_f)
        store(self, "goal_f", goal_f)

        # The FE budget can never be smaller than the consumed FEs.
        if max_fes is not None:
            check_int_range(max_fes, "max_fes", total_fes,
                            1_000_000_000_000_000_000)
        store(self, "max_fes", max_fes)

        # The time budget is range-checked and then checked against the
        # consumed FEs and time by the shared helper.
        if max_time_millis is not None:
            check_int_range(
                max_time_millis, "max_time_millis", 1, 100_000_000_000)
            _check_max_time_millis(max_time_millis,
                                   total_fes,
                                   total_time_millis)
        store(self, "max_time_millis", max_time_millis)

    def _tuple(self) -> tuple[Any, ...]:
        """
        Get the tuple representation of this object used in comparisons.

        Missing strings are replaced by the empty string and missing
        limits are replaced by positive infinity, so that all elements of
        the tuple are comparable.

        :return: the comparison-relevant data of this object in a tuple
        """
        algorithm = "" if self.algorithm is None else self.algorithm
        instance = "" if self.instance is None else self.instance
        objective = "" if self.objective is None else self.objective
        encoding = "" if self.encoding is None else self.encoding
        goal_f = inf if self.goal_f is None else self.goal_f
        max_fes = inf if self.max_fes is None else self.max_fes
        max_ms = inf if self.max_time_millis is None \
            else self.max_time_millis
        return (self.__class__.__name__, algorithm, instance, objective,
                encoding, 1, self.rand_seed, "", "", goal_f, max_fes,
                max_ms, self.best_f, self.last_improvement_fe,
                self.last_improvement_time_millis, self.total_fes,
                self.total_time_millis)

    def success(self) -> bool:
        """
        Check whether the run has reached its goal objective value.

        A run without a goal objective value is never successful.

        :return: `True` if and only if `goal_f` is defined and
            `best_f <= goal_f`, `False` otherwise
        """
        goal = self.goal_f
        if goal is None:
            return False
        return self.best_f <= goal

    # The getters below are also used as plain functions, i.e., they can be
    # invoked as `EndResult.get_xxx(obj)`. They therefore verify that the
    # object passed in as `self` really is an `EndResult`.

    def get_best_f(self) -> int | float:
        """
        Get the best objective value reached.

        :returns: the best objective value reached
        :raises TypeError: if `self` is not an :class:`EndResult`
        """
        if isinstance(self, EndResult):
            return self.best_f
        raise type_error(self, "self", EndResult)

    def get_last_improvement_fe(self) -> int:
        """
        Get the index of the function evaluation when `best_f` was reached.

        :returns: the index of the function evaluation when `best_f` was
            reached
        :raises TypeError: if `self` is not an :class:`EndResult`
        """
        if isinstance(self, EndResult):
            return self.last_improvement_fe
        raise type_error(self, "self", EndResult)

    def get_last_improvement_time_millis(self) -> int:
        """
        Get the milliseconds when `best_f` was reached.

        :returns: the milliseconds when `best_f` was reached
        :raises TypeError: if `self` is not an :class:`EndResult`
        """
        if isinstance(self, EndResult):
            return self.last_improvement_time_millis
        raise type_error(self, "self", EndResult)

    def get_total_fes(self) -> int:
        """
        Get the total number of performed FEs.

        :returns: the total number of performed FEs
        :raises TypeError: if `self` is not an :class:`EndResult`
        """
        if isinstance(self, EndResult):
            return self.total_fes
        raise type_error(self, "self", EndResult)

    def get_total_time_millis(self) -> int:
        """
        Get the total time consumed by the run.

        :returns: the total time consumed by the run
        :raises TypeError: if `self` is not an :class:`EndResult`
        """
        if isinstance(self, EndResult):
            return self.total_time_millis
        raise type_error(self, "self", EndResult)

    def get_goal_f(self) -> int | float | None:
        """
        Get the goal objective value, if any.

        :returns: the goal objective value, or `None` if there is none
        :raises TypeError: if `self` is not an :class:`EndResult`
        """
        if isinstance(self, EndResult):
            return self.goal_f
        raise type_error(self, "self", EndResult)

    def get_max_fes(self) -> int | None:
        """
        Get the maximum number of FEs permissible.

        :returns: the maximum number of FEs permissible, or `None` if no
            such limit exists
        :raises TypeError: if `self` is not an :class:`EndResult`
        """
        if isinstance(self, EndResult):
            return self.max_fes
        raise type_error(self, "self", EndResult)

    def get_max_time_millis(self) -> int | None:
        """
        Get the maximum number of milliseconds permitted.

        :returns: the maximum number of milliseconds permitted, or `None`
            if no such limit exists
        :raises TypeError: if `self` is not an :class:`EndResult`
        """
        if isinstance(self, EndResult):
            return self.max_time_millis
        raise type_error(self, "self", EndResult)

    def get_normalized_best_f(self) -> int | float | None:
        """
        Get the normalized best objective value.

        The normalized value is `(best_f - goal_f) / goal_f`.

        :returns: the normalized best objective value, or `None` if there
            is no goal objective value or if it is not positive
        """
        goal: Final[int | float | None] = EndResult.get_goal_f(self)
        if goal is None:
            return None
        if goal <= 0:
            return None
        return try_float_div(self.best_f - goal, goal)

    def get_scaled_best_f(self) -> int | float | None:
        """
        Get the scaled best objective value.

        The scaled value is `best_f / goal_f`.

        :returns: the scaled best objective value, or `None` if there is
            no goal objective value or if it is not positive
        """
        goal: Final[int | float | None] = EndResult.get_goal_f(self)
        if goal is None:
            return None
        if goal <= 0:
            return None
        return try_float_div(self.best_f, goal)

    def get_fes_per_time_milli(self) -> int | float:
        """
        Get the number of FEs performed per millisecond.

        The consumed time is clamped to at least one millisecond, so the
        division is always defined.

        :returns: the FEs per millisecond
        """
        fes: Final[int] = EndResult.get_total_fes(self)
        millis: Final[int] = max(1, EndResult.get_total_time_millis(self))
        return try_int_div(fes, millis)

388

389

#: A lookup function mapping dimension names to the getters of
#: :class:`EndResult`. It is the bound `get` method of a dictionary, i.e.,
#: it returns `None` for unknown names. Most getters are registered under
#: a key constant and under an additional alias.
__PROPERTIES: Final[Callable[[str], Callable[[
    EndResult], int | float | None] | None]] = {
    # the last improvement, in FEs and in milliseconds
    KEY_LAST_IMPROVEMENT_FE: EndResult.get_last_improvement_fe,
    "last improvement FE": EndResult.get_last_improvement_fe,
    KEY_LAST_IMPROVEMENT_TIME_MILLIS:
        EndResult.get_last_improvement_time_millis,
    "last improvement ms": EndResult.get_last_improvement_time_millis,
    # the totally consumed budget, in FEs and in milliseconds
    KEY_TOTAL_FES: EndResult.get_total_fes,
    "fes": EndResult.get_total_fes,
    KEY_TOTAL_TIME_MILLIS: EndResult.get_total_time_millis,
    "ms": EndResult.get_total_time_millis,
    # the goal objective value
    KEY_GOAL_F: EndResult.get_goal_f,
    # the raw best objective value
    F_NAME_RAW: EndResult.get_best_f,
    KEY_BEST_F: EndResult.get_best_f,
    "f": EndResult.get_best_f,
    # the best objective value relative to the goal
    F_NAME_SCALED: EndResult.get_scaled_best_f,
    "bestFscaled": EndResult.get_scaled_best_f,
    F_NAME_NORMALIZED: EndResult.get_normalized_best_f,
    "bestFnormalized": EndResult.get_normalized_best_f,
    # the budget limits
    KEY_MAX_FES: EndResult.get_max_fes,
    "budgetFEs": EndResult.get_max_fes,
    KEY_MAX_TIME_MILLIS: EndResult.get_max_time_millis,
    "budgetMS": EndResult.get_max_time_millis,
    # the speed of the run
    "fesPerTimeMilli": EndResult.get_fes_per_time_milli,
}.get

416

417

def getter(dimension: str) -> Callable[[EndResult], int | float | None]:
    """
    Obtain a function that extracts a given dimension from an end result.

    The dimension name is stripped of leading and trailing white space and
    then looked up. The following dimensions are supported:

    1. `lastImprovementFE`: :attr:`~EndResult.last_improvement_fe`
    2. `lastImprovementTimeMillis`:
        :attr:`~EndResult.last_improvement_time_millis`
    3. `totalFEs`: :attr:`~EndResult.total_fes`
    4. `totalTimeMillis`: :attr:`~EndResult.total_time_millis`
    5. `goalF`: :attr:`~EndResult.goal_f`
    6. `plainF`, `bestF`: :attr:`~EndResult.best_f`
    7. `scaledF`: :attr:`~EndResult.best_f`/:attr:`~EndResult.goal_f`
    8. `normalizedF`: (:attr:`~EndResult.best_f`-:attr:`~EndResult.goal_f`)/
        :attr:`~EndResult.goal_f`
    9. `maxFEs`: :attr:`~EndResult.max_fes`
    10. `maxTimeMillis`: :attr:`~EndResult.max_time_millis`
    11. `fesPerTimeMilli`: :attr:`~EndResult.total_fes`
        /:attr:`~EndResult.total_time_millis`

    In addition, the aliases `last improvement FE`, `last improvement ms`,
    `fes`, `ms`, `f`, `bestFscaled`, `bestFnormalized`, `budgetFEs`, and
    `budgetMS` are accepted.

    :param dimension: the dimension
    :returns: a callable that returns the value corresponding to the
        dimension from its input value, which must be an :class:`EndResult`
    :raises ValueError: if the dimension is unknown
    """
    name: Final[str] = str.strip(dimension)
    found: Callable[[EndResult], int | float | None] | None = \
        __PROPERTIES(name)
    if found is not None:
        return found
    raise ValueError(f"Unknown EndResult dimension {dimension!r}.")

448

449

def to_csv(results: Iterable[EndResult], file: str) -> Path:
    """
    Store a sequence of end results in a CSV file.

    The parent directory of the file is created if necessary and the
    rendering of the records is done by :class:`CsvWriter`.

    :param results: the end results
    :param file: the path
    :return: the path of the file that was written
    """
    target: Final[Path] = Path(file)
    logger(f"Writing end results to CSV file {target!r}.")
    target.ensure_parent_dir_exists()
    with target.open_for_write() as stream:
        write_lines(CsvWriter.write(results), stream)
    logger(f"Done writing end results to CSV file {target!r}.")
    return target

465

466

def from_csv(file: str,
             filterer: Callable[[EndResult], bool]
             = lambda _: True) -> Generator[EndResult, None, None]:
    """
    Lazily load :class:`EndResult` records from a CSV file.

    This is a generator: the file is opened when the first record is
    requested and it stays open until the generator is exhausted.

    :param file: the path to parse
    :param filterer: an optional filter function, only the records for
        which it returns a true value are produced
    :returns: a generator with the accepted end result records
    """
    source: Final[Path] = file_path(file)
    logger(f"Now reading CSV file {source!r}.")
    with source.open_for_read() as stream:
        yield from (
            record for record in CsvReader.read(stream) if filterer(record))
    logger(f"Done reading CSV file {source!r}.")

483

484

class CsvWriter(CsvWriterBase):
    """A writer rendering :class:`EndResult` records as CSV data."""

    def __init__(self, data: Iterable[EndResult],
                 scope: str | None = None) -> None:
        """
        Initialize the csv writer.

        The data is scanned once to find out which of the optional columns
        (encoding, goal objective value, FE budget, and time budget) are
        needed: such a column is only written if at least one record has
        a value for it.

        :param data: the data
        :param scope: the prefix to be pre-pended to all columns
        """
        data = reiterable(data)
        super().__init__(data, scope)

        has_encoding: bool = False
        has_max_fes: bool = False
        has_max_ms: bool = False
        has_goal_f: bool = False
        for record in data:
            has_encoding = has_encoding or (record.encoding is not None)
            has_max_fes = has_max_fes or (record.max_fes is not None)
            has_max_ms = has_max_ms or (
                record.max_time_millis is not None)
            # only finite goal objective values warrant a column
            has_goal_f = has_goal_f or (
                (record.goal_f is not None) and isfinite(record.goal_f))
            if has_encoding and has_max_fes and has_max_ms and has_goal_f:
                break  # all optional columns are needed: stop scanning

        #: do we need the encoding?
        self.__needs_encoding: Final[bool] = has_encoding
        #: do we need the max FEs?
        self.__needs_max_fes: Final[bool] = has_max_fes
        #: do we need the max millis?
        self.__needs_max_ms: Final[bool] = has_max_ms
        #: do we need the goal F?
        self.__needs_goal_f: Final[bool] = has_goal_f

    def get_column_titles(self) -> Iterable[str]:
        """
        Get the column titles.

        The titles are prefixed with the scope, if any, via `csv_scope`.
        Optional columns are only included if they are needed.

        :returns: the column titles
        """
        prefix: Final[str | None] = self.scope
        titles: list[str] = [
            KEY_ALGORITHM, KEY_INSTANCE, KEY_OBJECTIVE_FUNCTION]
        if self.__needs_encoding:
            titles.append(KEY_ENCODING)
        titles += [KEY_RAND_SEED, KEY_BEST_F, KEY_LAST_IMPROVEMENT_FE,
                   KEY_LAST_IMPROVEMENT_TIME_MILLIS, KEY_TOTAL_FES,
                   KEY_TOTAL_TIME_MILLIS]
        if self.__needs_goal_f:
            titles.append(KEY_GOAL_F)
        if self.__needs_max_fes:
            titles.append(KEY_MAX_FES)
        if self.__needs_max_ms:
            titles.append(KEY_MAX_TIME_MILLIS)
        return (csv_scope(prefix, title) for title in titles)

    def get_row(self, data: EndResult) -> Iterable[str]:
        """
        Render a single end result record to a CSV row.

        The values are produced in the same order as the column titles.
        The random seed is written in hexadecimal notation.

        :param data: the end result record
        :returns: the row iterator
        """
        yield from (data.algorithm, data.instance, data.objective)
        if self.__needs_encoding:
            encoding = data.encoding
            yield encoding if encoding else ""
        yield hex(data.rand_seed)
        yield num_to_str(data.best_f)
        yield str(data.last_improvement_fe)
        yield str(data.last_improvement_time_millis)
        yield str(data.total_fes)
        yield str(data.total_time_millis)
        if self.__needs_goal_f:
            yield num_or_none_to_str(data.goal_f)
        if self.__needs_max_fes:
            yield int_or_none_to_str(data.max_fes)
        if self.__needs_max_ms:
            yield int_or_none_to_str(data.max_time_millis)

    def get_header_comments(self) -> Iterable[str]:
        """
        Get any possible header comments.

        :returns: the header comments
        """
        title: Final[str] = "Experiment End Results"
        hint: Final[str] = "See the description at the bottom of the file."
        return (title, hint)

    def get_footer_comments(self) -> Iterable[str]:
        """
        Get any possible footer comments.

        The footer begins with an empty line and a general explanation,
        followed by one description line per column that is written.

        :returns: the footer comments
        """
        yield ""
        scope: Final[str | None] = self.scope
        yield ("Records describing the end results of single runs ("
               "single executions) of algorithms applied to optimization "
               "problems.")
        yield ("Each run is characterized by an algorithm setup, a problem "
               "instance, and a random seed.")
        if scope:
            yield ("All end result records start with prefix "
                   f"{scope}{SCOPE_SEPARATOR}.")

        # Collect the (column, description) pairs in column order, leaving
        # out the optional columns that are not written.
        described: list[tuple[str, str]] = [
            (KEY_ALGORITHM, DESC_ALGORITHM),
            (KEY_INSTANCE, DESC_INSTANCE),
            (KEY_OBJECTIVE_FUNCTION, DESC_OBJECTIVE_FUNCTION)]
        if self.__needs_encoding:
            described.append((KEY_ENCODING, DESC_ENCODING))
        described += [
            (KEY_RAND_SEED, DESC_RAND_SEED),
            (KEY_BEST_F, DESC_BEST_F),
            (KEY_LAST_IMPROVEMENT_FE, DESC_LAST_IMPROVEMENT_FE),
            (KEY_LAST_IMPROVEMENT_TIME_MILLIS,
             DESC_LAST_IMPROVEMENT_TIME_MILLIS),
            (KEY_TOTAL_FES, DESC_TOTAL_FES),
            (KEY_TOTAL_TIME_MILLIS, DESC_TOTAL_TIME_MILLIS)]
        if self.__needs_goal_f:
            described.append((KEY_GOAL_F, DESC_GOAL_F))
        if self.__needs_max_fes:
            described.append((KEY_MAX_FES, DESC_MAX_FES))
        if self.__needs_max_ms:
            described.append((KEY_MAX_TIME_MILLIS, DESC_MAX_TIME_MILLIS))

        for column, description in described:
            yield f"{csv_scope(scope, column)}: {description}"

    def get_footer_bottom_comments(self) -> Iterable[str]:
        """
        Get the footer bottom comments.

        These are the `moptipy` bottom comments, including a note on the
        module that produced the data, followed by the `pycommons` ones.

        :returns: the footer comments
        """
        note: Final[str] = ("The end results data is produced using module "
                            "moptipy.evaluation.end_results.")
        yield from motipy_footer_bottom_comments(self, note)
        yield from pycommons_footer_bottom_comments(self)

640

641

class CsvReader(CsvReaderBase):
    """A parser turning CSV rows into :class:`EndResult` records."""

    def __init__(self, columns: dict[str, int]) -> None:
        """
        Create a CSV parser for :class:`EndResult`.

        The indices of all columns are looked up once here. The columns
        for the encoding, the goal objective value, and the two budget
        limits are looked up with `csv_column_or_none`, all other columns
        with `csv_column`.

        :param columns: the columns
        """
        super().__init__(columns)

        #: the index of the algorithm column
        self.__idx_algorithm: Final[int] = csv_column(columns, KEY_ALGORITHM)
        #: the index of the instance column
        self.__idx_instance: Final[int] = csv_column(columns, KEY_INSTANCE)
        #: the index of the objective function column
        self.__idx_objective: Final[int] = csv_column(
            columns, KEY_OBJECTIVE_FUNCTION)
        #: the index of the encoding column, if any
        self.__idx_encoding = csv_column_or_none(columns, KEY_ENCODING)

        #: the index of the random seed column
        self.__idx_seed: Final[int] = csv_column(columns, KEY_RAND_SEED)
        #: the column with the last improvement FE
        self.__idx_li_fe: Final[int] = csv_column(
            columns, KEY_LAST_IMPROVEMENT_FE)
        #: the column with the last improvement time milliseconds
        self.__idx_li_ms: Final[int] = csv_column(
            columns, KEY_LAST_IMPROVEMENT_TIME_MILLIS)
        #: the column with the best obtained objective value
        self.__idx_best_f: Final[int] = csv_column(columns, KEY_BEST_F)
        #: the column with the total time in FEs
        self.__idx_tt_fe: Final[int] = csv_column(columns, KEY_TOTAL_FES)
        #: the column with the total time in milliseconds
        self.__idx_tt_ms: Final[int] = csv_column(
            columns, KEY_TOTAL_TIME_MILLIS)

        #: the column with the goal objective value, if any
        self.__idx_goal_f: Final[int | None] = csv_column_or_none(
            columns, KEY_GOAL_F)
        #: the column with the maximum FEs, if any such budget constraint was
        #: defined
        self.__idx_max_fes: Final[int | None] = csv_column_or_none(
            columns, KEY_MAX_FES)
        #: the column with the maximum runtime in milliseconds, if any such
        #: budget constraint was defined
        self.__idx_max_ms: Final[int | None] = csv_column_or_none(
            columns, KEY_MAX_TIME_MILLIS)

    def parse_row(self, data: list[str]) -> EndResult:
        """
        Convert one row of CSV data to an end result record.

        The random seed is parsed with `base=0`, so it may be given with a
        prefix such as `0x`.

        :param data: the data row
        :return: the end result record
        """
        return EndResult(
            algorithm=data[self.__idx_algorithm],
            instance=data[self.__idx_instance],
            objective=data[self.__idx_objective],
            encoding=csv_str_or_none(data, self.__idx_encoding),
            rand_seed=int(data[self.__idx_seed], base=0),
            best_f=str_to_num(data[self.__idx_best_f]),
            last_improvement_fe=int(data[self.__idx_li_fe]),
            last_improvement_time_millis=int(data[self.__idx_li_ms]),
            total_fes=int(data[self.__idx_tt_fe]),
            total_time_millis=int(data[self.__idx_tt_ms]),
            goal_f=csv_val_or_none(data, self.__idx_goal_f, str_to_num),
            max_fes=csv_val_or_none(data, self.__idx_max_fes, int),
            max_time_millis=csv_val_or_none(data, self.__idx_max_ms, int))

711

712

713#: the type variable for data to be read from the directories

714T = TypeVar("T", bound=EndResult)

715

716

717class EndResultLogParser[T](SetupAndStateParser[T]):

718 """The internal log parser class."""

719

720 def _parse_file(self, file: Path) -> T:

721 """

722 Get the parsing result.

723

724 :returns: the :class:`EndResult` instance

725 """

726 super()._parse_file(file)

727 return cast("T", EndResult(self.algorithm,

728 self.instance,

729 self.objective,

730 self.encoding,

731 self.rand_seed,

732 self.best_f,

733 self.last_improvement_fe,

734 self.last_improvement_time_millis,

735 self.total_fes,

736 self.total_time_millis,

737 self.goal_f,

738 self.max_fes,

739 self.max_time_millis))

740

741

def _join_goals(vlimit, vgoal, select):  # noqa
    """
    Combine two optional limits into a single one.

    :param vlimit: the first limit, or `None` if there is none
    :param vgoal: the second limit, or `None` if there is none
    :param select: the function choosing between the two values if both
        of them are given, e.g., `min` or `max`
    :returns: `select(vlimit, vgoal)` if both values are given, the one
        value that is given if only one is, and `None` otherwise
    """
    if (vlimit is not None) and (vgoal is not None):
        return select(vlimit, vgoal)
    return vgoal if vlimit is None else vlimit

748

749

750class __EndResultProgressLogParser(SetupAndStateParser[EndResult]):

751 """The internal log parser class for virtual end results."""

752

753 def __init__(

754 self,

755 max_fes: int | Callable[[str, str], int | None] | None,

756 max_time_millis: int | Callable[[str, str], int | None] | None,

757 goal_f: int | float | Callable[

758 [str, str], int | float | None] | None,

759 path_filter: Callable[[Path], bool] | None = None):

760 """

761 Create the internal log parser.

762

763 :param max_fes: the maximum FEs, or `None` if unspecified

764 :param max_time_millis: the maximum runtime in milliseconds, or

765 `None` if unspecified

766 :param goal_f: the goal objective value, or `None` if unspecified

767 :param path_filter: the path filter

768 """

769 super().__init__(path_filter)

770 self.__src_limit_ms: Final[

771 int | Callable[[str, str], int | None] | None] = max_time_millis

772 self.__src_limit_fes: Final[

773 int | Callable[[str, str], int | None] | None] = max_fes

774 self.__src_limit_f: Final[

775 int | float | Callable[

776 [str, str], int | float | None] | None] = goal_f

777

778 self.__limit_ms: int | float = inf

779 self.__limit_ms_n: int | None = None

780 self.__limit_fes: int | float = inf

781 self.__limit_fes_n: int | None = None

782 self.__limit_f: int | float = -inf

783 self.__limit_f_n: int | float | None = None

784

785 self.__stop_fes: int | None = None

786 self.__stop_ms: int | None = None

787 self.__stop_f: int | float | None = None

788 self.__stop_li_fe: int | None = None

789 self.__stop_li_ms: int | None = None

790 self.__hit_goal: bool = False

791 self.__state: int = 0

792

793 def _parse_file(self, file: Path) -> EndResult:

794 super()._parse_file(file)

795 if self.__state != 2:

796 raise ValueError(

797 "Illegal state, log file must have a "

798 f"{SECTION_PROGRESS!r} section.")

799 self.__state = 0

800 l_hit_goal = self.__hit_goal

801 stop_fes: int = self.__stop_fes

802 stop_ms: int = self.__stop_ms

803 if not l_hit_goal:

804 stop_ms = max(stop_ms, cast("int", min(

805 self.total_time_millis, self.__limit_ms)))

806 ul_fes = self.total_fes

807 if stop_ms < self.total_time_millis:

808 ul_fes -= 1

809 stop_fes = max(stop_fes, cast("int", min(

810 ul_fes, self.__limit_fes)))

811

812 return EndResult(

813 algorithm=self.algorithm,

814 instance=self.instance,

815 objective=self.objective,

816 encoding=self.encoding,

817 rand_seed=self.rand_seed,

818 best_f=self.__stop_f,

819 last_improvement_fe=self.__stop_li_fe,

820 last_improvement_time_millis=self.__stop_li_ms,

821 total_fes=stop_fes,

822 total_time_millis=stop_ms,

823 goal_f=_join_goals(self.__limit_f_n, self.goal_f, max),

824 max_fes=_join_goals(self.__limit_fes_n, self.max_fes, min),

825 max_time_millis=_join_goals(

826 self.__limit_ms_n, self.max_time_millis, min))

827

828 def _end_parse_file(self, file: Path) -> None:

829 """

830 Cleanup.

831

832 :param file: the file that was parsed.

833 """

834 self.__stop_fes = None

835 self.__stop_ms = None

836 self.__stop_f = None

837 self.__stop_li_fe = None

838 self.__stop_li_ms = None

839 self.__limit_fes_n = None

840 self.__limit_fes = inf

841 self.__limit_ms_n = None

842 self.__limit_ms = inf

843 self.__limit_f_n = None

844 self.__limit_f = -inf

845 self.__hit_goal = False

846 super()._end_parse_file(file)

847

def _start_parse_file(self, file: Path) -> None:
    """
    Resolve the FE, time, and objective limits for the next file.

    Each limit source is either a callable, a constant, or something
    else. A callable is only invoked, with the algorithm and instance
    name, if both names are non-empty. A constant of fitting type is
    used directly. In all other cases, the limit is unspecified.

    :param file: the file that is about to be parsed
    :raises ValueError: if the objective limit is not finite and also
        not negative infinity
    """
    super()._start_parse_file(file)
    algo: Final[str | None] = self.algorithm
    inst: Final[str | None] = self.instance

    # the limit for the objective function evaluations (FEs)
    fes_source = self.__src_limit_fes
    if algo and inst and callable(fes_source):
        fes = fes_source(algo, inst)
    elif isinstance(fes_source, int):
        fes = fes_source
    else:
        fes = None
    if fes is None:
        self.__limit_fes_n = None
    else:
        self.__limit_fes_n = check_int_range(
            fes, "limit_fes", 1, 1_000_000_000_000_000)
    # `inf` stands for "no FE limit" in comparisons
    self.__limit_fes = inf if self.__limit_fes_n is None \
        else self.__limit_fes_n

    # the limit for the runtime in milliseconds
    ms_source = self.__src_limit_ms
    if algo and inst and callable(ms_source):
        millis = ms_source(algo, inst)
    elif isinstance(ms_source, int):
        millis = ms_source
    else:
        millis = None
    if millis is None:
        self.__limit_ms_n = None
    else:
        self.__limit_ms_n = check_int_range(
            millis, "l_limit_ms", 1, 1_000_000_000_000)
    # `inf` stands for "no time limit" in comparisons
    self.__limit_ms = inf if self.__limit_ms_n is None \
        else self.__limit_ms_n

    # the goal objective value
    f_source = self.__src_limit_f
    if algo and inst and callable(f_source):
        goal = f_source(algo, inst)
    elif isinstance(f_source, int | float):
        goal = f_source
    else:
        goal = None
    self.__limit_f_n = goal
    if goal is not None:
        if not isinstance(goal, int | float):
            raise type_error(goal, "limit_f", (int, float))
        if not isfinite(goal):
            if goal <= -inf:
                # negative infinity is the same as "no goal"
                self.__limit_f_n = None
            else:
                raise ValueError(
                    f"invalid limit f={goal} for "
                    f"{self.algorithm} on {self.instance}")
    # `-inf` stands for "no goal" in comparisons
    self.__limit_f = -inf if self.__limit_f_n is None \
        else self.__limit_f_n

888

def _start_section(self, title: str) -> bool:
    """
    Decide whether the section with the given title should be parsed.

    The progress section is accepted exactly once per file. All other
    sections are left to the super class.

    :param title: the section title
    :returns: `True` if the lines of the section should be delivered
    :raises ValueError: if the progress section is encountered while
        the internal state is not `0`
    """
    if title != SECTION_PROGRESS:
        return super()._start_section(title)
    if self.__state != 0:
        raise ValueError(f"Already did section {title}.")
    self.__state = 1  # the progress section comes next
    return True

896

897 def _needs_more_lines(self) -> bool:

898 return (self.__state < 2) or super()._needs_more_lines()

899

def _lines(self, lines: list[str]) -> bool:
    """
    Process the progress section and compute the virtual end of the run.

    If the internal state is not `1`, i.e., if the lines do not belong
    to a progress section that was just started, they are passed on to
    the super class. Otherwise, the first line is taken as CSV header
    from which the column indices of the FEs, the time in milliseconds,
    and the objective value are obtained. The remaining lines are the
    log points. They are processed in order until either one of the
    limits is reached or the data ends. The resulting stopping point is
    stored in the parser.

    :param lines: the lines of the section, header first
    :returns: `True` if more lines are needed, `False` otherwise
    :raises ValueError: if there are less than two lines, if a required
        column is missing, if the FEs or milliseconds are out of range
        or decrease, or if no log point fits into the limits
    """
    if self.__state != 1:
        return super()._lines(lines)
    self.__state = 2  # the progress section has been consumed

    n_rows = len(lines)
    if n_rows < 2:
        raise ValueError("lines must contain at least two elements, "
                         f"but contains {n_rows}.")

    columns = [c.strip() for c in lines[0].split(CSV_SEPARATOR)]
    fe_col: Final[int] = columns.index(PROGRESS_FES)
    ms_col: Final[int] = columns.index(PROGRESS_TIME_MILLIS)
    f_col: Final[int] = columns.index(PROGRESS_CURRENT_F)
    current_fes: int = -1
    current_ms: int = -1
    current_f: int | float = inf
    current_li_fe: int | None = None
    current_li_ms: int | None = None
    stop_fes: int | None = None
    stop_ms: int | None = None
    stop_f: int | float | None = None
    stop_li_fe: int | None = None
    stop_li_ms: int | None = None
    limit_fes: Final[int | float] = self.__limit_fes
    l_limit_ms: Final[int | float] = self.__limit_ms
    limit_f: Final[int | float] = self.__limit_f

    for line in lines[1:]:
        values = line.split(CSV_SEPARATOR)
        # the previous value is the lower bound: FEs and time must not
        # decrease from one log point to the next
        current_fes = check_to_int_range(
            values[fe_col], "fes", current_fes, 1_000_000_000_000_000)
        # the upper bound equals the largest time limit accepted elsewhere
        # in this module, so every point below a valid limit is parseable
        current_ms = check_to_int_range(
            values[ms_col], "ms", current_ms, 1_000_000_000_000)
        f: int | float = str_to_num(values[f_col])
        if (current_fes <= limit_fes) and (current_ms <= l_limit_ms):
            if f < current_f:  # can only update best within budget
                current_f = f
                current_li_fe = current_fes
                current_li_ms = current_ms
            # remember the last log point inside of the budget
            stop_ms = current_ms
            stop_fes = current_fes
            stop_f = current_f
            stop_li_fe = current_li_fe
            stop_li_ms = current_li_ms
        if (current_fes >= limit_fes) or (current_ms >= l_limit_ms) or \
                (current_f <= limit_f):
            self.__hit_goal = True
            break  # we can stop parsing the stuff

    if (stop_fes is None) or (stop_ms is None) or (stop_f is None) \
            or (current_fes <= 0) or (not isfinite(current_f)):
        raise ValueError(
            "Illegal state, no fitting data point found: stop_fes="
            f"{stop_fes}, stop_ms={stop_ms}, stop_f={stop_f}, "
            f"current_fes={current_fes}, current_ms={current_ms}, "
            f"current_f={current_f}.")

    if current_fes >= limit_fes:
        # the FE limit was reached: the run ends at most at the limit
        stop_fes = max(stop_fes, min(
            cast("int", limit_fes), current_fes))
    elif current_ms > l_limit_ms:
        # the time limit was exceeded between two log points: the last
        # point was too late, so at most one FE less was consumed
        stop_fes = max(stop_fes, current_fes - 1)
    else:
        stop_fes = max(stop_fes, current_fes)

    if current_ms >= l_limit_ms:
        # the time limit was reached: the run ends at most at the limit
        stop_ms = max(stop_ms, min(cast("int", l_limit_ms), current_ms))
    else:
        stop_ms = max(stop_ms, current_ms)

    self.__stop_fes = stop_fes
    self.__stop_ms = stop_ms
    self.__stop_f = stop_f
    self.__stop_li_fe = stop_li_fe
    self.__stop_li_ms = stop_li_ms
    return self._needs_more_lines()

977

978

979def from_logs(

980 path: str, max_fes: int | Callable[

981 [str, str], int | None] | None = None,

982 max_time_millis: int | Callable[

983 [str, str], int | None] | None = None,

984 goal_f: int | float | Callable[

985 [str, str], int | float | None] | None = None,

986 path_filter: Callable[[Path], bool] | None = None) \

987 -> Generator[EndResult, None, None]:

988 """

989 Parse a given path and yield all end results found.

990

991 If `path` identifies a file with suffix `.txt`, then this file is

992 parsed. The appropriate :class:`EndResult` is created and yielded.

993 If `path` identifies a directory, then this directory is parsed

994 recursively for each log file found, one record is yielded.

995

996 Via the parameters `max_fes`, `max_time_millis`, and `goal_f`, you can

997 set virtual limits for the objective function evaluations, the maximum

998 runtime, and the objective value. The :class:`EndResult` records will

999 then not represent the actual final state of the runs but be

1000 synthesized from the logged progress information. This, of course,

1001 requires such information to be present. It will also raise a

1002 `ValueError` if the goals are invalid, e.g., if a runtime limit is

1003 specified that is before the first logged points.

1004

1005 There is one caveat when specifying `max_time_millis`: Let's say that

1006 the log files only log improvements. Then you might have a log point

1007 for 7000 FEs, 1000ms, and f=100. The next log point could be 8000 FEs,

1008 1200ms, and f=90. Now if your time limit specified is 1100ms, we know

1009 that the end result is f=100 (because f=90 was reached too late) and

1010 that the total runtime is 1100ms, as this is the limit you specified

1011 and it was also reached. But we do not know the number of consumed

1012 FEs. We know you consumed at least 7000 FEs, but you did not consume

1013 8000 FEs. It would be wrong to claim that 7000 FEs were consumed,

1014 since it could have been more. We therefore set a virtual end point at

1015 7999 FEs. In terms of performance metrics such as the

1016 :mod:`~moptipy.evaluation.ert`, this would be the most conservative

1017 choice in that it does not over-estimate the speed of the algorithm.

1018 It can, however, lead to very big deviations from the actual values.

1019 For example, if your algorithm quickly converged to a local optimum

1020 and there simply is no log point that exceeds the virtual time limit

1021 but the original run had a huge FE-based budget while your virtual

1022 time limit was small, this could lead to an estimate of millions of

1023 FEs taking part within seconds...

1024

1025 :param path: the path to parse

1026 :param max_fes: the maximum FEs, a callable to compute the maximum

1027 FEs from the algorithm and instance name, or `None` if unspecified

1028 :param max_time_millis: the maximum runtime in milliseconds, a

1029 callable to compute the maximum runtime from the algorithm and

1030 instance name, or `None` if unspecified

1031 :param goal_f: the goal objective value, a callable to compute the

1032 goal objective value from the algorithm and instance name, or

1033 `None` if unspecified

1034 :param path_filter: a filter allowing us to skip paths or files. If

1035 this :class:`Callable` returns `True`, the file or directory is

1036 considered for parsing. If it returns `False`, it is skipped.

1037 """

1038 need_goals: bool = False

1039 if max_fes is not None:

1040 if not callable(max_fes):

1041 max_fes = check_int_range(

1042 max_fes, "max_fes", 1, 1_000_000_000_000_000)

1043 need_goals = True

1044 if max_time_millis is not None:

1045 if not callable(max_time_millis):

1046 max_time_millis = check_int_range(

1047 max_time_millis, "max_time_millis", 1, 1_000_000_000_000)

1048 need_goals = True

1049 if goal_f is not None:

1050 if callable(goal_f):

1051 need_goals = True

1052 else:

1053 if not isinstance(goal_f, int | float):

1054 raise type_error(goal_f, "goal_f", (int, float, None))

1055 if isfinite(goal_f):

1056 need_goals = True

1057 elif goal_f <= -inf:

1058 goal_f = None

1059 else:

1060 raise ValueError(f"goal_f={goal_f} is not permissible.")

1061 if need_goals:

1062 return __EndResultProgressLogParser(

1063 max_fes, max_time_millis, goal_f, path_filter).parse(path)

1064 return EndResultLogParser(path_filter).parse(path)

1065

1066

# If executed as script: convert the log files to an end results CSV file.
if __name__ == "__main__":
    summary: Final[str] = (
        "Convert log files obtained with moptipy to the end results CSV "
        "format that can be post-processed or exported to other tools.")
    details: Final[str] = (
        "This program recursively parses a folder hierarchy created by"
        " the moptipy experiment execution facility. This folder "
        "structure follows the scheme of algorithm/instance/log_file "
        "and has one log file per run. As result of the parsing, one "
        "CSV file (where columns are separated by ';') is created with"
        " one row per log file. This row contains the end-of-run state"
        " loaded from the log file. Whereas the log files may store "
        "the complete progress of one run of one algorithm on one "
        "problem instance as well as the algorithm configuration "
        "parameters, instance features, system settings, and the final"
        " results, the end results CSV file will only represent the "
        "final result quality, when it was obtained, how long the runs"
        " took, etc. This information is much denser and smaller and "
        "suitable for importing into other tools such as Excel or for "
        "postprocessing.")
    parser: Final[argparse.ArgumentParser] = moptipy_argparser(
        __file__, summary, details)

    # the two optional positional arguments: input folder and output file
    parser.add_argument(
        "source", nargs="?", default="./results", type=Path,
        help="the location of the experimental results, i.e., the root "
             "folder under which to search for log files")
    parser.add_argument(
        "dest", nargs="?", default="./evaluation/end_results.txt",
        type=Path,
        help="the path to the end results CSV file to be created")

    # the optional virtual limits, all unspecified by default
    for option, description, converter in (
            ("--maxFEs", "the maximum permitted FEs", int),
            ("--maxTime", "the maximum permitted time in milliseconds",
             int),
            ("--goalF", "the goal objective value", str_to_num)):
        parser.add_argument(
            option, help=description, type=converter, nargs="?",
            default=None)
    args: Final[argparse.Namespace] = parser.parse_args()

    to_csv(from_logs(args.source, args.maxFEs, args.maxTime, args.goalF),
           args.dest)

Coverage for moptipy / evaluation / end_results.py: 85%

413 statements