Coverage for moptipy / evaluation / end_statistics.py: 79%
670 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 03:05 +0000
1"""
2SampleStatistics aggregated over multiple instances of `EndResult`.
4The :mod:`~moptipy.evaluation.end_results` records hold the final result of
5a run of an optimization algorithm on a problem instance. Often, we do not
6want to compare these single results directly, but instead analyze summary
7statistics, such as the mean best objective value found. For this purpose,
8:class:`EndStatistics` exists. It summarizes the singular results from the
9runs into a record with the most important statistics.
10"""
11import argparse
12import os.path
13from dataclasses import dataclass
14from itertools import starmap
15from math import ceil, inf, isfinite
16from typing import Callable, Final, Generator, Iterable, Iterator, cast
18from pycommons.ds.sequences import reiterable
19from pycommons.io.console import logger
20from pycommons.io.csv import (
21 SCOPE_SEPARATOR,
22 csv_column,
23 csv_column_or_none,
24 csv_scope,
25 csv_select_scope,
26 csv_select_scope_or_none,
27 csv_str_or_none,
28 csv_val_or_none,
29)
30from pycommons.io.csv import CsvReader as CsvReaderBase
31from pycommons.io.csv import CsvWriter as CsvWriterBase
32from pycommons.io.path import Path, file_path, write_lines
33from pycommons.math.sample_statistics import (
34 KEY_MEAN_ARITH,
35 KEY_STDDEV,
36 SampleStatistics,
37)
38from pycommons.math.sample_statistics import CsvReader as StatReader
39from pycommons.math.sample_statistics import CsvWriter as StatWriter
40from pycommons.strings.string_conv import (
41 num_or_none_to_str,
42 str_to_num,
43)
44from pycommons.types import (
45 check_int_range,
46 type_error,
47 type_name_of,
48)
50from moptipy.api.logging import (
51 KEY_ALGORITHM,
52 KEY_BEST_F,
53 KEY_GOAL_F,
54 KEY_INSTANCE,
55 KEY_LAST_IMPROVEMENT_FE,
56 KEY_LAST_IMPROVEMENT_TIME_MILLIS,
57 KEY_MAX_FES,
58 KEY_MAX_TIME_MILLIS,
59 KEY_TOTAL_FES,
60 KEY_TOTAL_TIME_MILLIS,
61)
62from moptipy.evaluation._utils import (
63 _check_max_time_millis,
64)
65from moptipy.evaluation.base import (
66 DESC_ALGORITHM,
67 DESC_ENCODING,
68 DESC_INSTANCE,
69 DESC_OBJECTIVE_FUNCTION,
70 F_NAME_RAW,
71 F_NAME_SCALED,
72 KEY_ENCODING,
73 KEY_N,
74 KEY_OBJECTIVE_FUNCTION,
75 MultiRunData,
76 motipy_footer_bottom_comments,
77)
78from moptipy.evaluation.end_results import (
79 DESC_BEST_F,
80 DESC_GOAL_F,
81 DESC_LAST_IMPROVEMENT_FE,
82 DESC_LAST_IMPROVEMENT_TIME_MILLIS,
83 DESC_MAX_FES,
84 DESC_MAX_TIME_MILLIS,
85 DESC_TOTAL_FES,
86 DESC_TOTAL_TIME_MILLIS,
87 EndResult,
88)
89from moptipy.evaluation.end_results import from_csv as end_results_from_csv
90from moptipy.evaluation.end_results import from_logs as end_results_from_logs
91from moptipy.utils.help import moptipy_argparser
92from moptipy.utils.math import try_int, try_int_div
#: The key for the best objective value, scaled (divided) by the goal
#: objective value.
KEY_BEST_F_SCALED: Final[str] = KEY_BEST_F + "scaled"
#: The key for the number of successful runs.
KEY_N_SUCCESS: Final[str] = "successN"
#: The key for the last-improvement FEs of the successful runs only.
KEY_SUCCESS_FES: Final[str] = "successFEs"
#: The key for the last-improvement times (ms) of the successful runs only.
KEY_SUCCESS_TIME_MILLIS: Final[str] = "successTimeMillis"
#: The key for the empirically estimated running time (ERT) in FEs.
KEY_ERT_FES: Final[str] = "ertFEs"
#: The key for the empirically estimated running time (ERT) in milliseconds.
KEY_ERT_TIME_MILLIS: Final[str] = "ertTimeMillis"
@dataclass(frozen=True, init=False, order=False, eq=False)
class EndStatistics(MultiRunData):
    """
    Statistics over end results of one or multiple algorithm*instance setups.

    If one algorithm*instance is used, then `algorithm` and `instance` are
    defined. Otherwise, only the parameter which is the same over all recorded
    runs is defined.
    """

    #: The statistics about the best encountered result.
    best_f: SampleStatistics
    #: The statistics about the last improvement FE.
    last_improvement_fe: SampleStatistics
    #: The statistics about the last improvement time.
    last_improvement_time_millis: SampleStatistics
    #: The statistics about the total number of FEs.
    total_fes: SampleStatistics
    #: The statistics about the total time.
    total_time_millis: SampleStatistics
    #: The goal objective value.
    goal_f: SampleStatistics | int | float | None
    #: best_f / goal_f if goal_f is consistently defined and always positive.
    best_f_scaled: SampleStatistics | None
    #: The number of successful runs, if goal_f != None, else None.
    n_success: int | None
    #: The FEs to success, if n_success > 0, None otherwise.
    success_fes: SampleStatistics | None
    #: The time to success, if n_success > 0, None otherwise.
    success_time_millis: SampleStatistics | None
    #: The ERT if FEs, while is inf if n_success=0, None if goal_f is None,
    #: and finite otherwise.
    ert_fes: int | float | None
    #: The ERT if milliseconds, while is inf if n_success=0, None if goal_f
    #: is None, and finite otherwise.
    ert_time_millis: int | float | None
    #: The budget in FEs, if every run had one; None otherwise.
    max_fes: SampleStatistics | int | None
    #: The budget in milliseconds, if every run had one; None otherwise.
    max_time_millis: SampleStatistics | int | None

    def __init__(self,
                 algorithm: str | None,
                 instance: str | None,
                 objective: str | None,
                 encoding: str | None,
                 n: int,
                 best_f: SampleStatistics,
                 last_improvement_fe: SampleStatistics,
                 last_improvement_time_millis: SampleStatistics,
                 total_fes: SampleStatistics,
                 total_time_millis: SampleStatistics,
                 goal_f: float | int | SampleStatistics | None,
                 best_f_scaled: SampleStatistics | None,
                 n_success: int | None,
                 success_fes: SampleStatistics | None,
                 success_time_millis: SampleStatistics | None,
                 ert_fes: int | float | None,
                 ert_time_millis: int | float | None,
                 max_fes: SampleStatistics | int | None,
                 max_time_millis: SampleStatistics | int | None):
        """
        Create the end statistics of an experiment-setup combination.

        :param algorithm: the algorithm name, if all runs are with the same
            algorithm
        :param instance: the instance name, if all runs are on the same
            instance
        :param objective: the objective name, if all runs are on the same
            objective function, `None` otherwise
        :param encoding: the encoding name, if all runs are on the same
            encoding and an encoding was actually used, `None` otherwise
        :param n: the total number of runs
        :param best_f: statistics about the best achieved result
        :param last_improvement_fe: statistics about the last improvement FE
        :param last_improvement_time_millis: statistics about the last
            improvement time
        :param total_fes: statistics about the total FEs
        :param total_time_millis: statistics about the total runtime in
            milliseconds
        :param goal_f: if the goal objective value is not defined sometimes,
            this will be `None`. If it is always defined and always the same,
            then this will be that value. If different goal values exist, then
            this is the `SampleStatistics` record about them
        :param best_f_scaled: if `goal_f` is not `None` and greater than zero,
            then here we provide statistics about `best_f` divided by the
            corresponding `goal_f`
        :param n_success: the number of successful runs is only defined if
            `goal_f` is not `None` and counts the number of runs that reach or
            surpass their corresponding `goal_f`
        :param success_fes: if `goal_f` is not `None`,
            then this holds statistics about the last improvement FE of only
            the successful runs
        :param success_time_millis: if `goal_f` is not `None`, then this holds
            statistics about the last improvement times of only the successful
            runs
        :param ert_fes: if `goal_f` is always defined, then this is the
            empirically estimated running time to solve the problem in FEs if
            `n_success>0` and `inf` otherwise
        :param ert_time_millis: if `goal_f` is always defined, then this is
            the empirically estimated running time to solve the problem in
            milliseconds if `n_success>0` and `inf` otherwise
        :param max_fes: the budget in FEs, if any
        :param max_time_millis: the budget in terms of milliseconds
        :raises TypeError: if any argument has the wrong type
        :raises ValueError: if the arguments are inconsistent with each other
        """
        super().__init__(algorithm, instance, objective, encoding, n)

        # best_f: must be a statistics record over exactly n samples.
        if not isinstance(best_f, SampleStatistics):
            raise type_error(best_f, "best_f", SampleStatistics)
        object.__setattr__(self, "best_f", best_f)
        if best_f.n != n:
            raise ValueError(f"best_f.n={best_f.n} != n={n}")

        # last_improvement_fe: integer FEs in 1..10^15 over n samples.
        if not isinstance(last_improvement_fe, SampleStatistics):
            raise type_error(last_improvement_fe, "last_improvement_fe",
                             SampleStatistics)
        if last_improvement_fe.n != n:
            raise ValueError(
                f"last_improvement_fe.n={last_improvement_fe.n} != n={n}")
        check_int_range(
            last_improvement_fe.minimum, "last_improvement_fe.minimum",
            1, 1_000_000_000_000_000)
        check_int_range(
            last_improvement_fe.maximum, "last_improvement_fe.maximum",
            last_improvement_fe.minimum, 1_000_000_000_000_000)
        object.__setattr__(self, "last_improvement_fe", last_improvement_fe)

        # last_improvement_time_millis: integer ms in 0..10^11 over n samples.
        if not isinstance(last_improvement_time_millis, SampleStatistics):
            raise type_error(last_improvement_time_millis,
                             "last_improvement_time_millis", SampleStatistics)
        if last_improvement_time_millis.n != n:
            raise ValueError("last_improvement_time_millis.n="
                             f"{last_improvement_time_millis.n} != n={n}")
        check_int_range(
            last_improvement_time_millis.minimum,
            "last_improvement_time_millis.minimum",
            0, 100_000_000_000)
        check_int_range(
            last_improvement_time_millis.maximum,
            "last_improvement_time_millis.maximum",
            last_improvement_time_millis.minimum, 100_000_000_000)
        object.__setattr__(self, "last_improvement_time_millis",
                           last_improvement_time_millis)

        # total_fes: must be >= the corresponding last-improvement FEs.
        if not isinstance(total_fes, SampleStatistics):
            raise type_error(total_fes, "total_fes", SampleStatistics)
        if total_fes.n != n:
            raise ValueError(
                f"total_fes.n={total_fes.n} != n={n}")
        check_int_range(
            total_fes.minimum, "total_fes.minimum",
            last_improvement_fe.minimum, 1_000_000_000_000_000)
        check_int_range(
            total_fes.maximum, "total_fes.maximum",
            max(total_fes.minimum, last_improvement_fe.maximum),
            1_000_000_000_000_000)
        object.__setattr__(self, "total_fes", total_fes)

        # total_time_millis: must be >= the last-improvement times.
        if not isinstance(total_time_millis, SampleStatistics):
            raise type_error(total_time_millis, "total_time_millis",
                             SampleStatistics)
        if total_time_millis.n != n:
            raise ValueError(
                f"total_time_millis.n={total_time_millis.n} != n={n}")
        check_int_range(
            total_time_millis.minimum, "total_time_millis.minimum",
            last_improvement_time_millis.minimum, 100_000_000_000)
        check_int_range(
            total_time_millis.maximum, "total_time_millis.maximum",
            max(total_time_millis.minimum,
                last_improvement_time_millis.maximum),
            100_000_000_000)
        object.__setattr__(self, "total_time_millis", total_time_millis)

        # If goal_f is None, every goal-dependent statistic must be None too.
        if goal_f is None:
            if best_f_scaled is not None:
                raise ValueError(
                    "If goal_f is None, best_f_scaled must also be None, "
                    f"but is {type(best_f_scaled)}.")
            if n_success is not None:
                raise ValueError(
                    "If goal_f is None, n_success must also be None, "
                    f"but is {type(n_success)}.")
            if success_fes is not None:
                # FIX: the message previously misstated the violated
                # condition ("If success_fes is None, best_f_scaled ...").
                raise ValueError(
                    "If goal_f is None, success_fes must also be None, "
                    f"but is {type(success_fes)}.")
            if success_time_millis is not None:
                # FIX: message corrected analogously to success_fes above.
                raise ValueError(
                    "If goal_f is None, success_time_millis "
                    "must also be None, "
                    f"but is {type(success_time_millis)}.")
            if ert_fes is not None:
                raise ValueError(
                    "If goal_f is None, ert_fes must also be None, "
                    f"but is {type(ert_fes)}.")
            if ert_time_millis is not None:
                raise ValueError(
                    "If goal_f is None, ert_time_millis must also be None, "
                    f"but is {type(ert_time_millis)}.")
        else:  # goal_f is not None
            # Compact a degenerate statistics record to a single number and
            # treat a goal of -inf like "no goal defined".
            if isinstance(goal_f, SampleStatistics):
                if goal_f.n != n:
                    raise ValueError(f"goal_f.n={goal_f.n} != n={n}")
                goal_f = goal_f.compact(False)
            if isinstance(goal_f, float):
                goal_f = None if goal_f <= (-inf) else try_int(goal_f)
            elif not isinstance(goal_f, int | SampleStatistics):
                raise type_error(goal_f, "goal_f", (
                    int, float, SampleStatistics))

            # best_f_scaled only makes sense for strictly positive goals.
            if best_f_scaled is not None:
                goal_f_min: Final[int | float] = \
                    goal_f.minimum if isinstance(goal_f, SampleStatistics) \
                    else goal_f
                if goal_f_min <= 0:
                    raise ValueError(
                        f"best_f_scaled must be None if minimum goal_f "
                        f"({goal_f_min}) of goal_f {goal_f} is not positive,"
                        f" but is {best_f_scaled}.")
                if not isinstance(best_f_scaled, SampleStatistics):
                    raise type_error(best_f_scaled, "best_f_scaled",
                                     SampleStatistics)
                if best_f_scaled.n != n:
                    raise ValueError(
                        f"best_f_scaled.n={best_f_scaled.n} != n={n}")
                if best_f_scaled.minimum < 0:
                    raise ValueError(
                        "best_f_scaled cannot be negative, but encountered "
                        f"{best_f_scaled.minimum}.")

            check_int_range(n_success, "n_success")
            if not isinstance(ert_fes, int | float):
                raise type_error(ert_fes, "ert_fes", (int, float))
            if not isinstance(ert_time_millis, int | float):
                raise type_error(ert_time_millis, "ert_time_millis",
                                 (int, float))

            if n_success > 0:
                # Success statistics must lie within the bounds of the
                # last-improvement statistics of all runs.
                if not isinstance(success_fes, SampleStatistics):
                    raise type_error(success_fes,
                                     "if n_success>0, then success_fes",
                                     SampleStatistics)
                if success_fes.n != n_success:
                    raise ValueError(f"success_fes.n={success_fes.n} != "
                                     f"n_success={n_success}")
                check_int_range(
                    success_fes.minimum, "success_fes.minimum",
                    last_improvement_fe.minimum, 1_000_000_000_000_000)
                check_int_range(
                    success_fes.maximum, "success_fes.maximum",
                    success_fes.minimum, last_improvement_fe.maximum)
                if not isinstance(success_time_millis, SampleStatistics):
                    raise type_error(
                        success_time_millis,
                        "if n_success>0, then success_time_millis",
                        SampleStatistics)
                if success_time_millis.n != n_success:
                    raise ValueError(
                        f"success_time_millis.n={success_time_millis.n} != "
                        f"n_success={n_success}")
                check_int_range(
                    success_time_millis.minimum,
                    "success_time_millis.minimum",
                    last_improvement_time_millis.minimum, 100_000_000_000)
                check_int_range(
                    success_time_millis.maximum,
                    "success_time_millis.maximum",
                    success_time_millis.minimum,
                    last_improvement_time_millis.maximum)
                # The ERTs must lie between the fastest success and the
                # total consumed budget over all n runs.
                ert_fes = try_int(ert_fes)
                if ert_fes < success_fes.minimum:
                    raise ValueError(
                        "ert_fes must be >= "
                        f"{success_fes.minimum}, but is {ert_fes}.")
                ert_fe_max = ceil(total_fes.mean_arith * n)
                if ert_fes > ert_fe_max:
                    raise ValueError(
                        "ert_fes must be <= "
                        f"{ert_fe_max}, but is {ert_fes}.")

                ert_time_millis = try_int(ert_time_millis)
                if ert_time_millis < success_time_millis.minimum:
                    raise ValueError(
                        "ert_time_millis must be >= "
                        f"{success_time_millis.minimum}, but "
                        f"is {ert_time_millis}.")
                ert_time_max = ceil(total_time_millis.mean_arith * n)
                if ert_time_millis > ert_time_max:
                    raise ValueError(
                        "ert_time_millis must be <= "
                        f"{ert_time_max}, but is {ert_time_millis}.")
            else:
                # No success: success statistics absent, both ERTs infinite.
                if success_fes is not None:
                    raise ValueError(
                        "If n_success<=0, then success_fes must be None, "
                        f"but it's a {type_name_of(success_fes)}.")
                if success_time_millis is not None:
                    raise ValueError(
                        "If n_success<=0, then success_time_millis must be "
                        f"None, but it is a "
                        f"{type_name_of(success_time_millis)}.")
                if ert_fes < inf:
                    raise ValueError(
                        "If n_success<=0, then ert_fes must "
                        f"be inf, but it's {ert_fes}.")
                if ert_time_millis < inf:
                    raise ValueError(
                        "If n_success<=0, then ert_time_millis must "
                        f"be inf, but it's {ert_time_millis}.")

        object.__setattr__(self, "goal_f", goal_f)
        object.__setattr__(self, "best_f_scaled", best_f_scaled)
        object.__setattr__(self, "n_success", n_success)
        object.__setattr__(self, "success_fes", success_fes)
        object.__setattr__(self, "success_time_millis", success_time_millis)
        object.__setattr__(self, "ert_fes", ert_fes)
        object.__setattr__(self, "ert_time_millis", ert_time_millis)

        # max_fes: compact a degenerate statistics record to a single int.
        if isinstance(max_fes, SampleStatistics):
            if max_fes.n != n:
                raise ValueError(f"max_fes.n={max_fes.n} != n={n}")
            max_fes_f: int | float | SampleStatistics = max_fes.compact(
                needs_n=False)
            if isinstance(max_fes_f, float):
                raise type_error(max_fes_f, "max_fes", (
                    int, SampleStatistics, None))
            max_fes = max_fes_f
        if isinstance(max_fes, int):
            if (max_fes < total_fes.maximum) or (max_fes < 0):
                raise ValueError(f"0<max_fes must be >= "
                                 f"{total_fes.maximum}, but is {max_fes}.")
        elif isinstance(max_fes, SampleStatistics):
            if (max_fes.minimum < total_fes.minimum) or (
                    max_fes.minimum <= 0):
                raise ValueError(
                    f"0<max_fes.minimum must be >= {total_fes.minimum},"
                    f" but is {max_fes.minimum}.")
            if max_fes.maximum < total_fes.maximum:
                raise ValueError(
                    f"max_fes.maximum must be >= {total_fes.maximum},"
                    f" but is {max_fes.maximum}.")
        elif max_fes is not None:
            raise type_error(max_fes, "max_fes", (int, SampleStatistics, None))
        object.__setattr__(self, "max_fes", max_fes)

        # max_time_millis: compact a degenerate statistics record, mirroring
        # the max_fes handling above.
        if isinstance(max_time_millis, SampleStatistics):
            if max_time_millis.n != n:
                raise ValueError(
                    f"max_time_millis.n={max_time_millis.n} != n={n}")
            max_time_millis_f: int | float | SampleStatistics = (
                max_time_millis.compact(False))
            if isinstance(max_time_millis_f, float):
                raise type_error(max_time_millis_f, "max_time_millis", (
                    int, SampleStatistics, None))
            # FIX: the compacted value was previously computed and validated
            # but never assigned back, so the compaction (to an int when all
            # budgets are identical) was silently discarded — unlike the
            # parallel `max_fes = max_fes_f` assignment above.
            max_time_millis = max_time_millis_f
        if isinstance(max_time_millis, int):
            _check_max_time_millis(max_time_millis,
                                   total_fes.minimum,
                                   total_time_millis.maximum)
        elif isinstance(max_time_millis, SampleStatistics):
            _check_max_time_millis(max_time_millis.minimum,
                                   total_fes.minimum,
                                   total_time_millis.minimum)
            _check_max_time_millis(max_time_millis.maximum,
                                   total_fes.minimum,
                                   total_time_millis.maximum)
        elif max_time_millis is not None:
            raise type_error(max_time_millis, "max_time_millis",
                             (int, SampleStatistics, None))
        object.__setattr__(self, "max_time_millis", max_time_millis)

    # NOTE: the getters below deliberately validate `self`, because they are
    # also used as unbound callables (see the module-level getter tables).

    def get_n(self) -> int:
        """
        Get the number of runs.

        :returns: the number of runs.
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.n

    def get_best_f(self) -> SampleStatistics:
        """
        Get the statistics about the best objective value reached.

        :returns: the statistics about the best objective value reached
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.best_f

    def get_last_improvement_fe(self) -> SampleStatistics:
        """
        Get the statistics about the last improvement FE.

        :returns: the statistics about the last improvement FE
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.last_improvement_fe

    def get_last_improvement_time_millis(self) -> SampleStatistics:
        """
        Get the statistics about the last improvement time millis.

        :returns: the statistics about the last improvement time millis
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.last_improvement_time_millis

    def get_total_fes(self) -> SampleStatistics:
        """
        Get the statistics about the total FEs.

        :returns: the statistics about the total FEs
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.total_fes

    def get_total_time_millis(self) -> SampleStatistics:
        """
        Get the statistics about the total time millis.

        :returns: the statistics about the total time millis
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.total_time_millis

    def get_goal_f(self) -> SampleStatistics | int | float | None:
        """
        Get the statistics about the goal objective value.

        :returns: the statistics about the goal objective value
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.goal_f

    def get_best_f_scaled(self) -> SampleStatistics | None:
        """
        Get the statistics about the scaled best objective value.

        :returns: the statistics about the scaled best objective value
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.best_f_scaled

    def get_n_success(self) -> int | None:
        """
        Get the number of successful runs.

        :returns: the number of successful runs.
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.n_success

    def get_success_fes(self) -> SampleStatistics | None:
        """
        Get the statistics about the FEs until success of the successful runs.

        :returns: the statistics about the FEs until success of the successful
            runs
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.success_fes

    def get_success_time_millis(self) -> SampleStatistics | None:
        """
        Get the statistics about the ms until success of the successful runs.

        :returns: the statistics about the ms until success of the successful
            runs
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.success_time_millis

    def get_ert_fes(self) -> int | float | None:
        """
        Get the expected FEs until success.

        :returns: the statistics about the expected FEs until success.
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.ert_fes

    def get_ert_time_millis(self) -> int | float | None:
        """
        Get the expected milliseconds until success.

        :returns: the statistics about the expected milliseconds until
            success.
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.ert_time_millis

    def get_max_fes(self) -> SampleStatistics | int | None:
        """
        Get the statistics about the maximum permitted FEs.

        :returns: the statistics about the maximum permitted FEs
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.max_fes

    def get_max_time_millis(self) -> SampleStatistics | int | None:
        """
        Get the statistics about the maximum permitted runtime in ms.

        :returns: the statistics about the maximum permitted runtime in ms
        """
        if not isinstance(self, EndStatistics):
            raise type_error(self, "self", EndStatistics)
        return self.max_time_millis
def create(source: Iterable[EndResult]) -> EndStatistics:
    """
    Create an `EndStatistics` Record from an Iterable of `EndResult`.

    Aggregates all given end results into a single statistics record. The
    setup fields (`algorithm`, `instance`, `objective`, `encoding`) are kept
    only if they are identical over all results; otherwise they become
    `None`. Goal-dependent statistics degrade to `None` as soon as one
    result lacks the corresponding datum (e.g., has no `goal_f`).

    :param source: the source
    :return: the statistics
    :rtype: EndStatistics
    """
    if not isinstance(source, Iterable):
        raise type_error(source, "source", Iterable)

    # Accumulators; a list set to None means "not defined for all results".
    n: int = 0
    best_f: list[int | float] = []
    last_improvement_fe: list[int] = []
    last_improvement_time_millis: list[int] = []
    total_fes: list[int] = []
    total_time_millis: list[int] = []
    max_fes: list[int] | None = []
    max_fes_same: bool = True  # True while all max_fes values are equal
    max_time_millis: list[int] | None = []
    max_time_same: bool = True  # True while all max-time values are equal
    goal_f: list[int | float] | None = []
    goal_f_same: bool = True  # True while all goal_f values are equal
    best_f_scaled: list[float] | None = []
    n_success: int | None = 0
    success_fes: list[int] | None = []
    success_times: list[int] | None = []

    # ERT numerators: consumed FEs/time summed over all runs, where a
    # successful run contributes only its effort until the last improvement.
    fes: int = 0
    time: int = 0
    algorithm: str | None = None
    instance: str | None = None
    objective: str | None = None
    encoding: str | None = None

    for er in source:
        if not isinstance(er, EndResult):
            raise type_error(er, "end result", EndResult)
        # Track setup fields: keep them only if shared by all results.
        if n == 0:
            algorithm = er.algorithm
            instance = er.instance
            objective = er.objective
            encoding = er.encoding
        else:
            if algorithm != er.algorithm:
                algorithm = None
            if instance != er.instance:
                instance = None
            if objective != er.objective:
                objective = None
            if encoding != er.encoding:
                encoding = None
        n += 1
        best_f.append(er.best_f)
        last_improvement_fe.append(er.last_improvement_fe)
        last_improvement_time_millis.append(
            er.last_improvement_time_millis)
        total_fes.append(er.total_fes)
        total_time_millis.append(er.total_time_millis)
        # Budgets: one missing value disables the whole statistic.
        if er.max_fes is None:
            max_fes = None
        elif max_fes is not None:
            if n > 1:
                max_fes_same = max_fes_same \
                    and (max_fes[-1] == er.max_fes)
            max_fes.append(er.max_fes)
        if er.max_time_millis is None:
            max_time_millis = None
        elif max_time_millis is not None:
            if n > 1:
                max_time_same = \
                    max_time_same \
                    and (max_time_millis[-1] == er.max_time_millis)
            max_time_millis.append(er.max_time_millis)

        # Goal-dependent statistics: all become None if any goal is missing.
        if er.goal_f is None:
            goal_f = None
            best_f_scaled = None
            n_success = None
            success_fes = None
            success_times = None
        elif goal_f is not None:
            if n > 1:
                goal_f_same = goal_f_same and (goal_f[-1] == er.goal_f)
            goal_f.append(er.goal_f)

            # Scaling only makes sense for strictly positive goals.
            if er.goal_f <= 0:
                best_f_scaled = None
            elif best_f_scaled is not None:
                best_f_scaled.append(er.best_f / er.goal_f)

            # A run succeeds if it reached or surpassed its goal.
            if er.best_f <= er.goal_f:
                n_success += 1
                success_fes.append(er.last_improvement_fe)
                success_times.append(er.last_improvement_time_millis)
                fes += er.last_improvement_fe
                time += er.last_improvement_time_millis
            else:
                fes += er.total_fes
                time += er.total_time_millis
    if n <= 0:
        raise ValueError("There must be at least one end result record.")

    # Compact single-valued goal/budget lists to scalars; the ERT is the
    # consumed effort divided by the number of successes (inf if none).
    return EndStatistics(
        algorithm,
        instance,
        objective,
        encoding,
        n,
        SampleStatistics.from_samples(best_f),
        SampleStatistics.from_samples(last_improvement_fe),
        SampleStatistics.from_samples(last_improvement_time_millis),
        SampleStatistics.from_samples(total_fes),
        SampleStatistics.from_samples(total_time_millis),
        None if (goal_f is None)
        else (goal_f[0] if goal_f_same else SampleStatistics.from_samples(
            goal_f)),
        None if (best_f_scaled is None)
        else SampleStatistics.from_samples(best_f_scaled),
        n_success,
        None if (n_success is None) or (n_success <= 0)
        else SampleStatistics.from_samples(success_fes),
        None if (n_success is None) or (n_success <= 0)
        else SampleStatistics.from_samples(success_times),
        None if (n_success is None)
        else (inf if (n_success <= 0) else try_int_div(fes, n_success)),
        None if (n_success is None) else
        (inf if (n_success <= 0) else try_int_div(time, n_success)),
        None if max_fes is None else
        (max_fes[0] if max_fes_same else SampleStatistics.from_samples(
            max_fes)),
        None if max_time_millis is None
        else (max_time_millis[0] if max_time_same
              else SampleStatistics.from_samples(max_time_millis)))
def from_end_results(source: Iterable[EndResult],
                     join_all_algorithms: bool = False,
                     join_all_instances: bool = False,
                     join_all_objectives: bool = False,
                     join_all_encodings: bool = False) \
        -> Generator[EndStatistics, None, None]:
    """
    Aggregate statistics over a stream of end results.

    The end results are grouped by every key component (algorithm, instance,
    objective, encoding) that is not joined, and one :class:`EndStatistics`
    record is generated per group, in sorted key order.

    :param source: the stream of end results
    :param join_all_algorithms: should the statistics be aggregated
        over all algorithms
    :param join_all_instances: should the statistics be aggregated
        over all instances
    :param join_all_objectives: should the statistics be aggregated over
        all objectives?
    :param join_all_encodings: should statistics be aggregated over all
        encodings
    :returns: iterates over the generated end statistics records
    """
    if not isinstance(source, Iterable):
        raise type_error(source, "source", Iterable)
    if not isinstance(join_all_algorithms, bool):
        raise type_error(join_all_algorithms,
                         "join_all_algorithms", bool)
    if not isinstance(join_all_instances, bool):
        raise type_error(join_all_instances, "join_all_instances", bool)
    if not isinstance(join_all_objectives, bool):
        raise type_error(join_all_objectives, "join_all_objectives", bool)
    if not isinstance(join_all_encodings, bool):
        raise type_error(join_all_encodings, "join_all_encodings", bool)

    # If everything is joined, all results collapse into a single record.
    if (join_all_algorithms and join_all_instances
            and join_all_objectives and join_all_encodings):
        yield create(source)
        return

    # Group the results by the key components that are not joined.
    sorter: dict[tuple[str, str, str, str], list[EndResult]] = {}
    for er in source:
        if not isinstance(er, EndResult):
            # FIX: report the offending element `er`; previously the whole
            # `source` iterable was passed to type_error here.
            raise type_error(er, "end results from source",
                             EndResult)
        key = ("" if join_all_algorithms else er.algorithm,
               "" if join_all_instances else er.instance,
               "" if join_all_objectives else er.objective,
               "" if join_all_encodings else (
                   "" if er.encoding is None else er.encoding))
        sorter.setdefault(key, []).append(er)

    if len(sorter) <= 0:
        raise ValueError("source must not be empty")

    if len(sorter) > 1:
        for key in sorted(sorter.keys()):
            yield create(sorter[key])
    else:
        yield create(next(iter(sorter.values())))  #: pylint: disable=R1708
def to_csv(data: EndStatistics | Iterable[EndStatistics],
           file: str) -> Path:
    """
    Store a set of :class:`EndStatistics` in a CSV file.

    A single record is accepted as well and treated like a one-element
    sequence. The parent directory of `file` is created if necessary.

    :param data: the data to store
    :param file: the file to generate
    :return: the path to the generated CSV file
    """
    path: Final[Path] = Path(file)
    logger(f"Writing end result statistics to CSV file {path!r}.")
    path.ensure_parent_dir_exists()
    # Normalize a single record into a one-element tuple before writing.
    records: Iterable[EndStatistics] = \
        (data, ) if isinstance(data, EndStatistics) else data
    with path.open_for_write() as out:
        write_lines(CsvWriter.write(records), out)
    logger(f"Done writing end result statistics to CSV file {path!r}.")
    return path
def from_csv(file: str) -> Generator[EndStatistics, None, None]:
    """
    Parse a CSV file and collect all encountered :class:`EndStatistics`.

    :param file: the file to parse
    :returns: the iterator with the results
    """
    path: Final[Path] = file_path(file)
    logger(f"Begin reading end result statistics from CSV file {path!r}.")
    # Stream the records lazily while the file is open.
    with path.open_for_read() as stream:
        yield from CsvReader.read(rows=stream)
    logger("Finished reading end result statistics from CSV "
           f"file {path!r}.")
#: the internal getters that can work directly: maps a dimension name (CSV
#: key or alias) to the unbound `EndStatistics` getter that produces the
#: corresponding value or statistics record; the bound `.get` returns `None`
#: for unknown names
__PROPERTIES: Final[Callable[[str], Callable[[
    EndStatistics], SampleStatistics | int | float | None] | None]] = {
    KEY_N: EndStatistics.get_n,
    KEY_N_SUCCESS: EndStatistics.get_n_success,
    KEY_ERT_FES: EndStatistics.get_ert_fes,
    KEY_ERT_TIME_MILLIS: EndStatistics.get_ert_time_millis,
    KEY_GOAL_F: EndStatistics.get_goal_f,
    KEY_MAX_TIME_MILLIS: EndStatistics.get_max_time_millis,
    KEY_MAX_FES: EndStatistics.get_max_fes,
    KEY_BEST_F: EndStatistics.get_best_f,
    F_NAME_RAW: EndStatistics.get_best_f,
    KEY_LAST_IMPROVEMENT_FE: EndStatistics.get_last_improvement_fe,
    "last improvement FE": EndStatistics.get_last_improvement_fe,
    KEY_LAST_IMPROVEMENT_TIME_MILLIS:
        EndStatistics.get_last_improvement_time_millis,
    "last improvement ms": EndStatistics.get_last_improvement_time_millis,
    KEY_BEST_F_SCALED: EndStatistics.get_best_f_scaled,
    KEY_SUCCESS_FES: EndStatistics.get_success_fes,
    KEY_SUCCESS_TIME_MILLIS: EndStatistics.get_success_time_millis,
    F_NAME_SCALED: EndStatistics.get_best_f_scaled,
    KEY_TOTAL_FES: EndStatistics.get_total_fes,
    "fes": EndStatistics.get_total_fes,
    KEY_TOTAL_TIME_MILLIS: EndStatistics.get_total_time_millis,
    "ms": EndStatistics.get_total_time_millis,
    "f": EndStatistics.get_best_f,
    "budgetFEs": EndStatistics.get_max_fes,
    "budgetMS": EndStatistics.get_max_time_millis,
}.get

#: the success keys: membership test for dimensions that only exist for
#: successful runs (bound `__contains__` of the key set)
__SUCCESS_KEYS: Final[Callable[[str], bool]] = {
    KEY_SUCCESS_FES, KEY_SUCCESS_TIME_MILLIS,
}.__contains__

#: the internal static getters: dimensions whose getters already yield plain
#: numbers (or `None`) and thus need no statistics-level post-processing
__STATIC: Final[dict[str, Callable[[EndStatistics], int | float | None]]] = {
    KEY_N: EndStatistics.get_n,
    KEY_N_SUCCESS: EndStatistics.get_n_success,
    KEY_ERT_FES: EndStatistics.get_ert_fes,
    KEY_ERT_TIME_MILLIS: EndStatistics.get_ert_time_millis,
}
def getter(dimension: str) -> Callable[[EndStatistics], int | float | None]:
    """
    Create a function that obtains the given dimension from EndStatistics.

    The `dimension` is either a plain key (e.g. `N` or the ERT keys) or a
    property name optionally followed by a statistic name separated by
    `SCOPE_SEPARATOR` (e.g. "bestF" plus "mean"); if no statistic is
    given, the arithmetic mean is used. Composed getters are cached in
    the module-level `__STATIC` dict, so repeated calls with the same
    `dimension` return the same callable.

    :param dimension: the dimension
    :returns: a callable that returns the value corresponding to the
        dimension
    :raises ValueError: if `dimension` cannot be resolved
    """
    dimension = str.strip(dimension)
    # fast path: plain numeric properties and previously cached getters
    direct: Callable[[EndStatistics], int | float | None] = \
        __STATIC.get(dimension)
    if direct is not None:
        return direct

    # split into (property, statistic); at most one separator is allowed
    names: Final[list[str]] = str.split(str.strip(dimension), SCOPE_SEPARATOR)
    n_names: Final[int] = list.__len__(names)
    if not (0 < n_names < 3):
        raise ValueError(
            f"Invalid name combination {dimension!r} -> {names!r}.")
    # getter_1 extracts the (possibly aggregated) property value
    getter_1: Final[Callable[[
        EndStatistics], int | float | SampleStatistics | None] | None] = \
        __PROPERTIES(names[0])
    if getter_1 is None:
        raise ValueError(f"Invalid dimension {names[0]!r} in {dimension!r}.")
    # getter_2 extracts the requested statistic from a SampleStatistics;
    # it defaults to the arithmetic mean if no statistic was named
    getter_2: Final[Callable[[
        SampleStatistics], int | float | None]] = \
        SampleStatistics.getter(
            names[1] if n_names > 1 else KEY_MEAN_ARITH)

    # relies on SampleStatistics.getter returning the identical callable
    # for the same key -- TODO confirm this caching guarantee upstream
    if getter_2 is SampleStatistics.getter(KEY_STDDEV):  # it is sd
        # the sample size differs for the success-only dimensions
        n_prop: Final[Callable[[EndStatistics], int | None]] = \
            EndStatistics.get_n_success if __SUCCESS_KEYS(
                names[0]) else EndStatistics.get_n

        # default arguments bind getter_1/getter_2/n_prop at definition
        # time, avoiding late-binding closure surprises
        def __combo_sd(
                data: EndStatistics, __g1=getter_1, __g2=getter_2,
                __n=n_prop) -> int | float | None:
            val: int | float | SampleStatistics | None = __g1(data)
            if val is None:
                return None
            if isinstance(val, int | float):
                # a single plain number means all samples were equal,
                # so the standard deviation is 0 (if any samples exist)
                n = __n(data)
                return None if (n is None) or (n <= 0) else 0
            return __g2(val)
        direct = cast("Callable[[EndStatistics], int | float | None]",
                      __combo_sd)
    else:  # any other form of mean or statistic

        def __combo_no_sd(data: EndStatistics,
                          __g1=getter_1, __g2=getter_2) -> int | float | None:
            val: int | float | SampleStatistics | None = __g1(data)
            # a plain number is its own mean/median/min/max, pass through
            if (val is None) or (isinstance(val, int | float)):
                return val
            return __g2(val)
        direct = cast("Callable[[EndStatistics], int | float | None]",
                      __combo_no_sd)

    # cache the composed getter for subsequent lookups
    __STATIC[dimension] = direct
    return direct
def _to_csv_writer(
        data: Iterable[EndStatistics],
        get_func: Callable[
            [EndStatistics], SampleStatistics | int | float | None],
        n_func: Callable[[EndStatistics], int],
        scope: str | None = None,
        what_short: str | None = None,
        what_long: str | None = None) -> StatWriter | None:
    """
    Get a CSV Writer for the given data subset.

    :param data: the data iterator
    :param get_func: the getter for the value
    :param n_func: the n-getter
    :param scope: the scope to use
    :param what_short: the short description
    :param what_long: the long description
    :returns: the writer, if there was any associated data
    """
    # collect (value, n) pairs for all records where the value exists
    pairs: list[tuple[SampleStatistics | int | float, int]] = []
    for record in data:
        value = get_func(record)
        if value is not None:
            pairs.append((value, n_func(record)))
    if not pairs:  # no record offers this dimension: no writer needed
        return None
    # normalize plain numbers into SampleStatistics records lazily
    return StatWriter(
        data=(SampleStatistics.from_single_value(v, n) for v, n in pairs),
        scope=scope, n_not_needed=True,
        what_short=what_short, what_long=what_long)
class CsvWriter(CsvWriterBase[EndStatistics]):
    """A class for CSV writing of :class:`EndStatistics`."""

    def __init__(self, data: Iterable[EndStatistics],
                 scope: str | None = None) -> None:
        """
        Initialize the csv writer.

        :param scope: the prefix to be pre-pended to all columns
        :param data: the data to write
        """
        data = reiterable(data)
        super().__init__(data, scope)
        # Scan the data once to detect which optional columns are needed.
        # `checker` is a bit mask with one bit per optional field; once
        # every optional field has been seen (mask == 0), we stop early.
        checker: int = 127
        has_algorithm: bool = False
        has_instance: bool = False
        has_objective: bool = False
        has_encoding: bool = False
        has_n_success: bool = False
        has_ert_fes: bool = False
        has_ert_time_millis: bool = False
        for es in data:
            if es.algorithm is not None:
                has_algorithm = True
                checker &= ~1
            if es.instance is not None:
                has_instance = True
                checker &= ~2
            if es.objective is not None:
                has_objective = True
                checker &= ~4
            if es.encoding is not None:
                has_encoding = True
                checker &= ~8
            if es.n_success is not None:
                has_n_success = True
                checker &= ~16
            if es.ert_fes is not None:
                has_ert_fes = True
                checker &= ~32
            if es.ert_time_millis is not None:
                has_ert_time_millis = True
                checker &= ~64
            if checker == 0:
                break

        #: do we put the algorithm column?
        self.__has_algorithm: Final[bool] = has_algorithm
        #: do we put the instance column?
        self.__has_instance: Final[bool] = has_instance
        #: do we put the objective column?
        self.__has_objective: Final[bool] = has_objective
        #: do we put the encoding column?
        self.__has_encoding: Final[bool] = has_encoding
        #: do we put the n_success column?
        self.__has_n_success: Final[bool] = has_n_success
        #: do we put the ert-fes column?
        self.__has_ert_fes: Final[bool] = has_ert_fes
        #: do we put the ert time millis column?
        self.__has_ert_time_millis: Final[bool] = has_ert_time_millis

        # The following sub-writers are None unless at least one record
        # actually provides the corresponding optional value.
        self.__goal_f: Final[StatWriter | None] = _to_csv_writer(
            data, EndStatistics.get_goal_f, EndStatistics.get_n,
            csv_scope(scope, KEY_GOAL_F), KEY_GOAL_F,
            "the goal objective value after which the runs can stop")
        self.__best_f_scaled: Final[StatWriter | None] = _to_csv_writer(
            data, EndStatistics.get_best_f_scaled, EndStatistics.get_n,
            csv_scope(scope, KEY_BEST_F_SCALED), KEY_BEST_F_SCALED,
            f"best objective value reached ({KEY_BEST_F}), divided by"
            f" the goal objective value ({KEY_GOAL_F})")
        self.__success_fes: Final[StatWriter | None] = _to_csv_writer(
            data, EndStatistics.get_success_fes, EndStatistics.get_n_success,
            csv_scope(scope, KEY_SUCCESS_FES), KEY_SUCCESS_FES,
            f"the FEs needed to reach {KEY_GOAL_F} for the successful runs")
        self.__success_time_millis: Final[StatWriter | None] = _to_csv_writer(
            data, EndStatistics.get_success_time_millis,
            EndStatistics.get_n_success, csv_scope(
                scope, KEY_SUCCESS_TIME_MILLIS), KEY_SUCCESS_TIME_MILLIS,
            f"the milliseconds needed to reach {KEY_GOAL_F} for the "
            "successful runs")
        self.__max_fes: Final[StatWriter | None] = _to_csv_writer(
            data, EndStatistics.get_max_fes, EndStatistics.get_n,
            csv_scope(scope, KEY_MAX_FES), KEY_MAX_FES,
            "the maximum number of FEs in the computational budget")
        self.__max_time_millis: Final[StatWriter | None] = _to_csv_writer(
            data, EndStatistics.get_max_time_millis, EndStatistics.get_n,
            csv_scope(scope, KEY_MAX_TIME_MILLIS), KEY_MAX_TIME_MILLIS,
            "the maximum milliseconds per run in the computational budget")

        # These statistics are present in every record, so their writers
        # are always created.
        #: the best objective value reached
        self.__best_f: Final[StatWriter] = StatWriter(
            data=map(EndStatistics.get_best_f, data),
            scope=csv_scope(scope, KEY_BEST_F),
            n_not_needed=True, what_short=KEY_BEST_F,
            what_long="the best objective value reached per run")
        #: the FE when the last improvement happened
        self.__life: Final[StatWriter] = StatWriter(
            data=map(EndStatistics.get_last_improvement_fe, data),
            scope=csv_scope(scope, KEY_LAST_IMPROVEMENT_FE),
            n_not_needed=True, what_short=KEY_LAST_IMPROVEMENT_FE,
            what_long="the FE when the last improvement happened in a run")
        #: the milliseconds when the last improvement happened
        self.__lims: Final[StatWriter] = StatWriter(
            data=map(EndStatistics.get_last_improvement_time_millis, data),
            scope=csv_scope(
                scope, KEY_LAST_IMPROVEMENT_TIME_MILLIS),
            n_not_needed=True, what_short=KEY_LAST_IMPROVEMENT_TIME_MILLIS,
            what_long="the millisecond when the last "
                      "improvement happened in a run")
        #: the total FEs
        self.__total_fes: Final[StatWriter] = StatWriter(
            data=map(EndStatistics.get_total_fes, data),
            scope=csv_scope(scope, KEY_TOTAL_FES),
            n_not_needed=True, what_short=KEY_TOTAL_FES,
            what_long="the total FEs consumed by the runs")
        #: the total milliseconds
        self.__total_ms: Final[StatWriter] = StatWriter(
            data=map(EndStatistics.get_total_time_millis, data),
            scope=csv_scope(scope, KEY_TOTAL_TIME_MILLIS),
            n_not_needed=True, what_short=KEY_TOTAL_TIME_MILLIS,
            what_long="the total millisecond consumed by a run")

    def get_column_titles(self) -> Iterator[str]:
        """
        Get the column titles.

        :returns: the column titles
        """
        p: Final[str] = self.scope
        if self.__has_algorithm:
            yield csv_scope(p, KEY_ALGORITHM)
        if self.__has_instance:
            yield csv_scope(p, KEY_INSTANCE)
        if self.__has_objective:
            yield csv_scope(p, KEY_OBJECTIVE_FUNCTION)
        if self.__has_encoding:
            yield csv_scope(p, KEY_ENCODING)
        yield csv_scope(p, KEY_N)
        yield from self.__best_f.get_column_titles()
        yield from self.__life.get_column_titles()
        yield from self.__lims.get_column_titles()
        yield from self.__total_fes.get_column_titles()
        yield from self.__total_ms.get_column_titles()
        if self.__goal_f is not None:
            yield from self.__goal_f.get_column_titles()
        if self.__best_f_scaled is not None:
            yield from self.__best_f_scaled.get_column_titles()
        if self.__has_n_success:
            yield csv_scope(p, KEY_N_SUCCESS)
        if self.__success_fes is not None:
            yield from self.__success_fes.get_column_titles()
        if self.__success_time_millis is not None:
            yield from self.__success_time_millis.get_column_titles()
        if self.__has_ert_fes:
            yield csv_scope(p, KEY_ERT_FES)
        if self.__has_ert_time_millis:
            yield csv_scope(p, KEY_ERT_TIME_MILLIS)
        if self.__max_fes is not None:
            yield from self.__max_fes.get_column_titles()
        if self.__max_time_millis is not None:
            yield from self.__max_time_millis.get_column_titles()

    def get_row(self, data: EndStatistics) -> Iterable[str]:
        """
        Render a single end result record to a CSV row.

        :param data: the end result record
        :returns: the row strings
        """
        if self.__has_algorithm:
            yield "" if data.algorithm is None else data.algorithm
        if self.__has_instance:
            yield "" if data.instance is None else data.instance
        if self.__has_objective:
            yield "" if data.objective is None else data.objective
        if self.__has_encoding:
            yield "" if data.encoding is None else data.encoding
        yield str(data.n)
        yield from self.__best_f.get_row(data.best_f)
        yield from self.__life.get_row(data.last_improvement_fe)
        yield from self.__lims.get_row(data.last_improvement_time_millis)
        yield from self.__total_fes.get_row(data.total_fes)
        yield from self.__total_ms.get_row(data.total_time_millis)
        if self.__goal_f is not None:
            yield from self.__goal_f.get_optional_row(data.goal_f, data.n)
        if self.__best_f_scaled is not None:
            yield from self.__best_f_scaled.get_optional_row(
                data.best_f_scaled, data.n)
        if self.__has_n_success:
            # n_success may still be None for individual records even if
            # the column exists: emit the empty string then, not "None",
            # so that the value round-trips through CsvReader.
            yield num_or_none_to_str(data.n_success)
        if self.__success_fes is not None:
            yield from self.__success_fes.get_optional_row(
                data.success_fes, data.n_success)
        if self.__success_time_millis is not None:
            yield from self.__success_time_millis.get_optional_row(
                data.success_time_millis, data.n_success)
        if self.__has_ert_fes:
            yield num_or_none_to_str(data.ert_fes)
        if self.__has_ert_time_millis:
            yield num_or_none_to_str(data.ert_time_millis)
        if self.__max_fes is not None:
            yield from self.__max_fes.get_optional_row(data.max_fes, data.n)
        if self.__max_time_millis is not None:
            yield from self.__max_time_millis.get_optional_row(
                data.max_time_millis, data.n)

    def get_header_comments(self) -> Iterable[str]:
        """
        Get any possible header comments.

        :returns: the header comments
        """
        return ("Experiment End Results Statistics",
                "See the description at the bottom of the file.")

    def get_footer_comments(self) -> Iterable[str]:
        """
        Get any possible footer comments.

        :returns: the footer comments
        """
        yield ""
        scope: Final[str | None] = self.scope

        yield ("This file presents statistics gathered over multiple runs "
               "of optimization algorithms applied to problem instances.")
        if scope:
            yield ("All end result statistics records start with prefix "
                   f"{scope}{SCOPE_SEPARATOR}.")
        if self.__has_algorithm:
            yield f"{csv_scope(scope, KEY_ALGORITHM)}: {DESC_ALGORITHM}"
        if self.__has_instance:
            yield f"{csv_scope(scope, KEY_INSTANCE)}: {DESC_INSTANCE}"
        if self.__has_objective:
            yield (f"{csv_scope(scope, KEY_OBJECTIVE_FUNCTION)}:"
                   f" {DESC_OBJECTIVE_FUNCTION}")
        if self.__has_encoding:
            yield f"{csv_scope(scope, KEY_ENCODING)}: {DESC_ENCODING}"
        yield (f"{csv_scope(scope, KEY_N)}: the number of runs that were "
               f"performed for the given setup.")

        yield from self.__best_f.get_footer_comments()
        yield f"In summary {csv_scope(scope, KEY_BEST_F)} is {DESC_BEST_F}."

        yield from self.__life.get_footer_comments()
        yield (f"In summary {csv_scope(scope, KEY_LAST_IMPROVEMENT_FE)} "
               f"is {DESC_LAST_IMPROVEMENT_FE}.")

        yield from self.__lims.get_footer_comments()
        yield ("In summary "
               f"{csv_scope(scope, KEY_LAST_IMPROVEMENT_TIME_MILLIS)} "
               f"is {DESC_LAST_IMPROVEMENT_TIME_MILLIS}.")

        yield from self.__total_fes.get_footer_comments()
        yield (f"In summary {csv_scope(scope, KEY_TOTAL_FES)} "
               f"is {DESC_TOTAL_FES}.")

        yield from self.__total_ms.get_footer_comments()
        yield (f"In summary {csv_scope(scope, KEY_TOTAL_TIME_MILLIS)} "
               f"is {DESC_TOTAL_TIME_MILLIS}.")

        if self.__goal_f is not None:
            yield from self.__goal_f.get_footer_comments()
            yield (f"In summary {csv_scope(scope, KEY_GOAL_F)} is"
                   f" {DESC_GOAL_F}.")

        if self.__best_f_scaled is not None:
            yield from self.__best_f_scaled.get_footer_comments()
            yield (f"In summary {csv_scope(scope, KEY_BEST_F_SCALED)} "
                   "describes the best objective value reached ("
                   f"{csv_scope(scope, KEY_BEST_F)}) divided by the goal "
                   f"objective value ({csv_scope(scope, KEY_GOAL_F)}).")

        if self.__has_n_success:
            yield (f"{csv_scope(scope, KEY_N_SUCCESS)} is the number of "
                   "runs that reached goal objective value "
                   f"{csv_scope(scope, KEY_GOAL_F)}. Obviously, "
                   f"0<={csv_scope(scope, KEY_N_SUCCESS)}<="
                   f"{csv_scope(scope, KEY_N)}.")
        if self.__success_fes is not None:
            yield from self.__success_fes.get_footer_comments()
            yield (f"{csv_scope(scope, KEY_SUCCESS_FES)} offers statistics "
                   "about the number of FEs that the 0<="
                   f"{csv_scope(scope, KEY_N_SUCCESS)}<="
                   f"{csv_scope(scope, KEY_N)} successful runs needed to "
                   "reach the goal objective value "
                   f"{csv_scope(scope, KEY_GOAL_F)}.")

        if self.__success_time_millis is not None:
            # Bug fix: this previously yielded the footer comments of
            # self.__success_fes, which may be None here and, even if
            # not, describes the wrong column.
            yield from self.__success_time_millis.get_footer_comments()
            yield (f"{csv_scope(scope, KEY_SUCCESS_TIME_MILLIS)} offers "
                   "statistics about the number of milliseconds of clock time"
                   f" that the 0<={csv_scope(scope, KEY_N_SUCCESS)}<="
                   f"{csv_scope(scope, KEY_N)} successful runs needed to "
                   "reach the goal objective value "
                   f"{csv_scope(scope, KEY_GOAL_F)}.")

        if self.__has_ert_fes:
            yield (f"{csv_scope(scope, KEY_ERT_FES)} is the empirical "
                   "estimate of the number of FEs to solve the problem. It "
                   "can be approximated by dividing the sum of "
                   f"{csv_scope(scope, KEY_TOTAL_FES)} over all runs by the "
                   f"number {csv_scope(scope, KEY_N_SUCCESS)} of successful "
                   "runs.")

        if self.__has_ert_time_millis:
            # Bug fix: the text previously said "number of FEs", copied
            # from the FE-based ERT above; this column is time-based.
            yield (f"{csv_scope(scope, KEY_ERT_TIME_MILLIS)} is the empirical"
                   " estimate of the number of milliseconds to solve the "
                   "problem. It can be approximated by dividing the sum of "
                   f"{csv_scope(scope, KEY_TOTAL_TIME_MILLIS)} over all runs "
                   f"by the number {csv_scope(scope, KEY_N_SUCCESS)} of "
                   "successful runs.")

        if self.__max_fes is not None:
            yield from self.__max_fes.get_footer_comments()
            yield (f"In summary {csv_scope(scope, KEY_MAX_FES)} is"
                   f" {DESC_MAX_FES}.")
        if self.__max_time_millis is not None:
            yield from self.__max_time_millis.get_footer_comments()
            yield (f"In summary {csv_scope(scope, KEY_MAX_TIME_MILLIS)} is"
                   f" {DESC_MAX_TIME_MILLIS}.")

    def get_footer_bottom_comments(self) -> Iterable[str]:
        """
        Get the footer bottom comments.

        :returns: the footer comments
        """
        yield from motipy_footer_bottom_comments(
            self, ("The end statistics data is produced using module "
                   "moptipy.evaluation.end_statistics."))
        yield from StatWriter.get_footer_bottom_comments(self.__best_f)
class CsvReader(CsvReaderBase[EndStatistics]):
    """A csv parser for end results."""

    def __init__(self, columns: dict[str, int]) -> None:
        """
        Create a CSV parser for :class:`EndStatistics`.

        Resolves, from the column-name-to-index mapping of the file
        header, the indices and scoped sub-readers for all mandatory and
        optional columns produced by :class:`CsvWriter`.

        :param columns: the columns
        """
        super().__init__(columns)
        # optional identification columns: None if absent from the file
        #: the index of the algorithm column, if any
        self.__idx_algorithm: Final[int | None] = csv_column_or_none(
            columns, KEY_ALGORITHM)
        #: the index of the instance column, if any
        self.__idx_instance: Final[int | None] = csv_column_or_none(
            columns, KEY_INSTANCE)
        #: the index of the objective column, if any
        self.__idx_objective: Final[int | None] = csv_column_or_none(
            columns, KEY_OBJECTIVE_FUNCTION)
        #: the index of the encoding column, if any
        self.__idx_encoding: Final[int | None] = csv_column_or_none(
            columns, KEY_ENCODING)

        #: the index of the `N` column, i.e., where the number of runs is
        #: stored
        self.idx_n: Final[int] = csv_column(columns, KEY_N, True)

        # the mandatory per-run statistics all use `N` as sample size
        n_key: Final[tuple[tuple[str, int]]] = ((KEY_N, self.idx_n), )
        #: the reader for the best-objective-value-reached statistics
        self.__best_f: Final[StatReader] = csv_select_scope(
            StatReader, columns, KEY_BEST_F, n_key)
        #: the reader for the last improvement FE statistics
        self.__life: Final[StatReader] = csv_select_scope(
            StatReader, columns, KEY_LAST_IMPROVEMENT_FE, n_key)
        #: the reader for the last improvement millisecond index statistics
        self.__lims: Final[StatReader] = csv_select_scope(
            StatReader, columns, KEY_LAST_IMPROVEMENT_TIME_MILLIS, n_key)
        #: the reader for the total FEs statistics
        self.__total_fes: Final[StatReader] = csv_select_scope(
            StatReader, columns, KEY_TOTAL_FES, n_key)
        #: the reader for the total milliseconds consumed statistics
        self.__total_ms: Final[StatReader] = csv_select_scope(
            StatReader, columns, KEY_TOTAL_TIME_MILLIS, n_key)

        #: the reader for the goal objective value statistics, if any
        self.__goal_f: Final[StatReader | None] = csv_select_scope_or_none(
            StatReader, columns, KEY_GOAL_F, n_key)
        #: the reader for the best-f / goal-f statistics, if any
        self.__best_f_scaled: Final[StatReader | None] = \
            csv_select_scope_or_none(
                StatReader, columns, KEY_BEST_F_SCALED, n_key)

        #: the index of the column where the number of successful runs is
        #: stored
        self.__idx_n_success: Final[int | None] = csv_column_or_none(
            columns, KEY_N_SUCCESS)
        # the success-only statistics use `n_success` as their sample
        # size, so the `N` key is mapped to the n_success column here
        succ_key: Final[tuple[tuple[str, int], ...]] = () \
            if self.__idx_n_success is None else (
            (KEY_N, self.__idx_n_success), )
        #: the reader for the success FE data, if any
        self.__success_fes: Final[StatReader | None] = \
            None if self.__idx_n_success is None else \
            csv_select_scope_or_none(
                StatReader, columns, KEY_SUCCESS_FES, succ_key)
        #: the reader for the success time milliseconds data, if any
        self.__success_time_millis: Final[StatReader | None] = \
            None if self.__idx_n_success is None else \
            csv_select_scope_or_none(
                StatReader, columns, KEY_SUCCESS_TIME_MILLIS, succ_key)

        #: the index of the expected FEs until success
        self.__idx_ert_fes: Final[int | None] = csv_column_or_none(
            columns, KEY_ERT_FES)
        #: the index of the expected milliseconds until success
        self.__idx_ert_time_millis: Final[int | None] = csv_column_or_none(
            columns, KEY_ERT_TIME_MILLIS)

        #: the columns with the maximum FE-based budget statistics
        self.__max_fes: Final[StatReader | None] = csv_select_scope_or_none(
            StatReader, columns, KEY_MAX_FES, n_key)
        #: the columns with the maximum time-based budget statistics
        self.__max_time_millis: Final[StatReader | None] = \
            csv_select_scope_or_none(
                StatReader, columns, KEY_MAX_TIME_MILLIS, n_key)

    def parse_row(self, data: list[str]) -> EndStatistics:
        """
        Parse a row of data.

        Optional fields use the `*_or_none` / `parse_optional_row`
        helpers, so that absent columns or empty cells become `None`.

        :param data: the data row
        :return: the end result statistics
        """
        return EndStatistics(
            algorithm=csv_str_or_none(data, self.__idx_algorithm),
            instance=csv_str_or_none(data, self.__idx_instance),
            objective=csv_str_or_none(data, self.__idx_objective),
            encoding=csv_str_or_none(data, self.__idx_encoding),
            n=int(data[self.idx_n]),
            best_f=self.__best_f.parse_row(data),
            last_improvement_fe=self.__life.parse_row(data),
            last_improvement_time_millis=self.__lims.parse_row(data),
            total_fes=self.__total_fes.parse_row(data),
            total_time_millis=self.__total_ms.parse_row(data),
            goal_f=StatReader.parse_optional_row(self.__goal_f, data),
            best_f_scaled=StatReader.parse_optional_row(
                self.__best_f_scaled, data),
            n_success=csv_val_or_none(data, self.__idx_n_success, int),
            success_fes=StatReader.parse_optional_row(
                self.__success_fes, data),
            success_time_millis=StatReader.parse_optional_row(
                self.__success_time_millis, data),
            ert_fes=csv_val_or_none(data, self.__idx_ert_fes, str_to_num),
            ert_time_millis=csv_val_or_none(
                data, self.__idx_ert_time_millis, str_to_num),
            max_fes=StatReader.parse_optional_row(self.__max_fes, data),
            max_time_millis=StatReader.parse_optional_row(
                self.__max_time_millis, data),
        )
@dataclass(frozen=True, init=False, order=False, eq=False)
class __PvEndStatistics(EndStatistics):
    """Aggregated end statistics."""

    #: the value of the parameter over which it is aggregated
    pv: int | float

    def __init__(self, es: EndStatistics, pv: int | float):
        """
        Create the end statistics of an experiment-setup combination.

        Copies all fields of `es` and attaches the (finite, numerical)
        parameter value `pv` under which the statistics were gathered.

        :param es: the original end statistics object
        :param pv: the parameter value
        """
        # copy all the fields of the source record
        super().__init__(
            es.algorithm, es.instance, es.objective, es.encoding, es.n,
            es.best_f, es.last_improvement_fe,
            es.last_improvement_time_millis, es.total_fes,
            es.total_time_millis, es.goal_f, es.best_f_scaled, es.n_success,
            es.success_fes, es.success_time_millis, es.ert_fes,
            es.ert_time_millis, es.max_fes, es.max_time_millis)
        # validate pv: must be a finite number
        if not isinstance(pv, int | float):
            raise type_error(pv, "pv", (int, float))
        if not isfinite(pv):
            raise ValueError(f"got {pv=}")
        # the dataclass is frozen, so bypass the attribute guard
        object.__setattr__(self, "pv", pv)

    def get_param_value(self) -> int | float:
        """
        Get the parameter value.

        :return: the parameter value
        """
        return self.pv
def aggregate_over_parameter(
        data: Iterable[EndResult],
        param_value: Callable[[EndResult], int | float],
        join_all_algorithms: bool = False,
        join_all_instances: bool = False,
        join_all_objectives: bool = False,
        join_all_encodings: bool = False) -> tuple[
        Callable[[EndStatistics], int | float], Iterable[EndStatistics]]:
    """
    Aggregate a stream of data into groups based on a parameter.

    :param data: the source data
    :param param_value: the function obtaining a parameter value
    :param join_all_algorithms: should the statistics be aggregated
        over all algorithms?
    :param join_all_instances: should the statistics be aggregated
        over all instances?
    :param join_all_objectives: should the statistics be aggregated over
        all objectives?
    :param join_all_encodings: should statistics be aggregated over all
        encodings?
    :returns: a tuple of a function extracting the parameter value from
        an aggregated :class:`EndStatistics` record and the statistics
        records themselves, sorted by ascending parameter value
    :raises TypeError: if a parameter value is neither `int` nor `float`
    :raises ValueError: if a parameter value is not finite, or if `data`
        is empty
    """
    # group the end results by their (validated) parameter value
    param_map: Final[dict[int | float, list[EndResult]]] = {}
    for er in data:
        pv = param_value(er)
        if not isinstance(pv, int | float):
            raise type_error(pv, f"param_value{er}", (int, float))
        if not isfinite(pv):
            raise ValueError(f"got {pv} = param({er})")
        param_map.setdefault(pv, []).append(er)
    if dict.__len__(param_map) <= 0:
        raise ValueError("Did not encounter any data.")

    # compute the per-group statistics in ascending parameter order
    stats: Final[list[EndStatistics]] = []
    for pv in sorted(param_map.keys()):
        for ess in from_end_results(
                param_map[pv], join_all_algorithms, join_all_instances,
                join_all_objectives, join_all_encodings):
            stats.append(__PvEndStatistics(ess, pv))
    return cast("Callable[[EndStatistics], int | float]",
                __PvEndStatistics.get_param_value), tuple(stats)
# Run end-results to stat file if executed as script
if __name__ == "__main__":
    parser: Final[argparse.ArgumentParser] = moptipy_argparser(
        __file__, "Build an end-results statistics CSV file.",
        "This program creates a CSV file with basic statistics on the "
        "end-of-run state of experiments conducted with moptipy. It "
        "therefore either parses a directory structure with log files "
        "(if src identifies a directory) or a end results CSV file (if"
        " src identifies a file). In the former case, the directory "
        "will follow the form 'algorithm/instance/log_file' with one "
        "log file per run. In the latter case, it will be a file "
        "generated by the end_results.py tool of moptipy. The output "
        "of this tool is a CSV file where the columns are separated by"
        " ';' and the rows contain the statistics.")
    # prefer an existing end-results CSV file; fall back to the raw
    # results directory otherwise
    def_src: str = "./evaluation/end_results.txt"
    if not os.path.isfile(def_src):
        def_src = "./results"
    parser.add_argument(
        "source", nargs="?", default=def_src,
        help="either the directory with moptipy log files or the path to the "
             "end-results CSV file", type=Path)
    parser.add_argument(
        "dest", type=Path, nargs="?",
        default="./evaluation/end_statistics.txt",
        help="the path to the end results statistics CSV file to be created")
    parser.add_argument(
        "--join_algorithms",
        help="compute statistics over all algorithms, i.e., the statistics"
             " are not separated by algorithm but all algorithms are treated "
             "as one", action="store_true")
    parser.add_argument(
        "--join_instances",
        help="compute statistics over all instances, i.e., the statistics"
             " are not separated by instance but all instances are treated "
             "as one", action="store_true")
    parser.add_argument(
        "--join_objectives",
        help="compute statistics over all objective functions, i.e., the "
             "statistics are not separated by objective functions but all "
             "objectives functions are treated as one", action="store_true")
    parser.add_argument(
        "--join_encodings",
        help="compute statistics over all encodings, i.e., the statistics"
             " are not separated by encodings but all encodings are treated "
             "as one", action="store_true")
    args: Final[argparse.Namespace] = parser.parse_args()

    # a file source is an end-results CSV, a directory source holds logs
    src_path: Final[Path] = args.source
    end_results: Iterable[EndResult]
    if src_path.is_file():
        logger(f"{src_path!r} identifies as file, load as end-results csv")
        end_results = end_results_from_csv(src_path)
    else:
        logger(f"{src_path!r} identifies as directory, load it as log files")
        end_results = end_results_from_logs(src_path)

    # aggregate the end results into statistics and write them to dest
    # (the unused `end_stats` accumulator list was removed)
    to_csv(from_end_results(
        source=end_results,
        join_all_algorithms=args.join_algorithms,
        join_all_instances=args.join_instances,
        join_all_objectives=args.join_objectives,
        join_all_encodings=args.join_encodings), args.dest)