Coverage for moptipy / evaluation / ecdf.py: 71%
231 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
1"""
2Approximate the :class:`~moptipy.evaluation.ecdf.Ecdf` to reach certain goals.
4The empirical cumulative distribution function (ECDF) for short illustrates
5the fraction of runs that have reached a certain goal over time. Let's say
6that you have performed 10 runs of a certain algorithm on a certain problem.
7As goal quality, you could define the globally optimal solution quality.
8For any point in time, the ECDF then shows how many of these runs have solved
9the problem to this goal, to optimality.
10Let's say the first run solves the problem after 100 FEs.
11Then the ECDF is 0 until 99 FEs and at 100 FEs, it becomes 1/10.
12The second-fastest run solves the problem after 200 FEs.
13The ECDF thus stays 0.1 until 199 FEs and at 200 FEs, it jumps to 0.2.
14And so on.
15This means that the value of the ECDF is always between 0 and 1.
171. Nikolaus Hansen, Anne Auger, Steffen Finck, Raymond Ros. *Real-Parameter
18 Black-Box Optimization Benchmarking 2010: Experimental Setup.*
19 Research Report RR-7215, INRIA. 2010. inria-00462481.
20 https://hal.inria.fr/inria-00462481/document/
212. Dave Andrew Douglas Tompkins and Holger H. Hoos. UBCSAT: An Implementation
22 and Experimentation Environment for SLS Algorithms for SAT and MAX-SAT. In
23 *Revised Selected Papers from the Seventh International Conference on
24 Theory and Applications of Satisfiability Testing (SAT'04),* May 10-13,
25 2004, Vancouver, BC, Canada, pages 306-320. Lecture Notes in Computer
26 Science (LNCS), volume 3542. Berlin, Germany: Springer-Verlag GmbH.
27 ISBN: 3-540-27829-X. doi: https://doi.org/10.1007/11527695_24.
283. Holger H. Hoos and Thomas Stützle. Evaluating Las Vegas Algorithms -
29 Pitfalls and Remedies. In Gregory F. Cooper and Serafín Moral, editors,
30 *Proceedings of the 14th Conference on Uncertainty in Artificial
31 Intelligence (UAI'98)*, July 24-26, 1998, Madison, WI, USA, pages 238-245.
32 San Francisco, CA, USA: Morgan Kaufmann Publishers Inc.
33 ISBN: 1-55860-555-X.
34"""
36from dataclasses import dataclass
37from math import inf, isfinite
38from typing import Any, Callable, Final, Iterable
40import numpy as np
41from pycommons.io.console import logger
42from pycommons.io.csv import COMMENT_START, CSV_SEPARATOR
43from pycommons.io.path import Path
44from pycommons.strings.string_conv import num_to_str
45from pycommons.types import check_int_range, type_error
47from moptipy.api.logging import (
48 KEY_ALGORITHM,
49 KEY_GOAL_F,
50)
51from moptipy.evaluation._utils import _get_goal_reach_index
52from moptipy.evaluation.base import (
53 F_NAME_NORMALIZED,
54 F_NAME_SCALED,
55 KEY_ENCODING,
56 KEY_N,
57 KEY_OBJECTIVE_FUNCTION,
58 MultiRun2DData,
59)
60from moptipy.evaluation.progress import Progress
61from moptipy.utils.lang import Lang
62from moptipy.utils.logger import (
63 KEY_VALUE_SEPARATOR,
64)
65from moptipy.utils.nputils import is_all_finite
67#: The number of instances.
68KEY_N_INSTS: Final[str] = f"{KEY_N}Insts"
69#: The objective dimension name.
70KEY_F_NAME: Final[str] = "fName"
73@dataclass(frozen=True, init=False, order=False, eq=False)
74class Ecdf(MultiRun2DData):
75 """The ECDF data."""
77 #: The ECDF data function
78 ecdf: np.ndarray
79 #: The number of instances over which the ERT-ECDF is computed.
80 n_insts: int
81 #: The goal value, or None if different goals were used for different
82 #: instances
83 goal_f: int | float | None
85 def __init__(self,
86 algorithm: str | None,
87 objective: str | None,
88 encoding: str | None,
89 n: int,
90 n_insts: int,
91 time_unit: str,
92 f_name: str,
93 goal_f: int | float | None,
94 ecdf: np.ndarray):
95 """
96 Create the ECDF function.
98 :param algorithm: the algorithm name, if all runs are with the same
99 algorithm
100 :param objective: the objective name, if all runs are on the same
101 objective function, `None` otherwise
102 :param encoding: the encoding name, if all runs are on the same
103 encoding and an encoding was actually used, `None` otherwise
104 :param n: the total number of runs
105 :param n_insts: the total number of instances
106 :param time_unit: the time unit
107 :param f_name: the objective dimension name
108 :param goal_f: the goal value, or `None` if different goals were used
109 for different instances
110 :param numpy.ndarray ecdf: the ert-ecdf matrix
111 """
112 super().__init__(algorithm, None, objective, encoding, n,
113 time_unit, f_name)
114 object.__setattr__(
115 self, "n_insts", check_int_range(n_insts, "n_insts", 1, self.n))
117 if goal_f is not None:
118 if not isinstance(goal_f, int | float):
119 raise type_error(goal_f, "goal_f", (int, float))
120 if not isfinite(goal_f):
121 raise ValueError(f"Invalid goal_f {goal_f}.")
122 object.__setattr__(self, "goal_f", goal_f)
124 if not isinstance(ecdf, np.ndarray):
125 raise type_error(ecdf, "ecdf", np.ndarray)
126 ecdf.flags.writeable = False
128 time: Final[np.ndarray] = ecdf[:, 0]
129 ll = time.size
130 if ll < 2:
131 raise ValueError("Must have at least two points in "
132 f"ecdf curve , but encountered {ll}.")
133 if not is_all_finite(time[:-1]):
134 raise ValueError("Non-last Ert-based time-values must be finite, "
135 f"but encountered {time}.")
136 if np.any(time[1:] <= time[:-1]):
137 raise ValueError("Time data must be strictly increasing,"
138 f"but encountered {time}.")
140 prb: Final[np.ndarray] = ecdf[:, 1]
141 if not is_all_finite(prb):
142 raise ValueError(
143 f"All ECDF values must be finite, but encountered {prb}.")
144 if (ll > 2) and np.any(prb[1:-1] <= prb[:-2]):
145 raise ValueError("ECDF data must be strictly increasing,"
146 f"but encountered {prb}.")
147 if prb[0] < 0:
148 raise ValueError(f"First ECDF element cannot be {prb[0]}.")
149 if prb[ll - 1] > 1:
150 raise ValueError(f"Last ECDF element cannot be {prb[ll - 1]}.")
151 object.__setattr__(self, "ecdf", ecdf)
153 def time_label(self) -> str:
154 """
155 Get the time label for x-axes.
157 :return: the time key
158 """
159 return Lang.translate(self.time_unit)
161 def _time_key(self) -> str:
162 """
163 Get the time key.
165 :return: the time key
166 """
167 return self.time_unit
169 def _tuple(self) -> tuple[Any, ...]:
170 """
171 Get the tuple representation of this object used in comparisons.
173 :return: the comparison-relevant data of this object in a tuple
174 """
175 return (self.__class__.__name__,
176 "" if self.algorithm is None else self.algorithm,
177 "" if self.instance is None else self.instance,
178 "" if self.objective is None else self.objective,
179 "" if self.encoding is None else self.encoding,
180 self.n, -1, self.time_unit, self.f_name,
181 inf if self.goal_f is None else self.goal_f,
182 self.n_insts)
184 @staticmethod
185 def _compute_times(source: list[Progress],
186 goal: int | float) -> list[float]:
187 """
188 Compute the times for the given goals.
190 Warning: `source` must only contain progress objects that contain
191 monotonously improving points. It must not contain runs that may get
192 worse over time.
194 :param source: the source array
195 :param goal: the goal value
196 :return: a list of times
197 """
198 ret = []
199 for pr in source:
200 idx = _get_goal_reach_index(pr.f, goal)
201 if idx < 0:
202 continue
203 ret.append(pr.time[idx])
204 return ret
206 # noinspection PyUnusedLocal
207 @staticmethod
208 def _get_div(n: int, n_insts: int) -> int:
209 """
210 Get the divisor.
212 :param n: the number of runs
213 :param n_insts: the number of instances
214 :return: the divisor
215 """
216 del n_insts
217 return n
219 @classmethod
220 def _create(cls: type["Ecdf"],
221 source: Iterable[Progress],
222 goal_f: int | float | Callable[[
223 str], int | float] | None = None,
224 use_default_goal_f: bool = True) -> "Ecdf":
225 """
226 Create one single Ecdf record from an iterable of Progress records.
228 :param source: the set of progress instances
229 :param goal_f: the goal objective value
230 :param use_default_goal_f: should we use the default lower bounds as
231 goals?
232 :return: the Ecdf record
233 """
234 if not isinstance(source, Iterable):
235 raise type_error(source, "source", Iterable)
237 algorithm: str | None = None
238 objective: str | None = None
239 encoding: str | None = None
240 time_unit: str | None = None
241 f_name: str | None = None
242 inst_runs: dict[str, list[Progress]] = {}
243 n: int = 0
245 for progress in source:
246 if not isinstance(progress, Progress):
247 raise type_error(progress, "progress", Progress)
248 if n <= 0:
249 algorithm = progress.algorithm
250 time_unit = progress.time_unit
251 objective = progress.objective
252 encoding = progress.encoding
253 f_name = progress.f_name
254 else:
255 if algorithm != progress.algorithm:
256 algorithm = None
257 if objective != progress.objective:
258 objective = None
259 if encoding != progress.encoding:
260 encoding = None
261 if time_unit != progress.time_unit:
262 raise ValueError(
263 f"Cannot mix time units {time_unit} "
264 f"and {progress.time_unit}.")
265 if f_name != progress.f_name:
266 raise ValueError(f"Cannot mix f-names {f_name} "
267 f"and {progress.f_name}.")
268 n += 1
269 if progress.instance in inst_runs:
270 inst_runs[progress.instance].append(progress)
271 else:
272 inst_runs[progress.instance] = [progress]
273 del source
275 if n <= 0:
276 raise ValueError("Did not encounter any progress information.")
277 n_insts: Final[int] = len(inst_runs)
278 if (n_insts <= 0) or (n_insts > n):
279 raise ValueError("Huh?.")
281 times: list[float] = []
282 goal: int | float | None
283 same_goal_f: int | float | None = None
284 first: bool = True
285 for instance, pl in inst_runs.items():
286 goal = None
287 if isinstance(goal_f, int | float):
288 goal = goal_f
289 elif callable(goal_f):
290 goal = goal_f(instance)
291 if (goal is None) and use_default_goal_f:
292 if f_name == F_NAME_SCALED:
293 goal = 1
294 elif f_name == F_NAME_NORMALIZED:
295 goal = 0
296 else:
297 goal = pl[0].f_standard
298 for pp in pl:
299 if goal != pp.f_standard:
300 raise ValueError("Inconsistent goals: "
301 f"{goal} and {pp.f_standard}")
302 if not isinstance(goal, int | float):
303 raise type_error(goal, "goal", (int, float))
304 if first:
305 same_goal_f = goal
306 elif goal != same_goal_f:
307 same_goal_f = None
308 res = cls._compute_times(pl, goal)
309 for t in res:
310 if isfinite(t):
311 if t < 0:
312 raise ValueError(f"Invalid ert {t}.")
313 times.append(t)
314 elif not (t >= inf):
315 raise ValueError(f"Invalid ert {t}.")
316 del inst_runs
318 if len(times) <= 0:
319 return cls(algorithm, objective, encoding, n, n_insts, time_unit,
320 f_name, same_goal_f, np.array([[0, 0], [inf, 0]]))
322 times.sort()
323 time: list[float] = [0]
324 ecdf: list[float] = [0]
325 success: int = 0
326 div: Final[int] = cls._get_div(n, n_insts)
327 ll: int = 0
328 for t in times:
329 success += 1 # noqa: SIM113
330 if t > time[ll]:
331 time.append(t)
332 ecdf.append(success / div)
333 ll += 1
334 else:
335 ecdf[ll] = success / div
337 time.append(inf)
338 ecdf.append(ecdf[ll])
340 return cls(algorithm, objective, encoding, n, n_insts,
341 time_unit, f_name, same_goal_f,
342 np.column_stack((np.array(time), np.array(ecdf))))
344 @classmethod
345 def _from_progresses(
346 cls: type["Ecdf"],
347 source: Iterable[Progress], consumer: Callable[["Ecdf"], Any],
348 f_goal: int | float | Callable[[str], int | float]
349 | Iterable[int | float | Callable] | None = None,
350 join_all_algorithms: bool = False,
351 join_all_objectives: bool = False,
352 join_all_encodings: bool = False) -> None:
353 """
354 Compute one or multiple ECDFs from a stream of end results.
356 :param source: the set of progress instances
357 :param f_goal: one or multiple goal values
358 :param consumer: the destination to which the new records will be
359 passed, can be the `append` method of a :class:`list`
360 :param join_all_algorithms: should the Ecdf be aggregated over all
361 algorithms
362 :param join_all_objectives: should the Ecdf be aggregated over all
363 objective functions
364 :param join_all_encodings: should the Ecdf be aggregated over all
365 encodings
366 """
367 if not isinstance(source, Iterable):
368 raise type_error(source, "source", Iterable)
369 if not callable(consumer):
370 raise type_error(consumer, "consumer", call=True)
371 if not isinstance(join_all_algorithms, bool):
372 raise type_error(join_all_algorithms,
373 "join_all_algorithms", bool)
374 if not isinstance(join_all_objectives, bool):
375 raise type_error(join_all_objectives,
376 "join_all_objectives", bool)
377 if not isinstance(join_all_encodings, bool):
378 raise type_error(join_all_encodings,
379 "join_all_encodings", bool)
380 if not isinstance(f_goal, Iterable):
381 f_goal = [f_goal]
383 sorter: dict[tuple[str, str, str], list[Progress]] = {}
384 for er in source:
385 if not isinstance(er, Progress):
386 raise type_error(er, "progress", Progress)
387 key = ("" if join_all_algorithms else er.algorithm,
388 "" if join_all_objectives else er.objective,
389 "" if join_all_encodings else (
390 "" if er.encoding is None else er.encoding))
391 if key in sorter:
392 lst = sorter[key]
393 else:
394 lst = []
395 sorter[key] = lst
396 lst.append(er)
398 if len(sorter) <= 0:
399 raise ValueError("source must not be empty")
401 keyz = sorted(sorter.keys())
402 for goal in f_goal:
403 use_default_goal = goal is None
404 for key in keyz:
405 consumer(cls._create(sorter[key], goal, use_default_goal))
408def create(source: Iterable[Progress],
409 goal_f: int | float | Callable[[
410 str], int | float] | None = None,
411 use_default_goal_f: bool = True) -> Ecdf:
412 """
413 Create one single Ecdf record from an iterable of Progress records.
415 :param source: the set of progress instances
416 :param goal_f: the goal objective value
417 :param use_default_goal_f: should we use the default lower bounds as
418 goals?
419 :return: the Ecdf record
420 """
421 return Ecdf._create(source, goal_f, use_default_goal_f)
424def from_progresses(
425 source: Iterable[Progress], consumer: Callable[[Ecdf], Any],
426 f_goal: int | float | Callable[[str], int | float]
427 | Iterable[int | float | Callable] | None = None,
428 join_all_algorithms: bool = False,
429 join_all_objectives: bool = False,
430 join_all_encodings: bool = False) -> None:
431 """
432 Compute one or multiple ECDFs from a stream of end results.
434 :param source: the set of progress instances
435 :param f_goal: one or multiple goal values
436 :param consumer: the destination to which the new records will be
437 passed, can be the `append` method of a :class:`list`
438 :param join_all_algorithms: should the Ecdf be aggregated over all
439 algorithms
440 :param join_all_objectives: should the Ecdf be aggregated over all
441 objective functions
442 :param join_all_encodings: should the Ecdf be aggregated over all
443 encodings
444 """
445 return Ecdf._from_progresses(
446 source, consumer, f_goal, join_all_algorithms,
447 join_all_objectives, join_all_encodings)
450def get_goal(ecdf: Ecdf) -> int | float | None:
451 """
452 Get the goal value from the given ecdf instance.
454 :param Ecdf ecdf: the ecdf instance
455 :return: the goal value
456 """
457 return ecdf.goal_f
460def goal_to_str(goal_f: int | float | None) -> str:
461 """
462 Transform a goal to a string.
464 :param goal_f: the goal value
465 :return: the string representation
466 """
467 return "undefined" if goal_f is None else \
468 f"goal: \u2264{num_to_str(goal_f)}"
471def to_csv(ecdf: Ecdf, file: str,
472 put_header: bool = True) -> Path:
473 """
474 Store a :class:`Ecdf` record in a CSV file.
476 :param ecdf: the ecdf
477 :param file: the file to generate
478 :param put_header: should we put a header with meta-data?
479 :return: the fully resolved file name
480 """
481 if not isinstance(ecdf, Ecdf):
482 raise type_error(ecdf, "ecdf", Ecdf)
483 path: Final[Path] = Path(file)
484 logger(f"Writing ECDF to CSV file {path!r}.")
485 path.ensure_parent_dir_exists()
487 with path.open_for_write() as out:
488 sep: Final[str] = CSV_SEPARATOR
489 write: Final[Callable[[str], int]] = out.write
490 if put_header:
491 kv: Final[str] = KEY_VALUE_SEPARATOR
492 cmt: Final[str] = COMMENT_START
493 if ecdf.algorithm is not None:
494 write(f"{cmt} {KEY_ALGORITHM}{kv}{ecdf.algorithm}\n")
495 write(f"{cmt} {KEY_N}{kv}{ecdf.n}\n")
496 write(f"{cmt} {KEY_N_INSTS}{kv}{ecdf.n_insts}\n")
497 write(f"{cmt} {KEY_F_NAME}{kv}{ecdf.f_name}\n")
498 if ecdf.goal_f is not None:
499 write(f"{cmt} {KEY_GOAL_F}{kv}{ecdf.goal_f}\n")
500 if ecdf.objective is not None:
501 write(f"{cmt} {KEY_OBJECTIVE_FUNCTION}"
502 f"{kv}{ecdf.objective}\n")
503 if ecdf.encoding is not None:
504 write(f"{cmt} {KEY_ENCODING}{kv}{ecdf.encoding}\n")
506 write(f"{ecdf._time_key()}{sep}ecdf\n")
507 for v in ecdf.ecdf:
508 out.write(
509 f"{num_to_str(v[0])}{sep}{num_to_str(v[1])}\n")
511 logger(f"Done writing ECDF to CSV file {path!r}.")
513 path.enforce_file()
514 return path