Coverage for moptipy / evaluation / ecdf.py: 71%

231 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-24 08:49 +0000

1""" 

2Approximate the :class:`~moptipy.evaluation.ecdf.Ecdf` to reach certain goals. 

3 

4The empirical cumulative distribution function (ECDF) for short illustrates 

5the fraction of runs that have reached a certain goal over time. Let's say 

6that you have performed 10 runs of a certain algorithm on a certain problem. 

7As goal quality, you could define the globally optimal solution quality. 

8For any point in time, the ECDF then shows how many of these runs have solved 

9the problem to this goal, to optimality. 

10Let's say the first run solves the problem after 100 FEs. 

11Then the ECDF is 0 until 99 FEs and at 100 FEs, it becomes 1/10. 

12The second-fastest run solves the problem after 200 FEs. 

13The ECDF thus stays 0.1 until 199 FEs and at 200 FEs, it jumps to 0.2. 

14And so on. 

15This means that the value of the ECDF is always between 0 and 1. 

16 

171. Nikolaus Hansen, Anne Auger, Steffen Finck, Raymond Ros. *Real-Parameter 

18 Black-Box Optimization Benchmarking 2010: Experimental Setup.* 

19 Research Report RR-7215, INRIA. 2010. inria-00462481. 

20 https://hal.inria.fr/inria-00462481/document/ 

212. Dave Andrew Douglas Tompkins and Holger H. Hoos. UBCSAT: An Implementation 

22 and Experimentation Environment for SLS Algorithms for SAT and MAX-SAT. In 

23 *Revised Selected Papers from the Seventh International Conference on 

24 Theory and Applications of Satisfiability Testing (SAT'04),* May 10-13, 

25 2004, Vancouver, BC, Canada, pages 306-320. Lecture Notes in Computer 

26 Science (LNCS), volume 3542. Berlin, Germany: Springer-Verlag GmbH. 

27 ISBN: 3-540-27829-X. doi: https://doi.org/10.1007/11527695_24. 

283. Holger H. Hoos and Thomas Stützle. Evaluating Las Vegas Algorithms - 

29 Pitfalls and Remedies. In Gregory F. Cooper and Serafín Moral, editors, 

30 *Proceedings of the 14th Conference on Uncertainty in Artificial 

31 Intelligence (UAI'98)*, July 24-26, 1998, Madison, WI, USA, pages 238-245. 

32 San Francisco, CA, USA: Morgan Kaufmann Publishers Inc. 

33 ISBN: 1-55860-555-X. 

34""" 

35 

36from dataclasses import dataclass 

37from math import inf, isfinite 

38from typing import Any, Callable, Final, Iterable 

39 

40import numpy as np 

41from pycommons.io.console import logger 

42from pycommons.io.csv import COMMENT_START, CSV_SEPARATOR 

43from pycommons.io.path import Path 

44from pycommons.strings.string_conv import num_to_str 

45from pycommons.types import check_int_range, type_error 

46 

47from moptipy.api.logging import ( 

48 KEY_ALGORITHM, 

49 KEY_GOAL_F, 

50) 

51from moptipy.evaluation._utils import _get_goal_reach_index 

52from moptipy.evaluation.base import ( 

53 F_NAME_NORMALIZED, 

54 F_NAME_SCALED, 

55 KEY_ENCODING, 

56 KEY_N, 

57 KEY_OBJECTIVE_FUNCTION, 

58 MultiRun2DData, 

59) 

60from moptipy.evaluation.progress import Progress 

61from moptipy.utils.lang import Lang 

62from moptipy.utils.logger import ( 

63 KEY_VALUE_SEPARATOR, 

64) 

65from moptipy.utils.nputils import is_all_finite 

66 

67#: The number of instances. 

68KEY_N_INSTS: Final[str] = f"{KEY_N}Insts" 

69#: The objective dimension name. 

70KEY_F_NAME: Final[str] = "fName" 

71 

72 

73@dataclass(frozen=True, init=False, order=False, eq=False) 

74class Ecdf(MultiRun2DData): 

75 """The ECDF data.""" 

76 

77 #: The ECDF data function 

78 ecdf: np.ndarray 

79 #: The number of instances over which the ERT-ECDF is computed. 

80 n_insts: int 

81 #: The goal value, or None if different goals were used for different 

82 #: instances 

83 goal_f: int | float | None 

84 

85 def __init__(self, 

86 algorithm: str | None, 

87 objective: str | None, 

88 encoding: str | None, 

89 n: int, 

90 n_insts: int, 

91 time_unit: str, 

92 f_name: str, 

93 goal_f: int | float | None, 

94 ecdf: np.ndarray): 

95 """ 

96 Create the ECDF function. 

97 

98 :param algorithm: the algorithm name, if all runs are with the same 

99 algorithm 

100 :param objective: the objective name, if all runs are on the same 

101 objective function, `None` otherwise 

102 :param encoding: the encoding name, if all runs are on the same 

103 encoding and an encoding was actually used, `None` otherwise 

104 :param n: the total number of runs 

105 :param n_insts: the total number of instances 

106 :param time_unit: the time unit 

107 :param f_name: the objective dimension name 

108 :param goal_f: the goal value, or `None` if different goals were used 

109 for different instances 

110 :param numpy.ndarray ecdf: the ert-ecdf matrix 

111 """ 

112 super().__init__(algorithm, None, objective, encoding, n, 

113 time_unit, f_name) 

114 object.__setattr__( 

115 self, "n_insts", check_int_range(n_insts, "n_insts", 1, self.n)) 

116 

117 if goal_f is not None: 

118 if not isinstance(goal_f, int | float): 

119 raise type_error(goal_f, "goal_f", (int, float)) 

120 if not isfinite(goal_f): 

121 raise ValueError(f"Invalid goal_f {goal_f}.") 

122 object.__setattr__(self, "goal_f", goal_f) 

123 

124 if not isinstance(ecdf, np.ndarray): 

125 raise type_error(ecdf, "ecdf", np.ndarray) 

126 ecdf.flags.writeable = False 

127 

128 time: Final[np.ndarray] = ecdf[:, 0] 

129 ll = time.size 

130 if ll < 2: 

131 raise ValueError("Must have at least two points in " 

132 f"ecdf curve , but encountered {ll}.") 

133 if not is_all_finite(time[:-1]): 

134 raise ValueError("Non-last Ert-based time-values must be finite, " 

135 f"but encountered {time}.") 

136 if np.any(time[1:] <= time[:-1]): 

137 raise ValueError("Time data must be strictly increasing," 

138 f"but encountered {time}.") 

139 

140 prb: Final[np.ndarray] = ecdf[:, 1] 

141 if not is_all_finite(prb): 

142 raise ValueError( 

143 f"All ECDF values must be finite, but encountered {prb}.") 

144 if (ll > 2) and np.any(prb[1:-1] <= prb[:-2]): 

145 raise ValueError("ECDF data must be strictly increasing," 

146 f"but encountered {prb}.") 

147 if prb[0] < 0: 

148 raise ValueError(f"First ECDF element cannot be {prb[0]}.") 

149 if prb[ll - 1] > 1: 

150 raise ValueError(f"Last ECDF element cannot be {prb[ll - 1]}.") 

151 object.__setattr__(self, "ecdf", ecdf) 

152 

153 def time_label(self) -> str: 

154 """ 

155 Get the time label for x-axes. 

156 

157 :return: the time key 

158 """ 

159 return Lang.translate(self.time_unit) 

160 

161 def _time_key(self) -> str: 

162 """ 

163 Get the time key. 

164 

165 :return: the time key 

166 """ 

167 return self.time_unit 

168 

169 def _tuple(self) -> tuple[Any, ...]: 

170 """ 

171 Get the tuple representation of this object used in comparisons. 

172 

173 :return: the comparison-relevant data of this object in a tuple 

174 """ 

175 return (self.__class__.__name__, 

176 "" if self.algorithm is None else self.algorithm, 

177 "" if self.instance is None else self.instance, 

178 "" if self.objective is None else self.objective, 

179 "" if self.encoding is None else self.encoding, 

180 self.n, -1, self.time_unit, self.f_name, 

181 inf if self.goal_f is None else self.goal_f, 

182 self.n_insts) 

183 

184 @staticmethod 

185 def _compute_times(source: list[Progress], 

186 goal: int | float) -> list[float]: 

187 """ 

188 Compute the times for the given goals. 

189 

190 Warning: `source` must only contain progress objects that contain 

191 monotonously improving points. It must not contain runs that may get 

192 worse over time. 

193 

194 :param source: the source array 

195 :param goal: the goal value 

196 :return: a list of times 

197 """ 

198 ret = [] 

199 for pr in source: 

200 idx = _get_goal_reach_index(pr.f, goal) 

201 if idx < 0: 

202 continue 

203 ret.append(pr.time[idx]) 

204 return ret 

205 

206 # noinspection PyUnusedLocal 

207 @staticmethod 

208 def _get_div(n: int, n_insts: int) -> int: 

209 """ 

210 Get the divisor. 

211 

212 :param n: the number of runs 

213 :param n_insts: the number of instances 

214 :return: the divisor 

215 """ 

216 del n_insts 

217 return n 

218 

219 @classmethod 

220 def _create(cls: type["Ecdf"], 

221 source: Iterable[Progress], 

222 goal_f: int | float | Callable[[ 

223 str], int | float] | None = None, 

224 use_default_goal_f: bool = True) -> "Ecdf": 

225 """ 

226 Create one single Ecdf record from an iterable of Progress records. 

227 

228 :param source: the set of progress instances 

229 :param goal_f: the goal objective value 

230 :param use_default_goal_f: should we use the default lower bounds as 

231 goals? 

232 :return: the Ecdf record 

233 """ 

234 if not isinstance(source, Iterable): 

235 raise type_error(source, "source", Iterable) 

236 

237 algorithm: str | None = None 

238 objective: str | None = None 

239 encoding: str | None = None 

240 time_unit: str | None = None 

241 f_name: str | None = None 

242 inst_runs: dict[str, list[Progress]] = {} 

243 n: int = 0 

244 

245 for progress in source: 

246 if not isinstance(progress, Progress): 

247 raise type_error(progress, "progress", Progress) 

248 if n <= 0: 

249 algorithm = progress.algorithm 

250 time_unit = progress.time_unit 

251 objective = progress.objective 

252 encoding = progress.encoding 

253 f_name = progress.f_name 

254 else: 

255 if algorithm != progress.algorithm: 

256 algorithm = None 

257 if objective != progress.objective: 

258 objective = None 

259 if encoding != progress.encoding: 

260 encoding = None 

261 if time_unit != progress.time_unit: 

262 raise ValueError( 

263 f"Cannot mix time units {time_unit} " 

264 f"and {progress.time_unit}.") 

265 if f_name != progress.f_name: 

266 raise ValueError(f"Cannot mix f-names {f_name} " 

267 f"and {progress.f_name}.") 

268 n += 1 

269 if progress.instance in inst_runs: 

270 inst_runs[progress.instance].append(progress) 

271 else: 

272 inst_runs[progress.instance] = [progress] 

273 del source 

274 

275 if n <= 0: 

276 raise ValueError("Did not encounter any progress information.") 

277 n_insts: Final[int] = len(inst_runs) 

278 if (n_insts <= 0) or (n_insts > n): 

279 raise ValueError("Huh?.") 

280 

281 times: list[float] = [] 

282 goal: int | float | None 

283 same_goal_f: int | float | None = None 

284 first: bool = True 

285 for instance, pl in inst_runs.items(): 

286 goal = None 

287 if isinstance(goal_f, int | float): 

288 goal = goal_f 

289 elif callable(goal_f): 

290 goal = goal_f(instance) 

291 if (goal is None) and use_default_goal_f: 

292 if f_name == F_NAME_SCALED: 

293 goal = 1 

294 elif f_name == F_NAME_NORMALIZED: 

295 goal = 0 

296 else: 

297 goal = pl[0].f_standard 

298 for pp in pl: 

299 if goal != pp.f_standard: 

300 raise ValueError("Inconsistent goals: " 

301 f"{goal} and {pp.f_standard}") 

302 if not isinstance(goal, int | float): 

303 raise type_error(goal, "goal", (int, float)) 

304 if first: 

305 same_goal_f = goal 

306 elif goal != same_goal_f: 

307 same_goal_f = None 

308 res = cls._compute_times(pl, goal) 

309 for t in res: 

310 if isfinite(t): 

311 if t < 0: 

312 raise ValueError(f"Invalid ert {t}.") 

313 times.append(t) 

314 elif not (t >= inf): 

315 raise ValueError(f"Invalid ert {t}.") 

316 del inst_runs 

317 

318 if len(times) <= 0: 

319 return cls(algorithm, objective, encoding, n, n_insts, time_unit, 

320 f_name, same_goal_f, np.array([[0, 0], [inf, 0]])) 

321 

322 times.sort() 

323 time: list[float] = [0] 

324 ecdf: list[float] = [0] 

325 success: int = 0 

326 div: Final[int] = cls._get_div(n, n_insts) 

327 ll: int = 0 

328 for t in times: 

329 success += 1 # noqa: SIM113 

330 if t > time[ll]: 

331 time.append(t) 

332 ecdf.append(success / div) 

333 ll += 1 

334 else: 

335 ecdf[ll] = success / div 

336 

337 time.append(inf) 

338 ecdf.append(ecdf[ll]) 

339 

340 return cls(algorithm, objective, encoding, n, n_insts, 

341 time_unit, f_name, same_goal_f, 

342 np.column_stack((np.array(time), np.array(ecdf)))) 

343 

344 @classmethod 

345 def _from_progresses( 

346 cls: type["Ecdf"], 

347 source: Iterable[Progress], consumer: Callable[["Ecdf"], Any], 

348 f_goal: int | float | Callable[[str], int | float] 

349 | Iterable[int | float | Callable] | None = None, 

350 join_all_algorithms: bool = False, 

351 join_all_objectives: bool = False, 

352 join_all_encodings: bool = False) -> None: 

353 """ 

354 Compute one or multiple ECDFs from a stream of end results. 

355 

356 :param source: the set of progress instances 

357 :param f_goal: one or multiple goal values 

358 :param consumer: the destination to which the new records will be 

359 passed, can be the `append` method of a :class:`list` 

360 :param join_all_algorithms: should the Ecdf be aggregated over all 

361 algorithms 

362 :param join_all_objectives: should the Ecdf be aggregated over all 

363 objective functions 

364 :param join_all_encodings: should the Ecdf be aggregated over all 

365 encodings 

366 """ 

367 if not isinstance(source, Iterable): 

368 raise type_error(source, "source", Iterable) 

369 if not callable(consumer): 

370 raise type_error(consumer, "consumer", call=True) 

371 if not isinstance(join_all_algorithms, bool): 

372 raise type_error(join_all_algorithms, 

373 "join_all_algorithms", bool) 

374 if not isinstance(join_all_objectives, bool): 

375 raise type_error(join_all_objectives, 

376 "join_all_objectives", bool) 

377 if not isinstance(join_all_encodings, bool): 

378 raise type_error(join_all_encodings, 

379 "join_all_encodings", bool) 

380 if not isinstance(f_goal, Iterable): 

381 f_goal = [f_goal] 

382 

383 sorter: dict[tuple[str, str, str], list[Progress]] = {} 

384 for er in source: 

385 if not isinstance(er, Progress): 

386 raise type_error(er, "progress", Progress) 

387 key = ("" if join_all_algorithms else er.algorithm, 

388 "" if join_all_objectives else er.objective, 

389 "" if join_all_encodings else ( 

390 "" if er.encoding is None else er.encoding)) 

391 if key in sorter: 

392 lst = sorter[key] 

393 else: 

394 lst = [] 

395 sorter[key] = lst 

396 lst.append(er) 

397 

398 if len(sorter) <= 0: 

399 raise ValueError("source must not be empty") 

400 

401 keyz = sorted(sorter.keys()) 

402 for goal in f_goal: 

403 use_default_goal = goal is None 

404 for key in keyz: 

405 consumer(cls._create(sorter[key], goal, use_default_goal)) 

406 

407 

408def create(source: Iterable[Progress], 

409 goal_f: int | float | Callable[[ 

410 str], int | float] | None = None, 

411 use_default_goal_f: bool = True) -> Ecdf: 

412 """ 

413 Create one single Ecdf record from an iterable of Progress records. 

414 

415 :param source: the set of progress instances 

416 :param goal_f: the goal objective value 

417 :param use_default_goal_f: should we use the default lower bounds as 

418 goals? 

419 :return: the Ecdf record 

420 """ 

421 return Ecdf._create(source, goal_f, use_default_goal_f) 

422 

423 

424def from_progresses( 

425 source: Iterable[Progress], consumer: Callable[[Ecdf], Any], 

426 f_goal: int | float | Callable[[str], int | float] 

427 | Iterable[int | float | Callable] | None = None, 

428 join_all_algorithms: bool = False, 

429 join_all_objectives: bool = False, 

430 join_all_encodings: bool = False) -> None: 

431 """ 

432 Compute one or multiple ECDFs from a stream of end results. 

433 

434 :param source: the set of progress instances 

435 :param f_goal: one or multiple goal values 

436 :param consumer: the destination to which the new records will be 

437 passed, can be the `append` method of a :class:`list` 

438 :param join_all_algorithms: should the Ecdf be aggregated over all 

439 algorithms 

440 :param join_all_objectives: should the Ecdf be aggregated over all 

441 objective functions 

442 :param join_all_encodings: should the Ecdf be aggregated over all 

443 encodings 

444 """ 

445 return Ecdf._from_progresses( 

446 source, consumer, f_goal, join_all_algorithms, 

447 join_all_objectives, join_all_encodings) 

448 

449 

450def get_goal(ecdf: Ecdf) -> int | float | None: 

451 """ 

452 Get the goal value from the given ecdf instance. 

453 

454 :param Ecdf ecdf: the ecdf instance 

455 :return: the goal value 

456 """ 

457 return ecdf.goal_f 

458 

459 

460def goal_to_str(goal_f: int | float | None) -> str: 

461 """ 

462 Transform a goal to a string. 

463 

464 :param goal_f: the goal value 

465 :return: the string representation 

466 """ 

467 return "undefined" if goal_f is None else \ 

468 f"goal: \u2264{num_to_str(goal_f)}" 

469 

470 

471def to_csv(ecdf: Ecdf, file: str, 

472 put_header: bool = True) -> Path: 

473 """ 

474 Store a :class:`Ecdf` record in a CSV file. 

475 

476 :param ecdf: the ecdf 

477 :param file: the file to generate 

478 :param put_header: should we put a header with meta-data? 

479 :return: the fully resolved file name 

480 """ 

481 if not isinstance(ecdf, Ecdf): 

482 raise type_error(ecdf, "ecdf", Ecdf) 

483 path: Final[Path] = Path(file) 

484 logger(f"Writing ECDF to CSV file {path!r}.") 

485 path.ensure_parent_dir_exists() 

486 

487 with path.open_for_write() as out: 

488 sep: Final[str] = CSV_SEPARATOR 

489 write: Final[Callable[[str], int]] = out.write 

490 if put_header: 

491 kv: Final[str] = KEY_VALUE_SEPARATOR 

492 cmt: Final[str] = COMMENT_START 

493 if ecdf.algorithm is not None: 

494 write(f"{cmt} {KEY_ALGORITHM}{kv}{ecdf.algorithm}\n") 

495 write(f"{cmt} {KEY_N}{kv}{ecdf.n}\n") 

496 write(f"{cmt} {KEY_N_INSTS}{kv}{ecdf.n_insts}\n") 

497 write(f"{cmt} {KEY_F_NAME}{kv}{ecdf.f_name}\n") 

498 if ecdf.goal_f is not None: 

499 write(f"{cmt} {KEY_GOAL_F}{kv}{ecdf.goal_f}\n") 

500 if ecdf.objective is not None: 

501 write(f"{cmt} {KEY_OBJECTIVE_FUNCTION}" 

502 f"{kv}{ecdf.objective}\n") 

503 if ecdf.encoding is not None: 

504 write(f"{cmt} {KEY_ENCODING}{kv}{ecdf.encoding}\n") 

505 

506 write(f"{ecdf._time_key()}{sep}ecdf\n") 

507 for v in ecdf.ecdf: 

508 out.write( 

509 f"{num_to_str(v[0])}{sep}{num_to_str(v[1])}\n") 

510 

511 logger(f"Done writing ECDF to CSV file {path!r}.") 

512 

513 path.enforce_file() 

514 return path