Coverage for moptipy/evaluation/ecdf.py: 71%

1"""

2Approximate the :class:`~moptipy.evaluation.ecdf.Ecdf` to reach certain goals.

4The empirical cumulative distribution function (ECDF) for short illustrates

5the fraction of runs that have reached a certain goal over time. Let's say

6that you have performed 10 runs of a certain algorithm on a certain problem.

7As goal quality, you could define the globally optimal solution quality.

8For any point in time, the ECDF then shows how many of these runs have solved

9the problem to this goal, to optimality.

10Let's say the first run solves the problem after 100 FEs.

11Then the ECDF is 0 until 99 FEs and at 100 FEs, it becomes 1/10.

12The second-fastest run solves the problem after 200 FEs.

13The ECDF thus stays 0.1 until 199 FEs and at 200 FEs, it jumps to 0.2.

14And so on.

15This means that the value of the ECDF is always between 0 and 1.

171. Nikolaus Hansen, Anne Auger, Steffen Finck, Raymond Ros. *Real-Parameter

18 Black-Box Optimization Benchmarking 2010: Experimental Setup.*

19 Research Report RR-7215, INRIA. 2010. inria-00462481.

20 https://hal.inria.fr/inria-00462481/document/

212. Dave Andrew Douglas Tompkins and Holger H. Hoos. UBCSAT: An Implementation

22 and Experimentation Environment for SLS Algorithms for SAT and MAX-SAT. In

23 *Revised Selected Papers from the Seventh International Conference on

24 Theory and Applications of Satisfiability Testing (SAT'04),* May 10-13,

25 2004, Vancouver, BC, Canada, pages 306-320. Lecture Notes in Computer

26 Science (LNCS), volume 3542. Berlin, Germany: Springer-Verlag GmbH.

27 ISBN: 3-540-27829-X. doi: https://doi.org/10.1007/11527695_24.

283. Holger H. Hoos and Thomas Stützle. Evaluating Las Vegas Algorithms -

29 Pitfalls and Remedies. In Gregory F. Cooper and Serafín Moral, editors,

30 *Proceedings of the 14th Conference on Uncertainty in Artificial

31 Intelligence (UAI'98)*, July 24-26, 1998, Madison, WI, USA, pages 238-245.

32 San Francisco, CA, USA: Morgan Kaufmann Publishers Inc.

33 ISBN: 1-55860-555-X.

34"""

36from dataclasses import dataclass

37from math import inf, isfinite

38from typing import Any, Callable, Final, Iterable

40import numpy as np

41from pycommons.io.console import logger

42from pycommons.io.csv import COMMENT_START, CSV_SEPARATOR

43from pycommons.io.path import Path

44from pycommons.strings.string_conv import num_to_str

45from pycommons.types import check_int_range, type_error

47from moptipy.api.logging import (

48 KEY_ALGORITHM,

49 KEY_GOAL_F,

50)

51from moptipy.evaluation._utils import _get_goal_reach_index

52from moptipy.evaluation.base import (

53 F_NAME_NORMALIZED,

54 F_NAME_SCALED,

55 KEY_ENCODING,

56 KEY_N,

57 KEY_OBJECTIVE_FUNCTION,

58 MultiRun2DData,

59)

60from moptipy.evaluation.progress import Progress

61from moptipy.utils.lang import Lang

62from moptipy.utils.logger import (

63 KEY_VALUE_SEPARATOR,

64)

65from moptipy.utils.nputils import is_all_finite

67#: The number of instances.

68KEY_N_INSTS: Final[str] = f"{KEY_N}Insts"

69#: The objective dimension name.

70KEY_F_NAME: Final[str] = "fName"

73@dataclass(frozen=True, init=False, order=False, eq=False)

74class Ecdf(MultiRun2DData):

75 """The ECDF data."""

77 #: The ECDF data function

78 ecdf: np.ndarray

79 #: The number of instances over which the ERT-ECDF is computed.

80 n_insts: int

81 #: The goal value, or None if different goals were used for different

82 #: instances

83 goal_f: int | float | None

85 def __init__(self,

86 algorithm: str | None,

87 objective: str | None,

88 encoding: str | None,

89 n: int,

90 n_insts: int,

91 time_unit: str,

92 f_name: str,

93 goal_f: int | float | None,

94 ecdf: np.ndarray):

95 """

96 Create the ECDF function.

98 :param algorithm: the algorithm name, if all runs are with the same

99 algorithm

100 :param objective: the objective name, if all runs are on the same

101 objective function, `None` otherwise

102 :param encoding: the encoding name, if all runs are on the same

103 encoding and an encoding was actually used, `None` otherwise

104 :param n: the total number of runs

105 :param n_insts: the total number of instances

106 :param time_unit: the time unit

107 :param f_name: the objective dimension name

108 :param goal_f: the goal value, or `None` if different goals were used

109 for different instances

110 :param numpy.ndarray ecdf: the ert-ecdf matrix

111 """

112 super().__init__(algorithm, None, objective, encoding, n,

113 time_unit, f_name)

114 object.__setattr__(

115 self, "n_insts", check_int_range(n_insts, "n_insts", 1, self.n))

116

117 if goal_f is not None:

118 if not isinstance(goal_f, int | float):

119 raise type_error(goal_f, "goal_f", (int, float))

120 if not isfinite(goal_f):

121 raise ValueError(f"Invalid goal_f {goal_f}.")

122 object.__setattr__(self, "goal_f", goal_f)

123

124 if not isinstance(ecdf, np.ndarray):

125 raise type_error(ecdf, "ecdf", np.ndarray)

126 ecdf.flags.writeable = False

127

128 time: Final[np.ndarray] = ecdf[:, 0]

129 ll = time.size

130 if ll < 2:

131 raise ValueError("Must have at least two points in "

132 f"ecdf curve , but encountered {ll}.")

133 if not is_all_finite(time[:-1]):

134 raise ValueError("Non-last Ert-based time-values must be finite, "

135 f"but encountered {time}.")

136 if np.any(time[1:] <= time[:-1]):

137 raise ValueError("Time data must be strictly increasing,"

138 f"but encountered {time}.")

139

140 prb: Final[np.ndarray] = ecdf[:, 1]

141 if not is_all_finite(prb):

142 raise ValueError(

143 f"All ECDF values must be finite, but encountered {prb}.")

144 if (ll > 2) and np.any(prb[1:-1] <= prb[:-2]):

145 raise ValueError("ECDF data must be strictly increasing,"

146 f"but encountered {prb}.")

147 if prb[0] < 0:

148 raise ValueError(f"First ECDF element cannot be {prb[0]}.")

149 if prb[ll - 1] > 1:

150 raise ValueError(f"Last ECDF element cannot be {prb[ll - 1]}.")

151 object.__setattr__(self, "ecdf", ecdf)

152

153 def time_label(self) -> str:

154 """

155 Get the time label for x-axes.

156

157 :return: the time key

158 """

159 return Lang.translate(self.time_unit)

160

161 def _time_key(self) -> str:

162 """

163 Get the time key.

164

165 :return: the time key

166 """

167 return self.time_unit

168

169 def _tuple(self) -> tuple[Any, ...]:

170 """

171 Get the tuple representation of this object used in comparisons.

172

173 :return: the comparison-relevant data of this object in a tuple

174 """

175 return (self.__class__.__name__,

176 "" if self.algorithm is None else self.algorithm,

177 "" if self.instance is None else self.instance,

178 "" if self.objective is None else self.objective,

179 "" if self.encoding is None else self.encoding,

180 self.n, -1, self.time_unit, self.f_name,

181 inf if self.goal_f is None else self.goal_f,

182 self.n_insts)

183

184 @staticmethod

185 def _compute_times(source: list[Progress],

186 goal: int | float) -> list[float]:

187 """

188 Compute the times for the given goals.

189

190 Warning: `source` must only contain progress objects that contain

191 monotonously improving points. It must not contain runs that may get

192 worse over time.

193

194 :param source: the source array

195 :param goal: the goal value

196 :return: a list of times

197 """

198 ret = []

199 for pr in source:

200 idx = _get_goal_reach_index(pr.f, goal)

201 if idx < 0:

202 continue

203 ret.append(pr.time[idx])

204 return ret

205

206 # noinspection PyUnusedLocal

207 @staticmethod

208 def _get_div(n: int, n_insts: int) -> int:

209 """

210 Get the divisor.

211

212 :param n: the number of runs

213 :param n_insts: the number of instances

214 :return: the divisor

215 """

216 del n_insts

217 return n

218

219 @classmethod

220 def _create(cls: type["Ecdf"],

221 source: Iterable[Progress],

222 goal_f: int | float | Callable[[

223 str], int | float] | None = None,

224 use_default_goal_f: bool = True) -> "Ecdf":

225 """

226 Create one single Ecdf record from an iterable of Progress records.

227

228 :param source: the set of progress instances

229 :param goal_f: the goal objective value

230 :param use_default_goal_f: should we use the default lower bounds as

231 goals?

232 :return: the Ecdf record

233 """

234 if not isinstance(source, Iterable):

235 raise type_error(source, "source", Iterable)

236

237 algorithm: str | None = None

238 objective: str | None = None

239 encoding: str | None = None

240 time_unit: str | None = None

241 f_name: str | None = None

242 inst_runs: dict[str, list[Progress]] = {}

243 n: int = 0

244

245 for progress in source:

246 if not isinstance(progress, Progress):

247 raise type_error(progress, "progress", Progress)

248 if n <= 0:

249 algorithm = progress.algorithm

250 time_unit = progress.time_unit

251 objective = progress.objective

252 encoding = progress.encoding

253 f_name = progress.f_name

254 else:

255 if algorithm != progress.algorithm:

256 algorithm = None

257 if objective != progress.objective:

258 objective = None

259 if encoding != progress.encoding:

260 encoding = None

261 if time_unit != progress.time_unit:

262 raise ValueError(

263 f"Cannot mix time units {time_unit} "

264 f"and {progress.time_unit}.")

265 if f_name != progress.f_name:

266 raise ValueError(f"Cannot mix f-names {f_name} "

267 f"and {progress.f_name}.")

268 n += 1

269 if progress.instance in inst_runs:

270 inst_runs[progress.instance].append(progress)

271 else:

272 inst_runs[progress.instance] = [progress]

273 del source

274

275 if n <= 0:

276 raise ValueError("Did not encounter any progress information.")

277 n_insts: Final[int] = len(inst_runs)

278 if (n_insts <= 0) or (n_insts > n):

279 raise ValueError("Huh?.")

280

281 times: list[float] = []

282 goal: int | float | None

283 same_goal_f: int | float | None = None

284 first: bool = True

285 for instance, pl in inst_runs.items():

286 goal = None

287 if isinstance(goal_f, int | float):

288 goal = goal_f

289 elif callable(goal_f):

290 goal = goal_f(instance)

291 if (goal is None) and use_default_goal_f:

292 if f_name == F_NAME_SCALED:

293 goal = 1

294 elif f_name == F_NAME_NORMALIZED:

295 goal = 0

296 else:

297 goal = pl[0].f_standard

298 for pp in pl:

299 if goal != pp.f_standard:

300 raise ValueError("Inconsistent goals: "

301 f"{goal} and {pp.f_standard}")

302 if not isinstance(goal, int | float):

303 raise type_error(goal, "goal", (int, float))

304 if first:

305 same_goal_f = goal

306 elif goal != same_goal_f:

307 same_goal_f = None

308 res = cls._compute_times(pl, goal)

309 for t in res:

310 if isfinite(t):

311 if t < 0:

312 raise ValueError(f"Invalid ert {t}.")

313 times.append(t)

314 elif not (t >= inf):

315 raise ValueError(f"Invalid ert {t}.")

316 del inst_runs

317

318 if len(times) <= 0:

319 return cls(algorithm, objective, encoding, n, n_insts, time_unit,

320 f_name, same_goal_f, np.array([[0, 0], [inf, 0]]))

321

322 times.sort()

323 time: list[float] = [0]

324 ecdf: list[float] = [0]

325 success: int = 0

326 div: Final[int] = cls._get_div(n, n_insts)

327 ll: int = 0

328 for t in times:

329 success += 1 # noqa: SIM113

330 if t > time[ll]:

331 time.append(t)

332 ecdf.append(success / div)

333 ll += 1

334 else:

335 ecdf[ll] = success / div

336

337 time.append(inf)

338 ecdf.append(ecdf[ll])

339

340 return cls(algorithm, objective, encoding, n, n_insts,

341 time_unit, f_name, same_goal_f,

342 np.column_stack((np.array(time), np.array(ecdf))))

343

344 @classmethod

345 def _from_progresses(

346 cls: type["Ecdf"],

347 source: Iterable[Progress], consumer: Callable[["Ecdf"], Any],

348 f_goal: int | float | Callable[[str], int | float]

349 | Iterable[int | float | Callable] | None = None,

350 join_all_algorithms: bool = False,

351 join_all_objectives: bool = False,

352 join_all_encodings: bool = False) -> None:

353 """

354 Compute one or multiple ECDFs from a stream of end results.

355

356 :param source: the set of progress instances

357 :param f_goal: one or multiple goal values

358 :param consumer: the destination to which the new records will be

359 passed, can be the `append` method of a :class:`list`

360 :param join_all_algorithms: should the Ecdf be aggregated over all

361 algorithms

362 :param join_all_objectives: should the Ecdf be aggregated over all

363 objective functions

364 :param join_all_encodings: should the Ecdf be aggregated over all

365 encodings

366 """

367 if not isinstance(source, Iterable):

368 raise type_error(source, "source", Iterable)

369 if not callable(consumer):

370 raise type_error(consumer, "consumer", call=True)

371 if not isinstance(join_all_algorithms, bool):

372 raise type_error(join_all_algorithms,

373 "join_all_algorithms", bool)

374 if not isinstance(join_all_objectives, bool):

375 raise type_error(join_all_objectives,

376 "join_all_objectives", bool)

377 if not isinstance(join_all_encodings, bool):

378 raise type_error(join_all_encodings,

379 "join_all_encodings", bool)

380 if not isinstance(f_goal, Iterable):

381 f_goal = [f_goal]

382

383 sorter: dict[tuple[str, str, str], list[Progress]] = {}

384 for er in source:

385 if not isinstance(er, Progress):

386 raise type_error(er, "progress", Progress)

387 key = ("" if join_all_algorithms else er.algorithm,

388 "" if join_all_objectives else er.objective,

389 "" if join_all_encodings else (

390 "" if er.encoding is None else er.encoding))

391 if key in sorter:

392 lst = sorter[key]

393 else:

394 lst = []

395 sorter[key] = lst

396 lst.append(er)

397

398 if len(sorter) <= 0:

399 raise ValueError("source must not be empty")

400

401 keyz = sorted(sorter.keys())

402 for goal in f_goal:

403 use_default_goal = goal is None

404 for key in keyz:

405 consumer(cls._create(sorter[key], goal, use_default_goal))

406

407

408def create(source: Iterable[Progress],

409 goal_f: int | float | Callable[[

410 str], int | float] | None = None,

411 use_default_goal_f: bool = True) -> Ecdf:

412 """

413 Create one single Ecdf record from an iterable of Progress records.

414

415 :param source: the set of progress instances

416 :param goal_f: the goal objective value

417 :param use_default_goal_f: should we use the default lower bounds as

418 goals?

419 :return: the Ecdf record

420 """

421 return Ecdf._create(source, goal_f, use_default_goal_f)

422

423

424def from_progresses(

425 source: Iterable[Progress], consumer: Callable[[Ecdf], Any],

426 f_goal: int | float | Callable[[str], int | float]

427 | Iterable[int | float | Callable] | None = None,

428 join_all_algorithms: bool = False,

429 join_all_objectives: bool = False,

430 join_all_encodings: bool = False) -> None:

431 """

432 Compute one or multiple ECDFs from a stream of end results.

433

434 :param source: the set of progress instances

435 :param f_goal: one or multiple goal values

436 :param consumer: the destination to which the new records will be

437 passed, can be the `append` method of a :class:`list`

438 :param join_all_algorithms: should the Ecdf be aggregated over all

439 algorithms

440 :param join_all_objectives: should the Ecdf be aggregated over all

441 objective functions

442 :param join_all_encodings: should the Ecdf be aggregated over all

443 encodings

444 """

445 return Ecdf._from_progresses(

446 source, consumer, f_goal, join_all_algorithms,

447 join_all_objectives, join_all_encodings)

448

449

450def get_goal(ecdf: Ecdf) -> int | float | None:

451 """

452 Get the goal value from the given ecdf instance.

453

454 :param Ecdf ecdf: the ecdf instance

455 :return: the goal value

456 """

457 return ecdf.goal_f

458

459

460def goal_to_str(goal_f: int | float | None) -> str:

461 """

462 Transform a goal to a string.

463

464 :param goal_f: the goal value

465 :return: the string representation

466 """

467 return "undefined" if goal_f is None else \

468 f"goal: \u2264{num_to_str(goal_f)}"

469

470

471def to_csv(ecdf: Ecdf, file: str,

472 put_header: bool = True) -> Path:

473 """

474 Store a :class:`Ecdf` record in a CSV file.

475

476 :param ecdf: the ecdf

477 :param file: the file to generate

478 :param put_header: should we put a header with meta-data?

479 :return: the fully resolved file name

480 """

481 if not isinstance(ecdf, Ecdf):

482 raise type_error(ecdf, "ecdf", Ecdf)

483 path: Final[Path] = Path(file)

484 logger(f"Writing ECDF to CSV file {path!r}.")

485 path.ensure_parent_dir_exists()

486

487 with path.open_for_write() as out:

488 sep: Final[str] = CSV_SEPARATOR

489 write: Final[Callable[[str], int]] = out.write

490 if put_header:

491 kv: Final[str] = KEY_VALUE_SEPARATOR

492 cmt: Final[str] = COMMENT_START

493 if ecdf.algorithm is not None:

494 write(f"{cmt} {KEY_ALGORITHM}{kv}{ecdf.algorithm}\n")

495 write(f"{cmt} {KEY_N}{kv}{ecdf.n}\n")

496 write(f"{cmt} {KEY_N_INSTS}{kv}{ecdf.n_insts}\n")

497 write(f"{cmt} {KEY_F_NAME}{kv}{ecdf.f_name}\n")

498 if ecdf.goal_f is not None:

499 write(f"{cmt} {KEY_GOAL_F}{kv}{ecdf.goal_f}\n")

500 if ecdf.objective is not None:

501 write(f"{cmt} {KEY_OBJECTIVE_FUNCTION}"

502 f"{kv}{ecdf.objective}\n")

503 if ecdf.encoding is not None:

504 write(f"{cmt} {KEY_ENCODING}{kv}{ecdf.encoding}\n")

505

506 write(f"{ecdf._time_key()}{sep}ecdf\n")

507 for v in ecdf.ecdf:

508 out.write(

509 f"{num_to_str(v[0])}{sep}{num_to_str(v[1])}\n")

510

511 logger(f"Done writing ECDF to CSV file {path!r}.")

512

513 path.enforce_file()

514 return path

Coverage for moptipy / evaluation / ecdf.py: 71%

231 statements