Coverage for moptipy / evaluation / selector.py: 93%
252 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
1"""
2A tool for selecting a consistent subset of data from partial experiments.
4When we have partial experimental data, maybe collected from experiments that
5are still ongoing, we want to still evaluate them in some consistent way. The
6right method for doing this could be to select a subset of that data that is
7consistent, i.e., a subset where the algorithms have the same number of runs
8on the instances using the same seeds. The function :func:`select_consistent`
9offered by this module provides the functionality to make such a selection.
10It may be a bit slow, but hopefully it will pick the largest possible
11consistent sub-selection or, at least, get close to it.
13The current method to select the data is rather heuristic. It always begins
14with the full set of data and aims to delete the element that will cause the
15least other deletions down the road, until we arrive in a consistent state.
I strongly suspect that doing this perfectly would be NP-hard, so we cannot
hope to solve it exactly in reasonable time. Instead, we apply several
different heuristics and then pick the best result.
19"""
21from collections import Counter
22from operator import itemgetter
23from typing import Any, Callable, Final, Iterable, TypeVar
25from pycommons.io.console import logger
26from pycommons.types import type_error
28from moptipy.evaluation.base import PerRunData
#: the type variable for the records processed by the selector routine;
#: bound to :class:`~moptipy.evaluation.base.PerRunData`
T = TypeVar("T", bound=PerRunData)

#: index of the algorithm name inside a key tuple (see `__data_tup`)
KEY_ALGORITHM: Final[int] = 0
#: index of the instance name inside a key tuple
KEY_INSTANCE: Final[int] = KEY_ALGORITHM + 1
#: index of the encoding name inside a key tuple
KEY_ENCODING: Final[int] = KEY_INSTANCE + 1
#: index of the objective function name inside a key tuple
KEY_OBJECTIVE: Final[int] = KEY_ENCODING + 1
#: index of the (instance, random seed) pair inside a key tuple
KEY_SEED: Final[int] = KEY_OBJECTIVE + 1
#: the total number of components in a key tuple
TOTAL_KEYS: Final[int] = KEY_SEED + 1
def __data_tup(d: PerRunData) -> tuple[str, str, str, str, tuple[str, int]]:
    """
    Extract the feature key tuple of a per-run data record.

    The tuple holds algorithm, instance, encoding, objective, and the
    (instance, random seed) pair, in the order given by the `KEY_*`
    constants.

    :param d: the per-run data record
    :returns: the key tuple describing the record
    :raises TypeError: if `d` is not a :class:`PerRunData`
    """
    if not isinstance(d, PerRunData):
        raise type_error(d, "dataElement", PerRunData)
    return (d.algorithm, d.instance, d.encoding, d.objective,
            (d.instance, d.rand_seed))
def __score_inc(scores: tuple[Counter, ...], key: tuple[Any, ...]) -> None:
    """
    Increment the per-dimension counts for one key tuple.

    For each component of `key`, the counter of the corresponding
    dimension is incremented by one.

    :param scores: the per-dimension counters
    :param key: the key tuple whose components are counted
    """
    for dim, element in enumerate(key):
        scores[dim][element] += 1
def __score_dec(scores: tuple[Counter, ...], key: tuple[Any, ...]) -> None:
    """
    Decrement the per-dimension counts for one key tuple.

    Entries whose count reaches zero are removed from their counter.
    A count dropping below zero indicates corrupted bookkeeping.

    :param scores: the per-dimension counters
    :param key: the key tuple whose components are un-counted
    :raises ValueError: if any count becomes negative
    """
    for dim, element in enumerate(key):
        counter = scores[dim]
        counter[element] -= 1
        remaining: int = counter[element]
        if remaining <= 0:
            del counter[element]
            if remaining < 0:
                raise ValueError("Got negative score?")
def __ret(source: list, expected: int, log: bool) -> list:
    """
    Unwrap a working list in place and return it as final result.

    Every entry of `source` is a `[key, record, score]` list; only the
    record at index 1 is kept, replacing the entry in place.

    :param source: the working list to unwrap
    :param expected: the number of records that must be present
    :param log: shall we log the number of returned records
    :returns: the unwrapped list
    :raises ValueError: if the list length differs from `expected`
    """
    for index in range(list.__len__(source)):
        source[index] = source[index][1]
    if list.__len__(source) != expected:
        raise ValueError("Inconsistent list length!")
    if log:
        logger(f"Now returning {expected} records of data.")
    return source  # type: ignore
def __scorer_tired(d: tuple, scores: tuple[Counter, ...], _) -> list[int]:
    """
    Compute the "tired" score: the sorted list of per-dimension counts.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :returns: the counts of the key components, in ascending order
    """
    counts: list[int] = [
        scores[dim][element] for dim, element in enumerate(d)]
    counts.sort()
    return counts
def __scorer_normalized_tired(d: tuple, scores: tuple[Counter, ...],
                              total: tuple[int, ...]) -> list[float]:
    """
    Compute the normalized "tired" score.

    Each count is divided by the total of its dimension; dimensions with
    a non-positive total are skipped.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :param total: the per-dimension count totals
    :returns: the normalized counts, in ascending order
    """
    fractions: list[float] = []
    for dim, element in enumerate(d):
        if total[dim] > 0:
            fractions.append(scores[dim][element] / total[dim])
    fractions.sort()
    return fractions
def __scorer_sum(d: tuple, scores: tuple[Counter, ...], _) -> int:
    """
    Compute the "sum" score: the total of the per-dimension counts.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :returns: the sum of the counts of the key components
    """
    result: int = 0
    for dim, element in enumerate(d):
        result += scores[dim][element]
    return result
def __scorer_normalized_sum(d: tuple, scores: tuple[Counter, ...],
                            total: tuple[int, ...]) -> float:
    """
    Compute the normalized "sum" score.

    Each count is divided by the total of its dimension before summing;
    dimensions with a non-positive total are skipped.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :param total: the per-dimension count totals
    :returns: the sum of the normalized counts
    """
    result: float = 0
    for dim, element in enumerate(d):
        if total[dim] > 0:
            result += scores[dim][element] / total[dim]
    return result
def __scorer_product(d: tuple, scores: tuple[Counter, ...], _) -> int:
    """
    Compute the "product" score: the product of per-dimension counts.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :returns: the product of the counts of the key components
    """
    product: int = 1
    for dim, element in enumerate(d):
        product = product * scores[dim][element]
    return product
def __scorer_normalized_min(d: tuple, scores: tuple[Counter, ...],
                            total: tuple[int, ...]) -> float:
    """
    Compute the normalized minimum score.

    Each count is divided by the total of its dimension; dimensions with
    a non-positive total are skipped. The smallest fraction is returned.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :param total: the per-dimension count totals
    :returns: the smallest normalized count
    :raises ValueError: if no dimension has a positive total
    """
    fractions: list[float] = [
        scores[dim][element] / total[dim]
        for dim, element in enumerate(d) if total[dim] > 0]
    return min(fractions)
def __not(_: tuple[Counter, ...]) -> int:
    """
    Return the placeholder total for scorers that need no normalization.

    :returns -1: always, as constant placeholder
    """
    return -1
192def __total(scores: tuple[Counter, ...]) -> tuple[int, ...] | None:
193 """
194 Get the total scores.
196 :param scores: the scores
197 :returns: the total scores
198 """
199 result: tuple[int, ...] = tuple(
200 t.total() if len(t) > 1 else -1 for t in scores)
201 return None if (tuple.__len__(result) < 1) or (
202 max(result) <= 0) else result
#: the scorers: (name, scorer, total computer) triples. The scorer maps a
#: key tuple to a comparable score; the total computer produces the
#: normalization totals, or ``None`` to skip the method entirely.
#: NOTE(review): "product" is paired with ``__total`` although
#: ``__scorer_product`` ignores the totals — presumably this gates the
#: method off when no dimension varies; confirm intended.
__SCORERS: Final[tuple[tuple[str, Callable[[
    tuple, tuple[Counter, ...], Any], Any], Callable[[
    tuple[Counter, ...]], Any]], ...]] = (
    ("tired", __scorer_tired, __not),
    ("normalized_tired", __scorer_normalized_tired, __total),
    ("sum", __scorer_sum, __not),
    ("normalized_sum", __scorer_normalized_sum, __total),
    ("product", __scorer_product, __total),
    ("normalized_min", __scorer_normalized_min, __total),
)
def select_consistent(data: Iterable[T], log: bool = True,
                      thorough: bool = True) -> list[T]:
    """
    Select data such that the numbers of runs are consistent.

    The input is a set of data items which represent some records over the
    runs of algorithms on instances. It may be that not all algorithms have
    been applied to all instances. Maybe the number of runs is inconsistent
    over the algorithm-instance combinations, too. Maybe some algorithms
    have more runs on some instances. Maybe the runs are even different,
    it could be that some algorithms have runs for seed `A`, `B`, and `C` on
    instance `I`, while others have runs for seed `C` and `D`. This function
    is designed to retain only the runs with seed `C` in such a case. It may
    discard algorithms or instances or algorithm-instance-seed combinations
    in order to obtain a selection of data where all algorithms have been
    applied to all instances as same as often and using the same seeds.

    Now there are different ways to select such consistent subsets of a
    dataset. Of course, we want to select the data such that as much as
    possible of the data is retained and as little as possible is discarded.
    This may be a hard optimization problem in itself. Here, we offer a
    heuristic solution. Basically, we step-by-step try to cut away the
    setups that are covered by the least amount of runs. We keep repeating
    this until we arrive in a situation where all setups have the same
    amount of runs. We then check if there were some strange symmetries that
    still make the data inconsistent and, if we found some, try to delete
    one run to break the symmetries and then repeat the cleaning-up process.
    In the end, we should get a list of overall consistent data elements that
    can be used during a normal experiment evaluation procedure.

    This iterative process may be rather slow on larger datasets, but it is
    maybe the best approximation we can offer to retain as much data as
    possible.

    :param data: the source data
    :param log: shall we log the progress
    :param thorough: use the slower method which may give us more data
    :return: a list with the selected data

    >>> def __p(x) -> str:
    ...     return (f"{x.algorithm}/{x.instance}/{x.objective}/{x.encoding}/"
    ...             f"{x.rand_seed}")

    >>> a1i1o1e1s1 = PerRunData("a1", "i1", "o1", "e1", 1)
    >>> a1i1o1e1s2 = PerRunData("a1", "i1", "o1", "e1", 2)
    >>> a1i1o1e1s3 = PerRunData("a1", "i1", "o1", "e1", 3)
    >>> a1i2o1e1s1 = PerRunData("a1", "i2", "o1", "e1", 1)
    >>> a1i2o1e1s2 = PerRunData("a1", "i2", "o1", "e1", 2)
    >>> a1i2o1e1s3 = PerRunData("a1", "i2", "o1", "e1", 3)
    >>> a2i1o1e1s1 = PerRunData("a2", "i1", "o1", "e1", 1)
    >>> a2i1o1e1s2 = PerRunData("a2", "i1", "o1", "e1", 2)
    >>> a2i1o1e1s3 = PerRunData("a2", "i1", "o1", "e1", 3)
    >>> a2i2o1e1s1 = PerRunData("a1", "i2", "o1", "e1", 1)
    >>> a2i2o1e1s2 = PerRunData("a2", "i2", "o1", "e1", 2)
    >>> a2i2o1e1s3 = PerRunData("a2", "i2", "o1", "e1", 3)

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i1o1e1s3,
    ...     a1i2o1e1s1, a1i2o1e1s2, a1i2o1e1s3,
    ...     a2i1o1e1s1, a2i1o1e1s2,
    ...     a2i2o1e1s2, a2i2o1e1s3))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2', 'a1/i2/o1/e1/2', 'a1/i2/o1/e1/3',\
 'a2/i1/o1/e1/1', 'a2/i1/o1/e1/2', 'a2/i2/o1/e1/2', 'a2/i2/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s2, a1i1o1e1s3,
    ...     a1i2o1e1s1, a1i2o1e1s2, a1i2o1e1s3,
    ...     a2i1o1e1s1, a2i1o1e1s2,
    ...     a2i2o1e1s2, a2i2o1e1s3))))
    ['a1/i2/o1/e1/2', 'a1/i2/o1/e1/3', 'a2/i2/o1/e1/2', 'a2/i2/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s2, a1i1o1e1s3,
    ...     a1i2o1e1s1, a1i2o1e1s2, a1i2o1e1s3,
    ...     a2i1o1e1s1, a2i1o1e1s2,
    ...     a2i2o1e1s2))))
    ['a1/i1/o1/e1/2', 'a1/i2/o1/e1/2', 'a2/i1/o1/e1/2', 'a2/i2/o1/e1/2']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i1o1e1s3,
    ...     a2i2o1e1s1, a2i2o1e1s2, a2i2o1e1s3))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2', 'a1/i1/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i1o1e1s3,
    ...     a2i1o1e1s1, a2i1o1e1s2, a2i1o1e1s3))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2', 'a1/i1/o1/e1/3', \
'a2/i1/o1/e1/1', 'a2/i1/o1/e1/2', 'a2/i1/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i2o1e1s2, a1i2o1e1s3))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2', 'a1/i2/o1/e1/2', 'a1/i2/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i2o1e1s2))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a2i1o1e1s2))))
    ['a1/i1/o1/e1/1']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a2i1o1e1s2, a2i1o1e1s3))))
    ['a2/i1/o1/e1/2', 'a2/i1/o1/e1/3']

    >>> try:
    ...     select_consistent((a1i1o1e1s1, a1i1o1e1s2, a1i2o1e1s2, a1i2o1e1s2))
    ... except ValueError as ve:
    ...     print(ve)
    Found 4 records but only 3 different keys!

    >>> try:
    ...     select_consistent(1)
    ... except TypeError as te:
    ...     print(te)
    data should be an instance of typing.Iterable but is int, namely 1.

    >>> try:
    ...     select_consistent((a2i1o1e1s2, a2i1o1e1s3), 3)
    ... except TypeError as te:
    ...     print(te)
    log should be an instance of bool but is int, namely 3.

    >>> try:
    ...     select_consistent({234})
    ... except TypeError as te:
    ...     print(te)
    dataElement should be an instance of moptipy.evaluation.base.PerRunData \
but is int, namely 234.

    >>> try:
    ...     select_consistent((a2i1o1e1s2, a2i1o1e1s3), True, 4)
    ... except TypeError as te:
    ...     print(te)
    thorough should be an instance of bool but is int, namely 4.
    """
    if not isinstance(data, Iterable):
        raise type_error(data, "data", Iterable)
    if not isinstance(log, bool):
        raise type_error(log, "log", bool)
    if not isinstance(thorough, bool):
        raise type_error(thorough, "thorough", bool)

    # We obtain a sorted list of the data in order to make the results
    # consistent regardless of the order in which the data comes in.
    # The data is only sorted by its features and not by any other
    # information attached to it. Each working entry is a mutable list
    # [key tuple, record, score].
    dataset: Final[list[list]] = [[__data_tup(t), t, 0] for t in data]
    dataset.sort(key=itemgetter(0))
    dataset_size: int = list.__len__(dataset)
    if log:
        logger(f"Found {dataset_size} records of data.")

    # Duplicate keys would make any run count meaningless.
    set_l: Final[int] = set.__len__({x[0] for x in dataset})
    if set_l != dataset_size:
        raise ValueError(
            f"Found {dataset_size} records but only {set_l} different keys!")

    # Compute the scores: count how often each algorithm, instance,
    # objective, encoding, and (instance, seed) combination are
    # encountered.
    key_tuple_len: Final[int] = tuple.__len__(dataset[0][0])
    if key_tuple_len != TOTAL_KEYS:
        raise ValueError("Unexpected error!")

    # the counters for the score calculations, one per key dimension
    scores: Final[tuple[Counter, ...]] = tuple(
        Counter() for _ in range(key_tuple_len))

    # the final result: the best (longest) consistent subset found so far
    best_length: int = -1
    best_data: list[list] | None = None

    # Try each scoring heuristic (or only the first one if not thorough)
    # and keep the result that retains the most records.
    for scorer_name, scorer, scorer_total in __SCORERS if thorough else (
            __SCORERS[0], ):
        if log:  # FIX: this banner previously ignored the `log` flag
            logger(f"Now scoring according to the {scorer_name!r} method.")
        # compute the original item scores
        for sc in scores:
            sc.clear()

        source: list[list] = dataset.copy()
        count: int = dataset_size
        for er in source:
            __score_inc(scores, er[0])

        changed: bool = True
        while changed:
            changed = False

            # Find the setups with maximum and minimum scores.
            min_score: Any = None
            max_score: Any = None
            norm: Any = scorer_total(scores)

            if norm is None:  # cannot proceed
                if log:
                    logger(f"Method {scorer_name!r} is not applicable.")
                count = -1  # we can do nothing
                break

            for er in source:
                score = scorer(er[0], scores, norm)
                er[2] = score
                if (min_score is None) or (min_score > score):
                    min_score = score
                if (max_score is None) or (max_score < score):
                    max_score = score
            del norm

            if min_score < max_score:  # Some setups have lower scores.
                del_count: int = 0  # We will delete them.
                # Iterate backwards so deletion does not shift pending
                # indices.
                for i in range(count - 1, -1, -1):
                    er = source[i]
                    if er[2] <= min_score:
                        del source[i]
                        __score_dec(scores, er[0])
                        del_count += 1
                if del_count <= 0:
                    raise ValueError(
                        f"Did not delete anything under {scorer_name!r}?")

                new_count: int = list.__len__(source)
                if new_count != (count - del_count):
                    raise ValueError("List inconsistent after deletion "
                                     f"under {scorer_name!r}?")
                if log:
                    logger(
                        f"Deleted {del_count} of the {count} records because "
                        f"their score was {min_score} while the maximum score"
                        f" was {max_score}. Retained {new_count} records "
                        f"under {scorer_name!r}.")
                count = new_count
                changed = True
                continue

            if log:
                logger(f"All setups now have the same score {max_score} under"
                       f" {scorer_name!r}.")
            if count <= best_length:
                if log:
                    logger(f"We now only have {count} setups, which means we "
                           "cannot get better than the current best set with "
                           f"{best_length} setups, so we quit after score-"
                           f"based cleaning under {scorer_name!r}.")
                count = -1
                break

            # If we get here, all elements have the same score.
            # This means that we are basically done.
            #
            # However, this may also happen if a very odd division exists in
            # the data. Maybe we have one algorithm that was applied to one
            # instance ten times and another algorithm applied to another
            # instance ten times. This data would still be inconsistent, as
            # it does not allow for any comparison.

            # Compute the different values for everything except the seeds.
            keys: tuple[int, ...] = tuple(
                v for v, s in enumerate(scores)
                if (v != KEY_SEED) and (len(s) > 1))

            if tuple.__len__(keys) > 1:
                if log:
                    logger("Now checking for inconsistencies in "
                           "algorithm/instance/objective/encoding under"
                           f" {scorer_name!r}.")
                # Only if there are different values in at least one tuple
                # dimension, we need to check for strange situations.

                # For each of (instance, algorithm, encoding, objective), we
                # must have the same number of other records.
                # If not, then we have some strange symmetric situation that
                # needs to be solved.
                per_value: dict[Any, set[Any]] = {}
                last: set[Any] | None = None
                for key in keys:
                    other_keys: tuple[int, ...] = tuple(
                        kk for kk in keys if kk != key)
                    for er in source:
                        kv = er[0][key]
                        other = tuple(er[0][ok] for ok in other_keys)
                        if kv in per_value:
                            per_value[kv].add(other)
                        else:
                            per_value[kv] = last = {other}
                    for v in per_value.values():
                        if v != last:
                            changed = True
                            break
                    per_value.clear()

                    # We just need to delete one random element. This will
                    # break the symmetry.
                    if changed:
                        if log:
                            logger(
                                f"Deleting one of the {count} elements to "
                                "break the erroneous symmetry under "
                                f"{scorer_name}.")
                        er = source[-1]
                        del source[-1]
                        __score_dec(scores, er[0])
                        count -= 1
                        break
                del per_value
                del keys
                if changed:
                    continue

                if log:
                    logger("No inconsistencies in algorithm/instance/objecti"
                           f"ve/encoding found under {scorer_name!r}.")
            elif log:
                logger("No inconsistencies in algorithm/instance/objective/"
                       f"encoding possible under {scorer_name!r}.")
            if count <= best_length:
                if log:
                    logger(f"We now only have {count} setups, which means we "
                           "cannot get better than the current best set with "
                           f"{best_length} setups, so we quit after algorithm"
                           "/instance/objective/encoding cleaning under "
                           f"{scorer_name!r}.")
                count = -1
                break

            # If we get here, the only problem left could be if algorithms
            # have different seeds for the same instances. We thus need to
            # check that for each instance, all the seeds are the same.
            # Notice that such inconsistencies can only occur if different
            # seeds occurred exactly as same as often.
            if log:
                logger("Now checking for inconsistencies in instance "
                       f"seeds under {scorer_name!r}.")
            # map: instance -> (algorithm, objective, encoding) -> seed set
            seeds: dict[str, dict[tuple[str, str, str], set[int]]] = {}
            for er in source:
                inst: str = er[1].instance
                cur: dict[tuple[str, str, str], set[int]]
                if inst in seeds:
                    cur = seeds[inst]
                else:
                    seeds[inst] = cur = {}
                kx: tuple[str, str, str] = (
                    er[1].algorithm, er[1].objective, er[1].encoding)
                if kx in cur:
                    cur[kx].add(er[1].rand_seed)
                else:
                    cur[kx] = {er[1].rand_seed}

            max_seed_insts: set[str] = set()
            max_seeds: int = -1
            min_seed_inst: str | None = None
            min_seeds: int = -1
            must_delete_from_insts: set[str] = set()
            for instance, inst_setups in seeds.items():
                # the seed set of the first setup serves as reference
                kvx: set[int] | None = None
                for setup_seeds in inst_setups.values():
                    if kvx is None:
                        kvx = setup_seeds
                        seed_cnt: int = set.__len__(setup_seeds)
                        if seed_cnt >= max_seeds:
                            if seed_cnt > max_seeds:
                                max_seed_insts.clear()
                                max_seeds = seed_cnt
                            max_seed_insts.add(instance)
                        if (seed_cnt < min_seeds) or (min_seed_inst is None):
                            min_seeds = seed_cnt
                            min_seed_inst = instance
                    elif kvx != setup_seeds:
                        # We got a symmetric inconsistency.
                        must_delete_from_insts.add(instance)
                        break
            if min_seeds < max_seeds:
                # Instances with more seeds than the minimum must shrink.
                must_delete_from_insts.update(max_seed_insts)
            del max_seed_insts

            del_count = set.__len__(must_delete_from_insts)
            if del_count > 0:
                if log:
                    logger("Must delete one record from all of "
                           f"{must_delete_from_insts!r}.")
                for i in range(count - 1, -1, -1):
                    er = source[i]
                    instance = er[1].instance
                    if instance in must_delete_from_insts:
                        must_delete_from_insts.remove(instance)
                        del source[i]
                        __score_dec(scores, er[0])
                        changed = True
                        if set.__len__(must_delete_from_insts) <= 0:
                            break
                new_count = list.__len__(source)
                if new_count != (count - del_count):
                    raise ValueError("Error when deleting instances "
                                     f"under {scorer_name!r}.")
                count = new_count
                if not changed:
                    raise ValueError(
                        f"Seeds inconsistent under {scorer_name!r}.")
            del must_delete_from_insts

            del seeds
            if changed:
                if count <= best_length:
                    if log:
                        logger(f"We now only have {count} setups, which "
                               "means we cannot get better than the current "
                               f"best set with {best_length} setups, so we "
                               "quit after seed-based cleaning under "
                               f"{scorer_name!r}.")
                    count = -1
                    break
            elif log:
                logger(f"No seed inconsistencies under {scorer_name!r}.")
            # There should not be any problems left, but we need to check
            # again if something has changed.

        if count <= 0:
            continue  # We can do nothing here.

        if count > best_length:
            if log:  # FIX: this message previously ignored the `log` flag
                logger(f"Method {scorer_name!r} yields {count} records, "
                       "which is the new best.")
            best_length = count
            best_data = source
            if best_length >= dataset_size:
                if log:  # FIX: previously ignored the `log` flag
                    logger(f"Included all data under {scorer_name!r}, "
                           "so we can stop.")
                break
        elif log:
            logger(f"Method {scorer_name!r} yields {count} records, "
                   "which is not better than the current "
                   f"best {best_length}.")

    # We are finally finished. The scores are no longer needed.
    del scores
    if best_length <= 0:
        raise ValueError("No data found at all?")
    return __ret(best_data, best_length, log)