Coverage for moptipy / evaluation / selector.py: 93%
252 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
1"""
2A tool for selecting a consistent subset of data from partial experiments.
4When we have partial experimental data, maybe collected from experiments that
5are still ongoing, we want to still evaluate them in some consistent way. The
6right method for doing this could be to select a subset of that data that is
7consistent, i.e., a subset where the algorithms have the same number of runs
8on the instances using the same seeds. The function :func:`select_consistent`
9offered by this module provides the functionality to make such a selection.
10It may be a bit slow, but hopefully it will pick the largest possible
11consistent sub-selection or, at least, get close to it.
13The current method to select the data is rather heuristic. It always begins
14with the full set of data and aims to delete the element that will cause the
15least other deletions down the road, until we arrive in a consistent state.
I strongly suspect that doing this perfectly would be NP-hard, so we cannot
hope to solve it exactly in reasonable time. Instead, we apply several
different heuristics and then pick the best result.
19"""
21from collections import Counter
22from operator import itemgetter
23from typing import Any, Callable, Final, Iterable, TypeVar
25from pycommons.io.console import logger
26from pycommons.types import type_error
28from moptipy.evaluation.base import PerRunData
#: the type variable for the records processed by the selector routine;
#: bound to :class:`~moptipy.evaluation.base.PerRunData`
T = TypeVar("T", bound=PerRunData)

#: index of the algorithm name inside a key tuple (see `__data_tup`)
KEY_ALGORITHM: Final[int] = 0
#: index of the instance name inside a key tuple
KEY_INSTANCE: Final[int] = KEY_ALGORITHM + 1
#: index of the encoding name inside a key tuple
KEY_ENCODING: Final[int] = KEY_INSTANCE + 1
#: index of the objective function name inside a key tuple
KEY_OBJECTIVE: Final[int] = KEY_ENCODING + 1
#: index of the (instance, random seed) pair inside a key tuple
KEY_SEED: Final[int] = KEY_OBJECTIVE + 1
#: the total number of components in a key tuple
TOTAL_KEYS: Final[int] = KEY_SEED + 1
def __data_tup(d: PerRunData) -> tuple[str, str, str, str, tuple[str, int]]:
    """
    Extract the feature key tuple of a per-run data record.

    The tuple holds algorithm, instance, encoding, objective, and the
    (instance, random seed) pair, in the order given by the `KEY_*`
    constants.

    :param d: the per-run data record
    :returns: the key tuple describing the record
    :raises TypeError: if `d` is not a :class:`PerRunData`
    """
    if not isinstance(d, PerRunData):
        raise type_error(d, "dataElement", PerRunData)
    return (d.algorithm, d.instance, d.encoding, d.objective,
            (d.instance, d.rand_seed))
def __score_inc(scores: tuple[Counter, ...], key: tuple[Any, ...]) -> None:
    """
    Increment the per-dimension counts for one key tuple.

    For each component of `key`, the counter of the corresponding
    dimension is incremented by one.

    :param scores: the per-dimension counters
    :param key: the key tuple whose components are counted
    """
    for dim, element in enumerate(key):
        scores[dim][element] += 1
def __score_dec(scores: tuple[Counter, ...], key: tuple[Any, ...]) -> None:
    """
    Decrement the per-dimension counts for one key tuple.

    Entries whose count reaches zero are removed from their counter.
    A count dropping below zero indicates corrupted bookkeeping.

    :param scores: the per-dimension counters
    :param key: the key tuple whose components are un-counted
    :raises ValueError: if any count becomes negative
    """
    for dim, element in enumerate(key):
        counter = scores[dim]
        counter[element] -= 1
        remaining: int = counter[element]
        if remaining <= 0:
            del counter[element]
            if remaining < 0:
                raise ValueError("Got negative score?")
def __ret(source: list, expected: int, log: bool) -> list:
    """
    Unwrap a working list in place and return it as final result.

    Every entry of `source` is a `[key, record, score]` list; only the
    record at index 1 is kept, replacing the entry in place.

    :param source: the working list to unwrap
    :param expected: the number of records that must be present
    :param log: shall we log the number of returned records
    :returns: the unwrapped list
    :raises ValueError: if the list length differs from `expected`
    """
    for index in range(list.__len__(source)):
        source[index] = source[index][1]
    if list.__len__(source) != expected:
        raise ValueError("Inconsistent list length!")
    if log:
        logger(f"Now returning {expected} records of data.")
    return source  # type: ignore
def __scorer_tired(d: tuple, scores: tuple[Counter, ...], _) -> list[int]:
    """
    Compute the "tired" score: the sorted list of per-dimension counts.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :returns: the counts of the key components, in ascending order
    """
    counts: list[int] = [
        scores[dim][element] for dim, element in enumerate(d)]
    counts.sort()
    return counts
def __scorer_normalized_tired(d: tuple, scores: tuple[Counter, ...],
                              total: tuple[int, ...]) -> list[float]:
    """
    Compute the normalized "tired" score.

    Each count is divided by the total of its dimension; dimensions with
    a non-positive total are skipped.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :param total: the per-dimension count totals
    :returns: the normalized counts, in ascending order
    """
    fractions: list[float] = []
    for dim, element in enumerate(d):
        if total[dim] > 0:
            fractions.append(scores[dim][element] / total[dim])
    fractions.sort()
    return fractions
def __scorer_sum(d: tuple, scores: tuple[Counter, ...], _) -> int:
    """
    Compute the "sum" score: the total of the per-dimension counts.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :returns: the sum of the counts of the key components
    """
    result: int = 0
    for dim, element in enumerate(d):
        result += scores[dim][element]
    return result
def __scorer_normalized_sum(d: tuple, scores: tuple[Counter, ...],
                            total: tuple[int, ...]) -> float:
    """
    Compute the normalized "sum" score.

    Each count is divided by the total of its dimension before summing;
    dimensions with a non-positive total are skipped.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :param total: the per-dimension count totals
    :returns: the sum of the normalized counts
    """
    result: float = 0
    for dim, element in enumerate(d):
        if total[dim] > 0:
            result += scores[dim][element] / total[dim]
    return result
def __scorer_product(d: tuple, scores: tuple[Counter, ...], _) -> int:
    """
    Compute the "product" score: the product of per-dimension counts.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :returns: the product of the counts of the key components
    """
    product: int = 1
    for dim, element in enumerate(d):
        product = product * scores[dim][element]
    return product
def __scorer_normalized_min(d: tuple, scores: tuple[Counter, ...],
                            total: tuple[int, ...]) -> float:
    """
    Compute the normalized minimum score.

    Each count is divided by the total of its dimension; dimensions with
    a non-positive total are skipped. The smallest fraction is returned.

    :param d: the key tuple to score
    :param scores: the per-dimension counters
    :param total: the per-dimension count totals
    :returns: the smallest normalized count
    :raises ValueError: if no dimension has a positive total
    """
    fractions: list[float] = [
        scores[dim][element] / total[dim]
        for dim, element in enumerate(d) if total[dim] > 0]
    return min(fractions)
def __not(_: tuple[Counter, ...]) -> int:
    """
    Return the placeholder total for scorers that need no normalization.

    :returns -1: always, as constant placeholder
    """
    return -1
192def __total(scores: tuple[Counter, ...]) -> tuple[int, ...] | None:
193 """
194 Get the total scores.
196 :param scores: the scores
197 :returns: the total scores
198 """
199 result: tuple[int, ...] = tuple(
200 t.total() if len(t) > 1 else -1 for t in scores)
201 return None if (tuple.__len__(result) < 1) or (
202 max(result) <= 0) else result
#: the scorers: (name, scorer, total computer) triples. The scorer maps a
#: key tuple to a comparable score; the total computer produces the
#: normalization totals, or ``None`` to skip the method entirely.
#: NOTE(review): "product" is paired with ``__total`` although
#: ``__scorer_product`` ignores the totals — presumably this gates the
#: method off when no dimension varies; confirm intended.
__SCORERS: Final[tuple[tuple[str, Callable[[
    tuple, tuple[Counter, ...], Any], Any], Callable[[
    tuple[Counter, ...]], Any]], ...]] = (
    ("tired", __scorer_tired, __not),
    ("normalized_tired", __scorer_normalized_tired, __total),
    ("sum", __scorer_sum, __not),
    ("normalized_sum", __scorer_normalized_sum, __total),
    ("product", __scorer_product, __total),
    ("normalized_min", __scorer_normalized_min, __total),
)
def select_consistent(data: Iterable[T], log: bool = True,
                      thorough: bool = True) -> list[T]:
    """
    Select data such that the numbers of runs are consistent.

    The input is a set of data items which represent some records over the
    runs of algorithms on instances. It may be that not all algorithms have
    been applied to all instances. Maybe the number of runs is inconsistent
    over the algorithm-instance combinations, too. Maybe some algorithms
    have more runs on some instances. Maybe the runs are even different,
    it could be that some algorithms have runs for seed `A`, `B`, and `C` on
    instance `I`, while others have runs for seed `C` and `D`. This function
    is designed to retain only the runs with seed `C` in such a case. It may
    discard algorithms or instances or algorithm-instance-seed combinations
    in order to obtain a selection of data where all algorithms have been
    applied to all instances as same as often and using the same seeds.

    Now there are different ways to select such consistent subsets of a
    dataset. Of course, we want to select the data such that as much as
    possible of the data is retained and as little as possible is discarded.
    This may be a hard optimization problem in itself. Here, we offer a
    heuristic solution. Basically, we step-by-step try to cut away the
    setups that are covered by the least amount of runs. We keep repeating
    this until we arrive in a situation where all setups have the same
    amount of runs. We then check if there were some strange symmetries that
    still make the data inconsistent and, if we found some, try to delete
    one run to break the symmetries and then repeat the cleaning-up process.
    In the end, we should get a list of overall consistent data elements that
    can be used during a normal experiment evaluation procedure.

    This iterative process may be rather slow on larger datasets, but it is
    maybe the best approximation we can offer to retain as much data as
    possible.

    :param data: the source data
    :param log: shall we log the progress
    :param thorough: use the slower method which may give us more data
    :return: a list with the selected data

    >>> def __p(x) -> str:
    ...     return (f"{x.algorithm}/{x.instance}/{x.objective}/{x.encoding}/"
    ...             f"{x.rand_seed}")

    >>> a1i1o1e1s1 = PerRunData("a1", "i1", "o1", "e1", 1)
    >>> a1i1o1e1s2 = PerRunData("a1", "i1", "o1", "e1", 2)
    >>> a1i1o1e1s3 = PerRunData("a1", "i1", "o1", "e1", 3)
    >>> a1i2o1e1s1 = PerRunData("a1", "i2", "o1", "e1", 1)
    >>> a1i2o1e1s2 = PerRunData("a1", "i2", "o1", "e1", 2)
    >>> a1i2o1e1s3 = PerRunData("a1", "i2", "o1", "e1", 3)
    >>> a2i1o1e1s1 = PerRunData("a2", "i1", "o1", "e1", 1)
    >>> a2i1o1e1s2 = PerRunData("a2", "i1", "o1", "e1", 2)
    >>> a2i1o1e1s3 = PerRunData("a2", "i1", "o1", "e1", 3)
    >>> a2i2o1e1s1 = PerRunData("a1", "i2", "o1", "e1", 1)
    >>> a2i2o1e1s2 = PerRunData("a2", "i2", "o1", "e1", 2)
    >>> a2i2o1e1s3 = PerRunData("a2", "i2", "o1", "e1", 3)

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i1o1e1s3,
    ...     a1i2o1e1s1, a1i2o1e1s2, a1i2o1e1s3,
    ...     a2i1o1e1s1, a2i1o1e1s2,
    ...     a2i2o1e1s2, a2i2o1e1s3))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2', 'a1/i2/o1/e1/2', 'a1/i2/o1/e1/3',\
 'a2/i1/o1/e1/1', 'a2/i1/o1/e1/2', 'a2/i2/o1/e1/2', 'a2/i2/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s2, a1i1o1e1s3,
    ...     a1i2o1e1s1, a1i2o1e1s2, a1i2o1e1s3,
    ...     a2i1o1e1s1, a2i1o1e1s2,
    ...     a2i2o1e1s2, a2i2o1e1s3))))
    ['a1/i2/o1/e1/2', 'a1/i2/o1/e1/3', 'a2/i2/o1/e1/2', 'a2/i2/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s2, a1i1o1e1s3,
    ...     a1i2o1e1s1, a1i2o1e1s2, a1i2o1e1s3,
    ...     a2i1o1e1s1, a2i1o1e1s2,
    ...     a2i2o1e1s2))))
    ['a1/i1/o1/e1/2', 'a1/i2/o1/e1/2', 'a2/i1/o1/e1/2', 'a2/i2/o1/e1/2']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i1o1e1s3,
    ...     a2i2o1e1s1, a2i2o1e1s2, a2i2o1e1s3))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2', 'a1/i1/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i1o1e1s3,
    ...     a2i1o1e1s1, a2i1o1e1s2, a2i1o1e1s3))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2', 'a1/i1/o1/e1/3', \
'a2/i1/o1/e1/1', 'a2/i1/o1/e1/2', 'a2/i1/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i2o1e1s2, a1i2o1e1s3))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2', 'a1/i2/o1/e1/2', 'a1/i2/o1/e1/3']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a1i1o1e1s2, a1i2o1e1s2))))
    ['a1/i1/o1/e1/1', 'a1/i1/o1/e1/2']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a2i1o1e1s2))))
    ['a1/i1/o1/e1/1']

    >>> list(map(__p, select_consistent((
    ...     a1i1o1e1s1, a2i1o1e1s2, a2i1o1e1s3))))
    ['a2/i1/o1/e1/2', 'a2/i1/o1/e1/3']

    >>> try:
    ...     select_consistent((a1i1o1e1s1, a1i1o1e1s2, a1i2o1e1s2, a1i2o1e1s2))
    ... except ValueError as ve:
    ...     print(ve)
    Found 4 records but only 3 different keys!

    >>> try:
    ...     select_consistent(1)
    ... except TypeError as te:
    ...     print(te)
    data should be an instance of typing.Iterable but is int, namely 1.

    >>> try:
    ...     select_consistent((a2i1o1e1s2, a2i1o1e1s3), 3)
    ... except TypeError as te:
    ...     print(te)
    log should be an instance of bool but is int, namely 3.

    >>> try:
    ...     select_consistent({234})
    ... except TypeError as te:
    ...     print(te)
    dataElement should be an instance of moptipy.evaluation.base.PerRunData \
but is int, namely 234.

    >>> try:
    ...     select_consistent((a2i1o1e1s2, a2i1o1e1s3), True, 4)
    ... except TypeError as te:
    ...     print(te)
    thorough should be an instance of bool but is int, namely 4.
    """
    if not isinstance(data, Iterable):
        raise type_error(data, "data", Iterable)
    if not isinstance(log, bool):
        raise type_error(log, "log", bool)
    if not isinstance(thorough, bool):
        raise type_error(thorough, "thorough", bool)

    # We obtain a sorted list of the data in order to make the results
    # consistent regardless of the order in which the data comes in.
    # The data is only sorted by its features and not by any other
    # information attached to it. Each working entry is a mutable list
    # [key tuple, record, score].
    dataset: Final[list[list]] = [[__data_tup(t), t, 0] for t in data]
    dataset.sort(key=itemgetter(0))
    dataset_size: int = list.__len__(dataset)
    if log:
        logger(f"Found {dataset_size} records of data.")

    # Duplicate keys would make any run count meaningless.
    set_l: Final[int] = set.__len__({x[0] for x in dataset})
    if set_l != dataset_size:
        raise ValueError(
            f"Found {dataset_size} records but only {set_l} different keys!")

    # Compute the scores: count how often each algorithm, instance,
    # objective, encoding, and (instance, seed) combination are
    # encountered.
    key_tuple_len: Final[int] = tuple.__len__(dataset[0][0])
    if key_tuple_len != TOTAL_KEYS:
        raise ValueError("Unexpected error!")

    # the counters for the score calculations, one per key dimension
    scores: Final[tuple[Counter, ...]] = tuple(
        Counter() for _ in range(key_tuple_len))

    # the final result: the best (longest) consistent subset found so far
    best_length: int = -1
    best_data: list[list] | None = None

    # Try each scoring heuristic (or only the first one if not thorough)
    # and keep the result that retains the most records.
    for scorer_name, scorer, scorer_total in __SCORERS if thorough else (
            __SCORERS[0], ):
        if log:  # FIX: this banner previously ignored the `log` flag
            logger(f"Now scoring according to the {scorer_name!r} method.")
        # compute the original item scores
        for sc in scores:
            sc.clear()

        source: list[list] = dataset.copy()
        count: int = dataset_size
        for er in source:
            __score_inc(scores, er[0])

        changed: bool = True
        while changed:
            changed = False

            # Find the setups with maximum and minimum scores.
            min_score: Any = None
            max_score: Any = None
            norm: Any = scorer_total(scores)

            if norm is None:  # cannot proceed
                if log:
                    logger(f"Method {scorer_name!r} is not applicable.")
                count = -1  # we can do nothing
                break

            for er in source:
                score = scorer(er[0], scores, norm)
                er[2] = score
                if (min_score is None) or (min_score > score):
                    min_score = score
                if (max_score is None) or (max_score < score):
                    max_score = score
            del norm

            if min_score < max_score:  # Some setups have lower scores.
                del_count: int = 0  # We will delete them.
                # Iterate backwards so deletion does not shift pending
                # indices.
                for i in range(count - 1, -1, -1):
                    er = source[i]
                    if er[2] <= min_score:
                        del source[i]
                        __score_dec(scores, er[0])
                        del_count += 1
                if del_count <= 0:
                    raise ValueError(
                        f"Did not delete anything under {scorer_name!r}?")

                new_count: int = list.__len__(source)
                if new_count != (count - del_count):
                    raise ValueError("List inconsistent after deletion "
                                     f"under {scorer_name!r}?")
                if log:
                    logger(
                        f"Deleted {del_count} of the {count} records because "
                        f"their score was {min_score} while the maximum score"
                        f" was {max_score}. Retained {new_count} records "
                        f"under {scorer_name!r}.")
                count = new_count
                changed = True
                continue

            if log:
                logger(f"All setups now have the same score {max_score} under"
                       f" {scorer_name!r}.")
            if count <= best_length:
                if log:
                    logger(f"We now only have {count} setups, which means we "
                           "cannot get better than the current best set with "
                           f"{best_length} setups, so we quit after score-"
                           f"based cleaning under {scorer_name!r}.")
                count = -1
                break

            # If we get here, all elements have the same score.
            # This means that we are basically done.
            #
            # However, this may also happen if a very odd division exists in
            # the data. Maybe we have one algorithm that was applied to one
            # instance ten times and another algorithm applied to another
            # instance ten times. This data would still be inconsistent, as
            # it does not allow for any comparison.

            # Compute the different values for everything except the seeds.
            keys: tuple[int, ...] = tuple(
                v for v, s in enumerate(scores)
                if (v != KEY_SEED) and (len(s) > 1))

            if tuple.__len__(keys) > 1:
                if log:
                    logger("Now checking for inconsistencies in "
                           "algorithm/instance/objective/encoding under"
                           f" {scorer_name!r}.")
                # Only if there are different values in at least one tuple
                # dimension, we need to check for strange situations.

                # For each of (instance, algorithm, encoding, objective), we
                # must have the same number of other records.
                # If not, then we have some strange symmetric situation that
                # needs to be solved.
                per_value: dict[Any, set[Any]] = {}
                last: set[Any] | None = None
                for key in keys:
                    other_keys: tuple[int, ...] = tuple(
                        kk for kk in keys if kk != key)
                    for er in source:
                        kv = er[0][key]
                        other = tuple(er[0][ok] for ok in other_keys)
                        if kv in per_value:
                            per_value[kv].add(other)
                        else:
                            per_value[kv] = last = {other}
                    for v in per_value.values():
                        if v != last:
                            changed = True
                            break
                    per_value.clear()

                    # We just need to delete one random element. This will
                    # break the symmetry.
                    if changed:
                        if log:
                            logger(
                                f"Deleting one of the {count} elements to "
                                "break the erroneous symmetry under "
                                f"{scorer_name}.")
                        er = source[-1]
                        del source[-1]
                        __score_dec(scores, er[0])
                        count -= 1
                        break
                del per_value
                del keys
                if changed:
                    continue

                if log:
                    logger("No inconsistencies in algorithm/instance/objecti"
                           f"ve/encoding found under {scorer_name!r}.")
            elif log:
                logger("No inconsistencies in algorithm/instance/objective/"
                       f"encoding possible under {scorer_name!r}.")
            if count <= best_length:
                if log:
                    logger(f"We now only have {count} setups, which means we "
                           "cannot get better than the current best set with "
                           f"{best_length} setups, so we quit after algorithm"
                           "/instance/objective/encoding cleaning under "
                           f"{scorer_name!r}.")
                count = -1
                break

            # If we get here, the only problem left could be if algorithms
            # have different seeds for the same instances. We thus need to
            # check that for each instance, all the seeds are the same.
            # Notice that such inconsistencies can only occur if different
            # seeds occurred exactly as same as often.
            if log:
                logger("Now checking for inconsistencies in instance "
                       f"seeds under {scorer_name!r}.")
            # map: instance -> (algorithm, objective, encoding) -> seed set
            seeds: dict[str, dict[tuple[str, str, str], set[int]]] = {}
            for er in source:
                inst: str = er[1].instance
                cur: dict[tuple[str, str, str], set[int]]
                if inst in seeds:
                    cur = seeds[inst]
                else:
                    seeds[inst] = cur = {}
                kx: tuple[str, str, str] = (
                    er[1].algorithm, er[1].objective, er[1].encoding)
                if kx in cur:
                    cur[kx].add(er[1].rand_seed)
                else:
                    cur[kx] = {er[1].rand_seed}

            max_seed_insts: set[str] = set()
            max_seeds: int = -1
            min_seed_inst: str | None = None
            min_seeds: int = -1
            must_delete_from_insts: set[str] = set()
            for instance, inst_setups in seeds.items():
                # the seed set of the first setup serves as reference
                kvx: set[int] | None = None
                for setup_seeds in inst_setups.values():
                    if kvx is None:
                        kvx = setup_seeds
                        seed_cnt: int = set.__len__(setup_seeds)
                        if seed_cnt >= max_seeds:
                            if seed_cnt > max_seeds:
                                max_seed_insts.clear()
                                max_seeds = seed_cnt
                            max_seed_insts.add(instance)
                        if (seed_cnt < min_seeds) or (min_seed_inst is None):
                            min_seeds = seed_cnt
                            min_seed_inst = instance
                    elif kvx != setup_seeds:
                        # We got a symmetric inconsistency.
                        must_delete_from_insts.add(instance)
                        break
            if min_seeds < max_seeds:
                # Instances with more seeds than the minimum must shrink.
                must_delete_from_insts.update(max_seed_insts)
            del max_seed_insts

            del_count = set.__len__(must_delete_from_insts)
            if del_count > 0:
                if log:
                    logger("Must delete one record from all of "
                           f"{must_delete_from_insts!r}.")
                for i in range(count - 1, -1, -1):
                    er = source[i]
                    instance = er[1].instance
                    if instance in must_delete_from_insts:
                        must_delete_from_insts.remove(instance)
                        del source[i]
                        __score_dec(scores, er[0])
                        changed = True
                        if set.__len__(must_delete_from_insts) <= 0:
                            break
                new_count = list.__len__(source)
                if new_count != (count - del_count):
                    raise ValueError("Error when deleting instances "
                                     f"under {scorer_name!r}.")
                count = new_count
                if not changed:
                    raise ValueError(
                        f"Seeds inconsistent under {scorer_name!r}.")
            del must_delete_from_insts

            del seeds
            if changed:
                if count <= best_length:
                    if log:
                        logger(f"We now only have {count} setups, which "
                               "means we cannot get better than the current "
                               f"best set with {best_length} setups, so we "
                               "quit after seed-based cleaning under "
                               f"{scorer_name!r}.")
                    count = -1
                    break
            elif log:
                logger(f"No seed inconsistencies under {scorer_name!r}.")
            # There should not be any problems left, but we need to check
            # again if something has changed.

        if count <= 0:
            continue  # We can do nothing here.

        if count > best_length:
            if log:  # FIX: this message previously ignored the `log` flag
                logger(f"Method {scorer_name!r} yields {count} records, "
                       "which is the new best.")
            best_length = count
            best_data = source
            if best_length >= dataset_size:
                if log:  # FIX: previously ignored the `log` flag
                    logger(f"Included all data under {scorer_name!r}, "
                           "so we can stop.")
                break
        elif log:
            logger(f"Method {scorer_name!r} yields {count} records, "
                   "which is not better than the current "
                   f"best {best_length}.")

    # We are finally finished. The scores are no longer needed.
    del scores
    if best_length <= 0:
        raise ValueError("No data found at all?")
    return __ret(best_data, best_length, log)