Coverage for moptipy / examples / jssp / instance_selector.py: 8%
353 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
1"""Code for selecting interesting instances for smaller-scale experiments."""
2import multiprocessing as mp
3from math import ceil, factorial, inf, log2, sqrt
4from os import sched_getaffinity
5from string import digits
6from typing import Callable, Final
8import numpy as np # type: ignore
10# pylint: disable-next=E0611
11from numpy.random import Generator, RandomState # type: ignore
12from pycommons.io.console import logger
13from pycommons.types import check_int_range, type_error
14from sklearn.cluster import SpectralClustering # type: ignore
16from moptipy.algorithms.so.rls import RLS
17from moptipy.api.execution import Execution
18from moptipy.examples.jssp.gantt_space import GanttSpace
19from moptipy.examples.jssp.instance import Instance
20from moptipy.examples.jssp.makespan import Makespan
21from moptipy.examples.jssp.ob_encoding import OperationBasedEncoding
22from moptipy.operators.permutations.op0_shuffle import Op0Shuffle
23from moptipy.operators.permutations.op1_swap2 import Op1Swap2
24from moptipy.spaces.permutations import Permutations
25from moptipy.utils.nputils import (
26 DEFAULT_FLOAT,
27 DEFAULT_INT,
28 rand_generator,
29 rand_seeds_from_str,
30)
33def __can_solve_instance(inst: Instance, seed: int,
34 queue: mp.Queue) -> None:
35 """
36 Execute one run of a RLS for 2.1min on the instance, return makespan.
38 :param inst: the jssp instance to check
39 :param seed: the seed of this run
40 :param queue: the queue for the return values
41 """
42 x_space: Final[Permutations] = Permutations.with_repetitions(
43 inst.jobs, inst.machines)
44 y_space: Final[GanttSpace] = GanttSpace(inst)
45 g: Final[OperationBasedEncoding] = OperationBasedEncoding(inst)
46 o0: Final[Op0Shuffle] = Op0Shuffle(x_space)
47 o1: Final[Op1Swap2] = Op1Swap2()
48 f: Final[Makespan] = Makespan(inst)
49 a: Final[RLS] = RLS(o0, o1)
50 goal: Final[int] = inst.makespan_lower_bound
52 ex: Final[Execution] = Execution()
53 ex.set_search_space(x_space)
54 ex.set_solution_space(y_space)
55 ex.set_encoding(g)
56 ex.set_algorithm(a)
57 ex.set_objective(f)
58 ex.set_max_time_millis(126_000)
59 ex.set_goal_f(goal)
60 ex.set_rand_seed(seed)
61 with ex.execute() as process:
62 queue.put(process.get_best_f())
65def __is_instance_too_easy(inst: Instance) -> bool:
66 """
67 Check if an instance is too easy to be of any interest.
69 We perform 16 runs of a (1+1)-EA on an instance, granting 2.1 minutes
70 of runtime to each of them. An instance is considered as easy if one of
71 the following conditions is fulfilled:
73 - it is solved often enough (lower bound reached often)
74 - there are not enough different results
75 - the mean result is too close to the lower bound
76 - the worst result is not far enough from the lower bound
77 - the worst result is too close to the best result
79 If any of these conditions hold, we consider the instance as too easy to
80 be of any interest for our educational experiment.
82 :param inst: the jssp instance to check
83 :returns: `True` if the instance is too easy to be interesting
84 """
85 n_runs: Final[int] = 16
86 seeds: Final[list[int]] = rand_seeds_from_str(string=inst.name,
87 n_seeds=n_runs)
88 queue: Final[mp.Queue] = mp.Queue()
90 while len(seeds) > 0:
91 processes = [mp.Process(target=__can_solve_instance,
92 args=(inst, seeds.pop(), queue))
93 for _ in range(min(len(seeds), __DEFAULT_N_THREADS))]
94 for p in processes:
95 p.start()
96 for p in processes:
97 p.join()
99 goal: Final[int] = inst.makespan_lower_bound
100 num_solved: int = 0
101 diff: set[int] = set()
102 min_makespan: int = 1 << 62
103 sum_makespans: int = 0
104 max_makespan: int = 0
106 for _ in range(n_runs):
107 makespan: int = queue.get()
108 diff.add(makespan)
109 if makespan <= min_makespan:
110 min_makespan = makespan
111 if makespan <= goal:
112 num_solved += 1
113 sum_makespans += makespan
114 max_makespan = max(max_makespan, makespan)
116 mean_makespan: Final[float] = sum_makespans / n_runs
117 mean_threshold: Final[float] = min(goal + 32.0,
118 max(1.01 * goal, goal + 3))
119 max_threshold: Final[float] = min(goal + 64.0,
120 max(1.02 * goal, goal + 5))
121 min_threshold: Final[float] = min(min_makespan + 48.0,
122 max(1.015 * min_makespan,
123 min_makespan + 4))
124 n_diff: Final[int] = len(diff)
125 diff_threshold: Final[int] = max(3, int(0.5 + ceil(log2(n_runs))))
126 solved_threshold: Final[int] = ceil(n_runs / 3)
128 result: Final[bool] = (num_solved >= solved_threshold) \
129 or (mean_makespan < mean_threshold) \
130 or (max_makespan < max_threshold) \
131 or (min_threshold > max_makespan) \
132 or (n_diff <= diff_threshold)
133 logger(f"Instance {inst.name} is {'easy' if result else 'hard'}, "
134 f"since {n_runs} runs of the (1+1)-EA had {n_diff} different "
135 f"results (threshold {diff_threshold}), a best result of "
136 f"{min_makespan}, a mean result of {mean_makespan} (threshold: "
137 f"{mean_threshold}), a worst result of {max_makespan} "
138 f"(thresholds: {max_threshold}, {min_threshold}), and "
139 f"reached the makespan lower bound ({goal}) {num_solved} "
140 f"times (threshold: {solved_threshold}).")
141 return result
144#: The default number of threads to be used
145__DEFAULT_N_THREADS: Final[int] = max(1, min(len(sched_getaffinity(0)), 128))
148def compute_instances_that_are_too_easy() -> tuple[str, ...]:
149 """
150 Compute the set of instances that are too easy to be of any interest.
152 For our educational experiments, we need instances that are not always
153 solved to optimality by simple algorithms. Otherwise, for example, the
154 box plots of the end results will collapse to single lines. Then, it
155 will also be impossible to really say which algorithm performs best on
156 the instance.
157 See the documentation of `__is_instance_too_easy` for the conditions that
158 lead to the rejection of an instance
160 :returns: the tuple of instance names that should not be included in
161 the experiment
162 """
163 logger("We now test the instances whether they are too easy.")
164 easy: Final[list[str]] = []
165 for inst_name in Instance.list_resources():
166 if __is_instance_too_easy(Instance.from_resource(inst_name)):
167 easy.append(inst_name)
168 easy.sort()
169 logger(f"Finished the test, found {len(easy)} easy instances: {easy}.")
170 return tuple(easy)
173#: This is the set of instances that are too easy to be interesting and shall
174#: therefore be skipped from our experiments.
175#: For a documentation about what constitutes 'too easy', see
176#: :func:`compute_instances_that_are_too_easy`
177__TOO_EASY: Final[set[str]] = \
178 {"demo", "dmu21", "dmu22", "dmu24", "dmu25", "dmu31", "dmu32", "dmu33",
179 "dmu34", "dmu35", "ft06", "ft20", "la01", "la02", "la03", "la04",
180 "la05", "la06", "la07", "la08", "la09", "la10", "la11", "la12", "la13",
181 "la14", "la15", "la17", "la18", "la23", "la26", "la28", "la30", "la31",
182 "la32", "la33", "la34", "la35", "swv16", "swv17", "swv18", "swv19",
183 "swv20", "ta51", "ta52", "ta53", "ta54", "ta55", "ta56", "ta57", "ta58",
184 "ta59", "ta60", "ta61", "ta63", "ta68", "ta69", "ta71", "ta72", "ta73",
185 "ta74", "ta75", "ta76", "ta77", "ta78", "ta79", "ta80"}
188def __get_instances() -> list[Instance]:
189 """
190 Get the instances.
192 :return: a tuple of instances
193 """
194 return [Instance.from_resource(name) for name in
195 Instance.list_resources() if name not in __TOO_EASY]
198def __optimize_clusters(cluster_groups: tuple[tuple[int, ...], ...],
199 n_groups: int,
200 extreme_groups: tuple[tuple[tuple[int,
201 int], ...], ...],
202 random: Generator) -> tuple[int, ...]:
203 """
204 Try to find optimal cluster-to-group assignments.
206 We perform hill climbing with restarts.
208 :param cluster_groups: the groups per cluster
209 :param n_groups: the number of groups
210 :param extreme_groups: the extreme groups
211 :param random: the random number generator
212 :return: the selected groups
213 """
214 n: Final[int] = len(cluster_groups)
215 current: Final[np.ndarray] = np.zeros(n, DEFAULT_INT)
216 current_f: tuple[int, int, int, int, float]
217 best: Final[np.ndarray] = np.zeros(n, DEFAULT_INT)
218 best_f: tuple[int, int, int, int, float]
219 total_best: Final[np.ndarray] = np.zeros(n, DEFAULT_INT)
220 total_best_f: tuple[int, int, int, int, float] = (-1, -1, -1, -1, -1.0)
221 run_last_improved: int = 1
222 run_current: int = 0
223 run_max_none_improved: Final[int] = 4
224 step_max_none_improved: Final[int] = int((2 + (n_groups * n)) ** 2)
226 done: Final[np.ndarray] = np.zeros(n_groups, DEFAULT_INT) # noqa
227 extremes: Final[set[int]] = set() # noqa
229 logger(f"Beginning to optimize the assignment of {len(cluster_groups)} "
230 f"clusters to {n_groups} groups. The minimum groups are "
231 f"{extreme_groups[0]}, the maximum groups are {extreme_groups[1]}."
232 f" We permit {run_max_none_improved} runs without improvement "
233 f"before termination and {step_max_none_improved} FEs without "
234 "improvement before stopping a run.")
236 def __f(sol: np.ndarray) -> tuple[int, int, int, int, float]:
237 """
238 Compute the quality: The internal objective function.
240 We maximize five things:
241 First, we maximize the number of group assignments that allow us to
242 include the extreme instances in the result. These are the smallest
243 and largest-scale instances.
244 Second, we maximize the number of instance sets covered.
245 Third, we maximize the minimum number of usages of the instance sets.
246 Fourth, we minimize the maximum number of usages of the instance sets
247 with the goal to balance the usages evenly over different sets.
248 Fifth, we minimize the standard deviation of the instance set usages,
249 again with the goal to balance the usages evenly over different sets.
251 :param sol: the solution
252 :return: the objective values
253 """
254 # first, we count how often each group has been used
255 done.fill(0)
256 for group in sol:
257 done[group] += 1
259 # second, we check which cluster-to-group assignment permits
260 # using an extreme instance
261 extremes.clear()
262 for groups in extreme_groups:
263 for group in groups:
264 if (sol[group[0]] == group[1]) and (group[0] not in extremes):
265 extremes.add(group[0])
266 break
267 return len(extremes), int(np.sum(done > 0)), int(done.min()), \
268 int(-done.max()), float(-np.std(done))
270 # The outer loop: the restarts of the hill climber.
271 # We continue to execute runs until a given number of successive runs did
272 # not improve the overall best solutions. Then we stop.
273 while (run_current - run_last_improved) <= run_max_none_improved:
274 run_current += 1
276 # generate an initial best solution
277 for i in range(n):
278 cg = cluster_groups[i]
279 best[i] = cg[random.integers(len(cg))]
280 best_f = __f(best) # compute quality
281 use_f = best_f
283 if best_f > total_best_f: # accept solution if it is better
284 total_best_f = best_f
285 run_last_improved = run_current # new global best
286 np.copyto(total_best, best)
287 logger(f"New global best {tuple(best)} with quality "
288 f"{best_f} found in run {run_current}.")
289 else:
290 logger(f"Run {run_current} starts with quality {best_f}.")
292 # local search step
293 # We continue the local search until a given number of successive
294 # steps did not yield any improvement. Then we terminate the run.
295 step_current: int = 0
296 step_last_improved: int = 1
297 while (step_current - step_last_improved) <= step_max_none_improved:
298 step_current += 1
299 np.copyto(current, best)
300 while True: # perform at least one move
301 i = int(random.integers(n))
302 cg = cluster_groups[i]
303 lcg = len(cg)
304 if lcg <= 1:
305 continue
306 while True: # perform the search move
307 new_v = cg[random.integers(lcg)]
308 if new_v != best[i]:
309 current[i] = new_v
310 break
311 # 50/50 chance to perform more moves
312 if random.integers(2) <= 0:
313 break
315 current_f = __f(current) # evaluate quality
316 # accept if better or with small probability
317 if (current_f >= use_f) or (random.integers(50) < 1):
318 use_f = current_f
319 np.copyto(best, current)
320 if current_f > best_f:
321 best_f = current_f
322 step_last_improved = step_current
323 if current_f > total_best_f: # new optimum
324 total_best_f = current_f
325 run_last_improved = run_current
326 np.copyto(total_best, current)
327 logger(f"New global best {tuple(best)} with quality"
328 f" {total_best_f} found in run {run_current} "
329 f"after {step_current} FEs.")
330 logger(f"Run {run_current} finished with quality {best_f}.")
332 result = tuple(total_best)
333 logger(f"Finished after {run_current} runs with solution {result} of "
334 f"quality {total_best_f}.")
335 return result
338def __optimize_scales(scale_choices: list[list[tuple[int, int]]],
339 random: Generator) -> list[int]:
340 """
341 Pick a diverse scale choice.
343 :param scale_choices: the scale choices we have per group
344 :param random: the random number generator
345 :returns: one chosen scale per group
346 """
347 n: Final[int] = len(scale_choices)
349 x_total_best: list[int] = [0] * n
350 opt_idx: list[tuple[int, int]] = [] # the relevant indexes
351 opt_sum: int = 0
352 for idx, choices in enumerate(scale_choices):
353 if len(choices) > 1:
354 opt_sum += len(choices)
355 opt_idx.append((idx, len(choices)))
357 if not opt_idx:
358 return x_total_best
360 def __f(xx: list[int]) -> tuple[float, float, int]:
361 """
362 Compute the quality of a suggestion, bigger is better.
364 :param xx: the candidate solution
365 :returns: a tuple with minimum distance, distance sum, and
366 difference count
367 """
368 dist_min: float = inf
369 dist_sum: float = 0
370 diff_cnt: int = 0
371 for i in range(n):
372 a = scale_choices[i][xx[i]]
373 for j in range(i):
374 b = scale_choices[j][xx[j]]
375 if a[0] == b[0]:
376 d0 = 0
377 else:
378 diff_cnt += 1
379 d0 = a[0] - b[0]
380 if a[1] == b[1]:
381 d1 = 0
382 else:
383 diff_cnt += 1
384 d1 = a[1] - b[1]
385 d = sqrt((d0 * d0) + (d1 * d1))
386 dist_min = min(dist_min, d)
387 dist_sum += d
388 return dist_min, dist_sum, diff_cnt
390 f_total_best: tuple[float, float, int] = __f(x_total_best)
391 logger(f"The following index-choices tuple exist: {opt_idx} and "
392 f"the initial choice is {x_total_best} at quality {f_total_best}.")
394 x_cur_best: list[int] = [0] * n
395 x_cur: list[int] = [0] * n
397 for _ in range(int(opt_sum ** 2.35)):
398 for ii, sc in enumerate(scale_choices):
399 x_cur_best[ii] = int(random.integers(len(sc)))
400 f_cur_best: tuple[float, float, int] = __f(x_cur_best)
401 if f_cur_best > f_total_best:
402 f_total_best = f_cur_best
403 x_total_best.clear()
404 x_total_best.extend(x_cur_best)
405 logger(f"Improved to {x_total_best} at quality {f_total_best}.")
406 for _ in range(int(opt_sum ** 2.35)):
407 idx, choicen = opt_idx[random.integers(len(opt_idx))]
408 old = x_cur[idx]
409 while old == x_cur[idx]:
410 x_cur[idx] = int(random.integers(choicen))
411 f_cur = __f(x_cur)
412 if f_cur >= f_cur_best:
413 f_cur_best = f_cur
414 x_cur_best.clear()
415 x_cur_best.extend(x_cur)
416 if f_cur_best > f_total_best:
417 f_total_best = f_cur_best
418 x_total_best.clear()
419 x_total_best.extend(x_cur_best)
420 logger(f"Improved to {x_total_best} "
421 f"at quality {f_total_best}.")
423 return x_total_best
426def __scale(jobs: int, machines: int) -> int:
427 """
428 Compute the scale of a JSSP instance.
430 :param jobs: the jobs
431 :param machines: the machines
432 :returns: the scale
433 """
434 return factorial(jobs) ** machines
437def propose_instances(n: int,
438 get_instances: Callable = __get_instances) -> \
439 tuple[str, ...]:
440 """
441 Propose a set of instances to be used for our experiment.
443 This function is used to obtain the instances chosen for the JSSP
444 experiment. You can also use it for other experiments, by using your own
445 instance source and/or for selecting more or less JSSP instances.
447 Basically, it will accept a function `get_instances`, which must produce
448 a sequence of Instance objects. Each instance has a name (e.g., `dmu14`)
449 and a group, where the group is the name without any numerical suffix
450 (e.g., `dmu`). This function will then select `n` instances from the
451 instance set with the goal to maximize the diversity of the instances, to
452 include instances from as many groups as possible, and to include one
453 instance of the smallest and one of the largest scale.
454 The diversity is measured in terms of the numbers of jobs and machines,
455 the instance scale, the minimum and maximum operation length, the
456 standard deviation of the mean operation lengths over the jobs, the
457 makespan bounds, and so on.
459 First, features are computed for each instance. Second, the instances are
460 clustered into `n` clusters. Third, we try to pick groups for each cluster
461 such that a) the minimum and maximum-scale instances can be included and
462 b) that instances from as many groups as possible are picked. Third, we
463 then randomly pick one instance for each cluster from the selected group
464 (while trying to pick the minimum and maximum-scale instances). Finally,
465 the chosen instance names are listed as sorted tuple and returned.
467 :param n: the number of instances to be proposed
468 :param get_instances: a function returning an
469 iterable of instances.
470 :return: a tuple with the instance names
471 """
472 check_int_range(n, "n", 1, 1000)
473 if not callable(get_instances):
474 raise type_error(get_instances, "get_instances", call=True)
475 instances = list(get_instances())
476 n_instances: Final[int] = len(instances)
477 if n_instances <= 0:
478 raise ValueError("No instance returned by get_instance.")
479 for i in instances:
480 if isinstance(i, Instance):
481 raise type_error(
482 i, "value in list returned by get_instances", Instance)
483 if n_instances <= n:
484 if n_instances == n: # nothing to do here
485 return tuple(sorted(inst.get_name() for inst in instances))
486 raise ValueError(f"{n} instances requested, but only "
487 f"{n_instances} available.")
488 logger(f"We will pick {n} instances out of {n_instances}.")
490 random: Final[Generator] = rand_generator(0) # the random number generator
491 random.shuffle(instances)
493 inst_names: Final[tuple[str, ...]] = tuple( # get the instance names
494 inst.get_name() for inst in instances)
495 inst_sizes: Final[list[tuple[int, int]]] =\
496 [(inst.jobs, inst.machines) for inst in instances]
498 # the group to which the instances belong
499 rm = str.maketrans("", "", digits)
500 inst_groups: Final[tuple[str, ...]] = tuple( # get the instance groups
501 inst.translate(rm) for inst in inst_names)
503 # create bi-directional mapping between group names and integer IDs
504 group_ids: Final[dict[str, int]] = {}
505 id_groups: Final[dict[int, str]] = {}
506 for group in inst_groups:
507 if group in group_ids:
508 continue
509 gid = len(group_ids)
510 group_ids[group] = gid
511 id_groups[gid] = group
512 n_groups: Final[int] = len(group_ids)
513 logger(f"The {n_instances} instances belong to the {n_groups} "
514 f"groups {sorted(group_ids.keys())}.")
516 # compute the feature matrix
517 base_features: Final[int] = 9
518 features = np.zeros((len(instances), base_features + n_groups),
519 DEFAULT_FLOAT)
520 min_scale_val = inf
521 min_scale_inst: set[int] = set()
522 max_scale_val = -inf
523 max_scale_inst: set[int] = set()
524 for i, inst in enumerate(instances):
525 features[i, 0] = inst.jobs
526 features[i, 1] = inst.machines
527 features[i, 2] = inst.jobs / inst.machines
528 scale = __scale(inst.jobs, inst.machines)
529 if scale <= min_scale_val:
530 if scale < min_scale_val:
531 min_scale_val = scale
532 min_scale_inst.clear()
533 min_scale_inst.add(i)
534 if scale >= max_scale_val:
535 if scale > max_scale_val:
536 max_scale_inst.clear()
537 max_scale_val = scale
538 max_scale_inst.add(i)
539 features[i, 3] = log2(log2(scale))
540 m = inst.matrix[:, 1::2]
541 features[i, 4] = np.std(np.mean(m, axis=0))
542 features[i, 5] = np.min(m)
543 features[i, 6] = np.max(m)
544 features[i, 7] = inst.makespan_lower_bound
545 features[i, 8] = inst.makespan_upper_bound / inst.makespan_lower_bound
546 features[i, base_features + group_ids[inst_groups[i]]] = 1
547 del instances
548 logger(f"We computed {base_features + n_groups} features for each "
549 f"instance, namely {base_features} features and {n_groups}"
550 f" group features. The instances with the smallest scale "
551 f" {min_scale_val} are {[inst_names[i] for i in min_scale_inst]} "
552 f" (encoded as {min_scale_inst}) and those of the largest scale "
553 f"{max_scale_val} are {[inst_names[i] for i in max_scale_inst]} "
554 f"(encoded as {max_scale_inst}). Now we will cluster the "
555 f"instances.")
557 # standardize feature columns
558 for i in range(base_features):
559 features[:, i] = (features[:, i] - np.mean(features[:, i])) \
560 / np.std(features[:, i])
562 # so now we have the following things:
563 # 1. a list `instances` of instance names
564 # 2. the list `instance_groups` with the corresponding groups
565 # 3. set `groups` with the group names
566 # 4. the matrix `features` with instance features
567 # 5. the bi-directional mapping between instance groups and group IDs
568 # 6. the instance/group indexes for the smallest and largest-scale
569 # instances
571 # now we cluster the instances
572 model = SpectralClustering(n_clusters=n,
573 n_init=100,
574 random_state=RandomState(
575 random.integers(2 ** 32)))
576 clusters = model.fit_predict(features)
577 if len(clusters) != n_instances:
578 raise ValueError(
579 f"Invalid number {len(clusters)} of cluster "
580 f"assignments to {n_instances} instances.")
581 if (max(clusters) - min(clusters) + 1) != n:
582 raise ValueError(f"Expected {n} clusters, but got "
583 f"{max(clusters) - min(clusters) + 1}.")
584 logger(f"Found clusters {clusters}. The minimum instances are in"
585 f"{[clusters[i] for i in min_scale_inst]}. The maximum instances "
586 f"are in {[clusters[i] for i in max_scale_inst]}. now assigning"
587 f"clusters to groups.")
589 # find which groups belong to which cluster
590 cluster_groups: Final[tuple[tuple[int, ...], ...]] = tuple(
591 tuple(sorted({group_ids[inst_groups[j]]
592 for j in np.where(clusters == i)[0]}))
593 for i in range(n))
594 logger(f"The groups available per cluster are {cluster_groups}.")
596 # Now we need to pick the extreme groups.
597 extreme_groups = tuple(tuple(sorted(
598 {(clusters[xx], group_ids[inst_groups[xx]]) for xx in ex}))
599 for ex in [min_scale_inst, max_scale_inst])
600 logger(f"The extreme groups are {extreme_groups}.")
602 # With this method, we choose one instance group for each cluster.
603 chosen_groups = __optimize_clusters(cluster_groups=cluster_groups,
604 extreme_groups=extreme_groups,
605 n_groups=n_groups,
606 random=random)
607 logger(f"The instance groups {chosen_groups} were chosen for the"
608 f" {n} clusters.")
610 # OK, we have picked one instance group per cluster.
611 # Now we need to pick the instance scales from these groups.
612 # The goal is to now select instances with (jobs, machines) settings as
613 # diverse as possible, while adhering to the selected instance groups.
614 # For example, a selected group may be of the family "la", but there
615 # might be instances of different (job, machines) settings in the cluster
616 # for this group.
617 # In this case, we want to pick those which do not already occur in other
618 # clusters.
619 # If we can, we will pick the instances with the minimum and the maximum
620 # scales.
621 # In a first step, we find out
622 needs_min_scale = True
623 needs_max_scale = True
624 scale_choices: list[list[tuple[int, int]]] = []
625 inst_choices: list[list[str]] = []
627 for i in range(n):
628 elements = np.where(clusters == i)[0]
629 sgroup: str = id_groups[chosen_groups[i]]
630 possibility: set[int] = {i for i in elements
631 if inst_groups[i] == sgroup}
632 cur_scale_choices: list[tuple[int, int]] = []
633 scale_choices.append(cur_scale_choices)
634 cur_inst_choices: list[str] = []
635 inst_choices.append(cur_inst_choices)
636 can_skip: bool = False
638 if needs_min_scale:
639 test = possibility.intersection(min_scale_inst)
640 if len(test) > 0:
641 logger(
642 f"Choosing from groups {[inst_names[t] for t in test]} "
643 f"for cluster {i} to facilitate minimum-scale instance.")
644 sel = list(test)
645 seli = sel[random.integers(len(sel))]
646 possibility = {seli}
647 cur_scale_choices.append(inst_sizes[seli])
648 cur_inst_choices.append(inst_names[seli])
649 can_skip = True
650 needs_min_scale = False
652 if needs_max_scale:
653 test = possibility.intersection(max_scale_inst)
654 if len(test) > 0:
655 logger(
656 f"Choosing from groups {[inst_names[t] for t in test]} "
657 f"for cluster {i} to facilitate maximum-scale instance.")
658 needs_max_scale = False
659 if can_skip:
660 continue
661 sel = list(test)
662 seli = sel[random.integers(len(sel))]
663 cur_scale_choices.append(inst_sizes[seli])
664 cur_inst_choices.append(inst_names[seli])
665 continue
666 if can_skip:
667 continue
669 scales: set[tuple[int, int]] = set()
670 sel = list(possibility)
671 random.shuffle(sel)
672 for ii in sel:
673 tup = inst_sizes[ii]
674 if tup not in scales:
675 scales.add(tup)
676 cur_inst_choices.append(inst_names[ii])
677 cur_scale_choices.append(tup)
679 logger(f"Got the scale choices {scale_choices} resulting from the "
680 f"possible instances {inst_choices}.")
681 # do the actual scale optimization
682 final_sel = __optimize_scales(scale_choices, random)
684 res_tmp: Final[list[tuple[int, str]]] = \
685 [(__scale(scale_choices[i][k][0], scale_choices[i][k][1]),
686 inst_choices[i][k]) for i, k in enumerate(final_sel)]
687 res_tmp.sort()
688 result: Final[list[str]] = [sey[1] for sey in res_tmp]
690 # Finally, we sort and finalize the set of chosen instances.
691 logger(f"Found final instance selection {result}.")
692 return tuple(result)