Coverage for moptipy / examples / jssp / instance_selector.py: 8%

353 statements  


"""Code for selecting interesting instances for smaller-scale experiments."""
import multiprocessing as mp
from math import ceil, factorial, inf, log2, sqrt
from os import sched_getaffinity
from string import digits
from typing import Callable, Final

import numpy as np  # type: ignore

# pylint: disable-next=E0611
from numpy.random import Generator, RandomState  # type: ignore
from pycommons.io.console import logger
from pycommons.types import check_int_range, type_error
from sklearn.cluster import SpectralClustering  # type: ignore

from moptipy.algorithms.so.rls import RLS
from moptipy.api.execution import Execution
from moptipy.examples.jssp.gantt_space import GanttSpace
from moptipy.examples.jssp.instance import Instance
from moptipy.examples.jssp.makespan import Makespan
from moptipy.examples.jssp.ob_encoding import OperationBasedEncoding
from moptipy.operators.permutations.op0_shuffle import Op0Shuffle
from moptipy.operators.permutations.op1_swap2 import Op1Swap2
from moptipy.spaces.permutations import Permutations
from moptipy.utils.nputils import (
    DEFAULT_FLOAT,
    DEFAULT_INT,
    rand_generator,
    rand_seeds_from_str,
)


def __can_solve_instance(inst: Instance, seed: int,
                         queue: mp.Queue) -> None:
    """
    Execute one run of an RLS for 2.1 minutes on the instance.

    The best makespan found is put into the `queue`.

    :param inst: the JSSP instance to check
    :param seed: the seed of this run
    :param queue: the queue for the return values
    """
    x_space: Final[Permutations] = Permutations.with_repetitions(
        inst.jobs, inst.machines)
    y_space: Final[GanttSpace] = GanttSpace(inst)
    g: Final[OperationBasedEncoding] = OperationBasedEncoding(inst)
    o0: Final[Op0Shuffle] = Op0Shuffle(x_space)
    o1: Final[Op1Swap2] = Op1Swap2()
    f: Final[Makespan] = Makespan(inst)
    a: Final[RLS] = RLS(o0, o1)
    goal: Final[int] = inst.makespan_lower_bound

    ex: Final[Execution] = Execution()
    ex.set_search_space(x_space)
    ex.set_solution_space(y_space)
    ex.set_encoding(g)
    ex.set_algorithm(a)
    ex.set_objective(f)
    ex.set_max_time_millis(126_000)
    ex.set_goal_f(goal)
    ex.set_rand_seed(seed)
    with ex.execute() as process:
        queue.put(process.get_best_f())
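
# Added usage sketch (not part of the original module): `__can_solve_instance`
# is normally spawned via `mp.Process` (see `__is_instance_too_easy` below),
# but it can also be called directly for a quick single-process check. The
# instance name "ft06" and the seed 12345 are arbitrary example values.
#
#     q: mp.Queue = mp.Queue()
#     __can_solve_instance(Instance.from_resource("ft06"), 12345, q)
#     print(q.get())  # best makespan found within the 2.1 minute budget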

def __is_instance_too_easy(inst: Instance) -> bool:
    """
    Check if an instance is too easy to be of any interest.

    We perform 16 runs of an RLS (essentially a (1+1)-EA) on the instance,
    granting 2.1 minutes of runtime to each of them. An instance is
    considered easy if at least one of the following conditions is
    fulfilled:

    - it is solved often enough (the lower bound is reached often),
    - there are not enough different results,
    - the mean result is too close to the lower bound,
    - the worst result is not far enough from the lower bound, or
    - the worst result is too close to the best result.

    If any of these conditions holds, we consider the instance as too easy
    to be of any interest for our educational experiment.

    :param inst: the JSSP instance to check
    :returns: `True` if the instance is too easy to be interesting
    """
    n_runs: Final[int] = 16
    seeds: Final[list[int]] = rand_seeds_from_str(string=inst.name,
                                                  n_seeds=n_runs)
    queue: Final[mp.Queue] = mp.Queue()

    while len(seeds) > 0:
        processes = [mp.Process(target=__can_solve_instance,
                                args=(inst, seeds.pop(), queue))
                     for _ in range(min(len(seeds), __DEFAULT_N_THREADS))]
        for p in processes:
            p.start()
        for p in processes:
            p.join()

    goal: Final[int] = inst.makespan_lower_bound
    num_solved: int = 0
    diff: set[int] = set()
    min_makespan: int = 1 << 62
    sum_makespans: int = 0
    max_makespan: int = 0

    for _ in range(n_runs):
        makespan: int = queue.get()
        diff.add(makespan)
        if makespan <= min_makespan:
            min_makespan = makespan
        if makespan <= goal:
            num_solved += 1
        sum_makespans += makespan
        max_makespan = max(max_makespan, makespan)

    mean_makespan: Final[float] = sum_makespans / n_runs
    mean_threshold: Final[float] = min(goal + 32.0,
                                       max(1.01 * goal, goal + 3))
    max_threshold: Final[float] = min(goal + 64.0,
                                      max(1.02 * goal, goal + 5))
    min_threshold: Final[float] = min(min_makespan + 48.0,
                                      max(1.015 * min_makespan,
                                          min_makespan + 4))
    n_diff: Final[int] = len(diff)
    diff_threshold: Final[int] = max(3, int(0.5 + ceil(log2(n_runs))))
    solved_threshold: Final[int] = ceil(n_runs / 3)

    result: Final[bool] = (num_solved >= solved_threshold) \
        or (mean_makespan < mean_threshold) \
        or (max_makespan < max_threshold) \
        or (min_threshold > max_makespan) \
        or (n_diff <= diff_threshold)
    logger(f"Instance {inst.name} is {'easy' if result else 'hard'}, "
           f"since {n_runs} runs of the (1+1)-EA had {n_diff} different "
           f"results (threshold {diff_threshold}), a best result of "
           f"{min_makespan}, a mean result of {mean_makespan} (threshold: "
           f"{mean_threshold}), a worst result of {max_makespan} "
           f"(thresholds: {max_threshold}, {min_threshold}), and "
           f"reached the makespan lower bound ({goal}) {num_solved} "
           f"times (threshold: {solved_threshold}).")
    return result
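
# Added worked example (hypothetical numbers, not from the original module):
# for a makespan lower bound of goal = 1000, a best run result of 1005, and
# n_runs = 16, the thresholds above evaluate to
#   mean_threshold   = min(1032.0, max(1010.0, 1003)) = 1010.0
#   max_threshold    = min(1064.0, max(1020.0, 1005)) = 1020.0
#   min_threshold    = min(1053.0, max(1020.075, 1009)) = 1020.075
#   diff_threshold   = max(3, ceil(log2(16))) = 4
#   solved_threshold = ceil(16 / 3) = 6
# so the instance would be flagged as easy if, for example, at least 6 of
# the 16 runs hit the lower bound or the mean makespan stayed below 1010.0.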

#: The default number of threads to be used
__DEFAULT_N_THREADS: Final[int] = max(1, min(len(sched_getaffinity(0)), 128))


def compute_instances_that_are_too_easy() -> tuple[str, ...]:
    """
    Compute the set of instances that are too easy to be of any interest.

    For our educational experiments, we need instances that are not always
    solved to optimality by simple algorithms. Otherwise, for example, the
    box plots of the end results will collapse to single lines. Then, it
    will also be impossible to really say which algorithm performs best on
    the instance.
    See the documentation of `__is_instance_too_easy` for the conditions
    that lead to the rejection of an instance.

    :returns: the tuple of instance names that should not be included in
        the experiment
    """
    logger("We now test whether the instances are too easy.")
    easy: Final[list[str]] = []
    for inst_name in Instance.list_resources():
        if __is_instance_too_easy(Instance.from_resource(inst_name)):
            easy.append(inst_name)
    easy.sort()
    logger(f"Finished the test, found {len(easy)} easy instances: {easy}.")
    return tuple(easy)
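
# Added usage sketch (not part of the original module): this check is
# expensive, since it performs 16 runs of up to 2.1 minutes for every
# instance resource. A hypothetical invocation could look like this:
#
#     too_easy = compute_instances_that_are_too_easy()
#     print(f"{len(too_easy)} instances will be skipped: {too_easy}")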

#: This is the set of instances that are too easy to be interesting and shall
#: therefore be skipped from our experiments.
#: For documentation of what constitutes 'too easy', see
#: :func:`compute_instances_that_are_too_easy`.
__TOO_EASY: Final[set[str]] = \
    {"demo", "dmu21", "dmu22", "dmu24", "dmu25", "dmu31", "dmu32", "dmu33",
     "dmu34", "dmu35", "ft06", "ft20", "la01", "la02", "la03", "la04",
     "la05", "la06", "la07", "la08", "la09", "la10", "la11", "la12", "la13",
     "la14", "la15", "la17", "la18", "la23", "la26", "la28", "la30", "la31",
     "la32", "la33", "la34", "la35", "swv16", "swv17", "swv18", "swv19",
     "swv20", "ta51", "ta52", "ta53", "ta54", "ta55", "ta56", "ta57", "ta58",
     "ta59", "ta60", "ta61", "ta63", "ta68", "ta69", "ta71", "ta72", "ta73",
     "ta74", "ta75", "ta76", "ta77", "ta78", "ta79", "ta80"}


def __get_instances() -> list[Instance]:
    """
    Get the instances.

    :return: a list of instances
    """
    return [Instance.from_resource(name) for name in
            Instance.list_resources() if name not in __TOO_EASY]


def __optimize_clusters(cluster_groups: tuple[tuple[int, ...], ...],
                        n_groups: int,
                        extreme_groups: tuple[tuple[tuple[int,
                                                          int], ...], ...],
                        random: Generator) -> tuple[int, ...]:
    """
    Try to find optimal cluster-to-group assignments.

    We perform hill climbing with restarts.

    :param cluster_groups: the groups per cluster
    :param n_groups: the number of groups
    :param extreme_groups: the extreme groups
    :param random: the random number generator
    :return: the selected groups
    """
    n: Final[int] = len(cluster_groups)
    current: Final[np.ndarray] = np.zeros(n, DEFAULT_INT)
    current_f: tuple[int, int, int, int, float]
    best: Final[np.ndarray] = np.zeros(n, DEFAULT_INT)
    best_f: tuple[int, int, int, int, float]
    total_best: Final[np.ndarray] = np.zeros(n, DEFAULT_INT)
    total_best_f: tuple[int, int, int, int, float] = (-1, -1, -1, -1, -1.0)
    run_last_improved: int = 1
    run_current: int = 0
    run_max_none_improved: Final[int] = 4
    step_max_none_improved: Final[int] = int((2 + (n_groups * n)) ** 2)

    done: Final[np.ndarray] = np.zeros(n_groups, DEFAULT_INT)  # noqa
    extremes: Final[set[int]] = set()  # noqa

    logger(f"Beginning to optimize the assignment of {len(cluster_groups)} "
           f"clusters to {n_groups} groups. The minimum groups are "
           f"{extreme_groups[0]}, the maximum groups are {extreme_groups[1]}."
           f" We permit {run_max_none_improved} runs without improvement "
           f"before termination and {step_max_none_improved} FEs without "
           "improvement before stopping a run.")

    def __f(sol: np.ndarray) -> tuple[int, int, int, int, float]:
        """
        Compute the quality: the internal objective function.

        We maximize five things:
        First, we maximize the number of group assignments that allow us to
        include the extreme instances in the result. These are the smallest
        and largest-scale instances.
        Second, we maximize the number of instance sets covered.
        Third, we maximize the minimum number of usages of the instance sets.
        Fourth, we minimize the maximum number of usages of the instance sets
        with the goal to balance the usages evenly over different sets.
        Fifth, we minimize the standard deviation of the instance set usages,
        again with the goal to balance the usages evenly over different sets.

        :param sol: the solution
        :return: the objective values
        """
        # first, we count how often each group has been used
        done.fill(0)
        for group in sol:
            done[group] += 1

        # second, we check which cluster-to-group assignment permits
        # using an extreme instance
        extremes.clear()
        for groups in extreme_groups:
            for group in groups:
                if (sol[group[0]] == group[1]) and (group[0] not in extremes):
                    extremes.add(group[0])
                    break
        return len(extremes), int(np.sum(done > 0)), int(done.min()), \
            int(-done.max()), float(-np.std(done))
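
    # Added note (not in the original module): `__f` returns a 5-tuple and
    # the search below compares such tuples with `>` / `>=`. Python compares
    # tuples lexicographically, so the first criterion (covering the extreme
    # instances) dominates, then the group coverage, and so on. For example,
    # (2, 8, 1, -3, -0.5) > (2, 7, 9, 0, 0.0) holds because the second
    # entries already differ (8 > 7), regardless of the remaining entries.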

    # The outer loop: the restarts of the hill climber.
    # We continue to execute runs until a given number of successive runs did
    # not improve the overall best solution. Then we stop.
    while (run_current - run_last_improved) <= run_max_none_improved:
        run_current += 1

        # generate an initial best solution
        for i in range(n):
            cg = cluster_groups[i]
            best[i] = cg[random.integers(len(cg))]
        best_f = __f(best)  # compute quality
        use_f = best_f

        if best_f > total_best_f:  # accept solution if it is better
            total_best_f = best_f
            run_last_improved = run_current  # new global best
            np.copyto(total_best, best)
            logger(f"New global best {tuple(best)} with quality "
                   f"{best_f} found in run {run_current}.")
        else:
            logger(f"Run {run_current} starts with quality {best_f}.")

        # the local search step
        # We continue the local search until a given number of successive
        # steps did not yield any improvement. Then we terminate the run.
        step_current: int = 0
        step_last_improved: int = 1
        while (step_current - step_last_improved) <= step_max_none_improved:
            step_current += 1
            np.copyto(current, best)
            while True:  # perform at least one move
                i = int(random.integers(n))
                cg = cluster_groups[i]
                lcg = len(cg)
                if lcg <= 1:
                    continue
                while True:  # perform the search move
                    new_v = cg[random.integers(lcg)]
                    if new_v != best[i]:
                        current[i] = new_v
                        break
                # 50/50 chance to perform more moves
                if random.integers(2) <= 0:
                    break

            current_f = __f(current)  # evaluate quality
            # accept the move if it is not worse, or with a small probability
            if (current_f >= use_f) or (random.integers(50) < 1):
                use_f = current_f
                np.copyto(best, current)
                if current_f > best_f:
                    best_f = current_f
                    step_last_improved = step_current
                    if current_f > total_best_f:  # new optimum
                        total_best_f = current_f
                        run_last_improved = run_current
                        np.copyto(total_best, current)
                        logger(f"New global best {tuple(best)} with quality"
                               f" {total_best_f} found in run {run_current} "
                               f"after {step_current} FEs.")
        logger(f"Run {run_current} finished with quality {best_f}.")

    result = tuple(total_best)
    logger(f"Finished after {run_current} runs with solution {result} of "
           f"quality {total_best_f}.")
    return result


def __optimize_scales(scale_choices: list[list[tuple[int, int]]],
                      random: Generator) -> list[int]:
    """
    Pick a diverse scale choice.

    :param scale_choices: the scale choices we have per group
    :param random: the random number generator
    :returns: one chosen scale per group
    """
    n: Final[int] = len(scale_choices)

    x_total_best: list[int] = [0] * n
    opt_idx: list[tuple[int, int]] = []  # the relevant indexes
    opt_sum: int = 0
    for idx, choices in enumerate(scale_choices):
        if len(choices) > 1:
            opt_sum += len(choices)
            opt_idx.append((idx, len(choices)))

    if not opt_idx:
        return x_total_best

    def __f(xx: list[int]) -> tuple[float, float, int]:
        """
        Compute the quality of a suggestion; bigger is better.

        :param xx: the candidate solution
        :returns: a tuple with the minimum distance, the distance sum, and
            the difference count
        """
        dist_min: float = inf
        dist_sum: float = 0
        diff_cnt: int = 0
        for i in range(n):
            a = scale_choices[i][xx[i]]
            for j in range(i):
                b = scale_choices[j][xx[j]]
                if a[0] == b[0]:
                    d0 = 0
                else:
                    diff_cnt += 1
                    d0 = a[0] - b[0]
                if a[1] == b[1]:
                    d1 = 0
                else:
                    diff_cnt += 1
                    d1 = a[1] - b[1]
                d = sqrt((d0 * d0) + (d1 * d1))
                dist_min = min(dist_min, d)
                dist_sum += d
        return dist_min, dist_sum, diff_cnt
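
    # Added worked example (hypothetical values, not in the original module):
    # for two chosen scales a = (10, 5) and b = (15, 5), the job counts
    # differ (diff_cnt grows by 1, d0 = -5) while the machine counts match
    # (d1 = 0), so this pair contributes d = sqrt(25 + 0) = 5.0 to dist_min
    # and dist_sum.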

    f_total_best: tuple[float, float, int] = __f(x_total_best)
    logger(f"The following index-choices tuples exist: {opt_idx} and "
           f"the initial choice is {x_total_best} at quality {f_total_best}.")

    x_cur_best: list[int] = [0] * n
    x_cur: list[int] = [0] * n

    for _ in range(int(opt_sum ** 2.35)):
        for ii, sc in enumerate(scale_choices):
            x_cur_best[ii] = int(random.integers(len(sc)))
        f_cur_best: tuple[float, float, int] = __f(x_cur_best)
        if f_cur_best > f_total_best:
            f_total_best = f_cur_best
            x_total_best.clear()
            x_total_best.extend(x_cur_best)
            logger(f"Improved to {x_total_best} at quality {f_total_best}.")
        for _ in range(int(opt_sum ** 2.35)):
            idx, choicen = opt_idx[random.integers(len(opt_idx))]
            old = x_cur[idx]
            while old == x_cur[idx]:
                x_cur[idx] = int(random.integers(choicen))
            f_cur = __f(x_cur)
            if f_cur >= f_cur_best:
                f_cur_best = f_cur
                x_cur_best.clear()
                x_cur_best.extend(x_cur)
                if f_cur_best > f_total_best:
                    f_total_best = f_cur_best
                    x_total_best.clear()
                    x_total_best.extend(x_cur_best)
                    logger(f"Improved to {x_total_best} "
                           f"at quality {f_total_best}.")

    return x_total_best


def __scale(jobs: int, machines: int) -> int:
    """
    Compute the scale of a JSSP instance.

    :param jobs: the number of jobs
    :param machines: the number of machines
    :returns: the scale
    """
    return factorial(jobs) ** machines
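
# Added illustration (not part of the original module): __scale(3, 2)
# = (3!) ** 2 = 36, while __scale(10, 5) = (10!) ** 5 is already roughly
# 6.3e32. The value grows so quickly that the feature matrix built in
# propose_instances() below stores log2(log2(scale)) rather than the raw
# scale.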


def propose_instances(n: int,
                      get_instances: Callable = __get_instances) -> \
        tuple[str, ...]:
    """
    Propose a set of instances to be used for our experiment.

    This function is used to obtain the instances chosen for the JSSP
    experiment. You can also use it for other experiments, by using your own
    instance source and/or for selecting more or fewer JSSP instances.

    Basically, it will accept a function `get_instances`, which must produce
    a sequence of `Instance` objects. Each instance has a name (e.g.,
    `dmu14`) and a group, where the group is the name without any numerical
    suffix (e.g., `dmu`). This function will then select `n` instances from
    the instance set with the goal to maximize the diversity of the
    instances, to include instances from as many groups as possible, and to
    include one instance of the smallest and one of the largest scale.
    The diversity is measured in terms of the numbers of jobs and machines,
    the instance scale, the minimum and maximum operation length, the
    standard deviation of the mean operation lengths over the jobs, the
    makespan bounds, and so on.

    First, features are computed for each instance. Second, the instances
    are clustered into `n` clusters. Third, we try to pick groups for each
    cluster such that a) the minimum and maximum-scale instances can be
    included and b) instances from as many groups as possible are picked.
    Fourth, we then randomly pick one instance for each cluster from the
    selected group (while trying to pick the minimum and maximum-scale
    instances). Finally, the chosen instance names are collected in a
    sorted tuple and returned.

    :param n: the number of instances to be proposed
    :param get_instances: a function returning an iterable of instances
    :return: a tuple with the instance names
    """
    check_int_range(n, "n", 1, 1000)
    if not callable(get_instances):
        raise type_error(get_instances, "get_instances", call=True)
    instances = list(get_instances())
    n_instances: Final[int] = len(instances)
    if n_instances <= 0:
        raise ValueError("No instance returned by get_instances.")
    for i in instances:
        if not isinstance(i, Instance):
            raise type_error(
                i, "value in list returned by get_instances", Instance)
    if n_instances <= n:
        if n_instances == n:  # nothing to do here
            return tuple(sorted(inst.get_name() for inst in instances))
        raise ValueError(f"{n} instances requested, but only "
                         f"{n_instances} available.")
    logger(f"We will pick {n} instances out of {n_instances}.")

    random: Final[Generator] = rand_generator(0)  # the random number generator
    random.shuffle(instances)

    inst_names: Final[tuple[str, ...]] = tuple(  # get the instance names
        inst.get_name() for inst in instances)
    inst_sizes: Final[list[tuple[int, int]]] = \
        [(inst.jobs, inst.machines) for inst in instances]

    # the group to which the instances belong
    rm = str.maketrans("", "", digits)
    inst_groups: Final[tuple[str, ...]] = tuple(  # get the instance groups
        inst.translate(rm) for inst in inst_names)

    # create bi-directional mapping between group names and integer IDs
    group_ids: Final[dict[str, int]] = {}
    id_groups: Final[dict[int, str]] = {}
    for group in inst_groups:
        if group in group_ids:
            continue
        gid = len(group_ids)
        group_ids[group] = gid
        id_groups[gid] = group
    n_groups: Final[int] = len(group_ids)
    logger(f"The {n_instances} instances belong to the {n_groups} "
           f"groups {sorted(group_ids.keys())}.")
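
    # Added note (not in the original module): the group of an instance is
    # simply its name with all decimal digits removed via the `rm` table
    # above, e.g., "dmu14" -> "dmu", "ta61" -> "ta", "la05" -> "la".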

    # compute the feature matrix
    base_features: Final[int] = 9
    features = np.zeros((len(instances), base_features + n_groups),
                        DEFAULT_FLOAT)
    min_scale_val = inf
    min_scale_inst: set[int] = set()
    max_scale_val = -inf
    max_scale_inst: set[int] = set()
    for i, inst in enumerate(instances):
        features[i, 0] = inst.jobs
        features[i, 1] = inst.machines
        features[i, 2] = inst.jobs / inst.machines
        scale = __scale(inst.jobs, inst.machines)
        if scale <= min_scale_val:
            if scale < min_scale_val:
                min_scale_val = scale
                min_scale_inst.clear()
            min_scale_inst.add(i)
        if scale >= max_scale_val:
            if scale > max_scale_val:
                max_scale_inst.clear()
                max_scale_val = scale
            max_scale_inst.add(i)
        features[i, 3] = log2(log2(scale))
        m = inst.matrix[:, 1::2]
        features[i, 4] = np.std(np.mean(m, axis=0))
        features[i, 5] = np.min(m)
        features[i, 6] = np.max(m)
        features[i, 7] = inst.makespan_lower_bound
        features[i, 8] = inst.makespan_upper_bound / inst.makespan_lower_bound
        features[i, base_features + group_ids[inst_groups[i]]] = 1
    del instances
    logger(f"We computed {base_features + n_groups} features for each "
           f"instance, namely {base_features} numerical features and "
           f"{n_groups} group features. The instances with the smallest "
           f"scale {min_scale_val} are "
           f"{[inst_names[i] for i in min_scale_inst]} (encoded as "
           f"{min_scale_inst}) and those of the largest scale "
           f"{max_scale_val} are {[inst_names[i] for i in max_scale_inst]} "
           f"(encoded as {max_scale_inst}). Now we will cluster the "
           f"instances.")

    # standardize feature columns
    for i in range(base_features):
        features[:, i] = (features[:, i] - np.mean(features[:, i])) \
            / np.std(features[:, i])
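
    # Added note (not in the original module): this is a plain z-score
    # standardization. For a hypothetical column with mean 20 and standard
    # deviation 5, a raw value of 30 becomes (30 - 20) / 5 = 2.0. The
    # one-hot group columns are left untouched by this loop.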

    # So now we have the following things:
    # 1. the tuple `inst_names` with the instance names
    # 2. the tuple `inst_groups` with the corresponding groups
    # 3. the set of group names (the keys of `group_ids`)
    # 4. the matrix `features` with the instance features
    # 5. the bi-directional mapping between instance groups and group IDs
    #    (`group_ids` and `id_groups`)
    # 6. the instance indexes of the smallest and largest-scale instances
    #    (`min_scale_inst` and `max_scale_inst`)

    # now we cluster the instances
    model = SpectralClustering(n_clusters=n,
                               n_init=100,
                               random_state=RandomState(
                                   random.integers(2 ** 32)))
    clusters = model.fit_predict(features)
    if len(clusters) != n_instances:
        raise ValueError(
            f"Invalid number {len(clusters)} of cluster "
            f"assignments to {n_instances} instances.")
    if (max(clusters) - min(clusters) + 1) != n:
        raise ValueError(f"Expected {n} clusters, but got "
                         f"{max(clusters) - min(clusters) + 1}.")
    logger(f"Found clusters {clusters}. The minimum-scale instances are in "
           f"clusters {[clusters[i] for i in min_scale_inst]}, the "
           f"maximum-scale instances in clusters "
           f"{[clusters[i] for i in max_scale_inst]}. Now assigning "
           f"clusters to groups.")

    # find which groups belong to which cluster
    cluster_groups: Final[tuple[tuple[int, ...], ...]] = tuple(
        tuple(sorted({group_ids[inst_groups[j]]
                      for j in np.where(clusters == i)[0]}))
        for i in range(n))
    logger(f"The groups available per cluster are {cluster_groups}.")

    # Now we need to pick the extreme groups.
    extreme_groups = tuple(tuple(sorted(
        {(clusters[xx], group_ids[inst_groups[xx]]) for xx in ex}))
        for ex in [min_scale_inst, max_scale_inst])
    logger(f"The extreme groups are {extreme_groups}.")

    # With this method, we choose one instance group for each cluster.
    chosen_groups = __optimize_clusters(cluster_groups=cluster_groups,
                                        extreme_groups=extreme_groups,
                                        n_groups=n_groups,
                                        random=random)
    logger(f"The instance groups {chosen_groups} were chosen for the"
           f" {n} clusters.")

    # OK, we have picked one instance group per cluster.
    # Now we need to pick the instance scales from these groups.
    # The goal now is to select instances with (jobs, machines) settings as
    # diverse as possible, while adhering to the selected instance groups.
    # For example, a selected group may be of the family "la", but there
    # might be instances of different (jobs, machines) settings in the
    # cluster for this group.
    # In this case, we want to pick those which do not already occur in
    # other clusters.
    # If we can, we will pick the instances with the minimum and the maximum
    # scales.
    # In a first step, we collect, for each cluster, the possible
    # (jobs, machines) scale choices and the corresponding instance names.
    needs_min_scale = True
    needs_max_scale = True
    scale_choices: list[list[tuple[int, int]]] = []
    inst_choices: list[list[str]] = []

    for i in range(n):
        elements = np.where(clusters == i)[0]
        sgroup: str = id_groups[chosen_groups[i]]
        possibility: set[int] = {i for i in elements
                                 if inst_groups[i] == sgroup}
        cur_scale_choices: list[tuple[int, int]] = []
        scale_choices.append(cur_scale_choices)
        cur_inst_choices: list[str] = []
        inst_choices.append(cur_inst_choices)
        can_skip: bool = False

        if needs_min_scale:
            test = possibility.intersection(min_scale_inst)
            if len(test) > 0:
                logger(
                    f"Choosing from instances {[inst_names[t] for t in test]}"
                    f" for cluster {i} to facilitate the minimum-scale "
                    f"instance.")
                sel = list(test)
                seli = sel[random.integers(len(sel))]
                possibility = {seli}
                cur_scale_choices.append(inst_sizes[seli])
                cur_inst_choices.append(inst_names[seli])
                can_skip = True
                needs_min_scale = False

        if needs_max_scale:
            test = possibility.intersection(max_scale_inst)
            if len(test) > 0:
                logger(
                    f"Choosing from instances {[inst_names[t] for t in test]}"
                    f" for cluster {i} to facilitate the maximum-scale "
                    f"instance.")
                needs_max_scale = False
                if can_skip:
                    continue
                sel = list(test)
                seli = sel[random.integers(len(sel))]
                cur_scale_choices.append(inst_sizes[seli])
                cur_inst_choices.append(inst_names[seli])
                continue
        if can_skip:
            continue

        scales: set[tuple[int, int]] = set()
        sel = list(possibility)
        random.shuffle(sel)
        for ii in sel:
            tup = inst_sizes[ii]
            if tup not in scales:
                scales.add(tup)
                cur_inst_choices.append(inst_names[ii])
                cur_scale_choices.append(tup)

    logger(f"Got the scale choices {scale_choices} resulting from the "
           f"possible instances {inst_choices}.")
    # do the actual scale optimization
    final_sel = __optimize_scales(scale_choices, random)

    res_tmp: Final[list[tuple[int, str]]] = \
        [(__scale(scale_choices[i][k][0], scale_choices[i][k][1]),
          inst_choices[i][k]) for i, k in enumerate(final_sel)]
    res_tmp.sort()
    result: Final[list[str]] = [sey[1] for sey in res_tmp]

    # Finally, we sort and finalize the set of chosen instances.
    logger(f"Found final instance selection {result}.")
    return tuple(result)
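

# Added usage sketch (not part of the original module): unlike
# compute_instances_that_are_too_easy(), propose_instances() only loads the
# instance data, computes features, clusters them, and optimizes the
# selection; it does not execute any JSSP algorithm runs. The value 8 below
# is an arbitrary example, not necessarily the number used in the actual
# experiment.
if __name__ == "__main__":
    chosen = propose_instances(8)
    logger(f"Example selection of 8 instances: {chosen}.")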