# Coverage for moptipy/api/experiment.py: 89%

"""
The experiment execution API.

Via the function :func:`run_experiment`, you can execute a complex experiment
where multiple optimization algorithms are applied to multiple problem
instances, where log files with the results and progress information about the
runs are collected, and where multiprocessing is used to parallelize the
experiment execution.
Experiments are replicable, as random seeds are automatically generated based
on problem instance names in a replicable fashion.

The log files are structured according to the documentation in
https://thomasweise.github.io/moptipy/#file-names-and-folder-structure
and their contents follow the specification given in
https://thomasweise.github.io/moptipy/#log-file-sections.
"""

import copy
import gc
import os.path
from os import getpid
from typing import (  # pylint: disable=W0611
    Any,
    Callable,
    Final,
    Iterable,
    cast,
)  # pylint: disable=W0611

from numpy.random import Generator, default_rng
from pycommons.ds.cache import str_is_new
from pycommons.io.console import logger
from pycommons.io.path import Path
from pycommons.types import check_int_range, type_error

from moptipy.api.execution import Execution
from moptipy.api.logging import FILE_SUFFIX
from moptipy.api.process import Process
from moptipy.utils.nputils import rand_seeds_from_str
from moptipy.utils.strings import sanitize_name, sanitize_names
from moptipy.utils.sys_info import get_sys_info


def __run_experiment(base_dir: Path,
                     experiments: list[list[Callable]],
                     n_runs: list[int],
                     thread_id: str,
                     perform_warmup: bool,
                     warmup_fes: int,
                     perform_pre_warmup: bool,
                     pre_warmup_fes: int,
                     on_completion: Callable[[
                         Any, Path, Process], None]) -> None:
    """
    Execute a single thread of experiments.

    The work is done in up to two phases. If `perform_pre_warmup` is `True`,
    a first phase executes one short, unlogged run (with seed `0`) for every
    setup. Afterwards, the garbage collector is invoked and all surviving
    objects are frozen. The second phase then performs the actual runs: for
    each entry of `n_runs`, the list of setups is shuffled and, for each
    setup, the seeds derived from the instance name are processed in random
    order. A run whose log file has already been handled is skipped.

    Note that `experiments` is shuffled in place.

    :param base_dir: the base directory
    :param experiments: the stream of experiment setups, each a two-element
        list holding the instance factory (called without arguments) and the
        setup callable (called with the instance, must return an
        :class:`~moptipy.api.execution.Execution`)
    :param n_runs: the list of runs, i.e., the numbers of runs per setup
        that are processed one after the other
    :param thread_id: the thread id, passed to the logger with each message
    :param perform_warmup: should we perform a warm-up per instance?
    :param warmup_fes: the number of the FEs for the warm-up runs
    :param perform_pre_warmup: should we do one warmup run for each
        instance before we begin with the actual experiments?
    :param pre_warmup_fes: the FEs for the pre-warmup runs
    :param on_completion: a function to be called for every completed run,
        receiving the instance, the path to the log file (before it is
        created) and the :class:`~moptipy.api.process.Process` of the run
        as parameters
    :raises TypeError: if an instance factory returns `None`; an error
        created by `type_error` is raised if a setup callable does not
        return an :class:`~moptipy.api.execution.Execution`
    """
    random: Final[Generator] = default_rng()
    # presumably reports `True` only the first time a string is passed in,
    # so that this thread considers each log file at most once -- confirm
    # against the pycommons documentation
    cache: Final[Callable[[str], bool]] = str_is_new()

    # phase 1 (optional): pre-warmup; phase 2: the actual experiment
    for warmup in ((True, False) if perform_pre_warmup else (False, )):
        wss: str  # the label used when logging (pre-)warmup runs
        if warmup:
            wss = "pre-warmup"
        else:
            wss = "warmup"
            if perform_pre_warmup:
                gc.collect()  # do full garbage collection after pre-warmups
                gc.collect()  # one more, to be double-safe
                gc.freeze()  # whatever survived now, keep it permanently

        for runs in ((1, ) if warmup else n_runs):  # for each number of runs
            if not warmup:
                logger(f"now doing {runs} runs.", thread_id)
            random.shuffle(cast("list", experiments))  # shuffle

            for setup in experiments:  # for each setup
                instance = setup[0]()  # load instance
                if instance is None:
                    raise TypeError("None is not an instance.")
                inst_name = sanitize_name(str(instance))

                exp = setup[1](instance)  # setup algorithm for instance
                if not isinstance(exp, Execution):
                    raise type_error(exp, "result of setup callable",
                                     Execution)
                # noinspection PyProtectedMember
                algo_name = sanitize_name(str(exp._algorithm))

                # the log files go into `base_dir/algorithm/instance`
                cd = Path(os.path.join(base_dir, algo_name, inst_name))
                cd.ensure_dir_exists()

                # generate sequence of seeds
                seeds: list[int] = [0] if warmup else \
                    rand_seeds_from_str(string=inst_name, n_seeds=runs)
                random.shuffle(seeds)
                # at most one warmup run is done per visit of this setup
                needs_warmup = warmup or perform_warmup
                for seed in seeds:  # for every run

                    filename = sanitize_names(
                        [algo_name, inst_name, hex(seed)])
                    if warmup:
                        # pre-warmup runs write no log file, so the plain
                        # name suffices (it is only used for logging)
                        log_file = filename
                    else:
                        log_file = Path(
                            os.path.join(cd, filename + FILE_SUFFIX))

                        # Only files not yet seen by this thread are checked
                        # on disk. The result of `ensure_file_exists` is used
                        # as the skip flag -- presumably it is `True` if the
                        # file already existed and `False` if it was newly
                        # created; confirm against pycommons.
                        skip = True
                        if cache(log_file):
                            skip = log_file.ensure_file_exists()
                        if skip:
                            continue  # run already done

                    exp.set_rand_seed(seed)

                    if needs_warmup:  # perform warmup run
                        needs_warmup = False
                        # work on a copy, so that the limits and logging
                        # settings of the real execution are not replaced
                        cpy: Execution = copy.copy(exp)
                        cpy.set_max_fes(
                            pre_warmup_fes if warmup else warmup_fes, True)
                        cpy.set_max_time_millis(3600000, True)
                        cpy.set_log_file(None)
                        cpy.set_log_improvements(False)
                        cpy.set_log_all_fes(False)
                        logger(f"{wss} for {filename!r}.", thread_id)
                        with cpy.execute():
                            pass
                        del cpy

                    if warmup:
                        continue  # the pre-warmup phase does no real runs

                    exp.set_log_file(log_file)
                    logger(filename, thread_id)
                    with exp.execute() as process:  # run the experiment
                        on_completion(instance, cast(
                            "Path", log_file), process)

149

150

def __no_complete(_: Any, __: Path, ___: Process) -> None:
    """
    Do nothing.

    This is the default value of the `on_completion` parameter of
    :func:`run_experiment`. It accepts the instance, the log file path, and
    the process of a run, ignores all three of them, and returns `None`.

    :param _: the problem instance, ignored
    :param __: the path to the log file, ignored
    :param ___: the process of the run, ignored
    """

153

154

def run_experiment(
        base_dir: str, instances: Iterable[Callable[[], Any]],
        setups: Iterable[Callable[[Any], Execution]],
        n_runs: int | Iterable[int] = 11,
        perform_warmup: bool = True, warmup_fes: int = 20,
        perform_pre_warmup: bool = True, pre_warmup_fes: int = 20,
        on_completion: Callable[[Any, Path, Process], None] = __no_complete) \
        -> Path:
    """
    Run an experiment and store the log files into the given folder.

    This function will automatically run an experiment, i.e., apply a set
    `setups` of algorithm setups to a set `instances` of problem instances for
    `n_runs` each. It will collect log files and store them into an
    appropriate folder structure under the path `base_dir`. It will
    automatically draw random seeds for all algorithm runs using
    :func:`moptipy.utils.nputils.rand_seeds_from_str` based on the names of
    the problem instances to solve. This yields replicable experiments, i.e.,
    running the experiment program twice will yield exactly the same runs in
    exactly the same file structure (give and take clock-time dependent
    issues, which obviously cannot be controlled in a deterministic fashion).

    :param base_dir: the base directory where to store the results
    :param instances: an iterable of callables, each of which should return an
        object representing a problem instance, whose `__str__` representation
        is a valid name
    :param setups: an iterable of callables, each receiving an instance (as
        returned by instances) as input and producing an
        :class:`moptipy.api.execution.Execution` as output
    :param n_runs: the number of runs per algorithm-instance combination.
        This can either be a single integer or an iterable of integers. The
        values are validated one after the other, each with a lower bound of
        the previous value plus one (starting at `1`), i.e., they must be
        positive and strictly increasing. The experiment is then executed
        once for each of these run counts, in the given order.
    :param perform_warmup: should we perform a warm-up for each instance?
        If this parameter is `True`, then before the very first run of a
        thread on an instance, we will execute the algorithm for just a few
        function evaluations without logging and discard the results. The
        idea is that during this warm-up, things such as JIT compilation or
        complicated parsing can take place. While this cannot mitigate time
        measurement problems for JIT compilations taking place late in runs,
        it can at least somewhat solve the problem of delayed first FEs caused
        by compilation and parsing.
    :param warmup_fes: the number of the FEs for the warm-up runs, checked
        against the range `1..1_000_000`
    :param perform_pre_warmup: should we do one warmup run for each
        instance before we begin with the actual experiments? This complements
        the warmups defined by `perform_warmup`. It could be that, for some
        reason, JIT or other activities may lead to stalls between multiple
        processes when code is encountered for the first time. This may or may
        not still cause strange timing issues even if `perform_warmup=True`.
        We therefore can do one complete round of warmups before starting the
        actual experiment. After that, we perform one garbage collection run
        and then freeze all objects surviving it to prevent them from future
        garbage collection runs. All processes that execute the experiment in
        parallel will complete their pre-warmup and only after all of them have
        completed it, the actual experiment will begin. I am not sure whether
        this makes sense or not, but it also would not hurt.
        NOTE(review): no synchronization between processes is visible in this
        function itself; confirm where (or whether) this barrier is realized.
    :param pre_warmup_fes: the FEs for the pre-warmup runs, checked against
        the range `1..1_000_000`
    :param on_completion: a function to be called for every completed run,
        receiving the instance, the path to the log file (before it is
        created) and the :class:`~moptipy.api.process.Process` of the run
        as parameters

    :returns: the canonicalized path to `base_dir`
    :raises ValueError: if `instances`, `setups`, or `n_runs` is empty or
        if the system information string is empty
    :raises TypeError: the error created by `type_error` is raised if
        `instances` or `setups` is not an `Iterable`, if one of their
        elements is not callable, or if `perform_warmup` or
        `perform_pre_warmup` is not a `bool`
    """
    if not isinstance(instances, Iterable):
        raise type_error(instances, "instances", Iterable)
    if not isinstance(setups, Iterable):
        raise type_error(setups, "setups", Iterable)
    if not isinstance(perform_warmup, bool):
        raise type_error(perform_warmup, "perform_warmup", bool)
    if not isinstance(perform_pre_warmup, bool):
        raise type_error(perform_pre_warmup, "perform_pre_warmup", bool)
    check_int_range(warmup_fes, "warmup_fes", 1, 1_000_000)
    check_int_range(pre_warmup_fes, "pre_warmup_fes", 1, 1_000_000)

    # materialize the instance factories, as they are iterated repeatedly
    instances = list(instances)
    if list.__len__(instances) <= 0:
        raise ValueError("Instance enumeration is empty.")
    for instance in instances:
        if not callable(instance):
            raise type_error(instance, "all instances", call=True)

    # presumably this makes sure that the system information is gathered
    # before any run starts -- TODO confirm against `get_sys_info`
    if str.__len__(get_sys_info()) <= 0:
        raise ValueError("empty system info?")

    # materialize the setup callables as well
    setups = list(setups)
    if list.__len__(setups) <= 0:
        raise ValueError("Setup enumeration is empty.")
    for setup in setups:
        if not callable(setup):
            raise type_error(setup, "all setups", call=True)

    # the cross product: every setup is paired with every instance
    experiments: Final[list[list[Callable]]] = \
        [[ii, ss] for ii in instances for ss in setups]

    # the separate lists are no longer needed
    del instances
    del setups

    if list.__len__(experiments) <= 0:
        raise ValueError("No experiments found?")

    # normalize `n_runs` to a list and validate that it strictly increases
    n_runs = [n_runs] if isinstance(n_runs, int) else list(n_runs)
    if list.__len__(n_runs) <= 0:
        raise ValueError("No number of runs provided?")
    last = 0
    for run in n_runs:
        last = check_int_range(run, "n_runs", last + 1)

    use_dir: Final[Path] = Path(base_dir)
    use_dir.ensure_dir_exists()

    # the hexadecimal process id serves as identifier in the log output
    thread_id: Final[str] = f"@{getpid():x}"
    logger("beginning experiment execution.", thread_id)
    __run_experiment(base_dir=use_dir,
                     experiments=experiments,
                     n_runs=n_runs,
                     thread_id=thread_id,
                     perform_warmup=perform_warmup,
                     warmup_fes=warmup_fes,
                     perform_pre_warmup=perform_pre_warmup,
                     pre_warmup_fes=pre_warmup_fes,
                     on_completion=on_completion)
    logger("finished experiment execution.", thread_id)
    return use_dir

# Coverage for moptipy/api/experiment.py: 89% (118 statements)