Coverage for moptipy/evaluation/progress.py: 68%

222 statements


1""" 

2Objects embodying the progress of a run over time. 

3 

4An instance of :class:`Progress` holds one :attr:`~Progress.time` vector and 

5an objective value (:attr:`~Progress.f`) vector. The time dimension (stored in 

6:attr:`~Progress.time_unit`) can either be in FEs or in milliseconds and the 

7objective value dimension (stored in :attr:`~Progress.f_name`) can be raw 

8objective values, standardized objective values, or normalized objective 

9values. 

10The two vectors together thus describe how a run of an optimization algorithm 

11improves the objective value over time. 

12""" 

from dataclasses import dataclass
from math import inf, isfinite
from typing import Any, Callable, Final, Generator, Iterable

import numpy as np
from pycommons.io.console import logger
from pycommons.io.csv import COMMENT_START, CSV_SEPARATOR
from pycommons.io.path import Path
from pycommons.strings.string_conv import num_to_str, str_to_num
from pycommons.types import type_error

from moptipy.api.logging import (
    KEY_ALGORITHM,
    KEY_GOAL_F,
    KEY_INSTANCE,
    KEY_RAND_SEED,
    PROGRESS_CURRENT_F,
    PROGRESS_FES,
    PROGRESS_TIME_MILLIS,
    SECTION_PROGRESS,
)
from moptipy.evaluation.base import (
    F_NAME_RAW,
    F_NAME_SCALED,
    KEY_ENCODING,
    KEY_OBJECTIVE_FUNCTION,
    TIME_UNIT_FES,
    TIME_UNIT_MILLIS,
    PerRunData,
    check_f_name,
    check_time_unit,
)
from moptipy.evaluation.log_parser import SetupAndStateParser
from moptipy.utils.logger import KEY_VALUE_SEPARATOR
from moptipy.utils.nputils import is_all_finite, is_np_float, is_np_int


@dataclass(frozen=True, init=False, order=False, eq=False)
class Progress(PerRunData):
    """An immutable record of progress information over a single run."""

    #: The time axis data.
    time: np.ndarray

    #: The unit of the time axis.
    time_unit: str

    #: The objective value data.
    f: np.ndarray

    #: The name of the objective value axis.
    f_name: str

    #: The standard value of the objective dimension.
    #: If :attr:`f_name` is `F_NAME_SCALED` or `F_NAME_NORMALIZED`,
    #: then this value has been used to normalize the data.
    f_standard: int | float | None

    def __init__(self,
                 algorithm: str,
                 instance: str,
                 objective: str,
                 encoding: str | None,
                 rand_seed: int,
                 time: np.ndarray,
                 time_unit: str,
                 f: np.ndarray,
                 f_name: str,
                 f_standard: int | float | None = None,
                 only_improvements: bool = True):
        """
        Create a consistent instance of :class:`Progress`.

        :param algorithm: the algorithm name
        :param instance: the instance name
        :param objective: the name of the objective function
        :param encoding: the name of the encoding that was used, if any, or
            `None` if no encoding was used
        :param rand_seed: the random seed
        :param time: the time axis data
        :param time_unit: the unit of the time axis
        :param f: the objective value axis data
        :param f_name: the name of the objective value axis data
        :param f_standard: the value used to standardize the objective
            value dimension
        :param only_improvements: enforce that f-values should be
            improving and time values increasing
        """

        super().__init__(algorithm, instance, objective, encoding, rand_seed)

        if not isinstance(time, np.ndarray):
            raise type_error(time, "time data", np.ndarray)
        time.flags.writeable = False
        if len(time.shape) != 1:
            raise ValueError("time array must be one-dimensional, but "
                             f"has shape {time.shape}.")
        if not is_np_int(time.dtype):
            raise TypeError("time data must be integer-valued, "
                            f"but has type {time.dtype}.")
        tl = time.size
        if tl <= 0:
            raise ValueError("time data must not be empty.")
        if tl > 1:
            if only_improvements:
                if np.any(time[1:] <= time[:-1]):
                    raise ValueError("time data must be strictly "
                                     f"increasing, but encountered {time}.")
            elif np.any(time[1:] < time[:-1]):
                raise ValueError("time data must be monotonically "
                                 f"increasing, but encountered {time}.")

        object.__setattr__(self, "time", time)
        object.__setattr__(self, "time_unit", check_time_unit(time_unit))

        mintime = 1 if time_unit == TIME_UNIT_FES else 0
        if np.any(time < mintime):
            raise ValueError(f"No time value can be less than {mintime} if"
                             f" time unit is {time_unit}.")

        if not isinstance(f, np.ndarray):
            raise type_error(f, "f data", np.ndarray)
        f.flags.writeable = False
        if len(f.shape) != 1:
            raise ValueError(
                f"f array must be one-dimensional, but has shape {f.shape}.")
        if is_np_float(f.dtype):
            if not is_all_finite(f):
                raise ValueError("f must be all finite.")
        elif not is_np_int(f.dtype):
            raise TypeError("f data must be integer- or float-valued, but"
                            f" encountered an {type(f)} of {f.dtype}.")
        fl = f.size
        if fl <= 0:
            raise ValueError("f data must not be empty.")
        if fl != tl:
            raise ValueError(f"Length {fl} of f data and length {tl} of "
                             "time data must be the same.")
        if not isinstance(only_improvements, bool):
            raise type_error(only_improvements, "only_improvements", bool)
        if only_improvements and (fl > 1):
            if np.any(f[1:-1] >= f[:-2]):
                raise ValueError(
                    "f data must be strictly decreasing, with "
                    "only the last entry being permitted as an exception.")
            if f[-1] > f[-2]:
                raise ValueError(f"last f-value ({f[-1]}) cannot be greater"
                                 f" than second-to-last ({f[-2]}).")
        object.__setattr__(self, "f", f)
        object.__setattr__(self, "f_name", check_f_name(f_name))

        if (f_name != F_NAME_RAW) and (f_standard is None):
            raise ValueError(f"If f_name is {f_name}, "
                             "then f_standard cannot be None.")
        if f_standard is not None:
            if isinstance(f_standard, float):
                if not isfinite(f_standard):
                    raise ValueError(f"f_standard cannot be {f_standard}.")
            elif not isinstance(f_standard, int):
                raise type_error(f_standard, "f_standard", (int, float))
        object.__setattr__(self, "f_standard", f_standard)

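# A minimal usage sketch (illustrative values only; the algorithm, instance,
# and objective names below are hypothetical, not part of this module):
#
#     import numpy as np
#     prg = Progress(algorithm="rls", instance="example", objective="f",
#                    encoding=None, rand_seed=1,
#                    time=np.array([1, 5, 20]), time_unit=TIME_UNIT_FES,
#                    f=np.array([100, 50, 10]), f_name=F_NAME_RAW)
#     # prg.time and prg.f are then read-only, strictly improving vectors.
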

def to_csv(progress: Progress, file: str,
           put_header: bool = True) -> str:
    """
    Store a :class:`Progress` record in a CSV file.

    :param progress: the progress record to store
    :param file: the file to generate
    :param put_header: should we put a header with meta-data?
    :return: the fully resolved file name
    """
    if not isinstance(progress, Progress):
        raise type_error(progress, "progress", Progress)
    if not isinstance(put_header, bool):
        raise type_error(put_header, "put_header", bool)
    path: Final[Path] = Path(file)
    logger(f"Writing progress object to CSV file {path!r}.")
    path.ensure_parent_dir_exists()

    with path.open_for_write() as out:
        sep: Final[str] = CSV_SEPARATOR
        write: Final[Callable[[str], int]] = out.write
        if put_header:
            kv: Final[str] = KEY_VALUE_SEPARATOR
            cmt: Final[str] = COMMENT_START
            write(f"{cmt} {KEY_ALGORITHM}{kv}{progress.algorithm}\n")
            write(f"{cmt} {KEY_INSTANCE}{kv}{progress.instance}\n")
            write(f"{cmt} {KEY_OBJECTIVE_FUNCTION}{kv}{progress.objective}\n")
            if progress.encoding is not None:
                write(f"{cmt} {KEY_ENCODING}{kv}{progress.encoding}\n")
            write(f"{cmt} {KEY_RAND_SEED}{kv}{hex(progress.rand_seed)}\n")
            if progress.f_standard is not None:
                write(f"{cmt} {KEY_GOAL_F}{kv}{progress.f_standard}\n")
        write(f"{progress.time_unit}{sep}{progress.f_name}\n")
        for i, t in enumerate(progress.time):
            write(f"{t}{sep}{num_to_str(progress.f[i])}\n")

    logger(f"Done writing progress object to CSV file {path!r}.")

    path.enforce_file()
    return path

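# A usage sketch (hypothetical output path; `prg` is a Progress record as
# constructed in the sketch above):
#
#     out_file = to_csv(prg, "results/rls_example_0x1.csv")
#     # The file then holds commented meta-data rows (algorithm, instance,
#     # seed, ...), a header row naming the time unit and f-name, and one
#     # time/f row per logged point.
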

class __InnerLogParser(SetupAndStateParser[Progress]):
    """The internal log parser class."""

    def __init__(self, time_unit: str, f_name: str,
                 f_standard: dict[str, int | float] | None,
                 only_improvements: bool,
                 path_filter: Callable[[Path], bool] | None) -> None:
        """
        Create the internal log parser.

        :param time_unit: the time unit
        :param f_name: the objective name
        :param f_standard: a dictionary mapping instances to standard values
        :param only_improvements: enforce that f-values should be improving
            and time values increasing
        :param path_filter: the path filter
        """
        super().__init__(path_filter)
        self.__time_unit = check_time_unit(time_unit)
        self.__f_name = check_f_name(f_name)
        self.__last_fe: int | None = None
        self.__t_collector: Final[list[int]] = []
        self.__f_collector: Final[list[int | float]] = []
        if not isinstance(only_improvements, bool):
            raise type_error(only_improvements, "only_improvements", bool)
        self.__only_improvements = only_improvements
        if (f_standard is not None) and (not isinstance(f_standard, dict)):
            raise type_error(f_standard, "f_standard", dict)
        self.__f_standard: Final[dict[str, int | float] | None] \
            = f_standard
        self.__state: int = 0

    def _parse_file(self, file: Path) -> Progress | None:
        super()._parse_file(file)
        if self.__state != 2:
            raise ValueError(
                "Illegal state, log file must have a "
                f"{SECTION_PROGRESS!r} section.")
        if not self.__f_collector:
            raise ValueError("f-collector cannot be empty.")
        if not self.__t_collector:
            raise ValueError("time-collector cannot be empty.")
        self.__state = 0

        f_standard: int | float | None = None
        if (self.__f_standard is not None) and \
                (self.instance in self.__f_standard):
            f_standard = self.__f_standard[self.instance]
        if f_standard is None:
            f_standard = self.goal_f
        if (self.__f_name != F_NAME_RAW) and (f_standard is None):
            raise ValueError(f"f_standard cannot be {f_standard} if f_name "
                             f"is {self.__f_name}.")
        tt = self.total_time_millis if (
            self.__time_unit == TIME_UNIT_MILLIS) else self.total_fes
        if tt < self.__t_collector[-1]:
            raise ValueError(
                f"Last time value {tt} is inconsistent with the last"
                f" recorded time value {self.__t_collector[-1]}.")
        if self.__last_fe < self.total_fes:
            if tt > self.__t_collector[-1]:
                self.__t_collector.append(tt)
                self.__f_collector.append(self.__f_collector[-1])
        elif self.__last_fe > self.total_fes:
            raise ValueError(
                f"Last FE {self.__last_fe} inconsistent with total number"
                f" {self.total_fes} of FEs.")

        ff: np.ndarray
        if self.__f_name == F_NAME_RAW:
            ff = np.array(self.__f_collector)
        elif self.__f_name == F_NAME_SCALED:
            ff = np.array([f / f_standard for f in self.__f_collector])
        else:
            ff = np.array([(f - f_standard) / f_standard
                           for f in self.__f_collector])
        self.__f_collector.clear()

        return Progress(self.algorithm,
                        self.instance,
                        self.objective,
                        self.encoding,
                        self.rand_seed,
                        np.array(self.__t_collector),
                        self.__time_unit,
                        ff,
                        self.__f_name,
                        f_standard,
                        self.__only_improvements)

    def _end_parse_file(self, file: Path) -> None:
        """Clean up."""
        self.__t_collector.clear()
        self.__last_fe = None
        super()._end_parse_file(file)

    def _start_section(self, title: str) -> bool:
        if title == SECTION_PROGRESS:
            if self.__state != 0:
                raise ValueError(f"Already did section {title}.")
            self.__state = 1
            return True
        return super()._start_section(title)

    def _needs_more_lines(self) -> bool:
        return (self.__state < 2) or super()._needs_more_lines()

    def _lines(self, lines: list[str]) -> bool:
        if not isinstance(lines, list):
            raise type_error(lines, "lines", list)
        if self.__state != 1:
            return super()._lines(lines)
        n_rows = len(lines)
        if n_rows < 2:
            raise ValueError("lines must contain at least two elements,"
                             f" but contains {n_rows}.")

        columns = [c.strip() for c in lines[0].split(CSV_SEPARATOR)]
        n_cols = len(columns)
        if n_cols < 3:
            raise ValueError("There must be at least three columns, "
                             f"but found {n_cols} in {lines[0]!r}.")

        time_col_name: str = PROGRESS_TIME_MILLIS if \
            self.__time_unit == TIME_UNIT_MILLIS else PROGRESS_FES
        time_col_idx: int = -1
        f_col_idx: int = -1
        fe_col_idx: int = -1
        for idx, col in enumerate(columns):  # find the columns we need
            if col == PROGRESS_FES:
                fe_col_idx = idx
            if col == time_col_name:
                if time_col_idx >= 0:
                    raise ValueError(f"Time column {time_col_name} "
                                     "appears twice.")
                time_col_idx = idx
            elif col == PROGRESS_CURRENT_F:
                if f_col_idx >= 0:
                    raise ValueError(
                        f"F column {PROGRESS_CURRENT_F} "
                        "appears twice.")
                f_col_idx = idx
        # defensive check: fail fast instead of silently indexing with -1
        if (fe_col_idx < 0) or (time_col_idx < 0) or (f_col_idx < 0):
            raise ValueError(
                f"Required columns missing in header {lines[0]!r}.")

        def aa(splt):  # noqa
            return splt[time_col_idx], splt[f_col_idx]

        time: Iterable[int]
        f: Iterable[Any]
        time, f = zip(*[[c.strip()
                         for c in aa(line.split(CSV_SEPARATOR))]
                        for line in lines[1:]], strict=True)
        time = [int(t) for t in time]
        f = [str_to_num(v) for v in f]
        if self.__only_improvements:
            biggest_t: int = -1
            best_f: int | float = inf
            for idx, t in enumerate(time):
                v = f[idx]
                if t > biggest_t:
                    if biggest_t >= 0:
                        self.__t_collector.append(biggest_t)
                        self.__f_collector.append(best_f)
                    best_f = v
                    biggest_t = t
                elif v < best_f:
                    best_f = v
            if biggest_t >= 0:
                self.__t_collector.append(biggest_t)
                self.__f_collector.append(best_f)
        else:
            self.__t_collector.extend(time)
            self.__f_collector.extend(f)

        self.__last_fe = int((lines[-1].split(CSV_SEPARATOR))[fe_col_idx])
        if self.__last_fe <= 0:
            raise ValueError(f"Last FE cannot be {self.__last_fe}.")

        self.__state = 2
        return self._needs_more_lines()


def from_logs(path: str,
              time_unit: str = TIME_UNIT_FES,
              f_name: str = F_NAME_RAW,
              f_standard: dict[str, int | float] | None = None,
              only_improvements: bool = True,
              path_filter: Callable[[Path], bool] | None = None) \
        -> Generator[Progress, None, None]:
    """
    Parse a given path and yield all the progress data found.

    If `path` identifies a file with suffix `.txt`, then this file is parsed
    and the appropriate :class:`Progress` record is created. If `path`
    identifies a directory, then this directory is parsed recursively and
    one record is yielded for each log file found.

    :param path: the path to parse
    :param time_unit: the time unit
    :param f_name: the objective name
    :param f_standard: a dictionary mapping instances to standard values
    :param only_improvements: enforce that f-values should be improving and
        time values increasing
    :param path_filter: a function to filter paths
    :returns: a generator yielding one :class:`Progress` record per log file
    """
    return __InnerLogParser(time_unit, f_name, f_standard,
                            only_improvements, path_filter).parse(path)
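
# A usage sketch (hypothetical directory; assumes moptipy log files live
# under "results/"):
#
#     for prg in from_logs("results", time_unit=TIME_UNIT_FES,
#                          f_name=F_NAME_RAW):
#         print(prg.algorithm, prg.instance, prg.f[-1])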