Coverage for moptipy / utils / number_renderer.py: 83%

157 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-24 08:49 +0000

1"""The numeric format definitions.""" 

2 

3from math import inf, isfinite 

4from typing import Callable, Final, Iterable, cast 

5 

6from pycommons.types import check_int_range, type_error 

7 

8from moptipy.utils.formatted_string import FormattedStr 

9from moptipy.utils.lang import Lang 

10from moptipy.utils.math import try_int 

11 

12 

def default_get_int_renderer() -> Callable[[int], str]:
    """
    Obtain the integer renderer of the currently active language.

    The renderer is looked up at call time, so changing the active language
    and calling this function again yields a different renderer.

    :returns: the `format_int` callable of the language that is current at
        the time of the call

    >>> from moptipy.utils.lang import EN, ZH
    >>> EN.set_current()
    >>> f = default_get_int_renderer()
    >>> f(1_000_000)
    "1'000'000"
    >>> ZH.set_current()
    >>> f = default_get_int_renderer()
    >>> f(1_000_000)
    "100'0000"
    """
    active_language = Lang.current()
    renderer = active_language.format_int
    return cast("Callable[[int], str]", renderer)

31 

32 

def default_get_float_format(
        min_finite: int | float = 0,
        max_finite: int | float = 0,
        max_frac_len: int = 2,
        min_non_zero_abs: int | float = inf,
        int_to_float_threshold: int | float = 10_000_000_000) -> str:
    """
    Choose a `str.format` pattern suitable for a whole range of numbers.

    :param min_finite: the smallest finite value that will be formatted
    :param max_finite: the largest finite value that will be formatted
    :param max_frac_len: the length of the longest fractional part of any
        number encountered that can be converted to a string *not* in the
        "E" notation
    :param min_non_zero_abs: the smallest non-zero absolute value; `inf`
        if all absolute values are zero
    :param int_to_float_threshold: the magnitude above which numbers are
        rendered in the 'E' notation
    :returns: a format string such as `'{:.2f}'` or `'{:.2e}'`
    :raises TypeError: if an argument has the wrong type
    :raises ValueError: if the range is not finite or not ordered, or if a
        threshold or the minimum absolute value is not positive

    >>> default_get_float_format(0, 0, 0)
    '{:.0f}'
    >>> default_get_float_format(0, 1e5, 10)
    '{:.0f}'
    >>> default_get_float_format(-1e7, 1e2, 10)
    '{:.0f}'
    >>> default_get_float_format(0, 0, 1)
    '{:.1f}'
    >>> default_get_float_format(0, 1e3, 11)
    '{:.1f}'
    >>> default_get_float_format(-1e3, 1e2, 11)
    '{:.1f}'
    >>> default_get_float_format(0, 0, 2)
    '{:.2f}'
    >>> default_get_float_format(0, 0, 3)
    '{:.3f}'
    >>> default_get_float_format(0, 0, 4)
    '{:.3f}'
    >>> default_get_float_format(0, 1e11, 4)
    '{:.2e}'
    >>> default_get_float_format(-1, 1, 4, 1e-3)
    '{:.3f}'
    """
    # --- argument validation (order matters for which error is raised) ---
    for bound, bound_name in ((min_finite, "min_finite"),
                              (max_finite, "max_finite")):
        if not isinstance(bound, int | float):
            raise type_error(bound, bound_name, (int, float))
    if not (isfinite(min_finite) and isfinite(max_finite)
            and (min_finite <= max_finite)):
        raise ValueError(
            f"invalid min_finite, max_finite pair {min_finite}, "
            f"{max_finite}.")
    check_int_range(max_frac_len, "max_frac_len", 0, 100)

    if not isinstance(int_to_float_threshold, int | float):
        raise type_error(
            int_to_float_threshold, "int_to_float_threshold", (int, float))
    # positive finite numbers and +inf are fine; nan and <= 0 are not
    if (int_to_float_threshold <= 0) or (
            (not isfinite(int_to_float_threshold))
            and (not (int_to_float_threshold >= inf))):
        raise ValueError(
            f"invalid int_to_float_threshold={int_to_float_threshold}.")

    if not isinstance(min_non_zero_abs, int | float):
        raise type_error(min_non_zero_abs, "min_non_zero_abs", (int, float))
    if min_non_zero_abs <= 0:
        raise ValueError(f"invalid min_non_zero_abs={min_non_zero_abs}")

    # --- case 1: everything lies in [-1, 1] and there is a non-zero value:
    # pick just enough digits to show the smallest magnitude
    if (min_finite >= -1) and (max_finite <= 1) \
            and isfinite(min_non_zero_abs):
        for smallest, pattern in ((1e-1, "{:.1f}"), (1e-2, "{:.2f}"),
                                  (1e-3, "{:.3f}"), (1e-4, "{:.4f}")):
            if min_non_zero_abs >= smallest:
                return pattern
        return "{:.3e}"

    # --- case 2: something exceeds the threshold: use the 'E' notation
    if not (((-int_to_float_threshold) <= min_finite)
            and (max_finite <= int_to_float_threshold)):
        return "{:.2e}"

    # --- case 3: the larger the numbers, the fewer fractional digits
    for digits, magnitude, pattern in ((0, 1E4, "{:.0f}"),
                                       (1, 1E3, "{:.1f}"),
                                       (2, 1E2, "{:.2f}")):
        if (max_frac_len <= digits) or (min_finite <= -magnitude) \
                or (max_finite >= magnitude):
            return pattern
    return "{:.3f}"

120 

121 

class NumberRenderer:
    """
    A format description for a group of numbers.

    With instances of this class, you can convert a sequence of numbers
    to a sequence of strings with uniform, pleasant formatting. The idea
    is that such numbers can be written, e.g., into a column of a table
    and that this column will then have a nice and uniform appearance.
    In other words, we will avoid situations like the following:
    "1234938845, 1e-20, 0.002, 34757773, 1e30, 0.9998837467"
    which looks rather odd. While the numbers may be displayed correctly,
    the formatting of all numbers is different. If we want to present
    numbers that describe related quantities, we rather want them to all
    have the same format. This class here can achieve this in a customizable
    way.
    """

    def __init__(self,
                 int_to_float_threshold: int | float = 10_000_000_000,
                 get_int_renderer: Callable[[], Callable[[int], str]]
                 = default_get_int_renderer,
                 get_float_format: Callable[
                     [int | float, int | float, int,
                      int | float, int | float], str] =
                 default_get_float_format):
        """
        Create the number group format.

        :param int_to_float_threshold: the threshold above which integers are
            converted to floating point numbers in the 'E' notation. Must be
            positive; `inf` is permitted. A float with an integral finite
            value is stored as `int`.
        :param get_int_renderer: the function to be used to get the renderer
            for all integers and integer parts of floats.
        :param get_float_format: the getter for the float format, i.e., a
            callable accepting the range [min, max] of all finite values to be
            rendered, the maximum length of any fractional part, the minimum
            non-zero absolute value, and the `int_to_float_threshold` value
            and that then returns a string with the float format definition
        :raises TypeError: if an argument has the wrong type
        :raises ValueError: if `int_to_float_threshold` is not positive or
            is `nan`
        """
        super().__init__()
        if not isinstance(int_to_float_threshold, int | float):
            raise type_error(int_to_float_threshold,
                             "int_to_float_threshold", (int, float))
        if (int_to_float_threshold <= 0) or not (
                isfinite(int_to_float_threshold)
                or (int_to_float_threshold >= inf)):
            raise ValueError("invalid int_to_float_threshold="
                             f"{int_to_float_threshold}.")
        # Normalize integral floats (e.g., 1e10) to int. This is done once
        # and only for finite values: the former retry loop never terminated
        # for non-integral floats and crashed with OverflowError on inf.
        if isinstance(int_to_float_threshold, float) \
                and isfinite(int_to_float_threshold) \
                and int_to_float_threshold.is_integer():
            int_to_float_threshold = int(int_to_float_threshold)
        #: the absolute threshold above which all integer numbers must be
        #: converted to floats to render them in the 'E' notation
        self.int_to_float_threshold: Final[int | float] \
            = int_to_float_threshold
        if not callable(get_int_renderer):
            raise type_error(get_int_renderer, "get_int_renderer", call=True)
        #: the function to be used to get the renderer for all integers and
        #: integer parts of floats
        self.get_int_renderer: Final[Callable[[], Callable[[int], str]]] \
            = get_int_renderer
        if not callable(get_float_format):
            raise type_error(get_float_format, "get_float_format", call=True)
        #: the getter for the float format to be used to represent a range of
        #: values
        self.get_float_format: Final[Callable[
            [int | float, int | float, int,
             int | float, int | float], str]] = get_float_format

    def derive(self,
               int_to_float_threshold: int | float | None = None,
               get_int_renderer: Callable[[], Callable[
                   [int], str]] | None = None,
               get_float_format: Callable[[int | float, int | float, int,
                                           int | float, int | float],
                                          str] | None = None) \
            -> "NumberRenderer":
        """
        Derive a new number group format from this one.

        :param int_to_float_threshold: the int-to-float threshold
        :param get_int_renderer: the integer renderer getter
        :param get_float_format: the float format getter
        :returns: a new number group format that differs from the current
            format only in terms of the non-`None` parameters specified;
            `self` if nothing would change

        >>> d = DEFAULT_NUMBER_RENDERER
        >>> d.derive() is d
        True
        >>> d.int_to_float_threshold
        10000000000
        >>> from moptipy.utils.lang import EN
        >>> EN.set_current()
        >>> d.get_int_renderer()(123456789)
        "123'456'789"
        >>> d.get_float_format(-10, 10, 2)
        '{:.2f}'
        >>> d = d.derive(int_to_float_threshold=22)
        >>> d is DEFAULT_NUMBER_RENDERER
        False
        >>> d.int_to_float_threshold
        22
        >>> d = d.derive(get_int_renderer=lambda: lambda x: "bla")
        >>> d.get_int_renderer()(112)
        'bla'
        """
        # pylint: disable=R0916
        if (((int_to_float_threshold is None)
             or (int_to_float_threshold == self.int_to_float_threshold))
                and ((get_int_renderer is None)
                     or (get_int_renderer is self.get_int_renderer))
                and ((get_float_format is None)
                     or (get_float_format is self.get_float_format))):
            return self
        return NumberRenderer(
            self.int_to_float_threshold if
            int_to_float_threshold is None else int_to_float_threshold,
            self.get_int_renderer if get_int_renderer is None
            else get_int_renderer,
            self.get_float_format if get_float_format is None
            else get_float_format)

    def render(self, source: int | float | Iterable[int | float | None] | None,
               none_str: FormattedStr | None = None) \
            -> list[FormattedStr | None]:
        r"""
        Convert a sequence of numbers to text with uniform shape.

        Often, we need to convert a set of numbers to strings as output for a
        table or another representative text. In such a case, you want to
        present all numbers in the set in the same format.

        Imagine you have the number vector `[1E-4, 1/7, 123456789012345678]`.
        If you simply convert this list to a string directly, what you get is
        `[0.0001, 0.14285714285714285, 123456789012345678]`. Now this looks
        very ugly. First, we have one very big number `123456789012345678`.
        If the numbers stem from an experiment, then we are hardly able to
        obtain any number at a very extreme precision. The 18 digits in
        `123456789012345678` sort of suggest a precision to 18 decimals, since
        the number ends in specific digits (as opposed to `123450000000000000`
        which a reader would naturally perceive as a rounded quantity).
        Additionally, we have the number `0.14285714285714285`, which has a
        very long fractional part, which, too, suggests a very high precision.
        Writing both mentioned numbers next to each other, this suggests as if
        we could present a number as high as 10**18 at a precision of 10**-17.
        And it also looks ugly, because both numbers are not uniformly
        formatted. Instead, our function here renders the number list as
        `['1.00*10^-4^', '1.43*10^-1^', '1.23*10^17^']`. It recognizes that we
        should present numbers as powers of ten and then limits the precision
        to three digits.

        This function is thus intended to produce some sort of uniform format
        with reasonable precision uniformly for a numerical vector, under the
        assumption that all numbers should be presented in the same numerical
        range and quantity.

        The `source` is never modified: the numbers are copied before they
        are normalized.

        :param source: the column data
        :param none_str: the string replacement for `None`
        :returns: a list with the text representation
        :raises TypeError: if an argument or a renderer result has the wrong
            type
        :raises ValueError: if `source` is empty or the float format is
            invalid

        >>> from moptipy.utils.lang import EN
        >>> EN.set_current()
        >>> ff = DEFAULT_NUMBER_RENDERER
        >>> ff.render([1.75651, 212, 3234234])
        ['2', '212', "3'234'234"]
        >>> ff.render([1.75651, 22, 34])
        ['1.757', '22.000', '34.000']
        >>> ff.render([1.75651, 122, 34])
        ['1.76', '122.00', '34.00']
        >>> ff.render([1.75651, 122, 3334])
        ['1.8', '122.0', "3'334.0"]
        >>> ff.render([1.5, 212, 3234234])
        ['2', '212', "3'234'234"]
        >>> ff.render([1.5, 2e12, 3234234])
        ['1.50e0', '2.00e12', '3.23e6']
        >>> ff.render([233, 22139283482834, 3234234])
        ['2.33e2', '2.21e13', '3.23e6']
        >>> ff.render([233, 22139283, 3234234])
        ['233', "22'139'283", "3'234'234"]
        >>> from math import nan, inf
        >>> ff.render([22139283, inf, -inf, nan, None])
        ["22'139'283", 'inf', '-inf', 'nan', None]
        >>> ff.render([1E-4, 1/7, 123456789012345678])
        ['1.00e-4', '1.43e-1', '1.23e17']
        >>> ff.render([0, 0.02, 0.1, 1e-3])
        ['0.000', '0.020', '0.100', '0.001']
        >>> ff.render([-0.2, 1e-6, 0.9])
        ['-2.000e-1', '1.000e-6', '9.000e-1']
        >>> ff.render([-0.5, 0.5])
        ['-0.5', '0.5']
        >>> ff.render([1e-7])
        ['1.000e-7']
        """
        if (source is None) or isinstance(source, int | float):
            source = [source]
        if not isinstance(source, Iterable):
            raise type_error(source, "source", Iterable)
        if (none_str is not None) and (
                not isinstance(none_str, FormattedStr)):
            raise type_error(none_str, "none_str", (FormattedStr, None))

        # get the format parameters
        int_renderer: Final[Callable[[int], str]] = \
            self.get_int_renderer()
        if not callable(int_renderer):
            raise type_error(int_renderer, "int_renderer", call=True)
        int_to_float_threshold: Final[int | float] \
            = self.int_to_float_threshold

        # step one: get the raw numerical data
        # We always work on a private copy, because the normalized values
        # are written back into `data` below and must not leak into the
        # list owned by the caller.
        data: Final[list[int | float | None]] = list(source)
        dlen: Final[int] = len(data)
        if dlen <= 0:
            raise ValueError("Data cannot be empty.")

        # step two: investigate the data ranges and structure
        all_is_none: bool = True
        all_is_int: bool = True
        max_finite: int | float = -inf
        min_finite: int | float = inf
        min_non_zero_abs: int | float = inf
        # Starts at 0 (not -1): if every float prints in 'E' notation, no
        # fraction length is ever recorded and a negative length would be
        # rejected by the default float format getter.
        longest_fraction: int = 0
        da: int | float

        for idx, d in enumerate(data):
            if d is None:
                continue
            all_is_none = False
            d2 = try_int(d) if isfinite(d) else d
            if isinstance(d2, int):
                min_finite = min(min_finite, d2)
                max_finite = max(max_finite, d2)
                da = abs(d2)
                if 0 < da < min_non_zero_abs:
                    min_non_zero_abs = da
                # integers beyond the threshold are treated as floats
                if not ((-int_to_float_threshold) <= d2
                        <= int_to_float_threshold):
                    d2 = float(d2)
            if d2 is not d:
                data[idx] = d2

            if isfinite(d2):
                if not isinstance(d2, int):
                    all_is_int = False
                    s = str(d2)
                    if not (("E" in s) or ("e" in s)):
                        i = s.find(".")
                        if i >= 0:
                            longest_fraction = max(
                                longest_fraction, len(s) - i - 1)
                min_finite = min(min_finite, d2)
                max_finite = max(max_finite, d2)
                da = abs(d2)
                if 0 < da < min_non_zero_abs:
                    min_non_zero_abs = da

        # step three: if all data is None, we can return here
        if all_is_none:
            return [none_str] * dlen

        # create the protected integer renderer
        def _to_int(value: int, form=int_renderer) -> str:
            rendered = form(value)
            # validate before touching the result, and report the value
            # that was actually produced by the renderer
            if not isinstance(rendered, str):
                raise type_error(rendered, f"conversion of {value}", str)
            return rendered.strip()

        # step four: if all data are integer, we can convert them directly
        if all_is_int:
            # an int render also processing None and special floats
            def _int_or_special(value: int | float | None,
                                form=_to_int) -> FormattedStr | None:
                if value is None:
                    return none_str
                return FormattedStr.number(form(cast("int", value))
                                           if isfinite(value) else value)
            return [_int_or_special(i) for i in data]

        # ok, we have at least some finite floats that cannot be converted to
        # integers. therefore, we need to convert them to strings based on a
        # floating point number format.
        float_format = self.get_float_format(
            min_finite, max_finite, longest_fraction, min_non_zero_abs,
            int_to_float_threshold)
        if not isinstance(float_format, str):
            raise type_error(float_format,
                             "float format from float_format_getter", str)
        if (len(float_format) <= 0) or ("{" not in float_format) \
                or ("}" not in float_format) or (":" not in float_format):
            raise ValueError(f"invalid float format {float_format!r}.")

        def _render_float(value: int | float | None, ir=_to_int,
                          ff=float_format) -> FormattedStr | None:
            if value is None:
                return none_str
            if not isfinite(value):
                return FormattedStr.number(value)

            res: str = ff.format(value).strip()
            # split off the exponent, if any, and render it as integer
            exp_part: str = ""
            eidx: int = res.find("e")
            if eidx < 0:
                eidx = res.find("E")
            if eidx >= 0:
                exp_part = f"e{ir(int(res[eidx + 1:]))}"
                res = res[:eidx].strip()

            # split the mantissa into integer and fractional part
            int_text: str = res
            frac_part: str = ""
            dotidx: int = res.find(".")
            if dotidx > 0:
                int_text = res[:dotidx]
                frac_part = res[dotidx:].strip()

            int_value: int = int(int_text)
            int_part: str = ir(int_value)
            if len(int_part) <= 0:
                int_part = "0"
            # int("-0") is 0, so the sign of numbers in (-1, 0) would get
            # lost; restore it unless the number was rounded to zero
            if (int_value == 0) and int_text.lstrip().startswith("-") \
                    and any(ch in "123456789" for ch in frac_part):
                int_part = f"-{int_part}"
            return FormattedStr.number(f"{int_part}{frac_part}{exp_part}")

        # step five: render every value with the common float format
        return [_render_float(value) for value in data]

440 

441 

#: the default shared singleton instance of the number group format,
#: constructed with the default arguments of :class:`NumberRenderer`
DEFAULT_NUMBER_RENDERER: Final[NumberRenderer] = NumberRenderer()