Coverage for moptipy/utils/number_renderer.py

1"""The numeric format definitions."""

3from math import inf, isfinite

4from typing import Callable, Final, Iterable, cast

6from pycommons.types import check_int_range, type_error

8from moptipy.utils.formatted_string import FormattedStr

9from moptipy.utils.lang import Lang

10from moptipy.utils.math import try_int

def default_get_int_renderer() -> Callable[[int], str]:
    """
    Obtain the integer renderer of the currently active language.

    The language is looked up when this function is called, so a renderer
    obtained before a language switch keeps using the earlier language.

    :returns: the integer formatting function of the language that is
        current at the time of the call

    >>> from moptipy.utils.lang import EN, ZH
    >>> EN.set_current()
    >>> f = default_get_int_renderer()
    >>> f(1_000_000)
    "1'000'000"
    >>> ZH.set_current()
    >>> f = default_get_int_renderer()
    >>> f(1_000_000)
    "100'0000"
    """
    active_language = Lang.current()
    renderer = active_language.format_int
    return cast("Callable[[int], str]", renderer)

def default_get_float_format(
        min_finite: int | float = 0,
        max_finite: int | float = 0,
        max_frac_len: int = 2,
        min_non_zero_abs: int | float = inf,
        int_to_float_threshold: int | float = 10_000_000_000) -> str:
    """
    Choose a float format string suitable for a whole range of numbers.

    :param min_finite: the smallest finite value that has to be formatted
    :param max_finite: the largest finite value that has to be formatted
    :param max_frac_len: the length of the longest fractional part among
        those numbers whose plain string form is *not* in "E" notation
    :param min_non_zero_abs: the smallest non-zero absolute value, or `inf`
        if all values are zero
    :param int_to_float_threshold: the magnitude beyond which numbers are
        rendered in the 'E' notation
    :returns: a format string such as `'{:.2f}'` or `'{:.2e}'`
    :raises ValueError: if the range is not finite or `min_finite` exceeds
        `max_finite`, if `int_to_float_threshold` is not positive or is
        `nan`, or if `min_non_zero_abs` is not positive

    >>> default_get_float_format(0, 0, 0)
    '{:.0f}'
    >>> default_get_float_format(0, 1e5, 10)
    '{:.0f}'
    >>> default_get_float_format(-1e7, 1e2, 10)
    '{:.0f}'
    >>> default_get_float_format(0, 0, 1)
    '{:.1f}'
    >>> default_get_float_format(0, 1e3, 11)
    '{:.1f}'
    >>> default_get_float_format(-1e3, 1e2, 11)
    '{:.1f}'
    >>> default_get_float_format(0, 0, 2)
    '{:.2f}'
    >>> default_get_float_format(0, 0, 3)
    '{:.3f}'
    >>> default_get_float_format(0, 0, 4)
    '{:.3f}'
    >>> default_get_float_format(0, 1e11, 4)
    '{:.2e}'
    >>> default_get_float_format(-1, 1, 4, 1e-3)
    '{:.3f}'
    """
    # --- argument validation, in the same order as the parameters matter
    for bound, label in ((min_finite, "min_finite"),
                         (max_finite, "max_finite")):
        if not isinstance(bound, int | float):
            raise type_error(bound, label, (int, float))
    if (not isfinite(min_finite)) or (not isfinite(max_finite)) \
            or (min_finite > max_finite):
        raise ValueError("invalid min_finite, max_finite pair "
                         f"{min_finite}, {max_finite}.")
    check_int_range(max_frac_len, "max_frac_len", 0, 100)

    limit = int_to_float_threshold
    if not isinstance(limit, int | float):
        raise type_error(limit, "int_to_float_threshold", (int, float))
    # positive finite numbers and +inf are fine; nan and non-positives not
    if (limit <= 0) or ((not isfinite(limit)) and not (limit >= inf)):
        raise ValueError(f"invalid int_to_float_threshold={limit}.")

    if not isinstance(min_non_zero_abs, int | float):
        raise type_error(min_non_zero_abs, "min_non_zero_abs", (int, float))
    if min_non_zero_abs <= 0:
        raise ValueError(f"invalid min_non_zero_abs={min_non_zero_abs}")

    # --- everything inside [-1, 1]: pick enough digits to still show the
    # smallest non-zero magnitude, or switch to 'E' notation if too small
    only_fractions = (min_finite >= -1) and (max_finite <= 1)
    if only_fractions and isfinite(min_non_zero_abs):
        for digits, smallest in enumerate((1e-1, 1e-2, 1e-3, 1e-4), start=1):
            if min_non_zero_abs >= smallest:
                return f"{{:.{digits}f}}"
        return "{:.3e}"

    # --- values beyond the threshold are always shown in 'E' notation
    if not (((-limit) <= min_finite) and (max_finite <= limit)):
        return "{:.2e}"

    # --- otherwise: the larger the magnitudes, the fewer fractional digits
    for digits, magnitude in enumerate((1E4, 1E3, 1E2)):
        if (max_frac_len <= digits) or (min_finite <= -magnitude) \
                or (max_finite >= magnitude):
            return f"{{:.{digits}f}}"
    return "{:.3f}"

120

121

class NumberRenderer:
    """
    A format description for a group of numbers.

    With instances of this class, you can convert a sequence of numbers
    to a sequence of strings with uniform, pleasant formatting. The idea
    is that such numbers can be written, e.g., into a column of a table
    and that this column will then have a nice and uniform appearance.
    In other words, we will avoid situations like the following:
    "1234938845, 1e-20, 0.002, 34757773, 1e30, 0.9998837467"
    which looks rather odd. While the numbers may be displayed correctly,
    the formatting of all numbers is different. If we want to present
    numbers that describe related quantities, we rather want them to all
    have the same format. This class here can achieve this in a customizable
    way.
    """

    def __init__(self,
                 int_to_float_threshold: int | float = 10_000_000_000,
                 get_int_renderer: Callable[[], Callable[[int], str]]
                 = default_get_int_renderer,
                 get_float_format: Callable[
                     [int | float, int | float, int,
                      int | float, int | float], str] =
                 default_get_float_format):
        """
        Create the number group format.

        :param int_to_float_threshold: the threshold above which integers are
            converted to floating point numbers in the 'E' notation. Must be
            positive; `inf` is permitted. A finite float with an integral
            value is stored as `int`.
        :param get_int_renderer: the function to be used to get the renderer
            for all integers and integer parts of floats.
        :param get_float_format: the getter for the float format, i.e., a
            callable accepting the range [min, max] of all finite values to be
            rendered, the maximum length of any fractional part, the minimum
            non-zero absolute value, and the `int_to_float_threshold` value
            and that then returns a string with the float format definition
        :raises ValueError: if `int_to_float_threshold` is not positive or
            is `nan`

        >>> NumberRenderer(1e3).int_to_float_threshold
        1000
        >>> NumberRenderer(inf).int_to_float_threshold
        inf
        """
        super().__init__()
        if not isinstance(int_to_float_threshold, int | float):
            raise type_error(int_to_float_threshold,
                             "int_to_float_threshold", (int, float))
        if (int_to_float_threshold <= 0) or not (
                isfinite(int_to_float_threshold)
                or (int_to_float_threshold >= inf)):
            raise ValueError("invalid int_to_float_threshold="
                             f"{int_to_float_threshold}.")
        # Store integral floats as int. `inf` passed the validation above but
        # `int(inf)` raises OverflowError, so only finite values are converted.
        if isinstance(int_to_float_threshold, float) \
                and isfinite(int_to_float_threshold) \
                and int_to_float_threshold.is_integer():
            int_to_float_threshold = int(int_to_float_threshold)
        #: the absolute threshold above which all integer numbers must be
        #: converted to floats to render them in the 'E' notation
        self.int_to_float_threshold: Final[int | float] \
            = int_to_float_threshold
        if not callable(get_int_renderer):
            raise type_error(get_int_renderer, "get_int_renderer", call=True)
        #: the function to be used to get the renderer for all integers and
        #: integer parts of floats
        self.get_int_renderer: Final[Callable[[], Callable[[int], str]]] \
            = get_int_renderer
        # fail early instead of only when `render` first needs the format
        if not callable(get_float_format):
            raise type_error(get_float_format, "get_float_format", call=True)
        #: the getter for the float format to be used to represent a range of
        #: values
        self.get_float_format: Final[Callable[
            [int | float, int | float, int,
             int | float, int | float], str]] = get_float_format

    def derive(self,
               int_to_float_threshold: int | float | None = None,
               get_int_renderer: Callable[[], Callable[
                   [int], str]] | None = None,
               get_float_format: Callable[[int | float, int | float, int,
                                           int | float, int | float],
                                          str] | None = None) \
            -> "NumberRenderer":
        """
        Derive a new number group format from this one.

        :param int_to_float_threshold: the int-to-float threshold
        :param get_int_renderer: the integer renderer getter
        :param get_float_format: the float format getter
        :returns: a new number group format that differs from the current
            format only in terms of the non-`None` parameters specified; if
            nothing would change, this very object is returned

        >>> d = DEFAULT_NUMBER_RENDERER
        >>> d.derive() is d
        True
        >>> d.int_to_float_threshold
        10000000000
        >>> from moptipy.utils.lang import EN
        >>> EN.set_current()
        >>> d.get_int_renderer()(123456789)
        "123'456'789"
        >>> d.get_float_format(-10, 10, 2)
        '{:.2f}'
        >>> d = d.derive(int_to_float_threshold=22)
        >>> d is DEFAULT_NUMBER_RENDERER
        False
        >>> d.int_to_float_threshold
        22
        >>> d = d.derive(get_int_renderer=lambda: lambda x: "bla")
        >>> d.get_int_renderer()(112)
        'bla'
        """
        # pylint: disable=R0916
        if (((int_to_float_threshold is None)
             or (int_to_float_threshold == self.int_to_float_threshold))
                and ((get_int_renderer is None)
                     or (get_int_renderer is self.get_int_renderer))
                and ((get_float_format is None)
                     or (get_float_format is self.get_float_format))):
            return self
        return NumberRenderer(
            self.int_to_float_threshold if
            int_to_float_threshold is None else int_to_float_threshold,
            self.get_int_renderer if get_int_renderer is None
            else get_int_renderer,
            self.get_float_format if get_float_format is None
            else get_float_format)

    def render(self, source: int | float | None | Iterable[
               int | float | None],
               none_str: FormattedStr | None = None) \
            -> list[FormattedStr | None]:
        r"""
        Convert a sequence of numbers to text with uniform shape.

        Often, we need to convert a set of numbers to strings as output for a
        table or another representative text. In such a case, you want to
        present all numbers in the set in the same format.

        Imagine you have the number vector `[1E-4, 1/7, 123456789012345678]`.
        If you simply convert this list to a string directly, what you get is
        `[0.0001, 0.14285714285714285, 123456789012345678]`. Now this looks
        very ugly. First, we have one very big number `123456789012345678`.
        If the numbers stem from an experiment, then we are hardly able to
        obtain any number at a very extreme precision. The 18 digits in
        `123456789012345678` sort of suggest a precision to 18 decimals, since
        the number ends in specific digits (as opposed to `123450000000000000`
        which a reader would naturally perceive as a rounded quantity).
        Additionally, we have the number `0.14285714285714285`, which has a
        very long fractional part, which, too, suggests a very high precision.
        Writing both mentioned numbers next to each other, this suggests as if
        we could present a number as high as 10**18 at a precision of 10**-17.
        And it also looks ugly, because both numbers are not uniformly
        formatted. Instead, our function here renders the number list as
        `['1.00*10^-4^', '1.43*10^-1^', '1.23*10^17^']`. It recognizes that we
        should present numbers as powers of ten and then limits the precision
        to three digits.

        This function is thus intended to produce some sort of uniform format
        with reasonable precision uniformly for a numerical vector, under the
        assumption that all numbers should be presented in the same numerical
        range and quantity.

        The `source` is never modified: rendering works on a private copy.

        :param source: the column data, either a single number (or `None`)
            or an iterable of numbers and `None` values
        :param none_str: the string replacement for `None`
        :returns: a list with the text representation
        :raises ValueError: if `source` is empty or the float format getter
            returns a string that is not a format specification

        >>> from moptipy.utils.lang import EN
        >>> EN.set_current()
        >>> ff = DEFAULT_NUMBER_RENDERER
        >>> ff.render([1.75651, 212, 3234234])
        ['2', '212', "3'234'234"]
        >>> ff.render([1.75651, 22, 34])
        ['1.757', '22.000', '34.000']
        >>> ff.render([1.75651, 122, 34])
        ['1.76', '122.00', '34.00']
        >>> ff.render([1.75651, 122, 3334])
        ['1.8', '122.0', "3'334.0"]
        >>> ff.render([1.5, 212, 3234234])
        ['2', '212', "3'234'234"]
        >>> ff.render([1.5, 2e12, 3234234])
        ['1.50e0', '2.00e12', '3.23e6']
        >>> ff.render([233, 22139283482834, 3234234])
        ['2.33e2', '2.21e13', '3.23e6']
        >>> ff.render([233, 22139283, 3234234])
        ['233', "22'139'283", "3'234'234"]
        >>> from math import nan, inf
        >>> ff.render([22139283, inf, -inf, nan, None])
        ["22'139'283", 'inf', '-inf', 'nan', None]
        >>> ff.render([1E-4, 1/7, 123456789012345678])
        ['1.00e-4', '1.43e-1', '1.23e17']
        >>> ff.render([0, 0.02, 0.1, 1e-3])
        ['0.000', '0.020', '0.100', '0.001']
        >>> ff.render([-0.2, 1e-6, 0.9])
        ['-2.000e-1', '1.000e-6', '9.000e-1']
        >>> ff.render([-0.5, 0.5])
        ['-0.5', '0.5']
        """
        if (source is None) or isinstance(source, int | float):
            source = [source]
        if not isinstance(source, Iterable):
            raise type_error(source, "source", Iterable)
        if (none_str is not None) and (
                not isinstance(none_str, FormattedStr)):
            raise type_error(none_str, "none_str", (FormattedStr, None))

        # get the format parameters
        int_renderer: Final[Callable[[int], str]] = \
            self.get_int_renderer()
        if not callable(int_renderer):
            raise type_error(int_renderer, "int_renderer", call=True)
        int_to_float_threshold: Final[int | float] \
            = self.int_to_float_threshold

        # step one: get the raw numerical data
        # The loop below normalizes entries in place, so we always work on a
        # copy and never write into a list owned by the caller.
        data: Final[list[int | float | None]] = list(source)
        dlen: Final[int] = len(data)
        if dlen <= 0:
            raise ValueError("Data cannot be empty.")

        # step two: investigate the data ranges and structure
        all_is_none: bool = True
        all_is_int: bool = True
        max_finite: int | float = -inf
        min_finite: int | float = inf
        min_non_zero_abs: int | float = inf
        longest_fraction: int = -1
        da: int | float

        for idx, d in enumerate(data):
            if d is None:
                continue
            all_is_none = False
            # floats with integral values are treated as integers
            d2 = try_int(d) if isfinite(d) else d
            if isinstance(d2, int):
                # record the exact integer before a possible float conversion
                min_finite = min(min_finite, d2)
                max_finite = max(max_finite, d2)
                da = abs(d2)
                if 0 < da < min_non_zero_abs:
                    min_non_zero_abs = da
                # integers beyond the threshold are rendered as floats
                if not ((-int_to_float_threshold) <= d2
                        <= int_to_float_threshold):
                    d2 = float(d2)
            if d2 is not d:
                data[idx] = d2

            if isfinite(d2):
                if not isinstance(d2, int):
                    all_is_int = False
                    # length of the fractional part of the plain (non-'E')
                    # string form of the float
                    s = str(d2)
                    if not (("E" in s) or ("e" in s)):
                        i = s.find(".")
                        if i >= 0:
                            i = len(s) - i - 1
                            longest_fraction = max(longest_fraction, i)
                min_finite = min(min_finite, d2)
                max_finite = max(max_finite, d2)
                da = abs(d2)
                if 0 < da < min_non_zero_abs:
                    min_non_zero_abs = da

        # step three: if all data is None, we can return here
        if all_is_none:
            return [none_str] * dlen

        # create the protected integer renderer
        def _to_int(value: int, form=int_renderer) -> str:
            # validate the result *before* using it as a string
            text = form(value)
            if not isinstance(text, str):
                raise type_error(text, f"conversion of {value}", str)
            return text.strip()

        # step four: if all data are integer, we can convert them directly
        if all_is_int:
            # None maps to none_str, non-finite floats are passed through
            return [none_str if value is None
                    else FormattedStr.number(
                        _to_int(cast("int", value))
                        if isfinite(value) else value)
                    for value in data]

        # ok, we have at least some finite floats that cannot be converted to
        # integers. therefore, we need to convert them to strings based on a
        # floating point number format.
        float_format = self.get_float_format(
            min_finite, max_finite, longest_fraction, min_non_zero_abs,
            int_to_float_threshold)
        if not isinstance(float_format, str):
            raise type_error(float_format,
                             "float format from float_format_getter", str)
        if (len(float_format) <= 0) or ("{" not in float_format) \
                or ("}" not in float_format) or (":" not in float_format):
            raise ValueError(f"invalid float format {float_format!r}.")

        def _render_float(value: int | float | None, ir=_to_int,
                          ff=float_format) -> FormattedStr | None:
            if value is None:
                return none_str
            if not isfinite(value):
                return FormattedStr.number(value)

            res: str = ff.format(value).strip()
            frac_part: str = ""
            exp_part: str = ""

            # split off the exponent and render it like an integer
            eidx: int = res.find("e")
            if eidx < 0:
                eidx = res.find("E")
            if eidx >= 0:
                exp_part = f"e{ir(int(res[eidx + 1:])).strip()}"
                res = res[:eidx].strip()

            # split the mantissa into integer part and fraction
            dotidx: int = res.find(".")
            if dotidx <= 0:
                int_value = int(res)
            else:
                int_value = int(res[:dotidx])
                frac_part = res[dotidx:].strip()
            int_part: str = ir(int_value)
            if len(int_part) <= 0:
                int_part = "0"
            # int("-0") is 0, so the sign of values in (-1, 0) would get
            # lost here; keep it unless the value was rounded to zero.
            if (int_value == 0) and res.startswith("-") \
                    and any(c in "123456789" for c in frac_part):
                int_part = f"-{int_part}"
            return FormattedStr.number(f"{int_part}{frac_part}{exp_part}")

        # step five: render all values with the common float format
        return [_render_float(value) for value in data]

440

441

#: the default shared singleton instance of the number group format,
#: created with the default arguments of :class:`NumberRenderer`
DEFAULT_NUMBER_RENDERER: Final[NumberRenderer] = NumberRenderer()

Coverage for moptipy / utils / number_renderer.py: 83%

157 statements