Coverage for moptipy / utils / number_renderer.py: 83%
157 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
1"""The numeric format definitions."""
3from math import inf, isfinite
4from typing import Callable, Final, Iterable, cast
6from pycommons.types import check_int_range, type_error
8from moptipy.utils.formatted_string import FormattedStr
9from moptipy.utils.lang import Lang
10from moptipy.utils.math import try_int
def default_get_int_renderer() -> Callable[[int], str]:
    """
    Obtain the integer-to-string function of the current language.

    The function is looked up at call time via `Lang.current()`, so the
    result depends on which language is active at the moment this getter
    is invoked.

    :returns: the `format_int` method of the currently active language

    >>> from moptipy.utils.lang import EN, ZH
    >>> EN.set_current()
    >>> f = default_get_int_renderer()
    >>> f(1_000_000)
    "1'000'000"
    >>> ZH.set_current()
    >>> f = default_get_int_renderer()
    >>> f(1_000_000)
    "100'0000"
    """
    active_language = Lang.current()
    renderer = active_language.format_int
    return cast("Callable[[int], str]", renderer)
def default_get_float_format(
        min_finite: int | float = 0,
        max_finite: int | float = 0,
        max_frac_len: int = 2,
        min_non_zero_abs: int | float = inf,
        int_to_float_threshold: int | float = 10_000_000_000) -> str:
    """
    Select a `str.format` pattern suitable for a whole range of numbers.

    Three situations are distinguished. If all values lie in `[-1, 1]` and
    there is a finite smallest non-zero magnitude, the number of fractional
    digits (1 to 4) is chosen from that magnitude, falling back to
    `'{:.3e}'` for magnitudes below `1e-4`. Otherwise, if all values lie
    within `[-int_to_float_threshold, int_to_float_threshold]`, a
    fixed-point pattern with 0 to 3 fractional digits is chosen: the larger
    the magnitudes and the shorter the fractional parts, the fewer digits.
    In all remaining cases `'{:.2e}'` is returned.

    :param min_finite: the smallest finite value that must be formatted
    :param max_finite: the largest finite value that must be formatted
    :param max_frac_len: the length of the longest fractional part among
        the numbers whose string representation does *not* use the "E"
        notation; must be in `0..100`
    :param min_non_zero_abs: the smallest non-zero absolute value, or
        `inf` if all values are zero
    :param int_to_float_threshold: the magnitude above which numbers are
        rendered in the 'E' notation
    :returns: the format string
    :raises TypeError: if an argument has the wrong type
    :raises ValueError: if an argument has an invalid value

    >>> default_get_float_format(0, 0, 0)
    '{:.0f}'
    >>> default_get_float_format(0, 1e5, 10)
    '{:.0f}'
    >>> default_get_float_format(-1e7, 1e2, 10)
    '{:.0f}'
    >>> default_get_float_format(0, 0, 1)
    '{:.1f}'
    >>> default_get_float_format(0, 1e3, 11)
    '{:.1f}'
    >>> default_get_float_format(-1e3, 1e2, 11)
    '{:.1f}'
    >>> default_get_float_format(0, 0, 2)
    '{:.2f}'
    >>> default_get_float_format(0, 0, 3)
    '{:.3f}'
    >>> default_get_float_format(0, 0, 4)
    '{:.3f}'
    >>> default_get_float_format(0, 1e11, 4)
    '{:.2e}'
    >>> default_get_float_format(-1, 1, 4, 1e-3)
    '{:.3f}'
    """
    # -- argument validation (the order of the checks is significant) ------
    for bound, bound_name in ((min_finite, "min_finite"),
                              (max_finite, "max_finite")):
        if not isinstance(bound, int | float):
            raise type_error(bound, bound_name, (int, float))
    if (not isfinite(min_finite)) or (not isfinite(max_finite)) \
            or (min_finite > max_finite):
        raise ValueError("invalid min_finite, max_finite pair "
                         f"{min_finite}, {max_finite}.")
    check_int_range(max_frac_len, "max_frac_len", 0, 100)

    if not isinstance(int_to_float_threshold, int | float):
        raise type_error(
            int_to_float_threshold, "int_to_float_threshold", (int, float))
    # positive finite numbers and +inf are fine; nan, zero, and negative
    # values are rejected
    threshold_is_usable = isfinite(int_to_float_threshold) \
        or (int_to_float_threshold >= inf)
    if (int_to_float_threshold <= 0) or (not threshold_is_usable):
        raise ValueError(
            f"invalid int_to_float_threshold={int_to_float_threshold}.")

    if not isinstance(min_non_zero_abs, int | float):
        raise type_error(min_non_zero_abs, "min_non_zero_abs", (int, float))
    if min_non_zero_abs <= 0:
        raise ValueError(f"invalid min_non_zero_abs={min_non_zero_abs}")

    # -- case 1: only (small) fractions from [-1, 1] -----------------------
    within_unit_range = (min_finite >= -1) and (max_finite <= 1)
    if within_unit_range and isfinite(min_non_zero_abs):
        # show just enough digits to make the smallest magnitude visible
        for digits, smallest in ((1, 1e-1), (2, 1e-2), (3, 1e-3), (4, 1e-4)):
            if min_non_zero_abs >= smallest:
                return f"{{:.{digits}f}}"
        return "{:.3e}"

    # -- case 2: values exceed the threshold: scientific notation ----------
    below_threshold = ((-int_to_float_threshold) <= min_finite) \
        and (max_finite <= int_to_float_threshold)
    if not below_threshold:
        return "{:.2e}"

    # -- case 3: fixed-point notation; fewer digits for larger numbers -----
    for digits, magnitude in ((0, 1E4), (1, 1E3), (2, 1E2)):
        if (max_frac_len <= digits) or (min_finite <= -magnitude) \
                or (max_finite >= magnitude):
            return f"{{:.{digits}f}}"
    return "{:.3f}"
class NumberRenderer:
    """
    A format description for a group of numbers.

    With instances of this class, you can convert a sequence of numbers
    to a sequence of strings with uniform, pleasant formatting. The idea
    is that such numbers can be written, e.g., into a column of a table
    and that this column will then have a nice and uniform appearance.
    In other words, we will avoid situations like the following:
    "1234938845, 1e-20, 0.002, 34757773, 1e30, 0.9998837467"
    which looks rather odd. While the numbers may be displayed correctly,
    the formatting of all numbers is different. If we want to present
    numbers that describe related quantities, we rather want them to all
    have the same format. This class here can achieve this in a customizable
    way.
    """

    def __init__(self,
                 int_to_float_threshold: int | float = 10_000_000_000,
                 get_int_renderer: Callable[[], Callable[[int], str]]
                 = default_get_int_renderer,
                 get_float_format: Callable[
                     [int | float, int | float, int,
                      int | float, int | float], str] =
                 default_get_float_format):
        """
        Create the number group format.

        :param int_to_float_threshold: the threshold above which integers are
            converted to floating point numbers in the 'E' notation; must be
            positive and may be `inf`. Finite floats with an integer value
            are stored as `int`.
        :param get_int_renderer: the function to be used to get the renderer
            for all integers and integer parts of floats.
        :param get_float_format: the getter for the float format, i.e., a
            callable accepting the range [min, max] of all finite values to be
            rendered, the maximum length of any fractional part, the smallest
            non-zero absolute value, and the `int_to_float_threshold` value
            and that then returns a string with the float format definition
        :raises TypeError: if an argument has the wrong type
        :raises ValueError: if `int_to_float_threshold` is not positive or
            is `nan`

        >>> from math import inf
        >>> NumberRenderer(100.0).int_to_float_threshold
        100
        >>> NumberRenderer(2.5).int_to_float_threshold
        2.5
        >>> NumberRenderer(inf).int_to_float_threshold
        inf
        """
        super().__init__()
        if not isinstance(int_to_float_threshold, int | float):
            raise type_error(int_to_float_threshold,
                             "int_to_float_threshold", (int, float))
        if (int_to_float_threshold <= 0) or not (
                isfinite(int_to_float_threshold)
                or (int_to_float_threshold >= inf)):
            raise ValueError("invalid int_to_float_threshold="
                             f"{int_to_float_threshold}.")
        # Store integer-valued floats as int. This is done once and only for
        # finite values: `int(inf)` would raise an OverflowError, and a
        # non-integer float must simply be kept as it is.
        if isinstance(int_to_float_threshold, float) \
                and isfinite(int_to_float_threshold) \
                and int_to_float_threshold.is_integer():
            int_to_float_threshold = int(int_to_float_threshold)

        #: the absolute threshold above which all integer numbers must be
        #: converted to floats to render them in the 'E' notation
        self.int_to_float_threshold: Final[int | float] \
            = int_to_float_threshold
        if not callable(get_int_renderer):
            raise type_error(
                get_int_renderer, "get_int_renderer", call=True)
        #: the function to be used to get the renderer for all integers and
        #: integer parts of floats
        self.get_int_renderer: Final[Callable[[], Callable[[int], str]]] \
            = get_int_renderer
        if not callable(get_float_format):
            raise type_error(
                get_float_format, "get_float_format", call=True)
        #: the getter for the float format to be used to represent a range of
        #: values
        self.get_float_format: Final[Callable[
            [int | float, int | float, int,
             int | float, int | float], str]] = get_float_format

    def derive(self,
               int_to_float_threshold: int | float | None = None,
               get_int_renderer: Callable[[], Callable[
                   [int], str]] | None = None,
               get_float_format: Callable[[int | float, int | float, int,
                                           int | float, int | float],
                                          str] | None = None) \
            -> "NumberRenderer":
        """
        Derive a new number group format from this one.

        :param int_to_float_threshold: the int-to-float threshold
        :param get_int_renderer: the integer renderer getter
        :param get_float_format: the float format getter
        :returns: a new number group format that differs from the current
            format only in terms of the non-`None` parameters specified;
            this object itself if nothing would change

        >>> d = DEFAULT_NUMBER_RENDERER
        >>> d.derive() is d
        True
        >>> d.int_to_float_threshold
        10000000000
        >>> from moptipy.utils.lang import EN
        >>> EN.set_current()
        >>> d.get_int_renderer()(123456789)
        "123'456'789"
        >>> d.get_float_format(-10, 10, 2)
        '{:.2f}'
        >>> d = d.derive(int_to_float_threshold=22)
        >>> d is DEFAULT_NUMBER_RENDERER
        False
        >>> d.int_to_float_threshold
        22
        >>> d = d.derive(get_int_renderer=lambda: lambda x: "bla")
        >>> d.get_int_renderer()(112)
        'bla'
        """
        # pylint: disable=R0916
        # if no parameter differs from the current setup, reuse this object
        if (((int_to_float_threshold is None)
             or (int_to_float_threshold == self.int_to_float_threshold))
                and ((get_int_renderer is None)
                     or (get_int_renderer is self.get_int_renderer))
                and ((get_float_format is None)
                     or (get_float_format is self.get_float_format))):
            return self
        return NumberRenderer(
            self.int_to_float_threshold if
            int_to_float_threshold is None else int_to_float_threshold,
            self.get_int_renderer if get_int_renderer is None
            else get_int_renderer,
            self.get_float_format if get_float_format is None
            else get_float_format)

    def render(self, source: int | float | Iterable[int | float | None] | None,
               none_str: FormattedStr | None = None) \
            -> list[FormattedStr | None]:
        r"""
        Convert a sequence of numbers to text with uniform shape.

        Often, we need to convert a set of numbers to strings as output for a
        table or another representative text. In such a case, you want to
        present all numbers in the set in the same format.

        Imagine you have the number vector `[1E-4, 1/7, 123456789012345678]`.
        If you simply convert this list to a string directly, what you get is
        `[0.0001, 0.14285714285714285, 123456789012345678]`. Now this looks
        very ugly. First, we have one very big number `123456789012345678`.
        If the numbers stem from an experiment, then we are hardly able to
        obtain any number at a very extreme precision. The 18 digits in
        `123456789012345678` sort of suggest a precision to 18 decimals, since
        the number ends in specific digits (as opposed to `123450000000000000`
        which a reader would naturally perceive as a rounded quantity).
        Additionally, we have the number `0.14285714285714285`, which has a
        very long fractional part, which, too, suggests a very high precision.
        Writing both mentioned numbers next to each other, this suggests as if
        we could present a number as high as 10**18 at a precision of 10**-17.
        And it also looks ugly, because both numbers are not uniformly
        formatted. Instead, our function here renders the number list as
        `['1.00e-4', '1.43e-1', '1.23e17']`. It recognizes that we should
        present numbers as powers of ten and then limits the precision to
        three digits.

        This function is thus intended to produce some sort of uniform format
        with reasonable precision uniformly for a numerical vector, under the
        assumption that all numbers should be presented in the same numerical
        range and quantity.

        The `source` is never modified: the numbers are copied into a new
        list before they are processed.

        :param source: the column data
        :param none_str: the string replacement for `None`
        :returns: a list with the text representation
        :raises TypeError: if `source` or `none_str` have the wrong type or
            if one of the configured getters returns a wrong result
        :raises ValueError: if `source` is empty or the float format is
            invalid

        >>> from moptipy.utils.lang import EN
        >>> EN.set_current()
        >>> ff = DEFAULT_NUMBER_RENDERER
        >>> ff.render([1.75651, 212, 3234234])
        ['2', '212', "3'234'234"]
        >>> ff.render([1.75651, 22, 34])
        ['1.757', '22.000', '34.000']
        >>> ff.render([1.75651, 122, 34])
        ['1.76', '122.00', '34.00']
        >>> ff.render([1.75651, 122, 3334])
        ['1.8', '122.0', "3'334.0"]
        >>> ff.render([1.5, 212, 3234234])
        ['2', '212', "3'234'234"]
        >>> ff.render([1.5, 2e12, 3234234])
        ['1.50e0', '2.00e12', '3.23e6']
        >>> ff.render([233, 22139283482834, 3234234])
        ['2.33e2', '2.21e13', '3.23e6']
        >>> ff.render([233, 22139283, 3234234])
        ['233', "22'139'283", "3'234'234"]
        >>> from math import nan, inf
        >>> ff.render([22139283, inf, -inf, nan, None])
        ["22'139'283", 'inf', '-inf', 'nan', None]
        >>> ff.render([1E-4, 1/7, 123456789012345678])
        ['1.00e-4', '1.43e-1', '1.23e17']
        >>> ff.render([0, 0.02, 0.1, 1e-3])
        ['0.000', '0.020', '0.100', '0.001']
        >>> ff.render([-0.2, 1e-6, 0.9])
        ['-2.000e-1', '1.000e-6', '9.000e-1']
        >>> ff.render([-0.5, 0.5])
        ['-0.5', '0.5']
        >>> ff.render([1e-20, 2e-20])
        ['1.000e-20', '2.000e-20']
        """
        if (source is None) or isinstance(source, int | float):
            source = [source]
        if not isinstance(source, Iterable):
            raise type_error(source, "source", Iterable)
        if (none_str is not None) and (
                not isinstance(none_str, FormattedStr)):
            raise type_error(none_str, "none_str", (FormattedStr, None))

        # get the format parameters
        int_renderer: Final[Callable[[int], str]] = \
            self.get_int_renderer()
        if not callable(int_renderer):
            raise type_error(int_renderer, "int_renderer", call=True)
        int_to_float_threshold: Final[int | float] \
            = self.int_to_float_threshold

        # step one: get the raw numerical data
        # We always copy, because elements are replaced by their normalized
        # form below and the list of the caller must not be changed.
        data: Final[list[int | float | None]] = list(source)
        dlen: Final[int] = len(data)
        if dlen <= 0:
            raise ValueError("Data cannot be empty.")

        # step two: investigate the data ranges and structure
        all_is_none: bool = True
        all_is_int: bool = True
        max_finite: int | float = -inf
        min_finite: int | float = inf
        min_non_zero_abs: int | float = inf
        # The length of the longest fractional part starts at 0 (and not at
        # -1): if all floats use the 'E' notation, it is never updated, and
        # the float format getter must still receive a valid length.
        longest_fraction: int = 0
        da: int | float

        for idx, d in enumerate(data):
            if d is None:
                continue
            all_is_none = False
            d2 = try_int(d) if isfinite(d) else d
            if isinstance(d2, int):
                min_finite = min(min_finite, d2)
                max_finite = max(max_finite, d2)
                da = abs(d2)
                if 0 < da < min_non_zero_abs:
                    min_non_zero_abs = da
                # integers beyond the threshold are rendered as floats
                if not ((-int_to_float_threshold) <= d2
                        <= int_to_float_threshold):
                    d2 = float(d2)
            if d2 is not d:
                data[idx] = d2  # store the normalized value in our copy

            if isfinite(d2):
                if not isinstance(d2, int):
                    all_is_int = False
                    text = str(d2)
                    # only fractions of numbers not in 'E' notation count
                    if not (("E" in text) or ("e" in text)):
                        i = text.find(".")
                        if i >= 0:
                            i = len(text) - i - 1
                            longest_fraction = max(longest_fraction, i)
                min_finite = min(min_finite, d2)
                max_finite = max(max_finite, d2)
                da = abs(d2)
                if 0 < da < min_non_zero_abs:
                    min_non_zero_abs = da

        # step three: if all data is None, we can return here
        if all_is_none:
            return [none_str] * dlen

        # create the protected integer renderer
        def _to_int(value: int, form=int_renderer) -> str:
            sv = form(value)
            # check the type before using the result as a string
            if not isinstance(sv, str):
                raise type_error(sv, f"conversion of {value}", str)
            return sv.strip()

        # step four: if all data are integer, we can convert them directly
        if all_is_int:
            # an int render also processing None and special floats
            def _int_or_special(value: int | float | None, ns=none_str,
                                form=_to_int) -> FormattedStr | None:
                if value is None:
                    return ns
                return FormattedStr.number(form(cast("int", value))
                                           if isfinite(value) else value)
            return [_int_or_special(i) for i in data]

        # ok, we have at least some finite floats that cannot be converted to
        # integers. therefore, we need to convert them to strings based on a
        # floating point number format.
        float_format = self.get_float_format(
            min_finite, max_finite, longest_fraction, min_non_zero_abs,
            int_to_float_threshold)
        if not isinstance(float_format, str):
            raise type_error(float_format,
                             "float format from float_format_getter", str)
        if (len(float_format) <= 0) or ("{" not in float_format) \
                or ("}" not in float_format) or (":" not in float_format):
            raise ValueError(f"invalid float format {float_format!r}.")

        def _render_float(value: int | float | None, ir=_to_int,
                          ff=float_format,
                          ns=none_str) -> FormattedStr | None:
            if value is None:
                return ns
            if not isfinite(value):
                return FormattedStr.number(value)  # inf, -inf, nan

            # format the number, then split it into integer part, fractional
            # part, and exponent, so that the two integers in it can be
            # rendered by the (language-specific) integer renderer
            res: str = ff.format(value).strip()
            frac_part: str = ""
            exp_part: str = ""
            eidx: int = res.find("e")
            if eidx < 0:
                eidx = res.find("E")
            if eidx >= 0:
                exp_part = f"e{ir(int(res[eidx + 1:])).strip()}"
                res = res[:eidx].strip()
            dotidx: int = res.find(".")
            int_value: int
            if dotidx <= 0:
                int_value = int(res)
            else:
                int_value = int(res[:dotidx])
                frac_part = res[dotidx:].strip()
            int_part: str = ir(int_value)
            if len(int_part) <= 0:
                int_part = "0"
            # `int("-0")` is 0, so the sign of a value in (-1, 0) is lost by
            # the integer conversion. We restore it if the rendered number
            # is not zero, i.e., if there is a non-zero fractional digit.
            if (int_value == 0) and res.startswith("-") and any(
                    ch in "123456789" for ch in frac_part):
                int_part = f"-{int_part}"
            return FormattedStr.number(f"{int_part}{frac_part}{exp_part}")

        # step five: render all values with the uniform float format
        return [_render_float(value) for value in data]
#: the default shared singleton instance of the number group format; it is
#: created without arguments, i.e., with the default int-to-float threshold,
#: the default integer renderer getter, and the default float format getter
DEFAULT_NUMBER_RENDERER: Final[NumberRenderer] = NumberRenderer()