"""The numeric format definitions."""
from math import inf, isfinite
from typing import Callable, Final, Iterable, cast
from pycommons.types import check_int_range, type_error
from moptipy.utils.formatted_string import FormattedStr
from moptipy.utils.lang import Lang
from moptipy.utils.math import try_int
[docs]
def default_get_int_renderer() -> Callable[[int], str]:
"""
Get the default integer renderer.
:returns: the default integer renderer, which uses the integer rendering
of the currently active language setting.
>>> from moptipy.utils.lang import EN, ZH
>>> EN.set_current()
>>> f = default_get_int_renderer()
>>> f(1_000_000)
"1'000'000"
>>> ZH.set_current()
>>> f = default_get_int_renderer()
>>> f(1_000_000)
"100'0000"
"""
return cast(Callable[[int], str], Lang.current().format_int)
[docs]
class NumberRenderer:
"""
A format description for a group of numbers.
With instances of this class, you can convert a sequence of numbers
to a sequence of strings with uniform, pleasant formatting. The idea
is that such numbers can be written, e.g., into a column of a table
and that this column will then have a nice and uniform appearance.
In other words, we will avoid situations like the following:
"1234938845, 1e-20, 0.002, 34757773, 1e30, 0.9998837467"
which looks rather odd. While the numbers may be displayed correctly,
the formatting of all numbers is different. If we want to present
numbers that describe related quantities, we rather want them to all
have the same format. This class here can achieve this in a customizable
way.
"""
def __init__(self,
int_to_float_threshold: int | float = 10_000_000_000,
get_int_renderer: Callable[[], Callable[[int], str]]
= default_get_int_renderer,
get_float_format: Callable[
[int | float, int | float, int,
int | float, int | float], str] =
default_get_float_format):
"""
Create the number group format.
:param int_to_float_threshold: the threshold above which integers are
converted to floating point numbers in the 'E' notation.
:param get_int_renderer: the function to be used to get the renderer
for all integers and integer parts of floats.
:param get_float_format: the getter for the float format, i.e., a
callable accepting the range [min, max] of all finite values to be
rendered, the maximum length of any fractional part, and the
`int_to_float_threshold` value and that then returns a string with
the float format definition
"""
super().__init__()
while True:
if not isinstance(int_to_float_threshold, int | float):
raise type_error(int_to_float_threshold,
"int_to_float_threshold", (int, float))
if (int_to_float_threshold <= 0) or not (
isfinite(int_to_float_threshold)
or (int_to_float_threshold >= inf)):
raise ValueError("invalid int_to_float_threshold="
f"{int_to_float_threshold}.")
if isinstance(int_to_float_threshold, float):
a = int(int_to_float_threshold)
if a == int_to_float_threshold:
int_to_float_threshold = a
else:
break
#: the absolute threshold above which all integer numbers must be
#: converted to floats to render them in the 'E' notation
self.int_to_float_threshold: Final[int | float] \
= int_to_float_threshold
if not callable(get_int_renderer):
raise type_error(get_int_renderer, "int_renderer", call=True)
#: the function to be used to get the renderer for all integers and
#: integer parts of floats
self.get_int_renderer: Final[Callable[[], Callable[[int], str]]] \
= get_int_renderer
#: the getter for the float format to be used to represent a range of
#: values
self.get_float_format: Final[Callable[
[int | float, int | float, int,
int | float, int | float], str]] = get_float_format
[docs]
def derive(self,
int_to_float_threshold: int | float | None = None,
get_int_renderer: Callable[[], Callable[
[int], str]] | None = None,
get_float_format: Callable[[int | float, int | float, int,
int | float, int | float],
str] | None = None) \
-> "NumberRenderer":
"""
Derive a new number group format from this one.
:param int_to_float_threshold: the int-to-float threshold
:param get_int_renderer: the integer renderer getter
:param get_float_format: the float format getter
:returns: a new number group format that differs from the current
format only in terms of the non-`None` parameters specified
>>> d = DEFAULT_NUMBER_RENDERER
>>> d.derive() is d
True
>>> d.int_to_float_threshold
10000000000
>>> from moptipy.utils.lang import EN
>>> EN.set_current()
>>> d.get_int_renderer()(123456789)
"123'456'789"
>>> d.get_float_format(-10, 10, 2)
'{:.2f}'
>>> d = d.derive(int_to_float_threshold=22)
>>> d is DEFAULT_NUMBER_RENDERER
False
>>> d.int_to_float_threshold
22
>>> d = d.derive(get_int_renderer=lambda: lambda x: "bla")
>>> d.get_int_renderer()(112)
'bla'
"""
# pylint: disable=R0916
if (((int_to_float_threshold is None)
or (int_to_float_threshold == self.int_to_float_threshold))
and ((get_int_renderer is None)
or (get_int_renderer is self.get_int_renderer))
and ((get_float_format is None)
or (get_float_format is self.get_float_format))):
return self
return NumberRenderer(
self.int_to_float_threshold if
int_to_float_threshold is None else int_to_float_threshold,
self.get_int_renderer if get_int_renderer is None
else get_int_renderer,
self.get_float_format if get_float_format is None
else get_float_format)
[docs]
def render(self, source: int | float | None | Iterable[int | float | None],
none_str: FormattedStr | None = None) \
-> list[FormattedStr | None]:
r"""
Convert a sequence of numbers to text with uniform shape.
Often, we need to convert a set of numbers to strings as output for a
table or another representative thext. In such a case, you want to
present all numbers in the set in the same format.
Imagine you have the number vector `[1E-4, 1/7, 123456789012345678]`.
If you simply convert this list to a string directly, what you get is
`[0.0001, 0.14285714285714285, 123456789012345678]`. Now this looks
very ugly. First, we have one very big number `123456789012345678`.
If the numbers stem from an experiment, then we are hardly able to
obtain any number at a very extreme precision. The 18 digits in
`123456789012345678` sort of suggest a precision to 18 decimals, since
the number ends in specific digits (as opposed to `123450000000000000`
which a reader would naturally preceive as a rounded quantity).
Additionally, we have the number `0.14285714285714285`, which has a
very long fractional part, which, too, suggests a very high precision.
Writing both mentioned numbers next to each other, this suggests as if
we could present a number as high as 10**18 at a precision of 10**-17.
And it also looks ugly, because both numbers are not uniformly
formatted. Instead, our function here renders the number list as
`['1.00*10^-4^', '1.43*10^-1^', '1.23*10^17^']`. It recognizes that we
should present numbers as powers of ten and then limits the precision
to three digits.
This function is thus intended to produce some sort of uniform format
with reasonable precision uniformly for a numerical vector, under the
assumption that all numbers should be presented in the same numerical
range and quantity.
:param source: the column data
:param none_str: the string replacement for `None`
:returns: a list with the text representation
>>> from moptipy.utils.lang import EN
>>> EN.set_current()
>>> ff = DEFAULT_NUMBER_RENDERER
>>> ff.render([1.75651, 212, 3234234])
['2', '212', "3'234'234"]
>>> ff.render([1.75651, 22, 34])
['1.757', '22.000', '34.000']
>>> ff.render([1.75651, 122, 34])
['1.76', '122.00', '34.00']
>>> ff.render([1.75651, 122, 3334])
['1.8', '122.0', "3'334.0"]
>>> ff.render([1.5, 212, 3234234])
['2', '212', "3'234'234"]
>>> ff.render([1.5, 2e12, 3234234])
['1.50e0', '2.00e12', '3.23e6']
>>> ff.render([233, 22139283482834, 3234234])
['2.33e2', '2.21e13', '3.23e6']
>>> ff.render([233, 22139283, 3234234])
['233', "22'139'283", "3'234'234"]
>>> from math import nan, inf
>>> ff.render([22139283, inf, -inf, nan, None])
["22'139'283", 'inf', '-inf', 'nan', None]
>>> ff.render([1E-4, 1/7, 123456789012345678])
['1.00e-4', '1.43e-1', '1.23e17']
>>> ff.render([0, 0.02, 0.1, 1e-3])
['0.000', '0.020', '0.100', '0.001']
>>> ff.render([-0.2, 1e-6, 0.9])
['-2.000e-1', '1.000e-6', '9.000e-1']
"""
if (source is None) or isinstance(source, int | float):
source = [source]
if not isinstance(source, Iterable):
raise type_error(source, "source", Iterable)
if (none_str is not None) and (
not isinstance(none_str, FormattedStr)):
raise type_error(none_str, "none_str", (FormattedStr, None))
# get the format parameters
int_renderer: Final[Callable[[int], str]] = \
self.get_int_renderer()
if not callable(int_renderer):
raise type_error(int_renderer, "int_renderer", call=True)
int_to_float_threshold: Final[int | float] \
= self.int_to_float_threshold
# step one: get the raw numerical data
data: Final[list[int | float | None]] = \
cast(list, source) if isinstance(source, list) else list(source)
dlen: Final[int] = len(data)
if dlen <= 0:
raise ValueError("Data cannot be empty.")
# step two: investigate the data ranges and structure
all_is_none: bool = True
all_is_int: bool = True
max_finite: int | float = -inf
min_finite: int | float = inf
min_non_zero_abs: int | float = inf
longest_fraction: int = -1
da: int | float
for idx, d in enumerate(data):
if d is None:
continue
all_is_none = False
d2 = try_int(d) if isfinite(d) else d
if isinstance(d2, int):
min_finite = min(min_finite, d2)
max_finite = max(max_finite, d2)
da = abs(d2)
if 0 < da < min_non_zero_abs:
min_non_zero_abs = da
if not ((-int_to_float_threshold) <= d2
<= int_to_float_threshold):
d2 = float(d2)
if d2 is not d:
data[idx] = d2
if isfinite(d2):
if not isinstance(d2, int):
all_is_int = False
s = str(d2)
if not (("E" in s) or ("e" in s)):
i = s.find(".")
if i >= 0:
i = len(s) - i - 1
longest_fraction = max(longest_fraction, i)
min_finite = min(min_finite, d2)
max_finite = max(max_finite, d2)
da = abs(d2)
if 0 < da < min_non_zero_abs:
min_non_zero_abs = da
# step three: if all data is None, we can return here
if all_is_none:
return [none_str] * dlen
# create the protected integer renderer
def __toint(value: int, form=int_renderer) -> str:
sv: str = form(value).strip()
if (sv is not None) and (not isinstance(sv, str)):
raise type_error(s, f"conversion of {value}", (str, None))
return sv
# step four: if all data are integer, we can convert them directly
if all_is_int:
# an int render also processing None and special floats
def __toint2(value: None | int | float, _ns=none_str,
form=__toint) -> FormattedStr | None:
if value is None:
return none_str
return FormattedStr.number(form(cast(int, value))
if isfinite(value) else value)
return [__toint2(i) for i in data]
# ok, we have at least some finite floats that cannot be converted to
# integers. therefore, we need to convert them to strings based on a
# floating point number format.
float_format = self.get_float_format(
min_finite, max_finite, longest_fraction, min_non_zero_abs,
int_to_float_threshold)
if not isinstance(float_format, str):
raise type_error(float_format,
"float format from float_format_getter", str)
if (len(float_format) <= 0) or ("{" not in float_format) \
or ("}" not in float_format) or (":" not in float_format):
raise ValueError(f"invalid float format {float_format!r}.")
def __render_float(value: int | float, ir=__toint,
ff=float_format) -> FormattedStr:
if value is None:
return none_str
if isfinite(value):
res: str = ff.format(value).strip()
int_part: str
frac_part: str = ""
exp_part: str = ""
eidx: int = res.find("e")
if eidx < 0:
eidx = res.find("E")
if eidx >= 0:
exp_part = f"e{ir(int(res[eidx + 1:])).strip()}"
res = res[:eidx].strip()
dotidx: int = res.find(".")
if dotidx <= 0:
int_part = ir(int(res))
else:
int_part = ir(int(res[:dotidx]))
frac_part = res[dotidx:].strip()
if len(int_part) <= 0:
int_part = "0"
return FormattedStr.number(f"{int_part}{frac_part}{exp_part}")
return FormattedStr.number(value)
# step five: first, create the raw float strings and mark special
# values
return [__render_float(value) for value in data]
#: the default shared singleton instance of the number group format
DEFAULT_NUMBER_RENDERER: Final[NumberRenderer] = NumberRenderer()