# Source code for moptipy.utils.number_renderer

"""The numeric format definitions."""

from math import inf, isfinite
from typing import Callable, Final, Iterable, cast

from pycommons.types import check_int_range, type_error

from moptipy.utils.formatted_string import FormattedStr
from moptipy.utils.lang import Lang
from moptipy.utils.math import try_int



[docs]
def default_get_int_renderer() -> Callable[[int], str]:
    """
    Obtain the integer renderer of the currently active language.

    The renderer is looked up at the moment this function is called, so
    a renderer obtained before switching the language keeps using the
    language that was active back then.

    :returns: the `format_int` method of the language returned by
        `Lang.current()`, i.e., a callable turning an integer into a string

    >>> from moptipy.utils.lang import EN, ZH
    >>> EN.set_current()
    >>> f = default_get_int_renderer()
    >>> f(1_000_000)
    "1'000'000"
    >>> ZH.set_current()
    >>> f = default_get_int_renderer()
    >>> f(1_000_000)
    "100'0000"
    """
    active_language = Lang.current()
    renderer = active_language.format_int
    return cast(Callable[[int], str], renderer)




[docs]
def default_get_float_format(
        min_finite: int | float = 0,
        max_finite: int | float = 0,
        max_frac_len: int = 2,
        min_non_zero_abs: int | float = inf,
        int_to_float_threshold: int | float = 10_000_000_000) -> str:
    """
    Choose the default float format string for a range of numbers.

    The larger the magnitude of the numbers, the fewer fractional digits
    are granted. Values entirely inside `[-1, 1]` get as many digits as are
    needed to show the smallest non-zero magnitude, and values beyond
    `int_to_float_threshold` are rendered in the 'E' notation.

    :param min_finite: the minimum finite value that may need to be formatted
    :param max_finite: the maximum finite value that may need to be formatted
    :param max_frac_len: the length of the longest fractional part of any
        number encountered that can be converted to a string *not* in the "E"
        notation
    :param min_non_zero_abs: the minimum non-zero absolute value; will be
        `inf` if all absolute values are zero
    :param int_to_float_threshold: the threshold above which all integers are
        converted to floating point numbers with the 'E' notation
    :returns: a format string suitable for `str.format`
    :raises TypeError: if one of the numerical arguments has the wrong type
    :raises ValueError: if one of the arguments has an invalid value

    >>> default_get_float_format(0, 0, 0)
    '{:.0f}'
    >>> default_get_float_format(0, 1e5, 10)
    '{:.0f}'
    >>> default_get_float_format(-1e7, 1e2, 10)
    '{:.0f}'
    >>> default_get_float_format(0, 0, 1)
    '{:.1f}'
    >>> default_get_float_format(0, 1e3, 11)
    '{:.1f}'
    >>> default_get_float_format(-1e3, 1e2, 11)
    '{:.1f}'
    >>> default_get_float_format(0, 0, 2)
    '{:.2f}'
    >>> default_get_float_format(0, 0, 3)
    '{:.3f}'
    >>> default_get_float_format(0, 0, 4)
    '{:.3f}'
    >>> default_get_float_format(0, 1e11, 4)
    '{:.2e}'
    >>> default_get_float_format(-1, 1, 4, 1e-3)
    '{:.3f}'
    """
    numeric = (int, float)

    # validate the value range first
    if not isinstance(min_finite, numeric):
        raise type_error(min_finite, "min_finite", numeric)
    if not isinstance(max_finite, numeric):
        raise type_error(max_finite, "max_finite", numeric)
    if (not isfinite(min_finite)) or (not isfinite(max_finite)) \
            or (min_finite > max_finite):
        raise ValueError("invalid min_finite, max_finite pair "
                         f"{min_finite}, {max_finite}.")

    check_int_range(max_frac_len, "max_frac_len", 0, 100)

    # the threshold must be positive; it may be `inf` but never `nan`
    if not isinstance(int_to_float_threshold, numeric):
        raise type_error(
            int_to_float_threshold, "int_to_float_threshold", numeric)
    if (int_to_float_threshold <= 0) or (
            (not isfinite(int_to_float_threshold))
            and (not (int_to_float_threshold >= inf))):
        raise ValueError(
            f"invalid int_to_float_threshold={int_to_float_threshold}.")

    if not isinstance(min_non_zero_abs, numeric):
        raise type_error(min_non_zero_abs, "min_non_zero_abs", numeric)
    if min_non_zero_abs <= 0:
        raise ValueError(f"invalid min_non_zero_abs={min_non_zero_abs}")

    # Case 1: everything is inside [-1, 1] and at least one value is not
    # zero: pick just enough digits to display the smallest magnitude.
    if (-1 <= min_finite) and (max_finite <= 1) \
            and isfinite(min_non_zero_abs):
        for lower_bound, small_format in (
                (1e-1, "{:.1f}"), (1e-2, "{:.2f}"),
                (1e-3, "{:.3f}"), (1e-4, "{:.4f}")):
            if min_non_zero_abs >= lower_bound:
                return small_format
        return "{:.3e}"

    # Case 2: at least one value is beyond the threshold: use 'E' notation.
    if (min_finite < (-int_to_float_threshold)) \
            or (max_finite > int_to_float_threshold):
        return "{:.2e}"

    # Case 3: ordinary numbers: the bigger they are, the fewer fractional
    # digits are shown.
    for digits, magnitude, plain_format in (
            (0, 1E4, "{:.0f}"), (1, 1E3, "{:.1f}"), (2, 1E2, "{:.2f}")):
        if (max_frac_len <= digits) or (min_finite <= -magnitude) \
                or (max_finite >= magnitude):
            return plain_format
    return "{:.3f}"




[docs]
class NumberRenderer:
    """
    A format description for a group of numbers.

    With instances of this class, you can convert a sequence of numbers
    to a sequence of strings with uniform, pleasant formatting. The idea
    is that such numbers can be written, e.g., into a column of a table
    and that this column will then have a nice and uniform appearance.
    In other words, we will avoid situations like the following:
    "1234938845, 1e-20, 0.002, 34757773, 1e30, 0.9998837467"
    which looks rather odd. While the numbers may be displayed correctly,
    the formatting of all numbers is different. If we want to present
    numbers that describe related quantities, we rather want them to all
    have the same format. This class here can achieve this in a customizable
    way.
    """

    def __init__(self,
                 int_to_float_threshold: int | float = 10_000_000_000,
                 get_int_renderer: Callable[[], Callable[[int], str]]
                 = default_get_int_renderer,
                 get_float_format: Callable[
                     [int | float, int | float, int,
                      int | float, int | float], str] =
                 default_get_float_format):
        """
        Create the number group format.

        :param int_to_float_threshold: the threshold above which integers are
            converted to floating point numbers in the 'E' notation. Must be
            positive and may be `inf`. A float with an integer value is
            stored as `int`.
        :param get_int_renderer: the function to be used to get the renderer
            for all integers and integer parts of floats.
        :param get_float_format: the getter for the float format, i.e., a
            callable accepting the range [min, max] of all finite values to be
            rendered, the maximum length of any fractional part, the smallest
            non-zero absolute value, and the `int_to_float_threshold` value
            and that then returns a string with the float format definition
        :raises TypeError: if an argument has the wrong type
        :raises ValueError: if `int_to_float_threshold` is not positive or
            is `nan`
        """
        super().__init__()
        if not isinstance(int_to_float_threshold, int | float):
            raise type_error(int_to_float_threshold,
                             "int_to_float_threshold", (int, float))
        if (int_to_float_threshold <= 0) or not (
                isfinite(int_to_float_threshold)
                or (int_to_float_threshold >= inf)):
            raise ValueError("invalid int_to_float_threshold="
                             f"{int_to_float_threshold}.")
        # Normalize floats with integer value (e.g., 1e10) to int.
        # `is_integer()` is False for `inf` and for values like 1.5, so
        # these are kept as floats instead of overflowing or looping.
        if isinstance(int_to_float_threshold, float) \
                and int_to_float_threshold.is_integer():
            int_to_float_threshold = int(int_to_float_threshold)
        #: the absolute threshold above which all integer numbers must be
        #: converted to floats to render them in the 'E' notation
        self.int_to_float_threshold: Final[int | float] \
            = int_to_float_threshold
        if not callable(get_int_renderer):
            raise type_error(get_int_renderer, "get_int_renderer", call=True)
        #: the function to be used to get the renderer for all integers and
        #: integer parts of floats
        self.get_int_renderer: Final[Callable[[], Callable[[int], str]]] \
            = get_int_renderer
        if not callable(get_float_format):
            raise type_error(get_float_format, "get_float_format", call=True)
        #: the getter for the float format to be used to represent a range of
        #: values
        self.get_float_format: Final[Callable[
            [int | float, int | float, int,
             int | float, int | float], str]] = get_float_format

    def derive(self,
               int_to_float_threshold: int | float | None = None,
               get_int_renderer: Callable[[], Callable[
                   [int], str]] | None = None,
               get_float_format: Callable[[int | float, int | float, int,
                                           int | float, int | float],
                                          str] | None = None) \
            -> "NumberRenderer":
        """
        Derive a new number group format from this one.

        :param int_to_float_threshold: the int-to-float threshold
        :param get_int_renderer: the integer renderer getter
        :param get_float_format: the float format getter
        :returns: a new number group format that differs from the current
            format only in terms of the non-`None` parameters specified, or
            this very object if no parameter differs from its current value

        >>> d = DEFAULT_NUMBER_RENDERER
        >>> d.derive() is d
        True
        >>> d.int_to_float_threshold
        10000000000
        >>> from moptipy.utils.lang import EN
        >>> EN.set_current()
        >>> d.get_int_renderer()(123456789)
        "123'456'789"
        >>> d.get_float_format(-10, 10, 2)
        '{:.2f}'
        >>> d = d.derive(int_to_float_threshold=22)
        >>> d is DEFAULT_NUMBER_RENDERER
        False
        >>> d.int_to_float_threshold
        22
        >>> d = d.derive(get_int_renderer=lambda: lambda x: "bla")
        >>> d.get_int_renderer()(112)
        'bla'
        """
        # pylint: disable=R0916
        # nothing would change: reuse this instance
        if (((int_to_float_threshold is None)
             or (int_to_float_threshold == self.int_to_float_threshold))
                and ((get_int_renderer is None)
                     or (get_int_renderer is self.get_int_renderer))
                and ((get_float_format is None)
                     or (get_float_format is self.get_float_format))):
            return self
        return NumberRenderer(
            self.int_to_float_threshold if
            int_to_float_threshold is None else int_to_float_threshold,
            self.get_int_renderer if get_int_renderer is None
            else get_int_renderer,
            self.get_float_format if get_float_format is None
            else get_float_format)

    def render(self, source: int | float | None | Iterable[int | float | None],
               none_str: FormattedStr | None = None) \
            -> list[FormattedStr | None]:
        r"""
        Convert a sequence of numbers to text with uniform shape.

        Often, we need to convert a set of numbers to strings as output for a
        table or another representative text. In such a case, you want to
        present all numbers in the set in the same format.

        Imagine you have the number vector `[1E-4, 1/7, 123456789012345678]`.
        If you simply convert this list to a string directly, what you get is
        `[0.0001, 0.14285714285714285, 123456789012345678]`. Now this looks
        very ugly. First, we have one very big number `123456789012345678`.
        If the numbers stem from an experiment, then we are hardly able to
        obtain any number at a very extreme precision. The 18 digits in
        `123456789012345678` sort of suggest a precision to 18 decimals, since
        the number ends in specific digits (as opposed to `123450000000000000`
        which a reader would naturally perceive as a rounded quantity).
        Additionally, we have the number `0.14285714285714285`, which has a
        very long fractional part, which, too, suggests a very high precision.
        Writing both mentioned numbers next to each other, this suggests as if
        we could present a number as high as 10**18 at a precision of 10**-17.
        And it also looks ugly, because both numbers are not uniformly
        formatted. Instead, our function here renders the number list as
        `['1.00e-4', '1.43e-1', '1.23e17']`. It recognizes that we
        should present numbers as powers of ten and then limits the precision
        to three digits.

        This function is thus intended to produce some sort of uniform format
        with reasonable precision uniformly for a numerical vector, under the
        assumption that all numbers should be presented in the same numerical
        range and quantity.

        The `source` object itself is never modified: the method works on a
        private copy of the data.

        :param source: the column data, either a single value or an
            `Iterable` of values
        :param none_str: the string replacement for `None`
        :returns: a list with the text representation
        :raises TypeError: if an argument or the result of one of the
            configured getters/renderers has the wrong type
        :raises ValueError: if `source` is empty or the float format is
            invalid

        >>> from moptipy.utils.lang import EN
        >>> EN.set_current()
        >>> ff = DEFAULT_NUMBER_RENDERER
        >>> ff.render([1.75651, 212, 3234234])
        ['2', '212', "3'234'234"]
        >>> ff.render([1.75651, 22, 34])
        ['1.757', '22.000', '34.000']
        >>> ff.render([1.75651, 122, 34])
        ['1.76', '122.00', '34.00']
        >>> ff.render([1.75651, 122, 3334])
        ['1.8', '122.0', "3'334.0"]
        >>> ff.render([1.5, 212, 3234234])
        ['2', '212', "3'234'234"]
        >>> ff.render([1.5, 2e12, 3234234])
        ['1.50e0', '2.00e12', '3.23e6']
        >>> ff.render([233, 22139283482834, 3234234])
        ['2.33e2', '2.21e13', '3.23e6']
        >>> ff.render([233, 22139283, 3234234])
        ['233', "22'139'283", "3'234'234"]
        >>> from math import nan, inf
        >>> ff.render([22139283, inf, -inf, nan, None])
        ["22'139'283", 'inf', '-inf', 'nan', None]
        >>> ff.render([1E-4, 1/7, 123456789012345678])
        ['1.00e-4', '1.43e-1', '1.23e17']
        >>> ff.render([0, 0.02, 0.1, 1e-3])
        ['0.000', '0.020', '0.100', '0.001']
        >>> ff.render([-0.2, 1e-6, 0.9])
        ['-2.000e-1', '1.000e-6', '9.000e-1']
        >>> ff.render([-0.5, 0.5])
        ['-0.5', '0.5']
        """
        if (source is None) or isinstance(source, int | float):
            source = [source]
        if not isinstance(source, Iterable):
            raise type_error(source, "source", Iterable)
        if (none_str is not None) and (
                not isinstance(none_str, FormattedStr)):
            raise type_error(none_str, "none_str", (FormattedStr, None))

        # get the format parameters
        int_renderer: Final[Callable[[int], str]] = \
            self.get_int_renderer()
        if not callable(int_renderer):
            raise type_error(int_renderer, "int_renderer", call=True)
        int_to_float_threshold: Final[int | float] \
            = self.int_to_float_threshold

        # step one: get the raw numerical data
        # We always copy, because the list is modified below and the
        # caller's list must not be changed as a side effect.
        data: Final[list[int | float | None]] = list(source)
        dlen: Final[int] = len(data)
        if dlen <= 0:
            raise ValueError("Data cannot be empty.")

        # step two: investigate the data ranges and structure
        all_is_none: bool = True
        all_is_int: bool = True
        max_finite: int | float = -inf
        min_finite: int | float = inf
        min_non_zero_abs: int | float = inf
        longest_fraction: int = -1
        da: int | float

        for idx, d in enumerate(data):
            if d is None:
                continue
            all_is_none = False
            d2 = try_int(d) if isfinite(d) else d
            if isinstance(d2, int):
                # Record the exact integer *before* it may be converted to
                # a (possibly less precise) float below.
                min_finite = min(min_finite, d2)
                max_finite = max(max_finite, d2)
                da = abs(d2)
                if 0 < da < min_non_zero_abs:
                    min_non_zero_abs = da
                # integers beyond the threshold are rendered as floats
                if not ((-int_to_float_threshold) <= d2
                        <= int_to_float_threshold):
                    d2 = float(d2)
            if d2 is not d:
                data[idx] = d2

            if isfinite(d2):
                if not isinstance(d2, int):
                    all_is_int = False
                    # measure the fractional part of floats whose plain
                    # string representation is not in the 'E' notation
                    s = str(d2)
                    if not (("E" in s) or ("e" in s)):
                        i = s.find(".")
                        if i >= 0:
                            i = len(s) - i - 1
                            longest_fraction = max(longest_fraction, i)
                min_finite = min(min_finite, d2)
                max_finite = max(max_finite, d2)
                da = abs(d2)
                if 0 < da < min_non_zero_abs:
                    min_non_zero_abs = da

        # step three: if all data is None, we can return here
        if all_is_none:
            return [none_str] * dlen

        # create the protected integer renderer: it verifies that the
        # user-provided renderer really returns a string
        def _to_int(value: int) -> str:
            text = int_renderer(value)
            if not isinstance(text, str):
                raise type_error(text, f"conversion of {value}", str)
            return text.strip()

        # step four: if all data are integer, we can convert them directly
        if all_is_int:
            # an int render also processing None and special floats
            def _render_int(value: None | int | float) \
                    -> FormattedStr | None:
                if value is None:
                    return none_str
                return FormattedStr.number(_to_int(cast(int, value))
                                           if isfinite(value) else value)
            return [_render_int(i) for i in data]

        # ok, we have at least some finite floats that cannot be converted to
        # integers. therefore, we need to convert them to strings based on a
        # floating point number format.
        float_format = self.get_float_format(
            min_finite, max_finite, longest_fraction, min_non_zero_abs,
            int_to_float_threshold)
        if not isinstance(float_format, str):
            raise type_error(float_format,
                             "float format from float_format_getter", str)
        if (len(float_format) <= 0) or ("{" not in float_format) \
                or ("}" not in float_format) or (":" not in float_format):
            raise ValueError(f"invalid float format {float_format!r}.")

        def _render_float(value: int | float | None) \
                -> FormattedStr | None:
            if value is None:
                return none_str
            if not isfinite(value):  # inf, -inf, nan
                return FormattedStr.number(value)

            res: str = float_format.format(value).strip()

            # split off and re-render the exponent, if any
            exp_part: str = ""
            eidx: int = res.find("e")
            if eidx < 0:
                eidx = res.find("E")
            if eidx >= 0:
                exp_part = f"e{_to_int(int(res[eidx + 1:])).strip()}"
                res = res[:eidx].strip()

            # split the mantissa into integer and fractional part
            frac_part: str = ""
            dotidx: int = res.find(".")
            if dotidx <= 0:
                int_value = int(res)
            else:
                int_value = int(res[:dotidx])
                frac_part = res[dotidx:].strip()
            int_part: str = _to_int(int_value)
            if len(int_part) <= 0:
                int_part = "0"

            # `int("-0")` is `0`, so the sign of numbers in (-1, 0) would get
            # lost. Restore it unless the value was rounded to zero.
            if (int_value == 0) and res.startswith("-") \
                    and any(ch in "123456789" for ch in frac_part):
                int_part = f"-{int_part}"
            return FormattedStr.number(f"{int_part}{frac_part}{exp_part}")

        # step five: render all values with the uniform float format
        return [_render_float(value) for value in data]




#: The default shared singleton instance of the number group format,
#: created with the default constructor arguments of `NumberRenderer`.
DEFAULT_NUMBER_RENDERER: Final[NumberRenderer] = NumberRenderer()