Source code for moptipy.utils.strings

"""Routines for handling strings."""

from re import compile as _compile
from re import sub
from typing import Final, Iterable, Pattern

from pycommons.strings.chars import superscript
from pycommons.strings.string_conv import float_to_str, num_to_str, str_to_num
from pycommons.strings.tools import replace_str
from pycommons.types import type_error



[docs]
def num_to_str_for_name(x: int | float) -> str:
    """
    Render a number as a string that is safe inside a component name.

    The number is first converted with `num_to_str`. Afterwards, every
    decimal dot is replaced by :const:`DECIMAL_DOT_REPLACEMENT` and every
    minus sign by :const:`MINUS_REPLACEMENT`.
    Use :func:`name_str_to_num` to convert the result back to a number.

    :param x: the number, either an `int` or a `float`
    :returns: the name-compatible string representation of `x`

    >>> num_to_str_for_name(1.3)
    '1d3'
    >>> num_to_str_for_name(1.0)
    '1'
    >>> num_to_str_for_name(-7)
    'm7'
    >>> num_to_str_for_name(-6.32)
    'm6d32'
    >>> num_to_str_for_name(-1e-5)
    'm1em5'
    """
    text: str = num_to_str(x)
    # first get rid of the decimal dot, then of all minus signs
    text = text.replace(".", DECIMAL_DOT_REPLACEMENT)
    return text.replace("-", MINUS_REPLACEMENT)




[docs]
def name_str_to_num(s: str) -> int | float:
    """
    Parse a number out of a string that was embedded in a name.

    This undoes :func:`num_to_str_for_name`: every occurrence of
    :const:`MINUS_REPLACEMENT` is turned back into a minus sign, every
    occurrence of :const:`DECIMAL_DOT_REPLACEMENT` is turned back into a
    decimal dot, and the result is handed to `str_to_num`.

    :param s: the string taken from the name
    :returns: the `int` or `float` that `s` represents

    >>> name_str_to_num(num_to_str_for_name(1.1))
    1.1
    >>> name_str_to_num(num_to_str_for_name(1))
    1
    >>> name_str_to_num(num_to_str_for_name(-5e3))
    -5000
    >>> name_str_to_num(num_to_str_for_name(-6e-3))
    -0.006
    >>> name_str_to_num(num_to_str_for_name(100.0))
    100
    >>> name_str_to_num(num_to_str_for_name(-1e-4))
    -0.0001
    """
    restored: str = s.replace(MINUS_REPLACEMENT, "-")
    restored = restored.replace(DECIMAL_DOT_REPLACEMENT, ".")
    return str_to_num(restored)




[docs]
def beautify_float_str(s: str | float) -> str:
    """
    Beautify the string representation of a float.

    This function beautifies the string representation of a float by using
    unicode superscripts for exponents.

    :param s: either a `float` or the string representation of a `float`
    :return: the beautified string representation

    >>> beautify_float_str('0.0')
    '0.0'
    >>> beautify_float_str('1e12')
    '1\u00d710\u00b9\u00b2'
    >>> beautify_float_str('1e-3')
    '1\u00d710\u207b\u00b3'
    >>> beautify_float_str('inf')
    '\u221e'
    >>> beautify_float_str('-inf')
    '-\u221e'
    >>> beautify_float_str('nan')
    '\u2205'
    """
    if isinstance(s, float):
        s = float_to_str(s)
    if not isinstance(s, str):
        raise type_error(s, "s", str)
    s = s.strip().lower()
    if s in ("+inf", "inf"):
        return "\u221e"
    if s == "-inf":
        return "-\u221e"
    if s == "nan":
        return "\u2205"
    eidx: int = s.find("e")
    if eidx < 0:
        return s
    return f"{s[:eidx]}\u00d710{superscript(s[eidx + 1:])}"



def __replace_double(replace: str, src: str) -> str:
    """
    Collapse doubled occurrences of a string into single occurrences.

    The work is delegated to `replace_str`, which is asked to substitute
    the string `replace` repeated twice by just `replace` inside `src`.

    :param replace: the string whose doubled occurrences are collapsed
    :param src: the source string
    :returns: the updated string
    """
    doubled: str = replace * 2
    return replace_str(doubled, replace, src)


#: the separator of different filename parts; also substituted for runs of
#: whitespace by :func:`sanitize_name`
PART_SEPARATOR: Final[str] = "_"
#: the replacement for "." in a file name
DECIMAL_DOT_REPLACEMENT: Final[str] = "d"
#: the replacement for "-" in a file name
MINUS_REPLACEMENT: Final[str] = "m"
#: the replacement for "+" in a file name
PLUS_REPLACEMENT: Final[str] = "p"

#: a pattern used during name sanitization: it matches any single character
#: that is neither a word character (`\w`), nor whitespace (`\s`), nor a
#: hyphen; :func:`sanitize_name` deletes all such characters
#: (NOTE: the identifier does not describe what the pattern matches)
__PATTERN_SPACE_BEFORE_MINUS: Final[Pattern] = _compile(r"[^\w\s-]")
#: the multiple-whitespace pattern: it matches any run of one or more
#: whitespace characters
__PATTERN_MULTIPLE_WHITESPACE: Final[Pattern] = _compile(r"\s+")



[docs]
def sanitize_name(name: str) -> str:
    """
    Sanitize a name in such a way that it can be used as path component.

    The name is stripped of surrounding whitespace. Repeated occurrences
    of `-`, `+`, `_`, and `.` are collapsed into single ones, after which
    `-`, `+`, and `.` are replaced by :const:`MINUS_REPLACEMENT`,
    :const:`PLUS_REPLACEMENT`, and :const:`DECIMAL_DOT_REPLACEMENT`,
    respectively. All remaining characters that are neither word
    characters nor whitespace are deleted, runs of whitespace are turned
    into :const:`PART_SEPARATOR`, and a leading or trailing underscore is
    removed.

    >>> sanitize_name(" hello world ")
    'hello_world'
    >>> sanitize_name(" 56.6-455 ")
    '56d6m455'
    >>> sanitize_name(" _ i _ am _ funny   --6 _ ")
    'i_am_funny_m6'
    >>> sanitize_name("a++b")
    'apb'

    :param name: the name that should be sanitized
    :return: the sanitized name
    :raises ValueError: if the name is invalid or empty
    :raises TypeError: if the name is `None` or not a string
    """
    if not isinstance(name, str):
        raise type_error(name, "name", str)
    orig_name = name
    name = name.strip()

    # Each special character must be collapsed *before* it is substituted:
    # once "+" has been turned into its replacement, there is no "++" left
    # to collapse, and repeated plus signs would survive as repeated
    # replacement characters, unlike repeated "-", "_", or ".".
    name = __replace_double("-", name).replace("-", MINUS_REPLACEMENT)
    name = __replace_double("+", name).replace("+", PLUS_REPLACEMENT)
    name = __replace_double("_", name)
    name = __replace_double(".", name).replace(".", DECIMAL_DOT_REPLACEMENT)

    # delete everything that is neither a word character nor whitespace
    name = sub(__PATTERN_SPACE_BEFORE_MINUS, "", name)
    # whitespace separates parts of the name
    name = sub(__PATTERN_MULTIPLE_WHITESPACE, PART_SEPARATOR, name)
    # the two substitutions above may have created new "__" sequences
    name = __replace_double("_", name)

    name = name.removeprefix("_").removesuffix("_")
    if not name:
        raise ValueError(
            f"Sanitized name must not become empty, but {orig_name!r} does.")

    return name




[docs]
def sanitize_names(names: Iterable[str]) -> str:
    """
    Sanitize several names and join them into one single name.

    Empty strings in `names` are skipped. Every other element is passed
    through :func:`sanitize_name`, and the results are concatenated with
    :const:`PART_SEPARATOR` in between.

    >>> sanitize_names(["", " sdf ", "", "5-3"])
    'sdf_5m3'
    >>> sanitize_names([" a ", " b", " c", "", "6", ""])
    'a_b_c_6'

    :param names: the list of names.
    :returns: the sanitized name
    """
    # only zero-length entries are dropped here; everything else is
    # handed to sanitize_name
    non_empty = (entry for entry in names if len(entry) > 0)
    return PART_SEPARATOR.join(map(sanitize_name, non_empty))