Source code for moptipy.utils.strings

"""Routines for handling strings."""

from re import compile as _compile
from re import sub
from typing import Final, Iterable, Pattern

from pycommons.strings.chars import superscript
from pycommons.strings.string_conv import float_to_str, num_to_str, str_to_num
from pycommons.strings.tools import replace_str
from pycommons.types import type_error


def num_to_str_for_name(x: int | float) -> str:
    """
    Render a number as a string that can be embedded in a component name.

    The number is first converted with :func:`num_to_str`. Afterwards, every
    `"."` is replaced by `DECIMAL_DOT_REPLACEMENT` and every `"-"` is replaced
    by `MINUS_REPLACEMENT`. The conversion can be undone with
    :func:`name_str_to_num`.

    :param x: the number to convert
    :returns: the name-compatible string representation of `x`

    >>> num_to_str_for_name(1.3)
    '1d3'
    >>> num_to_str_for_name(1.0)
    '1'
    >>> num_to_str_for_name(-7)
    'm7'
    >>> num_to_str_for_name(-6.32)
    'm6d32'
    >>> num_to_str_for_name(-1e-5)
    'm1em5'
    """
    text: str = num_to_str(x)
    # first the decimal dot, then the minus sign (same order as before)
    text = text.replace(".", DECIMAL_DOT_REPLACEMENT)
    return text.replace("-", MINUS_REPLACEMENT)
def name_str_to_num(s: str) -> int | float:
    """
    Parse a number that was encoded for use inside a name.

    This is the inverse of :func:`num_to_str_for_name`: every occurrence of
    `MINUS_REPLACEMENT` is turned back into `"-"`, every occurrence of
    `DECIMAL_DOT_REPLACEMENT` is turned back into `"."`, and the result is
    handed to :func:`str_to_num`.

    :param s: the string taken from the name
    :returns: an integer or float, depending on the number represented by
        `s`

    >>> name_str_to_num(num_to_str_for_name(1.1))
    1.1
    >>> name_str_to_num(num_to_str_for_name(1))
    1
    >>> name_str_to_num(num_to_str_for_name(-5e3))
    -5000
    >>> name_str_to_num(num_to_str_for_name(-6e-3))
    -0.006
    >>> name_str_to_num(num_to_str_for_name(100.0))
    100
    >>> name_str_to_num(num_to_str_for_name(-1e-4))
    -0.0001
    """
    restored: str = s.replace(MINUS_REPLACEMENT, "-")
    restored = restored.replace(DECIMAL_DOT_REPLACEMENT, ".")
    return str_to_num(restored)
[docs] def beautify_float_str(s: str | float) -> str: """ Beautify the string representation of a float. This function beautifies the string representation of a float by using unicode superscripts for exponents. :param s: either a `float` or the string representation of a `float` :return: the beautified string representation >>> beautify_float_str('0.0') '0.0' >>> beautify_float_str('1e12') '1\u00d710\u00b9\u00b2' >>> beautify_float_str('1e-3') '1\u00d710\u207b\u00b3' >>> beautify_float_str('inf') '\u221e' >>> beautify_float_str('-inf') '-\u221e' >>> beautify_float_str('nan') '\u2205' """ if isinstance(s, float): s = float_to_str(s) if not isinstance(s, str): raise type_error(s, "s", str) s = s.strip().lower() if s in ("+inf", "inf"): return "\u221e" if s == "-inf": return "-\u221e" if s == "nan": return "\u2205" eidx: int = s.find("e") if eidx < 0: return s return f"{s[:eidx]}\u00d710{superscript(s[eidx + 1:])}"
def __replace_double(replace: str, src: str) -> str:
    """
    Collapse doubled occurrences of a string into single occurrences.

    The work is delegated to :func:`replace_str`, which is asked to find
    `replace + replace` in `src` and substitute `replace` for it.

    :param replace: the string whose doubled form should be collapsed
    :param src: the source string
    :returns: the updated string
    """
    doubled: str = replace + replace
    return replace_str(doubled, replace, src)


#: the separator placed between the different parts of a file name
PART_SEPARATOR: Final[str] = "_"
#: the text that stands in for "." in a file name
DECIMAL_DOT_REPLACEMENT: Final[str] = "d"
#: the text that stands in for "-" in a file name
MINUS_REPLACEMENT: Final[str] = "m"
#: the text that stands in for "+" in a file name
PLUS_REPLACEMENT: Final[str] = "p"
#: a pattern used during name sanitization: it matches any single character
#: that is neither a word character, nor whitespace, nor a hyphen
__PATTERN_SPACE_BEFORE_MINUS: Final[Pattern] = _compile(r"[^\w\s-]")
#: the pattern matching a run of one or more whitespace characters
__PATTERN_MULTIPLE_WHITESPACE: Final[Pattern] = _compile(r"\s+")
def sanitize_name(name: str) -> str:
    """
    Convert a name into a form that can be used as a path component.

    The name is stripped, the characters `+`, `-`, and `.` are swapped for
    `PLUS_REPLACEMENT`, `MINUS_REPLACEMENT`, and `DECIMAL_DOT_REPLACEMENT`,
    all characters that are neither word characters, whitespace, nor hyphens
    are removed, whitespace runs become `PART_SEPARATOR`, doubled
    underscores are collapsed, and one leading and one trailing underscore
    are dropped.

    >>> sanitize_name(" hello world ")
    'hello_world'
    >>> sanitize_name(" 56.6-455 ")
    '56d6m455'
    >>> sanitize_name(" _ i _ am _ funny --6 _ ")
    'i_am_funny_m6'

    :param name: the name that should be sanitized
    :return: the sanitized name
    :raises ValueError: if the name is invalid or empty
    :raises TypeError: if the name is `None` or not a string
    """
    if not isinstance(name, str):
        raise type_error(name, "name", str)
    orig_name = name  # kept unchanged for the error message below

    text = name.strip()

    # collapse "--", then substitute every "+"
    text = __replace_double("-", text)
    text = text.replace("+", PLUS_REPLACEMENT)

    # NOTE(review): every "+" has already been substituted above, so this
    # collapse of "++" appears to have no effect unless PLUS_REPLACEMENT
    # itself contains "+" -- confirm whether this ordering is intended.
    text = __replace_double("+", text)
    text = text.replace("-", MINUS_REPLACEMENT)

    text = __replace_double("_", text)

    # collapse "..", then substitute every "."
    text = __replace_double(".", text)
    text = text.replace(".", DECIMAL_DOT_REPLACEMENT)

    # drop unsupported characters, then turn whitespace runs into separators
    text = sub(__PATTERN_SPACE_BEFORE_MINUS, "", text)
    text = sub(__PATTERN_MULTIPLE_WHITESPACE, PART_SEPARATOR, text)

    # the whitespace substitution may have created new doubled underscores
    text = __replace_double("_", text)
    text = text.removeprefix("_").removesuffix("_")

    if not text:
        raise ValueError(
            f"Sanitized name must not become empty, but {orig_name!r} does.")
    return text
def sanitize_names(names: Iterable[str]) -> str:
    """
    Sanitize several names and join them into a single name.

    Names of length zero are skipped. Every other name is passed through
    :func:`sanitize_name`, and the results are joined with
    `PART_SEPARATOR`.

    >>> sanitize_names(["", " sdf ", "", "5-3"])
    'sdf_5m3'
    >>> sanitize_names([" a ", " b", " c", "", "6", ""])
    'a_b_c_6'

    :param names: the list of names.
    :return: the sanitized name
    """
    parts: list[str] = []
    for name in names:
        if len(name) > 0:  # empty entries contribute nothing
            parts.append(sanitize_name(name))
    return PART_SEPARATOR.join(parts)