Source code for moptipy.utils.strings

"""Routines for handling strings."""

from re import compile as _compile
from re import sub
from typing import Final, Iterable, Pattern

from pycommons.strings.string_conv import float_to_str, num_to_str, str_to_num
from pycommons.strings.tools import replace_str
from pycommons.types import type_error


[docs] def num_to_str_for_name(x: int | float) -> str: """ Convert a float to a string for use in a component name. This function can be inverted by applying :func:`name_str_to_num`. :param x: the float :returns: the string >>> num_to_str_for_name(1.3) '1d3' >>> num_to_str_for_name(1.0) '1' >>> num_to_str_for_name(-7) 'm7' >>> num_to_str_for_name(-6.32) 'm6d32' >>> num_to_str_for_name(-1e-5) 'm1em5' """ return num_to_str(x).replace(".", DECIMAL_DOT_REPLACEMENT) \ .replace("-", MINUS_REPLACEMENT)
[docs] def name_str_to_num(s: str) -> int | float: """ Convert a string from a name to a number. This function is the inverse of :func:`num_to_str_for_name`. :param s: the string from the name :returns: an integer or float, depending on the number represented by `s` >>> name_str_to_num(num_to_str_for_name(1.1)) 1.1 >>> name_str_to_num(num_to_str_for_name(1)) 1 >>> name_str_to_num(num_to_str_for_name(-5e3)) -5000 >>> name_str_to_num(num_to_str_for_name(-6e-3)) -0.006 >>> name_str_to_num(num_to_str_for_name(100.0)) 100 >>> name_str_to_num(num_to_str_for_name(-1e-4)) -0.0001 """ return str_to_num(s.replace(MINUS_REPLACEMENT, "-") .replace(DECIMAL_DOT_REPLACEMENT, "."))
#: the internal table for converting normal characters to unicode superscripts __SUPERSCRIPT: Final = str.maketrans({ # numbers from 0 to 9 0x30: 0x2070, 0x31: 0x00b9, 0x32: 0x00b2, 0x33: 0x00b3, 0x34: 0x2074, 0x35: 0x2075, 0x36: 0x2076, 0x37: 0x2077, 0x38: 0x2078, 0x39: 0x2079, # +/-/=/(/) 0x2b: 0x207A, 0x2d: 0x207b, 0x3d: 0x207c, 0x28: 0x207d, 0x29: 0x207e, # lower case letters 0x61: 0x1d43, 0x62: 0x1d47, 0x63: 0x1d9c, 0x64: 0x1d48, 0x65: 0x1d49, 0x66: 0x1da0, 0x67: 0x1d4d, 0x6b: 0x1d4f, 0x6c: 0x1da9, 0x6d: 0x1d50, 0x6e: 0x207f, 0x6f: 0x1d52, 0x70: 0x1d56, 0x74: 0x1d57, 0x75: 0x1d58, 0x76: 0x1d5b, 0x7a: 0x1dbb, })
[docs] def superscript(s: str) -> str: """ Transform a string into Unicode-based superscript. :param s: the string :returns: the string in superscript >>> superscript("a0=4(e)") '\u1d43\u2070\u207c\u2074\u207d\u1d49\u207e' """ if not isinstance(s, str): raise type_error(s, "s", str) return s.translate(__SUPERSCRIPT)
[docs] def beautify_float_str(s: str | float) -> str: """ Beautify the string representation of a float. This function beautifies the string representation of a float by using unicode superscripts for exponents. :param s: either a `float` or the string representation of a `float` :return: the beautified string representation >>> beautify_float_str('0.0') '0.0' >>> beautify_float_str('1e12') '1\u00d710\u00b9\u00b2' >>> beautify_float_str('1e-3') '1\u00d710\u207b\u00b3' >>> beautify_float_str('inf') '\u221e' >>> beautify_float_str('-inf') '-\u221e' >>> beautify_float_str('nan') '\u2205' """ if isinstance(s, float): s = float_to_str(s) if not isinstance(s, str): raise type_error(s, "s", str) s = s.strip().lower() if s in ("+inf", "inf"): return "\u221e" if s == "-inf": return "-\u221e" if s == "nan": return "\u2205" eidx: int = s.find("e") if eidx < 0: return s return f"{s[:eidx]}\u00d710{s[eidx + 1:].translate(__SUPERSCRIPT)}"
def __replace_double(replace: str, src: str) -> str: """ Replace any double-occurrence of a string with a single occurrence. :param replace: the string to replace :param src: the source string :returns: the updated string """ return replace_str(replace + replace, replace, src) #: the separator of different filename parts PART_SEPARATOR: Final[str] = "_" #: the replacement for "." in a file name DECIMAL_DOT_REPLACEMENT: Final[str] = "d" #: the replacement for "-" in a file name MINUS_REPLACEMENT: Final[str] = "m" #: the replacement for "+" in a file name PLUS_REPLACEMENT: Final[str] = "p" #: a pattern used during name sanitization __PATTERN_SPACE_BEFORE_MINUS: Final[Pattern] = _compile(r"[^\w\s-]") #: the multiple-whitespace pattern __PATTERN_MULTIPLE_WHITESPACE: Final[Pattern] = _compile(r"\s+")
[docs] def sanitize_name(name: str) -> str: """ Sanitize a name in such a way that it can be used as path component. >>> sanitize_name(" hello world ") 'hello_world' >>> sanitize_name(" 56.6-455 ") '56d6m455' >>> sanitize_name(" _ i _ am _ funny --6 _ ") 'i_am_funny_m6' :param name: the name that should be sanitized :return: the sanitized name :raises ValueError: if the name is invalid or empty :raises TypeError: if the name is `None` or not a string """ if not isinstance(name, str): raise type_error(name, "name", str) orig_name = name name = name.strip() name = __replace_double("-", name).replace("+", PLUS_REPLACEMENT) name = __replace_double("+", name).replace("-", MINUS_REPLACEMENT) name = __replace_double("_", name) name = __replace_double(".", name).replace(".", DECIMAL_DOT_REPLACEMENT) name = sub(__PATTERN_SPACE_BEFORE_MINUS, "", name) name = sub(__PATTERN_MULTIPLE_WHITESPACE, PART_SEPARATOR, name) name = __replace_double("_", name) if name.startswith("_"): name = name[1:] if name.endswith("_"): name = name[:len(name) - 1] if len(name) <= 0: raise ValueError( f"Sanitized name must not become empty, but {orig_name!r} does.") return name
[docs] def sanitize_names(names: Iterable[str]) -> str: """ Sanitize a set of names. >>> sanitize_names(["", " sdf ", "", "5-3"]) 'sdf_5m3' >>> sanitize_names([" a ", " b", " c", "", "6", ""]) 'a_b_c_6' :param names: the list of names. :return: the sanitized name """ return PART_SEPARATOR.join([ sanitize_name(name) for name in names if len(name) > 0])