Coverage for moptipy / utils / strings.py: 92%
53 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-24 08:49 +0000
1"""Routines for handling strings."""
3from re import compile as _compile
4from re import sub
5from typing import Final, Iterable, Pattern
7from pycommons.strings.chars import superscript
8from pycommons.strings.string_conv import float_to_str, num_to_str, str_to_num
9from pycommons.strings.string_tools import replace_str
10from pycommons.types import type_error
def num_to_str_for_name(x: int | float) -> str:
    """
    Render a number as a string that can be embedded in a component name.

    The number is first converted with `num_to_str`. Afterwards, every
    `"."` is replaced by `DECIMAL_DOT_REPLACEMENT` and every `"-"` is
    replaced by `MINUS_REPLACEMENT`.

    This function can be inverted by applying :func:`name_str_to_num`.

    :param x: the number to convert
    :returns: the string to be used inside a name

    >>> num_to_str_for_name(1.3)
    '1d3'
    >>> num_to_str_for_name(1.0)
    '1'
    >>> num_to_str_for_name(-7)
    'm7'
    >>> num_to_str_for_name(-6.32)
    'm6d32'
    >>> num_to_str_for_name(-1e-5)
    'm1em5'
    """
    text = num_to_str(x)
    # First get rid of the decimal dot, then of any minus sign.
    without_dot = text.replace(".", DECIMAL_DOT_REPLACEMENT)
    return without_dot.replace("-", MINUS_REPLACEMENT)
def name_str_to_num(s: str) -> int | float:
    """
    Parse a number that was encoded for use inside a name.

    Every `MINUS_REPLACEMENT` in `s` is turned back into `"-"` and every
    `DECIMAL_DOT_REPLACEMENT` into `"."`; the result is then handed to
    `str_to_num`.

    This function is the inverse of :func:`num_to_str_for_name`.

    :param s: the string from the name
    :returns: an integer or float, depending on the number represented by
        `s`

    >>> name_str_to_num(num_to_str_for_name(1.1))
    1.1
    >>> name_str_to_num(num_to_str_for_name(1))
    1
    >>> name_str_to_num(num_to_str_for_name(-5e3))
    -5000
    >>> name_str_to_num(num_to_str_for_name(-6e-3))
    -0.006
    >>> name_str_to_num(num_to_str_for_name(100.0))
    100
    >>> name_str_to_num(num_to_str_for_name(-1e-4))
    -0.0001
    """
    # Undo the replacements in the same order as the original chain:
    # minus signs first, decimal dots second.
    with_minus = s.replace(MINUS_REPLACEMENT, "-")
    plain = with_minus.replace(DECIMAL_DOT_REPLACEMENT, ".")
    return str_to_num(plain)
64def beautify_float_str(s: str | float) -> str:
65 """
66 Beautify the string representation of a float.
68 This function beautifies the string representation of a float by using
69 unicode superscripts for exponents.
71 :param s: either a `float` or the string representation of a `float`
72 :return: the beautified string representation
74 >>> beautify_float_str('0.0')
75 '0.0'
76 >>> beautify_float_str('1e12')
77 '1\u00d710\u00b9\u00b2'
78 >>> beautify_float_str('1e-3')
79 '1\u00d710\u207b\u00b3'
80 >>> beautify_float_str('inf')
81 '\u221e'
82 >>> beautify_float_str('-inf')
83 '-\u221e'
84 >>> beautify_float_str('nan')
85 '\u2205'
86 """
87 if isinstance(s, float):
88 s = float_to_str(s)
89 if not isinstance(s, str):
90 raise type_error(s, "s", str)
91 s = s.strip().lower()
92 if s in {"+inf", "inf"}:
93 return "\u221e"
94 if s == "-inf":
95 return "-\u221e"
96 if s == "nan":
97 return "\u2205"
98 eidx: int = s.find("e")
99 if eidx < 0:
100 return s
101 return f"{s[:eidx]}\u00d710{superscript(s[eidx + 1:])}"
def __replace_double(replace: str, src: str) -> str:
    """
    Collapse doubled occurrences of a string into a single occurrence.

    The search string is `replace` concatenated with itself; it is
    substituted by `replace` inside `src` by means of `replace_str`.

    :param replace: the string whose doubled occurrences should be collapsed
    :param src: the source string
    :returns: the updated string
    """
    doubled = replace + replace
    return replace_str(doubled, replace, src)
#: the separator placed between the different parts of a file name
PART_SEPARATOR: Final[str] = "_"
#: the character that stands in for "." in a file name
DECIMAL_DOT_REPLACEMENT: Final[str] = "d"
#: the character that stands in for "-" in a file name
MINUS_REPLACEMENT: Final[str] = "m"
#: the character that stands in for "+" in a file name
PLUS_REPLACEMENT: Final[str] = "p"

#: a pattern used during name sanitization: despite its name, it matches
#: every single character that is neither a word character, nor white
#: space, nor a hyphen
__PATTERN_SPACE_BEFORE_MINUS: Final[Pattern] = _compile(r"[^\w\s-]")
#: the pattern matching one or more consecutive white space characters
__PATTERN_MULTIPLE_WHITESPACE: Final[Pattern] = _compile(r"\s+")
def sanitize_name(name: str) -> str:
    """
    Sanitize a name in such a way that it can be used as path component.

    The name is stripped of surrounding white space. Repeated `-`, `+`,
    `_`, and `.` are collapsed via `__replace_double`; `-`, `+`, and `.`
    are then replaced by `MINUS_REPLACEMENT`, `PLUS_REPLACEMENT`, and
    `DECIMAL_DOT_REPLACEMENT`, respectively. Every remaining character that
    is neither a word character nor white space is removed, white space is
    turned into `PART_SEPARATOR`, and one leading and one trailing `"_"`
    are dropped.

    >>> sanitize_name(" hello world ")
    'hello_world'
    >>> sanitize_name(" 56.6-455 ")
    '56d6m455'
    >>> sanitize_name(" _ i _ am _ funny --6 _ ")
    'i_am_funny_m6'
    >>> sanitize_name("a++b")
    'apb'

    :param name: the name that should be sanitized
    :return: the sanitized name
    :raises ValueError: if the name is invalid or empty
    :raises TypeError: if the name is `None` or not a string
    """
    if not isinstance(name, str):
        raise type_error(name, "name", str)
    orig_name = name  # kept for the error message below
    name = name.strip()
    # Collapse each sign *before* replacing it. Previously "+" was replaced
    # first, so the subsequent collapse of "++" could never match anything.
    name = __replace_double("-", name).replace("-", MINUS_REPLACEMENT)
    name = __replace_double("+", name).replace("+", PLUS_REPLACEMENT)
    name = __replace_double("_", name)
    name = __replace_double(".", name).replace(".", DECIMAL_DOT_REPLACEMENT)

    # Drop everything that is not a word character or white space, then
    # turn each run of white space into a single separator.
    name = sub(__PATTERN_SPACE_BEFORE_MINUS, "", name)
    name = sub(__PATTERN_MULTIPLE_WHITESPACE, PART_SEPARATOR, name)
    # The white space substitution may have created new "__" sequences.
    name = __replace_double("_", name)

    name = name.removeprefix("_").removesuffix("_")
    if not name:
        raise ValueError(
            f"Sanitized name must not become empty, but {orig_name!r} does.")

    return name
def sanitize_names(names: Iterable[str]) -> str:
    """
    Sanitize several names and join them into one single name.

    Names of length zero are skipped. Every other name is passed through
    :func:`sanitize_name`, and the results are concatenated with
    `PART_SEPARATOR` in between.

    >>> sanitize_names(["", " sdf ", "", "5-3"])
    'sdf_5m3'
    >>> sanitize_names([" a ", " b", " c", "", "6", ""])
    'a_b_c_6'

    :param names: the list of names.
    :return: the sanitized name
    """
    cleaned: list[str] = []
    for raw in names:
        if len(raw) > 0:  # empty entries contribute nothing
            cleaned.append(sanitize_name(raw))
    return PART_SEPARATOR.join(cleaned)