Coverage for moptipy / utils / strings.py: 92%

53 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-24 08:49 +0000

1"""Routines for handling strings.""" 

2 

3from re import compile as _compile 

4from re import sub 

5from typing import Final, Iterable, Pattern 

6 

7from pycommons.strings.chars import superscript 

8from pycommons.strings.string_conv import float_to_str, num_to_str, str_to_num 

9from pycommons.strings.string_tools import replace_str 

10from pycommons.types import type_error 

11 

12 

def num_to_str_for_name(x: int | float) -> str:
    """
    Render a number as a string that can be embedded in a component name.

    The number is first formatted with `num_to_str`. Afterwards, every
    `"."` is substituted by :const:`DECIMAL_DOT_REPLACEMENT` and every
    `"-"` is substituted by :const:`MINUS_REPLACEMENT`.
    The conversion can be undone with :func:`name_str_to_num`.

    :param x: the number, either an `int` or a `float`
    :returns: the name-compatible string representation of `x`

    >>> num_to_str_for_name(1.3)
    '1d3'
    >>> num_to_str_for_name(1.0)
    '1'
    >>> num_to_str_for_name(-7)
    'm7'
    >>> num_to_str_for_name(-6.32)
    'm6d32'
    >>> num_to_str_for_name(-1e-5)
    'm1em5'
    """
    text: str = num_to_str(x)
    # first get rid of the decimal dot, then of all minus signs
    without_dot: str = text.replace(".", DECIMAL_DOT_REPLACEMENT)
    return without_dot.replace("-", MINUS_REPLACEMENT)

35 

36 

def name_str_to_num(s: str) -> int | float:
    """
    Parse a number from its name-compatible string representation.

    This function reverses :func:`num_to_str_for_name`: every
    :const:`MINUS_REPLACEMENT` is turned back into `"-"` and every
    :const:`DECIMAL_DOT_REPLACEMENT` is turned back into `"."` before the
    resulting text is handed to `str_to_num`.

    :param s: the string taken from the name
    :returns: an integer or float, depending on the number represented by
        `s`

    >>> name_str_to_num(num_to_str_for_name(1.1))
    1.1
    >>> name_str_to_num(num_to_str_for_name(1))
    1
    >>> name_str_to_num(num_to_str_for_name(-5e3))
    -5000
    >>> name_str_to_num(num_to_str_for_name(-6e-3))
    -0.006
    >>> name_str_to_num(num_to_str_for_name(100.0))
    100
    >>> name_str_to_num(num_to_str_for_name(-1e-4))
    -0.0001
    """
    # restore the minus signs first, then the decimal dot
    with_minus: str = s.replace(MINUS_REPLACEMENT, "-")
    plain: str = with_minus.replace(DECIMAL_DOT_REPLACEMENT, ".")
    return str_to_num(plain)

62 

63 

64def beautify_float_str(s: str | float) -> str: 

65 """ 

66 Beautify the string representation of a float. 

67 

68 This function beautifies the string representation of a float by using 

69 unicode superscripts for exponents. 

70 

71 :param s: either a `float` or the string representation of a `float` 

72 :return: the beautified string representation 

73 

74 >>> beautify_float_str('0.0') 

75 '0.0' 

76 >>> beautify_float_str('1e12') 

77 '1\u00d710\u00b9\u00b2' 

78 >>> beautify_float_str('1e-3') 

79 '1\u00d710\u207b\u00b3' 

80 >>> beautify_float_str('inf') 

81 '\u221e' 

82 >>> beautify_float_str('-inf') 

83 '-\u221e' 

84 >>> beautify_float_str('nan') 

85 '\u2205' 

86 """ 

87 if isinstance(s, float): 

88 s = float_to_str(s) 

89 if not isinstance(s, str): 

90 raise type_error(s, "s", str) 

91 s = s.strip().lower() 

92 if s in {"+inf", "inf"}: 

93 return "\u221e" 

94 if s == "-inf": 

95 return "-\u221e" 

96 if s == "nan": 

97 return "\u2205" 

98 eidx: int = s.find("e") 

99 if eidx < 0: 

100 return s 

101 return f"{s[:eidx]}\u00d710{superscript(s[eidx + 1:])}" 

102 

103 

def __replace_double(replace: str, src: str) -> str:
    """
    Collapse doubled occurrences of a string into single occurrences.

    The string `replace`, repeated twice in direct succession, is looked
    up in `src` and substituted by one single `replace`. The actual work
    is delegated to `replace_str`.

    :param replace: the string whose doubled occurrences are collapsed
    :param src: the source string
    :returns: the updated string
    """
    doubled: str = replace * 2
    return replace_str(doubled, replace, src)

113 

114 

#: the separator placed between the different parts of a file name
PART_SEPARATOR: Final[str] = "_"
#: the replacement for "." in a file name
DECIMAL_DOT_REPLACEMENT: Final[str] = "d"
#: the replacement for "-" in a file name
MINUS_REPLACEMENT: Final[str] = "m"
#: the replacement for "+" in a file name
PLUS_REPLACEMENT: Final[str] = "p"

#: a pattern used during name sanitization: it matches every single
#: character that is neither a word character, nor whitespace, nor "-"
__PATTERN_SPACE_BEFORE_MINUS: Final[Pattern] = _compile(r"[^\w\s-]")
#: the multiple-whitespace pattern: it matches any run of one or more
#: consecutive whitespace characters
__PATTERN_MULTIPLE_WHITESPACE: Final[Pattern] = _compile(r"\s+")

128 

129 

def sanitize_name(name: str) -> str:
    """
    Sanitize a name in such a way that it can be used as path component.

    The name is stripped of leading and trailing whitespace. Repeated
    `"-"`, `"+"`, `"_"`, and `"."` are collapsed to single occurrences.
    `"-"` is replaced by :const:`MINUS_REPLACEMENT`, `"+"` by
    :const:`PLUS_REPLACEMENT`, and `"."` by
    :const:`DECIMAL_DOT_REPLACEMENT`. All remaining characters that are
    neither word characters nor whitespace are deleted, every run of
    whitespace becomes one :const:`PART_SEPARATOR`, and one leading and
    one trailing underscore are removed.

    >>> sanitize_name(" hello world ")
    'hello_world'
    >>> sanitize_name(" 56.6-455 ")
    '56d6m455'
    >>> sanitize_name(" _ i _ am _ funny --6 _ ")
    'i_am_funny_m6'
    >>> sanitize_name("a++b")
    'apb'

    :param name: the name that should be sanitized
    :return: the sanitized name
    :raises ValueError: if the name is invalid or empty
    :raises TypeError: if the name is `None` or not a string
    """
    if not isinstance(name, str):
        raise type_error(name, "name", str)
    orig_name = name
    name = name.strip()
    # Each symbol must be collapsed *before* it is substituted: once "+"
    # has been turned into its replacement, a doubled "+" can no longer be
    # found, so the collapse and the substitution of one symbol belong
    # into the same step.
    name = __replace_double("-", name).replace("-", MINUS_REPLACEMENT)
    name = __replace_double("+", name).replace("+", PLUS_REPLACEMENT)
    name = __replace_double("_", name)
    name = __replace_double(".", name).replace(".", DECIMAL_DOT_REPLACEMENT)

    # delete everything that is neither word character nor whitespace,
    # then turn each run of whitespace into a single separator
    name = sub(__PATTERN_SPACE_BEFORE_MINUS, "", name)
    name = sub(__PATTERN_MULTIPLE_WHITESPACE, PART_SEPARATOR, name)
    # the previous step may have produced neighboring underscores again
    name = __replace_double("_", name)

    name = name.removeprefix("_").removesuffix("_")
    if not name:
        raise ValueError(
            f"Sanitized name must not become empty, but {orig_name!r} does.")

    return name

165 

166 

def sanitize_names(names: Iterable[str]) -> str:
    """
    Sanitize several names and join them into one single name.

    Names of length zero are skipped. Every other name is passed through
    :func:`sanitize_name` and the results are concatenated, with
    :const:`PART_SEPARATOR` placed between them.

    >>> sanitize_names(["", " sdf ", "", "5-3"])
    'sdf_5m3'
    >>> sanitize_names([" a ", " b", " c", "", "6", ""])
    'a_b_c_6'

    :param names: the list of names.
    :return: the sanitized name
    """
    parts: list[str] = []
    for name in names:
        if len(name) > 0:  # empty names contribute nothing
            parts.append(sanitize_name(name))
    return PART_SEPARATOR.join(parts)