Coverage for bookbuilderpy/strings.py: 62%
112 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
1"""Some utility methods for string processing."""
2import datetime
3import re
4import string
5from typing import (
6 Callable,
7 Final,
8 Iterable,
9 Pattern,
10)
11from urllib.parse import urlparse
13from bookbuilderpy.types import type_error
def str_to_lines(text: str) -> list[str]:
    r"""
    Split a text into its individual lines.

    The text is cut at every newline character, so leading, trailing, and
    consecutive newlines produce empty strings in the result.

    :param text: the original text string
    :return: the list of lines, without the newline characters
    :raises TypeError: if `text` is not a `str` (the error is created by
        `type_error`)

    >>> str_to_lines("\n123\n 456\n789 \n 10\n\n")
    ['', '123', ' 456', '789 ', ' 10', '', '']
    """
    if isinstance(text, str):
        return text.split("\n")
    raise type_error(text, "text", str)
def lines_to_str(lines: Iterable[str],
                 trailing_newline: bool = True) -> str:
    r"""
    Join an iterable of lines into one newline-separated string.

    All trailing white space of the joined text is removed first, so empty
    lines at the end of `lines` never produce additional newlines.

    :param lines: the lines
    :param trailing_newline: should there be a newline at the end?
    :return: the single string

    >>> lines_to_str(["a", "b", "", "c", ""], trailing_newline=True)
    'a\nb\n\nc\n'
    >>> lines_to_str(["a", "b", "", "c"], trailing_newline=True)
    'a\nb\n\nc\n'
    >>> lines_to_str(["a", "b", "", "c"], trailing_newline=False)
    'a\nb\n\nc'
    >>> lines_to_str(["a", "b", "", "c", ""], trailing_newline=False)
    'a\nb\n\nc'
    """
    if not isinstance(lines, Iterable):
        raise type_error(lines, "lines", Iterable)

    joined = "\n".join(lines)
    stripped = joined.rstrip()
    return f"{stripped}\n" if trailing_newline else stripped
def enforce_non_empty_str(text: str) -> str:
    """
    Check that a value is a string containing at least one character.

    :param text: the text
    :returns: the text, unchanged
    :raises TypeError: if `text` is not a `str`
    :raises ValueError: if `text` is empty
    """
    if isinstance(text, str):
        if text:
            return text
        raise ValueError(f"Non-empty str expected, but got '{text}'.")
    raise type_error(text, "text", str)
def enforce_non_empty_str_without_ws(text: str) -> str:
    """
    Check that a value is a non-empty string free of white space.

    The characters considered white space are those listed in
    `string.whitespace`.

    :param text: the text
    :returns: the text, unchanged
    :raises TypeError: if `text` is not a `str`
    :raises ValueError: if `text` is empty or contains any white space
        characters
    """
    text = enforce_non_empty_str(text)
    for ws_char in string.whitespace:
        if ws_char in text:
            raise ValueError(
                f"No white space allowed in string, but got '{text}'.")
    return text
def datetime_to_date_str(date: datetime.datetime) -> str:
    """
    Format the date part of a datetime object as year, month, and day.

    The three components are joined with U+2011 (non-breaking hyphen).

    :param date: the date
    :return: the date string
    """
    if isinstance(date, datetime.datetime):
        return date.strftime("%Y\u2011%m\u2011%d")
    raise type_error(date, "date", datetime.datetime)
def datetime_to_datetime_str(date: datetime.datetime) -> str:
    """
    Format a datetime object as date, time (hours and minutes), and zone.

    The date components are joined with U+2011 (non-breaking hyphen) and
    the date, time, and time zone name are separated by U+00A0 (no-break
    space).

    :param date: the date
    :return: the date-time string
    """
    if isinstance(date, datetime.datetime):
        return date.strftime("%Y\u2011%m\u2011%d\u00a0%H:%M\u00a0%Z")
    raise type_error(date, "date", datetime.datetime)
def enforce_url(url: str) -> str:
    """
    Check that a string is an acceptable url and return its parsed form.

    The url must be a non-empty string without white space, must not
    contain `..`, and must use one of the schemes `ssh`, `http`, or
    `https`. Only `ssh` urls may contain `@`. Both the network location
    and the path of the url must be non-empty.

    :param url: the url
    :return: the url, as re-assembled by `urlparse(...).geturl()`
    :raises ValueError: if any of the checks above fails
    """
    enforce_non_empty_str_without_ws(url)
    if ".." in url:
        raise ValueError(f"Invalid url '{url}', contains '..'.")

    parsed = urlparse(url)
    scheme = parsed.scheme
    if scheme != "ssh":
        if scheme not in ("http", "https"):
            raise ValueError(f"Invalid scheme '{scheme}' in url '{url}'.")
        if "@" in url:
            raise ValueError(
                f"Non-ssh URL must not contain '@', but '{url}' does")

    for component in (parsed.netloc, parsed.path):
        enforce_non_empty_str_without_ws(component)
    return parsed.geturl()
137def get_prefix_str(str_list: tuple[str, ...] | list[str]) -> str:
138 r"""
139 Compute the common prefix string.
141 :param str_list: the list of strings
142 :return: the common prefix
144 >>> get_prefix_str(["abc", "acd"])
145 'a'
146 >>> get_prefix_str(["xyz", "gsdf"])
147 ''
148 >>> get_prefix_str([])
149 ''
150 >>> get_prefix_str(["abx"])
151 'abx'
152 >>> get_prefix_str(("\\relative.path", "\\relative.figure",
153 ... "\\relative.code"))
154 '\\relative.'
155 """
156 if len(str_list) <= 0:
157 return ""
158 prefix_str = ""
159 len_smallest_str = min([len(str_mem) for str_mem in str_list])
160 str_list_0 = str_list[0]
161 for i in range(len_smallest_str):
162 f = str_list_0[i]
163 if len([0 for ind in range(1, len(str_list))
164 if f != str_list[ind][i]]) > 0:
165 break
166 prefix_str += f
167 return prefix_str
#: The language to locale dictionary for base locales: maps a bare
#: language id to the locale that :func:`lang_to_locale` returns for it.
#: Besides language codes it also holds the aliases "cn" and "tw" for the
#: two Chinese locales.
__LANG_DICT: Final[dict[str, str]] = {
    "en": "en_US",
    "zh": "zh_CN",
    "cn": "zh_CN",
    "tw": "zh_TW",
    "de": "de_DE",
    "fr": "fr_FR",
    "it": "it_IT",
    "ja": "ja_JP",
    "ko": "ko_KR",
    "pt": "pt_BR",
    "es": "es_ES",
}
def lang_to_locale(lang: str) -> str:
    """
    Translate a language id into a locale name.

    Ids found in the base-locale dictionary are mapped to their locale.
    Any other id is returned with every `-` replaced by `_`.

    :param lang: the language id
    :return: the locale
    """
    lang = enforce_non_empty_str_without_ws(lang)
    known_locale = __LANG_DICT.get(lang)
    if known_locale is not None:
        return known_locale
    return lang.replace("-", "_")
def file_size(size: int) -> str:
    """
    Convert a file size in bytes to a string with a binary unit suffix.

    The size is expressed in the smallest unit (B, KiB, MiB, ...) for which
    the value, rounded up to the next integer, is below 1024.

    :param size: the size in bytes, a non-negative integer
    :return: the string, e.g. `"0 B"`, `"1023 B"`, or `"2 KiB"`
    :raises ValueError: if `size` is not an `int`, is negative, or is too
        large to be expressed in YiB
    """
    if not isinstance(size, int) or (size < 0):
        raise ValueError(f"Invalid size: {size}.")
    if size == 0:
        return "0 B"

    base_size: int = 1
    for suffix in ("B", "KiB", "MiB", "GiB", "TiB", "PiB",
                   "EiB", "ZiB", "YiB"):
        # Ceiling division in pure integer arithmetic: float division
        # would round large values and report, e.g., 2**60 as "2 EiB".
        ret_size = (size + base_size - 1) // base_size
        if ret_size < 1024:
            return f"{ret_size} {suffix}"
        base_size *= 1024
    raise ValueError(f"Invalid size: {size}.")
#: The dictionary with "and" concatenations, keyed by language id.
#: The first entry of each tuple joins a sequence of exactly two elements,
#: the second one is put in front of the last element of longer sequences
#: (see :func:`to_string`).
__AND_DICT: dict[str, tuple[str, str]] = {
    "de": (" und ", " und "),
    "en": (" and ", ", and "),
}
229def to_string(obj,
230 locale: str | None = None,
231 use_seq_and: bool = True) -> str:
232 """
233 Convert any object to a string, try to use a proper locale.
235 :param obj: the input object
236 :param locale: the locale
237 :param use_seq_and: should we use "and" in sequences?
238 :return: the string representation
239 """
240 if obj is None:
241 return "None"
243 if isinstance(obj, str):
244 return obj.strip()
246 if isinstance(obj, Iterable):
247 merge = ", "
248 if (locale is not None) and (locale.startswith("zh")):
249 merge = ","
251 seq = [to_string(r, locale, use_seq_and).strip() for r in obj]
252 seql = len(seq)
253 if seql == 1:
254 return seq[0]
256 if use_seq_and and (locale is not None):
257 ands = __AND_DICT.get(locale, None)
258 if not ands:
259 ands = __AND_DICT.get(locale.split("_")[0], None)
260 if ands:
261 if seql == 2:
262 return ands[0].join(seq)
263 res = merge.join(seq[:-1])
264 return ands[1].join([res, seq[-1]])
266 return merge.join(seq)
268 return str(obj).strip()
271def regex_sub(search: str | Pattern,
272 replace: Callable | str,
273 inside: str) -> str:
274 r"""
275 Replace all occurrences of 'search' in 'inside' with 'replace'.
277 :param search: the regular expression to search
278 :param replace: the regular expression to replace it with
279 :param inside: the string in which to search/replace
280 :return: the new string after the recursive replacement
282 >>> regex_sub('[ \t]+\n', '\n', ' bla \nxyz\tabc\t\n')
283 ' bla\nxyz\tabc\n'
284 >>> regex_sub('[0-9]A', 'X', '23A7AA')
285 '2XXA'
286 """
287 while True:
288 text = re.sub(search, replace, inside, re.MULTILINE)
289 if text is inside:
290 return inside
291 inside = text