Coverage for texgit / formatters / source_tools.py: 82%
194 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-22 02:50 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-22 02:50 +0000
1"""In this file, we put some shared tools for rendering source codes."""
3import sys
4from re import MULTILINE, Pattern
5from re import compile as re_compile
6from typing import Callable, Final, Iterable
8from pycommons.strings.enforce import enforce_non_empty_str
9from pycommons.types import type_error
# The split pattern shared by `split_line_choices` and `split_labels`:
# every single semicolon, comma, plus sign, newline, tab, or space
# character acts as a separator between two entries.
__SPLT: Final[Pattern] = re_compile(r"[;,+\n\t ]", MULTILINE)
15def split_line_choices(lines: str | None) -> list[int] | None:
16 """
17 Split line choices to iterables of `int` or `None`.
19 This function converts `1`-based line selections into `0`-based ones.
21 :param lines: the line choices
22 :return: the split line choices.
24 >>> print(split_line_choices(None))
25 None
26 >>> print(split_line_choices(""))
27 None
28 >>> print(split_line_choices(","))
29 None
30 >>> split_line_choices("1")
31 [0]
32 >>> split_line_choices("1,2")
33 [0, 1]
34 >>> split_line_choices("1-5")
35 [0, 1, 2, 3, 4]
36 >>> split_line_choices("3;4-5;7-7;22+12-15;")
37 [2, 3, 4, 6, 21, 11, 12, 13, 14]
38 """
39 if lines is None:
40 return None
41 if not isinstance(lines, str):
42 raise type_error(lines, "lines", (str, None))
43 sel: list[int] = []
45 for patf in __SPLT.split(lines):
46 pats: str = patf.strip()
47 if len(pats) <= 0:
48 continue
49 idx: int = pats.find("-")
50 if idx < 0:
51 sel.append(int(enforce_non_empty_str(pats)) - 1)
52 continue
53 start: int = int(enforce_non_empty_str(pats[:idx].strip()))
54 end: int = int(enforce_non_empty_str(pats[idx + 1:].strip()))
55 sel.extend(range(start - 1, end))
57 return sel if len(sel) > 0 else None
60def split_labels(labels: str | None) -> set[str]:
61 r"""
62 Get a sequence of labels from a string.
64 :param labels: the labels string or `None`
65 :return: the labels set
67 >>> print(split_labels(None))
68 set()
69 >>> print(split_labels(""))
70 set()
71 >>> print(split_labels(","))
72 set()
73 >>> sorted(split_labels("a;b;d;c"))
74 ['a', 'b', 'c', 'd']
75 >>> sorted(split_labels("a,b c+a;a\td;c"))
76 ['a', 'b', 'c', 'd']
77 """
78 sel: set[str] = set()
79 if labels is None:
80 return sel
81 if not isinstance(labels, str):
82 raise type_error(labels, "labels", (str, None))
84 for patf in __SPLT.split(labels):
85 pats: str = patf.strip()
86 if len(pats) <= 0:
87 continue
88 sel.add(pats)
89 return sel
92def select_lines(code: Iterable[str],
93 lines: Iterable[int] | None = None,
94 labels: Iterable[str] | None = None,
95 line_comment_start: str = "#",
96 max_consecutive_empty_lines: int = 2) -> list[str]:
97 r"""
98 Select lines of source code based on labels and line indices.
100 First, we select all lines of the code we want to keep.
101 If labels are defined, then lines are kept as ranges or as single lines
102 for all pre-defined labels. Ranges may overlap and/or intersect.
103 Otherwise, all lines are selected in this step.
105 Then, if line numbers are provided, we selected the lines based on the
106 line numbers from the lines we have preserved.
108 Finally, leading and trailing empty lines as well as superfluous empty
109 lines are removed.
111 :param code: the code loaded from a file
112 :param lines: the lines to keep, or `None` if we keep all
113 :param labels: a list of labels marking start and
114 end of code snippets to include
115 :param line_comment_start: the string marking the line comment start
116 :param max_consecutive_empty_lines: the maximum number of permitted
117 consecutive empty lines
118 :return: the list of selected lines
120 >>> select_lines(["def a():", " b=c", " return x"])
121 ['def a():', ' b=c', ' return x']
122 >>> pc = ["# start x", "def a():", " b=c # -x", " return x", "# end x"]
123 >>> select_lines(pc, labels={"x"})
124 ['def a():', ' return x']
125 >>> select_lines(["def a():", " b=c", " return x"], lines=[0, 2])
126 ['def a():', ' return x']
127 """
128 if not isinstance(code, Iterable):
129 raise type_error(code, "code", Iterable)
130 if not isinstance(max_consecutive_empty_lines, int):
131 raise type_error(
132 max_consecutive_empty_lines, "max_consecutive_empty_lines", int)
133 if max_consecutive_empty_lines < 0:
134 raise ValueError("max_consecutive_empty_lines must be >= 0, but is "
135 f"{max_consecutive_empty_lines}.")
136 if not isinstance(line_comment_start, str):
137 raise type_error(line_comment_start, "line_comment_start", str)
138 if not line_comment_start:
139 raise ValueError("line_comment_start cannot be "
140 f"'{line_comment_start}'.")
142 keep_lines: list[str]
144 # make sure that labels are unique and non-empty
145 label_str: list[str] | None = None
146 if labels is not None:
147 if not isinstance(labels, Iterable):
148 raise type_error(labels, "labels", Iterable)
149 label_lst = list(labels)
150 label_lst.sort()
151 label_str = list({label.strip() for label in label_lst})
152 label_str.sort()
153 if label_lst != label_str:
154 raise ValueError(
155 f"Invalid label spec {label_lst} of length {len(label_lst)},"
156 f" leading to unique label set {label_str} of length "
157 f"{len(label_str)}.")
158 del label_lst
159 if label_str and not (label_str[0]):
160 raise ValueError(f"Empty label in {labels}.")
162 # process all labels, if any are specified
163 if label_str:
164 keep_lines = []
166 start_labels = [f"{line_comment_start} start {label}"
167 for label in label_str]
168 end_labels = [f"{line_comment_start} end {label}"
169 for label in label_str]
170 add_labels = [f"{line_comment_start} +{label}" for label in label_str]
171 del_labels = [f"{line_comment_start} -{label}" for label in label_str]
172 all_labels = set(start_labels + end_labels + add_labels + del_labels)
173 if len(all_labels) != (4 * len(label_str)):
174 raise ValueError("label clash? impossible?")
175 del all_labels
177 active_labels: set[int] = set() # the active label ranges
178 current_line_labels: set[int] = set() # labels of the current line
179 done_labels: set[int] = set() # the labels for which text as retained
181 for line, the_cl in enumerate(code): # iterate over all code lines
182 cl = the_cl.rstrip()
184 # first, we need to update the state
185 current_line_labels.clear()
186 current_line_labels.update(active_labels)
187 found_mark: bool = True
188 while found_mark:
189 found_mark = False
191 # check all potential range starts
192 for i, lbl in enumerate(start_labels):
193 if cl.endswith(lbl):
194 cl = cl[:len(cl) - len(lbl)].rstrip()
195 if i in active_labels:
196 raise ValueError(
197 f"Label '{label_str[i]}' already active in "
198 f"line {line} with text '{cl}', cannot "
199 "start.")
200 active_labels.add(i)
201 found_mark = True
203 # check all potential range end
204 for i, lbl in enumerate(end_labels):
205 if cl.endswith(lbl):
206 cl = cl[:len(cl) - len(lbl)].rstrip()
207 if i not in active_labels:
208 raise ValueError(
209 f"Label '{label_str[i]}' not active in "
210 f"line {line} with text '{cl}', cannot "
211 "end.")
212 active_labels.remove(i)
213 current_line_labels.remove(i)
214 found_mark = True
216 # check all potential line add labels
217 for i, lbl in enumerate(add_labels):
218 if cl.endswith(lbl):
219 cl = cl[:len(cl) - len(lbl)].rstrip()
220 if i in current_line_labels:
221 raise ValueError(
222 f"Label '{label_str[i]}' already active in "
223 f"line {line} with text '{cl}', cannot "
224 "add.")
225 current_line_labels.add(i)
226 found_mark = True
228 # check all potential line deletion markers
229 for i, lbl in enumerate(del_labels):
230 if cl.endswith(lbl):
231 cl = cl[:len(cl) - len(lbl)].rstrip()
232 if i not in current_line_labels:
233 raise ValueError(
234 f"Label '{label_str[i]}' already active in "
235 f"line {line} with text '{cl}', cannot "
236 "delete.")
237 current_line_labels.remove(i)
238 found_mark = True
239 break
240 if found_mark:
241 continue
243 if current_line_labels:
244 keep_lines.append(cl)
245 done_labels.update(current_line_labels)
247 if not keep_lines:
248 raise ValueError(
249 f"Nothing is left over after applying labels {labels}.")
250 if len(done_labels) < len(label_str):
251 raise ValueError(
252 "Never had any text for labels "
253 f"{set(label_str).difference(done_labels)}.")
254 else:
255 keep_lines = [cl.rstrip() for cl in code]
257 if lines is not None: # select the lines we want to keep
258 if not isinstance(lines, Iterable):
259 raise type_error(lines, "lines", Iterable)
260 lines_ints = list(set(lines))
261 if not lines_ints:
262 raise ValueError(f"Empty lines provided: {lines}.")
263 lines_ints.sort()
264 keep_lines = [keep_lines[i] for i in lines_ints]
266 # remove leading empty lines
267 while keep_lines:
268 if not keep_lines[0]:
269 del keep_lines[0]
270 else:
271 break
273 # remove trailing empty lines
274 while keep_lines:
275 if not keep_lines[-1]:
276 del keep_lines[-1]
277 else:
278 break
280 # remove superfluous empty lines
281 empty_lines = 0
282 current = len(keep_lines)
283 while current > 0:
284 current -= 1
285 if keep_lines[current]:
286 empty_lines = 0
287 elif empty_lines >= max_consecutive_empty_lines:
288 del keep_lines[current]
289 else:
290 empty_lines += 1
292 if not keep_lines:
293 raise ValueError(f"Empty code resulting from {code} after applying "
294 f"labels {labels} and lines {lines}.?")
296 return keep_lines
def format_empty_lines(lines: Iterable[str],
                       empty_before: Callable = lambda _: False,
                       no_empty_after: Callable = lambda _: False,
                       force_no_empty_after: Callable = lambda _: False,
                       max_consecutive_empty_lines: int = 2) -> list[str]:
    """
    Build a list of lines with normalized runs of empty lines.

    Every line is right-stripped. Empty lines at the beginning and at the
    end are dropped, and every run of empty lines between two lines of
    text is shortened to at most `max_consecutive_empty_lines` lines.
    The three predicates receive the fully stripped text of a non-empty
    line.

    :param lines: the original line iterable
    :param empty_before: a function checking whether an empty line
        is required before a certain string; if it is, at least one empty
        line is emitted even if the input had none
    :param no_empty_after: a function checking whether the empty lines
        that follow a string in the input should be ignored; an empty
        line required via `empty_before` is still inserted
    :param force_no_empty_after: a function checking whether an empty
        line is prohibited after a string; this also overrides
        `empty_before` of the next line of text
    :param max_consecutive_empty_lines: the maximum number of permitted
        consecutive empty lines
    :return: the list of formatted lines
    :raises TypeError: if `max_consecutive_empty_lines` is not an `int`
        or one of the predicates is not callable
    :raises ValueError: if `max_consecutive_empty_lines` is negative or
        if `lines` contains no line of text

    >>> code = ["", "a", "", "b", "", "", "c", "", "", "", "d", "e", ""]
    >>> format_empty_lines(code, max_consecutive_empty_lines=3)
    ['a', '', 'b', '', '', 'c', '', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2)
    ['a', '', 'b', '', '', 'c', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=1)
    ['a', '', 'b', '', 'c', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=0)
    ['a', 'b', 'c', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b")
    ['a', '', 'b', 'c', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e")
    ['a', '', 'b', 'c', '', '', 'd', '', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e",
    ...                    force_no_empty_after=lambda s: s == "d")
    ['a', '', 'b', 'c', '', '', 'd', 'e']
    """
    if not isinstance(max_consecutive_empty_lines, int):
        raise type_error(
            max_consecutive_empty_lines, "max_consecutive_empty_lines", int)
    if max_consecutive_empty_lines < 0:
        raise ValueError("max_consecutive_empty_lines must be >= 0, but is "
                         f"{max_consecutive_empty_lines}.")
    if not callable(empty_before):
        raise type_error(empty_before, "empty_before", call=True)
    if not callable(no_empty_after):
        raise type_error(no_empty_after, "no_empty_after", call=True)
    if not callable(force_no_empty_after):
        raise type_error(
            force_no_empty_after, "force_no_empty_after", call=True)

    output: list[str] = []
    pending_blank: int = 0  # empty lines seen since the last text line
    # Both flags start as `True` so that leading empty lines are dropped.
    skip_blank = True
    forbid_blank = True
    for raw in lines:
        text = raw.rstrip()

        if not text:  # an empty line: count it unless it is suppressed
            if not (forbid_blank or skip_blank):
                pending_blank += 1
            continue

        stripped = text.lstrip()
        # Pending empty lines are only written in front of the next line
        # of text, which is why trailing empty lines never appear.
        if not forbid_blank:
            need_gap = empty_before(stripped) or (
                (pending_blank > 0) and (max_consecutive_empty_lines > 0))
            if need_gap:
                gap = max(1, min(pending_blank, max_consecutive_empty_lines))
                output.extend("" for _ in range(gap))
        skip_blank = no_empty_after(stripped)
        forbid_blank = force_no_empty_after(stripped)
        output.append(text)
        pending_blank = 0

    if not output:
        raise ValueError("No lines of text found.")
    return output
def strip_common_whitespace_prefix(lines: Iterable[str]) -> list[str]:
    r"""
    Strip the common prefix of space characters from a sequence of strings.

    The prefix length is the smallest number of leading space characters
    (`" "`) over all lines that contain at least one character other than
    a space. Empty lines and lines consisting only of spaces do not
    influence the prefix length; they are nevertheless cut by it. Only
    the space character counts as white space here, so a leading tab ends
    the prefix.

    The input is materialized into a list once, so one-shot iterables
    such as generators are supported.

    :param lines: the lines
    :return: a new list with the lines with the common space prefix removed

    >>> strip_common_whitespace_prefix([" a", "  b"])
    ['a', ' b']
    >>> strip_common_whitespace_prefix(["  a", "  b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix([" a", " b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix(["  a", "  b", "c"])
    ['  a', '  b', 'c']
    >>> strip_common_whitespace_prefix([" a", "  b", "c"])
    [' a', '  b', 'c']
    >>> strip_common_whitespace_prefix(["  a", "  b", "    c"])
    ['a', 'b', '  c']
    >>> strip_common_whitespace_prefix(iter(["  a", "", "  b"]))
    ['a', '', 'b']
    """
    # The lines are traversed twice (measure, then cut), so a one-shot
    # iterable must be stored first or the second pass would see nothing.
    rows: list[str] = list(lines)

    prefix_len = sys.maxsize
    for row in rows:
        body = row.lstrip(" ")
        if not body:
            continue  # empty or spaces only: no information about indent
        prefix_len = min(prefix_len, len(row) - len(body))
        if prefix_len <= 0:
            return rows  # some line is not indented: nothing to strip

    # If no line had any text, `prefix_len` is still `sys.maxsize` and
    # every line is cut down to the empty string.
    return [row[prefix_len:] for row in rows]