Coverage for bookbuilderpy/source_tools.py: 84%
160 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
1"""In this file, we put some shared tools for rendering source codes."""
3import sys
4from typing import Callable, Iterable
6from bookbuilderpy.types import type_error
9def select_lines(code: Iterable[str],
10 lines: Iterable[int] | None = None,
11 labels: Iterable[str] | None = None,
12 line_comment_start: str = "#",
13 max_consecutive_empty_lines: int = 1) -> list[str]:
14 r"""
15 Select lines of source code based on labels and line indices.
17 First, we select all lines of the code we want to keep.
18 If labels are defined, then lines are kept as ranges or as single lines
19 for all pre-defined labels. Ranges may overlap and/or intersect.
20 Otherwise, all lines are selected in this step.
22 Then, if line numbers are provided, we selected the lines based on the
23 line numbers from the lines we have preserved.
25 Finally, leading and trailing empty lines as well as superfluous empty
26 lines are removed.
28 :param code: the code loaded from a file
29 :param lines: the lines to keep, or `None` if we keep all
30 :param labels: a list of labels marking start and
31 end of code snippets to include
32 :param line_comment_start: the string marking the line comment start
33 :param max_consecutive_empty_lines: the maximum number of permitted
34 consecutive empty lines
35 :return: the list of selected lines
37 >>> select_lines(["def a():", " b=c", " return x"])
38 ['def a():', ' b=c', ' return x']
39 >>> pc = ["# start x", "def a():", " b=c # -x", " return x", "# end x"]
40 >>> select_lines(pc, labels={"x"})
41 ['def a():', ' return x']
42 """
43 if not isinstance(code, Iterable):
44 raise type_error(code, "code", Iterable)
45 if not isinstance(max_consecutive_empty_lines, int):
46 raise type_error(
47 max_consecutive_empty_lines, "max_consecutive_empty_lines", int)
48 if max_consecutive_empty_lines < 0:
49 raise ValueError("max_consecutive_empty_lines must be >= 0, but is "
50 f"{max_consecutive_empty_lines}.")
51 if not isinstance(line_comment_start, str):
52 raise type_error(line_comment_start, "line_comment_start", str)
53 if not line_comment_start:
54 raise ValueError("line_comment_start cannot be "
55 f"'{line_comment_start}'.")
57 keep_lines: list[str]
59 # make sure that labels are unique and non-empty
60 label_str: list[str] | None = None
61 if labels is not None:
62 if not isinstance(labels, Iterable):
63 raise type_error(labels, "labels", Iterable)
64 label_lst = list(labels)
65 label_lst.sort()
66 label_str = list({label.strip() for label in label_lst})
67 label_str.sort()
68 if label_lst != label_str:
69 raise ValueError(
70 f"Invalid label spec {label_lst} of length {len(label_lst)},"
71 f" leading to unique label set {label_str} of length "
72 f"{len(label_str)}.")
73 del label_lst
74 if label_str and not (label_str[0]):
75 raise ValueError(f"Empty label in {labels}.")
77 # process all labels, if any are specified
78 if label_str:
79 keep_lines = []
81 start_labels = [f"{line_comment_start} start {label}"
82 for label in label_str]
83 end_labels = [f"{line_comment_start} end {label}"
84 for label in label_str]
85 add_labels = [f"{line_comment_start} +{label}" for label in label_str]
86 del_labels = [f"{line_comment_start} -{label}" for label in label_str]
87 all_labels = set(start_labels + end_labels + add_labels + del_labels)
88 if len(all_labels) != (4 * len(label_str)):
89 raise ValueError("label clash? impossible?")
90 del all_labels
92 active_labels: set[int] = set() # the active label ranges
93 current_line_labels: set[int] = set() # labels of the current line
94 done_labels: set[int] = set() # the labes for which text as retained
96 for line, the_cl in enumerate(code): # iterate over all code lines
97 cl = the_cl.rstrip()
99 # first, we need to update the state
100 current_line_labels.clear()
101 current_line_labels.update(active_labels)
102 found_mark: bool = True
103 while found_mark:
104 found_mark = False
106 # check all potential range starts
107 for i, lbl in enumerate(start_labels):
108 if cl.endswith(lbl):
109 cl = cl[:len(cl) - len(lbl)].rstrip()
110 if i in active_labels:
111 raise ValueError(
112 f"Label '{label_str[i]}' already active in "
113 f"line {line} with text '{cl}', cannot "
114 "start.")
115 active_labels.add(i)
116 found_mark = True
118 # check all potential range end
119 for i, lbl in enumerate(end_labels):
120 if cl.endswith(lbl):
121 cl = cl[:len(cl) - len(lbl)].rstrip()
122 if i not in active_labels:
123 raise ValueError(
124 f"Label '{label_str[i]}' not active in "
125 f"line {line} with text '{cl}', cannot "
126 "end.")
127 active_labels.remove(i)
128 current_line_labels.remove(i)
129 found_mark = True
131 # check all potential line add labels
132 for i, lbl in enumerate(add_labels):
133 if cl.endswith(lbl):
134 cl = cl[:len(cl) - len(lbl)].rstrip()
135 if i in current_line_labels:
136 raise ValueError(
137 f"Label '{label_str[i]}' already active in "
138 f"line {line} with text '{cl}', cannot "
139 "add.")
140 current_line_labels.add(i)
141 found_mark = True
143 # check all potential line deletion markers
144 for i, lbl in enumerate(del_labels):
145 if cl.endswith(lbl):
146 cl = cl[:len(cl) - len(lbl)].rstrip()
147 if i not in current_line_labels:
148 raise ValueError(
149 f"Label '{label_str[i]}' already active in "
150 f"line {line} with text '{cl}', cannot "
151 "delete.")
152 current_line_labels.remove(i)
153 found_mark = True
154 break
155 if found_mark:
156 continue
158 if current_line_labels:
159 keep_lines.append(cl)
160 done_labels.update(current_line_labels)
162 if not keep_lines:
163 raise ValueError(
164 f"Nothing is left over after applying labels {labels}.")
165 if len(done_labels) < len(label_str):
166 raise ValueError(
167 "Never had any text for labels "
168 f"{set(label_str).difference(done_labels)}.")
169 else:
170 keep_lines = [cl.rstrip() for cl in code]
172 if lines is not None: # select the lines we want to keep
173 if not isinstance(lines, Iterable):
174 raise type_error(lines, "lines", Iterable)
175 lines_ints = list(set(lines))
176 if not lines_ints:
177 raise ValueError(f"Empty lines provided: {lines}.")
178 lines_ints.sort()
179 keep_lines = [keep_lines[i] for i in lines_ints]
181 # remove leading empty lines
182 while keep_lines:
183 if not keep_lines[0]:
184 del keep_lines[0]
185 else:
186 break
188 # remove trailing empty lines
189 while keep_lines:
190 if not keep_lines[-1]:
191 del keep_lines[-1]
192 else:
193 break
195 # remove superfluous empty lines
196 empty_lines = 0
197 current = len(keep_lines)
198 while current > 0:
199 current -= 1
200 if keep_lines[current]:
201 empty_lines = 0
202 elif empty_lines >= max_consecutive_empty_lines:
203 del keep_lines[current]
204 else:
205 empty_lines += 1
207 if not keep_lines:
208 raise ValueError(f"Empty code resulting from {code} after applying "
209 f"labels {labels} and lines {lines}.?")
211 return keep_lines
def format_empty_lines(lines: Iterable[str],
                       empty_before: Callable = lambda line: False,
                       no_empty_after: Callable = lambda line: False,
                       force_no_empty_after: Callable = lambda line: False,
                       max_consecutive_empty_lines: int = 1) -> list[str]:
    """
    Normalize the empty lines in a sequence of text lines.

    Every line is stripped of trailing white space. Leading and trailing
    empty lines are dropped and runs of empty lines are limited to
    `max_consecutive_empty_lines`. The three callbacks receive the text
    of a non-empty line with both leading and trailing white space
    removed.

    :param lines: the original line iterable
    :param empty_before: a function checking whether an empty line
        is required before a certain string; if so, at least one empty
        line is inserted, unless the previous line forcefully prohibits it
    :param no_empty_after: a function checking whether empty lines
        following a string should be dropped; an `empty_before` request
        of the next line still inserts an empty line
    :param force_no_empty_after: a function checking whether an empty
        line is strictly prohibited after a string, which overrides
        `empty_before` of the next line as well
    :param max_consecutive_empty_lines: the maximum number of permitted
        consecutive empty lines
    :return: the list of formatted lines
    :raises ValueError: if `max_consecutive_empty_lines` is negative or
        if no non-empty line was found

    >>> code = ["", "a", "", "b", "", "", "c", "", "", "", "d", "e", ""]
    >>> format_empty_lines(code, max_consecutive_empty_lines=3)
    ['a', '', 'b', '', '', 'c', '', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2)
    ['a', '', 'b', '', '', 'c', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=1)
    ['a', '', 'b', '', 'c', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=0)
    ['a', 'b', 'c', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b")
    ['a', '', 'b', 'c', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e")
    ['a', '', 'b', 'c', '', '', 'd', '', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e",
    ...                    force_no_empty_after=lambda s: s == "d")
    ['a', '', 'b', 'c', '', '', 'd', 'e']
    """
    if not isinstance(max_consecutive_empty_lines, int):
        raise type_error(
            max_consecutive_empty_lines, "max_consecutive_empty_lines", int)
    if max_consecutive_empty_lines < 0:
        raise ValueError("max_consecutive_empty_lines must be >= 0, but is "
                         f"{max_consecutive_empty_lines}.")
    for func, name in ((empty_before, "empty_before"),
                       (no_empty_after, "no_empty_after"),
                       (force_no_empty_after, "force_no_empty_after")):
        if not callable(func):
            raise type_error(func, name, call=True)

    output: list[str] = []
    pending_blanks: int = 0  # empty lines seen since the last text line
    # Both flags start out set so that empty lines before the first text
    # line are discarded and nothing is inserted in front of it.
    soft_block = True
    hard_block = True

    for raw in lines:
        text = raw.rstrip()

        if not text:
            # an empty line only counts if the previous text permits it
            if not (hard_block or soft_block):
                pending_blanks += 1
            continue

        core = text.lstrip()
        if not hard_block:
            gap_needed = empty_before(core) or (
                (pending_blanks > 0) and (max_consecutive_empty_lines > 0))
            if gap_needed:
                # a required empty line is emitted even if none was seen
                count = min(pending_blanks, max_consecutive_empty_lines)
                output.extend("" for _ in range(max(1, count)))

        soft_block = no_empty_after(core)
        hard_block = force_no_empty_after(core)
        output.append(text)
        pending_blanks = 0

    if not output:
        raise ValueError("No lines of text found.")
    return output
def strip_common_whitespace_prefix(lines: Iterable[str]) -> list[str]:
    r"""
    Strip the common prefix of space characters from a sequence of strings.

    The length of the common prefix is the smallest number of leading
    space characters over all lines that contain at least one non-space
    character. Empty lines and lines consisting only of spaces do not
    limit the prefix; they are simply cut by the prefix length, too.
    Only the space character `" "` is treated as white space here.

    The input is copied into a list first, because it is traversed twice.
    This way, one-shot iterables such as generators work as well.

    :param lines: the lines
    :return: a new list with the lines, each with the common prefix of
        spaces removed

    >>> strip_common_whitespace_prefix([" a", "  b"])
    ['a', ' b']
    >>> strip_common_whitespace_prefix(["  a", "  b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix([" a", " b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix(["  a", "  b", "c"])
    ['  a', '  b', 'c']
    >>> strip_common_whitespace_prefix([" a", "  b", "c"])
    [' a', '  b', 'c']
    >>> strip_common_whitespace_prefix(["  a", "  b", "    c"])
    ['a', 'b', '  c']
    >>> strip_common_whitespace_prefix(iter(["  a", "", "   b"]))
    ['a', '', ' b']
    """
    all_lines = list(lines)  # materialize once: we need two passes

    prefix_len = sys.maxsize
    for line in all_lines:
        indent = len(line) - len(line.lstrip(" "))
        # lines without any non-space character do not limit the prefix
        if indent < len(line):
            prefix_len = min(prefix_len, indent)
            if prefix_len <= 0:
                break  # nothing can be stripped anymore

    if prefix_len > 0:
        return [line[prefix_len:] for line in all_lines]
    return all_lines