"""In this file, we put some shared tools for rendering source codes."""
import re
import sys
from typing import Callable, Iterable
from pycommons.strings.enforce import enforce_non_empty_str
from pycommons.types import type_error
# the split pattern
__SPLT: re.Pattern = re.compile("[;,+\n\t ]", re.MULTILINE)
[docs]
def split_line_choices(lines: str | None) -> list[int] | None:
"""
Split line choices to iterables of `int` or `None`.
This function converts `1`-based line selections into `0`-based ones.
:param lines: the line choices
:return: the split line choices.
>>> print(split_line_choices(None))
None
>>> print(split_line_choices(""))
None
>>> print(split_line_choices(","))
None
>>> split_line_choices("1")
[0]
>>> split_line_choices("1,2")
[0, 1]
>>> split_line_choices("1-5")
[0, 1, 2, 3, 4]
>>> split_line_choices("3;4-5;7-7;22+12-15;")
[2, 3, 4, 6, 21, 11, 12, 13, 14]
"""
if lines is None:
return None
if not isinstance(lines, str):
raise type_error(lines, "lines", (str, None))
sel: list[int] = []
for patf in __SPLT.split(lines):
pats: str = patf.strip()
if len(pats) <= 0:
continue
idx: int = pats.find("-")
if idx < 0:
sel.append(int(enforce_non_empty_str(pats)) - 1)
continue
start: int = int(enforce_non_empty_str(pats[:idx].strip()))
end: int = int(enforce_non_empty_str(pats[idx + 1:].strip()))
sel.extend(range(start - 1, end))
return sel if len(sel) > 0 else None
[docs]
def split_labels(labels: str | None) -> set[str]:
r"""
Get a sequence of labels from a string.
:param labels: the labels string or `None`
:return: the labels set
>>> print(split_labels(None))
set()
>>> print(split_labels(""))
set()
>>> print(split_labels(","))
set()
>>> sorted(split_labels("a;b;d;c"))
['a', 'b', 'c', 'd']
>>> sorted(split_labels("a,b c+a;a\td;c"))
['a', 'b', 'c', 'd']
"""
sel: set[str] = set()
if labels is None:
return sel
if not isinstance(labels, str):
raise type_error(labels, "labels", (str, None))
for patf in __SPLT.split(labels):
pats: str = patf.strip()
if len(pats) <= 0:
continue
sel.add(pats)
return sel
[docs]
def select_lines(code: Iterable[str],
lines: Iterable[int] | None = None,
labels: Iterable[str] | None = None,
line_comment_start: str = "#",
max_consecutive_empty_lines: int = 2) -> list[str]:
r"""
Select lines of source code based on labels and line indices.
First, we select all lines of the code we want to keep.
If labels are defined, then lines are kept as ranges or as single lines
for all pre-defined labels. Ranges may overlap and/or intersect.
Otherwise, all lines are selected in this step.
Then, if line numbers are provided, we selected the lines based on the
line numbers from the lines we have preserved.
Finally, leading and trailing empty lines as well as superfluous empty
lines are removed.
:param code: the code loaded from a file
:param lines: the lines to keep, or `None` if we keep all
:param labels: a list of labels marking start and
end of code snippets to include
:param line_comment_start: the string marking the line comment start
:param max_consecutive_empty_lines: the maximum number of permitted
consecutive empty lines
:return: the list of selected lines
>>> select_lines(["def a():", " b=c", " return x"])
['def a():', ' b=c', ' return x']
>>> pc = ["# start x", "def a():", " b=c # -x", " return x", "# end x"]
>>> select_lines(pc, labels={"x"})
['def a():', ' return x']
>>> select_lines(["def a():", " b=c", " return x"], lines=[0, 2])
['def a():', ' return x']
"""
if not isinstance(code, Iterable):
raise type_error(code, "code", Iterable)
if not isinstance(max_consecutive_empty_lines, int):
raise type_error(
max_consecutive_empty_lines, "max_consecutive_empty_lines", int)
if max_consecutive_empty_lines < 0:
raise ValueError("max_consecutive_empty_lines must be >= 0, but is "
f"{max_consecutive_empty_lines}.")
if not isinstance(line_comment_start, str):
raise type_error(line_comment_start, "line_comment_start", str)
if not line_comment_start:
raise ValueError("line_comment_start cannot be "
f"'{line_comment_start}'.")
keep_lines: list[str]
# make sure that labels are unique and non-empty
label_str: list[str] | None = None
if labels is not None:
if not isinstance(labels, Iterable):
raise type_error(labels, "labels", Iterable)
label_lst = list(labels)
label_lst.sort()
label_str = list({label.strip() for label in label_lst})
label_str.sort()
if label_lst != label_str:
raise ValueError(
f"Invalid label spec {label_lst} of length {len(label_lst)},"
f" leading to unique label set {label_str} of length "
f"{len(label_str)}.")
del label_lst
if label_str and not (label_str[0]):
raise ValueError(f"Empty label in {labels}.")
# process all labels, if any are specified
if label_str:
keep_lines = []
start_labels = [f"{line_comment_start} start {label}"
for label in label_str]
end_labels = [f"{line_comment_start} end {label}"
for label in label_str]
add_labels = [f"{line_comment_start} +{label}" for label in label_str]
del_labels = [f"{line_comment_start} -{label}" for label in label_str]
all_labels = set(start_labels + end_labels + add_labels + del_labels)
if len(all_labels) != (4 * len(label_str)):
raise ValueError("label clash? impossible?")
del all_labels
active_labels: set[int] = set() # the active label ranges
current_line_labels: set[int] = set() # labels of the current line
done_labels: set[int] = set() # the labels for which text as retained
for line, the_cl in enumerate(code): # iterate over all code lines
cl = the_cl.rstrip()
# first, we need to update the state
current_line_labels.clear()
current_line_labels.update(active_labels)
found_mark: bool = True
while found_mark:
found_mark = False
# check all potential range starts
for i, lbl in enumerate(start_labels):
if cl.endswith(lbl):
cl = cl[:len(cl) - len(lbl)].rstrip()
if i in active_labels:
raise ValueError(
f"Label '{label_str[i]}' already active in "
f"line {line} with text '{cl}', cannot "
"start.")
active_labels.add(i)
found_mark = True
# check all potential range end
for i, lbl in enumerate(end_labels):
if cl.endswith(lbl):
cl = cl[:len(cl) - len(lbl)].rstrip()
if i not in active_labels:
raise ValueError(
f"Label '{label_str[i]}' not active in "
f"line {line} with text '{cl}', cannot "
"end.")
active_labels.remove(i)
current_line_labels.remove(i)
found_mark = True
# check all potential line add labels
for i, lbl in enumerate(add_labels):
if cl.endswith(lbl):
cl = cl[:len(cl) - len(lbl)].rstrip()
if i in current_line_labels:
raise ValueError(
f"Label '{label_str[i]}' already active in "
f"line {line} with text '{cl}', cannot "
"add.")
current_line_labels.add(i)
found_mark = True
# check all potential line deletion markers
for i, lbl in enumerate(del_labels):
if cl.endswith(lbl):
cl = cl[:len(cl) - len(lbl)].rstrip()
if i not in current_line_labels:
raise ValueError(
f"Label '{label_str[i]}' already active in "
f"line {line} with text '{cl}', cannot "
"delete.")
current_line_labels.remove(i)
found_mark = True
break
if found_mark:
continue
if current_line_labels:
keep_lines.append(cl)
done_labels.update(current_line_labels)
if not keep_lines:
raise ValueError(
f"Nothing is left over after applying labels {labels}.")
if len(done_labels) < len(label_str):
raise ValueError(
"Never had any text for labels "
f"{set(label_str).difference(done_labels)}.")
else:
keep_lines = [cl.rstrip() for cl in code]
if lines is not None: # select the lines we want to keep
if not isinstance(lines, Iterable):
raise type_error(lines, "lines", Iterable)
lines_ints = list(set(lines))
if not lines_ints:
raise ValueError(f"Empty lines provided: {lines}.")
lines_ints.sort()
keep_lines = [keep_lines[i] for i in lines_ints]
# remove leading empty lines
while keep_lines:
if not keep_lines[0]:
del keep_lines[0]
else:
break
# remove trailing empty lines
while keep_lines:
if not keep_lines[-1]:
del keep_lines[-1]
else:
break
# remove superfluous empty lines
empty_lines = 0
current = len(keep_lines)
while current > 0:
current -= 1
if keep_lines[current]:
empty_lines = 0
elif empty_lines >= max_consecutive_empty_lines:
del keep_lines[current]
else:
empty_lines += 1
if not keep_lines:
raise ValueError(f"Empty code resulting from {code} after applying "
f"labels {labels} and lines {lines}.?")
return keep_lines
[docs]
def strip_common_whitespace_prefix(lines: Iterable[str]) -> list[str]:
r"""
Strip a common whitespace prefix from a list of strings and merge them.
:param lines: the lines
:return: the code with the white space prefix stripped
>>> strip_common_whitespace_prefix([" a", " b"])
['a', ' b']
>>> strip_common_whitespace_prefix([" a", " b"])
['a', 'b']
>>> strip_common_whitespace_prefix([" a", " b"])
['a', 'b']
>>> strip_common_whitespace_prefix([" a", " b", "c"])
[' a', ' b', 'c']
>>> strip_common_whitespace_prefix([" a", " b", "c"])
[' a', ' b', 'c']
>>> strip_common_whitespace_prefix([" a", " b", " c"])
['a', 'b', ' c']
"""
prefix_len = sys.maxsize
for line in lines:
ll = len(line)
if ll <= 0:
continue
for k in range(min(ll, prefix_len)):
if line[k] != " ":
prefix_len = k
break
if prefix_len > 0:
return [line[prefix_len:] for line in lines]
return list(lines)