Source code for latexgit.formatters.source_tools

"""In this file, we put some shared tools for rendering source codes."""

import re
import sys
from typing import Callable, Iterable

from pycommons.strings.enforce import enforce_non_empty_str
from pycommons.types import type_error

# The pattern used to split selection strings into tokens. A single
# separator is exactly one of `;`, `,`, `+`, newline, tab, or space, so runs
# of separators produce empty tokens, which the functions using this pattern
# in this file skip.
# NOTE(review): `re.MULTILINE` only changes the meaning of `^` and `$`, which
# this pattern does not use, so the flag looks redundant - confirm.
__SPLT: re.Pattern = re.compile("[;,+\n\t ]", re.MULTILINE)


[docs] def split_line_choices(lines: str | None) -> list[int] | None: """ Split line choices to iterables of `int` or `None`. This function converts `1`-based line selections into `0`-based ones. :param lines: the line choices :return: the split line choices. >>> print(split_line_choices(None)) None >>> print(split_line_choices("")) None >>> print(split_line_choices(",")) None >>> split_line_choices("1") [0] >>> split_line_choices("1,2") [0, 1] >>> split_line_choices("1-5") [0, 1, 2, 3, 4] >>> split_line_choices("3;4-5;7-7;22+12-15;") [2, 3, 4, 6, 21, 11, 12, 13, 14] """ if lines is None: return None if not isinstance(lines, str): raise type_error(lines, "lines", (str, None)) sel: list[int] = [] for patf in __SPLT.split(lines): pats: str = patf.strip() if len(pats) <= 0: continue idx: int = pats.find("-") if idx < 0: sel.append(int(enforce_non_empty_str(pats)) - 1) continue start: int = int(enforce_non_empty_str(pats[:idx].strip())) end: int = int(enforce_non_empty_str(pats[idx + 1:].strip())) sel.extend(range(start - 1, end)) return sel if len(sel) > 0 else None
[docs] def split_labels(labels: str | None) -> set[str]: r""" Get a sequence of labels from a string. :param labels: the labels string or `None` :return: the labels set >>> print(split_labels(None)) set() >>> print(split_labels("")) set() >>> print(split_labels(",")) set() >>> sorted(split_labels("a;b;d;c")) ['a', 'b', 'c', 'd'] >>> sorted(split_labels("a,b c+a;a\td;c")) ['a', 'b', 'c', 'd'] """ sel: set[str] = set() if labels is None: return sel if not isinstance(labels, str): raise type_error(labels, "labels", (str, None)) for patf in __SPLT.split(labels): pats: str = patf.strip() if len(pats) <= 0: continue sel.add(pats) return sel
[docs] def select_lines(code: Iterable[str], lines: Iterable[int] | None = None, labels: Iterable[str] | None = None, line_comment_start: str = "#", max_consecutive_empty_lines: int = 2) -> list[str]: r""" Select lines of source code based on labels and line indices. First, we select all lines of the code we want to keep. If labels are defined, then lines are kept as ranges or as single lines for all pre-defined labels. Ranges may overlap and/or intersect. Otherwise, all lines are selected in this step. Then, if line numbers are provided, we selected the lines based on the line numbers from the lines we have preserved. Finally, leading and trailing empty lines as well as superfluous empty lines are removed. :param code: the code loaded from a file :param lines: the lines to keep, or `None` if we keep all :param labels: a list of labels marking start and end of code snippets to include :param line_comment_start: the string marking the line comment start :param max_consecutive_empty_lines: the maximum number of permitted consecutive empty lines :return: the list of selected lines >>> select_lines(["def a():", " b=c", " return x"]) ['def a():', ' b=c', ' return x'] >>> pc = ["# start x", "def a():", " b=c # -x", " return x", "# end x"] >>> select_lines(pc, labels={"x"}) ['def a():', ' return x'] >>> select_lines(["def a():", " b=c", " return x"], lines=[0, 2]) ['def a():', ' return x'] """ if not isinstance(code, Iterable): raise type_error(code, "code", Iterable) if not isinstance(max_consecutive_empty_lines, int): raise type_error( max_consecutive_empty_lines, "max_consecutive_empty_lines", int) if max_consecutive_empty_lines < 0: raise ValueError("max_consecutive_empty_lines must be >= 0, but is " f"{max_consecutive_empty_lines}.") if not isinstance(line_comment_start, str): raise type_error(line_comment_start, "line_comment_start", str) if not line_comment_start: raise ValueError("line_comment_start cannot be " f"'{line_comment_start}'.") keep_lines: list[str] # 
make sure that labels are unique and non-empty label_str: list[str] | None = None if labels is not None: if not isinstance(labels, Iterable): raise type_error(labels, "labels", Iterable) label_lst = list(labels) label_lst.sort() label_str = list({label.strip() for label in label_lst}) label_str.sort() if label_lst != label_str: raise ValueError( f"Invalid label spec {label_lst} of length {len(label_lst)}," f" leading to unique label set {label_str} of length " f"{len(label_str)}.") del label_lst if label_str and not (label_str[0]): raise ValueError(f"Empty label in {labels}.") # process all labels, if any are specified if label_str: keep_lines = [] start_labels = [f"{line_comment_start} start {label}" for label in label_str] end_labels = [f"{line_comment_start} end {label}" for label in label_str] add_labels = [f"{line_comment_start} +{label}" for label in label_str] del_labels = [f"{line_comment_start} -{label}" for label in label_str] all_labels = set(start_labels + end_labels + add_labels + del_labels) if len(all_labels) != (4 * len(label_str)): raise ValueError("label clash? 
impossible?") del all_labels active_labels: set[int] = set() # the active label ranges current_line_labels: set[int] = set() # labels of the current line done_labels: set[int] = set() # the labels for which text as retained for line, the_cl in enumerate(code): # iterate over all code lines cl = the_cl.rstrip() # first, we need to update the state current_line_labels.clear() current_line_labels.update(active_labels) found_mark: bool = True while found_mark: found_mark = False # check all potential range starts for i, lbl in enumerate(start_labels): if cl.endswith(lbl): cl = cl[:len(cl) - len(lbl)].rstrip() if i in active_labels: raise ValueError( f"Label '{label_str[i]}' already active in " f"line {line} with text '{cl}', cannot " "start.") active_labels.add(i) found_mark = True # check all potential range end for i, lbl in enumerate(end_labels): if cl.endswith(lbl): cl = cl[:len(cl) - len(lbl)].rstrip() if i not in active_labels: raise ValueError( f"Label '{label_str[i]}' not active in " f"line {line} with text '{cl}', cannot " "end.") active_labels.remove(i) current_line_labels.remove(i) found_mark = True # check all potential line add labels for i, lbl in enumerate(add_labels): if cl.endswith(lbl): cl = cl[:len(cl) - len(lbl)].rstrip() if i in current_line_labels: raise ValueError( f"Label '{label_str[i]}' already active in " f"line {line} with text '{cl}', cannot " "add.") current_line_labels.add(i) found_mark = True # check all potential line deletion markers for i, lbl in enumerate(del_labels): if cl.endswith(lbl): cl = cl[:len(cl) - len(lbl)].rstrip() if i not in current_line_labels: raise ValueError( f"Label '{label_str[i]}' already active in " f"line {line} with text '{cl}', cannot " "delete.") current_line_labels.remove(i) found_mark = True break if found_mark: continue if current_line_labels: keep_lines.append(cl) done_labels.update(current_line_labels) if not keep_lines: raise ValueError( f"Nothing is left over after applying labels {labels}.") if 
len(done_labels) < len(label_str): raise ValueError( "Never had any text for labels " f"{set(label_str).difference(done_labels)}.") else: keep_lines = [cl.rstrip() for cl in code] if lines is not None: # select the lines we want to keep if not isinstance(lines, Iterable): raise type_error(lines, "lines", Iterable) lines_ints = list(set(lines)) if not lines_ints: raise ValueError(f"Empty lines provided: {lines}.") lines_ints.sort() keep_lines = [keep_lines[i] for i in lines_ints] # remove leading empty lines while keep_lines: if not keep_lines[0]: del keep_lines[0] else: break # remove trailing empty lines while keep_lines: if not keep_lines[-1]: del keep_lines[-1] else: break # remove superfluous empty lines empty_lines = 0 current = len(keep_lines) while current > 0: current -= 1 if keep_lines[current]: empty_lines = 0 elif empty_lines >= max_consecutive_empty_lines: del keep_lines[current] else: empty_lines += 1 if not keep_lines: raise ValueError(f"Empty code resulting from {code} after applying " f"labels {labels} and lines {lines}.?") return keep_lines
def format_empty_lines(lines: Iterable[str],
                       empty_before: Callable = lambda line: False,
                       no_empty_after: Callable = lambda line: False,
                       force_no_empty_after: Callable = lambda line: False,
                       max_consecutive_empty_lines: int = 2) -> list[str]:
    """
    Normalize the empty lines in a sequence of text lines.

    All lines are right-stripped. Empty lines at the beginning and at the
    end are dropped and runs of empty lines are capped at
    `max_consecutive_empty_lines`. The three callbacks receive the fully
    stripped text of a non-empty line and can fine-tune the result.

    :param lines: the original line iterable
    :param empty_before: a function checking whether an empty line is
        required before a certain string
    :param no_empty_after: a function checking whether an empty line is
        prohibited after a string; an empty line demanded by `empty_before`
        for the next line is still inserted
    :param force_no_empty_after: a function checking whether an empty line
        is prohibited after a string, even if `empty_before` demands one for
        the next line
    :param max_consecutive_empty_lines: the maximum number of permitted
        consecutive empty lines
    :return: the list of formatted lines

    >>> code = ["", "a", "", "b", "", "", "c", "", "", "", "d", "e", ""]
    >>> format_empty_lines(code, max_consecutive_empty_lines=3)
    ['a', '', 'b', '', '', 'c', '', '', '', 'd', 'e']

    >>> format_empty_lines(code, max_consecutive_empty_lines=2)
    ['a', '', 'b', '', '', 'c', '', '', 'd', 'e']

    >>> format_empty_lines(code, max_consecutive_empty_lines=1)
    ['a', '', 'b', '', 'c', '', 'd', 'e']

    >>> format_empty_lines(code, max_consecutive_empty_lines=0)
    ['a', 'b', 'c', 'd', 'e']

    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b")
    ['a', '', 'b', 'c', '', '', 'd', 'e']

    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e")
    ['a', '', 'b', 'c', '', '', 'd', '', 'e']

    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e",
    ...                    force_no_empty_after=lambda s: s == "d")
    ['a', '', 'b', 'c', '', '', 'd', 'e']
    """
    if not isinstance(max_consecutive_empty_lines, int):
        raise type_error(
            max_consecutive_empty_lines, "max_consecutive_empty_lines", int)
    if max_consecutive_empty_lines < 0:
        raise ValueError("max_consecutive_empty_lines must be >= 0, but is "
                         f"{max_consecutive_empty_lines}.")
    if not callable(empty_before):
        raise type_error(empty_before, "empty_before", call=True)
    if not callable(no_empty_after):
        raise type_error(no_empty_after, "no_empty_after", call=True)
    if not callable(force_no_empty_after):
        raise type_error(
            force_no_empty_after, "force_no_empty_after", call=True)

    output: list[str] = []
    pending_blank: int = 0  # empty lines seen since the last text line
    # Both flags start as True so that leading empty lines are discarded.
    suppress_blank = True
    hard_suppress = True

    for raw in lines:
        text = raw.rstrip()

        if not text:
            # count the empty line unless the previous text line forbids it
            if not (hard_suppress or suppress_blank):
                pending_blank += 1
            continue

        stripped = text.lstrip()
        if not hard_suppress:
            wants_gap = empty_before(stripped) or (
                (pending_blank > 0) and (max_consecutive_empty_lines > 0))
            if wants_gap:
                # at least one empty line, at most the permitted maximum
                gap = min(pending_blank, max_consecutive_empty_lines)
                output.extend([""] * max(1, gap))

        suppress_blank = no_empty_after(stripped)
        hard_suppress = force_no_empty_after(stripped)
        output.append(text)
        pending_blank = 0  # trailing empty lines are therefore never emitted

    if not output:
        raise ValueError("No lines of text found.")
    return output
def strip_common_whitespace_prefix(lines: Iterable[str]) -> list[str]:
    r"""
    Strip the common prefix of space characters from a sequence of lines.

    The prefix length is the smallest number of leading space characters
    (`" "` only, other whitespace is not considered) over all lines that
    contain at least one non-space character. Empty lines and lines made up
    of spaces only do not influence the prefix length, but they are
    shortened like all other lines.

    The input is materialized once, so one-shot iterables such as iterators
    and generators are supported.

    :param lines: the lines
    :return: a new list with the lines without the common space prefix

    >>> strip_common_whitespace_prefix([" a", "  b"])
    ['a', ' b']
    >>> strip_common_whitespace_prefix(["  a", "  b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix([" a", " b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix(["  a", "  b", "c"])
    ['  a', '  b', 'c']
    >>> strip_common_whitespace_prefix([" a", "  b", "c"])
    [' a', '  b', 'c']
    >>> strip_common_whitespace_prefix(["  a", "  b", "    c"])
    ['a', 'b', '  c']
    >>> strip_common_whitespace_prefix(iter(["  a", "  b"]))
    ['a', 'b']
    """
    # We need two passes over the data, which a one-shot iterator cannot
    # provide, so we copy the lines into a list first.
    all_lines: list[str] = list(lines)

    prefix_len = sys.maxsize
    for line in all_lines:
        indent = len(line) - len(line.lstrip(" "))
        # Only lines with a non-space character restrict the prefix.
        if indent < len(line) and indent < prefix_len:
            prefix_len = indent
            if prefix_len <= 0:
                break  # nothing can be stripped anymore

    if prefix_len > 0:
        return [line[prefix_len:] for line in all_lines]
    return all_lines