Coverage for bookbuilderpy/preprocessor_commands.py: 79%
52 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
1"""Regular-expression based command generation and invocation."""
3from typing import Callable
5import regex as reg # type: ignore
7from bookbuilderpy.types import type_error
def __create_command_re(name: str, n: int = 1,
                        strip_white_space: bool = False) -> reg.Regex:
    r"""
    Build the compiled pattern that recognizes one LaTeX-style command.

    The pattern matches the literal text `\name` followed by exactly `n`
    brace-delimited arguments (`n>=0`), for instance `\sub{1}{2}`. Braces
    nested inside an argument are matched recursively, so an argument may
    itself contain balanced `{...}` groups, including further invocations
    of the same command.

    Every argument is captured as its own group *including* its outer
    curly braces. Hence `sub(cmd, lambda g: g[1]+"-"+g[2], s)` yields
    `{1}-{2}` for `s="\sub{1}{2}"`; callers that need the bare argument
    text have to remove the braces themselves.

    :param name: the name of the command, without the leading backslash
    :param n: the number of brace-delimited arguments
    :param strip_white_space: if `True`, white space directly before and
        after the command becomes part of the match (and is therefore
        replaced together with the command)
    :return: the compiled regular expression
    :raises ValueError: if `name` is empty, if `name` is one of `n`, `r`,
        `t`, `x`, `u`, or if `n` is negative

    An error created by `type_error` is raised if an argument is not an
    instance of the expected type.

    >>> cmd = __create_command_re("y", 2, False)
    >>> s = 'blabla\\y{1}{2}xaxa \\y{3}{4} zhdfg'
    >>> reg.sub(cmd, lambda g: g[1], s)
    'blabla{1}xaxa {3} zhdfg'
    >>> reg.sub(cmd, lambda g: g[2], s)
    'blabla{2}xaxa {4} zhdfg'
    >>> s = 'blabla\\y{\\y{1}{2}}{3} \\y{3}{4}.'
    >>> reg.sub(cmd, lambda g: g[1], s)
    'blabla{\\y{1}{2}} {3}.'
    >>> reg.sub(cmd, lambda g: g[2], reg.sub(cmd, lambda g: g[1], s))
    'blabla{{2}} {3}.'
    >>> cmd = __create_command_re("y", 0, True)
    >>> reg.sub(cmd, "Z", "hello\n\\y df")
    'helloZdf'
    >>> cmd = __create_command_re("z", 3, True)
    >>> reg.sub(cmd, lambda g: g[1]+g[3], "hello\n\\z{A x}{b k}{z} df")
    'hello{A x}{z}df'
    >>> reg.sub(cmd, lambda g: g[1]+g[3], "hello\n\\z{{A x}}{b k}{z} df")
    'hello{{A x}}{z}df'
    >>> cmd = __create_command_re("sub", 2, True)
    >>> reg.sub(cmd, lambda g: g[1]+"-"+g[2], "a \\sub{1}{2} b")
    'a{1}-{2}b'
    """
    # Validate the arguments one after the other; the first violation wins.
    if not isinstance(name, str):
        raise type_error(name, "name", str)
    if not name:
        raise ValueError(f"name cannot be '{name}'.")
    # NOTE(review): presumably rejected because `\n`, `\r`, `\t`, `\x` and
    # `\u` would collide with common escape sequences -- confirm.
    if name in ("n", "r", "t", "x", "u"):
        raise ValueError(f"invalid command name: '{name}'.")
    if not isinstance(n, int):
        raise type_error(n, "n", int)
    if n < 0:
        raise ValueError(f"n cannot be '{n}'.")
    if not isinstance(strip_white_space, bool):
        raise type_error(strip_white_space, "strip_white_space", bool)

    # The pattern starts with the escaped, literal `\name`.
    pieces = [reg.escape(f"\\{name}")]

    # One capturing group per argument. Group number `index` matches
    # `{`, then any mix of non-brace characters and recursive matches of
    # that same group, then `}` -- this is what keeps the braces balanced.
    group_head = r"(\{(?>[^\{\}]|(?"
    group_tail = r"))*+\})"
    for index in range(1, n + 1):
        pieces.append(group_head + str(index) + group_tail)

    # Optionally let the match swallow the white space on both sides.
    if strip_white_space:
        pieces.insert(0, "\\s*")
        pieces.append("\\s*")

    return reg.compile(
        "".join(pieces),
        flags=reg.V1 | reg.MULTILINE)  # pylint: disable=E1101
def __strip_group(s: str) -> str:
    """
    Remove the enclosing `{}` pair of an argument and trim the remainder.

    The patterns produced by :meth:`__create_command_re` capture each
    argument together with its outer curly braces. This function drops
    that leading `{` and trailing `}` and then removes leading and
    trailing white space from what is left.

    :param s: the captured argument, which must start with `{` and end
        with `}`
    :return: the text between the braces, without surrounding white space
    :raises ValueError: if `s` is shorter than two characters or is not
        enclosed in `{` and `}`

    An error created by `type_error` is raised if `s` is not a string.

    >>> __strip_group("{ f}")
    'f'
    >>> __strip_group("{x }")
    'x'
    """
    if not isinstance(s, str):
        raise type_error(s, "s", str)
    # At least two characters are required so that the opening and the
    # closing brace are two distinct characters.
    properly_wrapped = (len(s) >= 2) and s.startswith("{") \
        and s.endswith("}")
    if not properly_wrapped:
        raise ValueError(f"invalid argument '{s}'.")
    inner = s[1:-1]
    return inner.strip()
def create_preprocessor(name: str,
                        func: Callable,
                        n: int = 1,
                        strip_white_space: bool = False,
                        wrap_in_newlines: int = 0) -> Callable:
    r"""
    Turn a Python function into a text preprocessor for a command.

    A command is written LaTeX-style: `\name` followed by `n` arguments
    (`n>=0`), each enclosed in `{` and `}`, for example `\sub{1}{2}`.

    The returned function `f` takes a string `s`. It finds the invocations
    of `name` in `s`, hands the argument texts (outer braces removed,
    surrounding white space trimmed) to `func`, and puts the string
    returned by `func` in place of the whole invocation. The replacement
    is repeated until the string no longer changes.

    Invocations may be nested inside the arguments of other invocations
    of the same command; the inner-most occurrences are resolved first.

    :param name: the command name
    :param func: the function to call; it must return a string
    :param n: the number of arguments to pass to func
    :param strip_white_space: should the white space surrounding an
        invocation be removed together with it?
    :param wrap_in_newlines: if positive, the result of `func` is trimmed
        and enclosed in this many newline characters on either side
    :return: a function that can be invoked on a string and which replaces
        all the occurrences of the command with the results of the
        corresponding `func` invocations

    >>> f = lambda a, b: a + "-" + b
    >>> cmd = create_preprocessor("sub", f, 2)
    >>> cmd("x \\sub{7}{3} y \\sub{\\sub{8}{5}}{\\sub{4}{3}}")
    'x 7-3 y 8-5-4-3'
    >>> cmd = create_preprocessor("mm", lambda: "Z", 0, True)
    >>> cmd("a\\mm\\mm\\mmb")
    'aZZZb'
    >>> cmd = create_preprocessor("swp", lambda a, b: "("+b+","+a+")", 2)
    >>> cmd("\\swp{1}{2}")
    '(2,1)'
    >>> cmd("\\swp{\\swp{1}{2}}{3}")
    '(3,(2,1))'
    >>> cmd("\\swp{\\swp{\\swp{1}{2}}{3}}{\\swp{4}{5}}")
    '((5,4),(3,(2,1)))'
    >>> cmd = create_preprocessor("y", lambda x: str(int(x)*2), 1)
    >>> cmd("12\\y{3}4")
    '1264'
    >>> cmd = create_preprocessor("y", lambda x: f"a{x}b", 1,
    ...                           wrap_in_newlines=2)
    >>> cmd("12\\y{3}4")
    '12\n\na3b\n\n4'
    """
    if not callable(func):
        raise type_error(func, "func", call=True)

    # The newline padding is fixed for the lifetime of the preprocessor,
    # so it is computed once; `None` means "do not wrap".
    padding = "\n" * wrap_in_newlines if (wrap_in_newlines > 0) else None

    # The substitution callback: unpack the arguments of one match, clean
    # them, call `func` and post-process its result.
    def __func(match) -> str:
        if n == 0:
            result = func()
        else:
            raw_args = match.groups()
            if len(raw_args) != n:
                raise ValueError(
                    f"Expected {n} groups, got {len(raw_args)}.")
            result = func(*map(__strip_group, raw_args))
        if not isinstance(result, str):
            raise type_error(result, "return value", str)
        if not padding:
            return result
        # When wrapping, an empty result collapses to a single padding.
        core = result.strip()
        if core:
            return f"{padding}{core}{padding}"
        return padding

    # The function handed back to the caller. The compiled pattern and the
    # callback are bound as defaults when this function is defined. The
    # substitution is applied until a pass leaves the text unchanged, which
    # is how nested invocations get resolved.
    def __cmd(s: str,
              regex=__create_command_re(
                  name=name, n=n, strip_white_space=strip_white_space),
              inner_func=__func) -> str:
        previous = s
        current = reg.sub(regex, inner_func, previous)
        while current != previous:
            previous = current
            current = reg.sub(regex, inner_func, previous)
        return current

    return __cmd