"""Regular-expression based command generation and invocation."""
from typing import Callable
import regex as reg # type: ignore
from bookbuilderpy.types import type_error
def __create_command_re(name: str, n: int = 1,
                        strip_white_space: bool = False) -> reg.Regex:
    r"""
    Compile a regular expression matching one LaTeX-style command.

    The compiled pattern matches the literal text `\name` followed by
    exactly `n` arguments (`n>=0`), each enclosed in `{` and `}`, e.g.,
    `\sub{1}{2}` for `name="sub"` and `n=2`. Every argument becomes one
    capture group whose pattern recurses into itself, so that nested,
    balanced braces inside an argument stay part of that argument.

    The captured groups include the enclosing curly braces, i.e.,
    `reg.sub(cmd, lambda g: g[1]+"-"+g[2], s)` yields `{1}-{2}` for
    `s="\sub{1}{2}"`. The braces need to be removed afterwards if they
    are not wanted.

    :param name: the command name without the leading backslash; must be
        a non-empty string other than `n`, `r`, `t`, `x`, or `u`
    :param n: the number of brace-delimited arguments, at least `0`
    :param strip_white_space: if `True`, white space directly before and
        directly after the command becomes part of the match
    :return: the compiled regular expression
    :raises ValueError: if `name` is empty or one of the rejected names,
        or if `n` is negative

    An argument of the wrong type raises the error created by
    `type_error`.

    >>> cmd = __create_command_re("y", 2, False)
    >>> s = 'blabla\\y{1}{2}xaxa \\y{3}{4} zhdfg'
    >>> reg.sub(cmd, lambda g: g[1], s)
    'blabla{1}xaxa {3} zhdfg'
    >>> reg.sub(cmd, lambda g: g[2], s)
    'blabla{2}xaxa {4} zhdfg'
    >>> s = 'blabla\\y{\\y{1}{2}}{3} \\y{3}{4}.'
    >>> reg.sub(cmd, lambda g: g[1], s)
    'blabla{\\y{1}{2}} {3}.'
    >>> reg.sub(cmd, lambda g: g[2], reg.sub(cmd, lambda g: g[1], s))
    'blabla{{2}} {3}.'
    >>> cmd = __create_command_re("y", 0, True)
    >>> reg.sub(cmd, "Z", "hello\n\\y df")
    'helloZdf'
    >>> cmd = __create_command_re("z", 3, True)
    >>> reg.sub(cmd, lambda g: g[1]+g[3], "hello\n\\z{A x}{b k}{z} df")
    'hello{A x}{z}df'
    >>> reg.sub(cmd, lambda g: g[1]+g[3], "hello\n\\z{{A x}}{b k}{z} df")
    'hello{{A x}}{z}df'
    >>> cmd = __create_command_re("sub", 2, True)
    >>> reg.sub(cmd, lambda g: g[1]+"-"+g[2], "a \\sub{1}{2} b")
    'a{1}-{2}b'
    """
    # Validate the arguments one by one before any pattern is built.
    if not isinstance(name, str):
        raise type_error(name, "name", str)
    if len(name) == 0:
        raise ValueError(f"name cannot be '{name}'.")
    # NOTE(review): presumably rejected because `\n`, `\r`, `\t`, `\x`, and
    # `\u` would collide with common escape sequences -- confirm.
    if name in ("n", "r", "t", "x", "u"):
        raise ValueError(f"invalid command name: '{name}'.")
    if not isinstance(n, int):
        raise type_error(n, "n", int)
    if n < 0:
        raise ValueError(f"n cannot be '{n}'.")
    if not isinstance(strip_white_space, bool):
        raise type_error(strip_white_space, "strip_white_space", bool)

    # Each argument is one capture group of the shape
    # `(\{(?>[^\{\}]|(?i))*+\})`, where `(?i)` recurses into group `i`
    # itself: inside the braces, either a non-brace character or another
    # complete, balanced `{...}` may follow.
    group_head = r"(\{(?>[^\{\}]|(?"
    group_tail = r"))*+\})"

    # The pattern starts with the escaped `\name` ...
    pieces = [reg.escape(f"\\{name}")]
    # ... followed by one self-recursive group per argument (none if n=0).
    for group_number in range(1, n + 1):
        pieces.append(group_head + str(group_number) + group_tail)
    pattern = "".join(pieces)

    # Optionally let the match swallow the surrounding white space.
    if strip_white_space:
        white_space = "\\s*"
        pattern = white_space + pattern + white_space

    flags = reg.V1 | reg.MULTILINE  # pylint: disable=E1101
    return reg.compile(pattern, flags=flags)
def __strip_group(s: str) -> str:
    """
    Remove the enclosing `{}` pair of an argument and trim its content.

    The regular expressions created by :meth:`__create_command_re` capture
    each argument together with its curly braces. This function cuts off
    the leading `{` and the trailing `}` and then strips leading and
    trailing white space from what remains.

    :param s: the captured argument, which must start with `{` and end
        with `}`
    :return: the text between the braces, without surrounding white space
    :raises ValueError: if `s` is not enclosed in `{` and `}`

    A non-string `s` raises the error created by `type_error`.

    >>> __strip_group("{ f}")
    'f'
    >>> __strip_group("{x }")
    'x'
    """
    if not isinstance(s, str):
        raise type_error(s, "s", str)

    # The length test comes first so that the index accesses are only
    # evaluated for strings that have at least two characters.
    has_braces = (len(s) > 1) and (s[0] == "{") and (s[-1] == "}")
    if not has_braces:
        raise ValueError(f"invalid argument '{s}'.")

    content = s[1:-1]
    return content.strip()
def create_preprocessor(name: str,
                        func: Callable,
                        n: int = 1,
                        strip_white_space: bool = False,
                        wrap_in_newlines: int = 0) -> Callable:
    r"""
    Create a preprocessor command.

    A LaTeX-style command can be defined as an (recursive) regular
    expression. The start of the command is indicated by `\name`. It then
    has `n` arguments with `n>=0`. Each argument is wrapped into a `{` and
    a `}`. Example: `\sub{1}{2}`.

    This function returns a function `f` which can be applied to an
    arbitrary string `s`. The function `f` will iteratively process all
    invocations of `name` that appear in `s`, pass the extracted parameter
    values (without their enclosing braces and without surrounding white
    space) to `func`, and replace the whole matched string with the return
    value of `func`.

    The command can appear nested in its own arguments. In each pass over
    the string, the regular expression matches the outermost invocation
    together with everything inside its braces, so `func` may receive
    arguments that still contain unresolved invocations of the command.
    The passes are repeated until the string does not change anymore,
    which resolves the invocations that remain in the output of `func`.

    :param name: the command name
    :param func: the function to call; it receives `n` string arguments
        and must return a string
    :param n: the number of arguments to pass to func
    :param strip_white_space: should surrounding white space be stripped?
    :param wrap_in_newlines: the number of newlines into which the output
        should be wrapped; if it is greater than `0`, the return value of
        `func` is stripped of surrounding white space before it is wrapped
    :return: a function that can be invoked on a string and which replaces
        all the occurrences of the command with the results of
        corresponding `func` invocations
    :raises ValueError: if `name` or `n` are rejected while the regular
        expression for the command is created

    If `func` is not callable, the error created by `type_error` is
    raised. The returned function raises the error created by `type_error`
    if `func` returns something that is not a string.

    >>> f = lambda a, b: a + "-" + b
    >>> cmd = create_preprocessor("sub", f, 2)
    >>> cmd("x \\sub{7}{3} y \\sub{\\sub{8}{5}}{\\sub{4}{3}}")
    'x 7-3 y 8-5-4-3'
    >>> cmd = create_preprocessor("mm", lambda: "Z", 0, True)
    >>> cmd("a\\mm\\mm\\mmb")
    'aZZZb'
    >>> cmd = create_preprocessor("swp", lambda a, b: "("+b+","+a+")", 2)
    >>> cmd("\\swp{1}{2}")
    '(2,1)'
    >>> cmd("\\swp{\\swp{1}{2}}{3}")
    '(3,(2,1))'
    >>> cmd("\\swp{\\swp{\\swp{1}{2}}{3}}{\\swp{4}{5}}")
    '((5,4),(3,(2,1)))'
    >>> cmd = create_preprocessor("y", lambda x: str(int(x)*2), 1)
    >>> cmd("12\\y{3}4")
    '1264'
    >>> cmd = create_preprocessor("y", lambda x: f"a{x}b", 1,
    ...     wrap_in_newlines=2)
    >>> cmd("12\\y{3}4")
    '12\n\na3b\n\n4'
    """
    if not callable(func):
        raise type_error(func, "func", call=True)

    # Create the inner function that sanitizes the arguments and passes
    # them on to func. Its configuration is bound through default
    # arguments, i.e., it is fixed when the function is defined; `nls` is
    # the newline wrapper or None if no wrapping was requested.
    def __func(args, inner_n=n, inner_func=func,
               nls="\n" * wrap_in_newlines if
               (wrap_in_newlines > 0) else None) -> str:
        if inner_n == 0:
            ret = inner_func()
        else:
            groups = args.groups()
            if len(groups) != inner_n:
                raise ValueError(
                    f"Expected {inner_n} groups, got {len(groups)}.")
            # The groups still carry their curly braces: remove them.
            ret = inner_func(*[__strip_group(g) for g in groups])
        if not isinstance(ret, str):
            raise type_error(ret, "return value", str)
        if nls:
            # An empty result collapses to a single newline wrapper
            # instead of two adjacent ones.
            ret = ret.strip()
            return nls if len(ret) <= 0 else f"{nls}{ret}{nls}"
        return ret

    # Create the actual command function that can be invoked and that
    # repeatedly resolves all instances of the command name. The regular
    # expression is compiled once, here, when the default argument is
    # evaluated; invalid `name`, `n`, or `strip_white_space` values are
    # therefore reported by create_preprocessor itself.
    def __cmd(s: str,
              regex=__create_command_re(
                  name=name, n=n, strip_white_space=strip_white_space),
              inner_func=__func) -> str:
        old = s
        while True:
            s = reg.sub(regex, inner_func, s)
            # A pass without any change means nothing is left to resolve.
            if s == old:
                return s
            old = s

    return __cmd