Coverage for bookbuilderpy/preprocessor_commands.py: 79%

52 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-17 23:15 +0000

1"""Regular-expression based command generation and invocation.""" 

2 

3from typing import Callable 

4 

5import regex as reg # type: ignore 

6 

7from bookbuilderpy.types import type_error 

8 

9 

def __create_command_re(name: str, n: int = 1,
                        strip_white_space: bool = False) -> reg.Regex:
    r"""
    Build the regular expression that recognizes one LaTeX-style command.

    A command starts with `\name` and is followed by exactly `n` arguments
    (`n>=0`), each enclosed in `{` and `}`, for example `\sub{1}{2}`.
    Every argument group is a recursive pattern, so an argument may itself
    contain balanced `{`...`}` pairs (including nested commands).

    The compiled expression `cmd` can be applied to a string `s` via
    `sub(cmd, lambda g: g[1]+"-"+g[2], s)`, which yields `{1}-{2}` for
    `s="\sub{1}{2}"`.

    The captured groups still include the curly braces enclosing each
    argument; the caller has to remove them afterwards if they are not
    wanted.

    :param name: the name of the command (without the leading backslash)
    :param n: the number of `{...}` arguments the command takes
    :param strip_white_space: if `True`, white space directly before and
        after the command becomes part of the match (and is thus replaced
        together with the command)
    :return: the compiled regular expression
    :raises ValueError: if `name` is empty, if `name` is one of
        `n`, `r`, `t`, `x`, `u`, or if `n` is negative

    >>> cmd = __create_command_re("y", 2, False)
    >>> s = 'blabla\\y{1}{2}xaxa \\y{3}{4} zhdfg'
    >>> reg.sub(cmd, lambda g: g[1], s)
    'blabla{1}xaxa {3} zhdfg'
    >>> reg.sub(cmd, lambda g: g[2], s)
    'blabla{2}xaxa {4} zhdfg'
    >>> s = 'blabla\\y{\\y{1}{2}}{3} \\y{3}{4}.'
    >>> reg.sub(cmd, lambda g: g[1], s)
    'blabla{\\y{1}{2}} {3}.'
    >>> reg.sub(cmd, lambda g: g[2], reg.sub(cmd, lambda g: g[1], s))
    'blabla{{2}} {3}.'
    >>> cmd = __create_command_re("y", 0, True)
    >>> reg.sub(cmd, "Z", "hello\n\\y df")
    'helloZdf'
    >>> cmd = __create_command_re("z", 3, True)
    >>> reg.sub(cmd, lambda g: g[1]+g[3], "hello\n\\z{A x}{b k}{z} df")
    'hello{A x}{z}df'
    >>> reg.sub(cmd, lambda g: g[1]+g[3], "hello\n\\z{{A x}}{b k}{z} df")
    'hello{{A x}}{z}df'
    >>> cmd = __create_command_re("sub", 2, True)
    >>> reg.sub(cmd, lambda g: g[1]+"-"+g[2], "a \\sub{1}{2} b")
    'a{1}-{2}b'
    """
    # Validate the arguments in a fixed order: name, n, strip_white_space.
    if not isinstance(name, str):
        raise type_error(name, "name", str)
    if not name:
        raise ValueError(f"name cannot be '{name}'.")
    # NOTE(review): presumably rejected because `\n`, `\r`, `\t`, `\x` and
    # `\u` would collide with common escape sequences - confirm.
    forbidden_names = ("n", "r", "t", "x", "u")
    if name in forbidden_names:
        raise ValueError(f"invalid command name: '{name}'.")
    if not isinstance(n, int):
        raise type_error(n, "n", int)
    if n < 0:
        raise ValueError(f"n cannot be '{n}'.")
    if not isinstance(strip_white_space, bool):
        raise type_error(strip_white_space, "strip_white_space", bool)

    # One argument group is `group_head + <own group number> + group_tail`:
    # a capturing group that matches `{`, then any mix of non-brace
    # characters and recursive matches of the group itself, then `}`.
    # This is what keeps the braces balanced.
    group_head = r"(\{(?>[^\{\}]|(?"
    group_tail = r"))*+\})"

    # The pattern begins with the escaped `\name` ...
    pieces = [reg.escape(f"\\{name}")]
    # ... followed by one balanced-brace group per argument.
    pieces.extend(f"{group_head}{index}{group_tail}"
                  for index in range(1, n + 1))

    # Optionally swallow the white space on both sides of the command.
    if strip_white_space:
        pieces.insert(0, "\\s*")
        pieces.append("\\s*")

    return reg.compile(
        "".join(pieces),
        flags=reg.V1 | reg.MULTILINE)  # pylint: disable=E1101

89 

90 

def __strip_group(s: str) -> str:
    """
    Remove the enclosing `{}` pair of an argument and trim the remainder.

    The regular expressions produced by :meth:`__create_command_re` hand
    over each argument including its surrounding curly braces. This
    function drops the leading `{` and the trailing `}` and then strips
    leading and trailing white space from what is left.

    :param s: the argument string, which must begin with `{` and end
        with `}`
    :return: the text between the braces without surrounding white space
    :raises ValueError: if `s` is shorter than two characters or is not
        enclosed in `{` and `}`

    >>> __strip_group("{ f}")
    'f'
    >>> __strip_group("{x }")
    'x'
    """
    if not isinstance(s, str):
        raise type_error(s, "s", str)

    # At least two characters are required so that the opening and the
    # closing brace are two distinct characters.
    is_wrapped = (len(s) > 1) and s.startswith("{") and s.endswith("}")
    if not is_wrapped:
        raise ValueError(f"invalid argument '{s}'.")

    inner = s[1:-1]
    return inner.strip()

113 

114 

def create_preprocessor(name: str,
                        func: Callable,
                        n: int = 1,
                        strip_white_space: bool = False,
                        wrap_in_newlines: int = 0) -> Callable:
    r"""
    Create a preprocessor command.

    A LaTeX-style command can be defined as a (recursive) regular
    expression. The start of the command is indicated by `\name`. It then
    has `n` arguments with `n>=0`. Each argument is wrapped into a `{` and
    a `}`. Example: `\sub{1}{2}`.

    This function returns a function `f` which can be applied to an
    arbitrary string `s`. The function `f` will iteratively process all
    invocations of `name` that appear in `s`, pass the extracted parameter
    values (without their enclosing braces and without surrounding white
    space) to `func`, and replace the whole matched string with the return
    value of `func`.

    The command can appear nested in its arguments. `f` keeps substituting
    until the string does not change anymore, so nested occurrences are
    resolved as well. As a consequence, `f` only terminates if `func` does
    not keep producing text that contains the command again.

    :param name: the command name
    :param func: the function to call; it receives `n` string arguments
        and must return a string
    :param n: the number of arguments to pass to func
    :param strip_white_space: should surrounding white space be stripped?
    :param wrap_in_newlines: the number of newlines into which the output
        should be wrapped; values `<= 0` disable the wrapping. If wrapping
        is enabled, the result of `func` is stripped first and, if it is
        empty, replaced by a single run of newlines.
    :return: a function that can be invoked on a string and which replaces
        all the occurrences of the command with the results of
        corresponding `func` invocations
    :raises ValueError: if `name` or `n` is rejected by the command
        expression builder

    >>> f = lambda a, b: a + "-" + b
    >>> cmd = create_preprocessor("sub", f, 2)
    >>> cmd("x \\sub{7}{3} y \\sub{\\sub{8}{5}}{\\sub{4}{3}}")
    'x 7-3 y 8-5-4-3'
    >>> cmd = create_preprocessor("mm", lambda: "Z", 0, True)
    >>> cmd("a\\mm\\mm\\mmb")
    'aZZZb'
    >>> cmd = create_preprocessor("swp", lambda a, b: "("+b+","+a+")", 2)
    >>> cmd("\\swp{1}{2}")
    '(2,1)'
    >>> cmd("\\swp{\\swp{1}{2}}{3}")
    '(3,(2,1))'
    >>> cmd("\\swp{\\swp{\\swp{1}{2}}{3}}{\\swp{4}{5}}")
    '((5,4),(3,(2,1)))'
    >>> cmd = create_preprocessor("y", lambda x: str(int(x)*2), 1)
    >>> cmd("12\\y{3}4")
    '1264'
    >>> cmd = create_preprocessor("y", lambda x: f"a{x}b", 1,
    ...     wrap_in_newlines=2)
    >>> cmd("12\\y{3}4")
    '12\n\na3b\n\n4'
    """
    if not callable(func):
        raise type_error(func, "func", call=True)
    # Validate explicitly, like every other argument, instead of relying on
    # the comparison / string repetition below to fail with an unrelated
    # message.
    if not isinstance(wrap_in_newlines, int):
        raise type_error(wrap_in_newlines, "wrap_in_newlines", int)

    # Create the inner function that sanitizes the arguments and passes them
    # on to func. The values it needs are bound as default arguments once,
    # at definition time.
    def __func(args, inner_n=n, inner_func=func,
               nls="\n" * wrap_in_newlines if
               (wrap_in_newlines > 0) else None) -> str:
        if inner_n == 0:
            ret = inner_func()
        else:
            groups = args.groups()
            if len(groups) != inner_n:
                raise ValueError(
                    f"Expected {inner_n} groups, got {len(groups)}.")
            # The groups still carry their braces: remove them and trim.
            ret = inner_func(*[__strip_group(g) for g in groups])
        if not isinstance(ret, str):
            raise type_error(ret, "return value", str)
        if nls:
            # Normalize the white space around the result so that exactly
            # the requested number of newlines surrounds it.
            ret = ret.strip()
            return nls if not ret else f"{nls}{ret}{nls}"
        return ret

    # Create the actual command function that can be invoked and that
    # repeatedly resolves all instances of the command name. Building the
    # regular expression here also validates name, n, and
    # strip_white_space before the preprocessor is returned.
    def __cmd(s: str,
              regex=__create_command_re(
                  name=name, n=n, strip_white_space=strip_white_space),
              inner_func=__func) -> str:
        # Substitute until a fixed point is reached: a replacement may
        # uncover further invocations (e.g., from nested commands).
        while True:
            replaced = reg.sub(regex, inner_func, s)
            if replaced == s:
                return replaced
            s = replaced

    return __cmd