Coverage for bookbuilderpy/source_tools.py: 84%

160 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-17 23:15 +0000

1"""In this file, we put some shared tools for rendering source codes.""" 

2 

3import sys 

4from typing import Callable, Iterable 

5 

6from bookbuilderpy.types import type_error 

7 

8 

9def select_lines(code: Iterable[str], 

10 lines: Iterable[int] | None = None, 

11 labels: Iterable[str] | None = None, 

12 line_comment_start: str = "#", 

13 max_consecutive_empty_lines: int = 1) -> list[str]: 

14 r""" 

15 Select lines of source code based on labels and line indices. 

16 

17 First, we select all lines of the code we want to keep. 

18 If labels are defined, then lines are kept as ranges or as single lines 

19 for all pre-defined labels. Ranges may overlap and/or intersect. 

20 Otherwise, all lines are selected in this step. 

21 

22 Then, if line numbers are provided, we selected the lines based on the 

23 line numbers from the lines we have preserved. 

24 

25 Finally, leading and trailing empty lines as well as superfluous empty 

26 lines are removed. 

27 

28 :param code: the code loaded from a file 

29 :param lines: the lines to keep, or `None` if we keep all 

30 :param labels: a list of labels marking start and 

31 end of code snippets to include 

32 :param line_comment_start: the string marking the line comment start 

33 :param max_consecutive_empty_lines: the maximum number of permitted 

34 consecutive empty lines 

35 :return: the list of selected lines 

36 

37 >>> select_lines(["def a():", " b=c", " return x"]) 

38 ['def a():', ' b=c', ' return x'] 

39 >>> pc = ["# start x", "def a():", " b=c # -x", " return x", "# end x"] 

40 >>> select_lines(pc, labels={"x"}) 

41 ['def a():', ' return x'] 

42 """ 

43 if not isinstance(code, Iterable): 

44 raise type_error(code, "code", Iterable) 

45 if not isinstance(max_consecutive_empty_lines, int): 

46 raise type_error( 

47 max_consecutive_empty_lines, "max_consecutive_empty_lines", int) 

48 if max_consecutive_empty_lines < 0: 

49 raise ValueError("max_consecutive_empty_lines must be >= 0, but is " 

50 f"{max_consecutive_empty_lines}.") 

51 if not isinstance(line_comment_start, str): 

52 raise type_error(line_comment_start, "line_comment_start", str) 

53 if not line_comment_start: 

54 raise ValueError("line_comment_start cannot be " 

55 f"'{line_comment_start}'.") 

56 

57 keep_lines: list[str] 

58 

59 # make sure that labels are unique and non-empty 

60 label_str: list[str] | None = None 

61 if labels is not None: 

62 if not isinstance(labels, Iterable): 

63 raise type_error(labels, "labels", Iterable) 

64 label_lst = list(labels) 

65 label_lst.sort() 

66 label_str = list({label.strip() for label in label_lst}) 

67 label_str.sort() 

68 if label_lst != label_str: 

69 raise ValueError( 

70 f"Invalid label spec {label_lst} of length {len(label_lst)}," 

71 f" leading to unique label set {label_str} of length " 

72 f"{len(label_str)}.") 

73 del label_lst 

74 if label_str and not (label_str[0]): 

75 raise ValueError(f"Empty label in {labels}.") 

76 

77 # process all labels, if any are specified 

78 if label_str: 

79 keep_lines = [] 

80 

81 start_labels = [f"{line_comment_start} start {label}" 

82 for label in label_str] 

83 end_labels = [f"{line_comment_start} end {label}" 

84 for label in label_str] 

85 add_labels = [f"{line_comment_start} +{label}" for label in label_str] 

86 del_labels = [f"{line_comment_start} -{label}" for label in label_str] 

87 all_labels = set(start_labels + end_labels + add_labels + del_labels) 

88 if len(all_labels) != (4 * len(label_str)): 

89 raise ValueError("label clash? impossible?") 

90 del all_labels 

91 

92 active_labels: set[int] = set() # the active label ranges 

93 current_line_labels: set[int] = set() # labels of the current line 

94 done_labels: set[int] = set() # the labes for which text as retained 

95 

96 for line, the_cl in enumerate(code): # iterate over all code lines 

97 cl = the_cl.rstrip() 

98 

99 # first, we need to update the state 

100 current_line_labels.clear() 

101 current_line_labels.update(active_labels) 

102 found_mark: bool = True 

103 while found_mark: 

104 found_mark = False 

105 

106 # check all potential range starts 

107 for i, lbl in enumerate(start_labels): 

108 if cl.endswith(lbl): 

109 cl = cl[:len(cl) - len(lbl)].rstrip() 

110 if i in active_labels: 

111 raise ValueError( 

112 f"Label '{label_str[i]}' already active in " 

113 f"line {line} with text '{cl}', cannot " 

114 "start.") 

115 active_labels.add(i) 

116 found_mark = True 

117 

118 # check all potential range end 

119 for i, lbl in enumerate(end_labels): 

120 if cl.endswith(lbl): 

121 cl = cl[:len(cl) - len(lbl)].rstrip() 

122 if i not in active_labels: 

123 raise ValueError( 

124 f"Label '{label_str[i]}' not active in " 

125 f"line {line} with text '{cl}', cannot " 

126 "end.") 

127 active_labels.remove(i) 

128 current_line_labels.remove(i) 

129 found_mark = True 

130 

131 # check all potential line add labels 

132 for i, lbl in enumerate(add_labels): 

133 if cl.endswith(lbl): 

134 cl = cl[:len(cl) - len(lbl)].rstrip() 

135 if i in current_line_labels: 

136 raise ValueError( 

137 f"Label '{label_str[i]}' already active in " 

138 f"line {line} with text '{cl}', cannot " 

139 "add.") 

140 current_line_labels.add(i) 

141 found_mark = True 

142 

143 # check all potential line deletion markers 

144 for i, lbl in enumerate(del_labels): 

145 if cl.endswith(lbl): 

146 cl = cl[:len(cl) - len(lbl)].rstrip() 

147 if i not in current_line_labels: 

148 raise ValueError( 

149 f"Label '{label_str[i]}' already active in " 

150 f"line {line} with text '{cl}', cannot " 

151 "delete.") 

152 current_line_labels.remove(i) 

153 found_mark = True 

154 break 

155 if found_mark: 

156 continue 

157 

158 if current_line_labels: 

159 keep_lines.append(cl) 

160 done_labels.update(current_line_labels) 

161 

162 if not keep_lines: 

163 raise ValueError( 

164 f"Nothing is left over after applying labels {labels}.") 

165 if len(done_labels) < len(label_str): 

166 raise ValueError( 

167 "Never had any text for labels " 

168 f"{set(label_str).difference(done_labels)}.") 

169 else: 

170 keep_lines = [cl.rstrip() for cl in code] 

171 

172 if lines is not None: # select the lines we want to keep 

173 if not isinstance(lines, Iterable): 

174 raise type_error(lines, "lines", Iterable) 

175 lines_ints = list(set(lines)) 

176 if not lines_ints: 

177 raise ValueError(f"Empty lines provided: {lines}.") 

178 lines_ints.sort() 

179 keep_lines = [keep_lines[i] for i in lines_ints] 

180 

181 # remove leading empty lines 

182 while keep_lines: 

183 if not keep_lines[0]: 

184 del keep_lines[0] 

185 else: 

186 break 

187 

188 # remove trailing empty lines 

189 while keep_lines: 

190 if not keep_lines[-1]: 

191 del keep_lines[-1] 

192 else: 

193 break 

194 

195 # remove superfluous empty lines 

196 empty_lines = 0 

197 current = len(keep_lines) 

198 while current > 0: 

199 current -= 1 

200 if keep_lines[current]: 

201 empty_lines = 0 

202 elif empty_lines >= max_consecutive_empty_lines: 

203 del keep_lines[current] 

204 else: 

205 empty_lines += 1 

206 

207 if not keep_lines: 

208 raise ValueError(f"Empty code resulting from {code} after applying " 

209 f"labels {labels} and lines {lines}.?") 

210 

211 return keep_lines 

212 

213 

def format_empty_lines(lines: Iterable[str],
                       empty_before: Callable = lambda line: False,
                       no_empty_after: Callable = lambda line: False,
                       force_no_empty_after: Callable = lambda line: False,
                       max_consecutive_empty_lines: int = 1) -> list[str]:
    """
    Normalize the empty lines in a sequence of text lines.

    Trailing white space is removed from every line. Empty lines are only
    ever emitted directly before a non-empty line, so leading and trailing
    empty lines disappear. The three callables each receive a non-empty
    line with its leading and trailing white space stripped.

    :param lines: the original line iterable
    :param empty_before: a function checking whether an empty line
        is required before a certain string; if it returns `True`, at
        least one empty line is inserted even if the input had none
    :param no_empty_after: a function checking whether the empty lines
        following a string in the input should be dropped; an empty line
        demanded by `empty_before` for the next line is still inserted
    :param force_no_empty_after: a function checking whether an empty
        line is prohibited after a string; this also overrides
        `empty_before` for the next line
    :param max_consecutive_empty_lines: the maximum number of permitted
        consecutive empty lines
    :return: the list of formatted lines
    :raises ValueError: if `max_consecutive_empty_lines` is negative or
        if `lines` contains no non-empty line

    >>> code = ["", "a", "", "b", "", "", "c", "", "", "", "d", "e", ""]
    >>> format_empty_lines(code, max_consecutive_empty_lines=3)
    ['a', '', 'b', '', '', 'c', '', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2)
    ['a', '', 'b', '', '', 'c', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=1)
    ['a', '', 'b', '', 'c', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=0)
    ['a', 'b', 'c', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b")
    ['a', '', 'b', 'c', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e")
    ['a', '', 'b', 'c', '', '', 'd', '', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e",
    ...                    force_no_empty_after=lambda s: s == "d")
    ['a', '', 'b', 'c', '', '', 'd', 'e']
    """
    if not isinstance(max_consecutive_empty_lines, int):
        raise type_error(
            max_consecutive_empty_lines, "max_consecutive_empty_lines", int)
    if max_consecutive_empty_lines < 0:
        raise ValueError("max_consecutive_empty_lines must be >= 0, but is "
                         f"{max_consecutive_empty_lines}.")
    # validate the three predicates in declaration order
    for predicate, predicate_name in ((empty_before, "empty_before"),
                                      (no_empty_after, "no_empty_after"),
                                      (force_no_empty_after,
                                       "force_no_empty_after")):
        if not callable(predicate):
            raise type_error(predicate, predicate_name, call=True)

    formatted: list[str] = []
    pending_blanks: int = 0  # empty input lines seen since the last text
    # Both flags start as True so that nothing is emitted or counted
    # before the first non-empty line.
    drop_blanks = True
    forbid_blanks = True

    for raw_line in lines:
        text = raw_line.rstrip()

        if not text:
            # an empty line is only remembered if the previous text line
            # permits empty lines after it
            if not (forbid_blanks or drop_blanks):
                pending_blanks += 1
            continue

        stripped = text.lstrip()
        if not forbid_blanks:
            want_blank = empty_before(stripped) or (
                (pending_blanks > 0) and (max_consecutive_empty_lines > 0))
            if want_blank:
                # at least one, at most max_consecutive_empty_lines
                n_blanks = min(pending_blanks, max_consecutive_empty_lines)
                if n_blanks < 1:
                    n_blanks = 1
                formatted.extend("" for _ in range(n_blanks))

        drop_blanks = no_empty_after(stripped)
        forbid_blanks = force_no_empty_after(stripped)
        formatted.append(text)
        pending_blanks = 0

    if not formatted:
        raise ValueError("No lines of text found.")
    return formatted

298 

299 

def strip_common_whitespace_prefix(lines: Iterable[str]) -> list[str]:
    r"""
    Strip the common prefix of space characters from a list of strings.

    The length of the prefix is the smallest number of leading space
    characters (``" "`` only, other white space such as tabs is not
    considered) over all lines that contain at least one non-space
    character. Empty lines and lines made up only of spaces do not
    influence the prefix length; they are shortened like all other lines.

    :param lines: the lines; any iterable is accepted, including
        one-shot iterators, because it is copied into a list first
    :return: a new list with the common prefix removed from every line

    >>> strip_common_whitespace_prefix([" a", "  b"])
    ['a', ' b']
    >>> strip_common_whitespace_prefix(["  a", "  b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix([" a", " b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix(["  a", "  b", "c"])
    ['  a', '  b', 'c']
    >>> strip_common_whitespace_prefix([" a", "  b", "c"])
    [' a', '  b', 'c']
    >>> strip_common_whitespace_prefix(["  a", "  b", "    c"])
    ['a', 'b', '  c']
    >>> strip_common_whitespace_prefix(iter(["  a", "  b"]))
    ['a', 'b']
    """
    # We need two passes over the data, so a one-shot iterable must be
    # materialized first; otherwise the second pass would see nothing.
    all_lines = list(lines)

    prefix_len = sys.maxsize
    for line in all_lines:
        indent = len(line) - len(line.lstrip(" "))
        if indent >= len(line):
            continue  # empty or spaces only: says nothing about the prefix
        if indent < prefix_len:
            prefix_len = indent
            if prefix_len <= 0:
                break  # cannot get any shorter

    if prefix_len > 0:
        return [line[prefix_len:] for line in all_lines]
    return all_lines