Coverage for texgit / formatters / source_tools.py: 82%

194 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-22 02:50 +0000

1"""In this file, we put some shared tools for rendering source codes.""" 

2 

3import sys 

4from re import MULTILINE, Pattern 

5from re import compile as re_compile 

6from typing import Callable, Final, Iterable 

7 

8from pycommons.strings.enforce import enforce_non_empty_str 

9from pycommons.types import type_error 

10 

# The separators between the entries of a line or label selection string:
# semicolon, comma, plus sign, newline, tab, and space.
__SPLT: Final[Pattern] = re_compile(r"[;,+\n\t ]", MULTILINE)

13 

14 

def split_line_choices(lines: str | None) -> list[int] | None:
    """
    Split a textual line selection into a list of `0`-based line indices.

    The selection is a list of entries separated by any of the characters
    matched by the module-level split pattern (`;`, `,`, `+`, newline, tab,
    or space). Each entry is either a single `1`-based line number (`"3"`)
    or an inclusive `1`-based range (`"4-7"`). Empty entries are ignored.
    The order of the entries is preserved and duplicates are not removed.

    :param lines: the line choices, or `None`
    :return: the `0`-based line indices, or `None` if `lines` is `None` or
        does not contain any entry
    :raises ValueError: if an entry is not a valid number or range, if a
        line number is less than `1`, or if a range ends before it starts

    An argument that is neither a `str` nor `None` raises the error created
    by `type_error`.

    >>> print(split_line_choices(None))
    None
    >>> print(split_line_choices(""))
    None
    >>> print(split_line_choices(","))
    None
    >>> split_line_choices("1")
    [0]
    >>> split_line_choices("1,2")
    [0, 1]
    >>> split_line_choices("1-5")
    [0, 1, 2, 3, 4]
    >>> split_line_choices("3;4-5;7-7;22+12-15;")
    [2, 3, 4, 6, 21, 11, 12, 13, 14]
    >>> try:
    ...     split_line_choices("0")
    ... except ValueError as ve:
    ...     print(ve)
    Line numbers are 1-based, but got 0 in '0'.
    >>> try:
    ...     split_line_choices("5-3")
    ... except ValueError as ve:
    ...     print(ve)
    Range end 3 is before range start 5 in '5-3'.
    """
    if lines is None:
        return None
    if not isinstance(lines, str):
        raise type_error(lines, "lines", (str, None))

    selected: list[int] = []
    for raw in __SPLT.split(lines):
        entry: str = raw.strip()
        if not entry:
            continue  # nothing between two adjacent separators

        # "a-b" is a range, anything without a dash is a single line number
        first_txt, dash, last_txt = entry.partition("-")
        first: int = int(enforce_non_empty_str(first_txt.strip()))
        if first < 1:
            # 0 would turn into index -1 and silently address the last line
            raise ValueError(
                f"Line numbers are 1-based, but got {first} in {entry!r}.")
        if not dash:
            selected.append(first - 1)
            continue

        last: int = int(enforce_non_empty_str(last_txt.strip()))
        if last < first:
            # a reversed range would select nothing; if it were the only
            # entry, the result would be `None`, i.e., "no selection"
            raise ValueError(
                f"Range end {last} is before range start {first} "
                f"in {entry!r}.")
        # 1-based inclusive [first, last] == 0-based half-open [first-1, last)
        selected.extend(range(first - 1, last))

    return selected or None

58 

59 

def split_labels(labels: str | None) -> set[str]:
    r"""
    Turn a label specification string into the set of labels it names.

    The string is cut at every character matched by the module-level split
    pattern; each piece is stripped of surrounding white space and empty
    pieces are dropped. Duplicate labels collapse into one set element.

    :param labels: the labels string or `None`
    :return: the set of labels, which is empty if `labels` is `None` or
        does not contain any label

    >>> print(split_labels(None))
    set()
    >>> print(split_labels(""))
    set()
    >>> print(split_labels(","))
    set()
    >>> sorted(split_labels("a;b;d;c"))
    ['a', 'b', 'c', 'd']
    >>> sorted(split_labels("a,b c+a;a\td;c"))
    ['a', 'b', 'c', 'd']
    """
    if labels is None:
        return set()
    if not isinstance(labels, str):
        raise type_error(labels, "labels", (str, None))

    # strip every piece first, then keep only those with content left
    pieces = (piece.strip() for piece in __SPLT.split(labels))
    return {piece for piece in pieces if piece}

90 

91 

92def select_lines(code: Iterable[str], 

93 lines: Iterable[int] | None = None, 

94 labels: Iterable[str] | None = None, 

95 line_comment_start: str = "#", 

96 max_consecutive_empty_lines: int = 2) -> list[str]: 

97 r""" 

98 Select lines of source code based on labels and line indices. 

99 

100 First, we select all lines of the code we want to keep. 

101 If labels are defined, then lines are kept as ranges or as single lines 

102 for all pre-defined labels. Ranges may overlap and/or intersect. 

103 Otherwise, all lines are selected in this step. 

104 

105 Then, if line numbers are provided, we selected the lines based on the 

106 line numbers from the lines we have preserved. 

107 

108 Finally, leading and trailing empty lines as well as superfluous empty 

109 lines are removed. 

110 

111 :param code: the code loaded from a file 

112 :param lines: the lines to keep, or `None` if we keep all 

113 :param labels: a list of labels marking start and 

114 end of code snippets to include 

115 :param line_comment_start: the string marking the line comment start 

116 :param max_consecutive_empty_lines: the maximum number of permitted 

117 consecutive empty lines 

118 :return: the list of selected lines 

119 

120 >>> select_lines(["def a():", " b=c", " return x"]) 

121 ['def a():', ' b=c', ' return x'] 

122 >>> pc = ["# start x", "def a():", " b=c # -x", " return x", "# end x"] 

123 >>> select_lines(pc, labels={"x"}) 

124 ['def a():', ' return x'] 

125 >>> select_lines(["def a():", " b=c", " return x"], lines=[0, 2]) 

126 ['def a():', ' return x'] 

127 """ 

128 if not isinstance(code, Iterable): 

129 raise type_error(code, "code", Iterable) 

130 if not isinstance(max_consecutive_empty_lines, int): 

131 raise type_error( 

132 max_consecutive_empty_lines, "max_consecutive_empty_lines", int) 

133 if max_consecutive_empty_lines < 0: 

134 raise ValueError("max_consecutive_empty_lines must be >= 0, but is " 

135 f"{max_consecutive_empty_lines}.") 

136 if not isinstance(line_comment_start, str): 

137 raise type_error(line_comment_start, "line_comment_start", str) 

138 if not line_comment_start: 

139 raise ValueError("line_comment_start cannot be " 

140 f"'{line_comment_start}'.") 

141 

142 keep_lines: list[str] 

143 

144 # make sure that labels are unique and non-empty 

145 label_str: list[str] | None = None 

146 if labels is not None: 

147 if not isinstance(labels, Iterable): 

148 raise type_error(labels, "labels", Iterable) 

149 label_lst = list(labels) 

150 label_lst.sort() 

151 label_str = list({label.strip() for label in label_lst}) 

152 label_str.sort() 

153 if label_lst != label_str: 

154 raise ValueError( 

155 f"Invalid label spec {label_lst} of length {len(label_lst)}," 

156 f" leading to unique label set {label_str} of length " 

157 f"{len(label_str)}.") 

158 del label_lst 

159 if label_str and not (label_str[0]): 

160 raise ValueError(f"Empty label in {labels}.") 

161 

162 # process all labels, if any are specified 

163 if label_str: 

164 keep_lines = [] 

165 

166 start_labels = [f"{line_comment_start} start {label}" 

167 for label in label_str] 

168 end_labels = [f"{line_comment_start} end {label}" 

169 for label in label_str] 

170 add_labels = [f"{line_comment_start} +{label}" for label in label_str] 

171 del_labels = [f"{line_comment_start} -{label}" for label in label_str] 

172 all_labels = set(start_labels + end_labels + add_labels + del_labels) 

173 if len(all_labels) != (4 * len(label_str)): 

174 raise ValueError("label clash? impossible?") 

175 del all_labels 

176 

177 active_labels: set[int] = set() # the active label ranges 

178 current_line_labels: set[int] = set() # labels of the current line 

179 done_labels: set[int] = set() # the labels for which text as retained 

180 

181 for line, the_cl in enumerate(code): # iterate over all code lines 

182 cl = the_cl.rstrip() 

183 

184 # first, we need to update the state 

185 current_line_labels.clear() 

186 current_line_labels.update(active_labels) 

187 found_mark: bool = True 

188 while found_mark: 

189 found_mark = False 

190 

191 # check all potential range starts 

192 for i, lbl in enumerate(start_labels): 

193 if cl.endswith(lbl): 

194 cl = cl[:len(cl) - len(lbl)].rstrip() 

195 if i in active_labels: 

196 raise ValueError( 

197 f"Label '{label_str[i]}' already active in " 

198 f"line {line} with text '{cl}', cannot " 

199 "start.") 

200 active_labels.add(i) 

201 found_mark = True 

202 

203 # check all potential range end 

204 for i, lbl in enumerate(end_labels): 

205 if cl.endswith(lbl): 

206 cl = cl[:len(cl) - len(lbl)].rstrip() 

207 if i not in active_labels: 

208 raise ValueError( 

209 f"Label '{label_str[i]}' not active in " 

210 f"line {line} with text '{cl}', cannot " 

211 "end.") 

212 active_labels.remove(i) 

213 current_line_labels.remove(i) 

214 found_mark = True 

215 

216 # check all potential line add labels 

217 for i, lbl in enumerate(add_labels): 

218 if cl.endswith(lbl): 

219 cl = cl[:len(cl) - len(lbl)].rstrip() 

220 if i in current_line_labels: 

221 raise ValueError( 

222 f"Label '{label_str[i]}' already active in " 

223 f"line {line} with text '{cl}', cannot " 

224 "add.") 

225 current_line_labels.add(i) 

226 found_mark = True 

227 

228 # check all potential line deletion markers 

229 for i, lbl in enumerate(del_labels): 

230 if cl.endswith(lbl): 

231 cl = cl[:len(cl) - len(lbl)].rstrip() 

232 if i not in current_line_labels: 

233 raise ValueError( 

234 f"Label '{label_str[i]}' already active in " 

235 f"line {line} with text '{cl}', cannot " 

236 "delete.") 

237 current_line_labels.remove(i) 

238 found_mark = True 

239 break 

240 if found_mark: 

241 continue 

242 

243 if current_line_labels: 

244 keep_lines.append(cl) 

245 done_labels.update(current_line_labels) 

246 

247 if not keep_lines: 

248 raise ValueError( 

249 f"Nothing is left over after applying labels {labels}.") 

250 if len(done_labels) < len(label_str): 

251 raise ValueError( 

252 "Never had any text for labels " 

253 f"{set(label_str).difference(done_labels)}.") 

254 else: 

255 keep_lines = [cl.rstrip() for cl in code] 

256 

257 if lines is not None: # select the lines we want to keep 

258 if not isinstance(lines, Iterable): 

259 raise type_error(lines, "lines", Iterable) 

260 lines_ints = list(set(lines)) 

261 if not lines_ints: 

262 raise ValueError(f"Empty lines provided: {lines}.") 

263 lines_ints.sort() 

264 keep_lines = [keep_lines[i] for i in lines_ints] 

265 

266 # remove leading empty lines 

267 while keep_lines: 

268 if not keep_lines[0]: 

269 del keep_lines[0] 

270 else: 

271 break 

272 

273 # remove trailing empty lines 

274 while keep_lines: 

275 if not keep_lines[-1]: 

276 del keep_lines[-1] 

277 else: 

278 break 

279 

280 # remove superfluous empty lines 

281 empty_lines = 0 

282 current = len(keep_lines) 

283 while current > 0: 

284 current -= 1 

285 if keep_lines[current]: 

286 empty_lines = 0 

287 elif empty_lines >= max_consecutive_empty_lines: 

288 del keep_lines[current] 

289 else: 

290 empty_lines += 1 

291 

292 if not keep_lines: 

293 raise ValueError(f"Empty code resulting from {code} after applying " 

294 f"labels {labels} and lines {lines}.?") 

295 

296 return keep_lines 

297 

298 

def format_empty_lines(lines: Iterable[str],
                       empty_before: Callable = lambda _: False,
                       no_empty_after: Callable = lambda _: False,
                       force_no_empty_after: Callable = lambda _: False,
                       max_consecutive_empty_lines: int = 2) -> list[str]:
    """
    Normalize the empty lines in a sequence of text lines.

    Trailing white space is removed from every line. Leading and trailing
    empty lines are dropped and runs of empty lines between two text lines
    are capped at `max_consecutive_empty_lines`. The three callbacks receive
    a text line with its surrounding white space stripped.

    :param lines: the original line iterable
    :param empty_before: a function checking whether an empty line
        is required before a certain string; if so, at least one empty
        line is emitted, even if `max_consecutive_empty_lines` is `0`
    :param no_empty_after: a function checking whether the empty lines
        following a string in the input should be discarded; an empty
        line demanded by `empty_before` is still inserted
    :param force_no_empty_after: a function checking whether an empty
        line is prohibited after a string under all circumstances, i.e.,
        even if `empty_before` demands one for the next line
    :param max_consecutive_empty_lines: the maximum number of permitted
        consecutive empty lines
    :return: the list of formatted lines
    :raises ValueError: if `max_consecutive_empty_lines` is negative or
        if no line with text was found

    >>> code = ["", "a", "", "b", "", "", "c", "", "", "", "d", "e", ""]
    >>> format_empty_lines(code, max_consecutive_empty_lines=3)
    ['a', '', 'b', '', '', 'c', '', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2)
    ['a', '', 'b', '', '', 'c', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=1)
    ['a', '', 'b', '', 'c', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=0)
    ['a', 'b', 'c', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b")
    ['a', '', 'b', 'c', '', '', 'd', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e")
    ['a', '', 'b', 'c', '', '', 'd', '', 'e']
    >>> format_empty_lines(code, max_consecutive_empty_lines=2,
    ...                    no_empty_after=lambda s: s == "b",
    ...                    empty_before=lambda s: s == "e",
    ...                    force_no_empty_after=lambda s: s == "d")
    ['a', '', 'b', 'c', '', '', 'd', 'e']
    """
    if not isinstance(max_consecutive_empty_lines, int):
        raise type_error(
            max_consecutive_empty_lines, "max_consecutive_empty_lines", int)
    if max_consecutive_empty_lines < 0:
        raise ValueError("max_consecutive_empty_lines must be >= 0, but is "
                         f"{max_consecutive_empty_lines}.")
    if not callable(empty_before):
        raise type_error(empty_before, "empty_before", call=True)
    if not callable(no_empty_after):
        raise type_error(no_empty_after, "no_empty_after", call=True)
    if not callable(force_no_empty_after):
        raise type_error(
            force_no_empty_after, "force_no_empty_after", call=True)

    output: list[str] = []
    pending_blanks: int = 0  # empty lines seen since the last text line
    # both flags start as True so that leading empty lines are dropped
    skip_blanks = True
    forbid_blanks = True

    for raw in lines:
        text = raw.rstrip()

        if not text:
            # empty lines are only counted here; they are emitted once the
            # next text line shows up, so trailing ones never get written
            if not (forbid_blanks or skip_blanks):
                pending_blanks += 1
            continue

        stripped = text.lstrip()
        if not forbid_blanks:
            may_emit_pending = (pending_blanks > 0) \
                and (max_consecutive_empty_lines > 0)
            if empty_before(stripped) or may_emit_pending:
                capped = min(pending_blanks, max_consecutive_empty_lines)
                # a line required by `empty_before` is written even if the
                # cap computes to zero
                output.extend([""] * max(1, capped))

        skip_blanks = no_empty_after(stripped)
        forbid_blanks = force_no_empty_after(stripped)
        output.append(text)
        pending_blanks = 0

    if not output:
        raise ValueError("No lines of text found.")
    return output

383 

384 

def strip_common_whitespace_prefix(lines: Iterable[str]) -> list[str]:
    r"""
    Strip the common prefix of space characters from a sequence of strings.

    Only the space character counts as indentation here. Lines that are
    empty or consist solely of spaces do not limit the width of the common
    prefix; they are shortened along with all other lines.

    :param lines: the lines, which may be any iterable including a
        one-shot iterator or generator
    :return: a new list with the common prefix removed from every line

    >>> strip_common_whitespace_prefix([" a", "  b"])
    ['a', ' b']
    >>> strip_common_whitespace_prefix(["  a", "  b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix([" a", " b"])
    ['a', 'b']
    >>> strip_common_whitespace_prefix(["  a", "  b", "c"])
    ['  a', '  b', 'c']
    >>> strip_common_whitespace_prefix([" a", " b", "c"])
    [' a', ' b', 'c']
    >>> strip_common_whitespace_prefix(["  a", "  b", "    c"])
    ['a', 'b', '  c']
    >>> strip_common_whitespace_prefix(iter(["  a", "   b"]))
    ['a', ' b']
    """
    # We need two passes over the data, so a one-shot iterator must be
    # materialized first; otherwise the second pass would see nothing.
    all_lines: list[str] = list(lines)

    prefix_len = sys.maxsize
    for line in all_lines:
        content = line.lstrip(" ")
        if not content:
            continue  # empty or spaces only: no constraint on the prefix
        prefix_len = min(prefix_len, len(line) - len(content))
        if prefix_len <= 0:
            return all_lines  # some line is not indented: nothing to strip

    # If no line had any content, prefix_len is still sys.maxsize and the
    # slices below reduce every line to the empty string.
    return [line[prefix_len:] for line in all_lines]