Coverage for pycommons / dev / url_replacer.py: 100%

84 statements  

« prev     ^ index     » next       coverage.py v7.13.0, created at 2025-12-11 03:04 +0000

1""" 

2Process a markdown file in order to make it useful for distribution. 

3 

4In order to let sphinx properly load and insert the `README.md` file into the 

5project's documentation, we need to process this file from the GitHub style 

6markdown to a variant suitable for the myst parser used in sphinx. While we 

7are at it, we can also turn absolute URLs from the GitHub-`README.md` file 

8that point to the documentation URL to relative URLs. 

9""" 

10 

11from itertools import chain 

12from re import Match, Pattern, escape, subn 

13from re import compile as re_compile 

14from typing import Any, Callable, Final, Iterable, Mapping, cast 

15 

16from pycommons.net.url import URL 

17from pycommons.types import type_error 

18 

#: The delimiters that enclose URLs in HTML attributes. Each entry is a tuple
#: `(start_regex, end_regex, start_text, end_text)`: the two regular
#: expressions match the text directly before and directly after a URL, the
#: two plain strings are the normalized delimiters that are written back in
#: their place. Both quote styles are recognized for `href` and `src`, the
#: text written back always uses double quotes.
__HTML_URL_SEPARTORS: Final[tuple[tuple[str, str, str, str], ...]] = tuple(
    (rf"\s+{attr}\s*=\s*{quote}\s*", rf"\s*{quote}", f' {attr}="', '"')
    for attr in ("href", "src") for quote in ('"', "'"))

#: The delimiters that enclose URLs in Markdown, using the same
#: `(start_regex, end_regex, start_text, end_text)` layout: first the
#: `](...)` form, then the `<...>` form.
__MD_URL_SEPARTORS: Final[tuple[tuple[str, str, str, str], ...]] = (
    (r"\s*\]\s*\(\s*", r"\s*\)", "](", ")"),
    (r"<\s*", r"\s*>", "<", ">"),
)

32 

33 

def __make_base_url_replacer(
        collector: Callable[[
            tuple[Pattern, Callable[[Match], str] | str]], Any],
        base_url_to_replace: str,
        replace_base_url_with: str = "./",
        for_markdown: bool = True) -> None:
    r"""
    Make `(Pattern, Callable)` tuples that replace base URLs in Markdown.

    For every supported URL delimiter style, one compiled regular expression
    and one replacement function are passed to `collector`. The pattern
    matches the opening delimiter, the base URL (with either `http` or
    `https` as scheme), the remainder of the URL, and the closing delimiter.
    The replacement function writes the normalized delimiters, the
    replacement for the base URL, and the whitespace-stripped remainder.

    The remainder of the URL is captured non-greedily, i.e., it ends at the
    *first* closing delimiter. A greedy capture would extend to the last
    closing delimiter on the same line and, since the delimiters are
    rewritten, could damage unrelated text such as an apostrophe that follows
    a single-quoted `href`.

    :param collector: the collector (e.g. :meth:`list.append`) to receive the
        tuples
    :param base_url_to_replace: the base url to be replaced
    :param replace_base_url_with: the string with which the base URL should be
        replaced
    :param for_markdown: should replacers for Markdown be added (`True`) or
        only for HTML (`False`)
    :raises TypeError: if `collector` is not callable, if `for_markdown` is
        not a `bool`, or if one of the strings is not a `str`
    :raises ValueError: if the scheme of the URL is neither `http` nor
        `https`

    >>> from re import sub
    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x", "./")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com/x\\/?(.*?)\\s*\\)'
    '<\\s*https?://example\\.com/x\\/?(.*?)\\s*>'
    >>> sub(coll[3][0], coll[3][1], " src= ' https://example.com/x/y '")
    ' src="./y"'
    >>> sub(coll[1][0], coll[1][1], " href ='http://example.com/x/y '")
    ' href="./y"'
    >>> sub(coll[2][0], coll[2][1], ' src ="https://example.com/x/y"')
    ' src="./y"'
    >>> sub(coll[0][0], coll[0][1], ' href ="http://example.com/x/y"')
    ' href="./y"'
    >>> sub(coll[4][0], coll[4][1], '[l]( https://example.com/x/y)')
    '[l](./y)'
    >>> sub(coll[4][0], coll[4][1], '![xx ] (http://example.com/x/y/g.jpg)')
    '![xx](./y/g.jpg)'
    >>> sub(coll[5][0], coll[5][1], '< https://example.com/x/y >')
    '<./y>'
    >>> sub(coll[3][0], coll[3][1], "src='https://example.com/x/y ")
    "src='https://example.com/x/y "

    Text after the URL that contains the closing delimiter is left alone:

    >>> print(sub(coll[1][0], coll[1][1],
    ...           "<a href='http://example.com/x/y'>it's</a>"))
    <a href="./y">it's</a>

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x/", "./")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com/x\\/?(.*?)\\s*\\)'
    '<\\s*https?://example\\.com/x\\/?(.*?)\\s*>'

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x/", "./", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x/", "/")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com/x\\/?(.*?)\\s*\\)'
    '<\\s*https?://example\\.com/x\\/?(.*?)\\s*>'

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x/", "/", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x/", "bb")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x(.*?)\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com/x(.*?)\\s*\\)'
    '<\\s*https?://example\\.com/x(.*?)\\s*>'

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x/", "bb", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x(.*?)\\s*'"

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x", "./")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com/x\\/?(.*?)\\s*\\)'
    '<\\s*https?://example\\.com/x\\/?(.*?)\\s*>'

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x", "./", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x", "/")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com/x\\/?(.*?)\\s*\\)'
    '<\\s*https?://example\\.com/x\\/?(.*?)\\s*>'

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x", "/", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\/?(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\/?(.*?)\\s*'"

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x", "bb")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x(.*?)\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com/x(.*?)\\s*\\)'
    '<\\s*https?://example\\.com/x(.*?)\\s*>'

    >>> coll = list()
    >>> __make_base_url_replacer(
    ...     coll.append, "https://example.com/x", "bb", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x(.*?)\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x(.*?)\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x(.*?)\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x(.*?)\\s*'"

    >>> try:
    ...     __make_base_url_replacer(None, "https://example.com/x", "bb")
    ... except TypeError as te:
    ...     print(te)
    collector should be a callable but is None.

    >>> try:
    ...     __make_base_url_replacer(1, "https://example.com/x", "bb")
    ... except TypeError as te:
    ...     print(te)
    collector should be a callable but is int, namely 1.

    >>> try:
    ...     __make_base_url_replacer(coll.append, None, "bb")
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'NoneType'

    >>> try:
    ...     __make_base_url_replacer(coll.append, 1, "bb")
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'int'

    >>> try:
    ...     __make_base_url_replacer(
    ...         coll.append, "https://example.com/x", None)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'strip' for 'str' objects doesn't apply to a 'NoneType' object

    >>> try:
    ...     __make_base_url_replacer(coll.append, "https://example.com/x", 1)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'strip' for 'str' objects doesn't apply to a 'int' object

    >>> try:
    ...     __make_base_url_replacer(coll.append, "tweise@hfuu.edu.cn", "x")
    ... except ValueError as ve:
    ...     print(ve)
    Invalid scheme 'mailto' for url 'tweise@hfuu.edu.cn'.

    >>> try:
    ...     __make_base_url_replacer(coll.append, "https://example.com/x",
    ...                              "./", None)
    ... except TypeError as te:
    ...     print(te)
    for_markdown should be an instance of bool but is None.

    >>> try:
    ...     __make_base_url_replacer(coll.append, "https://example.com/x",
    ...                              "./", 1)
    ... except TypeError as te:
    ...     print(te)
    for_markdown should be an instance of bool but is int, namely 1.
    """
    if not callable(collector):
        raise type_error(collector, "collector", call=True)
    if not isinstance(for_markdown, bool):
        raise type_error(for_markdown, "for_markdown", bool)
    url: Final[URL] = URL(base_url_to_replace)
    if url.scheme not in {"http", "https"}:
        raise ValueError(
            f"Invalid scheme {url.scheme!r} for url {base_url_to_replace!r}.")
    use_repl: Final[str] = str.strip(replace_base_url_with)

    # Replace the concrete scheme by `https?` so that both the http and the
    # https variant of the URL are matched; the rest is matched literally.
    use_url: str = f"https?{escape(url[str.__len__(url.scheme):])}"
    if use_repl.startswith(("/", "./")):
        # The replacement already provides a slash, so a slash that follows
        # the base URL is swallowed (if present) instead of being duplicated.
        use_url += "?" if use_url.endswith("/") else r"\/?"

    for sr, er, ss, es in chain(__HTML_URL_SEPARTORS, __MD_URL_SEPARTORS) \
            if for_markdown else __HTML_URL_SEPARTORS:
        collector((
            # `(.*?)` is non-greedy: the URL ends at the first closing
            # delimiter, not at the last one on the line.
            cast("Pattern", re_compile(f"{sr}{use_url}(.*?){er}")),
            # The loop variables are bound as default arguments so that each
            # lambda keeps the values of its own iteration.
            cast("Callable[[Match], str]",
                 lambda mm, _ss=ss, _es=es, _rr=use_repl:
                 f"{_ss}{_rr}{str.strip(mm.group(1))}{_es}")))

290 

291 

def __make_full_url_replacer(
        collector: Callable[[
            tuple[Pattern, Callable[[Match], str] | str]], Any],
        url_to_replace: str, replace_url_with: str = "./",
        for_markdown: bool = True) -> None:
    r"""
    Make `(Pattern, str)` tuples that replace full URLs in Markdown.

    For every supported URL delimiter style, one compiled regular expression
    and one replacement template are passed to `collector`. The pattern
    matches the opening delimiter, the complete URL (with either `http` or
    `https` as scheme), and the closing delimiter. The template consists of
    the normalized delimiters around the replacement text.

    The template is meant to be used as `repl` argument of :func:`re.sub`,
    where backslashes introduce escapes. Backslashes in `replace_url_with`
    are therefore doubled, such that the replacement text is always inserted
    literally.

    :param collector: the collector (e.g. :meth:`list.append`) to receive the
        tuples
    :param url_to_replace: the full url to be replaced
    :param replace_url_with: the string with which the URL should be
        replaced
    :param for_markdown: should replacers for Markdown be added (`True`) or
        only for HTML (`False`)
    :raises TypeError: if `collector` is not callable, if `for_markdown` is
        not a `bool`, or if one of the strings is not a `str`
    :raises ValueError: if the scheme of the URL is neither `http` nor
        `https` or if the replacement is empty or only white space

    >>> from re import sub
    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com/x.jpg", "x.jpg")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com/x\\.jpg\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com/x\\.jpg\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com/x\\.jpg\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com/x\\.jpg\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com/x\\.jpg\\s*\\)'
    '<\\s*https?://example\\.com/x\\.jpg\\s*>'
    >>> sub(coll[1][0], coll[1][1], " href= ' https://example.com/x.jpg '")
    ' href="x.jpg"'
    >>> sub(coll[1][0], coll[1][1], " href='https://example.com/x.jpg '")
    ' href="x.jpg"'
    >>> sub(coll[2][0], coll[2][1], ' src="https://example.com/x.jpg"')
    ' src="x.jpg"'
    >>> sub(coll[2][0], coll[2][1], ' src="https://example.com/x.jpg"')
    ' src="x.jpg"'
    >>> sub(coll[4][0], coll[4][1], '[l]( https://example.com/x.jpg)')
    '[l](x.jpg)'
    >>> sub(coll[4][0], coll[4][1], '![xx ] (https://example.com/x.jpg)')
    '![xx](x.jpg)'
    >>> sub(coll[5][0], coll[5][1], '< https://example.com/x.jpg>')
    '<x.jpg>'

    Backslashes in the replacement are inserted literally:

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com/x.jpg", r"img\x.jpg")
    >>> sub(coll[0][0], coll[0][1], ' href="https://example.com/x.jpg"')
    ' href="img\\x.jpg"'

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com/", "./x")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com\\/?\\s*\\)'
    '<\\s*https?://example\\.com\\/?\\s*>'

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com/", "./x", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com/", "/x")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com\\/?\\s*\\)'
    '<\\s*https?://example\\.com\\/?\\s*>'

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com/", "/x", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com/", "bb")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com\\s*\\)'
    '<\\s*https?://example\\.com\\s*>'

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com/", "bb", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\s*'"

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com", "./x")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com\\/?\\s*\\)'
    '<\\s*https?://example\\.com\\/?\\s*>'

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com", "./x", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com", "/x")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com\\/?\\s*\\)'
    '<\\s*https?://example\\.com\\/?\\s*>'

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com", "/x", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\/?\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\/?\\s*'"

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com", "bb")
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\s*'"
    '\\s*\\]\\s*\\(\\s*https?://example\\.com\\s*\\)'
    '<\\s*https?://example\\.com\\s*>'

    >>> coll = list()
    >>> __make_full_url_replacer(
    ...     coll.append, "https://example.com", "bb", False)
    >>> for k, y in coll:
    ...     print(repr(k.pattern))
    '\\s+href\\s*=\\s*"\\s*https?://example\\.com\\s*"'
    "\\s+href\\s*=\\s*'\\s*https?://example\\.com\\s*'"
    '\\s+src\\s*=\\s*"\\s*https?://example\\.com\\s*"'
    "\\s+src\\s*=\\s*'\\s*https?://example\\.com\\s*'"

    >>> try:
    ...     __make_full_url_replacer(None, "https://example.com", "bb")
    ... except TypeError as te:
    ...     print(te)
    collector should be a callable but is None.

    >>> try:
    ...     __make_full_url_replacer(1, "https://example.com", "bb")
    ... except TypeError as te:
    ...     print(te)
    collector should be a callable but is int, namely 1.

    >>> try:
    ...     __make_full_url_replacer(coll.append, None, "bb")
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'NoneType'

    >>> try:
    ...     __make_full_url_replacer(coll.append, 1, "bb")
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'int'

    >>> try:
    ...     __make_full_url_replacer(coll.append, "http://example.com", None)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'strip' for 'str' objects doesn't apply to a 'NoneType' object

    >>> try:
    ...     __make_full_url_replacer(coll.append, "http://example.com", 1)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'strip' for 'str' objects doesn't apply to a 'int' object

    >>> try:
    ...     __make_full_url_replacer(coll.append, "tweise@hfuu.edu.cn", "1")
    ... except ValueError as ve:
    ...     print(ve)
    Invalid scheme 'mailto' for url 'tweise@hfuu.edu.cn'.

    >>> try:
    ...     __make_full_url_replacer(coll.append, "http://example.com", " ")
    ... except ValueError as ve:
    ...     print(ve)
    Cannot replace URL 'http://example.com' with ' '.

    >>> try:
    ...     __make_full_url_replacer(coll.append, "http://example.com",
    ...                              ".", None)
    ... except TypeError as te:
    ...     print(te)
    for_markdown should be an instance of bool but is None.

    >>> try:
    ...     __make_full_url_replacer(coll.append, "http://example.com",
    ...                              ".", 1)
    ... except TypeError as te:
    ...     print(te)
    for_markdown should be an instance of bool but is int, namely 1.
    """
    if not callable(collector):
        raise type_error(collector, "collector", call=True)
    if not isinstance(for_markdown, bool):
        raise type_error(for_markdown, "for_markdown", bool)
    url: Final[URL] = URL(url_to_replace)
    if url.scheme not in {"http", "https"}:
        raise ValueError(
            f"Invalid scheme {url.scheme!r} for url {url_to_replace!r}.")
    use_repl: Final[str] = str.strip(replace_url_with)
    if str.__len__(use_repl) <= 0:
        raise ValueError(
            f"Cannot replace URL {url!r} with {replace_url_with!r}.")

    # Replace the concrete scheme by `https?` so that both the http and the
    # https variant of the URL are matched; the rest is matched literally.
    use_url: str = f"https?{escape(url[str.__len__(url.scheme):])}"
    if use_repl.startswith(("/", "./")):
        # Also accept the URL with one trailing slash.
        use_url += "?" if use_url.endswith("/") else r"\/?"

    # The collected string is used as replacement template by `re.subn`,
    # which processes backslash escapes. Doubling every backslash makes the
    # template reproduce the replacement text literally.
    repl_text: Final[str] = use_repl.replace("\\", "\\\\")

    for sr, er, ss, es in chain(__HTML_URL_SEPARTORS, __MD_URL_SEPARTORS) \
            if for_markdown else __HTML_URL_SEPARTORS:
        collector((
            cast("Pattern", re_compile(f"{sr}{use_url}{er}")),
            f"{ss}{repl_text}{es}"))

551 

552 

553def __make_replacer(replacers: Iterable[tuple[ 

554 Pattern, Callable[[Match], str] | str]]) -> Callable[[str], str]: 

555 """ 

556 Make a function that replaces all URL parts in a string. 

557 

558 :param replacers: the replacers patterns 

559 :returns: the function that can apply the replacers 

560 

561 >>> coll = list() 

562 >>> __make_full_url_replacer(coll.append, "https://example.com/log.txt", 

563 ... "https://example.org/log.txt") 

564 >>> __make_base_url_replacer(coll.append, "https://example.com/", "./") 

565 >>> f = __make_replacer(coll) 

566 >>> f("bla <a href='https://example.com/log.txt'>x</a> bla") 

567 'bla <a href="https://example.org/log.txt">x</a> bla' 

568 >>> f("bla <a href='https://example.com/xlog.txt'>x</a> bla") 

569 'bla <a href="./xlog.txt">x</a> bla' 

570 

571 >>> try: 

572 ... f(None) 

573 ... except TypeError as te: 

574 ... print(te) 

575 descriptor 'rstrip' for 'str' objects doesn't apply to a 'NoneType' object 

576 

577 >>> try: 

578 ... f(1) 

579 ... except TypeError as te: 

580 ... print(te) 

581 descriptor 'rstrip' for 'str' objects doesn't apply to a 'int' object 

582 

583 >>> coll.append((None, coll[0][1])) 

584 >>> try: 

585 ... __make_replacer(coll) 

586 ... except TypeError as te: 

587 ... print(te) 

588 pattern should be an instance of re.Pattern but is None. 

589 

590 >>> coll[-1] = (1, coll[0][1]) 

591 >>> try: 

592 ... __make_replacer(coll) 

593 ... except TypeError as te: 

594 ... print(te) 

595 pattern should be an instance of re.Pattern but is int, namely 1. 

596 

597 >>> coll[-1] = (coll[0][0], None) 

598 >>> try: 

599 ... __make_replacer(coll) 

600 ... except TypeError as te: 

601 ... print(te) 

602 replacer should be an instance of str or a callable but is None. 

603 

604 >>> coll[-1] = (coll[0][0], 1) 

605 >>> try: 

606 ... __make_replacer(coll) 

607 ... except TypeError as te: 

608 ... print(te) 

609 replacer should be an instance of str or a callable but is int, namely 1. 

610 

611 >>> try: 

612 ... __make_replacer(None) 

613 ... except TypeError as te: 

614 ... print(te) 

615 replacers should be an instance of typing.Iterable but is None. 

616 

617 >>> try: 

618 ... __make_replacer(1) 

619 ... except TypeError as te: 

620 ... print(te) 

621 replacers should be an instance of typing.Iterable but is int, namely 1. 

622 

623 >>> coll = list() 

624 >>> __make_full_url_replacer(coll.append, "https://example.com", 

625 ... "https://example.com") 

626 >>> f = __make_replacer(coll) 

627 >>> try: 

628 ... f("<a href='http://example.com' />") 

629 ... except ValueError as ve: 

630 ... print(str(ve)[:60]) 

631 Endless loop: "<a href='http://example.com' />" -> '<a href= 

632 """ 

633 if not isinstance(replacers, Iterable): 

634 raise type_error(replacers, "replacers", Iterable) 

635 

636 pats: Final[tuple[tuple[Pattern, Callable[[Match], str] | str], 

637 ...]] = tuple(replacers) 

638 for pattern, replacer in pats: 

639 if not isinstance(pattern, Pattern): 

640 raise type_error(pattern, "pattern", Pattern) 

641 if not (isinstance(replacer, str) or callable(replacer)): 

642 raise type_error(replacer, "replacer", str, True) 

643 

644 def __func(text: str, __pats=pats) -> str: 

645 out_str: str = str.rstrip(text) # enforce string 

646 if str.__len__(out_str) <= 0: 

647 return "" 

648 rc: int = 1 

649 iteration: int = 0 

650 while rc > 0: 

651 rc = 0 

652 for pp, rr in __pats: 

653 out_str, nn = subn(pp, rr, out_str) 

654 rc += nn 

655 iteration += 1 

656 if iteration > 100: 

657 raise ValueError(f"Endless loop: {text!r} -> {out_str!r}.") 

658 return str.rstrip(out_str) # enforce string 

659 

660 return cast("Callable[[str], str]", __func) 

661 

662 

663def make_url_replacer(base_urls: Mapping[str, str] | None = None, 

664 full_urls: Mapping[str, str] | None = None, 

665 for_markdown: bool = True) \ 

666 -> Callable[[str], str]: 

667 r""" 

668 Create the url replacers that fix absolute to relative URLs. 

669 

670 :param base_urls: a mapping of basic urls to shortcuts 

671 :param full_urls: a mapping of full urls to abbreviations 

672 :param for_markdown: should the replace be for Markdown (`True`) or for 

673 HTML only (`False`) 

674 :returns: a single callable that can process strings and fix the URLs 

675 therein 

676 :raises TypeError: if any of the inputs is of the wrong type 

677 :raises ValueError: if any of the inputs is incorrect 

678 

679 >>> f = make_url_replacer(None, None) 

680 >>> f("1") 

681 '1' 

682 

683 >>> f = make_url_replacer({"https://example.com/1": "./a/", 

684 ... "https://example.com": "./"}, 

685 ... {"https://example.com/1/1.txt": "y.txt", 

686 ... "https://example.com/x/1.txt": "z.txt"}) 

687 >>> f("<a href='http://example.com/1/2.txt' />") 

688 '<a href="./a/2.txt" />' 

689 >>> f("<a href='http://example.com/1' />") 

690 '<a href="./a/" />' 

691 >>> f("<a href='http://example.com' />") 

692 '<a href="./" />' 

693 >>> f("<a href='http://example.com/x.txt' />") 

694 '<a href="./x.txt" />' 

695 >>> f("<a href='http://example.com/1/1.txt' />") 

696 '<a href="y.txt" />' 

697 >>> f("<a href='http://example.com/x/1.txt' />") 

698 '<a href="z.txt" />' 

699 

700 >>> try: 

701 ... make_url_replacer(1, None) 

702 ... except TypeError as te: 

703 ... print(te) 

704 base_urls should be an instance of typing.Mapping but is int, namely 1. 

705 

706 >>> try: 

707 ... make_url_replacer(None, 1) 

708 ... except TypeError as te: 

709 ... print(te) 

710 full_urls should be an instance of typing.Mapping but is int, namely 1. 

711 

712 >>> try: 

713 ... make_url_replacer(None, None, None) 

714 ... except TypeError as te: 

715 ... print(te) 

716 for_markdown should be an instance of bool but is None. 

717 

718 >>> try: 

719 ... make_url_replacer(None, None, 1) 

720 ... except TypeError as te: 

721 ... print(te) 

722 for_markdown should be an instance of bool but is int, namely 1. 

723 """ 

724 if not isinstance(for_markdown, bool): 

725 raise type_error(for_markdown, "for_markdown", bool) 

726 keys: list[tuple[str, bool]] = [] 

727 

728 if base_urls is not None: 

729 if not isinstance(base_urls, Mapping): 

730 raise type_error(base_urls, "base_urls", Mapping) 

731 keys.extend((kk, False) for kk in base_urls) 

732 if full_urls is not None: 

733 if not isinstance(full_urls, Mapping): 

734 raise type_error(full_urls, "full_urls", Mapping) 

735 keys.extend((kk, True) for kk in full_urls) 

736 

737 if list.__len__(keys) <= 0: # no need to do anything 

738 return lambda s: s 

739 

740 # long keys and full urls first 

741 keys.sort(key=lambda tt: (str.__len__(tt[0]), tt[1], tt[0]), reverse=True) 

742 mappings: list[tuple[Pattern, Callable[[Match], str] | str]] = [] 

743 for k, w in keys: 

744 if w: 

745 __make_full_url_replacer(mappings.append, k, full_urls[k], 

746 for_markdown) 

747 else: 

748 __make_base_url_replacer(mappings.append, k, base_urls[k], 

749 for_markdown) 

750 return __make_replacer(mappings)