Coverage for pycommons/dev/doc/process_md.py

1"""

2Process a markdown file in order to make it useful for distribution.

4In order to let sphinx properly load and insert the `README.md` file into the

5project's documentation, we need to process this file from the GitHub style

6markdown to a variant suitable for the myst parser used in sphinx. While we

7are at it, we can also turn absolute URLs from the GitHub-`README.md` file

8that point to the documentation URL to relative URLs.

9"""

11from re import Pattern, sub

12from re import compile as re_compile

13from typing import Any, Callable, Final, Iterable, Mapping, cast

15from pycommons.dev.url_replacer import make_url_replacer

16from pycommons.types import type_error

#: Matches in-page links such as ``[xyz](#123-bla)``: group 1 is the link
#: label including its square brackets (``[xyz]``), group 2 is the anchor
#: text that follows the numeric prefix and the dash (``bla``).
__FIX_LINKS: Final[Pattern] = re_compile(r"(\[.+?])\(#\d+-(.+?)\)")

22def __process_markdown(

23 source: Iterable[str], dest: Callable[[str], Any],

24 line_processor: Callable[[str], str] = lambda s: s,

25 discard_until: str | None = "## 1. Introduction") -> None:

26 """

27 Process a markdown file in order to make it useful for distribution.

29 This process changes the GitHub-style markdown to a format that the myst

30 parser, which is used by sphinx, can render properly. This involves

31 several issues:

33 1. We discard the top-level heading.

34 2. We need to move all sub-headings one step up.

35 3. Furthermore, we can turn all absolute URLs pointing to the

36 documentation website to local references starting with `./`.

38 :param source: the source line iterable

39 :param dest: the destination callable receiving the output

40 :param line_processor: an optional callable for processing lines

41 :param discard_until: discard all strings until reaching this line. If

42 this is `None`, all lines will be used. If this is not `None`, then

43 this will be the first line to be forwarded to `dest`f

45 >>> lp = list()

46 >>> lpp = make_url_replacer({"https://example.com/": "./"},

47 ... {"https://example.com/A": "xyz"})

48 >>> src = ["![image](https://example.com/1.jp)",

49 ... "# This is `pycommons!`",

50 ... "Table of contents",

51 ... "## 1. Introduction",

52 ... "blabla bla <https://example.com/A>!",

53 ... "## 2. Some More Text",

54 ... "We [also say](https://example.com/z/hello.txt) stuff.",

55 ... "### 2.4. Code Example",

56 ... "```",

57 ... "But [not in code](https://example.com/z/hello.txt).",

58 ... "```",

59 ... "See also [here](#24-code-example)."]

60 >>> __process_markdown(src, print, lpp)

61 # 1. Introduction

62 blabla bla <xyz>!

63 # 2. Some More Text

64 We [also say](./z/hello.txt) stuff.

65 ## 2.4. Code Example

66 ```

67 But [not in code](https://example.com/z/hello.txt).

68 ```

69 See also [here](#24-code-example).

71 >>> try:

72 ... __process_markdown(None, print, lambda x: x, "bla")

73 ... except TypeError as te:

74 ... print(te)

75 source should be an instance of typing.Iterable but is None.

77 >>> try:

78 ... __process_markdown(1, print, lambda x: x, "bla")

79 ... except TypeError as te:

80 ... print(te)

81 source should be an instance of typing.Iterable but is int, namely 1.

83 >>> try:

84 ... __process_markdown([None], print, lambda x: x, "bla")

85 ... except TypeError as te:

86 ... print(te)

87 descriptor 'rstrip' for 'str' objects doesn't apply to a 'NoneType' object

89 >>> try:

90 ... __process_markdown([1], print, lambda x: x, "bla")

91 ... except TypeError as te:

92 ... print(te)

93 descriptor 'rstrip' for 'str' objects doesn't apply to a 'int' object

95 >>> try:

96 ... __process_markdown([""], None, lambda x: x, "bla")

97 ... except TypeError as te:

98 ... print(te)

99 dest should be a callable but is None.

100

101 >>> try:

102 ... __process_markdown([""], 1, lambda x: x, "bla")

103 ... except TypeError as te:

104 ... print(te)

105 dest should be a callable but is int, namely 1.

106

107 >>> try:

108 ... __process_markdown([""], print, None, "bla")

109 ... except TypeError as te:

110 ... print(te)

111 line_processor should be a callable but is None.

112

113 >>> try:

114 ... __process_markdown([""], print, 1, "bla")

115 ... except TypeError as te:

116 ... print(te)

117 line_processor should be a callable but is int, namely 1.

118

119 >>> try:

120 ... __process_markdown([""], print, lambda x: x, 1)

121 ... except TypeError as te:

122 ... print(te)

123 descriptor '__len__' requires a 'str' object but received a 'int'

124

125 >>> try:

126 ... __process_markdown([""], print, lambda x: x, "")

127 ... except ValueError as ve:

128 ... print(ve)

129 discard_until cannot be ''.

130

131 >>> __process_markdown([""], print, lambda x: x, None)

132 <BLANKLINE>

133 """

134 if not isinstance(source, Iterable):

135 raise type_error(source, "source", Iterable)

136 if not callable(dest):

137 raise type_error(dest, "dest", call=True)

138 if not callable(line_processor):

139 raise type_error(line_processor, "line_processor", call=True)

140

141 skip: bool = False

142 if discard_until is not None:

143 if str.__len__(discard_until) <= 0:

144 raise ValueError(f"discard_until cannot be {discard_until!r}.")

145 skip = True

146 else:

147 discard_until = ""

148

149 in_code: bool = False # we only process non-code lines

150 needs_newline: bool = False # required after image lines

151 add_images_anyway: bool = True

152 for the_line in source:

153 line = str.rstrip(the_line) # enforce string

154

155 # we skip everything until the introduction section

156 if skip:

157 the_line_lstr = str.lstrip(the_line)

158 if str.__len__(the_line_lstr) <= 0:

159 continue

160 if the_line_lstr.startswith(discard_until):

161 skip = False

162 elif the_line_lstr.startswith("[![") and add_images_anyway:

163 needs_newline = True

164 dest(line)

165 continue

166 else:

167 add_images_anyway = False

168 continue

169

170 if needs_newline:

171 dest("")

172 needs_newline = False

173

174 if in_code:

175 if line.startswith("```"):

176 in_code = False # toggle to non-code

177 elif line.startswith("```"):

178 in_code = True # toggle to code

179 elif line.startswith("#"):

180 line = line[1:] # move all sub-headings one step up

181 else: # e.g., fix all urls via the line processor

182 line = str.rstrip(line_processor(line))

183

184 dest(line)

185

186

def process_markdown_for_sphinx(
        source: Iterable[str], dest: Callable[[str], Any],
        base_urls: Mapping[str, str] | None = None,
        full_urls: Mapping[str, str] | None = None,
        discard_until: str | None = "## 1. Introduction") -> None:
    """
    Convert a GitHub-style markdown file for use in a sphinx documentation.

    The GitHub flavor of markdown is turned into a format that the myst
    parser, which is used by sphinx, can render properly. To this end,
    several things are done:

    1. The top-level heading is discarded.
    2. All sub-headings are moved one step up.
    3. Absolute URLs pointing to the documentation website can be turned
       into local references starting with `./`.
    4. The myst parser drops the numerical prefixes of links, i.e., it tags
       `## 1.2. Hello` with id `hello` instead of `12-hello`. Therefore, all
       references following the pattern `[xxx](#12-hello)` are rewritten
       to `[xxx](#hello)`.

    :param source: the iterable providing the source lines
    :param dest: the callable receiving each output line
    :param base_urls: a mapping of basic urls to shortcuts
    :param full_urls: a mapping of full urls to abbreviations
    :param discard_until: the beginning of the first line to be forwarded
        to `dest`; everything before it is discarded. If this is `None`,
        all lines are used.

    >>> src = ["![image](https://example.com/1.jp)",
    ...        "# This is `pycommons!`",
    ...        "Table of contents",
    ...        "## 1. Introduction",
    ...        "blabla bla <https://example.com/A>!",
    ...        "## 2. Some More Text",
    ...        "We [also say](https://example.com/z/hello.txt) stuff.",
    ...        "### 2.4. Code Example",
    ...        "```",
    ...        "But [not in code](https://example.com/z/hello.txt).",
    ...        "```",
    ...        "See also [here](#24-code-example)."]
    >>> process_markdown_for_sphinx(src, print,
    ...                             {"https://example.com/": "./"},
    ...                             {"https://example.com/A": "xyz"})
    # 1. Introduction
    blabla bla <xyz>!
    # 2. Some More Text
    We [also say](./z/hello.txt) stuff.
    ## 2.4. Code Example
    ```
    But [not in code](https://example.com/z/hello.txt).
    ```
    See also [here](#code-example).

    >>> try:
    ...     process_markdown_for_sphinx(None, print)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is None.

    >>> try:
    ...     process_markdown_for_sphinx(1, print)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is int, namely 1.

    >>> try:
    ...     process_markdown_for_sphinx([None], print)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'rstrip' for 'str' objects doesn't apply to a 'NoneType' object

    >>> try:
    ...     process_markdown_for_sphinx([1], print)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'rstrip' for 'str' objects doesn't apply to a 'int' object

    >>> try:
    ...     process_markdown_for_sphinx([""], None)
    ... except TypeError as te:
    ...     print(te)
    dest should be a callable but is None.

    >>> try:
    ...     process_markdown_for_sphinx([""], 1)
    ... except TypeError as te:
    ...     print(te)
    dest should be a callable but is int, namely 1.

    >>> try:
    ...     process_markdown_for_sphinx([""], print, 1, None, "bla")
    ... except TypeError as te:
    ...     print(te)
    base_urls should be an instance of typing.Mapping but is int, namely 1.

    >>> try:
    ...     process_markdown_for_sphinx([""], print, None, 1, "bla")
    ... except TypeError as te:
    ...     print(te)
    full_urls should be an instance of typing.Mapping but is int, namely 1.

    >>> try:
    ...     process_markdown_for_sphinx([""], print, None, None, 1)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'int'

    >>> try:
    ...     process_markdown_for_sphinx([""], print, None, None, "")
    ... except ValueError as ve:
    ...     print(ve)
    discard_until cannot be ''.

    >>> process_markdown_for_sphinx([""], print, None, None, None)
    <BLANKLINE>
    """
    # The replacer is created up front, i.e., before `__process_markdown`
    # gets to validate `source` and `dest`.
    shorten_urls = make_url_replacer(base_urls, full_urls)

    def fix_line(text: str) -> str:
        """Drop numeric anchor prefixes first, then rewrite the URLs."""
        return shorten_urls(sub(__FIX_LINKS, "\\1(#\\2)", text))

    __process_markdown(source, dest, fix_line, discard_until)

Coverage for pycommons / dev / doc / process_md.py: 100%

50 statements