Coverage for pycommons / dev / doc / process_md.py: 100%
50 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 03:04 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 03:04 +0000
1"""
Process a markdown file in order to make it useful for distribution.

In order to let sphinx properly load and insert the `README.md` file into the
project's documentation, we need to process this file from the GitHub style
markdown to a variant suitable for the myst parser used in sphinx. While we
are at it, we can also turn absolute URLs from the GitHub-`README.md` file
that point to the documentation URL into relative URLs.
9"""
11from re import Pattern, sub
12from re import compile as re_compile
13from typing import Any, Callable, Final, Iterable, Mapping, cast
15from pycommons.dev.url_replacer import make_url_replacer
16from pycommons.types import type_error
#: detects strings of the form [xyz](#123-bla) and gives \1=[xyz] (the link
#: text including its square brackets) and \2=bla (the anchor without the
#: numerical prefix)
__FIX_LINKS: Final[Pattern[str]] = re_compile(r"(\[.+?])\(#\d+-(.+?)\)")
22def __process_markdown(
23 source: Iterable[str], dest: Callable[[str], Any],
24 line_processor: Callable[[str], str] = lambda s: s,
25 discard_until: str | None = "## 1. Introduction") -> None:
26 """
27 Process a markdown file in order to make it useful for distribution.
29 This process changes the GitHub-style markdown to a format that the myst
30 parser, which is used by sphinx, can render properly. This involves
31 several issues:
33 1. We discard the top-level heading.
34 2. We need to move all sub-headings one step up.
35 3. Furthermore, we can turn all absolute URLs pointing to the
36 documentation website to local references starting with `./`.
38 :param source: the source line iterable
39 :param dest: the destination callable receiving the output
40 :param line_processor: an optional callable for processing lines
41 :param discard_until: discard all strings until reaching this line. If
42 this is `None`, all lines will be used. If this is not `None`, then
43 this will be the first line to be forwarded to `dest`f
45 >>> lp = list()
46 >>> lpp = make_url_replacer({"https://example.com/": "./"},
47 ... {"https://example.com/A": "xyz"})
48 >>> src = ["",
49 ... "# This is `pycommons!`",
50 ... "Table of contents",
51 ... "## 1. Introduction",
52 ... "blabla bla <https://example.com/A>!",
53 ... "## 2. Some More Text",
54 ... "We [also say](https://example.com/z/hello.txt) stuff.",
55 ... "### 2.4. Code Example",
56 ... "```",
57 ... "But [not in code](https://example.com/z/hello.txt).",
58 ... "```",
59 ... "See also [here](#24-code-example)."]
60 >>> __process_markdown(src, print, lpp)
61 # 1. Introduction
62 blabla bla <xyz>!
63 # 2. Some More Text
64 We [also say](./z/hello.txt) stuff.
65 ## 2.4. Code Example
66 ```
67 But [not in code](https://example.com/z/hello.txt).
68 ```
69 See also [here](#24-code-example).
71 >>> try:
72 ... __process_markdown(None, print, lambda x: x, "bla")
73 ... except TypeError as te:
74 ... print(te)
75 source should be an instance of typing.Iterable but is None.
77 >>> try:
78 ... __process_markdown(1, print, lambda x: x, "bla")
79 ... except TypeError as te:
80 ... print(te)
81 source should be an instance of typing.Iterable but is int, namely 1.
83 >>> try:
84 ... __process_markdown([None], print, lambda x: x, "bla")
85 ... except TypeError as te:
86 ... print(te)
87 descriptor 'rstrip' for 'str' objects doesn't apply to a 'NoneType' object
89 >>> try:
90 ... __process_markdown([1], print, lambda x: x, "bla")
91 ... except TypeError as te:
92 ... print(te)
93 descriptor 'rstrip' for 'str' objects doesn't apply to a 'int' object
95 >>> try:
96 ... __process_markdown([""], None, lambda x: x, "bla")
97 ... except TypeError as te:
98 ... print(te)
99 dest should be a callable but is None.
101 >>> try:
102 ... __process_markdown([""], 1, lambda x: x, "bla")
103 ... except TypeError as te:
104 ... print(te)
105 dest should be a callable but is int, namely 1.
107 >>> try:
108 ... __process_markdown([""], print, None, "bla")
109 ... except TypeError as te:
110 ... print(te)
111 line_processor should be a callable but is None.
113 >>> try:
114 ... __process_markdown([""], print, 1, "bla")
115 ... except TypeError as te:
116 ... print(te)
117 line_processor should be a callable but is int, namely 1.
119 >>> try:
120 ... __process_markdown([""], print, lambda x: x, 1)
121 ... except TypeError as te:
122 ... print(te)
123 descriptor '__len__' requires a 'str' object but received a 'int'
125 >>> try:
126 ... __process_markdown([""], print, lambda x: x, "")
127 ... except ValueError as ve:
128 ... print(ve)
129 discard_until cannot be ''.
131 >>> __process_markdown([""], print, lambda x: x, None)
132 <BLANKLINE>
133 """
134 if not isinstance(source, Iterable):
135 raise type_error(source, "source", Iterable)
136 if not callable(dest):
137 raise type_error(dest, "dest", call=True)
138 if not callable(line_processor):
139 raise type_error(line_processor, "line_processor", call=True)
141 skip: bool = False
142 if discard_until is not None:
143 if str.__len__(discard_until) <= 0:
144 raise ValueError(f"discard_until cannot be {discard_until!r}.")
145 skip = True
146 else:
147 discard_until = ""
149 in_code: bool = False # we only process non-code lines
150 needs_newline: bool = False # required after image lines
151 add_images_anyway: bool = True
152 for the_line in source:
153 line = str.rstrip(the_line) # enforce string
155 # we skip everything until the introduction section
156 if skip:
157 the_line_lstr = str.lstrip(the_line)
158 if str.__len__(the_line_lstr) <= 0:
159 continue
160 if the_line_lstr.startswith(discard_until):
161 skip = False
162 elif the_line_lstr.startswith("[![") and add_images_anyway:
163 needs_newline = True
164 dest(line)
165 continue
166 else:
167 add_images_anyway = False
168 continue
170 if needs_newline:
171 dest("")
172 needs_newline = False
174 if in_code:
175 if line.startswith("```"):
176 in_code = False # toggle to non-code
177 elif line.startswith("```"):
178 in_code = True # toggle to code
179 elif line.startswith("#"):
180 line = line[1:] # move all sub-headings one step up
181 else: # e.g., fix all urls via the line processor
182 line = str.rstrip(line_processor(line))
184 dest(line)
def process_markdown_for_sphinx(
        source: Iterable[str], dest: Callable[[str], Any],
        base_urls: Mapping[str, str] | None = None,
        full_urls: Mapping[str, str] | None = None,
        discard_until: str | None = "## 1. Introduction") -> None:
    """
    Process a markdown file in order to make it useful for distribution.

    This process changes the GitHub-style markdown to a format that the myst
    parser, which is used by sphinx, can render properly. This involves
    several issues:

    1. We discard the top-level heading.
    2. We need to move all sub-headings one step up.
    3. Furthermore, we can turn all absolute URLs pointing to the
       documentation website to local references starting with `./`.
    4. The myst parser drops the numerical prefixes of links, i.e., it tags
       `## 1.2. Hello` with id `hello` instead of `12-hello`. This means that
       we need to fix all references following the pattern `[xxx](#12-hello)`
       to `[xxx](#hello)`.

    :param source: the source line iterable
    :param dest: the destination callable receiving the output
    :param base_urls: a mapping of basic urls to shortcuts
    :param full_urls: a mapping of full urls to abbreviations
    :param discard_until: discard all strings until reaching this line. If
        this is `None`, all lines will be used. If this is not `None`, then
        this will be the first line to be forwarded to `dest`
    :raises TypeError: if any of the arguments is of the wrong type
    :raises ValueError: if `discard_until` is the empty string

    >>> src = ["",
    ...        "# This is `pycommons!`",
    ...        "Table of contents",
    ...        "## 1. Introduction",
    ...        "blabla bla <https://example.com/A>!",
    ...        "## 2. Some More Text",
    ...        "We [also say](https://example.com/z/hello.txt) stuff.",
    ...        "### 2.4. Code Example",
    ...        "```",
    ...        "But [not in code](https://example.com/z/hello.txt).",
    ...        "```",
    ...        "See also [here](#24-code-example)."]
    >>> process_markdown_for_sphinx(src, print,
    ...                             {"https://example.com/": "./"},
    ...                             {"https://example.com/A": "xyz"})
    # 1. Introduction
    blabla bla <xyz>!
    # 2. Some More Text
    We [also say](./z/hello.txt) stuff.
    ## 2.4. Code Example
    ```
    But [not in code](https://example.com/z/hello.txt).
    ```
    See also [here](#code-example).

    >>> try:
    ...     process_markdown_for_sphinx(None, print)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is None.

    >>> try:
    ...     process_markdown_for_sphinx(1, print)
    ... except TypeError as te:
    ...     print(te)
    source should be an instance of typing.Iterable but is int, namely 1.

    >>> try:
    ...     process_markdown_for_sphinx([None], print)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'rstrip' for 'str' objects doesn't apply to a 'NoneType' object

    >>> try:
    ...     process_markdown_for_sphinx([1], print)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'rstrip' for 'str' objects doesn't apply to a 'int' object

    >>> try:
    ...     process_markdown_for_sphinx([""], None)
    ... except TypeError as te:
    ...     print(te)
    dest should be a callable but is None.

    >>> try:
    ...     process_markdown_for_sphinx([""], 1)
    ... except TypeError as te:
    ...     print(te)
    dest should be a callable but is int, namely 1.

    >>> try:
    ...     process_markdown_for_sphinx([""], print, 1, None, "bla")
    ... except TypeError as te:
    ...     print(te)
    base_urls should be an instance of typing.Mapping but is int, namely 1.

    >>> try:
    ...     process_markdown_for_sphinx([""], print, None, 1, "bla")
    ... except TypeError as te:
    ...     print(te)
    full_urls should be an instance of typing.Mapping but is int, namely 1.

    >>> try:
    ...     process_markdown_for_sphinx([""], print, None, None, 1)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'int'

    >>> try:
    ...     process_markdown_for_sphinx([""], print, None, None, "")
    ... except ValueError as ve:
    ...     print(ve)
    discard_until cannot be ''.

    >>> process_markdown_for_sphinx([""], print, None, None, None)
    <BLANKLINE>
    """
    # The replacer is created before any line is processed, so problems with
    # `base_urls` or `full_urls` surface before `source` is touched.
    replace_urls: Callable[[str], str] = make_url_replacer(
        base_urls, full_urls)

    def _fix_line(s: str) -> str:
        """Drop numerical anchor prefixes first, then replace the URLs."""
        return replace_urls(__FIX_LINKS.sub("\\1(#\\2)", s))

    __process_markdown(source, dest, _fix_line, discard_until)