Coverage for bookbuilderpy/website.py: 67%
67 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
1"""The routine for building the website for the book."""
2import os.path
3from typing import Callable, Final, Iterable
5import markdown # type: ignore
7import bookbuilderpy.constants as bc
8from bookbuilderpy.build_result import File, LangResult
9from bookbuilderpy.html import html_postprocess
10from bookbuilderpy.logger import logger
11from bookbuilderpy.path import Path
12from bookbuilderpy.preprocessor_commands import create_preprocessor
13from bookbuilderpy.strings import (
14 enforce_non_empty_str,
15 file_size,
16 lang_to_locale,
17)
18from bookbuilderpy.temp import TempFile
#: Explanations of file suffixes: the outer key is a language code
#: (``en``, ``de``, ``zh``), the inner key is a file suffix, and the value
#: is an HTML snippet that describes the file format in that language.
__SUFFIXES: Final[dict[str, dict[str, str]]] = {
    "en": {
        "pdf": (
            'The "portable document format" '
            '(<code><a href="https://www.iso.org/standard/75839.html">'
            'pdf</a></code>) '
            'is most suitable for reading on a PC and for printing '
            'documents.'
        ),
        "html": (
            'A stand-alone web page '
            '(<code><a href="https://www.w3.org/TR/html5/">html</a></code>) '
            'can be viewed well both on mobile phones as well as on PCs.'
        ),
        "epub": (
            'The electronic book format '
            '(<code><a href="https://www.w3.org/publishing/epub32/">'
            'epub</a></code>) '
            'is convenient for many e-book readers as well as mobile phones.'
        ),
        "azw3": (
            'The Amazon Kindle e-book format (<code>'
            '<a href="http://kindlegen.s3.amazonaws.com/'
            'AmazonKindlePublishingGuidelines.pdf">azw3</a></code>) '
            'is a proprietary format suitable for reading on a Kindle device.'
        ),
        "zip": (
            'A <code>'
            '<a href="https://www.loc.gov/preservation/digital/formats/fdd/'
            'fdd000354.shtml">zip</a></code> '
            'archive containing the book in all the formats mentioned above '
            'for convenient download.'
        ),
        "tar.xz": (
            'A <code>'
            '<a href="https://www.gnu.org/software/tar/manual/html_node/'
            'Standard.html">tar</a>.'
            '<a href="https://tukaani.org/xz/format.html">xz</a></code> '
            'archive containing the book in all the formats mentioned above '
            'for convenient download.'
        ),
    },
    "de": {
        "pdf": (
            'Das "portable document format" '
            '(<code><a href="https://www.iso.org/standard/75839.html">'
            'pdf</a></code>) '
            'ist für das Lesen am PC oder das Ausdrucken geeignet.'
        ),
        "html": (
            'Eine stand-alone Webseite '
            '(<code><a href="https://www.w3.org/TR/html5/">html</a></code>) '
            'kann sowohl auf dem Mobiltelefon als auch auf dem PC gut '
            'gelesen werden.'
        ),
        "epub": (
            'Das Format für E-Book '
            '(<code><a href="https://www.w3.org/publishing/epub32/">'
            'epub</a></code>) '
            'ist günstig für E-Book Lesegeräte, Tablets, und Mobiltelefone.'
        ),
        "azw3": (
            'Das Amazon Kindle E-Book Format (<code>'
            '<a href="http://kindlegen.s3.amazonaws.com/'
            'AmazonKindlePublishingGuidelines.pdf">azw3</a></code>) '
            'ist proprietär und für Kindles gedacht.'
        ),
        "zip": (
            'Ein <code>'
            '<a href="https://www.loc.gov/preservation/digital/formats/fdd/'
            'fdd000354.shtml">zip</a></code> '
            'Archiv mit allen oben genannten Formaten des Buchs.'
        ),
        "tar.xz": (
            'Ein <code>'
            '<a href="https://www.gnu.org/software/tar/manual/html_node/'
            'Standard.html">tar</a>.'
            '<a href="https://tukaani.org/xz/format.html">xz</a></code> '
            'Archiv mit allen oben genannten Formaten des Buchs.'
        ),
    },
    "zh": {
        "pdf": (
            '便携式文档格式'
            '(<code><a href="https://www.iso.org/standard/75839.html">'
            'pdf</a></code>)'
            '最适合在PC上阅读和打印文档。'
        ),
        "html": (
            '无论是在手机上还是在PC上,都可以很好地查看独立的网页'
            '(<code><a href="https://www.w3.org/TR/html5/">html</a></code>)'
            '。'
        ),
        "epub": (
            '电子书格式'
            '(<code><a href="https://www.w3.org/publishing/epub32/">'
            'epub</a></code>)'
            '为许多电子书阅读器和手机提供了便利。'
        ),
        "azw3": (
            '亚马逊Kindle电子书格式(<code>'
            '<a href="http://kindlegen.s3.amazonaws.com/'
            'AmazonKindlePublishingGuidelines.pdf">azw3</a></code>)'
            '是一种适合在Kindle设备上阅读的专有格式。'
        ),
        "zip": (
            '一个<code>'
            '<a href="https://www.loc.gov/preservation/digital/formats/fdd/'
            'fdd000354.shtml">zip</a></code>'
            '存档,包含上述所有格式的书籍,便于下载。'
        ),
        "tar.xz": (
            '一个<code>'
            '<a href="https://www.gnu.org/software/tar/manual/html_node/'
            'Standard.html">tar</a>.'
            '<a href="https://tukaani.org/xz/format.html">xz</a></code>'
            '存档,包含上述所有格式的书籍,便于下载。'
        ),
    },
}
def _render_download_lists(docs: Iterable[LangResult],
                           out_dir: Path) -> list[str]:
    """
    Render the HTML fragments listing the downloadable files.

    If more than one language result is given, the per-language download
    lists are wrapped into an outer language list; otherwise only the single
    download list is emitted.

    :param docs: the per-language results
    :param out_dir: the directory relative to which the links are computed
    :return: the list of HTML fragments, in output order
    """
    ldocs = list(docs)
    # the outer list of languages is only emitted for multiple languages
    has_lang_ul = len(ldocs) > 1
    data: list[str] = []
    if has_lang_ul:
        data.append(f"<ul{bc.WEBSITE_LANGS_UL_ARG}>")

    for lang in ldocs:
        if has_lang_ul:
            enforce_non_empty_str(lang.lang_name)
            data.append(
                f"<li{bc.WEBSITE_LANGS_LI_ARG}>"
                f"<span{bc.WEBSITE_LANGS_NAME_SPAN_ARG}>"
                f"{lang.lang_name}</span>")
        data.append(f"<ul{bc.WEBSITE_DOWNLOAD_UL_ARG}>")

        # pick the suffix explanations by the language part of the locale,
        # falling back to English if the result carries no language
        locale = "en" if not lang.lang else \
            lang_to_locale(lang.lang).split("_")[0]
        suffixes = __SUFFIXES.get(locale)

        for res in lang.results:
            name = os.path.basename(res.path)
            # use non-breaking spaces so that the browser does not wrap
            # inside the size text
            size = file_size(res.size).replace(" ", "&nbsp;")
            data.append(
                f'<li{bc.WEBSITE_DOWNLOAD_LI_ARG}>'
                f'<span{bc.WEBSITE_DOWNLOAD_DOWNLOAD_SPAN_ARG}>'
                f'<a href="{res.path.relative_to(out_dir)}"'
                f'{bc.WEBSITE_DOWNLOAD_FILE_A_ARG}>'
                f'{name}</a> '
                f'<span{bc.WEBSITE_DOWNLOAD_SIZE_SPAN_ARG}>'
                f'({size})</span></span>')

            # add the explanation of the file format, if we have one
            if suffixes and (res.suffix in suffixes):
                desc = suffixes[res.suffix]
                data.append(
                    f"<br><span{bc.WEBSITE_DOWNLOAD_FILE_DESC_SPAN_ARG}>"
                    f"{desc}</span>")

            data.append("</li>")
        data.append("</ul>")
        if has_lang_ul:
            data.append("</li>")
    if has_lang_ul:
        data.append("</ul>")
    return data


def build_website(docs: Iterable[LangResult],
                  outer_file: str,
                  body_file: str | None,
                  dest_dir: str,
                  input_dir: str,
                  get_meta: Callable) -> File:
    """
    Build a website linking to all the generated documents.

    The wrapper file is loaded from the input directory. If it contains
    `bc.WEBSITE_OUTER_TAG`, the tag is replaced by the body file rendered
    from markdown to html. If the result contains `bc.WEBSITE_BODY_TAG_1`,
    everything from that tag up to and including `bc.WEBSITE_BODY_TAG_2` is
    replaced by the generated list of downloadable files. Finally, the
    `bc.CMD_GET_META` commands are resolved via `get_meta` and the page is
    post-processed into `index.html` inside the destination directory.

    :param docs: the per-language results
    :param outer_file: the wrapper file
    :param body_file: the body file, only needed if the wrapper file
        contains `bc.WEBSITE_OUTER_TAG`
    :param dest_dir: the destination directory
    :param input_dir: the base input directory
    :param get_meta: a callable used to get the results
    :return: the file record to the generated website
    :raises ValueError: if `docs` is `None`, if the destination file already
        exists, if a body is required but `body_file` is `None`, or if
        `bc.WEBSITE_BODY_TAG_1` is present without a following
        `bc.WEBSITE_BODY_TAG_2`
    """
    if docs is None:
        raise ValueError("docs cannot be None.")
    out_dir: Final[Path] = Path.directory(dest_dir)
    in_dir = Path.directory(input_dir)

    # never overwrite an existing website
    out_file = out_dir.resolve_inside("index.html")
    if os.path.exists(out_file):
        raise ValueError(f"File '{out_file}' already exists.")

    logger(f"beginning to build website '{out_file}'.")

    # load the html template
    html = in_dir.resolve_inside(outer_file).read_all_str()

    # should there be a body to be included?
    tag_index = html.find(bc.WEBSITE_OUTER_TAG)
    if tag_index >= 0:
        # yes, so we need a body file: fail with a clear message instead of
        # passing None on to the path resolution
        if body_file is None:
            raise ValueError(
                f"Website template '{outer_file}' contains "
                f"'{bc.WEBSITE_OUTER_TAG}' but no body file was specified.")
        body = in_dir.resolve_inside(body_file).read_all_str()
        html = "\n".join(
            [html[:tag_index].strip(),
             markdown.markdown(text=body.strip(),
                               output_format="html").strip(),
             html[(tag_index + len(bc.WEBSITE_OUTER_TAG)):].strip()])
        del body

    # should there be an auto-generated file list?
    div_1 = html.find(bc.WEBSITE_BODY_TAG_1)
    if div_1 >= 0:
        # yes: the closing tag must come after the opening tag
        div_2 = html.find(bc.WEBSITE_BODY_TAG_2,
                          div_1 + len(bc.WEBSITE_BODY_TAG_1))
        if div_2 <= div_1:  # also covers "not found", i.e., -1
            raise ValueError(
                f"Website '{html}' contains "
                f"'{bc.WEBSITE_BODY_TAG_1}' but not "
                f"'{bc.WEBSITE_BODY_TAG_2}'.")
        data = [html[:div_1].strip()]
        data.extend(_render_download_lists(docs, out_dir))
        data.append(html[(div_2 + len(bc.WEBSITE_BODY_TAG_2)):].strip())
        html = "".join(data).strip()
        del data

    # resolve the meta-data commands inside the page
    html = (create_preprocessor(name=bc.CMD_GET_META,
                                func=get_meta,
                                n=1,
                                strip_white_space=False))(html)

    # write the page to a temporary file and post-process it to its
    # final destination
    with TempFile.create(suffix=".html") as temp:
        temp.write_all(html.strip())
        out_file = html_postprocess(in_file=temp,
                                    out_file=out_file,
                                    flatten_data_uris=True,
                                    fully_evaluate_html=False,
                                    purge_scripts=False,
                                    minify=True,
                                    canonicalize_ids=True,
                                    purge_mathjax=False,
                                    overwrite=False)

    res = File(out_file)
    logger(f"finished building website '{res.path}', size is {res.size}.")
    return res