"""The routine for building the website for the book."""
import os.path
from typing import Callable, Final, Iterable
import markdown  # type: ignore
import bookbuilderpy.constants as bc
from bookbuilderpy.build_result import File, LangResult
from bookbuilderpy.html import html_postprocess
from bookbuilderpy.logger import logger
from bookbuilderpy.path import Path
from bookbuilderpy.preprocessor_commands import create_preprocessor
from bookbuilderpy.strings import (
    enforce_non_empty_str,
    file_size,
    lang_to_locale,
)
from bookbuilderpy.temp import TempFile
#: Explanations of file suffixes.
__SUFFIXES: Final[dict[str, dict[str, str]]] = \
{"en": {
"pdf": 'The "portable document format" (<code>'
'<a href="https://www.iso.org/standard/75839.html">pdf</a>'
'</code>) is most suitable for reading on a PC and for '
'printing documents.',
"html": 'A stand-alone web page (<code><a href="https://www.w3.org/'
'TR/html5/">html</a></code>) can be viewed well both on '
'mobile phones as well as on PCs.',
"epub": 'The electronic book format (<code><a href="https://www.w3'
'.org/publishing/epub32/">epub</a></code>) is convenient for '
'many e-book readers as well as mobile phones.',
"azw3": 'The Amazon Kindle e-book format (<code><a href="http://'
'kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.'
'pdf">azw3</a></code>) is a proprietary format suitable for '
'reading on a Kindle device.',
"zip": 'A <code><a href="https://www.loc.gov/preservation/digital/'
'formats/fdd/fdd000354.shtml">zip</a></code> archive '
'containing the book in all the formats mentioned above for '
'convenient download.',
"tar.xz": 'A <code><a href="https://www.gnu.org/software/tar/manual/'
'html_node/Standard.html">tar</a>.<a href="https://tukaani.'
'org/xz/format.html">xz</a></code> archive containing the '
'book in all the formats mentioned above for convenient '
'download.',
}, "de": {
"pdf": 'Das "portable document format" (<code><a href='
'"https://www.iso.org/standard/75839.html">pdf</a></code>) ist'
' für das Lesen am PC oder das Ausdrucken geeignet.',
"html": 'Eine stand-alone Webseite (<code><a href="https://www.w3.org/'
'TR/html5/">html</a></code>) kann sowohl auf dem Mobiltelefon'
' als auch auf dem PC gut gelesen werden.',
"epub": 'Das Format für E-Book (<code><a href="https://www.w3'
'.org/publishing/epub32/">epub</a></code>) ist günstig '
'für E-Book Lesegeräte, Tablets, und '
'Mobiltelefone.',
"azw3": 'Das Amazon Kindle E-Book Format (<code><a href="http://'
'kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.'
'pdf">azw3</a></code>) ist proprietär und für '
'Kindles gedacht.',
"zip": 'Ein <code><a href="https://www.loc.gov/preservation/digital/'
'formats/fdd/fdd000354.shtml">zip</a></code> Archiv mit allen '
'oben genannten Formaten des Buchs.',
"tar.xz": 'Ein <code><a href="https://www.gnu.org/software/tar/manual/'
'html_node/Standard.html">tar</a>.<a href="https://tukaani.'
'org/xz/format.html">xz</a></code> Archiv mit allen '
'oben genannten Formaten des Buchs.',
}, "zh": {
"pdf": '便携式文档格式(<code><a href="https://www.iso.org/standard/'
'75839.html">pdf</a></code>)最适合在PC上阅读和打印文档。',
"html": '无论是在手机上还是在PC上,都可以很好地查看独立的网页(<code>'
'<a href="https://www.w3.org/TR/html5/">html</a></code>)。',
"epub": '电子书格式(<code><a href="https://www.w3.org/publishing/epub32/">'
'epub</a></code>)为许多电子书阅读器和手机提供了便利。',
"azw3": '亚马逊Kindle电子书格式(<code><a href="http://'
'kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.'
'pdf">azw3</a></code>)是一种适合在Kindle设备上阅读的专有格式。',
"zip": '一个<code><a href="https://www.loc.gov/preservation/digital/'
'formats/fdd/fdd000354.shtml">zip</a></code>存档,包含上述所有格式的书籍,'
'便于下载。',
"tar.xz": '一个<code><a href="https://www.gnu.org/software/tar/manual/'
'html_node/Standard.html">tar</a>.<a href="https://tukaani.'
'org/xz/format.html">xz</a></code>存档,包含上述所有格式的书籍,便于下载。',
}}
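# For illustration: __SUFFIXES["en"]["pdf"] is the English html snippet that
# describes the pdf download. build_website() below selects the sub-table by
# the language part of the locale (e.g., "en") and simply omits descriptions
# for locales that are not listed here.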


def build_website(docs: Iterable[LangResult],
                  outer_file: str,
                  body_file: str | None,
                  dest_dir: str,
                  input_dir: str,
                  get_meta: Callable) -> File:
"""
Build a website linking to all the generated documents.
:param docs: the per-language results
:param outer_file: the wrapper file
:param body_file: the body file
:param dest_dir: the destination directory
:param input_dir: the base input directory
:param get_meta: a callable used to get the results
:return: the file record to the generated website
"""
    if docs is None:
        raise ValueError("docs cannot be None.")
    out_dir: Final[Path] = Path.directory(dest_dir)
    in_dir = Path.directory(input_dir)

    # determine the output file and refuse to overwrite an existing website
    out_file = out_dir.resolve_inside("index.html")
    if os.path.exists(out_file):
        raise ValueError(f"File '{out_file}' already exists.")
    logger(f"beginning to build website '{out_file}'.")
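    # load the outer html template which wraps the generated content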
    html = in_dir.resolve_inside(outer_file).read_all_str()

    # should there be a body to be included?
    tag_index = html.find(bc.WEBSITE_OUTER_TAG)
    if tag_index >= 0:
        # yes, so we load the body
        body = in_dir.resolve_inside(body_file).read_all_str()
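        # render the markdown body to html and splice it into the template
        # in place of the outer tag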
html = "\n".join(
[html[:tag_index].strip(),
markdown.markdown(text=body.strip(),
output_format="html").strip(),
html[(tag_index + len(bc.WEBSITE_OUTER_TAG)):].strip()])
del body
    div_1 = html.find(bc.WEBSITE_BODY_TAG_1)
    # should there be an auto-generated file list?
    if div_1 >= 0:
        # yes!
        div_2 = html.find(bc.WEBSITE_BODY_TAG_2,
                          div_1 + len(bc.WEBSITE_BODY_TAG_1))
        if div_2 <= div_1:
            raise ValueError(
                f"Website '{html}' contains "
                f"'{bc.WEBSITE_BODY_TAG_1}' but not "
                f"'{bc.WEBSITE_BODY_TAG_2}'.")
        data = [html[:div_1].strip()]
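        # collect the per-language results; a language list is only emitted
        # if the book exists in more than one language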
        ldocs = list(docs)
        has_lang_ul = len(ldocs) > 1
        if has_lang_ul:
            data.append(f"<ul{bc.WEBSITE_LANGS_UL_ARG}>")
        for lang in ldocs:
            if has_lang_ul:
                enforce_non_empty_str(lang.lang_name)
                data.append(
                    f"<li{bc.WEBSITE_LANGS_LI_ARG}>"
                    f"<span{bc.WEBSITE_LANGS_NAME_SPAN_ARG}>"
                    f"{lang.lang_name}</span>")
            data.append(f"<ul{bc.WEBSITE_DOWNLOAD_UL_ARG}>")
locale = "en" if not lang.lang else \
lang_to_locale(lang.lang).split("_")[0]
suffixes = __SUFFIXES[locale] if locale in __SUFFIXES else None
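            # one list item per generated file: a relative link, the
            # human-readable size and, if available, a format description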
            for res in lang.results:
                name = os.path.basename(res.path)
                size = file_size(res.size).replace(" ", "&nbsp;")
                data.append(
                    f'<li{bc.WEBSITE_DOWNLOAD_LI_ARG}>'
                    f'<span{bc.WEBSITE_DOWNLOAD_DOWNLOAD_SPAN_ARG}>'
                    f'<a href="{res.path.relative_to(out_dir)}"'
                    f'{bc.WEBSITE_DOWNLOAD_FILE_A_ARG}>'
                    f'{name}</a>&nbsp;'
                    f'<span{bc.WEBSITE_DOWNLOAD_SIZE_SPAN_ARG}>'
                    f'({size})</span></span>')
                if suffixes and (res.suffix in suffixes):
                    desc = suffixes[res.suffix]
                    data.append(
                        f"<br><span{bc.WEBSITE_DOWNLOAD_FILE_DESC_SPAN_ARG}>"
                        f"{desc}</span>")
                data.append("</li>")
            data.append("</ul>")
            if has_lang_ul:
                data.append("</li>")
        if has_lang_ul:
            data.append("</ul>")
        data.append(html[(div_2 + len(bc.WEBSITE_BODY_TAG_2)):].strip())
        html = "".join(data).strip()
        del data
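    # expand the metadata command (bc.CMD_GET_META, one argument) in the
    # html via the get_meta callable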
    html = (create_preprocessor(name=bc.CMD_GET_META,
                                func=get_meta,
                                n=1,
                                strip_white_space=False))(html)
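    # write the html to a temporary file and post-process it into the final
    # index.html (e.g., minification and id canonicalization)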
    with TempFile.create(suffix=".html") as temp:
        temp.write_all(html.strip())
        out_file = html_postprocess(in_file=temp,
                                    out_file=out_file,
                                    flatten_data_uris=True,
                                    fully_evaluate_html=False,
                                    purge_scripts=False,
                                    minify=True,
                                    canonicalize_ids=True,
                                    purge_mathjax=False,
                                    overwrite=False)

    res = File(out_file)
    logger(f"finished building website '{res.path}', size is {res.size}.")
    return res