Source code for bookbuilderpy.website

"""The routine for building the website for the book."""
import os.path
from typing import Callable, Final, Iterable

import markdown  # type: ignore

import bookbuilderpy.constants as bc
from bookbuilderpy.build_result import File, LangResult
from bookbuilderpy.html import html_postprocess
from bookbuilderpy.logger import logger
from bookbuilderpy.path import Path
from bookbuilderpy.preprocessor_commands import create_preprocessor
from bookbuilderpy.strings import (
    enforce_non_empty_str,
    file_size,
    lang_to_locale,
)
from bookbuilderpy.temp import TempFile

#: Explanations of file suffixes.
__SUFFIXES: Final[dict[str, dict[str, str]]] = \
    {"en": {
        "pdf": 'The &quot;portable document format&quot; (<code>'
               '<a href="https://www.iso.org/standard/75839.html">pdf</a>'
               '</code>) is most suitable for reading on a PC and for '
               'printing documents.',
        "html": 'A stand-alone web page (<code><a href="https://www.w3.org/'
                'TR/html5/">html</a></code>) can be viewed well both on '
                'mobile phones as well as on PCs.',
        "epub": 'The electronic book format (<code><a href="https://www.w3'
                '.org/publishing/epub32/">epub</a></code>) is convenient for '
                'many e-book readers as well as mobile phones.',
        "azw3": 'The Amazon Kindle e-book format (<code><a href="http://'
                'kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.'
                'pdf">azw3</a></code>) is a proprietary format suitable for '
                'reading on a Kindle device.',
        "zip": 'A <code><a href="https://www.loc.gov/preservation/digital/'
               'formats/fdd/fdd000354.shtml">zip</a></code> archive '
               'containing the book in all the formats mentioned above for '
               'convenient download.',
        "tar.xz": 'A <code><a href="https://www.gnu.org/software/tar/manual/'
                  'html_node/Standard.html">tar</a>.<a href="https://tukaani.'
                  'org/xz/format.html">xz</a></code> archive containing the '
                  'book in all the formats mentioned above for convenient '
                  'download.',
    }, "de": {
        "pdf": 'Das &quot;portable document format&quot; (<code><a href='
               '"https://www.iso.org/standard/75839.html">pdf</a></code>) ist'
               ' f&uuml;r das Lesen am PC oder das Ausdrucken geeignet.',
        "html": 'Eine stand-alone Webseite (<code><a href="https://www.w3.org/'
                'TR/html5/">html</a></code>) kann sowohl auf dem Mobiltelefon'
                ' als auch auf dem PC gut gelesen werden.',
        "epub": 'Das Format f&uuml;r E-Book (<code><a href="https://www.w3'
                '.org/publishing/epub32/">epub</a></code>) ist g&uuml;nstig '
                'f&uuml;r E-Book Leseger&auml;te, Tablets, und '
                'Mobiltelefone.',
        "azw3": 'Das Amazon Kindle E-Book Format (<code><a href="http://'
                'kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.'
                'pdf">azw3</a></code>) ist propriet&auml;r und f&uuml;r '
                'Kindles gedacht.',
        "zip": 'Ein <code><a href="https://www.loc.gov/preservation/digital/'
               'formats/fdd/fdd000354.shtml">zip</a></code> Archiv mit allen '
               'oben genannten Formaten des Buchs.',
        "tar.xz": 'Ein <code><a href="https://www.gnu.org/software/tar/manual/'
                  'html_node/Standard.html">tar</a>.<a href="https://tukaani.'
                  'org/xz/format.html">xz</a></code> Archiv mit allen '
                  'oben genannten Formaten des Buchs.',
    }, "zh": {
        "pdf": '便携式文档格式(<code><a href="https://www.iso.org/standard/'
               '75839.html">pdf</a></code>)最适合在PC上阅读和打印文档。',
        "html": '无论是在手机上还是在PC上,都可以很好地查看独立的网页(<code>'
                '<a href="https://www.w3.org/TR/html5/">html</a></code>)。',
        "epub": '电子书格式(<code><a href="https://www.w3.org/publishing/epub32/">'
                'epub</a></code>)为许多电子书阅读器和手机提供了便利。',
        "azw3": '亚马逊Kindle电子书格式(<code><a href="http://'
                'kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.'
                'pdf">azw3</a></code>)是一种适合在Kindle设备上阅读的专有格式。',
        "zip": '一个<code><a href="https://www.loc.gov/preservation/digital/'
               'formats/fdd/fdd000354.shtml">zip</a></code>存档,包含上述所有格式的书籍,'
               '便于下载。',
        "tar.xz": '一个<code><a href="https://www.gnu.org/software/tar/manual/'
                  'html_node/Standard.html">tar</a>.<a href="https://tukaani.'
                  'org/xz/format.html">xz</a></code>存档,包含上述所有格式的书籍,便于下载。',
    }}


[docs]def build_website(docs: Iterable[LangResult],
                  outer_file: str,
                  body_file: str | None,
                  dest_dir: str,
                  input_dir: str,
                  get_meta: Callable) -> File:
    """
    Build a website linking to all the generated documents.

    :param docs: the per-language results
    :param outer_file: the wrapper file
    :param body_file: the body file
    :param dest_dir: the destination directory
    :param input_dir: the base input directory
    :param get_meta: a callable used to get the results
    :return: the file record to the generated website
    """
    if docs is None:
        raise ValueError("docs cannot be None.")
    out_dir: Final[Path] = Path.directory(dest_dir)
    in_dir = Path.directory(input_dir)

    # load the html template
    out_file = out_dir.resolve_inside("index.html")
    if os.path.exists(out_file):
        raise ValueError(f"File '{out_file}' already exists.")

    logger(f"beginning to build website '{out_file}'.")

    html = in_dir.resolve_inside(outer_file).read_all_str()

    # should there be a body to be included?
    tag_index = html.find(bc.WEBSITE_OUTER_TAG)
    if tag_index >= 0:
        # yes, so we load the body
        body = in_dir.resolve_inside(body_file).read_all_str()
        html = "\n".join(
            [html[:tag_index].strip(),
             markdown.markdown(text=body.strip(),
                               output_format="html").strip(),
             html[(tag_index + len(bc.WEBSITE_OUTER_TAG)):].strip()])
        del body

    div_1 = html.find(bc.WEBSITE_BODY_TAG_1)
    # should there be an auto-generated file list in markdown?
    if div_1 >= 0:
        # yes!
        div_2 = html.find(bc.WEBSITE_BODY_TAG_2,
                          div_1 + len(bc.WEBSITE_BODY_TAG_1))
        if div_2 <= div_1:
            raise ValueError(
                f"Website '{html}' contains "
                f"'{bc.WEBSITE_BODY_TAG_1}' but not "
                f"'{bc.WEBSITE_BODY_TAG_2}'.")
        data = [html[:div_1].strip()]

        ldocs = list(docs)
        has_lang_ul = len(ldocs) > 1
        if has_lang_ul:
            data.append(f"<ul{bc.WEBSITE_LANGS_UL_ARG}>")

        for lang in ldocs:
            if has_lang_ul:
                enforce_non_empty_str(lang.lang_name)
                data.append(
                    f"<li{bc.WEBSITE_LANGS_LI_ARG}>"
                    f"<span{bc.WEBSITE_LANGS_NAME_SPAN_ARG}>"
                    f"{lang.lang_name}</span>")
            data.append(f"<ul{bc.WEBSITE_DOWNLOAD_UL_ARG}>")
            locale = "en" if not lang.lang else \
                lang_to_locale(lang.lang).split("_")[0]
            suffixes = __SUFFIXES[locale] if locale in __SUFFIXES else None

            for res in lang.results:
                name = os.path.basename(res.path)
                size = file_size(res.size).replace(" ", "&nbsp;")
                data.append(
                    f'<li{bc.WEBSITE_DOWNLOAD_LI_ARG}>'
                    f'<span{bc.WEBSITE_DOWNLOAD_DOWNLOAD_SPAN_ARG}>'
                    f'<a href="{res.path.relative_to(out_dir)}"'
                    f'{bc.WEBSITE_DOWNLOAD_FILE_A_ARG}>'
                    f'{name}</a>&nbsp;'
                    f'<span{bc.WEBSITE_DOWNLOAD_SIZE_SPAN_ARG}>'
                    f'({size})</span></span>')

                if suffixes and (res.suffix in suffixes):
                    desc = suffixes[res.suffix]
                    data.append(
                        f"<br><span{bc.WEBSITE_DOWNLOAD_FILE_DESC_SPAN_ARG}>"
                        f"{desc}</span>")

                data.append("</li>")
            data.append("</ul>")
            if has_lang_ul:
                data.append("</li>")
        if has_lang_ul:
            data.append("</ul>")
        data.append(html[(div_2 + len(bc.WEBSITE_BODY_TAG_2)):].strip())
        html = "".join(data).strip()
        del data

    html = (create_preprocessor(name=bc.CMD_GET_META,
                                func=get_meta,
                                n=1,
                                strip_white_space=False))(html)
    with TempFile.create(suffix=".html") as temp:
        temp.write_all(html.strip())
        out_file = html_postprocess(in_file=temp,
                                    out_file=out_file,
                                    flatten_data_uris=True,
                                    fully_evaluate_html=False,
                                    purge_scripts=False,
                                    minify=True,
                                    canonicalize_ids=True,
                                    purge_mathjax=False,
                                    overwrite=False)

    res = File(out_file)
    logger(f"finished building website '{res.path}', size is {res.size}.")
    return res