Coverage for bookbuilderpy/website.py: 67%

1"""The routine for building the website for the book."""

2import os.path

3from typing import Callable, Final, Iterable

5import markdown # type: ignore

7import bookbuilderpy.constants as bc

8from bookbuilderpy.build_result import File, LangResult

9from bookbuilderpy.html import html_postprocess

10from bookbuilderpy.logger import logger

11from bookbuilderpy.path import Path

12from bookbuilderpy.preprocessor_commands import create_preprocessor

13from bookbuilderpy.strings import (

14 enforce_non_empty_str,

15 file_size,

16 lang_to_locale,

17)

18from bookbuilderpy.temp import TempFile

#: Human-readable HTML descriptions of the downloadable file formats.
#: The outer key is a language code ("en", "de", "zh"), the inner key is a
#: file suffix, and each value is an HTML snippet describing that format.
__SUFFIXES: Final[dict[str, dict[str, str]]] = {
    "en": {
        "pdf": (
            'The "portable document format" '
            '(<code><a href="https://www.iso.org/standard/75839.html">pdf</a>'
            '</code>) is most suitable for reading on a PC '
            'and for printing documents.'
        ),
        "html": (
            'A stand-alone web page '
            '(<code><a href="https://www.w3.org/TR/html5/">html</a></code>) '
            'can be viewed well both on mobile phones as well as on PCs.'
        ),
        "epub": (
            'The electronic book format (<code>'
            '<a href="https://www.w3.org/publishing/epub32/">epub</a></code>) '
            'is convenient for many e-book readers as well as mobile phones.'
        ),
        "azw3": (
            'The Amazon Kindle e-book format (<code><a href="'
            'http://kindlegen.s3.amazonaws.com/'
            'AmazonKindlePublishingGuidelines.pdf">azw3</a></code>) '
            'is a proprietary format suitable for reading on a Kindle device.'
        ),
        "zip": (
            'A <code><a href="'
            'https://www.loc.gov/preservation/digital/formats/fdd/fdd000354.shtml'
            '">zip</a></code> archive containing the book in all the formats '
            'mentioned above for convenient download.'
        ),
        "tar.xz": (
            'A <code><a href="'
            'https://www.gnu.org/software/tar/manual/html_node/Standard.html'
            '">tar</a>.<a href="https://tukaani.org/xz/format.html">xz</a>'
            '</code> archive containing the book in all the formats '
            'mentioned above for convenient download.'
        ),
    },
    "de": {
        "pdf": (
            'Das "portable document format" '
            '(<code><a href="https://www.iso.org/standard/75839.html">pdf</a>'
            '</code>) ist für das Lesen am PC oder das Ausdrucken geeignet.'
        ),
        "html": (
            'Eine stand-alone Webseite '
            '(<code><a href="https://www.w3.org/TR/html5/">html</a></code>) '
            'kann sowohl auf dem Mobiltelefon als auch auf dem PC gut '
            'gelesen werden.'
        ),
        "epub": (
            'Das Format für E-Book (<code>'
            '<a href="https://www.w3.org/publishing/epub32/">epub</a></code>) '
            'ist günstig für E-Book Lesegeräte, Tablets, und Mobiltelefone.'
        ),
        "azw3": (
            'Das Amazon Kindle E-Book Format (<code><a href="'
            'http://kindlegen.s3.amazonaws.com/'
            'AmazonKindlePublishingGuidelines.pdf">azw3</a></code>) '
            'ist proprietär und für Kindles gedacht.'
        ),
        "zip": (
            'Ein <code><a href="'
            'https://www.loc.gov/preservation/digital/formats/fdd/fdd000354.shtml'
            '">zip</a></code> Archiv mit allen oben genannten Formaten '
            'des Buchs.'
        ),
        "tar.xz": (
            'Ein <code><a href="'
            'https://www.gnu.org/software/tar/manual/html_node/Standard.html'
            '">tar</a>.<a href="https://tukaani.org/xz/format.html">xz</a>'
            '</code> Archiv mit allen oben genannten Formaten des Buchs.'
        ),
    },
    "zh": {
        "pdf": (
            '便携式文档格式'
            '(<code><a href="https://www.iso.org/standard/75839.html">pdf</a>'
            '</code>)最适合在PC上阅读和打印文档。'
        ),
        "html": (
            '无论是在手机上还是在PC上,都可以很好地查看独立的网页'
            '(<code><a href="https://www.w3.org/TR/html5/">html</a></code>)。'
        ),
        "epub": (
            '电子书格式(<code>'
            '<a href="https://www.w3.org/publishing/epub32/">epub</a></code>)'
            '为许多电子书阅读器和手机提供了便利。'
        ),
        "azw3": (
            '亚马逊Kindle电子书格式(<code><a href="'
            'http://kindlegen.s3.amazonaws.com/'
            'AmazonKindlePublishingGuidelines.pdf">azw3</a></code>)'
            '是一种适合在Kindle设备上阅读的专有格式。'
        ),
        "zip": (
            '一个<code><a href="'
            'https://www.loc.gov/preservation/digital/formats/fdd/fdd000354.shtml'
            '">zip</a></code>存档,包含上述所有格式的书籍,便于下载。'
        ),
        "tar.xz": (
            '一个<code><a href="'
            'https://www.gnu.org/software/tar/manual/html_node/Standard.html'
            '">tar</a>.<a href="https://tukaani.org/xz/format.html">xz</a>'
            '</code>存档,包含上述所有格式的书籍,便于下载。'
        ),
    },
}

def _download_list_html(ldocs: list[LangResult],
                        out_dir: Path) -> list[str]:
    """
    Render the download lists of all languages as HTML fragments.

    For every language, one `<ul>` with one `<li>` per result file is
    generated. If there is more than one language, these lists are
    themselves wrapped into the items of an outer per-language `<ul>`.

    :param ldocs: the per-language results
    :param out_dir: the directory relative to which the links are computed
    :return: the HTML fragments, to be concatenated without separator
    """
    # The outer per-language list is only emitted for multi-language books.
    has_lang_ul: Final[bool] = len(ldocs) > 1
    data: Final[list[str]] = []
    if has_lang_ul:
        data.append(f"<ul{bc.WEBSITE_LANGS_UL_ARG}>")

    for lang in ldocs:
        if has_lang_ul:
            enforce_non_empty_str(lang.lang_name)
            data.append(
                f"<li{bc.WEBSITE_LANGS_LI_ARG}>"
                f"<span{bc.WEBSITE_LANGS_NAME_SPAN_ARG}>"
                f"{lang.lang_name}</span>")
        data.append(f"<ul{bc.WEBSITE_DOWNLOAD_UL_ARG}>")

        # Pick the format descriptions by the language part of the locale,
        # falling back to English if the result has no language set.
        locale = "en" if not lang.lang else \
            lang_to_locale(lang.lang).split("_")[0]
        suffixes = __SUFFIXES.get(locale)

        for res in lang.results:
            name = os.path.basename(res.path)
            # Non-breaking spaces keep number and unit of the size together.
            size = file_size(res.size).replace(" ", "&nbsp;")
            data.append(
                f'<li{bc.WEBSITE_DOWNLOAD_LI_ARG}>'
                f'<span{bc.WEBSITE_DOWNLOAD_DOWNLOAD_SPAN_ARG}>'
                f'<a href="{res.path.relative_to(out_dir)}"'
                f'{bc.WEBSITE_DOWNLOAD_FILE_A_ARG}>'
                f'{name}</a> '
                f'<span{bc.WEBSITE_DOWNLOAD_SIZE_SPAN_ARG}>'
                f'({size})</span></span>')

            # Append the explanation of the file format, if we have one.
            if suffixes and (res.suffix in suffixes):
                data.append(
                    f" <span{bc.WEBSITE_DOWNLOAD_FILE_DESC_SPAN_ARG}>"
                    f"{suffixes[res.suffix]}</span>")

            data.append("</li>")
        data.append("</ul>")
        if has_lang_ul:
            data.append("</li>")

    if has_lang_ul:
        data.append("</ul>")
    return data


def build_website(docs: Iterable[LangResult],
                  outer_file: str,
                  body_file: str | None,
                  dest_dir: str,
                  input_dir: str,
                  get_meta: Callable) -> File:
    """
    Build a website linking to all the generated documents.

    The template `outer_file` is loaded from `input_dir`. If it contains
    the marker `bc.WEBSITE_OUTER_TAG`, that marker is replaced with the
    contents of `body_file` rendered from markdown to HTML. If the result
    contains `bc.WEBSITE_BODY_TAG_1`, everything from there up to and
    including `bc.WEBSITE_BODY_TAG_2` is replaced with an automatically
    generated list of download links. Finally, the text is passed through
    a preprocessor for the `bc.CMD_GET_META` command backed by `get_meta`,
    post-processed, and written to `index.html` in `dest_dir`.

    :param docs: the per-language results
    :param outer_file: the wrapper file
    :param body_file: the body file, required if the wrapper file
        contains the body placeholder
    :param dest_dir: the destination directory
    :param input_dir: the base input directory
    :param get_meta: a callable used to get the results
    :return: the file record to the generated website
    :raises ValueError: if `docs` is `None`, if `index.html` already
        exists in `dest_dir`, if a body is requested by the template but
        `body_file` is `None`, or if the start marker of the file list is
        present without its end marker
    """
    if docs is None:
        raise ValueError("docs cannot be None.")
    out_dir: Final[Path] = Path.directory(dest_dir)
    in_dir: Final[Path] = Path.directory(input_dir)

    # We never overwrite an existing website.
    out_file = out_dir.resolve_inside("index.html")
    if os.path.exists(out_file):
        raise ValueError(f"File '{out_file}' already exists.")

    logger(f"beginning to build website '{out_file}'.")

    # load the html template
    html = in_dir.resolve_inside(outer_file).read_all_str()

    # should there be a body to be included?
    tag_index = html.find(bc.WEBSITE_OUTER_TAG)
    if tag_index >= 0:
        # yes, so we load the body, which therefore must have been given
        if body_file is None:
            raise ValueError(
                f"Website template '{outer_file}' contains "
                f"'{bc.WEBSITE_OUTER_TAG}' but no body file was given.")
        body = in_dir.resolve_inside(body_file).read_all_str()
        html = "\n".join(
            [html[:tag_index].strip(),
             markdown.markdown(text=body.strip(),
                               output_format="html").strip(),
             html[(tag_index + len(bc.WEBSITE_OUTER_TAG)):].strip()])
        del body

    # should there be an auto-generated file list?
    div_1 = html.find(bc.WEBSITE_BODY_TAG_1)
    if div_1 >= 0:
        # yes! The end marker must follow the start marker.
        div_2 = html.find(bc.WEBSITE_BODY_TAG_2,
                          div_1 + len(bc.WEBSITE_BODY_TAG_1))
        if div_2 <= div_1:
            raise ValueError(
                f"Website '{html}' contains "
                f"'{bc.WEBSITE_BODY_TAG_1}' but not "
                f"'{bc.WEBSITE_BODY_TAG_2}'.")

        # text before the markers + generated list + text after the markers
        data = [html[:div_1].strip()]
        data.extend(_download_list_html(list(docs), out_dir))
        data.append(html[(div_2 + len(bc.WEBSITE_BODY_TAG_2)):].strip())
        html = "".join(data).strip()
        del data

    # resolve the meta-data commands embedded in the text
    html = (create_preprocessor(name=bc.CMD_GET_META,
                                func=get_meta,
                                n=1,
                                strip_white_space=False))(html)

    # The post-processor works on files, so we go through a temp file.
    with TempFile.create(suffix=".html") as temp:
        temp.write_all(html.strip())
        out_file = html_postprocess(in_file=temp,
                                    out_file=out_file,
                                    flatten_data_uris=True,
                                    fully_evaluate_html=False,
                                    purge_scripts=False,
                                    minify=True,
                                    canonicalize_ids=True,
                                    purge_mathjax=False,
                                    overwrite=False)

    result = File(out_file)
    logger(f"finished building website '{result.path}', "
           f"size is {result.size}.")
    return result