Coverage for bookbuilderpy/website.py: 67%

67 statements  

coverage.py v7.2.7, created at 2023-07-17 23:15 +0000

"""The routine for building the website for the book."""
import os.path
from typing import Callable, Final, Iterable

import markdown  # type: ignore

import bookbuilderpy.constants as bc
from bookbuilderpy.build_result import File, LangResult
from bookbuilderpy.html import html_postprocess
from bookbuilderpy.logger import logger
from bookbuilderpy.path import Path
from bookbuilderpy.preprocessor_commands import create_preprocessor
from bookbuilderpy.strings import (
    enforce_non_empty_str,
    file_size,
    lang_to_locale,
)
from bookbuilderpy.temp import TempFile

#: Explanations of file suffixes.
__SUFFIXES: Final[dict[str, dict[str, str]]] = \
    {"en": {
        "pdf": 'The &quot;portable document format&quot; (<code>'
               '<a href="https://www.iso.org/standard/75839.html">pdf</a>'
               '</code>) is most suitable for reading on a PC and for '
               'printing documents.',
        "html": 'A stand-alone web page (<code><a href="https://www.w3.org/'
                'TR/html5/">html</a></code>) can be viewed well both on '
                'mobile phones as well as on PCs.',
        "epub": 'The electronic book format (<code><a href="https://www.w3'
                '.org/publishing/epub32/">epub</a></code>) is convenient for '
                'many e-book readers as well as mobile phones.',
        "azw3": 'The Amazon Kindle e-book format (<code><a href="http://'
                'kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.'
                'pdf">azw3</a></code>) is a proprietary format suitable for '
                'reading on a Kindle device.',
        "zip": 'A <code><a href="https://www.loc.gov/preservation/digital/'
               'formats/fdd/fdd000354.shtml">zip</a></code> archive '
               'containing the book in all the formats mentioned above for '
               'convenient download.',
        "tar.xz": 'A <code><a href="https://www.gnu.org/software/tar/manual/'
                  'html_node/Standard.html">tar</a>.<a href="https://tukaani.'
                  'org/xz/format.html">xz</a></code> archive containing the '
                  'book in all the formats mentioned above for convenient '
                  'download.',
    }, "de": {
        "pdf": 'Das &quot;portable document format&quot; (<code><a href='
               '"https://www.iso.org/standard/75839.html">pdf</a></code>) ist'
               ' f&uuml;r das Lesen am PC oder das Ausdrucken geeignet.',
        "html": 'Eine stand-alone Webseite (<code><a href="https://www.w3.org/'
                'TR/html5/">html</a></code>) kann sowohl auf dem Mobiltelefon'
                ' als auch auf dem PC gut gelesen werden.',
        "epub": 'Das Format f&uuml;r E-Book (<code><a href="https://www.w3'
                '.org/publishing/epub32/">epub</a></code>) ist g&uuml;nstig '
                'f&uuml;r E-Book Leseger&auml;te, Tablets, und '
                'Mobiltelefone.',
        "azw3": 'Das Amazon Kindle E-Book Format (<code><a href="http://'
                'kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.'
                'pdf">azw3</a></code>) ist propriet&auml;r und f&uuml;r '
                'Kindles gedacht.',
        "zip": 'Ein <code><a href="https://www.loc.gov/preservation/digital/'
               'formats/fdd/fdd000354.shtml">zip</a></code> Archiv mit allen '
               'oben genannten Formaten des Buchs.',
        "tar.xz": 'Ein <code><a href="https://www.gnu.org/software/tar/manual/'
                  'html_node/Standard.html">tar</a>.<a href="https://tukaani.'
                  'org/xz/format.html">xz</a></code> Archiv mit allen '
                  'oben genannten Formaten des Buchs.',
    }, "zh": {
        "pdf": '便携式文档格式(<code><a href="https://www.iso.org/standard/'
               '75839.html">pdf</a></code>)最适合在PC上阅读和打印文档。',
        "html": '无论是在手机上还是在PC上,都可以很好地查看独立的网页(<code>'
                '<a href="https://www.w3.org/TR/html5/">html</a></code>)。',
        "epub": '电子书格式(<code><a href="https://www.w3.org/publishing/epub32/">'
                'epub</a></code>)为许多电子书阅读器和手机提供了便利。',
        "azw3": '亚马逊Kindle电子书格式(<code><a href="http://'
                'kindlegen.s3.amazonaws.com/AmazonKindlePublishingGuidelines.'
                'pdf">azw3</a></code>)是一种适合在Kindle设备上阅读的专有格式。',
        "zip": '一个<code><a href="https://www.loc.gov/preservation/digital/'
               'formats/fdd/fdd000354.shtml">zip</a></code>存档,包含上述所有格式的书籍,'
               '便于下载。',
        "tar.xz": '一个<code><a href="https://www.gnu.org/software/tar/manual/'
                  'html_node/Standard.html">tar</a>.<a href="https://tukaani.'
                  'org/xz/format.html">xz</a></code>存档,包含上述所有格式的书籍,便于下载。',
    }}
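# For illustration: the outer key is a two-letter language code, the inner
# key a file suffix; the blurb shown next to a German "pdf" download link
# is __SUFFIXES["de"]["pdf"]. Languages without an entry get no blurbs.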


def build_website(docs: Iterable[LangResult],
                  outer_file: str,
                  body_file: str | None,
                  dest_dir: str,
                  input_dir: str,
                  get_meta: Callable) -> File:
    """
    Build a website linking to all the generated documents.

    :param docs: the per-language results
    :param outer_file: the outer HTML template wrapping the website
    :param body_file: the (Markdown) body file, if any
    :param dest_dir: the destination directory
    :param input_dir: the base input directory
    :param get_meta: a callable used to resolve metadata requests
    :return: the file record of the generated website
    """
    if docs is None:
        raise ValueError("docs cannot be None.")
    out_dir: Final[Path] = Path.directory(dest_dir)
    in_dir = Path.directory(input_dir)

    # load the html template
    out_file = out_dir.resolve_inside("index.html")
    if os.path.exists(out_file):
        raise ValueError(f"File '{out_file}' already exists.")

    logger(f"beginning to build website '{out_file}'.")

    html = in_dir.resolve_inside(outer_file).read_all_str()

    # should there be a body to be included?
    tag_index = html.find(bc.WEBSITE_OUTER_TAG)
    if tag_index >= 0:
        # yes, so we load the body
        body = in_dir.resolve_inside(body_file).read_all_str()
        html = "\n".join(
            [html[:tag_index].strip(),
             markdown.markdown(text=body.strip(),
                               output_format="html").strip(),
             html[(tag_index + len(bc.WEBSITE_OUTER_TAG)):].strip()])
        del body
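
    # Note: the outer template is expected to mark the region for the
    # auto-generated download list with bc.WEBSITE_BODY_TAG_1 and
    # bc.WEBSITE_BODY_TAG_2; everything between these two tags is
    # discarded and replaced by the list built below.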

    div_1 = html.find(bc.WEBSITE_BODY_TAG_1)
    # should there be an auto-generated file list?
    if div_1 >= 0:
        # yes!
        div_2 = html.find(bc.WEBSITE_BODY_TAG_2,
                          div_1 + len(bc.WEBSITE_BODY_TAG_1))
        if div_2 <= div_1:
            raise ValueError(
                f"Website '{html}' contains "
                f"'{bc.WEBSITE_BODY_TAG_1}' but not "
                f"'{bc.WEBSITE_BODY_TAG_2}'.")
        data = [html[:div_1].strip()]

        ldocs = list(docs)
        has_lang_ul = len(ldocs) > 1
        if has_lang_ul:
            data.append(f"<ul{bc.WEBSITE_LANGS_UL_ARG}>")
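
        # The markup built below nests one download <ul> per language
        # inside an outer language <ul>; with a single language only,
        # the outer <ul> and its per-language <li> wrappers are skipped
        # (has_lang_ul is False).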

        for lang in ldocs:
            if has_lang_ul:
                enforce_non_empty_str(lang.lang_name)
                data.append(
                    f"<li{bc.WEBSITE_LANGS_LI_ARG}>"
                    f"<span{bc.WEBSITE_LANGS_NAME_SPAN_ARG}>"
                    f"{lang.lang_name}</span>")
            data.append(f"<ul{bc.WEBSITE_DOWNLOAD_UL_ARG}>")
            locale = "en" if not lang.lang else \
                lang_to_locale(lang.lang).split("_")[0]
            suffixes = __SUFFIXES[locale] if locale in __SUFFIXES else None

            for res in lang.results:
                name = os.path.basename(res.path)
                size = file_size(res.size).replace(" ", "&nbsp;")
                data.append(
                    f'<li{bc.WEBSITE_DOWNLOAD_LI_ARG}>'
                    f'<span{bc.WEBSITE_DOWNLOAD_DOWNLOAD_SPAN_ARG}>'
                    f'<a href="{res.path.relative_to(out_dir)}"'
                    f'{bc.WEBSITE_DOWNLOAD_FILE_A_ARG}>'
                    f'{name}</a>&nbsp;'
                    f'<span{bc.WEBSITE_DOWNLOAD_SIZE_SPAN_ARG}>'
                    f'({size})</span></span>')

                if suffixes and (res.suffix in suffixes):
                    desc = suffixes[res.suffix]
                    data.append(
                        f"<br><span{bc.WEBSITE_DOWNLOAD_FILE_DESC_SPAN_ARG}>"
                        f"{desc}</span>")

                data.append("</li>")
            data.append("</ul>")
            if has_lang_ul:
                data.append("</li>")
        if has_lang_ul:
            data.append("</ul>")
        data.append(html[(div_2 + len(bc.WEBSITE_BODY_TAG_2)):].strip())
        html = "".join(data).strip()
        del data
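
    # Presumably (inferred from the call below, not stated in this file):
    # create_preprocessor returns a function that scans the HTML for the
    # one-argument bc.CMD_GET_META command and replaces each occurrence
    # with whatever get_meta returns for that argument.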

    html = (create_preprocessor(name=bc.CMD_GET_META,
                                func=get_meta,
                                n=1,
                                strip_white_space=False))(html)
    with TempFile.create(suffix=".html") as temp:
        temp.write_all(html.strip())
        out_file = html_postprocess(in_file=temp,
                                    out_file=out_file,
                                    flatten_data_uris=True,
                                    fully_evaluate_html=False,
                                    purge_scripts=False,
                                    minify=True,
                                    canonicalize_ids=True,
                                    purge_mathjax=False,
                                    overwrite=False)

    res = File(out_file)
    logger(f"finished building website '{res.path}', size is {res.size}.")
    return res
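
Below is a minimal, self-contained sketch of the central pattern build_website relies on: locate a pair of placeholder tags in an HTML template and splice generated markup between them. The tag strings, the function name splice_file_list, and the item list are illustrative stand-ins and not part of bookbuilderpy; the module above uses bc.WEBSITE_BODY_TAG_1/_2 for the download list and a single bc.WEBSITE_OUTER_TAG for the rendered Markdown body.

# splice_demo.py -- illustrative stand-in, not part of bookbuilderpy
TAG_1 = "<!-- files:begin -->"  # stand-in for bc.WEBSITE_BODY_TAG_1
TAG_2 = "<!-- files:end -->"    # stand-in for bc.WEBSITE_BODY_TAG_2


def splice_file_list(template: str, items: list[tuple[str, str]]) -> str:
    """Replace everything between TAG_1 and TAG_2 with a generated <ul>."""
    i1 = template.find(TAG_1)
    if i1 < 0:
        return template  # no placeholder found: leave the template unchanged
    i2 = template.find(TAG_2, i1 + len(TAG_1))
    if i2 <= i1:
        raise ValueError("found the opening tag but not the closing tag")
    rows = "".join(f'<li><a href="{href}">{name}</a></li>'
                   for name, href in items)
    return (template[:i1] + f"<ul>{rows}</ul>"
            + template[(i2 + len(TAG_2)):])


print(splice_file_list(
    f"<html><body>{TAG_1}placeholder{TAG_2}</body></html>",
    [("book.pdf", "book.pdf"), ("book.epub", "book.epub")]))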