Coverage for bookbuilderpy/build.py: 72%
307 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
1"""The build process: The main class of the book building tool chain."""
3import datetime
4import os
5import sys
6import traceback as tb
7from contextlib import AbstractContextManager, ExitStack
8from os.path import basename
9from typing import Any, Final, Iterable
11import bookbuilderpy.constants as bc
12from bookbuilderpy.build_result import File, LangResult
13from bookbuilderpy.compress import (
14 can_xz_compress,
15 can_zip_compress,
16 compress_xz,
17 compress_zip,
18)
19from bookbuilderpy.git import Repo
20from bookbuilderpy.logger import logger
21from bookbuilderpy.pandoc import azw3, epub, html, latex
22from bookbuilderpy.parse_metadata import load_initial_metadata, parse_metadata
23from bookbuilderpy.path import Path
24from bookbuilderpy.preprocessor import preprocess
25from bookbuilderpy.preprocessor_input import load_input
26from bookbuilderpy.resources import load_resource
27from bookbuilderpy.strings import (
28 datetime_to_date_str,
29 datetime_to_datetime_str,
30 enforce_non_empty_str,
31 enforce_non_empty_str_without_ws,
32 lang_to_locale,
33 to_string,
34)
35from bookbuilderpy.temp import TempDir
36from bookbuilderpy.types import type_error
37from bookbuilderpy.versions import TOOL_PANDOC, get_versions, has_tool
38from bookbuilderpy.website import build_website
class Build(AbstractContextManager):
    """
    A class to keep and access information about the build process.

    A build reads one input file, produces the book in every configured
    language into the output directory, and optionally builds a website.
    The object is a context manager: leaving the context closes the internal
    exit stack on which the temporary directories of downloaded repositories
    were registered.
    """

    def __init__(self,
                 input_file: str,
                 output_dir: str,
                 fail_without_pandoc: bool = True):
        """
        Set up the build.

        :param input_file: the input file
        :param output_dir: the output dir
        :param fail_without_pandoc: fail if pandoc is not available?
        """
        super().__init__()

        #: the start time
        tz: Final[datetime.timezone] = datetime.timezone.utc
        self.__start: Final[datetime.datetime] = datetime.datetime.now(tz)
        #: the internal exit stack
        self.__exit: Final[ExitStack] = ExitStack()
        #: the input file path
        self.__input_file: Final[Path] = Path.file(input_file)
        #: the input directory
        self.__input_dir: Final[Path] = self.__input_file.as_directory()
        #: the output directory path
        self.__output_dir: Final[Path] = Path.path(output_dir)
        self.__output_dir.ensure_dir_exists()
        self.__output_dir.enforce_neither_contains(self.__input_dir)
        #: are we open for business?
        self.__is_open = True
        #: the start date
        self.__start_date: Final[str] = datetime_to_date_str(self.__start)
        #: the start date and time
        self.__start_time: Final[str] = datetime_to_datetime_str(self.__start)
        #: the start year
        self.__start_year: Final[str] = self.__start.strftime("%Y")
        #: the raw metadata
        self.__metadata_raw: dict[str, Any] | None = None
        #: the language-specific metadata
        self.__metadata_lang: dict[str, Any] | None = None
        #: the mapping of urls to repositories
        self.__repo_urls: dict[str, Repo] = {}
        #: the mapping of repo IDs to repositories
        self.__repo_ids: dict[str, Repo] = {}
        #: the internal collection of build results
        self.__results: list[LangResult] = []
        #: the own repository information
        self.__repo: Repo | None = None
        #: fail if pandoc is not available?
        self.__fail_without_pandoc: Final[bool] = fail_without_pandoc

    @property
    def input_dir(self) -> Path:
        """
        Get the input directory.

        :return: the input directory
        """
        return self.__input_dir

    @property
    def input_file(self) -> Path:
        """
        Get the input file.

        :return: the input file
        """
        return self.__input_file

    @property
    def output_dir(self) -> Path:
        """
        Get the output directory.

        :return: the output directory
        """
        return self.__output_dir

    def __get_meta(self, key: str, raise_on_none: bool = True) -> Any:
        """
        Get a meta-data element.

        The lookup order is: the build start date/time/year, the
        language-specific metadata, the raw metadata, the built-in language
        defaults, and finally the information about the own repository.

        :param key: the key
        :param raise_on_none: should we raise an error if the property
            was not found (True) or return None (False)?
        :return: the meta-data element
        :raises TypeError: presumably, via `type_error`, if `key` is not a
            string
        :raises ValueError: if the key cannot be resolved and
            `raise_on_none` is `True`
        """
        if not isinstance(key, str):
            raise type_error(key, "key", str)
        key = key.strip()

        # Values derived from the moment the build was started.
        if key == bc.META_DATE:
            return self.__start_date
        if key == bc.META_DATE_TIME:
            return self.__start_time
        if key == bc.META_YEAR:
            return self.__start_year

        # Language-specific metadata takes precedence over the raw metadata.
        if self.__metadata_lang is not None and key in self.__metadata_lang:
            return self.__metadata_lang[key]

        if self.__metadata_raw is not None and key in self.__metadata_raw:
            return self.__metadata_raw[key]

        # If no meta data language properties are set: return default values.
        if key == bc.META_LANG:
            return "en"
        if key == bc.META_LOCALE:
            return lang_to_locale(self.__get_meta(bc.META_LANG, False))
        if key in (bc.META_LANG_NAME, bc.META_CUR_LANG_NAME):
            return "English"

        # Information about the repository the build itself lives in.
        if key in (bc.META_REPO_INFO_URL, bc.META_REPO_INFO_DATE,
                   bc.META_REPO_INFO_COMMIT, bc.META_REPO_INFO_NAME):
            if self.__repo is None:
                if raise_on_none:
                    raise ValueError(
                        f"Cannot access {key} if build is not based on repo.")
                return None
            if key == bc.META_REPO_INFO_URL:
                return self.__repo.get_base_url()
            if key == bc.META_REPO_INFO_DATE:
                return self.__repo.date_time
            if key == bc.META_REPO_INFO_COMMIT:
                return self.__repo.commit
            return self.__repo.get_name()

        if raise_on_none:
            raise ValueError(f"Metadata key '{key}' not found.")
        return None

    def __get_meta_no_error(self, key: str) -> Any:
        """
        Get a metadata element without raising an error if it is not present.

        :param key: the key
        :return: the metadata element, or None
        """
        return self.__get_meta(key, False)

    def get_meta(self, key: str) -> Any:
        """
        Get a meta-data element.

        :param key: the key
        :return: the meta-data element
        :raises ValueError: if the key cannot be resolved
        """
        return self.__get_meta(key, True)

    def get_meta_str(self, key: str) -> str:
        """
        Get a meta-data element as a string.

        The value is converted with `to_string` using the current locale;
        only for the author key, `use_seq_and` is set to `True`.

        :param key: the key
        :return: the meta-data element
        """
        return to_string(obj=self.get_meta(key),
                         locale=self.__get_meta_no_error(bc.META_LOCALE),
                         use_seq_and=key == bc.META_AUTHOR)

    def __load_repo(self, name: str, url: str) -> None:
        """
        Make the repository at the specified url available under the given id.

        A repository that was already downloaded from the same url is
        re-used instead of being downloaded a second time.

        :param name: the repository name
        :param url: the repository url
        """
        name = enforce_non_empty_str(name).strip()
        url = enforce_non_empty_str(url).strip()
        if name in self.__repo_ids:
            if self.__repo_ids[name].url == url:
                return  # already registered under this name: nothing to do
            # The name is being re-bound to a different url.
            del self.__repo_ids[name]
        if url in self.__repo_urls:
            # Already downloaded under another name: share that download.
            # Returning here is essential, otherwise we would fall through
            # and download the very same repository again.
            self.__repo_ids[name] = self.__repo_urls[url]
            return
        dest = TempDir.create()
        # Register the temp dir so that it is closed when the build exits.
        self.__exit.push(dest)
        r = Repo.download(url, dest)
        self.__repo_ids[name] = r
        self.__repo_urls[r.url] = r

    def __load_repos_from_meta(self, meta: dict[str, Any]) -> None:
        """
        Load the repositories listed in the metadata.

        :param meta: the metadata
        :raises ValueError: if a repository entry lacks its id or its url
        """
        if not isinstance(meta, dict):
            raise type_error(meta, "meta", dict)
        logger("checking metadata for repositories.")
        if bc.META_REPOS not in meta:
            return
        repo_list = meta[bc.META_REPOS]
        if not isinstance(repo_list, Iterable):
            raise type_error(
                repo_list, f"meta[{bc.META_REPOS}]", Iterable)
        for repo in repo_list:
            if bc.META_REPO_ID not in repo:
                raise ValueError(
                    f"repo {repo} must include '{bc.META_REPO_ID}'.")
            if bc.META_REPO_URL not in repo:
                raise ValueError(
                    f"repo {repo} must include '{bc.META_REPO_URL}'.")
            self.__load_repo(repo[bc.META_REPO_ID],
                             repo[bc.META_REPO_URL])

    def get_repo(self, name: str) -> Repo:
        """
        Get a repository of the given name.

        :param name: the repository name
        :return: the repository structure
        :raises ValueError: if no repository is registered under `name`
        """
        name = enforce_non_empty_str(name).strip()
        if name not in self.__repo_ids:
            raise ValueError(f"unknown repository '{name}'.")
        r = self.__repo_ids[name]
        if not isinstance(r, Repo):
            raise type_error(name, f"invalid repository '{name}'?", Repo)
        return r

    def __get_resource(self, name: str, directory: Path) -> Path | None:
        """
        Get an internal build resource to a directory.

        :param name: the resource name
        :param directory: the destination path
        :return: the path to the resource, or None if none was copied
        """
        return load_resource(enforce_non_empty_str_without_ws(name),
                             self.__input_dir, directory)

    def __pandoc_build(self,
                       input_file: Path,
                       output_dir: Path,
                       lang_id: str | None,
                       lang_name: str | None,
                       has_bibliography: bool) -> None:
        """
        Apply pandoc and build the input file to the output dir.

        The generated files are recorded as one `LangResult` in the
        internal result list.

        :param input_file: the path to the input file
        :param output_dir: the path to the output directory
        :param lang_id: the language ID
        :param lang_name: the language name
        :param has_bibliography: is there a bibliography?
        :raises ValueError: if pandoc is not installed and the build was
            set up with `fail_without_pandoc=True`
        """
        if not has_tool(TOOL_PANDOC):
            if self.__fail_without_pandoc:
                raise ValueError("Pandoc not installed.")
            return
        logger(f"now invoking pandoc build steps to file '{input_file}' "
               f"with target director '{output_dir}' for lang-id "
               f"'{lang_id}'.")
        input_file.enforce_file()
        output_dir.enforce_dir()
        name, _ = Path.split_prefix_suffix(os.path.basename(input_file))
        results: list[File] = []
        locale: str | None = self.__get_meta_no_error(bc.META_LOCALE)

        results.append(latex(
            source_file=input_file,
            dest_file=output_dir.resolve_inside(f"{name}.pdf"),
            locale=locale,
            bibliography=has_bibliography,
            get_meta=self.__get_meta_no_error,
            resolve_resources=self.__get_resource))
        results.append(html(
            source_file=input_file,
            dest_file=output_dir.resolve_inside(f"{name}.html"),
            locale=locale,
            bibliography=has_bibliography,
            get_meta=self.__get_meta_no_error,
            resolve_resources=self.__get_resource))
        epub_res = epub(
            source_file=input_file,
            dest_file=output_dir.resolve_inside(f"{name}.epub"),
            locale=locale,
            bibliography=has_bibliography,
            get_meta=self.__get_meta_no_error,
            resolve_resources=self.__get_resource)
        results.append(epub_res)
        # The azw3 file is generated from the epub produced above.
        results.append(azw3(epub_res.path))

        # Now trying to create compressed versions. Both archives are built
        # from the plain book files before either of them is added to the
        # results, so that no archive ends up containing the other one.
        archives: list[File] = []
        if can_xz_compress():
            archives.append(compress_xz(
                results, output_dir.resolve_inside(f"{name}.tar.xz")))
        if can_zip_compress():
            archives.append(compress_zip(
                results, output_dir.resolve_inside(f"{name}.zip")))
        results.extend(archive for archive in archives if archive)

        logger(f"finished pandoc build steps to file '{input_file}' "
               f"with target director '{output_dir}' for lang-id '{lang_id}'"
               f", produced {len(results)} files.")

        self.__results.append(LangResult(lang_id, lang_name, output_dir,
                                         tuple(results)))

    def __build_one_lang(self,
                         lang_id: str | None,
                         lang_name: str | None,
                         use_lang_id_as_suffix: bool = False) -> None:
        """
        Perform the book build for one language.

        :param lang_id: the language ID
        :param lang_name: the language name
        :param use_lang_id_as_suffix: should we use the language id as
            file name suffix?
        :raises ValueError: if a language name is given without a
            language id
        """
        self.__metadata_lang = None

        # Start up and define the output directory.
        if lang_id is None:
            logger("beginning build with no language set.")
            base_dir = self.output_dir
            if lang_name:
                raise ValueError("Cannot have language name "
                                 f"'{lang_name}' but no language id!")
        elif use_lang_id_as_suffix:
            # Multi-language builds get one sub-directory per language.
            lang_id = enforce_non_empty_str_without_ws(lang_id)
            logger(f"beginning multi-language build for language {lang_id}.")
            base_dir = self.output_dir.resolve_inside(lang_id)
            if lang_name:
                enforce_non_empty_str(lang_name)
        else:
            logger(f"beginning single-language build in language {lang_id}.")
            base_dir = self.output_dir
            if lang_name:
                enforce_non_empty_str(lang_name)
        base_dir.ensure_dir_exists()

        # First obtain the full language-specific input text.
        text = enforce_non_empty_str(
            load_input(self.__input_file, self.__input_dir, lang_id).strip())

        # Then we extract the meta-data.
        self.__metadata_lang = parse_metadata(text)
        logger("done parsing metadata.")
        if lang_id:
            # We set up the language id and language name meta data
            # properties, unless the document defines them itself.
            if bc.META_LANG not in self.__metadata_lang:
                self.__metadata_lang[bc.META_LANG] = lang_id
            if bc.META_CUR_LANG_NAME not in self.__metadata_lang:
                # NOTE(review): lang_name may be None here (single-language
                # build); the stored None then takes precedence over the
                # "English" default in __get_meta - confirm this is intended.
                self.__metadata_lang[bc.META_CUR_LANG_NAME] = lang_name

        # Then load the repositories from the meta data.
        self.__load_repos_from_meta(self.__metadata_lang)

        with TempDir.create() as temp:
            logger(f"building in temp directory '{temp}': "
                   "first applying preprocessor.")

            text = enforce_non_empty_str(preprocess(
                text=text, input_dir=self.input_dir,
                get_meta=self.get_meta_str, get_repo=self.get_repo,
                repo=self.__repo, output_dir=temp))
            logger("finished applying preprocessor.")

            has_bibliography = False
            bib = self.__get_meta_no_error(bc.PANDOC_BIBLIOGRAPHY)
            if bib:
                logger(f"found bibliography spec '{bib}', so we load it.")
                Path.copy_resource(self.__input_dir,
                                   self.__input_dir.resolve_inside(bib),
                                   temp)
                has_bibliography = True

            # The preprocessed text is stored under the name of the input
            # file, with "_<lang_id>" appended in multi-language builds.
            prefix, suffix = Path.split_prefix_suffix(
                basename(self.__input_file))
            if use_lang_id_as_suffix:
                end = "_" + enforce_non_empty_str_without_ws(lang_id)
                if not prefix.endswith(end):
                    prefix = prefix + end
            file = temp.resolve_inside(f"{prefix}.{suffix}")
            logger("finished applying preprocessor, now storing "
                   f"{len(text)} characters to file '{file}'.")
            file.write_all(text)
            del prefix, suffix, text
            self.__pandoc_build(input_file=file,
                                output_dir=base_dir,
                                lang_id=lang_id,
                                lang_name=lang_name,
                                has_bibliography=has_bibliography)

        # Finalize the build.
        self.__metadata_lang = None
        if lang_id is None:
            logger("finished build with no language set.")
        else:
            logger(f"finished build in language {lang_id}.")

    def __load_self_repo(self) -> None:
        """
        Attempt to load the self repository information.

        Starting at the input directory, walk up the directory tree until a
        directory containing a `.git` folder is found. The walk stops at
        `/`, at the first unreadable directory, or when going up does not
        lead to a different directory anymore.
        """
        logger("checking if build process is based on git checkout.")
        check = self.__input_dir
        while (check != "/") and os.access(check, os.R_OK):
            test = Path.path(os.path.join(check, ".git"))
            if os.path.isdir(test):
                self.__repo = Repo.from_local(check)
                logger(
                    f"build process is based on commit '{self.__repo.commit}'"
                    f" of repo '{self.__repo.url}'.")
                return
            parent = Path.path(os.path.join(check, ".."))
            if parent == check:
                # Presumably `Path.path` normalizes ".." away (verify in
                # bookbuilderpy.path). Then the parent of a file system root
                # is the root itself; without this check the loop would
                # never end for roots that are not spelled "/".
                break
            check = parent
        logger("build process is not based on git checkout.")

    def __build_all_langs(self) -> None:
        """
        Perform all the book build steps.

        If the raw metadata lists languages, one build per language is
        performed. Otherwise, a single build in the default language is done.

        :raises ValueError: if a language id occurs more than once
        """
        logger("beginning the build loop for all languages.")
        no_lang = True
        if bc.META_LANGS in self.__metadata_raw:
            langs = self.__metadata_raw[bc.META_LANGS]
            done = set()
            if not isinstance(langs, Iterable):
                raise type_error(
                    langs, f"self.__metadata_raw[{bc.META_LANGS}]", Iterable)
            llangs = list(langs)
            # Only with more than one language, the language id is used as
            # directory name and file name suffix.
            multi = len(llangs) > 1
            for lang in llangs:
                if not isinstance(lang, dict):
                    raise type_error(lang, "item of llangs", dict)
                lang_id = enforce_non_empty_str_without_ws(
                    lang[bc.META_LANG_ID])
                if lang_id in done:
                    raise ValueError(f"Duplicate language id '{lang_id}'.")
                done.add(lang_id)
                # The outer check makes sure the stripped name is non-empty.
                lang_name = enforce_non_empty_str(enforce_non_empty_str(
                    lang[bc.META_LANG_NAME]).strip())
                self.__build_one_lang(lang_id, lang_name, multi)
                no_lang = False

        if no_lang:
            self.__build_one_lang(
                self.__get_meta_no_error(bc.META_LANG), None, False)
        logger("finished the build loop for all languages.")

    def __build_website(self) -> None:
        """
        Build the website, if any.

        Nothing is done unless the metadata specifies the outer template of
        the website. The body file is optional.
        """
        template = self.__get_meta_no_error(bc.META_WEBSITE_OUTER)
        if not template:
            return
        logger(f"found website template spec '{template}'.")
        template = self.input_dir.resolve_inside(template)
        body = self.__get_meta_no_error(bc.META_WEBSITE_BODY)
        if body:
            logger(f"found website body spec '{body}'.")
            body = self.input_dir.resolve_inside(body)
        build_website(docs=self.__results,
                      outer_file=template,
                      body_file=body,
                      dest_dir=self.__output_dir,
                      input_dir=self.__input_dir,
                      get_meta=self.get_meta_str)

    def build(self) -> tuple[LangResult, ...]:
        """
        Perform the build.

        :returns: the results
        :raises ValueError: if no result was produced at all
        """
        logger(f"starting the build process for input file "
               f"'{self.__input_file}', input dir '{self.__input_dir}', and "
               f"output dir '{self.__output_dir}' with the following tool "
               f"versions:\n{get_versions()}")
        self.__load_self_repo()
        self.__metadata_raw = load_initial_metadata(self.__input_file,
                                                    self.__input_dir)
        self.__load_repos_from_meta(self.__metadata_raw)
        self.__build_all_langs()
        self.__build_website()
        logger("build process completed, created "
               f"{sum(len(c.results) for c in self.__results)} "
               f"book files in total for {len(self.__results)} language(s).")
        res = tuple(self.__results)
        if not res:
            raise ValueError("Did not build any results.")
        return res

    def __enter__(self) -> "Build":
        """
        Enter the build context; exists to support `with`.

        :returns: this build
        :raises ValueError: if the build has already been closed
        """
        if not self.__is_open:
            raise ValueError("Build already closed.")
        logger(f"starting the build of '{self.__input_file}' "
               f"to '{self.__output_dir}'.")
        return self

    def __exit__(self, exception_type, exception_value, traceback) -> bool:
        """
        Close the build and clean up the temporary files.

        The clean-up is performed only once, on the first call.

        :param exception_type: the exception type, or `None` if the context
            was left without an exception
        :param exception_value: ignored
        :param traceback: ignored
        :returns: `True` if there was no exception, `False` otherwise, i.e.,
            an exception raised inside the context is never suppressed
        """
        opn = self.__is_open
        self.__is_open = False
        if opn:
            logger("cleaning up temporary files.")
            self.__exit.close()
            logger(f"finished the build of '{self.__input_file}' "
                   f"to '{self.__output_dir}'.")
        del exception_value
        del traceback
        return exception_type is None

    @staticmethod
    def run(input_file: str,
            output_dir: str,
            exit_on_error: bool = False) -> tuple[LangResult, ...]:
        """
        Run a build on an input file to an output directory.

        Any error is logged together with its stack trace. Afterwards, the
        interpreter is terminated with exit code 1 if `exit_on_error` is
        `True`; otherwise the error is re-raised.

        :param input_file: the input file
        :param output_dir: the output directory
        :param exit_on_error: should we quit Python upon error?
        :return: a tuple of results
        :raises ValueError: if the build fails; errors of other types are
            wrapped into a `ValueError` with the original error as cause
        """
        try:
            with Build(input_file, output_dir, True) as bd:
                res = bd.build()
            sys.stdout.flush()
            sys.stderr.flush()
        except BaseException as be:
            # NOTE: `BaseException` also covers `KeyboardInterrupt` and
            # `SystemExit`; these get logged and converted as well.
            sys.stdout.flush()
            sys.stderr.flush()
            exinfo = " ".join(tb.format_exception(type(be),
                                                  value=be,
                                                  tb=be.__traceback__))
            logger(f"The build process has FAILED with error '{be}':"
                   f"\n {exinfo}")
            sys.stdout.flush()
            sys.stderr.flush()
            if exit_on_error:
                sys.exit(1)
            if isinstance(be, ValueError):
                raise
            raise ValueError from be
        return res