Coverage for bookbuilderpy/build.py: 72%

307 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-17 23:15 +0000

1"""The build process: The main class of the book building tool chain.""" 

2 

3import datetime 

4import os 

5import sys 

6import traceback as tb 

7from contextlib import AbstractContextManager, ExitStack 

8from os.path import basename 

9from typing import Any, Final, Iterable 

10 

11import bookbuilderpy.constants as bc 

12from bookbuilderpy.build_result import File, LangResult 

13from bookbuilderpy.compress import ( 

14 can_xz_compress, 

15 can_zip_compress, 

16 compress_xz, 

17 compress_zip, 

18) 

19from bookbuilderpy.git import Repo 

20from bookbuilderpy.logger import logger 

21from bookbuilderpy.pandoc import azw3, epub, html, latex 

22from bookbuilderpy.parse_metadata import load_initial_metadata, parse_metadata 

23from bookbuilderpy.path import Path 

24from bookbuilderpy.preprocessor import preprocess 

25from bookbuilderpy.preprocessor_input import load_input 

26from bookbuilderpy.resources import load_resource 

27from bookbuilderpy.strings import ( 

28 datetime_to_date_str, 

29 datetime_to_datetime_str, 

30 enforce_non_empty_str, 

31 enforce_non_empty_str_without_ws, 

32 lang_to_locale, 

33 to_string, 

34) 

35from bookbuilderpy.temp import TempDir 

36from bookbuilderpy.types import type_error 

37from bookbuilderpy.versions import TOOL_PANDOC, get_versions, has_tool 

38from bookbuilderpy.website import build_website 

39 

40 

class Build(AbstractContextManager):
    """
    A class to keep and access information about the build process.

    An instance is meant to be used in a `with` block: on exit, the
    internal exit stack is closed, which releases everything that was
    pushed onto it while building (the temporary directories created for
    downloaded repositories).
    """

    def __init__(self,
                 input_file: str,
                 output_dir: str,
                 fail_without_pandoc: bool = True):
        """
        Set up the build.

        :param input_file: the input file
        :param output_dir: the output dir
        :param fail_without_pandoc: fail if pandoc is not available?
        """
        super().__init__()

        #: the start time
        tz: Final[datetime.timezone] = datetime.timezone.utc
        self.__start: Final[datetime.datetime] = datetime.datetime.now(tz)
        #: the internal exit stack
        self.__exit: Final[ExitStack] = ExitStack()
        #: the input file path
        self.__input_file: Final[Path] = Path.file(input_file)
        #: the input directory
        self.__input_dir: Final[Path] = self.__input_file.as_directory()
        #: the output directory path
        self.__output_dir: Final[Path] = Path.path(output_dir)
        self.__output_dir.ensure_dir_exists()
        self.__output_dir.enforce_neither_contains(self.__input_dir)
        #: are we open for business?
        self.__is_open = True
        #: the start date
        self.__start_date: Final[str] = datetime_to_date_str(self.__start)
        #: the start date and time
        self.__start_time: Final[str] = datetime_to_datetime_str(self.__start)
        #: the start year
        self.__start_year: Final[str] = self.__start.strftime("%Y")
        #: the raw metadata
        self.__metadata_raw: dict[str, Any] | None = None
        #: the language-specific metadata
        self.__metadata_lang: dict[str, Any] | None = None
        #: the mapping of urls to repositories
        self.__repo_urls: dict[str, Repo] = {}
        #: the mapping of repo IDs to repositories
        self.__repo_ids: dict[str, Repo] = {}
        #: the internal collection of build results
        self.__results: list[LangResult] = []
        #: the own repository information
        self.__repo: Repo | None = None
        #: fail if pandoc is not available?
        self.__fail_without_pandoc: Final[bool] = fail_without_pandoc

    @property
    def input_dir(self) -> Path:
        """
        Get the input directory.

        :return: the input directory
        """
        return self.__input_dir

    @property
    def input_file(self) -> Path:
        """
        Get the input file.

        :return: the input file
        """
        return self.__input_file

    @property
    def output_dir(self) -> Path:
        """
        Get the output directory.

        :return: the output directory
        """
        return self.__output_dir

    def __get_meta(self, key: str, raise_on_none: bool = True) -> Any:
        """
        Get a meta-data element.

        The lookup order is: the build time stamps, the language-specific
        metadata, the raw metadata, the built-in language defaults, and
        finally the information about the repository the build is based on.

        :param key: the key
        :param raise_on_none: should we raise an error if the property
            was not found (True) or return None (False)?
        :return: the meta-data element
        :raises ValueError: if the key cannot be resolved and
            `raise_on_none` is `True`
        """
        if not isinstance(key, str):
            raise type_error(key, "key", str)
        key = key.strip()

        # The time stamps of the build always take precedence.
        if key == bc.META_DATE:
            return self.__start_date
        if key == bc.META_DATE_TIME:
            return self.__start_time
        if key == bc.META_YEAR:
            return self.__start_year

        if self.__metadata_lang is not None and key in self.__metadata_lang:
            return self.__metadata_lang[key]

        if self.__metadata_raw is not None and key in self.__metadata_raw:
            return self.__metadata_raw[key]

        # If no meta data language properties are set: return default values.
        if key == bc.META_LANG:
            return "en"
        if key == bc.META_LOCALE:
            return lang_to_locale(self.__get_meta(bc.META_LANG, False))
        if key in (bc.META_LANG_NAME, bc.META_CUR_LANG_NAME):
            return "English"

        if key in (bc.META_REPO_INFO_URL, bc.META_REPO_INFO_DATE,
                   bc.META_REPO_INFO_COMMIT, bc.META_REPO_INFO_NAME):
            if self.__repo is None:
                if raise_on_none:
                    raise ValueError(
                        f"Cannot access {key} if build is not based on repo.")
                return None
            if key == bc.META_REPO_INFO_URL:
                return self.__repo.get_base_url()
            if key == bc.META_REPO_INFO_DATE:
                return self.__repo.date_time
            if key == bc.META_REPO_INFO_COMMIT:
                return self.__repo.commit
            # the only remaining key of the group: bc.META_REPO_INFO_NAME
            return self.__repo.get_name()

        if raise_on_none:
            raise ValueError(f"Metadata key '{key}' not found.")
        return None

    def __get_meta_no_error(self, key: str) -> Any:
        """
        Get a metadata element without raising an error if it is not present.

        :param key: the key
        :return: the metadata element, or None
        """
        return self.__get_meta(key, False)

    def get_meta(self, key: str) -> Any:
        """
        Get a meta-data element.

        :param key: the key
        :return: the meta-data element
        :raises ValueError: if the key cannot be resolved
        """
        return self.__get_meta(key, True)

    def get_meta_str(self, key: str) -> str:
        """
        Get a meta-data element as a string.

        :param key: the key
        :return: the meta-data element
        :raises ValueError: if the key cannot be resolved
        """
        return to_string(obj=self.get_meta(key),
                         locale=self.__get_meta_no_error(bc.META_LOCALE),
                         use_seq_and=key == bc.META_AUTHOR)

    def __load_repo(self, name: str, url: str) -> None:
        """
        Make the repository at the specified url available under the given id.

        A repository is downloaded at most once per url: if the url was
        already downloaded, the existing repository is registered under
        the new name.

        :param name: the repository name
        :param url: the repository url
        """
        name = enforce_non_empty_str(name).strip()
        url = enforce_non_empty_str(url).strip()
        if name in self.__repo_ids:
            r = self.__repo_ids[name]
            if r.url == url:
                return  # already registered under this name: nothing to do
            del self.__repo_ids[name]  # the name is re-bound to a new url
        if url in self.__repo_urls:
            # Already downloaded under another name: reuse it. Without the
            # return, the repository would be downloaded a second time.
            self.__repo_ids[name] = self.__repo_urls[url]
            return
        dest = TempDir.create()
        # the temp directory is released when the build is closed
        self.__exit.push(dest)
        r = Repo.download(url, dest)
        self.__repo_ids[name] = r
        self.__repo_urls[r.url] = r

    def __load_repos_from_meta(self, meta: dict[str, Any]) -> None:
        """
        Load the repositories listed in the metadata.

        :param meta: the metadata
        :raises ValueError: if a repository entry lacks its id or url
        """
        if not isinstance(meta, dict):
            raise type_error(meta, "meta", dict)
        logger("checking metadata for repositories.")
        if bc.META_REPOS in meta:
            repo_list = meta[bc.META_REPOS]
            if not isinstance(repo_list, Iterable):
                raise type_error(
                    repo_list, f"meta[{bc.META_REPOS}]", Iterable)
            for repo in repo_list:
                if bc.META_REPO_ID not in repo:
                    raise ValueError(
                        f"repo {repo} must include '{bc.META_REPO_ID}'.")
                if bc.META_REPO_URL not in repo:
                    raise ValueError(
                        f"repo {repo} must include '{bc.META_REPO_URL}'.")
                self.__load_repo(repo[bc.META_REPO_ID],
                                 repo[bc.META_REPO_URL])

    def get_repo(self, name: str) -> Repo:
        """
        Get a repository of the given name.

        :param name: the repository name
        :return: the repository structure
        :raises ValueError: if no repository is registered under `name`
        """
        name = enforce_non_empty_str(name).strip()
        if name not in self.__repo_ids:
            raise ValueError(f"unknown repository '{name}'.")
        r = self.__repo_ids[name]
        if not isinstance(r, Repo):
            # report the offending object, not the (always valid) name
            raise type_error(r, f"invalid repository '{name}'?", Repo)
        return r

    def __get_resource(self, name: str, directory: Path) -> Path | None:
        """
        Get an internal build resource to a directory.

        :param name: the resource name
        :param directory: the destination path
        :return: the path to the resource, or None if none was copied
        """
        return load_resource(enforce_non_empty_str_without_ws(name),
                             self.__input_dir, directory)

    def __pandoc_build(self,
                       input_file: Path,
                       output_dir: Path,
                       lang_id: str | None,
                       lang_name: str | None,
                       has_bibliography: bool) -> None:
        """
        Apply pandoc and build the input file to the output dir.

        The produced files are collected in a new language result that is
        appended to the internal list of results.

        :param input_file: the path to the input file
        :param output_dir: the path to the output directory
        :param lang_id: the language ID
        :param lang_name: the language name
        :param has_bibliography: is there a bibliography?
        :raises ValueError: if pandoc is not installed and the build was
            configured to fail in this case
        """
        if not has_tool(TOOL_PANDOC):
            if self.__fail_without_pandoc:
                raise ValueError("Pandoc not installed.")
            return  # silently produce nothing if that was requested
        logger(f"now invoking pandoc build steps to file '{input_file}' "
               f"with target director '{output_dir}' for lang-id "
               f"'{lang_id}'.")
        input_file.enforce_file()
        output_dir.enforce_dir()
        name, _ = Path.split_prefix_suffix(os.path.basename(input_file))
        results: list[File] = []
        locale: str | None = self.__get_meta_no_error(bc.META_LOCALE)

        results.append(latex(
            source_file=input_file,
            dest_file=output_dir.resolve_inside(f"{name}.pdf"),
            locale=locale,
            bibliography=has_bibliography,
            get_meta=self.__get_meta_no_error,
            resolve_resources=self.__get_resource))
        results.append(html(
            source_file=input_file,
            dest_file=output_dir.resolve_inside(f"{name}.html"),
            locale=locale,
            bibliography=has_bibliography,
            get_meta=self.__get_meta_no_error,
            resolve_resources=self.__get_resource))
        epub_res = epub(
            source_file=input_file,
            dest_file=output_dir.resolve_inside(f"{name}.epub"),
            locale=locale,
            bibliography=has_bibliography,
            get_meta=self.__get_meta_no_error,
            resolve_resources=self.__get_resource)
        results.append(epub_res)
        # the azw3 file is derived from the epub file created above
        results.append(azw3(epub_res.path))

        # now trying to create compressed versions
        # Both archives are created from the same list of book files
        # before either archive is added to the results, so that no
        # archive contains the other one.
        if can_xz_compress():
            tar_xz = compress_xz(results,
                                 output_dir.resolve_inside(f"{name}.tar.xz"))
        else:
            tar_xz = None
        if can_zip_compress():
            zipf = compress_zip(results,
                                output_dir.resolve_inside(f"{name}.zip"))
        else:
            zipf = None

        if tar_xz:
            results.append(tar_xz)
        if zipf:
            results.append(zipf)

        logger(f"finished pandoc build steps to file '{input_file}' "
               f"with target director '{output_dir}' for lang-id '{lang_id}'"
               f", produced {len(results)} files.")

        self.__results.append(LangResult(lang_id, lang_name, output_dir,
                                         tuple(results)))

    def __build_one_lang(self,
                         lang_id: str | None,
                         lang_name: str | None,
                         use_lang_id_as_suffix: bool = False) -> None:
        """
        Perform the book build for one language.

        :param lang_id: the language ID
        :param lang_name: the language name
        :param use_lang_id_as_suffix: should we use the language id as
            file name suffix?
        :raises ValueError: if a language name is given without a
            language id
        """
        self.__metadata_lang = None

        # Start up and define the output directory.
        if lang_id is None:
            logger("beginning build with no language set.")
            base_dir = self.output_dir
            if lang_name:
                raise ValueError("Cannot have language name "
                                 f"'{lang_name}' but no language id!")
        elif use_lang_id_as_suffix:
            # in a multi-language build, each language gets its own folder
            lang_id = enforce_non_empty_str_without_ws(lang_id)
            logger(f"beginning multi-language build for language {lang_id}.")
            base_dir = self.output_dir.resolve_inside(lang_id)
            if lang_name:
                enforce_non_empty_str(lang_name)
        else:
            logger(f"beginning single-language build in language {lang_id}.")
            base_dir = self.output_dir
            if lang_name:
                enforce_non_empty_str(lang_name)
        base_dir.ensure_dir_exists()

        # First obtain the full language-specific input text.
        text = enforce_non_empty_str(
            load_input(self.__input_file, self.__input_dir, lang_id).strip())

        # Then we extract the meta-data.
        self.__metadata_lang = parse_metadata(text)
        logger("done parsing metadata.")
        if lang_id:
            # We set up the language id and language name meta data
            # properties, unless the document defines them itself.
            if bc.META_LANG not in self.__metadata_lang.keys():
                self.__metadata_lang[bc.META_LANG] = lang_id
            if bc.META_CUR_LANG_NAME not in self.__metadata_lang.keys():
                self.__metadata_lang[bc.META_CUR_LANG_NAME] = lang_name

        # Then load the repositories from the meta data.
        self.__load_repos_from_meta(self.__metadata_lang)

        with TempDir.create() as temp:
            logger(f"building in temp directory '{temp}': "
                   "first applying preprocessor.")

            text = enforce_non_empty_str(preprocess(
                text=text, input_dir=self.input_dir,
                get_meta=self.get_meta_str, get_repo=self.get_repo,
                repo=self.__repo, output_dir=temp))
            logger("finished applying preprocessor.")

            has_bibliography = False
            bib = self.__get_meta_no_error(bc.PANDOC_BIBLIOGRAPHY)
            if bib:
                logger(f"found bibliography spec '{bib}', so we load it.")
                Path.copy_resource(self.__input_dir,
                                   self.__input_dir.resolve_inside(bib),
                                   temp)
                has_bibliography = True

            prefix, suffix = Path.split_prefix_suffix(
                basename(self.__input_file))
            if use_lang_id_as_suffix:
                # append "_<lang_id>" to the name unless it is already there
                end = "_" + enforce_non_empty_str_without_ws(lang_id)
                if not prefix.endswith(end):
                    prefix = prefix + end
            file = temp.resolve_inside(f"{prefix}.{suffix}")
            logger("finished applying preprocessor, now storing "
                   f"{len(text)} characters to file '{file}'.")
            file.write_all(text)
            del prefix, suffix, text
            # The pandoc build must run while the temp directory exists,
            # because its input file is located in there.
            self.__pandoc_build(input_file=file,
                                output_dir=base_dir,
                                lang_id=lang_id,
                                lang_name=lang_name,
                                has_bibliography=has_bibliography)

        # Finalize the build.
        self.__metadata_lang = None
        if lang_id is None:
            logger("finished build with no language set.")
        else:
            logger(f"finished build in language {lang_id}.")

    def __load_self_repo(self) -> None:
        """
        Attempt to load the self repository information.

        Starting at the input directory, walk up the directory tree until
        a directory containing a `.git` folder is found. The walk ends
        when the file system root or an unreadable directory is reached.
        """
        logger("checking if build process is based on git checkout.")
        check = self.__input_dir
        while True:
            if check == "/":
                break
            if not os.access(check, os.R_OK):
                break
            test = Path.path(os.path.join(check, ".git"))
            if os.path.isdir(test):
                self.__repo = Repo.from_local(check)
                logger(
                    f"build process is based on commit '{self.__repo.commit}'"
                    f" of repo '{self.__repo.url}'.")
                return
            parent = Path.path(os.path.join(check, ".."))
            if parent == check:
                # We cannot go up any further: this is a file system root
                # that is not spelled "/" (e.g., a drive root). Without
                # this check, the loop would never terminate there.
                break
            check = parent
        logger("build process is not based on git checkout.")

    def __build_all_langs(self) -> None:
        """
        Perform all the book build steps.

        Each language listed in the raw metadata is built; if no language
        is listed at all, one single build is performed instead.

        :raises ValueError: if a language id occurs more than once
        """
        logger("beginning the build loop for all languages.")
        no_lang = True
        if bc.META_LANGS in self.__metadata_raw:
            langs = self.__metadata_raw[bc.META_LANGS]
            done = set()
            if not isinstance(langs, Iterable):
                raise type_error(
                    langs, f"self.__metadata_raw[{bc.META_LANGS}]", Iterable)
            # materialize the languages: we need to know how many there are
            llangs = list(langs)
            for lang in llangs:
                if not isinstance(lang, dict):
                    raise type_error(lang, "item of llangs", dict)
                lang_id = enforce_non_empty_str_without_ws(
                    lang[bc.META_LANG_ID])
                if lang_id in done:
                    raise ValueError(f"Duplicate language id '{lang_id}'.")
                done.add(lang_id)
                # the name must be non-empty before and after stripping
                lang_name = enforce_non_empty_str(enforce_non_empty_str(
                    lang[bc.META_LANG_NAME]).strip())
                # the language id is only used as suffix if it is needed
                # to tell several languages apart
                self.__build_one_lang(lang_id, lang_name, len(llangs) > 1)
                no_lang = False

        if no_lang:
            self.__build_one_lang(
                self.__get_meta_no_error(bc.META_LANG), None, False)
        logger("finished the build loop for all languages.")

    def __build_website(self) -> None:
        """
        Build the website, if any.

        The template and body files named in the metadata, if any, are
        resolved inside the input directory and handed to `build_website`
        together with all results created so far.
        """
        template = self.__get_meta_no_error(bc.META_WEBSITE_OUTER)
        if template:
            logger(f"found website template spec '{template}'.")
            template = self.input_dir.resolve_inside(template)
        body = self.__get_meta_no_error(bc.META_WEBSITE_BODY)
        if body:
            logger(f"found website body spec '{body}'.")
            body = self.input_dir.resolve_inside(body)
        build_website(docs=self.__results,
                      outer_file=template,
                      body_file=body,
                      dest_dir=self.__output_dir,
                      input_dir=self.__input_dir,
                      get_meta=self.get_meta_str)

    def build(self) -> tuple[LangResult, ...]:
        """
        Perform the build.

        :returns: the results
        :raises ValueError: if the build did not produce any result
        """
        logger(f"starting the build process for input file "
               f"'{self.__input_file}', input dir '{self.__input_dir}', and "
               f"output dir '{self.__output_dir}' with the following tool "
               f"versions:\n{get_versions()}")
        self.__load_self_repo()
        self.__metadata_raw = load_initial_metadata(self.__input_file,
                                                    self.__input_dir)
        self.__load_repos_from_meta(self.__metadata_raw)
        self.__build_all_langs()
        self.__build_website()
        logger("build process completed, created "
               f"{sum(len(c.results) for c in self.__results)} "
               f"book files in total for {len(self.__results)} language(s).")
        res = tuple(self.__results)
        if not res:
            raise ValueError("Did not build any results.")
        return res

    def __enter__(self) -> "Build":
        """
        Enter the `with` block of the build.

        :returns: this build
        :raises ValueError: if the build has already been closed
        """
        if not self.__is_open:
            raise ValueError("Build already closed.")
        logger(f"starting the build of '{self.__input_file}' "
               f"to '{self.__output_dir}'.")
        return self

    def __exit__(self, exception_type, exception_value, traceback) -> bool:
        """
        Close the build and release everything on the internal exit stack.

        The clean-up is performed only once, even if this method is called
        several times.

        :param exception_type: the type of the exception that ended the
            `with` block, or `None` if there was none
        :param exception_value: ignored
        :param traceback: ignored
        :returns: `True` if no exception occurred, `False` otherwise, so
            that an exception is always rethrown and never suppressed
        """
        opn = self.__is_open
        self.__is_open = False
        if opn:
            logger("cleaning up temporary files.")
            self.__exit.close()
            logger(f"finished the build of '{self.__input_file}' "
                   f"to '{self.__output_dir}'.")
        del exception_value
        del traceback
        return exception_type is None

    @staticmethod
    def run(input_file: str,
            output_dir: str,
            exit_on_error: bool = False) -> tuple[LangResult, ...]:
        """
        Run a build on an input file to an output directory.

        Any error is logged together with its stack trace. Afterwards,
        either the Python interpreter is exited or a `ValueError` is
        raised.

        :param input_file: the input file
        :param output_dir: the output directory
        :param exit_on_error: should we quit Python upon error?
        :return: a tuple of results
        :raises ValueError: if the build fails and `exit_on_error` is
            `False`; errors of other types are chained as the cause
        """
        try:
            with Build(input_file, output_dir, True) as bd:
                res = bd.build()
            sys.stdout.flush()
            sys.stderr.flush()
        except BaseException as be:
            # flush first, so that the error report is not interleaved
            # with pending output of the build
            sys.stdout.flush()
            sys.stderr.flush()
            exinfo = " ".join(tb.format_exception(type(be),
                                                  value=be,
                                                  tb=be.__traceback__))
            logger(f"The build process has FAILED with error '{be}':"
                   f"\n {exinfo}")
            sys.stdout.flush()
            sys.stderr.flush()
            if exit_on_error:
                sys.exit(1)
            if isinstance(be, ValueError):
                raise
            # callers only need to handle ValueError; keep the original
            # error as cause and carry its description in the message
            raise ValueError(
                f"The build process has failed: {be}") from be
        return res

592 raise ValueError from be 

593 return res