Coverage for bookbuilderpy/parse_metadata.py: 86%
57 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
1"""An internal package for loading metadata."""
2import io
3import os.path
4import re
5from typing import Any, Final
7import yaml # type: ignore
9import bookbuilderpy.constants as bc
10from bookbuilderpy.path import Path
11from bookbuilderpy.preprocessor_commands import create_preprocessor
12from bookbuilderpy.strings import enforce_non_empty_str
13from bookbuilderpy.types import type_error
#: The complete metadata-query command as it appears in the text, i.e., a
#: backslash followed by the command name `bc.CMD_GET_META`.
__FULL_META_CMD: Final[str] = f"\\{bc.CMD_GET_META}"
def parse_metadata(text: str) -> dict[str, Any]:
    """
    Extract the metadata of a string and parse it.

    The metadata block starts behind the first line that consists only of
    three or more dashes (`---`) and ends in front of the first following
    line that consists only of three or more dots (`...`). All lines of the
    block that contain the metadata-query command are discarded and the
    remaining text is parsed as YAML via `yaml.safe_load`.

    :param text: the text, validated with `enforce_non_empty_str`
    :return: the metadata
    :raises ValueError: if the start mark or the end mark is missing, if
        there is no text between the two marks, if the text between them
        cannot be parsed as YAML, or if the parsed dictionary is empty
    :raises: the error created by `type_error` if the parsed YAML is not
        a `dict`
    """
    enforce_non_empty_str(text)

    start = re.search(r"^\s*-{3,}\s*$", text, re.MULTILINE)
    if start is None:
        raise ValueError("No metadata start mark (---) found.")
    s = start.end()

    # Search for the end mark only from the end of the start mark onwards:
    # a "..." line located before the "---" line is ordinary text and must
    # not be mistaken for the end of the metadata block. With `pos`, the
    # `^` anchor still only matches at real line beginnings.
    end = re.compile(r"^\s*\.{3,}\s*$", re.MULTILINE).search(text, s)
    if end is None:
        raise ValueError("No metadata end mark (...) found.")
    e = end.start()

    # The end mark may begin exactly where the start mark match ended, in
    # which case there is nothing in between.
    if s >= e:
        raise ValueError(
            f"End of start mark {s} is >= than start of end mark {e}.")
    text = text[s:e].strip()
    if not text:
        raise ValueError(f"Metadata is '{text}'.")

    # Lines using the metadata-query command are dropped before the YAML
    # parser sees the text.
    text = "\n".join(
        line for line in text.split("\n") if __FULL_META_CMD not in line)

    with io.StringIO(text) as stream:
        try:
            res = yaml.safe_load(stream)
        # Catch `Exception` and not `BaseException`, so that
        # `KeyboardInterrupt` and `SystemExit` are not turned into a
        # `ValueError`.
        except Exception as err:
            raise ValueError(f"Invalid metadata '{text}'.") from err

    if not isinstance(res, dict):
        raise type_error(res, "metadata", dict)
    if not res:
        raise ValueError(f"Metadata should not be empty, but is '{res}'.")
    return res
#: The complete input (include) command as it appears in the text, i.e., a
#: backslash followed by the command name `bc.CMD_INPUT`.
__FULL_INPUT_CMD: Final[str] = f"\\{bc.CMD_INPUT}"
def __raw_load(in_file: Path,
               in_dir: Path,
               resolve_cmd_only_once: bool = True) -> str:
    """
    Load a file and resolve the input commands it contains.

    The file is read completely. If it contains no input command, its text
    is returned unchanged. Otherwise, every input command is replaced by
    the recursively loaded contents of the file it refers to.

    :param in_file: the input file path
    :param in_dir: the input directory
    :param resolve_cmd_only_once: if `True`, the text is cut off in front
        of the second input command, so that only the first one is resolved
        at this level; files loaded by an input command are always loaded
        with this flag set to `False`
    :return: the loaded string
    """
    content = in_file.read_all_str()

    first = content.find(__FULL_INPUT_CMD)
    if first < 0:
        return content  # no input command at all: nothing to resolve

    if resolve_cmd_only_once:
        # Drop everything from the second input command onwards.
        second = content.find(
            __FULL_INPUT_CMD, first + len(__FULL_INPUT_CMD))
        if second > 0:
            content = content[:second]

    def __side_load(path: str,
                    _in_file: Path = in_file,
                    _in_dir: Path = in_dir) -> str:
        # Resolve the referenced file relative to the current directory,
        # make sure it is a file, and make sure its directory is contained
        # in the current directory before loading it recursively.
        source = _in_dir.resolve_input_file(path)
        source.enforce_file()
        source_dir = Path.directory(os.path.dirname(source))
        _in_dir.enforce_contains(source_dir)
        return __raw_load(source, source_dir, False)

    resolver = create_preprocessor(
        name=bc.CMD_INPUT,
        func=__side_load,
        n=1,
        strip_white_space=False,
        wrap_in_newlines=1,
    )
    return resolver(content)
def load_initial_metadata(in_file: Path,
                          in_dir: Path) -> dict[str, Any]:
    """
    Load the initial metadata.

    Only the raw metadata is of interest here, which should be the same
    over all builds of a book (this means things such as shared source code
    repositories). Therefore, this function does not process the complete
    document structure: it resolves at most one include, it does not expand
    other commands, and it does not perform any language-based resolution.

    :param in_file: the input file
    :param in_dir: the input directory
    :return: the map with the meta-data
    """
    raw_text = __raw_load(in_file, in_dir, True)
    return parse_metadata(raw_text)