Coverage for bookbuilderpy/parse_metadata.py: 86%

57 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-17 23:15 +0000

1"""An internal package for loading metadata.""" 

2import io 

3import os.path 

4import re 

5from typing import Any, Final 

6 

7import yaml # type: ignore 

8 

9import bookbuilderpy.constants as bc 

10from bookbuilderpy.path import Path 

11from bookbuilderpy.preprocessor_commands import create_preprocessor 

12from bookbuilderpy.strings import enforce_non_empty_str 

13from bookbuilderpy.types import type_error 

14 

#: the full metadata command: a backslash followed by `bc.CMD_GET_META`
__FULL_META_CMD: Final[str] = f"\\{bc.CMD_GET_META}"

17 

18 

def parse_metadata(text: str) -> dict[str, Any]:
    """
    Extract the metadata of a string and parse it.

    The metadata block is the text between the first line that consists of
    three or more dashes (`---`, optionally surrounded by white space) and
    the first line that consists of three or more dots (`...`, optionally
    surrounded by white space). All lines of this block which contain the
    metadata command are dropped and the remainder is parsed as YAML.

    :param text: the text, checked via `enforce_non_empty_str`
    :return: the metadata, a non-empty dictionary
    :raises ValueError: if the start or end mark is missing, if the end
        mark does not come after the start mark, if the text between the
        marks is empty, if it cannot be parsed as YAML, or if the parsed
        dictionary is empty
    :raises Exception: the error created by `type_error` if the parsed
        YAML is not a dictionary
    """
    enforce_non_empty_str(text)

    start = re.search(r"^\s*-{3,}\s*$", text, re.MULTILINE)
    if start is None:
        raise ValueError("No metadata start mark (---) found.")
    end = re.search(r"^\s*\.{3,}\s*$", text, re.MULTILINE)
    if end is None:
        raise ValueError("No metadata end mark (...) found.")
    s = start.end()
    e = end.start()
    if s >= e:
        raise ValueError(
            f"End of start mark {s} is >= than start of end mark {e}.")
    text = text[s:e].strip()
    if not text:  # str.strip() never returns None, only check for empty
        raise ValueError(f"Metadata is '{text}'.")

    # Lines containing the metadata command are removed before parsing.
    text = "\n".join(
        line for line in text.split("\n") if __FULL_META_CMD not in line)

    with io.StringIO(text) as stream:
        try:
            res = yaml.safe_load(stream)
        except Exception as err:
            # Only `Exception` is caught: `KeyboardInterrupt` and
            # `SystemExit` must propagate instead of being re-labelled as
            # invalid metadata.
            raise ValueError(f"Invalid metadata '{text}'.") from err

    if not isinstance(res, dict):
        raise type_error(res, "metadata", dict)
    if len(res) <= 0:
        raise ValueError(f"Metadata should not be empty, but is '{res}'.")
    return res

56 

57 

#: the full input command: a backslash followed by `bc.CMD_INPUT`
__FULL_INPUT_CMD: Final[str] = f"\\{bc.CMD_INPUT}"

60 

61 

def __raw_load(in_file: Path,
               in_dir: Path,
               resolve_cmd_only_once: bool = True) -> str:
    """
    Read a file and expand the input commands found in its text.

    If the text contains no input command, it is returned unchanged. If
    `resolve_cmd_only_once` is `True`, the text is cut off right before the
    second occurrence of the input command, so that at most one input
    command remains to be expanded. Files loaded through an input command
    are processed by this function again with `resolve_cmd_only_once` set
    to `False`.

    :param in_file: the input file path
    :param in_dir: the input directory
    :param resolve_cmd_only_once: should only one include be resolved?
    :return: the loaded string
    """
    content = in_file.read_all_str()

    first = content.find(__FULL_INPUT_CMD)
    if first < 0:
        return content  # no input command: nothing to expand

    if resolve_cmd_only_once:
        # Discard everything from the second input command onwards.
        second = content.find(__FULL_INPUT_CMD,
                              first + len(__FULL_INPUT_CMD))
        if second > 0:
            content = content[:second]

    def __side_load(path: str,
                    _in_file: Path = in_file,
                    _in_dir: Path = in_dir) -> str:
        # Locate the referenced file via the current input directory.
        included = _in_dir.resolve_input_file(path)
        included.enforce_file()
        included_dir = Path.directory(os.path.dirname(included))
        # NOTE(review): presumably ensures that the included file's
        # directory lies inside the input directory - confirm in `Path`.
        _in_dir.enforce_contains(included_dir)
        # Nested files use their own directory as input directory.
        return __raw_load(included, included_dir, False)

    return create_preprocessor(name=bc.CMD_INPUT,
                               func=__side_load,
                               n=1,
                               strip_white_space=False,
                               wrap_in_newlines=1)(content)

98 

99 

def load_initial_metadata(in_file: Path,
                          in_dir: Path) -> dict[str, Any]:
    """
    Load the initial, raw metadata of a book.

    The document structure is not processed completely: at most one
    include is resolved, no other commands are expanded, and no
    language-based resolution takes place. The sole purpose is to obtain
    the raw metadata that should be identical over all builds of a book,
    for instance shared source code repositories.

    :param in_file: the input file
    :param in_dir: the input directory
    :return: the map with the meta-data
    """
    # Load the raw text while resolving only a single include ...
    raw_text = __raw_load(in_file, in_dir, True)
    # ... and extract the metadata dictionary from it.
    metadata = parse_metadata(raw_text)
    return metadata