Coverage for texgit / repository / file_manager.py: 96%

126 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-22 02:50 +0000

1""" 

2A class for managing files and directories. 

3 

4A :class:`~texgit.repository.file_manager.FileManager` provides a two-level 

5abstraction for assigning paths to unique IDs. 

6An ID is a combination of a "realm" and a "name", both of which are non-empty 

7strings without whitespace. 

8 

9A file manager resides within a certain base directory. 

10Inside the base directory, it provides so-called "realms". 

11Each realm is a separate namespace. 

12Within a realm, "names" are mapped to paths. 

13The file manager ensures that the same realm-name combination is always 

14assigned to the same path. 

15The first time such a combination is queried, the path is created. 

16This path can be a file or a directory, depending on what was queried. 

17Every realm-name combination always uniquely identifies a path and there can 

18never be another realm-name combination pointing to the same path. 

19If need be, the paths are randomized to avoid potential clashes. 

20 

21Once the file manager is closed, the realm-name to path associations are 

22stored. 

23When a new file manager instance is created for the same base directory, the 

24associations of realms-names to paths are restored. 

25This means that a program that creates output files for certain commands can 

26then find these files again later. 

27 

28:class:`~texgit.repository.file_manager.FileManager` is the base and root of 

29the functionality of a managed repository of files and data. 

30Step-by-step, functionality is added to the manager by derived classes. 

31We do this iteratively: 

32 

33:class:`~texgit.repository.git_manager.GitManager` adds the capability to 

34automatically download and use `git` repositories. For this purpose, it uses 

35the realm `git`. 

36 

37:class:`~texgit.repository.process_manager.ProcessManager` adds the ability 

38to execute programs or scripts and to store their output in files to the 

39:class:`~texgit.repository.git_manager.GitManager`. 

40These programs and scripts may be located in `git` repositories that have 

41automatically been downloaded. 

42""" 

43import json 

44from contextlib import AbstractContextManager, suppress 

45from os import close as os_close 

46from os import remove as os_remove 

47from tempfile import mkstemp 

48from typing import Callable, Final 

49 

50from pycommons.io.path import Path 

51 

#: the basic alphabet: ASCII letters, digits, plus, minus, and underscore
__OK_CHARS: Final[str] = (
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789+-_")

#: the predicate telling whether a character is OK for a file name
_FILENAME_OK: Callable[[str], bool] = set(__OK_CHARS).__contains__

#: the predicate telling whether a character is allowed in a key: the
#: basic alphabet plus the characters `:`, `.`, `@`, and `/`
__KEY_ALLOWED: Callable[[str], bool] = (
    set(__OK_CHARS) | set(":.@/")).__contains__

62 

63 

def _make_key(s: str) -> str:
    """
    Turn a string into a compliant key.

    Leading and trailing white space is stripped. What remains must be
    non-empty and every character of it must be accepted by
    `__KEY_ALLOWED`.

    :param s: the raw string
    :return: the stripped string, which can be used as key
    :raises ValueError: if the stripped string is empty or contains a
        character that is not allowed in keys
    """
    key = str.strip(s)
    if not key:
        raise ValueError("Non-empty str expected, but got empty string "
                         "or string of only white space.")
    for char in key:
        if not __KEY_ALLOWED(char):
            raise ValueError("String contains forbidden character.")
    return key

78 

79 

def _make_ignore(path: Path) -> None:
    """
    Make sure `.gitignore` and `.nojekyll` files exist in the given path.

    A missing `.gitignore` is written with patterns matching everything
    inside the directory; a missing `.nojekyll` is created as a file.
    Files that already exist are left untouched.

    :param path: the directory in which the two files should reside
    """
    git_ignore: Path = path.resolve_inside(".gitignore")
    if not git_ignore.exists():
        git_ignore.write_all_str("*\n**/*")

    no_jekyll: Path = path.resolve_inside(".nojekyll")
    if not no_jekyll.exists():
        no_jekyll.ensure_file_exists()

92 

93 

class FileManager(AbstractContextManager):
    """
    A manager assigning files and directories to realm-name combinations.

    All managed paths live inside the `realms` directory below the base
    directory. When the manager is closed, the realm-name to path
    associations are written to the file `.cache.json` in the base
    directory, from which a later instance restores them.
    """

    def __init__(self, base_dir: str) -> None:
        """
        Set up the file manager.

        The base directory and the `realms` directory inside it are created
        if necessary. If a cache file exists, the associations stored in it
        are restored; entries whose path no longer exists are dropped.

        :param base_dir: the base directory
        """
        #: the base directory of the file manager
        self.__base_dir: Final[Path] = Path(base_dir)
        self.__base_dir.ensure_dir_exists()
        _make_ignore(self.__base_dir)

        #: the directory with the realms
        self.__realms_dir: Final[Path] = self.__base_dir.resolve_inside(
            "realms")
        self.__realms_dir.ensure_dir_exists()
        _make_ignore(self.__realms_dir)

        #: the internal cache file
        self.__cache_file: Final[Path] = self.__base_dir.resolve_inside(
            ".cache.json")
        #: we are open
        self.__is_open = True

        #: the dictionary of realms and IDs: each realm maps to its
        #: directory and to the dictionary of names to paths
        self.__map: Final[dict[str, tuple[Path, dict[str, Path]]]] = {}

        if not self.__cache_file.exists():
            return  # no cache to restore

        # restore the associations stored by a previous instance
        self.__cache_file.enforce_file()
        cached = json.loads(self.__cache_file.read_all_str())
        for raw_realm, entries in cached.items():
            realm = _make_key(raw_realm)
            realm_dir = self.__realms_dir.resolve_inside(realm)
            realm_map = {}
            for raw_name, rel_path in entries.items():
                name = _make_key(raw_name)
                full_path = realm_dir.resolve_inside(rel_path)
                # keep only paths that still exist as file or directory
                if full_path.exists() and (
                        full_path.is_file() or full_path.is_dir()):
                    realm_map[name] = full_path
            if realm_map:  # realms without surviving entries are skipped
                self.__map[realm] = (realm_dir, realm_map)

    def _check_open(self) -> None:
        """
        Enforce that the file manager is open.

        :raises ValueError: if the file manager was already closed
        """
        if self.__is_open:
            return
        raise ValueError("Already closed!")

    def _get_sensitive_paths(self) -> list[Path]:
        """
        Get the list of sensitive paths.

        The list holds the base directory, the realms directory, the cache
        file, and the directory of every realm currently known.

        :return: the list of sensitive paths
        """
        return [self.__base_dir, self.__realms_dir, self.__cache_file,
                *map(self.__realms_dir.resolve_inside, self.__map)]

    def __get(self, realm: str, name: str,
              is_file: bool,
              prefix: str | None = None,
              suffix: str | None = None) -> tuple[Path, bool]:
        """
        Get a file or directory with the given ID in the specified realm.

        If the realm-name combination is already known, the path assigned
        to it is returned. Otherwise, a new file or directory is created
        inside the realm directory and remembered for this combination.

        :param realm: the realm
        :param name: the id for the file
        :param is_file: is it a file (`True`) or a directory (`False`)?
        :param prefix: the optional prefix
        :param suffix: the optional suffix
        :return: the path and `True` if it was new, or `False` if not.
        :raises ValueError: if the manager is closed, if any of the strings
            is not a valid key, or if the base name of the path does not
            start with `prefix` or does not end with `suffix`
        """
        self._check_open()
        realm = _make_key(realm)
        name = _make_key(name)
        if prefix is not None:
            prefix = _make_key(prefix)
        if suffix is not None:
            suffix = _make_key(suffix)

        entry = self.__map.get(realm)
        if entry is None:  # first use of this realm: set up its directory
            realm_dir = self.__realms_dir.resolve_inside(realm)
            realm_dir.ensure_dir_exists()
            _make_ignore(realm_dir)
            realm_map = {}
            self.__map[realm] = realm_dir, realm_map
        else:
            realm_dir, realm_map = entry

        result: Path | None = None
        is_new: bool = name not in realm_map
        if not is_new:
            result = realm_map[name]
        else:
            # first try a readable name built from the prefix or, if there
            # is none, from the file-name-safe characters of the name
            stem = prefix or "".join(filter(_FILENAME_OK, name))
            preferred = f"{stem}{suffix}" if suffix else stem

            if preferred:
                candidate = realm_dir.resolve_inside(preferred)
                if not candidate.exists():
                    if is_file:
                        # NOTE(review): presumably `ensure_file_exists`
                        # returns `True` if the file already existed, so
                        # the candidate is only used if it was created
                        # here - confirm against pycommons
                        if not candidate.ensure_file_exists():
                            result = candidate
                    else:
                        candidate.ensure_dir_exists()
                        result = candidate

            if result is None:
                # the readable name was not available: let `mkstemp`
                # create a file with a unique name instead
                handle, unique = mkstemp(prefix=stem or None,
                                         suffix=suffix, dir=realm_dir)
                os_close(handle)
                result = Path(unique)
                if not is_file:
                    # a directory was requested: replace the file that
                    # `mkstemp` created by a directory of the same name
                    with suppress(FileNotFoundError):
                        os_remove(result)
                    result.ensure_dir_exists()
            realm_map[name] = result

        # the path must be of the requested kind
        if is_file:
            result.enforce_file()
        else:
            result.enforce_dir()

        base_name: Final[str] = result.basename()
        if prefix and (not base_name.startswith(prefix)):
            raise ValueError(f"prefix={prefix!r} but f={result!r}.")
        if suffix and (not base_name.endswith(suffix)):
            raise ValueError(f"suffix={suffix!r} but f={result!r}.")
        return result, is_new

    def list_realm(self, realm: str, files: bool = True,
                   directories: bool = True) -> tuple[Path, ...]:
        """
        List all the files and directories in a given realm.

        :param realm: the realm that we want to list
        :param files: should we list files?
        :param directories: should we list directories?
        :return: a tuple with the selected paths of the realm, which is
            empty if the realm is unknown
        """
        realm = _make_key(realm)
        if realm not in self.__map:
            return ()
        realm_map = self.__map[realm][1]
        return tuple(
            path for path in realm_map.values()
            if (files and path.is_file()) or (
                directories and path.is_dir()))

    def get_dir(self, realm: str, name: str) -> tuple[Path, bool]:
        """
        Get a directory representing the given name in the given realm.

        :param realm: the realm
        :param name: the name or ID that the directory should represent
        :return: the directory path and a `bool` indicating whether it was
            newly generated (`True`) or not if it already existed (`False`)
        """
        return self.__get(realm, name, is_file=False)

    def get_file(self, realm: str, name: str,
                 prefix: str | None = None,
                 suffix: str | None = None) -> tuple[Path, bool]:
        """
        Get a file representing the given name in the given realm.

        :param realm: the realm
        :param name: the name or ID that the file should represent
        :param prefix: the optional prefix
        :param suffix: the optional suffix
        :return: the generated file path and `True` if it was new, or
            `False` if not.
        """
        return self.__get(realm, name, is_file=True,
                          prefix=prefix, suffix=suffix)

    def close(self) -> None:
        """
        Close the file manager and write the cache file.

        Only the first call has an effect. The old cache file is deleted
        and, if any realm is known, a new one is written in which every
        path is stored relative to the directory of its realm.
        """
        was_open: bool = self.__is_open
        self.__is_open = False
        if not was_open:
            return  # closing twice does nothing

        # discard the old cache file, if any
        with suppress(FileNotFoundError):
            os_remove(self.__cache_file)
        if not self.__map:
            return  # nothing to store

        cache = {
            realm: {name: path.relative_to(realm_dir)
                    for name, path in realm_map.items()}
            for realm, (realm_dir, realm_map) in self.__map.items()}
        self.__cache_file.write_all_str(json.dumps(cache))

    def __exit__(self, exception_type, _, __) -> bool:
        """
        Close the context manager.

        :param exception_type: the type of the exception that occurred in
            the `with` body, or `None` if there was none
        :param _: ignored
        :param __: ignored
        :returns: `True` if no exception occurred, `False` to rethrow the
            exception that did occur
        """
        self.close()  # close the manager and flush cache
        return exception_type is None