Coverage for texgit / repository / file_manager.py: 96%
126 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-22 02:50 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-22 02:50 +0000
"""
A class for managing files and directories.

A :class:`~texgit.repository.file_manager.FileManager` provides a two-level
abstraction for assigning paths to unique IDs.
An ID is a combination of a "realm" and a "name", both of which are non-empty
strings without whitespace.

A file manager resides within a certain base directory.
Inside the base directory, it provides so-called "realms".
Each realm is a separate namespace.
Within a realm, "names" are mapped to paths.
The file manager ensures that the same realm-name combination is always
assigned to the same path.
The first time such a combination is queried, the path is created.
This path can be a file or a directory, depending on what was queried.
Every realm-name combination always uniquely identifies a path and there can
never be another realm-name combination pointing to the same path.
If need be, the paths are randomized to avoid potential clashes.

Once the file manager is closed, the realm-name to path associations are
stored.
When a new file manager instance is created for the same base directory, the
associations of realms-names to paths are restored.
This means that a program that creates output files for certain commands can
then find these files again later.

:class:`~texgit.repository.file_manager.FileManager` is the base and root of
the functionality of a managed repository of files and data.
Step-by-step, functionality is added to the manager by derived classes.
We do this iteratively:

:class:`~texgit.repository.git_manager.GitManager` adds the capability to
automatically download and use `git` repositories. For this purpose, it uses
the realm `git`.

:class:`~texgit.repository.process_manager.ProcessManager` adds the ability
to execute programs or scripts and to store their output in files to the
:class:`~texgit.repository.git_manager.GitManager`.
These programs and scripts may be located in `git` repositories that have
automatically been downloaded.
"""
43import json
44from contextlib import AbstractContextManager, suppress
45from os import close as os_close
46from os import remove as os_remove
47from tempfile import mkstemp
48from typing import Callable, Final
50from pycommons.io.path import Path
#: the basic characters permitted in both file names and keys
__OK_CHARS: Final[str] = (
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789+-_")

#: the characters that are OK for a file name
_FILENAME_OK: Callable[[str], bool] = {*__OK_CHARS}.__contains__

#: characters allowed in a key: the basic ones plus `:`, `.`, `@`, and `/`
__KEY_ALLOWED: Callable[[str], bool] = {*__OK_CHARS, *":.@/"}.__contains__
def _make_key(s: str) -> str:
    """
    Turn a string into a compliant key.

    Leading and trailing white space is stripped. What remains must be
    non-empty and must consist only of characters accepted by the
    key-character check.

    :param s: the string
    :return: the key, i.e., the stripped string
    :raises ValueError: if the stripped string is empty or contains a
        character that is not permitted in keys
    """
    key: Final[str] = str.strip(s)
    if not key:
        raise ValueError("Non-empty str expected, but got empty string "
                         "or string of only white space.")
    for char in key:
        if not __KEY_ALLOWED(char):
            raise ValueError("String contains forbidden character.")
    return key
def _make_ignore(path: Path) -> None:
    """
    Put a `.gitignore` and a `.nojekyll` file into the given directory.

    Neither file is touched if it already exists. A newly written
    `.gitignore` contains patterns matching everything.

    :param path: the path of the directory
    """
    ignore_file: Final[Path] = path.resolve_inside(".gitignore")
    if not ignore_file.exists():
        ignore_file.write_all_str("*\n**/*")

    nojekyll_file: Final[Path] = path.resolve_inside(".nojekyll")
    if not nojekyll_file.exists():
        nojekyll_file.ensure_file_exists()
class FileManager(AbstractContextManager):
    """A manager mapping realm-name IDs to files and directories."""

    def __init__(self, base_dir: str) -> None:
        """
        Set up the file manager inside a base directory.

        The base directory and its `realms` sub-directory are created if
        necessary. If a cache file from an earlier session exists, the
        realm-name to path associations stored in it are restored, as far
        as the paths still exist.

        :param base_dir: the base directory
        """
        #: the base directory of the file manager
        self.__base_dir: Final[Path] = Path(base_dir)
        self.__base_dir.ensure_dir_exists()
        _make_ignore(self.__base_dir)

        #: the directory with the realms
        self.__realms_dir: Final[Path] = self.__base_dir.resolve_inside(
            "realms")
        self.__realms_dir.ensure_dir_exists()
        _make_ignore(self.__realms_dir)

        #: the internal cache file
        self.__cache_file: Final[Path] = self.__base_dir.resolve_inside(
            ".cache.json")
        #: we are open
        self.__is_open = True

        #: the dictionary of realms and IDs
        self.__map: Final[dict[str, tuple[Path, dict[str, Path]]]] = {}

        # Without a cache file, there is nothing to restore.
        if not self.__cache_file.exists():
            return

        #: load the cache
        self.__cache_file.enforce_file()
        cached = json.loads(self.__cache_file.read_all_str())
        for raw_realm, entries in cached.items():
            realm = _make_key(raw_realm)
            realm_dir = self.__realms_dir.resolve_inside(realm)
            restored = {}
            for raw_name, rel_path in entries.items():
                name = _make_key(raw_name)
                target = realm_dir.resolve_inside(rel_path)
                # Only keep associations whose target is still there.
                if target.exists() and (
                        target.is_file() or target.is_dir()):
                    restored[name] = target
            if restored:  # realms without surviving entries are dropped
                self.__map[realm] = (realm_dir, restored)

    def _check_open(self) -> None:
        """
        Enforce that the file manager is open.

        :raises ValueError: if the file manager was already closed
        """
        if self.__is_open:
            return
        raise ValueError("Already closed!")

    def _get_sensitive_paths(self) -> list[Path]:
        """
        Get the list of sensitive paths.

        These are the base directory, the realms directory, the cache
        file, and the directory of every realm currently known.

        :return: the list of sensitive paths
        """
        return [
            self.__base_dir, self.__realms_dir, self.__cache_file,
            *map(self.__realms_dir.resolve_inside, self.__map)]

    def __get(self, realm: str, name: str,
              is_file: bool,
              prefix: str | None = None,
              suffix: str | None = None) -> tuple[Path, bool]:
        """
        Get a file or directory with the given ID in the specified realm.

        If the realm-name combination is already known, its path is
        returned. Otherwise, a new path is created: first a readable
        name derived from the prefix (or the name) and the suffix is
        tried and, if that is not possible, a randomized one is used.

        :param realm: the realm
        :param name: the id for the file
        :param is_file: is it a file?
        :param prefix: the optional prefix
        :param suffix: the optional suffix
        :return: the generated path and `True` if it was new,
            or `False` if not.
        :raises ValueError: if the manager is closed, if an argument is
            not a valid key, or if the base name of the path does not
            start with the prefix or end with the suffix
        """
        self._check_open()
        realm = _make_key(realm)
        name = _make_key(name)
        if prefix is not None:
            prefix = _make_key(prefix)
        if suffix is not None:
            suffix = _make_key(suffix)

        # Look up the realm and set it up if it is not known yet.
        known_realm = self.__map.get(realm)
        if known_realm is None:
            realm_dir = self.__realms_dir.resolve_inside(realm)
            realm_dir.ensure_dir_exists()
            _make_ignore(realm_dir)
            realm_map = {}
            self.__map[realm] = realm_dir, realm_map
        else:
            realm_dir, realm_map = known_realm

        result: Path | None = realm_map.get(name)
        is_new: Final[bool] = result is None
        if is_new:
            rootname = prefix or "".join(filter(_FILENAME_OK, name))
            usename = f"{rootname}{suffix}" if suffix else rootname

            # First try the readable name, if there is one and it is free.
            if usename:
                candidate = realm_dir.resolve_inside(usename)
                if not candidate.exists():
                    if not is_file:
                        candidate.ensure_dir_exists()
                        result = candidate
                    elif not candidate.ensure_file_exists():
                        result = candidate

            # Otherwise fall back to a randomized unique name.
            if result is None:
                handle, fpath = mkstemp(prefix=rootname or None,
                                        suffix=suffix, dir=realm_dir)
                os_close(handle)
                result = Path(fpath)
                if not is_file:
                    # mkstemp made a file: replace it by a directory
                    with suppress(FileNotFoundError):
                        os_remove(result)
                    result.ensure_dir_exists()
            realm_map[name] = result

        if is_file:
            result.enforce_file()
        else:
            result.enforce_dir()

        base_name: Final[str] = result.basename()
        if prefix and (not base_name.startswith(prefix)):
            raise ValueError(f"prefix={prefix!r} but f={result!r}.")
        if suffix and (not base_name.endswith(suffix)):
            raise ValueError(f"suffix={suffix!r} but f={result!r}.")
        return result, is_new

    def list_realm(self, realm: str, files: bool = True,
                   directories: bool = True) -> tuple[Path, ...]:
        """
        List all the files and directories in a given realm.

        :param realm: the realm that we want to list
        :param files: should we list files?
        :param directories: should we list directories?
        :return: a tuple with the selected paths, which is empty if the
            realm is not known
        """
        known_realm = self.__map.get(_make_key(realm))
        if known_realm is None:
            return ()
        return tuple(
            path for path in known_realm[1].values()
            if (files and path.is_file()) or (
                directories and path.is_dir()))

    def get_dir(self, realm: str, name: str) -> tuple[Path, bool]:
        """
        Get a directory representing the given name in the given realm.

        :param realm: the realm
        :param name: the name or ID that the directory should represent
        :return: the directory path and a `bool` indicating whether it was
            newly generated (`True`) or not if it already existed (`False`)
        """
        return self.__get(realm, name, is_file=False)

    def get_file(self, realm: str, name: str,
                 prefix: str | None = None,
                 suffix: str | None = None) -> tuple[Path, bool]:
        """
        Get a file representing the given name in the given realm.

        :param realm: the realm
        :param name: the name or ID that the file should represent
        :param prefix: the optional prefix
        :param suffix: the optional suffix
        :return: the generated file path and `True` if it was new, or
            `False` if not.
        """
        return self.__get(realm, name, is_file=True,
                          prefix=prefix, suffix=suffix)

    def close(self) -> None:
        """
        Close the file manager and write cache list.

        Only the first call has an effect. The old cache file is deleted
        and, if any realm is known, a new one is written. It stores each
        path relative to the directory of its realm.
        """
        if not self.__is_open:
            return  # already closed: nothing to do
        self.__is_open = False

        # discard the cache file of the previous session
        with suppress(FileNotFoundError):
            os_remove(self.__cache_file)
        if not self.__map:
            return  # no associations, so no cache file

        cache = {}
        for realm, (realm_dir, realm_map) in self.__map.items():
            cache[realm] = {
                name: path.relative_to(realm_dir)
                for name, path in realm_map.items()}
        self.__cache_file.write_all_str(json.dumps(cache))  # store cache

    def __exit__(self, exception_type, _, __) -> bool:
        """
        Close the context manager.

        :param exception_type: the type of the exception that occurred in
            the `with` body, or `None` if there was none
        :param _: ignored
        :param __: ignored
        :returns: `True` if no exception occurred, `False` otherwise, in
            which case the exception is rethrown
        """
        self.close()  # close the manager and flush cache
        return exception_type is None