"""
A class for managing files and directories.
A :class:`~texgit.repository.file_manager.FileManager` provides a two-level
abstraction for assigning paths to unique IDs.
An ID is a combination of a "realm" and a "name", both of which are non-empty
strings without whitespace.
A file manager resides within a certain base directory.
Inside the base directory, it provides so-called "realms".
Each realm is a separate namespace.
Within a realm, "names" are mapped to paths.
The file manager ensures that the same realm-name combination is always
assigned to the same path.
The first time such a combination is queried, the path is created.
This path can be a file or a directory, depending on what was queried.
Every realm-name combination always uniquely identifies a path and there can
never be another realm-name combination pointing to the same path.
If need be, the paths are randomized to avoid potential clashes.
Once the file manager is closed, the realm-name to path associations are
stored.
When a new file manager instance is created for the same base directory, the
associations of realm-name combinations to paths are restored.
This means that a program that creates output files for certain commands can
then find these files again later.
:class:`~texgit.repository.file_manager.FileManager` is the base and root of
the functionality of a managed repository of files and data.
Step-by-step, functionality is added to the manager by derived classes.
We do this iteratively:
:class:`~texgit.repository.git_manager.GitManager` adds the capability to
automatically download and use `git` repositories. For this purpose, it uses
the realm `git`.
:class:`~texgit.repository.process_manager.ProcessManager` adds the ability
to execute programs or scripts and to store their output in files to the
:class:`~texgit.repository.git_manager.GitManager`.
These programs and scripts may be located in `git` repositories that have
automatically been downloaded.
"""
import json
from contextlib import AbstractContextManager, suppress
from os import close as os_close
from os import remove as os_remove
from tempfile import mkstemp
from typing import Callable, Final
from pycommons.io.path import Path
from pycommons.strings.enforce import enforce_non_empty_str_without_ws
#: a predicate accepting exactly the single characters that may be used in a
#: file name: ASCII letters, decimal digits, and the symbols `+`, `-`, and `_`
_FILENAME_OK: Callable[[str], bool] = frozenset(
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "_-+").__contains__
def _make_key(s: str) -> str:
    """
    Turn a raw string into a key usable as realm, name, prefix, or suffix.

    Leading and trailing whitespace is stripped first; the stripped string
    is then validated by `enforce_non_empty_str_without_ws`, whose result
    is returned.

    :param s: the raw string
    :return: the stripped and validated key
    """
    stripped: str = str.strip(s)
    return enforce_non_empty_str_without_ws(stripped)
def _make_ignore(path: Path) -> None:
    """
    Make sure that a directory holds a `.gitignore` and a `.nojekyll` file.

    A missing `.gitignore` is written with patterns matching everything;
    a missing `.nojekyll` is created via `ensure_file_exists`.
    Files that already exist are left untouched.

    :param path: the directory in which the two files should exist
    """
    ignore_file: Path = path.resolve_inside(".gitignore")
    if not ignore_file.exists():
        ignore_file.write_all_str("*\n**/*")

    no_jekyll: Path = path.resolve_inside(".nojekyll")
    if not no_jekyll.exists():
        no_jekyll.ensure_file_exists()
# The file manager class is the only public name defined in this module.
class FileManager(AbstractContextManager):
    """
    A manager assigning unique paths to realm-name combinations.

    All managed paths live inside the `realms` folder of one base directory.
    The realm-name to path associations are read from the JSON file
    `.cache.json` in the base directory when the manager is created and are
    written back to it when the manager is closed.
    The manager is a context manager: leaving the `with` block closes it.
    """

    def __init__(self, base_dir: str) -> None:
        """
        Set up the file manager.

        :param base_dir: the base directory
        """
        #: the base directory of the file manager
        self.__base_dir: Final[Path] = Path(base_dir)
        self.__base_dir.ensure_dir_exists()
        _make_ignore(self.__base_dir)
        #: the directory with the realms
        self.__realms_dir: Final[Path] = self.__base_dir.resolve_inside(
            "realms")
        self.__realms_dir.ensure_dir_exists()
        _make_ignore(self.__realms_dir)
        #: the internal cache file
        self.__cache_file: Final[Path] = self.__base_dir.resolve_inside(
            ".cache.json")
        #: we are open
        self.__is_open = True
        #: the dictionary mapping each realm to its directory and to the
        #: name-to-path associations inside that realm
        self.__map: Final[dict[str, tuple[Path, dict[str, Path]]]] = {}

        # load the cache written by a previous manager, if there is one
        if self.__cache_file.exists():
            self.__cache_file.enforce_file()
            for key, values in json.loads(
                    self.__cache_file.read_all_str()).items():
                realm = _make_key(key)
                realm_dir = self.__realms_dir.resolve_inside(realm)
                realm_map = {}
                for name, path in values.items():
                    use_name = _make_key(name)
                    # the cache stores paths relative to the realm directory
                    use_path = realm_dir.resolve_inside(path)
                    # drop associations whose target has disappeared
                    if use_path.exists() and (
                            use_path.is_file() or use_path.is_dir()):
                        realm_map[use_name] = use_path
                if realm_map:  # realms without surviving entries are skipped
                    self.__map[realm] = (realm_dir, realm_map)

    def _check_open(self) -> None:
        """
        Enforce that the file manager is open.

        :raises ValueError: if the file manager was already closed
        """
        if not self.__is_open:
            raise ValueError("Already closed!")

    def _get_sensitive_paths(self) -> list[Path]:
        """
        Get the list of sensitive paths.

        These are the base directory, the realms directory, the cache file,
        and the directory of every currently known realm.

        :return: the list of sensitive paths
        """
        paths: Final[list[Path]] = [
            self.__base_dir, self.__realms_dir, self.__cache_file]
        paths.extend(map(self.__realms_dir.resolve_inside, self.__map))
        return paths

    def __get(self, realm: str, name: str,
              is_file: bool,
              prefix: str | None = None,
              suffix: str | None = None) -> tuple[Path, bool]:
        """
        Get a file or directory with the given ID in the specified realm.

        If the realm-name combination is already known, the stored path is
        returned. Otherwise a new file or directory is created inside the
        realm directory and remembered for this combination.

        :param realm: the realm
        :param name: the id for the file
        :param is_file: is it a file?
        :param prefix: the optional prefix
        :param suffix: the optional suffix
        :return: the generated path and `True` if it was new,
            or `False` if not.
        :raises ValueError: if the manager is closed or if the base name of
            the path does not start with `prefix` or end with `suffix`
        """
        self._check_open()
        realm = _make_key(realm)
        name = _make_key(name)
        if prefix is not None:
            prefix = _make_key(prefix)
        if suffix is not None:
            suffix = _make_key(suffix)

        if realm in self.__map:
            realm_dir, realm_map = self.__map[realm]
        else:  # first use of this realm: create and register its directory
            realm_dir = self.__realms_dir.resolve_inside(realm)
            realm_dir.ensure_dir_exists()
            _make_ignore(realm_dir)
            realm_map = {}
            self.__map[realm] = realm_dir, realm_map

        result: Path | None = None
        is_new: bool = False
        if name in realm_map:
            result = realm_map[name]
        else:
            is_new = True
            # Prefer a readable file name: the prefix if given, else the
            # name reduced to the characters that are OK in file names.
            rootname = prefix or "".join(filter(_FILENAME_OK, name))
            usename = rootname
            if suffix:
                usename = f"{usename}{suffix}"
            if usename:
                test = realm_dir.resolve_inside(usename)
                if not test.exists():
                    if is_file:
                        # Take the path only on a falsy return: per the
                        # pycommons docs, `ensure_file_exists` returns
                        # `True` if the file was already there.
                        if not test.ensure_file_exists():
                            result = test
                    else:
                        test.ensure_dir_exists()
                        result = test
            if result is None:
                # The readable name was unusable or taken: fall back to a
                # randomized name that still carries prefix and suffix.
                (handle, fpath) = mkstemp(prefix=rootname or None,
                                          suffix=suffix, dir=realm_dir)
                os_close(handle)
                result = Path(fpath)
                if not is_file:
                    # `mkstemp` made a file; turn the name into a directory
                    with suppress(FileNotFoundError):
                        os_remove(result)
                    result.ensure_dir_exists()
            realm_map[name] = result

        # the path must be of the requested kind, also for known names
        if is_file:
            result.enforce_file()
        else:
            result.enforce_dir()
        bn: Final[str] = result.basename()
        if prefix and (not bn.startswith(prefix)):
            raise ValueError(f"prefix={prefix!r} but f={result!r}.")
        if suffix and (not bn.endswith(suffix)):
            raise ValueError(f"suffix={suffix!r} but f={result!r}.")
        return result, is_new

    def list_realm(self, realm: str, files: bool = True,
                   directories: bool = True) -> tuple[Path, ...]:
        """
        List all the files and directories in a given realm.

        :param realm: the realm that we want to list
        :param files: should we list files?
        :param directories: should we list directories?
        :return: a tuple with the matching paths registered in the realm,
            which is empty if the realm is unknown
        """
        realm = _make_key(realm)
        if realm not in self.__map:
            return ()
        _, realm_map = self.__map[realm]
        return tuple(
            path for path in realm_map.values()
            if (files and path.is_file())
            or (directories and path.is_dir()))

    def get_dir(self, realm: str, name: str) -> tuple[Path, bool]:
        """
        Get a directory representing the given name in the given realm.

        :param realm: the realm
        :param name: the name or ID that the directory should represent
        :return: the directory path and a `bool` indicating whether it was
            newly generated (`True`) or not if it already existed (`False`)
        """
        return self.__get(realm, name, False)

    def get_file(self, realm: str, name: str,
                 prefix: str | None = None,
                 suffix: str | None = None) -> tuple[Path, bool]:
        """
        Get a file representing the given name in the given realm.

        :param realm: the realm
        :param name: the name or ID that the file should represent
        :param prefix: the optional prefix
        :param suffix: the optional suffix
        :return: the generated file path and `True` if it was new, or
            `False` if not.
        """
        return self.__get(realm, name, True, prefix, suffix)

    def close(self) -> None:
        """
        Close the file manager and write the realm-name to path cache.

        Only the first call has an effect; later calls do nothing.
        """
        was_open: bool = self.__is_open
        self.__is_open = False
        if not was_open:  # already closed before: nothing to do
            return
        # discard the old cache file before storing the current state
        with suppress(FileNotFoundError):
            os_remove(self.__cache_file)
        if self.__map:  # there are associations to remember
            # paths are stored relative to the directory of their realm
            self.__cache_file.write_all_str(json.dumps(
                {realm: {
                    name: path.relative_to(rv[0])
                    for name, path in rv[1].items()
                } for realm, rv in self.__map.items()}))

    def __exit__(self, exception_type, _, __) -> bool:
        """
        Close the context manager.

        :param exception_type: the type of the exception raised inside the
            `with` block, or `None` if there was none
        :param _: ignored
        :param __: ignored
        :returns: `True` if there was no exception, `False` to rethrow it
        """
        self.close()  # close the manager and flush cache
        return exception_type is None