Coverage for pycommons / io / path.py: 99%
174 statements
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 03:04 +0000
« prev ^ index » next coverage.py v7.13.0, created at 2025-12-11 03:04 +0000
1"""
2The class `Path` for handling paths to files and directories.
4The instances of :class:`Path` identify file system paths.
5They are always fully canonicalized with all relative components resolved.
6They thus allow the clear and unique identification of files and directories.
7They also offer support for opening streams, creating paths to sub-folders,
8and so on.
10The first goal is to encapsulate the functionality of the :mod:`os.path`
11module into a single class.
12The second goal is to make sure that we do not run into any dodgy situation
13with paths pointing to security-sensitive locations or something due to
14strange `.` and `..` trickery.
15If you try to resolve a path inside a directory and the resulting canonical
16path is outside that directory, you get an error raised, for example.
17"""
19import codecs
20from io import TextIOBase
21from os import O_CREAT, O_EXCL, O_TRUNC, makedirs, scandir
22from os import close as osclose
23from os import open as osopen
24from os import remove as osremove
25from os.path import (
26 abspath,
27 commonpath,
28 dirname,
29 expanduser,
30 expandvars,
31 isdir,
32 isfile,
33 join,
34 normcase,
35 realpath,
36 relpath,
37)
38from os.path import basename as osbasename
39from os.path import exists as osexists
40from shutil import rmtree
41from typing import (
42 Any,
43 Callable,
44 Final,
45 Generator,
46 Iterable,
47 Iterator,
48 TextIO,
49 cast,
50)
52from pycommons.types import check_int_range, type_error
#: The default text encoding: UTF-8 whose decoder skips a leading byte order
#: mark (BOM) if one is present.
UTF8: Final[str] = "utf-8-sig"

#: The known byte order marks and the encoding that each group of marks
#: identifies. The order matters: the UTF-32 little-endian mark begins with
#: the same two bytes as the UTF-16 little-endian mark, so UTF-32 has to be
#: probed before UTF-16.
__ENCODINGS: Final[tuple[tuple[tuple[bytes, ...], str], ...]] = (
    ((codecs.BOM_UTF8,), UTF8),
    ((codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE), "utf-32"),
    ((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE), "utf-16"),
)
def _get_text_encoding(filename: str) -> str:
    r"""
    Detect the text encoding of a file from its byte order mark (BOM).

    The first four bytes of the file are read in binary mode and compared
    with the byte order marks listed in `__ENCODINGS`, in the order in
    which they are listed there. The encoding belonging to the first
    matching mark is returned. If no mark matches (which includes the case
    of an empty file), :const:`UTF8` is returned.
    Adapted from https://stackoverflow.com/questions/13590749.

    :param filename: the path of the file to inspect
    :returns: the name of the detected encoding

    >>> from tempfile import mkstemp
    >>> from os import close as osxclose
    >>> from os import remove as osremove
    >>> (h, tf) = mkstemp()
    >>> osxclose(h)
    >>> with open(tf, "wb") as out:
    ...     out.write(b'\xef\xbb\xbf')
    3
    >>> _get_text_encoding(tf)
    'utf-8-sig'

    >>> with open(tf, "wb") as out:
    ...     out.write(b'\xff\xfe\x00\x00')
    4
    >>> _get_text_encoding(tf)
    'utf-32'

    >>> with open(tf, "wb") as out:
    ...     out.write(b'\x00\x00\xfe\xff')
    4
    >>> _get_text_encoding(tf)
    'utf-32'

    >>> with open(tf, "wb") as out:
    ...     out.write(b'\xff\xfe')
    2
    >>> _get_text_encoding(tf)
    'utf-16'

    >>> with open(tf, "wb") as out:
    ...     out.write(b'\xfe\xff')
    2
    >>> _get_text_encoding(tf)
    'utf-16'

    >>> with open(tf, "wb") as out:
    ...     out.write(b'\xaa\xf3')
    2
    >>> _get_text_encoding(tf)
    'utf-8-sig'

    >>> osremove(tf)
    """
    with open(filename, "rb") as stream:
        prefix: Final[bytes] = stream.read(4)  # no known BOM is longer

    # `bytes.startswith` accepts a tuple of candidates, so each group of
    # marks is tested with one call; the first matching group wins.
    return next(
        (name for marks, name in __ENCODINGS if prefix.startswith(marks)),
        UTF8)
class Path(str):
    """
    An immutable representation of a canonical file system path.

    Every instance is a fully-qualified path: environment variables and the
    user home directory are expanded, symbolic links as well as the relative
    components `"."` and `".."` are resolved, and the case is normalized via
    :func:`os.path.normcase`. A :class:`Path` is also a `str`, so it can be
    passed to any function that expects a string.

    >>> try:
    ...     Path(1)
    ... except TypeError as te:
    ...     print(te)
    descriptor '__len__' requires a 'str' object but received a 'int'

    >>> try:
    ...     Path("")
    ... except ValueError as ve:
    ...     print(ve)
    Path must not be empty.

    >>> try:
    ...     Path(" ")
    ... except ValueError as ve:
    ...     print(ve)
    Path must not start or end with white space, but ' ' does.

    >>> Path(__file__) == realpath(__file__)
    True

    >>> from os import getcwd
    >>> Path(".") == realpath(getcwd())
    True
    >>> Path("..") == dirname(realpath(getcwd()))
    True
    >>> Path("./1.txt") == join(realpath(getcwd()), "1.txt")
    True

    >>> from os.path import isabs
    >>> isabs(Path(".."))
    True
    """

    # see https://docs.astral.sh/ruff/rules/no-slots-in-str-subclass/
    __slots__ = ()

    def __new__(cls, value: Any):  # noqa
        """
        Construct the path object by canonicalizing the path string.

        If `value` already is a :class:`Path`, it is returned as-is.

        :param value: the string value
        :raises TypeError: if `value` is not a string
        :raises ValueError: if `value` is empty, starts or ends with white
            space, or cannot be canonicalized to a proper path

        >>> isinstance(Path("."), Path)
        True
        >>> isinstance(Path("."), str)
        True
        >>> isinstance(Path(".")[-2:], Path)
        False
        >>> isinstance(Path(__file__).strip(), Path)
        False
        >>> p = Path(__file__)
        >>> Path(p) is p
        True

        >>> try:
        ...     Path(None)
        ... except TypeError as te:
        ...     print(te)
        descriptor '__len__' requires a 'str' object but received a 'NoneType'
        """
        if isinstance(value, Path):
            return cast("Path", value)

        # The unbound `str` methods are used on purpose: they raise a
        # TypeError for every argument that is not a `str`.
        if str.__len__(value) <= 0:
            raise ValueError("Path must not be empty.")
        if str.strip(value) != value:
            raise ValueError("Path must not start or end with white space, "
                             f"but {value!r} does.")
        value = normcase(abspath(realpath(expanduser(expandvars(value)))))
        if (str.__len__(value) <= 0) or (value in {".", ".."}):  # impossible!
            raise ValueError(f"Canonicalization cannot yield {value!r}.")

        return super().__new__(cls, value)

    def exists(self) -> bool:
        """
        Check if this path identifies an existing file or directory.

        See also :meth:`~Path.is_file` and :meth:`~Path.is_dir`.

        :returns: `True` if this path identifies an existing file or
            directory, `False` otherwise.

        >>> Path(__file__).exists()
        True
        >>> from tempfile import mkstemp
        >>> from os import close as osxclose
        >>> from os import remove as osremove
        >>> (h, tf) = mkstemp()
        >>> osxclose(h)
        >>> p = Path(tf)
        >>> p.exists()
        True
        >>> osremove(p)
        >>> p.exists()
        False
        """
        return osexists(self)

    def is_file(self) -> bool:
        """
        Check if this path identifies an existing file.

        See also :meth:`~enforce_file`, which raises an error if `is_file`
        is not `True`.

        :returns: `True` if this path identifies an existing file, `False`
            otherwise.

        >>> Path(__file__).is_file()
        True
        >>> from os.path import dirname
        >>> Path(dirname(__file__)).is_file()
        False
        """
        return isfile(self)

    def enforce_file(self) -> None:
        """
        Raise an error if the path does not reference an existing file.

        This is a shorthand for checking :meth:`is_file` and raising a
        `ValueError` if it returns `False`.

        :raises ValueError: if this path does not reference an existing file

        >>> Path(__file__).enforce_file()   # nothing happens
        >>> from os import getcwd
        >>> try:
        ...     Path(getcwd()).enforce_file()
        ... except ValueError as ve:
        ...     print(str(ve)[-25:])
        does not identify a file.
        """
        if not self.is_file():
            raise ValueError(f"Path {self!r} does not identify a file.")

    def is_dir(self) -> bool:
        """
        Check if this path identifies an existing directory.

        The method :meth:`~enforce_dir` also checks this, but raises an
        exception if it is not `True`.

        :returns: `True` if this path identifies an existing directory,
            `False` otherwise.

        >>> Path(__file__).is_dir()
        False
        >>> from os.path import dirname
        >>> Path(dirname(__file__)).is_dir()
        True
        """
        return isdir(self)

    def enforce_dir(self) -> None:
        """
        Raise an error if the path does not reference an existing directory.

        This is a shorthand for checking :meth:`is_dir` and raising a
        `ValueError` if it returns `False`.

        :raises ValueError: if this path does not reference an existing
            directory

        >>> try:
        ...     Path(__file__).enforce_dir()
        ... except ValueError as ve:
        ...     print(str(ve)[-30:])
        does not identify a directory.

        >>> from os import getcwd
        >>> Path(getcwd()).enforce_dir()   # nothing happens
        """
        if not self.is_dir():
            raise ValueError(f"Path {self!r} does not identify a directory.")

    def contains(self, other: str) -> bool:
        """
        Check whether this path is a directory and contains another path.

        A file can never contain anything else. A directory contains itself as
        well as any sub-directories, i.e., `a/b/` contains `a/b/` and `a/b/c`.
        The function :meth:`~enforce_contains` throws an exception if the
        path does not contain `other`. If this path is not a directory, the
        result is `False` and `other` is not inspected at all. Paths for
        which :func:`os.path.commonpath` cannot compute a common prefix are
        reported as not contained.

        :param other: the other path
        :returns: `True` if this path contains the other path, `False` if not
        :raises TypeError: if this path is a directory and `other` is not a
            string
        :raises ValueError: if this path is a directory and `other` is not
            a valid path string, e.g., if it is empty

        >>> from os.path import dirname
        >>> Path(dirname(__file__)).contains(__file__)
        True
        >>> Path(__file__).contains(__file__)
        False
        >>> Path(dirname(__file__)).contains(dirname(__file__))
        True
        >>> Path(__file__).contains(dirname(__file__))
        False

        >>> try:
        ...     Path(dirname(__file__)).contains(1)
        ... except TypeError as te:
        ...     print(te)
        descriptor '__len__' requires a 'str' object but received a 'int'

        >>> try:
        ...     Path(dirname(__file__)).contains("")
        ... except ValueError as ve:
        ...     print(ve)
        Path must not be empty.
        """
        if not self.is_dir():
            return False
        candidate: Final[Path] = Path(other)  # validates `other`
        try:
            return commonpath([self]) == commonpath([self, candidate])
        except ValueError:
            # `commonpath` raises, e.g., for paths on different drives:
            # such a path certainly is not inside this directory.
            return False

    def enforce_contains(self, other: str) -> None:
        """
        Raise an exception if this is not a directory containing another path.

        The method :meth:`contains` checks whether this path is a directory
        and contains the other path and returns the result of this check as a
        `bool`. This function here raises an exception if that check fails.

        :param other: the other path
        :raises ValueError: if this path is not a directory or if `other` is
            not a sub-path of this path.

        >>> try:
        ...     Path(__file__).enforce_contains(__file__)
        ... except ValueError as ve:
        ...     print(str(ve)[-25:])
        not identify a directory.

        >>> from os.path import dirname
        >>> Path(dirname(__file__)).enforce_contains(__file__)  # nothing
        >>> try:
        ...     Path(dirname(__file__)).enforce_contains(
        ...         dirname(dirname(__file__)))
        ... except ValueError as ve:
        ...     print(str(ve)[:4])
        ...     print("does not contain" in str(ve))
        Path
        True
        """
        self.enforce_dir()
        if not self.contains(other):
            raise ValueError(f"Path {self!r} does not contain {other!r}.")

    def resolve_inside(self, relative_path: str) -> "Path":
        """
        Resolve a relative path to an absolute path inside this path.

        This path must identify a directory. The relative path is joined to
        this path, the result is canonicalized, and then it is enforced via
        :meth:`enforce_contains` that the result is located inside this
        directory. A relative path that leaves the directory, e.g., `".."`,
        therefore leads to an error.

        :param relative_path: the path to resolve
        :returns: the resolved child path
        :raises TypeError: If the `relative_path` is not a string.
        :raises ValueError: If the `relative_path` would resolve to something
            outside of this path and/or if it is empty.

        >>> from os.path import dirname
        >>> Path(dirname(__file__)).resolve_inside("a.txt")[-5:]
        'a.txt'

        >>> try:
        ...     Path(dirname(__file__)).resolve_inside("..")
        ... except ValueError as ve:
        ...     print("does not contain" in str(ve))
        True

        >>> try:
        ...     Path(__file__).resolve_inside("..")
        ... except ValueError as ve:
        ...     print("does not identify a directory" in str(ve))
        True

        >>> try:
        ...     Path(dirname(__file__)).resolve_inside(2)
        ... except TypeError as te:
        ...     print(te)
        descriptor '__len__' requires a 'str' object but received a 'int'

        >>> try:
        ...     Path(__file__).resolve_inside("")
        ... except ValueError as ve:
        ...     print(ve)
        Relative path must not be empty.

        >>> try:
        ...     Path(__file__).resolve_inside(" ")
        ... except ValueError as ve:
        ...     print(ve)
        Relative path must not start or end with white space, but ' ' does.
        """
        if str.__len__(relative_path) == 0:
            raise ValueError("Relative path must not be empty.")
        if str.strip(relative_path) != relative_path:
            raise ValueError("Relative path must not start or end with white "
                             f"space, but {relative_path!r} does.")
        opath: Final[Path] = Path(join(self, relative_path))
        self.enforce_contains(opath)
        return opath

    def ensure_file_exists(self) -> bool:
        """
        Atomically ensure that the file exists and create it otherwise.

        While :meth:`is_file` checks if the path identifies an existing file
        and :meth:`enforce_file` raises an error if it does not, this method
        here creates the file if it does not exist. The method can only create
        the file if the directory already exists.

        :returns: `True` if the file already existed and
            `False` if it was newly and atomically created.
        :raises ValueError: if anything goes wrong during the file creation
            or if the path exists but does not identify a file

        >>> print(Path(__file__).ensure_file_exists())
        True

        >>> from os.path import dirname
        >>> try:
        ...     Path(dirname(__file__)).ensure_file_exists()
        ...     print("??")
        ... except ValueError as ve:
        ...     print("does not identify a file." in str(ve))
        True
        """
        existed: bool = False
        try:
            # O_EXCL makes the call fail if the path already exists
            osclose(osopen(self, O_CREAT | O_EXCL))
        except FileExistsError:
            existed = True
        except Exception as err:
            raise ValueError(
                f"Error when trying to create file {self!r}.") from err
        self.enforce_file()
        return existed

    def create_file_or_truncate(self) -> None:
        """
        Create the file identified by this path and truncate it if it exists.

        :raises ValueError: if anything goes wrong during the file creation
            or truncation

        >>> from tempfile import mkstemp
        >>> from os import close as osxclose
        >>> from os import remove as osremove
        >>> (h, tf) = mkstemp()
        >>> osxclose(h)

        >>> pth = Path(tf)
        >>> pth.write_all_str("test")
        >>> pth.create_file_or_truncate()
        >>> pth.is_file()
        True

        >>> try:
        ...     pth.read_all_str()
        ... except ValueError as ve:
        ...     print(str(ve)[-17:])
        contains no text.

        >>> osremove(pth)
        >>> pth.create_file_or_truncate()
        >>> pth.is_file()
        True
        >>> osremove(pth)

        >>> from os import makedirs as osmkdir
        >>> from os import rmdir as osrmdir
        >>> osmkdir(pth)
        >>> try:
        ...     pth.create_file_or_truncate()
        ... except ValueError as ve:
        ...     print(str(ve)[:35])
        Error when truncating/creating file
        >>> osrmdir(pth)
        """
        # Imported locally so that this method does not depend on a change
        # to the module-level import list.
        from os import O_WRONLY  # noqa: PLC0415

        try:
            # O_TRUNC is only well-defined together with a writable access
            # mode, hence O_WRONLY is requested explicitly.
            osclose(osopen(self, O_WRONLY | O_CREAT | O_TRUNC))
        except Exception as err:
            # `Exception`, not `BaseException`: KeyboardInterrupt and
            # SystemExit must not be converted into a ValueError.
            raise ValueError(
                f"Error when truncating/creating file {self!r}.") from err
        self.enforce_file()

    def ensure_dir_exists(self) -> None:
        """
        Make sure that the directory exists, create it otherwise.

        Method :meth:`is_dir` checks whether the path identifies an
        existing directory, method :meth:`enforce_dir` raises an error if not,
        and this method creates the directory (including missing parent
        directories) if it does not exist.

        :raises ValueError: if the directory did not exist and creation
            failed, or if the path exists but is not a directory

        >>> from os.path import dirname
        >>> Path(dirname(__file__)).ensure_dir_exists()  # nothing happens

        >>> try:
        ...     Path(__file__).ensure_dir_exists()
        ... except ValueError as ve:
        ...     print("does not identify a directory" in str(ve))
        True

        >>> try:
        ...     Path(join(__file__, "a")).ensure_dir_exists()
        ... except ValueError as ve:
        ...     print("Error when trying to create directory" in str(ve))
        True

        >>> from tempfile import mkdtemp
        >>> from os import rmdir as osrmdirx
        >>> td = mkdtemp()
        >>> osrmdirx(td)
        >>> p2 = Path(join(td, "a", "b"))
        >>> p2.ensure_dir_exists()
        >>> p2.is_dir()
        True
        >>> osrmdirx(p2)
        >>> osrmdirx(dirname(p2))
        >>> osrmdirx(td)
        """
        try:
            makedirs(name=self, exist_ok=True)
        except FileExistsError:
            # the path exists but is no directory: reported by enforce_dir
            pass
        except Exception as err:
            raise ValueError(
                f"Error when trying to create directory {self!r}.") from err
        self.enforce_dir()

    def ensure_parent_dir_exists(self) -> "Path":
        """
        Make sure that the parent directory exists, create it otherwise.

        This path may identify a file or directory to be created that does not
        yet exist. The parent directory of this path is ensured to exist,
        i.e., if it already exists, nothing happens, but if it does not yet
        exist, it is created. If the parent directory cannot be created, a
        :class:`ValueError` is raised.

        :returns: the parent dir
        :raises ValueError: if the directory did not exist and creation failed

        >>> _ = Path(__file__).ensure_parent_dir_exists()  # nothing happens

        >>> try:
        ...     _ = Path(join(__file__, "a")).ensure_parent_dir_exists()
        ... except ValueError as ve:
        ...     print("does not identify a directory" in str(ve))
        True

        >>> from tempfile import mkdtemp
        >>> from os import rmdir as osrmdirx
        >>> td = mkdtemp()
        >>> td2 = join(td, "xxx")
        >>> isdir(td2)
        False
        >>> tf = join(td2, "xxx")
        >>> _ = Path(tf).ensure_parent_dir_exists()
        >>> isdir(td2)
        True
        >>> osrmdirx(td2)
        >>> osrmdirx(td)
        """
        parent: Final[Path] = Path(dirname(self))
        parent.ensure_dir_exists()
        return parent

    def open_for_read(self) -> TextIOBase:
        r"""
        Open this file for reading text.

        The encoding is determined from the byte order mark of the file, if
        any, and encoding errors are treated strictly. If the path does not
        identify an existing file, an exception is thrown.

        :returns: the file open for reading
        :raises ValueError: if the path does not identify a file

        >>> with Path(__file__).open_for_read() as rd:
        ...     print(f"{len(rd.readline())}")
        ...     print(f"{rd.readline()!r}")
        4
        'The class `Path` for handling paths to files and directories.\n'

        >>> from os.path import dirname
        >>> try:
        ...     with Path(dirname(__file__)).open_for_read():
        ...         pass
        ... except ValueError as ve:
        ...     print(str(ve)[-25:])
        does not identify a file.
        """
        self.enforce_file()
        return cast("TextIOBase", open(  # noqa: SIM115
            self, encoding=_get_text_encoding(self), errors="strict"))

    def read_all_str(self) -> str:
        r"""
        Read a file as a single string.

        Read the complete contents of a file as a single string. If the file
        is empty, an exception will be raised. No modification is applied to
        the text that is read.

        :returns: the single string of text
        :raises ValueError: if the path does not identify a file or if the
            file it identifies is empty

        >>> Path(__file__).read_all_str()[4:30]
        'The class `Path` for handl'

        >>> from tempfile import mkstemp
        >>> from os import remove as osremovex
        >>> h, p = mkstemp(text=True)
        >>> osclose(h)
        >>> try:
        ...     Path(p).read_all_str()
        ... except ValueError as ve:
        ...     print(str(ve)[-19:])
        ' contains no text.

        >>> with open(p, "wt") as tx:
        ...     tx.write("aa\n")
        ...     tx.write(" bb   ")
        3
        6
        >>> Path(p).read_all_str()
        'aa\n bb   '
        >>> osremovex(p)
        """
        with self.open_for_read() as reader:
            res: Final[str] = reader.read()
        if str.__len__(res) <= 0:
            raise ValueError(f"File {self!r} contains no text.")
        return res

    def open_for_write(self) -> TextIOBase:
        """
        Open the file for writing UTF-8 encoded text.

        The file is created if it does not exist yet and truncated
        otherwise. If the path cannot be opened for writing, some error will
        be raised.

        :returns: the text io wrapper for writing
        :raises ValueError: if the path does not identify a file or such a
            file cannot be created

        >>> from tempfile import mkstemp
        >>> from os import remove as osremovex
        >>> h, p = mkstemp(text=True)
        >>> osclose(h)
        >>> with Path(p).open_for_write() as wd:
        ...     wd.write("1234")
        4
        >>> Path(p).read_all_str()
        '1234'
        >>> osremovex(p)

        >>> from os.path import dirname
        >>> try:
        ...     with Path(dirname(__file__)).open_for_write() as wd:
        ...         pass
        ... except ValueError as ve:
        ...     print("does not identify a file." in str(ve))
        True
        """
        self.ensure_file_exists()
        return cast("TextIOBase", open(  # noqa: SIM115
            self, mode="w", encoding="utf-8", errors="strict"))

    def write_all_str(self, contents: str) -> None:
        r"""
        Write the given string to the file.

        The string `contents` is written to a file. If it does not end
        with `\n`, then `\n` will automatically be appended. No other changes
        are applied to `contents`. `contents` must be a `str` and it must not
        be empty.

        :param contents: the contents to write
        :raises TypeError: if the contents are not a string
        :raises ValueError: if the path is not a file or it cannot be opened
            as a file or the `contents` are an empty string

        >>> from tempfile import mkstemp
        >>> from os import remove as osremovex
        >>> h, p = mkstemp(text=True)
        >>> osclose(h)

        >>> try:
        ...     Path(p).write_all_str(["a"])
        ... except TypeError as te:
        ...     print(str(te))
        descriptor '__len__' requires a 'str' object but received a 'list'

        >>> Path(p).write_all_str("\na\nb")
        >>> Path(p).read_all_str()
        '\na\nb\n'

        >>> try:
        ...     Path(p).write_all_str("")
        ... except ValueError as ve:
        ...     print(str(ve)[:34])
        Cannot write empty content to file

        >>> osremovex(p)
        >>> from os.path import dirname
        >>> try:
        ...     Path(dirname(__file__)).write_all_str("a")
        ... except ValueError as ve:
        ...     print("does not identify a file." in str(ve))
        True
        """
        if str.__len__(contents) <= 0:
            raise ValueError(f"Cannot write empty content to file {self!r}.")
        with self.open_for_write() as writer:
            writer.write(contents)
            if contents[-1] != "\n":  # always terminate the last line
                writer.write("\n")

    def relative_to(self, base_path: str) -> str:
        """
        Compute a relative path of this path towards the given base path.

        :param base_path: the base directory, which must contain this path
        :returns: a relative path
        :raises ValueError: if this path is not inside `base_path` or the
            relativization result is otherwise invalid

        >>> from os.path import dirname
        >>> f = file_path(__file__)
        >>> d1 = directory_path(dirname(f))
        >>> d2 = directory_path(dirname(d1))
        >>> f.relative_to(d1)
        'path.py'
        >>> f.relative_to(d2)
        'io/path.py'
        >>> d1.relative_to(d1)
        '.'

        >>> try:
        ...     d1.relative_to(f)
        ... except ValueError as ve:
        ...     print(str(ve)[-30:])
        does not identify a directory.
        """
        opath: Final[Path] = Path(base_path)
        opath.enforce_contains(self)
        rv: Final[str] = relpath(self, opath)
        # Compare by value: whether `str.strip` hands back the very same
        # object for an unchanged string is an implementation detail.
        if (str.__len__(rv) == 0) or (str.strip(rv) != rv):
            raise ValueError(  # close to impossible
                f"Invalid relative path {rv!r} resulting from relativizing "
                f"{self!r} to {base_path!r}={opath!r}.")
        return rv

    def up(self, levels: int = 1) -> "Path":
        """
        Go up the directory tree for a given number of times.

        Get a `Path` identifying the containing directory, or its containing
        directory, depending on the number of `levels` specified.

        :param levels: the number levels to go up: `1` for getting the
            directly containing directory, `2` for the next higher directory,
            and so on.
        :returns: the resulting path
        :raises TypeError: if `levels` is not an integer
        :raises ValueError: if `levels` is out of range or if the directory
            tree does not have that many levels above this path

        >>> f = file_path(__file__)
        >>> print(f.up()[-13:])
        /pycommons/io
        >>> print(f.up(2)[-10:])
        /pycommons

        >>> try:
        ...     f.up(0)
        ... except ValueError as ve:
        ...     print(ve)
        levels=0 is invalid, must be in 1..255.

        >>> try:
        ...     f.up('x')
        ... except TypeError as te:
        ...     print(te)
        levels should be an instance of int but is str, namely 'x'.
        """
        s: str = self
        for _ in range(check_int_range(levels, "levels", 1, 255)):
            old: str = s
            s = dirname(s)
            # `dirname` of a root directory is the root directory itself
            if (str.__len__(s) == 0) or (s == old):
                raise ValueError(
                    f"Cannot go up from directory {old!r} anymore when going "
                    f"up for {levels} levels from {self!r}.")
        return directory_path(s)

    def basename(self) -> str:
        """
        Get the name of the file or directory identified by this path.

        :returns: the name of the file or directory
        :raises ValueError: if the path has no name component, e.g., if it
            is the root directory

        >>> file_path(__file__).basename()
        'path.py'

        >>> try:
        ...     Path("/").basename()
        ... except ValueError as ve:
        ...     print(ve)
        Invalid basename '' of path '/'.
        """
        s: Final[str] = osbasename(self)
        if str.__len__(s) <= 0:
            raise ValueError(f"Invalid basename {s!r} of path {self!r}.")
        return s

    def list_dir(self, files: bool = True,
                 directories: bool = True) -> Iterator["Path"]:
        """
        List the files and/or sub-directories in this directory.

        Directory entries are classified without following symbolic links.
        The returned iterator produces its elements lazily.

        :param files: should files be included in the listing?
        :param directories: should sub-directories be included?
        :returns: an iterator over the fully-qualified paths
        :raises TypeError: if `files` or `directories` is not a `bool`
        :raises ValueError: if both `files` and `directories` are `False`
            or if this path does not identify a directory

        >>> from tempfile import mkstemp, mkdtemp

        >>> dir1 = Path(mkdtemp())
        >>> dir2 = Path(mkdtemp(dir=dir1))
        >>> (h, tf1) = mkstemp(dir=dir1)
        >>> osclose(h)
        >>> file1 = Path(tf1)

        >>> set(dir1.list_dir()) == {dir2, file1}
        True
        >>> set(dir1.list_dir(files=False)) == {dir2}
        True
        >>> set(dir1.list_dir(directories=False)) == {file1}
        True

        >>> try:
        ...     dir1.list_dir(1)
        ... except TypeError as te:
        ...     print(te)
        files should be an instance of bool but is int, namely 1.

        >>> try:
        ...     dir1.list_dir(False, False)
        ... except ValueError as ve:
        ...     print(ve)
        files and directories cannot both be False.

        >>> delete_path(dir1)
        """
        if not isinstance(files, bool):
            raise type_error(files, "files", bool)
        if not isinstance(directories, bool):
            raise type_error(directories, "directories", bool)
        if not (files or directories):
            raise ValueError("files and directories cannot both be False.")
        self.enforce_dir()
        return map(self.resolve_inside, (
            entry.name for entry in scandir(self) if (
                directories and entry.is_dir(follow_symlinks=False)) or (
                files and entry.is_file(follow_symlinks=False))))
def file_path(pathstr: str) -> "Path":
    """
    Create a canonical path and make sure that it identifies a file.

    The string is first converted to a :class:`~Path` and then
    :meth:`~Path.enforce_file` is applied to the result.

    :param pathstr: the path string
    :returns: the path of the existing file
    :raises ValueError: if `pathstr` does not identify an existing file

    >>> file_path(__file__)[-20:]
    'pycommons/io/path.py'

    >>> from os.path import dirname
    >>> try:
    ...     file_path(dirname(__file__))
    ... except ValueError as ve:
    ...     print("does not identify a file." in str(ve))
    True
    """
    result: Final[Path] = Path(pathstr)
    Path.enforce_file(result)
    return result
def directory_path(pathstr: str) -> "Path":
    """
    Create a :class:`~Path` and make sure that it points to a directory.

    The string `pathstr` is first turned into a :class:`~Path` and then
    :meth:`~Path.enforce_dir` is invoked on that path before it is returned.

    :param pathstr: the path string
    :returns: the :class:`~Path` identifying the directory
    :raises ValueError: if the path does not identify a directory

    >>> from os.path import dirname
    >>> directory_path(dirname(__file__))[-12:]
    'pycommons/io'

    >>> try:
    ...     directory_path(__file__)
    ... except ValueError as ve:
    ...     print("does not identify a directory." in str(ve))
    True
    """
    checked: Final[Path] = Path(pathstr)
    checked.enforce_dir()
    return checked
#: The ends-with check: the unbound :meth:`str.endswith`, stored once at
#: module level. It is invoked as `__ENDSWITH(text, suffix)`; since it is the
#: `str` method descriptor, a non-`str` first argument yields a
#: :class:`TypeError`.
__ENDSWITH: Final[Callable[[str, str], bool]] = cast(
    "Callable[[str, str], bool]", str.endswith)
def line_writer(output: TextIO | TextIOBase) -> Callable[[str], None]:
    r"""
    Wrap an output stream into a function writing newline-terminated lines.

    The returned :class:`typing.Callable` accepts one string per invocation
    and writes it to `output`. If that string does not already end in `"\n"`,
    a `"\n"` is written directly after it, so whatever is written is always
    terminated by a newline. If the argument is not a :class:`str`, a
    :class:`TypeError` is raised; the check happens before the argument would
    be written.

    Background: Neither :meth:`~io.TextIOBase.write` nor
    :meth:`~io.IOBase.writelines` of class :class:`io.TextIOBase` adds a
    `"\n"` to the text it writes. Strings that are not manually terminated
    with `"\n"` therefore end up on one single line. Two helpers deal with
    this: :func:`line_writer` wraps :meth:`~io.TextIOBase.write` into a
    function that appends the missing `"\n"` to each string it receives, and
    :func:`write_lines` walks over a sequence of strings and writes each of
    them, adding the `"\n"` terminator where necessary, to a given
    :class:`typing.TextIO`.

    :param output: the output stream
    :returns: a :class:`typing.Callable` which writes every string passed to
        it as a properly terminated line to the output stream
    :raises TypeError: if `output` is not an instance of
        :class:`io.TextIOBase`.

    >>> from tempfile import mkstemp
    >>> from os import close as osclose
    >>> from os import remove as osremove
    >>> (h, tf) = mkstemp()
    >>> osclose(h)

    >>> with open(tf, "wt") as out:
    ...     w = line_writer(out)
    ...     w("123")
    >>> with open(tf, "rt") as inp:
    ...     print(list(inp))
    ['123\n']

    >>> with open(tf, "wt") as out:
    ...     w = line_writer(out)
    ...     w("")
    >>> with open(tf, "rt") as inp:
    ...     print(list(inp))
    ['\n']

    >>> with open(tf, "wt") as out:
    ...     w = line_writer(out)
    ...     w("123\n")
    >>> with open(tf, "rt") as inp:
    ...     print(list(inp))
    ['123\n']

    >>> with open(tf, "wt") as out:
    ...     w = line_writer(out)
    ...     w("\n")
    >>> with open(tf, "rt") as inp:
    ...     print(list(inp))
    ['\n']

    >>> with open(tf, "wt") as out:
    ...     w = line_writer(out)
    ...     w("123")
    ...     w("456")
    >>> with open(tf, "rt") as inp:
    ...     print(list(inp))
    ['123\n', '456\n']

    >>> with open(tf, "wt") as out:
    ...     w = line_writer(out)
    ...     w("123 ")
    ...     w("")
    ...     w(" 456")
    >>> with open(tf, "rt") as inp:
    ...     print(list(inp))
    ['123 \n', '\n', ' 456\n']

    >>> with open(tf, "wt") as out:
    ...     w = line_writer(out)
    ...     w("123 \n")
    ...     w("\n")
    ...     w(" 456")
    >>> with open(tf, "rt") as inp:
    ...     print(list(inp))
    ['123 \n', '\n', ' 456\n']

    >>> try:
    ...     with open(tf, "wt") as out:
    ...         w = line_writer(out)
    ...         w("123 ")
    ...         w(None)
    ... except TypeError as te:
    ...     print(str(te)[:-10])
    descriptor 'endswith' for 'str' objects doesn't apply to a 'NoneTy

    >>> try:
    ...     with open(tf, "wt") as out:
    ...         w = line_writer(out)
    ...         w("123 ")
    ...         w(2)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'endswith' for 'str' objects doesn't apply to a 'int' object

    >>> osremove(tf)

    >>> try:
    ...     line_writer(1)
    ... except TypeError as te:
    ...     print(te)
    output should be an instance of io.TextIOBase but is int, namely 1.

    >>> try:
    ...     line_writer(None)
    ... except TypeError as te:
    ...     print(te)
    output should be an instance of io.TextIOBase but is None.
    """
    if not isinstance(output, TextIOBase):
        raise type_error(output, "output", TextIOBase)

    # Look up the bound write method once; it becomes the default argument
    # of the closure below, so no attribute access is needed per call.
    emit: Final[Callable[[str], Any]] = output.write

    def __call(s: str, __w: Callable[[str], Any] = emit) -> None:
        # The ends-with test comes first: a non-str argument fails here with
        # a TypeError before anything reaches the stream.
        needs_newline: Final[bool] = not __ENDSWITH(s, "\n")
        __w(s)
        if needs_newline:
            __w("\n")

    return cast("Callable[[str], None]", __call)
def __line_iterator(lines: Iterable[str]) -> Generator[str, None, None]:
    r"""
    Yield the given lines, each followed by `"\n"` if it lacks one.

    Every element of `lines` is yielded unchanged. If an element does not end
    in `"\n"`, a separate `"\n"` string is yielded right after it. An element
    that is not a :class:`str` causes a :class:`TypeError` before it would be
    yielded.

    :param lines: the lines
    :returns: the generator

    >>> list(__line_iterator([]))
    []

    >>> list(__line_iterator(['a']))
    ['a', '\n']

    >>> list(__line_iterator(['a', 'b']))
    ['a', '\n', 'b', '\n']

    >>> list(__line_iterator(['a\n']))
    ['a\n']

    >>> list(__line_iterator(['a\n', 'b']))
    ['a\n', 'b', '\n']

    >>> list(__line_iterator(['a', 'b\n']))
    ['a', '\n', 'b\n']

    >>> list(__line_iterator(['a\n', 'b\n']))
    ['a\n', 'b\n']

    >>> try:
    ...     list(__line_iterator(["a", 1]))
    ... except TypeError as te:
    ...     print(te)
    descriptor 'endswith' for 'str' objects doesn't apply to a 'int' object
    """
    for text in lines:
        # The check runs before the element is handed out, so a non-str
        # element raises without being yielded.
        if __ENDSWITH(text, "\n"):
            yield text
        else:
            yield text
            yield "\n"
def write_lines(lines: Iterable[str], output: TextIO | TextIOBase) -> None:
    r"""
    Write each string of an :class:`typing.Iterable` as a line to the output.

    Every string in `lines` is written to `output`; if it does not end in
    `"\n"`, a `"\n"` is written after it. The types of both arguments are
    checked first, `lines` before `output`. The strings are processed lazily:
    if an element of `lines` is not a :class:`str`, the resulting
    :class:`TypeError` is raised only when that element is reached, and the
    elements before it have already been written.

    Background: Neither :meth:`~io.TextIOBase.write` nor
    :meth:`~io.IOBase.writelines` of class :class:`io.TextIOBase` adds a
    `"\n"` to the text it writes. Strings that are not manually terminated
    with `"\n"` therefore end up on one single line. Two helpers deal with
    this: :func:`line_writer` wraps :meth:`~io.TextIOBase.write` into a
    function that appends the missing `"\n"` to each string it receives, and
    :func:`write_lines` walks over a sequence of strings and writes each of
    them, adding the `"\n"` terminator where necessary, to a given
    :class:`typing.TextIO`.

    :param lines: the lines
    :param output: the output
    :raises TypeError: If anything is of the wrong type.

    >>> from io import StringIO

    >>> with StringIO() as sio:
    ...     write_lines(("123", "456"), sio)
    ...     print(sio.getvalue())
    123
    456
    <BLANKLINE>

    >>> from io import StringIO
    >>> with StringIO() as sio:
    ...     write_lines(("123\n", "456"), sio)
    ...     print(sio.getvalue())
    123
    456
    <BLANKLINE>

    >>> from io import StringIO
    >>> with StringIO() as sio:
    ...     write_lines(("123\n", "456\n"), sio)
    ...     print(sio.getvalue())
    123
    456
    <BLANKLINE>

    >>> with StringIO() as sio:
    ...     write_lines(["123"], sio)
    ...     print(sio.getvalue())
    123
    <BLANKLINE>

    >>> with StringIO() as sio:
    ...     write_lines(["123\n"], sio)
    ...     print(sio.getvalue())
    123
    <BLANKLINE>

    >>> with StringIO() as sio:
    ...     write_lines("123", sio)
    ...     print(sio.getvalue())
    1
    2
    3
    <BLANKLINE>

    >>> with StringIO() as sio:
    ...     write_lines((sss for sss in ["123", "abc"]), sio)
    ...     print(sio.getvalue())
    123
    abc
    <BLANKLINE>

    >>> with StringIO() as sio:
    ...     write_lines("", sio)
    ...     print(sio.getvalue())
    <BLANKLINE>

    >>> from tempfile import mkstemp
    >>> from os import close as osclose
    >>> from os import remove as osremove
    >>> (h, tf) = mkstemp()
    >>> osclose(h)

    >>> with open(tf, "wt") as out:
    ...     write_lines(["123"], out)
    >>> with open(tf, "rt") as inp:
    ...     print(list(inp))
    ['123\n']

    >>> with open(tf, "wt") as out:
    ...     write_lines([""], out)
    >>> with open(tf, "rt") as inp:
    ...     print(repr(inp.read()))
    '\n'

    >>> with open(tf, "wt") as out:
    ...     write_lines(["\n"], out)
    >>> with open(tf, "rt") as inp:
    ...     print(repr(inp.read()))
    '\n'

    >>> with open(tf, "wt") as out:
    ...     write_lines([" \n"], out)
    >>> with open(tf, "rt") as inp:
    ...     print(repr(inp.read()))
    ' \n'

    >>> osremove(tf)

    >>> with StringIO() as sio:
    ...     write_lines(["\n"], sio)
    ...     print(repr(sio.getvalue()))
    '\n'

    >>> with StringIO() as sio:
    ...     write_lines([""], sio)
    ...     print(repr(sio.getvalue()))
    '\n'

    >>> sio = StringIO()
    >>> try:
    ...     write_lines(None, sio)
    ... except TypeError as te:
    ...     print(te)
    lines should be an instance of typing.Iterable but is None.

    >>> sio = StringIO()
    >>> try:
    ...     write_lines(123, sio)
    ... except TypeError as te:
    ...     print(te)
    lines should be an instance of typing.Iterable but is int, namely 123.

    >>> sio = StringIO()
    >>> try:
    ...     write_lines([1, "sdf"], sio)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'endswith' for 'str' objects doesn't apply to a 'int' object

    >>> sio = StringIO()
    >>> try:
    ...     write_lines(["sdf", 1], sio)
    ... except TypeError as te:
    ...     print(te)
    descriptor 'endswith' for 'str' objects doesn't apply to a 'int' object
    >>> print(repr(sio.getvalue()))
    'sdf\n'

    >>> try:
    ...     write_lines("x", None)
    ... except TypeError as te:
    ...     print(te)
    output should be an instance of io.TextIOBase but is None.

    >>> try:
    ...     write_lines("x", 1)
    ... except TypeError as te:
    ...     print(te)
    output should be an instance of io.TextIOBase but is int, namely 1.

    >>> try:
    ...     write_lines(2, 1)
    ... except TypeError as te:
    ...     print(te)
    lines should be an instance of typing.Iterable but is int, namely 2.
    """
    # Validate the arguments in declaration order, so that a wrong `lines`
    # is reported before a wrong `output`.
    for value, name, expected in (
            (lines, "lines", Iterable),
            (output, "output", TextIOBase)):
        if not isinstance(value, expected):
            raise type_error(value, name, expected)

    # Hand the stream a lazy generator: elements are terminated and written
    # one by one instead of being collected beforehand.
    terminated = __line_iterator(lines)
    output.writelines(terminated)
def delete_path(path: str) -> None:
    """
    Delete a file, or a directory together with everything inside it.

    This is deliberately a stand-alone function and not a method of
    :class:`Path`: deleting should be an explicit act and should not happen
    by accident.

    The error behavior differs between the two cases. A directory is removed
    via :func:`shutil.rmtree` with `ignore_errors=True`, so failures while
    deleting a directory tree are ignored. A single file is removed via
    :func:`os.remove` without any error handling, so an error raised there
    propagates to the caller.

    :param path: The path to be deleted
    :raises ValueError: if `path` does not refer to an existing file or
        directory
    :raises TypeError: if `path` is not a string (presumably raised while the
        :class:`Path` is constructed; this function performs no type check
        of its own)

    >>> from tempfile import mkstemp, mkdtemp
    >>> from os import close as osxclose

    >>> (h, tf) = mkstemp()
    >>> osxclose(h)
    >>> isfile(tf)
    True
    >>> delete_path(tf)
    >>> isfile(tf)
    False

    >>> try:
    ...     delete_path(tf)
    ... except ValueError as ve:
    ...     print(str(ve).endswith("is neither file nor directory."))
    True

    >>> td = mkdtemp()
    >>> isdir(td)
    True
    >>> delete_path(td)
    >>> isdir(td)
    False

    >>> try:
    ...     delete_path(td)
    ... except ValueError as ve:
    ...     print(str(ve).endswith("is neither file nor directory."))
    True
    """
    target: Final[Path] = Path(path)
    if isfile(target):
        # plain file: errors of os.remove are not suppressed
        osremove(target)
        return
    if not isdir(target):
        raise ValueError(f"{path!r} is neither file nor directory.")
    # directory: best-effort recursive removal, errors are ignored
    rmtree(target, ignore_errors=True)