Coverage for bookbuilderpy/url.py: 18%
50 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-17 23:15 +0000
1"""Loading of data from urls."""
3from typing import Final, cast
5import certifi
6import urllib3 # type: ignore
8from bookbuilderpy.path import UTF8
9from bookbuilderpy.strings import (
10 enforce_non_empty_str,
11 enforce_non_empty_str_without_ws,
12)
#: The HTTP connection pool shared by all download functions of this
#: module: certificate verification is required and the CA bundle path
#: is obtained from ``certifi.where()``.
__HTTP: Final[urllib3.PoolManager] = urllib3.PoolManager(
    cert_reqs="CERT_REQUIRED",
    ca_certs=certifi.where(),
)
def __name(request: urllib3.HTTPResponse) -> str:
    """
    Extract the file name from a request.

    The ``filename`` parameter of the ``Content-Disposition`` header is
    preferred. If the header is missing, has no ``filename=`` parameter,
    or its quoted value is empty or unterminated, the name is instead the
    last path component of the request URL, with any fragment (``#...``)
    and query (``?...``) removed.

    :param request: the request
    :return: the file name
    :raises ValueError: if the URL contains no ``/``; the ``enforce_*``
        helpers (defined elsewhere) presumably also raise on empty names
        or names containing white space -- verify against their source
    """
    header_name = "Content-Disposition"
    if header_name in request.headers:
        content_disp: str = request.headers[header_name]
        key_pos: int = content_disp.find("filename")
        eq_pos: int = (content_disp.find("=", key_pos + 1)
                       if key_pos >= 0 else -1)
        if eq_pos > 0:
            # Prefer a double-quoted value; only if there is no double
            # quote at all after the '=' do we look for a single quote.
            quote = '"'
            start = content_disp.find(quote, eq_pos + 1)
            if start <= 0:
                quote = "'"
                start = content_disp.find(quote, eq_pos + 1)

            if start > 0:
                start += 1
                end = content_disp.find(quote, start)
                if end > start:
                    return enforce_non_empty_str_without_ws(
                        content_disp[start:end])
                # Empty or unterminated quoted value: fall back to URL.
            else:
                # Unquoted value: it ends at the next ';' (start of the
                # following header parameter) and may be padded with
                # blanks, neither of which belongs to the file name.
                return enforce_non_empty_str_without_ws(
                    content_disp[eq_pos + 1:].partition(";")[0].strip())

    _url = enforce_non_empty_str(request.geturl())
    url = _url
    # The fragment begins at the FIRST '#', and the query at the FIRST
    # '?' of what remains, so cut there instead of at the last occurrence.
    for separator in ("#", "?"):
        cut = url.find(separator)
        if cut > 0:
            url = url[:cut]
    first = url.rfind("/")
    if first < 0:
        raise ValueError(f"Invalid URL '{_url}'.")
    return enforce_non_empty_str_without_ws(url[first + 1:])
def load_binary_from_url(url: str) -> tuple[str, bytes]:
    """
    Load all the binary data from one url.

    A ``GET`` request is issued through the shared HTTP pool. The
    response is closed in every case, including when an error is raised.

    :param url: the url
    :return: a tuple of the file name and the binary data that was loaded
    :raises ValueError: if the HTTP status of the response is not 200
    """
    request: urllib3.HTTPResponse = cast(
        urllib3.HTTPResponse, __HTTP.request("GET", url))
    try:
        if request.status != 200:
            raise ValueError(
                f"Error '{request.status}' when downloading url '{url}'.")
        data = request.data
        name = __name(request)
    finally:
        # Close the response on the error paths as well; previously it
        # stayed open if the status check or the name extraction raised.
        request.close()
    return name, data
def load_text_from_url(url: str) -> tuple[str, str]:
    """
    Download the resource at one url and decode it to text.

    The raw bytes are obtained via :func:`load_binary_from_url` and then
    decoded using the ``UTF8`` encoding constant.

    :param url: the url
    :return: a tuple of the file name and the decoded text
    """
    loaded = load_binary_from_url(url)
    file_name = loaded[0]
    text = loaded[1].decode(UTF8)
    return file_name, text