Coverage for bookbuilderpy/url.py: 18%

1"""Loading of data from urls."""

3from typing import Final, cast

5import certifi

6import urllib3 # type: ignore

8from bookbuilderpy.path import UTF8

9from bookbuilderpy.strings import (

10 enforce_non_empty_str,

11 enforce_non_empty_str_without_ws,

12)

#: The HTTP connection pool shared by all download functions of this module.
#: Certificate verification is mandatory and uses the CA bundle from certifi.
__HTTP: Final[urllib3.PoolManager] = urllib3.PoolManager(
    cert_reqs="CERT_REQUIRED",
    ca_certs=certifi.where(),
)

def __name(request: urllib3.HTTPResponse) -> str:
    """
    Extract the file name from a request.

    The ``Content-Disposition`` header is consulted first: if it carries a
    ``filename`` parameter, its value (double-quoted, single-quoted, or a
    bare token) is used. Otherwise, the name is the last path component of
    the request URL with any ``#fragment`` and ``?query`` removed.

    :param request: the request
    :return: the file name
    :raises ValueError: if the URL contains no ``/``; the string validators
        applied to the result may presumably raise as well (their
        implementation is outside this module)
    """
    # NOTE(review): the nesting of the quote-handling branches below was
    # reconstructed from a listing without indentation - confirm against
    # the repository version.
    header_key: str = "Content-Disposition"
    if header_key in request.headers:
        content_disp: str = request.headers[header_key]
        i: int = content_disp.find("filename")
        if i >= 0:
            i = content_disp.find("=", i + 1)
            if i > 0:
                # Try a double-quoted value first, then a single-quoted one.
                for quote in ('"', "'"):
                    k = content_disp.find(quote, i + 1)
                    if k > 0:
                        k += 1
                        j = content_disp.find(quote, k)
                        if j > k:
                            return enforce_non_empty_str_without_ws(
                                content_disp[k:j])
                        # An opening quote without a non-empty, terminated
                        # value: give up on the header, use the URL instead.
                        break
                else:
                    # No quotes at all: the value is a bare token. It ends
                    # at the next ';' (further header parameters may
                    # follow) and may be padded with white space.
                    return enforce_non_empty_str_without_ws(
                        content_disp[i + 1:].split(";", 1)[0].strip())

    _url = enforce_non_empty_str(request.geturl())
    url = _url
    # Drop the fragment first, then the query string.
    for separator in ("#", "?"):
        last = url.rfind(separator)
        if last > 0:
            url = url[:last]
    first = url.rfind("/")
    if first < 0:
        raise ValueError(f"Invalid URL '{_url}'.")
    return enforce_non_empty_str_without_ws(url[first + 1:])

def load_binary_from_url(url: str) -> tuple[str, bytes]:
    """
    Load all the binary data from one url.

    The response is closed in every case, including when the status check
    or the file name extraction fails.

    :param url: the url
    :return: a tuple of the file name and the binary data that was loaded
    :raises ValueError: if the HTTP status of the response is not 200
    """
    request: urllib3.HTTPResponse = cast(
        urllib3.HTTPResponse, __HTTP.request("GET", url))
    try:
        if request.status != 200:
            raise ValueError(
                f"Error '{request.status}' when downloading url '{url}'.")
        data = request.data
        name = __name(request)
    finally:
        # Close the response on the error paths as well, not only on success.
        request.close()
    return name, data

def load_text_from_url(url: str) -> tuple[str, str]:
    """
    Download one url and decode its content as text.

    The raw bytes are fetched via :func:`load_binary_from_url` and decoded
    with the ``UTF8`` encoding constant.

    :param url: the url
    :return: a tuple of the file name and the decoded text
    """
    downloaded = load_binary_from_url(url)
    file_name = downloaded[0]
    text = downloaded[1].decode(UTF8)
    return file_name, text