Coverage for bookbuilderpy/url.py: 18%

50 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-17 23:15 +0000

1"""Loading of data from urls.""" 

2 

3from typing import Final, cast 

4 

5import certifi 

6import urllib3 # type: ignore 

7 

8from bookbuilderpy.path import UTF8 

9from bookbuilderpy.strings import ( 

10 enforce_non_empty_str, 

11 enforce_non_empty_str_without_ws, 

12) 

13 

#: The HTTP connection pool shared by all downloads of this module.
#: Server certificates must be valid and are checked against the
#: CA bundle located via :mod:`certifi`.
__HTTP: Final[urllib3.PoolManager] = urllib3.PoolManager(
    ca_certs=certifi.where(),
    cert_reqs="CERT_REQUIRED",
)

17 

18 

def __name(request: urllib3.HTTPResponse) -> str:
    """
    Extract the file name from a request.

    If the response carries a `Content-Disposition` header with a usable
    `filename` parameter, that parameter is the file name. Otherwise, the
    name is the part of the request URL after its last slash, where a
    trailing `#fragment` and `?query` are cut off first.

    :param request: the request
    :return: the file name
    :raises ValueError: if the URL does not contain any slash
    """
    # Imported here so that the block brings its own dependency into scope.
    from email.message import Message

    header: Final[str] = "Content-Disposition"
    if header in request.headers:
        # Let the standard library split the header into its parameters.
        # This way, parameters following an unquoted name
        # (`filename=a.txt; size=3`) and quotes belonging to other
        # parameters cannot leak into the file name. The parser also
        # removes double quotes and decodes the `filename*=` form.
        parser = Message()
        parser[header] = request.headers[header]
        filename = parser.get_filename()
        # Single-quoted names are non-standard, but tolerated here.
        if filename and (len(filename) >= 2) \
                and (filename[0] == filename[-1] == "'"):
            filename = filename[1:-1]
        # An empty name is ignored and the URL is used instead.
        if filename:
            return enforce_non_empty_str_without_ws(filename)

    _url = enforce_non_empty_str(request.geturl())
    url = _url
    # Cut off the fragment first and then the query string.
    last = url.rfind("#")
    if last > 0:
        url = url[:last]
    last = url.rfind("?")
    if last > 0:
        url = url[:last]
    # Whatever follows the last slash is the file name.
    first = url.rfind("/")
    if first < 0:
        raise ValueError(f"Invalid URL '{_url}'.")
    return enforce_non_empty_str_without_ws(url[first + 1:])

63 

64 

def load_binary_from_url(url: str) -> tuple[str, bytes]:
    """
    Load all the binary data from one url.

    The url is requested via `GET` using the shared HTTP pool. The
    response is closed in any case, i.e., also if the download fails.

    :param url: the url
    :return: a tuple of the file name and the binary data that was loaded
    :raises ValueError: if the response status is not 200
    """
    request: urllib3.HTTPResponse = cast(
        urllib3.HTTPResponse, __HTTP.request("GET", url))
    try:
        if request.status != 200:
            raise ValueError(
                f"Error '{request.status}' when downloading url '{url}'.")
        data = request.data
        name = __name(request)
    finally:
        # Close the response on the error paths as well: both the status
        # check and the file name extraction may raise.
        request.close()
    return name, data

81 

82 

def load_text_from_url(url: str) -> tuple[str, str]:
    """
    Download one url and return its contents as text.

    The binary data obtained from :func:`load_binary_from_url` is decoded
    using the `UTF8` encoding constant.

    :param url: the url
    :return: a tuple of the file name and the text that was loaded
    """
    loaded: tuple[str, bytes] = load_binary_from_url(url)
    return loaded[0], loaded[1].decode(UTF8)