# Source code for bookbuilderpy.url

"""Loading of data from urls."""

from email.message import Message
from typing import Final, cast

import certifi
import urllib3  # type: ignore

from bookbuilderpy.path import UTF8
from bookbuilderpy.strings import (
    enforce_non_empty_str,
    enforce_non_empty_str_without_ws,
)

#: The single connection pool used by this module for its downloads.
#: Server certificates are required and are checked against the CA bundle
#: whose location is reported by certifi.
__HTTP: Final[urllib3.PoolManager] = urllib3.PoolManager(
    ca_certs=certifi.where(),
    cert_reqs="CERT_REQUIRED",
)


def __name(request: urllib3.HTTPResponse) -> str:
    """
    Extract the file name from a request.

    The `filename` parameter of the `Content-Disposition` header is
    preferred. It may be double-quoted, single-quoted, unquoted, or given
    in the extended `filename*=charset'language'value` form, and it may be
    followed by further header parameters. Any directory part of the
    announced name is discarded. If the header is absent or does not carry
    a usable file name, the text after the last `/` of the request URL
    (with fragment and query string removed) is used instead.

    The result is passed through `enforce_non_empty_str_without_ws` before
    it is returned.

    :param request: the request
    :return: the file name
    :raises ValueError: if the URL does not contain any `/`
    """
    header: Final[str] = "Content-Disposition"
    if header in request.headers:
        # Delegate the parameter parsing to the standard library instead of
        # searching for quote characters by hand: it only looks at the
        # `filename` parameter itself, so quotes or text belonging to other
        # parameters can no longer be mistaken for the file name.
        parsed = Message()
        parsed[header] = request.headers[header]
        announced = parsed.get_filename()
        if announced:
            quote = announced[0]
            if quote in ("'", '"'):
                # Double quotes are already removed by the parser when they
                # are balanced. Single quotes are still accepted when they
                # enclose the value; an unbalanced quote marks a malformed
                # value, for which we fall back to the URL.
                balanced = len(announced) > 1 and announced[-1] == quote
                announced = announced[1:-1] if balanced else ""
            # The header is controlled by the server: keep only the last
            # path component so that it cannot point into a directory.
            announced = announced.replace("\\", "/").rpartition("/")[2]
            if announced:
                return enforce_non_empty_str_without_ws(announced)

    full_url = enforce_non_empty_str(request.geturl())
    # The fragment begins at the first '#' and the query at the first '?',
    # so cut there; a later '?' or '/' may be part of the query itself.
    path = full_url.partition("#")[0].partition("?")[0]
    slash = path.rfind("/")
    if slash < 0:
        raise ValueError(f"Invalid URL '{full_url}'.")
    return enforce_non_empty_str_without_ws(path[slash + 1:])


def load_binary_from_url(url: str) -> tuple[str, bytes]:
    """
    Load all the binary data from one url.

    A `GET` request is sent through the shared HTTP pool. The response is
    closed before this function returns or raises.

    :param url: the url
    :return: a tuple of the file name and the binary data that was loaded
    :raises ValueError: if the response status is not 200
    """
    response: urllib3.HTTPResponse = cast(
        urllib3.HTTPResponse, __HTTP.request("GET", url))
    try:
        if response.status != 200:
            raise ValueError(
                f"Error '{response.status}' when downloading url '{url}'.")
        data = response.data
        name = __name(response)
    finally:
        # Close on every path, including a bad status or a failure while
        # determining the file name.
        response.close()
    return name, data


def load_text_from_url(url: str) -> tuple[str, str]:
    """
    Load all the text from one url.

    The data is downloaded with :func:`load_binary_from_url` and then
    decoded using the `UTF8` encoding constant of `bookbuilderpy.path`.

    :param url: the url
    :return: a tuple of the file name and the text that was loaded
    """
    file_name, raw = load_binary_from_url(url)
    text: str = raw.decode(UTF8)
    return file_name, text