Refactor: adjust the URL/file-system data iterators for extensions

The logic to generalize over downloading from a URL or reading data
from the file-system included static values in each item the iterator
yielded, along with the data being read, so the caller could access
the total expected size of the data. While this worked, the result
didn't read well.

Now these iterators yield only the data being read; a new type has
been added from which the caller can read the size hint.

Also correct invalid doc-string.
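
In outline: the caller creates a `DataRetrieveInfo`, passes it into the
iterator, and reads `size_hint` back once iteration starts. A minimal
sketch of the pattern, not taken verbatim from the change (the
`read_data_iter` helper, file name and chunk size are hypothetical;
`DataRetrieveInfo` and `size_hint` match the new type in the diff below):

    import os
    from collections.abc import Iterator


    class DataRetrieveInfo:
        """A "return" argument: the iterator fills in ``size_hint`` once known."""
        __slots__ = ("size_hint",)

        def __init__(self) -> None:
            self.size_hint = -1  # Remains -1 when the size is unknown.


    def read_data_iter(
            filepath: str,
            *,
            chunk_size: int,
            retrieve_info: DataRetrieveInfo,
    ) -> Iterator[bytes]:
        # Hypothetical file-reading iterator following the new convention:
        # yield only bytes, report the total size via `retrieve_info`.
        with open(filepath, "rb") as fh:
            retrieve_info.size_hint = os.fstat(fh.fileno()).st_size
            # Yield an empty first block so progress display can start
            # before time is spent reading data.
            yield b""
            while block := fh.read(chunk_size):
                yield block


    retrieve_info = DataRetrieveInfo()
    read_total = 0
    for block in read_data_iter("example.bin", chunk_size=65536, retrieve_info=retrieve_info):
        read_total += len(block)
        print("Read {:d} of {:d} bytes".format(read_total, retrieve_info.size_hint))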
Campbell Barton
2025-01-07 17:42:39 +11:00
parent 9af355fcc1
commit 104c655fb7


@@ -1290,6 +1290,26 @@ class PathPatternMatch:
 # -----------------------------------------------------------------------------
 # URL Downloading
 
+# NOTE:
+# - Using return arguments isn't ideal but is better than including
+#   a static value in the iterator.
+# - Other data could be added here as needed (response headers if the caller needs them).
+class DataRetrieveInfo:
+    """
+    When accessing a file from a URL or from the file-system,
+    this is a "return" argument so the caller can know the total size
+    of the data being iterated over, or -1 when the size is not known.
+    """
+    __slots__ = (
+        "size_hint",
+    )
+
+    size_hint: int
+
+    def __init__(self) -> None:
+        self.size_hint = -1
+
+
 # Originally based on `urllib.request.urlretrieve`.
 def url_retrieve_to_data_iter(
         url: str,
@@ -1298,21 +1318,16 @@ def url_retrieve_to_data_iter(
         headers: dict[str, str],
         chunk_size: int,
         timeout_in_seconds: float,
-) -> Iterator[tuple[bytes, int, Any]]:
+        retrieve_info: DataRetrieveInfo,
+) -> Iterator[bytes]:
     """
-    Retrieve a URL into a temporary location on disk.
+    Iterate over byte data downloaded from a URL,
+    limited to ``chunk_size``.
 
-    Requires a URL argument. If a filename is passed, it is used as
-    the temporary file location. The reporthook argument should be
-    a callable that accepts a block number, a read size, and the
-    total file size of the URL target. The data argument should be
-    valid URL encoded data.
-
-    If a filename is passed and the URL points to a local resource,
-    the result is a copy from local file to new file.
-
-    Returns a tuple containing the path to the newly created
-    data file as well as the resulting HTTPMessage object.
+    - The ``retrieve_info.size_hint``
+      will be set once the iterator starts and can be used for progress display.
+    - The iterator will start with an empty block, so the size can be known
+      before time is spent downloading data.
     """
     from urllib.error import ContentTooShortError
     from urllib.request import urlopen
@@ -1334,7 +1349,10 @@ def url_retrieve_to_data_iter(
         if "content-length" in response_headers:
             size = int(response_headers["Content-Length"])
 
-        yield (b'', size, response_headers)
+        retrieve_info.size_hint = size
+
+        # Yield an empty block so progress display may start.
+        yield b""
 
         if timeout_in_seconds <= 0.0:
             while True:
@@ -1342,14 +1360,14 @@ def url_retrieve_to_data_iter(
                 if not block:
                     break
                 read += len(block)
-                yield (block, size, response_headers)
+                yield block
         else:
             while True:
                 block = read_with_timeout(fp, chunk_size, timeout_in_seconds=timeout_in_seconds)
                 if not block:
                     break
                 read += len(block)
-                yield (block, size, response_headers)
+                yield block
 
     if size >= 0 and read < size:
         raise ContentTooShortError(
@@ -1358,6 +1376,7 @@ def url_retrieve_to_data_iter(
         )
 
 
+# See `url_retrieve_to_data_iter` doc-string.
 def url_retrieve_to_filepath_iter(
         url: str,
         filepath: str,
@@ -1366,69 +1385,77 @@ def url_retrieve_to_filepath_iter(
         data: Any | None = None,
         chunk_size: int,
         timeout_in_seconds: float,
-) -> Iterator[tuple[int, int, Any]]:
+        retrieve_info: DataRetrieveInfo,
+) -> Iterator[int]:
     # Handle temporary file setup.
     with open(filepath, 'wb') as fh_output:
-        for block, size, response_headers in url_retrieve_to_data_iter(
+        for block in url_retrieve_to_data_iter(
                 url,
                 headers=headers,
                 data=data,
                 chunk_size=chunk_size,
                 timeout_in_seconds=timeout_in_seconds,
+                retrieve_info=retrieve_info,
         ):
             fh_output.write(block)
-            yield (len(block), size, response_headers)
+            yield len(block)
 
 
+# See `url_retrieve_to_data_iter` doc-string.
 def filepath_retrieve_to_filepath_iter(
         filepath_src: str,
         filepath: str,
         *,
         chunk_size: int,
         timeout_in_seconds: float,
-) -> Iterator[tuple[int, int]]:
+        retrieve_info: DataRetrieveInfo,
+) -> Iterator[int]:
     # TODO: `timeout_in_seconds`.
     # Handle temporary file setup.
     _ = timeout_in_seconds
     with open(filepath_src, 'rb') as fh_input:
-        size = os.fstat(fh_input.fileno()).st_size
+        retrieve_info.size_hint = os.fstat(fh_input.fileno()).st_size
+        yield 0
+
         with open(filepath, 'wb') as fh_output:
             while (block := fh_input.read(chunk_size)):
                 fh_output.write(block)
-                yield (len(block), size)
+                yield len(block)
 
 
 def url_retrieve_to_data_iter_or_filesystem(
         url: str,
         headers: dict[str, str],
         *,
         chunk_size: int,
         timeout_in_seconds: float,
+        retrieve_info: DataRetrieveInfo,
 ) -> Iterator[bytes]:
     if url_is_filesystem(url):
         with open(path_from_url(url), "rb") as fh_source:
+            retrieve_info.size_hint = os.fstat(fh_source.fileno()).st_size
+            yield b""
+
             while (block := fh_source.read(chunk_size)):
                 yield block
     else:
-        for (
-            block,
-            _size,
-            _response_headers,
-        ) in url_retrieve_to_data_iter(
+        yield from url_retrieve_to_data_iter(
             url,
             headers=headers,
             chunk_size=chunk_size,
             timeout_in_seconds=timeout_in_seconds,
-        ):
-            yield block
+            retrieve_info=retrieve_info,
+        )
 
 
+# See `url_retrieve_to_data_iter` doc-string.
 def url_retrieve_to_filepath_iter_or_filesystem(
         url: str,
         filepath: str,
         *,
         headers: dict[str, str],
         chunk_size: int,
         timeout_in_seconds: float,
-) -> Iterator[tuple[int, int]]:
+        retrieve_info: DataRetrieveInfo,
+) -> Iterator[int]:
     """
     Callers should catch: ``(Exception, KeyboardInterrupt)`` and convert them to message using:
     ``url_retrieve_exception_as_message``.
@@ -1439,16 +1466,17 @@ def url_retrieve_to_filepath_iter_or_filesystem(
             filepath,
             chunk_size=chunk_size,
             timeout_in_seconds=timeout_in_seconds,
+            retrieve_info=retrieve_info,
         )
     else:
-        for (read, size, _response_headers) in url_retrieve_to_filepath_iter(
+        yield from url_retrieve_to_filepath_iter(
            url,
            filepath,
            headers=headers,
            chunk_size=chunk_size,
            timeout_in_seconds=timeout_in_seconds,
-        ):
-            yield (read, size)
+            retrieve_info=retrieve_info,
+        )
 
 
 def url_retrieve_exception_is_connectivity(
@@ -2965,8 +2993,9 @@ def repo_sync_from_remote(
             return False
 
     try:
+        retrieve_info = DataRetrieveInfo()
         read_total = 0
-        for (read, size) in url_retrieve_to_filepath_iter_or_filesystem(
+        for read in url_retrieve_to_filepath_iter_or_filesystem(
                 remote_json_url,
                 local_json_path_temp,
                 headers=url_request_headers_create(
@@ -2976,12 +3005,14 @@ def repo_sync_from_remote(
                 ),
                 chunk_size=CHUNK_SIZE_DEFAULT,
                 timeout_in_seconds=timeout_in_seconds,
+                retrieve_info=retrieve_info,
         ):
-            request_exit |= msglog.progress("Downloading...", read_total, size, 'BYTE')
+            request_exit |= msglog.progress("Downloading...", read_total, retrieve_info.size_hint, 'BYTE')
             if request_exit:
                 break
             read_total += read
         del read_total
+        del retrieve_info
     except (Exception, KeyboardInterrupt) as ex:
         msg = url_retrieve_exception_as_message(ex, prefix="sync", url=remote_url)
         if demote_connection_errors_to_status and url_retrieve_exception_is_connectivity(ex):
@@ -3888,6 +3919,7 @@ class subcmd_client:
                 ),
                 chunk_size=CHUNK_SIZE_DEFAULT,
                 timeout_in_seconds=timeout_in_seconds,
+                retrieve_info=DataRetrieveInfo(),  # Unused.
         ):
             result.write(block)
@@ -4339,6 +4371,7 @@ class subcmd_client:
                         ),
                         chunk_size=CHUNK_SIZE_DEFAULT,
                         timeout_in_seconds=timeout_in_seconds,
+                        retrieve_info=DataRetrieveInfo(),  # Unused.
                 ):
                     request_exit |= msglog.progress(
                         "Downloading \"{:s}\"".format(pkg_idname),