# -*- coding: utf-8 -*-
"""
Improve the list_objects_v2_ and ListObjectsV2_ API.
.. _list_objects_v2: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.list_objects_v2
.. _ListObjectsV2: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/paginator/ListObjectsV2.html
"""
import typing as T
from func_args import NOTHING, resolve_kwargs
from iterproxy import IterProxy
if T.TYPE_CHECKING: # pragma: no cover
from mypy_boto3_s3 import S3Client
from mypy_boto3_s3.type_defs import (
ListObjectsV2OutputTypeDef,
ObjectTypeDef,
CommonPrefixTypeDef,
)
_ = ListObjectsV2OutputTypeDef
[docs]
class ObjectTypeDefIterproxy(IterProxy["ObjectTypeDef"]):
    """
    Iterproxy whose items are ``ObjectTypeDef`` entries, i.e. the elements
    found under the "Contents" key of ListObjectsV2_ responses.

    The class adds no behavior of its own; it only pins the item type of
    :class:`IterProxy`.

    .. versionadded:: 2.0.1
    """
[docs]
class CommonPrefixTypeDefIterproxy(IterProxy["CommonPrefixTypeDef"]):
    """
    Iterproxy whose items are ``CommonPrefixTypeDef`` entries, i.e. the
    elements found under the "CommonPrefixes" key of ListObjectsV2_ responses.

    The class adds no behavior of its own; it only pins the item type of
    :class:`IterProxy`.

    .. versionadded:: 2.0.1
    """
[docs]
class ListObjectsV2OutputTypeDefIterproxy(IterProxy["ListObjectsV2OutputTypeDef"]):
    """
    Iterproxy whose items are the raw ListObjectsV2_ responses.

    On top of plain iteration it offers helpers that flatten the responses
    into their "Contents" entries, their "CommonPrefixes" entries, or both.

    NOTE(review): every helper walks ``self``; presumably the proxy is
    single-pass, so only one helper should be used per instance -- confirm
    against the ``iterproxy`` documentation.

    .. versionadded:: 2.0.1
    """

    def _yield_content(self) -> T.Iterator["ObjectTypeDef"]:
        """Yield every "Contents" entry of every response, in order."""
        for page in self:
            # A response without a "Contents" key contributes nothing.
            yield from page.get("Contents", [])

    def contents(self) -> ObjectTypeDefIterproxy:
        """
        Iterate object contents.

        :return: a :class:`ObjectTypeDefIterproxy` over the "Contents"
            entries of all responses.

        .. versionadded:: 2.0.1
        """
        flattened = self._yield_content()
        return ObjectTypeDefIterproxy(flattened)

    def _yield_common_prefixes(self) -> T.Iterator["CommonPrefixTypeDef"]:
        """Yield every "CommonPrefixes" entry of every response, in order."""
        for page in self:
            # A response without a "CommonPrefixes" key contributes nothing.
            yield from page.get("CommonPrefixes", [])

    def common_prefixs(self) -> CommonPrefixTypeDefIterproxy:
        """
        Iterate folders.

        :return: a :class:`CommonPrefixTypeDefIterproxy` over the
            "CommonPrefixes" entries of all responses.

        .. versionadded:: 2.0.1
        """
        flattened = self._yield_common_prefixes()
        return CommonPrefixTypeDefIterproxy(flattened)

    def extract_contents_and_common_prefixes(
        self, response: dict
    ) -> T.Tuple[
        T.List["ObjectTypeDef"],
        T.List["CommonPrefixTypeDef"],
    ]:
        """
        Split one response into its two listings.

        :param response: a single ListObjectsV2_ response dictionary.

        :return: ``(contents, common_prefixes)``; a key that is absent from
            ``response`` is reported as an empty list.
        """
        page_objects = response.get("Contents", [])
        page_folders = response.get("CommonPrefixes", [])
        return page_objects, page_folders

    def contents_and_common_prefixs(
        self,
    ) -> T.Tuple[T.List["ObjectTypeDef"], T.List["CommonPrefixTypeDef"]]:
        """
        Return the full list of object contents and folders.

        All responses are consumed and their listings concatenated in
        response order.

        :return: ``(contents, common_prefixes)`` as two lists.

        .. versionadded:: 2.0.1
        """
        all_objects = []
        all_folders = []
        for page in self:
            page_objects, page_folders = self.extract_contents_and_common_prefixes(
                page
            )
            all_objects += page_objects
            all_folders += page_folders
        return all_objects, all_folders
[docs]
def paginate_list_objects_v2(
    s3_client: "S3Client",
    bucket: str,
    prefix: str,
    batch_size: int = 1000,
    limit: int = NOTHING,
    delimiter: str = NOTHING,
    encoding_type: str = NOTHING,
    fetch_owner: bool = NOTHING,
    start_after: str = NOTHING,
    request_payer: str = NOTHING,
    expected_bucket_owner: str = NOTHING,
) -> ListObjectsV2OutputTypeDefIterproxy:
    """
    Wrapper of list_objects_v2_ and ListObjectsV2_ that returns a
    user-friendly :class:`ListObjectsV2OutputTypeDefIterproxy` object
    instead of the raw paginator.

    The paginator is created inside a generator, so nothing is requested
    from ``s3_client`` until the generator is first advanced.

    Example::

        >>> result = paginate_list_objects_v2(
        ...     s3_client=s3_client,
        ...     bucket="my-bucket",
        ...     prefix="my-folder",
        ... )
        >>> for content in result.contents():
        ...     print(content)
        {"Key": "1.json", "ETag": "...", "Size": 123, "LastModified": datetime(2015, 1, 1), "StorageClass": "...", "Owner": {...}}
        {"Key": "2.json", "ETag": "...", "Size": 123, "LastModified": datetime(2015, 1, 1), "StorageClass": "...", "Owner": {...}}
        {"Key": "3.json", "ETag": "...", "Size": 123, "LastModified": datetime(2015, 1, 1), "StorageClass": "...", "Owner": {...}}
        ...

    :param s3_client: ``boto3.session.Session().client("s3")`` object.
    :param bucket: Sent as ``Bucket``. See ListObjectsV2_.
    :param prefix: Sent as ``Prefix``. See ListObjectsV2_.
    :param batch_size: Sent as ``PaginationConfig.PageSize``; has to be
        between 1 and 1000. When ``limit`` is given and smaller, ``limit``
        is used as the page size instead. See ListObjectsV2_.
    :param limit: Sent as ``PaginationConfig.MaxItems``. See ListObjectsV2_.
    :param delimiter: Sent as ``Delimiter``. See ListObjectsV2_.
    :param encoding_type: Sent as ``EncodingType``. See ListObjectsV2_.
    :param fetch_owner: Sent as ``FetchOwner``. See ListObjectsV2_.
    :param start_after: Sent as ``StartAfter``. See ListObjectsV2_.
    :param request_payer: Sent as ``RequestPayer``. See ListObjectsV2_.
    :param expected_bucket_owner: Sent as ``ExpectedBucketOwner``.
        See ListObjectsV2_.

    :raises ValueError: if ``batch_size`` is smaller than 1 or greater
        than 1000.

    :return: a :class:`ListObjectsV2OutputTypeDefIterproxy` object.

    .. versionadded:: 2.0.1
    """
    # validate arguments
    if batch_size > 1000 or batch_size < 1:
        raise ValueError("``batch_size`` has to be 1 ~ 1000.")

    # There is no point in asking for pages larger than the overall limit.
    if limit is not NOTHING:
        if batch_size > limit:
            batch_size = limit

    def _iter_responses():
        # ``resolve_kwargs`` is relied on to handle arguments that were left
        # at ``NOTHING`` (presumably by dropping them from the request).
        pagination_config = resolve_kwargs(
            MaxItems=limit,
            PageSize=batch_size,
        )
        request_kwargs = resolve_kwargs(
            Bucket=bucket,
            Prefix=prefix,
            Delimiter=delimiter,
            EncodingType=encoding_type,
            FetchOwner=fetch_owner,
            StartAfter=start_after,
            RequestPayer=request_payer,
            ExpectedBucketOwner=expected_bucket_owner,
            PaginationConfig=pagination_config,
        )
        paginator = s3_client.get_paginator("list_objects_v2")
        yield from paginator.paginate(**request_kwargs)

    return ListObjectsV2OutputTypeDefIterproxy(_iter_responses())
[docs]
def is_content_an_object(content: "ObjectTypeDef") -> bool:
    """
    Return True if the content is an object (not a folder).

    Only an entry whose key ends with "/" AND whose size is 0 is treated
    as a folder. Truth table:

    - key ends with "/", size is 0: False
    - key ends with "/", size > 0: True
    - key does not end with "/", size is 0: True
    - key does not end with "/", size > 0: True

    :param content: one entry of the "Contents" list; ``"Key"`` is always
        read, ``"Size"`` is read only when the key ends with "/".

    :return: False for an empty "/"-terminated entry, True otherwise.
    """
    if content["Key"].endswith("/"):
        # Folder-like key: it still counts as an object when it has data.
        return content["Size"] != 0
    return True
[docs]
def calculate_total_size(
    s3_client: "S3Client",
    bucket: str,
    prefix: str,
    include_folder: bool = False,
) -> T.Tuple[int, int]:
    """
    Perform the "Calculate Total Size" action in AWS S3 console.

    Every entry listed under ``prefix`` is visited once; its ``"Size"`` is
    added to the running total.

    :param s3_client: ``boto3.session.Session().client("s3")`` object
    :param bucket: S3 bucket name
    :param prefix: The s3 prefix (logic directory) you want to calculate
    :param include_folder: Default False, whether counting the hard folder
        (an empty "/" object). Entries are filtered with
        :func:`is_content_an_object` only when this is exactly ``False``.

    :return: Tuple of ``(count, total_size)``. First value is number of objects,
        Second value is total size in bytes.

    .. versionadded:: 2.0.1
    """
    responses = paginate_list_objects_v2(
        s3_client=s3_client,
        bucket=bucket,
        prefix=prefix,
    )
    entries = responses.contents()
    if include_folder is False:
        entries = entries.filter(is_content_an_object)

    n_objects, n_bytes = 0, 0
    for entry in entries:
        n_objects += 1
        n_bytes += entry["Size"]
    return n_objects, n_bytes
[docs]
def count_objects(
    s3_client: "S3Client",
    bucket: str,
    prefix: str,
    include_folder: bool = False,
) -> int:
    """
    Count number of objects under prefix.

    :param s3_client: ``boto3.session.Session().client("s3")`` object
    :param bucket: S3 bucket name
    :param prefix: The s3 prefix (logic directory) you want to calculate
    :param include_folder: Default False, whether counting the hard folder
        (an empty "/" object). Entries are filtered with
        :func:`is_content_an_object` only when this is exactly ``False``.

    :return: Number of objects under prefix; 0 when nothing is listed.

    .. versionadded:: 2.0.1
    """
    responses = paginate_list_objects_v2(
        s3_client=s3_client,
        bucket=bucket,
        prefix=prefix,
    )
    entries = responses.contents()
    if include_folder is False:
        entries = entries.filter(is_content_an_object)
    # Count by consuming the iterator; nothing is kept in memory.
    return sum(1 for _ in entries)