Source code for s3pathlib.core.metadata

# -*- coding: utf-8 -*-

"""
Metadata related API.
"""

import typing as T
from datetime import datetime

from .. import utils
from ..constants import IS_DELETE_MARKER
from ..better_client.head_object import head_object
from ..aws import context

from .resolve_s3_client import resolve_s3_client
from .filterable_property import FilterableProperty

if T.TYPE_CHECKING:  # pragma: no cover
    from .s3path import S3Path
    from boto_session_manager import BotoSesManager
    from mypy_boto3_s3 import S3Client
    from mypy_boto3_s3.type_defs import HeadObjectOutputTypeDef, PutObjectOutputTypeDef


class MetadataAPIMixin:
    """
    A mixin class that implements the metadata related methods.

    Note:

    1. only S3 object can have metadata.
    2. metadata is immutable.
    3. user metadata key is always lower case.

    The metadata is cached in ``self._meta``; most accessors below read from
    that cache and only call ``head_object()`` when the cache is empty (or,
    for always-present fields, when the field is missing from the cache).
    """

    def head_object(
        self: "S3Path",
        bsm: T.Optional[T.Union["BotoSesManager", "S3Client"]] = None,
    ) -> dict:
        """
        Call head_object() api, store metadata value.

        :param bsm: optional boto session manager or S3 client; it is passed
            to ``resolve_s3_client`` together with the module level ``context``.

        :return: the response dictionary, which is also stored in
            ``self._meta``.
        """
        s3_client = resolve_s3_client(context, bsm)
        dct = head_object(s3_client, self.bucket, self.key)
        self._meta = dct
        return dct

    @property
    def response(self) -> T.Union["HeadObjectOutputTypeDef", "PutObjectOutputTypeDef"]:
        """
        The response dictionary from the S3 API, returned by the following
        ``boto3.client("s3")`` methods:

        - head_object: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/head_object.html
        - put_object: https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/put_object.html

        It's particularly useful when you need to access specific response
        data after you called ``S3Path.head_object(...)``,
        ``S3Path.write_bytes(...)`` or ``S3Path.write_text(...)`` method.

        This property returns the cached ``self._meta`` as is; it does not
        call any API.

        .. versionadded:: 2.2.1
        """
        return self._meta

    def _get_meta_value(
        self: "S3Path",
        key: str,
        default: T.Any = None,
        bsm: T.Optional[T.Union["BotoSesManager", "S3Client"]] = None,
    ) -> T.Any:
        """
        Note: This method is for those metadata fields that conditionally exists.

        :param key: the key to look up in the cached metadata dictionary.
        :param default: the value to return when ``key`` is not in the cache.
        :param bsm: forwarded to :meth:`head_object` when the cache is empty.

        :return: the cached value of ``key``, or ``default``.
        """
        # Only pull from S3 when nothing has been cached yet.
        if self._meta is None:
            self.head_object(bsm=bsm)
        return self._meta.get(key, default)

    def _get_or_pull_meta_value(
        self: "S3Path",
        key: str,
        bsm: T.Optional[T.Union["BotoSesManager", "S3Client"]] = None,
    ) -> T.Any:
        """
        Note: This method is for those metadata fields that always exists.

        :param key: the key to look up in the cached metadata dictionary.
        :param bsm: forwarded to :meth:`head_object` when a pull is needed.

        :return: the value of ``key`` in ``self._meta``.

        :raises KeyError: if ``key`` is still missing after ``head_object()``.
        """
        value = self._get_meta_value(key, default=None, bsm=bsm)
        # The cache may have been built by one of the ``_from_*`` constructors
        # below, which do not store every field; refresh it in that case.
        if value is None:
            self.head_object(bsm=bsm)
        return self._meta[key]

    @FilterableProperty
    def etag(self: "S3Path") -> T.Optional[str]:
        """
        For small file, it is the md5 check sum. For large file, because it is
        created from multi part upload, it is the sum of md5 for each part and
        md5 of the sum.

        Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html

        :return: the "ETag" value without its first and last character (S3
            wraps the ETag in double quotes), or ``None`` when "ETag" is not
            in the metadata.

        .. versionadded:: 1.0.1
        """
        v = self._get_meta_value(key="ETag", default=None)
        if v is None:
            return None
        # Drop the surrounding quote characters.
        return v[1:-1]

    @FilterableProperty
    def last_modified_at(self: "S3Path") -> datetime:
        """
        The "LastModified" value of the object metadata.

        Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html

        .. versionadded:: 1.0.1
        """
        return self._get_or_pull_meta_value(key="LastModified")

    @FilterableProperty
    def size(self: "S3Path") -> int:
        """
        The "ContentLength" value of the object metadata, ``0`` if the
        metadata does not have this field.

        Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html

        .. versionadded:: 1.0.1
        """
        return self._get_meta_value(key="ContentLength", default=0)

    @property
    def size_for_human(self: "S3Path") -> str:
        """
        A human-readable string version of the size.

        .. versionadded:: 1.0.1
        """
        return utils.repr_data_size(self.size)

    @property
    def _static_version_id(self: "S3Path") -> T.Optional[str]:
        """
        This method use the ``self._meta`` to get the version id.
        Unlike other metadata property methods, this method does not call
        head_object().

        :return: the cached "VersionId", or ``None`` when nothing is cached
            or the cache has no "VersionId".
        """
        if self._meta is None:
            return None
        return self._meta.get("VersionId", None)

    @FilterableProperty
    def version_id(self: "S3Path") -> T.Optional[str]:
        """
        Only available if you turned on versioning for the bucket.

        Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html

        .. versionadded:: 1.0.1

        .. versionchanged:: 2.0.1

            return 'null' if it is not a version enabled bucket
        """
        return self._get_meta_value(key="VersionId", default="null")

    @FilterableProperty
    def expire_at(self: "S3Path") -> T.Optional[datetime]:
        """
        Only available if you turned on TTL.

        Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html

        :return: the "Expires" value of the object metadata, or ``None`` when
            the metadata does not have this field.

        .. versionadded:: 1.0.1
        """
        return self._get_meta_value(key="Expires")

    @property
    def metadata(self: "S3Path") -> dict:
        """
        Access the metadata of the object.

        Ref: https://docs.aws.amazon.com/AmazonS3/latest/API/API_Object.html

        .. versionadded:: 1.0.1
        """
        return self._get_or_pull_meta_value(key="Metadata")

    def clear_cache(self: "S3Path") -> None:
        """
        Clear all cache that stores metadata information.

        .. versionadded:: 1.0.1
        """
        self._meta = None

    @classmethod
    def _from_content_dict(cls: T.Type["S3Path"], bucket: str, dct: dict) -> "S3Path":
        """
        Construct S3Path object from the response["Content"] dictionary data.

        Example ``dct``::

            {
                'Key': 'string',
                'LastModified': datetime(2015, 1, 1),
                'ETag': 'string',
                'ChecksumAlgorithm': [
                    'CRC32'|'CRC32C'|'SHA1'|'SHA256',
                ],
                'Size': 123,
                'StorageClass': 'STANDARD'|'REDUCED_REDUNDANCY'|'GLACIER'|'STANDARD_IA'|'ONEZONE_IA'|'INTELLIGENT_TIERING'|'DEEP_ARCHIVE'|'OUTPOSTS'|'GLACIER_IR',
                'Owner': {
                    'DisplayName': 'string',
                    'ID': 'string'
                }
            }

        Ref:

        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.list_objects
        - https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.list_objects_v2

        :param bucket: the bucket name the object belongs to.
        :param dct: the content dictionary, see example above.

        :return: a new S3Path object.
        """
        p = cls(bucket, dct["Key"])
        # "Size" is stored as "ContentLength" so that the cache uses the same
        # key that the ``size`` property reads.
        p._meta = {
            "Key": dct["Key"],
            "LastModified": dct["LastModified"],
            "ETag": dct["ETag"],
            "ContentLength": dct["Size"],
            "StorageClass": dct["StorageClass"],
            "ChecksumAlgorithm": dct.get("ChecksumAlgorithm", []),
            "Owner": dct.get("Owner", {}),
        }
        return p

    @classmethod
    def _from_version_dict(cls: T.Type["S3Path"], bucket: str, dct: dict) -> "S3Path":
        """
        Construct S3Path object from a dictionary that describes one object
        version (presumably an item of the ``Versions`` list in a
        ``list_object_versions`` response; verify against the caller).

        ``dct`` must have the "Key", "VersionId", "LastModified", "ETag",
        "Size", "StorageClass" and "IsLatest" keys; "ChecksumAlgorithm" and
        "Owner" are optional.

        :param bucket: the bucket name the object belongs to.
        :param dct: the version dictionary.

        :return: a new S3Path object.
        """
        p = cls(bucket, dct["Key"])
        p._meta = {
            "Key": dct["Key"],
            "VersionId": dct["VersionId"],
            "LastModified": dct["LastModified"],
            "ETag": dct["ETag"],
            "ContentLength": dct["Size"],
            "StorageClass": dct["StorageClass"],
            "IsLatest": dct["IsLatest"],
            "ChecksumAlgorithm": dct.get("ChecksumAlgorithm", []),
            "Owner": dct.get("Owner", {}),
        }
        return p

    @classmethod
    def _from_delete_marker(cls: T.Type["S3Path"], bucket: str, dct: dict) -> "S3Path":
        """
        Construct S3Path object from a dictionary that describes one delete
        marker (presumably an item of the ``DeleteMarkers`` list in a
        ``list_object_versions`` response; verify against the caller).

        ``dct`` must have the "Key", "VersionId", "LastModified" and
        "IsLatest" keys; "Owner" is optional. The cached metadata is flagged
        with ``IS_DELETE_MARKER: True``.

        :param bucket: the bucket name the object belongs to.
        :param dct: the delete marker dictionary.

        :return: a new S3Path object.
        """
        p = cls(bucket, dct["Key"])
        p._meta = {
            "Key": dct["Key"],
            "VersionId": dct["VersionId"],
            "LastModified": dct["LastModified"],
            "IsLatest": dct["IsLatest"],
            "Owner": dct.get("Owner", {}),
            IS_DELETE_MARKER: True,
        }
        return p

    def update_metadata(self: "S3Path", metadata: dict):  # pragma: no cover
        """
        Not supported, always raise an error.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError(
            "You CANNOT only update metadata without changing the content of the "
            "object! You can only do full replacement via the .write_text() or "
            ".write_bytes() API. This method will NEVER be implemented!"
        )