Skip to content

Commit 8dfde49

Browse files
add Java cross-language reference vector, harden body type handling
1 parent 64c1042 commit 8dfde49

3 files changed

Lines changed: 74 additions & 13 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -765,21 +765,18 @@ def _init_sigv4(self, session: Session) -> None:
765765
from requests.adapters import HTTPAdapter
766766

767767
class _IcebergSigV4Auth(SigV4Auth):
768-
def canonical_request(self, request: Any) -> str:
769-
# Reuses the logic from botocore's SigV4Auth.canonical_request
770-
# (https://github.com/boto/botocore/blob/develop/botocore/auth.py)
771-
# but always uses self.payload(request) for the body checksum.
772-
# Validated against botocore <= 1.42.x
773-
# (https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637)
768+
def canonical_request(self, request: AWSRequest) -> str:
769+
# Override forces hex payload hash in the canonical request even when
770+
# x-amz-content-sha256 header is base64 (see body-hash block below).
771+
# Mirrors botocore <=1.42.x SigV4Auth.canonical_request layout:
772+
# https://github.com/boto/botocore/blob/1.42.85/botocore/auth.py#L622-L637
774773
cr = [request.method.upper()]
775774
path = self._normalize_url_path(parse.urlsplit(request.url).path)
776775
cr.append(path)
777776
cr.append(self.canonical_query_string(request))
778777
headers_to_sign = self.headers_to_sign(request)
779778
cr.append(self.canonical_headers(headers_to_sign) + "\n")
780779
cr.append(self.signed_headers(headers_to_sign))
781-
# Always use hex-encoded payload hash per SigV4 spec,
782-
# regardless of the x-amz-content-sha256 header value (which may be base64).
783780
cr.append(self.payload(request))
784781
return "\n".join(cr)
785782

@@ -810,11 +807,20 @@ def add_headers(self, request: PreparedRequest, **kwargs: Any) -> None: # pylin
810807
if "connection" in request.headers:
811808
del request.headers["connection"]
812809

813-
# Compute the x-amz-content-sha256 header to match Iceberg Java SDK:
814-
# - empty body → hex (EMPTY_BODY_SHA256)
815-
# - non-empty body → base64
810+
# Match Iceberg Java's AWS SDK v2 flexible-checksum signing:
811+
# x-amz-content-sha256 header is base64 for non-empty bodies, hex for empty.
812+
# The SigV4 canonical request still uses hex (enforced in _IcebergSigV4Auth above).
813+
# Ref: https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/RESTSigV4AuthSession.java
816814
if request.body:
817-
body_bytes = request.body.encode("utf-8") if isinstance(request.body, str) else request.body
815+
if isinstance(request.body, str):
816+
body_bytes = request.body.encode("utf-8")
817+
elif isinstance(request.body, (bytes, bytearray)):
818+
body_bytes = request.body
819+
else:
820+
raise TypeError(
821+
f"Unsupported request body type for SigV4 signing: "
822+
f"{type(request.body).__name__}; expected str or bytes."
823+
)
818824
content_sha256_header = base64.b64encode(hashlib.sha256(body_bytes).digest()).decode()
819825
else:
820826
content_sha256_header = EMPTY_BODY_SHA256

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ sql-postgres = [
9292
]
9393
sql-sqlite = ["sqlalchemy>=2.0.18,<3"]
9494
gcsfs = ["gcsfs>=2023.1.0"]
95-
rest-sigv4 = ["boto3>=1.24.59"]
95+
rest-sigv4 = ["boto3>=1.24.59", "botocore<2"]
9696
hf = ["huggingface-hub>=0.24.0"]
9797
pyiceberg-core = ["pyiceberg-core>=0.5.1,<0.10.0"]
9898
datafusion = ["datafusion>=52,<53"]

tests/catalog/test_rest.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,61 @@ def capturing_add_auth(self: Any, request: Any) -> None:
684684
assert prepared.headers["x-amz-content-sha256"] == base64.b64encode(hashlib.sha256(body_content).digest()).decode()
685685

686686

687+
def test_sigv4_content_sha256_matches_iceberg_java_reference(rest_mock: Mocker) -> None:
688+
"""Pin byte-for-byte equivalence with Iceberg Java TestRESTSigV4AuthSession (L121, L177)."""
689+
java_reference_body = b'{"namespace":["ns"],"properties":{}}'
690+
java_reference_base64 = "yc5oAKPWjHY4sW8XQq0l/3aNrrXJKBycVFNnDEGMfww="
691+
java_reference_empty_hex = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
692+
693+
catalog = RestCatalog(
694+
"rest",
695+
**{
696+
"uri": TEST_URI,
697+
"rest.sigv4-enabled": "true",
698+
"rest.signing-region": "us-east-1",
699+
"client.access-key-id": "id",
700+
"client.secret-access-key": "secret",
701+
},
702+
)
703+
adapter = catalog._session.adapters[catalog.uri]
704+
assert isinstance(adapter, HTTPAdapter)
705+
706+
# Non-empty body: must match Java's base64 reference value exactly
707+
prepared_with_body = catalog._session.prepare_request(
708+
Request("POST", f"{TEST_URI}v1/namespaces", data=java_reference_body)
709+
)
710+
adapter.add_headers(prepared_with_body)
711+
assert prepared_with_body.headers["x-amz-content-sha256"] == java_reference_base64
712+
713+
# Empty body: must match Java's hex reference value exactly
714+
prepared_empty = catalog._session.prepare_request(Request("GET", f"{TEST_URI}v1/config"))
715+
adapter.add_headers(prepared_empty)
716+
assert prepared_empty.headers["x-amz-content-sha256"] == java_reference_empty_hex
717+
718+
719+
def test_sigv4_unsupported_body_type_raises(rest_mock: Mocker) -> None:
720+
"""Unsupported body types (e.g. file-like) raise a clear error rather than crashing in hashlib."""
721+
catalog = RestCatalog(
722+
"rest",
723+
**{
724+
"uri": TEST_URI,
725+
"rest.sigv4-enabled": "true",
726+
"rest.signing-region": "us-east-1",
727+
"client.access-key-id": "id",
728+
"client.secret-access-key": "secret",
729+
},
730+
)
731+
adapter = catalog._session.adapters[catalog.uri]
732+
assert isinstance(adapter, HTTPAdapter)
733+
734+
prepared = catalog._session.prepare_request(Request("POST", f"{TEST_URI}v1/namespaces"))
735+
# Inject an unsupported body type (a list — not str/bytes)
736+
prepared.body = ["not", "a", "valid", "body"] # type: ignore[assignment]
737+
738+
with pytest.raises(TypeError, match="Unsupported request body type for SigV4 signing"):
739+
adapter.add_headers(prepared)
740+
741+
687742
def test_sigv4_adapter_default_retry_config(rest_mock: Mocker) -> None:
688743
catalog = RestCatalog(
689744
"rest",

0 commit comments

Comments
 (0)