Skip to content

Commit 7c9e063

Browse files
ndrluissungwy
authored andcommitted
Fix overwrite when filtering complete files (#1023)
1 parent e00a55c commit 7c9e063

2 files changed

Lines changed: 43 additions & 2 deletions

File tree

pyiceberg/table/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -622,7 +622,9 @@ def delete(self, delete_filter: Union[str, BooleanExpression], snapshot_properti
622622
filtered_df = df.filter(preserve_row_filter)
623623

624624
# Only rewrite if there are records being deleted
625-
if len(df) != len(filtered_df):
625+
if len(filtered_df) == 0:
626+
replaced_files.append((original_file.file, []))
627+
elif len(df) != len(filtered_df):
626628
replaced_files.append((
627629
original_file.file,
628630
list(

tests/integration/test_writes/test_writes.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,13 @@
3838
from pyiceberg.catalog.rest import RestCatalog
3939
from pyiceberg.catalog.sql import SqlCatalog
4040
from pyiceberg.exceptions import NoSuchTableError
41+
from pyiceberg.expressions import In
4142
from pyiceberg.io.pyarrow import _dataframe_to_data_files
4243
from pyiceberg.partitioning import PartitionField, PartitionSpec
4344
from pyiceberg.schema import Schema
4445
from pyiceberg.table import TableProperties
4546
from pyiceberg.transforms import IdentityTransform
46-
from pyiceberg.types import IntegerType, LongType, NestedField
47+
from pyiceberg.types import IntegerType, LongType, NestedField, StringType
4748
from utils import _create_table
4849

4950

@@ -1293,3 +1294,41 @@ def test_rest_catalog_with_empty_catalog_name_append_data(session_catalog: Catal
12931294
)
12941295
tbl = _create_table(test_catalog, identifier, data=[])
12951296
tbl.append(arrow_table_with_null)
1297+
1298+
1299+
@pytest.mark.integration
1300+
def test_table_v1_with_null_nested_namespace(session_catalog: Catalog, arrow_table_with_null: pa.Table) -> None:
1301+
identifier = "default.lower.table_v1_with_null_nested_namespace"
1302+
tbl = _create_table(session_catalog, identifier, {"format-version": "1"}, [arrow_table_with_null])
1303+
assert tbl.format_version == 1, f"Expected v1, got: v{tbl.format_version}"
1304+
# TODO: Add session_catalog.table_exists check here when we integrate a REST catalog image
1305+
# that supports HEAD request on table endpoint
1306+
1307+
# assert session_catalog.table_exists(identifier)
1308+
1309+
# We expect no error here
1310+
session_catalog.drop_table(identifier)
1311+
1312+
1313+
@pytest.mark.integration
1314+
def test_overwrite_all_data_with_filter(session_catalog: Catalog) -> None:
1315+
schema = Schema(
1316+
NestedField(1, "id", StringType(), required=True),
1317+
NestedField(2, "name", StringType(), required=False),
1318+
identifier_field_ids=[1],
1319+
)
1320+
1321+
data = pa.Table.from_pylist(
1322+
[
1323+
{"id": "1", "name": "Amsterdam"},
1324+
{"id": "2", "name": "San Francisco"},
1325+
{"id": "3", "name": "Drachten"},
1326+
],
1327+
schema=schema.as_arrow(),
1328+
)
1329+
1330+
identifier = "default.test_overwrite_all_data_with_filter"
1331+
tbl = _create_table(session_catalog, identifier, data=[data], schema=schema)
1332+
tbl.overwrite(data, In("id", ["1", "2", "3"]))
1333+
1334+
assert len(tbl.scan().to_arrow()) == 3

0 commit comments

Comments
 (0)