Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
93 changes: 93 additions & 0 deletions xrspatial/tests/test_zonal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1900,6 +1900,99 @@ def test_crop_missing_zone_ids_raises():
crop(raster, raster)


# ---------------------------------------------------------------------------
# Regression tests for #2561: crop() must return the same shape across all
# backends when the requested zone_ids are absent (previously the dask path
# returned the full input raster while numpy/cupy returned an empty (0, 0)).
# ---------------------------------------------------------------------------

_CROP_ARR_2561 = np.array([[0, 4, 0, 3],
[0, 4, 4, 3],
[0, 1, 1, 3],
[0, 1, 1, 3],
[0, 0, 0, 0]], dtype=np.int64)


def _crop_backends_2561():
backends = ['numpy']
if has_dask_array():
backends.append('dask+numpy')
if has_cuda_and_cupy():
backends.append('cupy')
if has_dask_array():
backends.append('dask+cupy')
return backends


@pytest.mark.parametrize("backend", _crop_backends_2561())
def test_crop_absent_zone_ids_returns_empty_2561(backend):
"""All backends must return shape (0, 0) when no requested zone exists."""
zones = create_test_raster(_CROP_ARR_2561, backend=backend)
values = create_test_raster(_CROP_ARR_2561, backend=backend)

# zone_id 99 is not present anywhere in the raster.
result = crop(zones, values, zone_ids=(99,))

# Output array type must match input type.
assert isinstance(result.data, type(zones.data))
assert result.shape == (0, 0)


@pytest.mark.parametrize("backend", _crop_backends_2561())
def test_crop_partial_zone_ids_2561(backend):
"""Mix of present and absent zone_ids crops to the present ones only."""
zones = create_test_raster(_CROP_ARR_2561, backend=backend)
values = create_test_raster(_CROP_ARR_2561, backend=backend)

# Only zone_id 1 is actually present here. 77 and 88 are absent.
result_partial = crop(zones, values, zone_ids=(77, 1, 88))
result_present_only = crop(zones, values, zone_ids=(1,))

# Pin the expected bounding box of zone_id=1 in _CROP_ARR_2561
# (rows 2-3, cols 1-2) so a regression cannot pass by drifting
# both code paths in the same direction.
assert result_partial.shape == (2, 2)
assert result_partial.shape == result_present_only.shape

if backend.startswith('dask'):
partial_np = result_partial.data.compute()
present_np = result_present_only.data.compute()
else:
partial_np = result_partial.data
present_np = result_present_only.data

if 'cupy' in backend:
partial_np = partial_np.get()
present_np = present_np.get()

np.testing.assert_array_equal(partial_np, present_np)


@pytest.mark.parametrize("backend", _crop_backends_2561())
def test_crop_happy_path_matches_numpy_2561(backend):
"""All backends agree on the standard crop result."""
zones = create_test_raster(_CROP_ARR_2561, backend=backend)
values = create_test_raster(_CROP_ARR_2561, backend=backend)

result = crop(zones, values, zone_ids=(1, 3))
assert result.shape == (4, 3)

expected = np.array([[4, 0, 3],
[4, 4, 3],
[1, 1, 3],
[1, 1, 3]], dtype=np.int64)

if backend.startswith('dask'):
result_np = result.data.compute()
else:
result_np = result.data

if 'cupy' in backend:
result_np = result_np.get()

np.testing.assert_array_equal(result_np, expected)


# ---------------------------------------------------------------------------
# Regression tests for #2528: dask backend must not silently drop 'majority'
# from the requested stats list.
Expand Down
58 changes: 43 additions & 15 deletions xrspatial/zonal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2496,16 +2496,25 @@ def trim(

@ngjit
def _crop(data, values):
"""Scan-based crop bounds.

Returns
-------
top, bottom, left, right, found : ints
``found`` is 1 if any cell in *data* matches one of *values*, else 0.
When ``found == 0`` the bounds are meaningless and the caller must
treat the result as an empty crop.
"""

rows, cols = data.shape

top = -1
bottom = -1
left = -1
right = -1
top = 0
bottom = 0
left = 0
right = 0
found = 0

# find empty top rows
top = 0
scan_complete = False
for y in range(rows):

Expand All @@ -2519,15 +2528,18 @@ def _crop(data, values):
for v in values:
if v == val:
scan_complete = True
found = 1
break
else:
continue

if scan_complete:
break

if found == 0:
return 0, 0, 0, 0, 0

# find empty bottom rows
bottom = 0
scan_complete = False
for y in range(rows - 1, -1, -1):

Expand All @@ -2549,7 +2561,6 @@ def _crop(data, values):
break

# find empty left cols
left = 0
scan_complete = False
for x in range(cols):

Expand All @@ -2571,7 +2582,6 @@ def _crop(data, values):
break

# find empty right cols
right = 0
scan_complete = False
for x in range(cols - 1, -1, -1):
if scan_complete:
Expand All @@ -2589,11 +2599,20 @@ def _crop(data, values):
if scan_complete:
break

return top, bottom, left, right
return top, bottom, left, right, found


def _crop_bounds_dask(data, target_values):
"""Find crop bounds using lazy dask reductions (O(rows+cols) memory)."""
"""Find crop bounds using lazy dask reductions (O(rows+cols) memory).

Returns
-------
top, bottom, left, right, found : ints
``found`` is 1 if any cell in *data* matches one of *target_values*,
else 0. When ``found == 0`` the bounds are meaningless and the caller
must treat the result as an empty crop. Matches the contract of
:func:`_crop`.
"""
matched = da.zeros_like(data, dtype=bool)
for v in target_values:
matched = matched | (data == v)
Expand All @@ -2612,10 +2631,10 @@ def _crop_bounds_dask(data, target_values):
match_cols = np.where(np.asarray(col_mask))[0]

if len(match_rows) == 0 or len(match_cols) == 0:
return 0, data.shape[0] - 1, 0, data.shape[1] - 1
return 0, 0, 0, 0, 0

return (int(match_rows[0]), int(match_rows[-1]),
int(match_cols[0]), int(match_cols[-1]))
int(match_cols[0]), int(match_cols[-1]), 1)


def crop(
Expand Down Expand Up @@ -2666,6 +2685,9 @@ def crop(
Notes
-----
- This operation will change the output size of the raster.
- If none of the requested ``zone_ids`` are present in ``zones``, the
returned DataArray has shape ``(0, 0)``. This behaviour is the same
across all backends (numpy, cupy, dask+numpy, dask+cupy).

Examples
--------
Expand Down Expand Up @@ -2790,12 +2812,18 @@ def crop(

data = zones.data
if has_dask_array() and isinstance(data, da.Array):
top, bottom, left, right = _crop_bounds_dask(data, zone_ids)
top, bottom, left, right, found = _crop_bounds_dask(data, zone_ids)
else:
if is_cupy_array(data):
data = data.get()
top, bottom, left, right = _crop(data, np.asarray(zone_ids))
top, bottom, left, right, found = _crop(data, np.asarray(zone_ids))

arr = values[top: bottom + 1, left: right + 1]
if not found:
# No requested zone exists in `zones`; return an empty (0, 0) slice
# so all backends agree (see GH #2561). Slicing with `0:0` preserves
# the underlying array type (numpy/cupy/dask) and the dim names.
arr = values[0:0, 0:0]
else:
arr = values[top: bottom + 1, left: right + 1]
arr.name = name
return arr
Loading