From ec4596397cd3743296f0cf3058a09bc6e29a12d8 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 28 May 2026 08:26:51 -0700 Subject: [PATCH 1/2] zonal: make crop() return (0, 0) on all backends when no zone matches (#2561) The numpy/cupy `_crop` helper fell through its scan loops with inverted bounds when none of the requested zone_ids existed in the input, yielding an empty `(0, 0)` slice. The dask `_crop_bounds_dask` path explicitly returned full-raster bounds in the same situation, so the caller silently got back the entire input raster instead. Both helpers now return an extra `found` flag (1/0). `crop()` slices to `[0:0, 0:0]` when nothing matched, so all four backends agree on shape `(0, 0)`. Documented in the docstring Notes section. Added cross-backend regression tests covering the absent, partially present, and standard cases. --- xrspatial/tests/test_zonal.py | 89 +++++++++++++++++++++++++++++++++++ xrspatial/zonal.py | 58 +++++++++++++++++------ 2 files changed, 132 insertions(+), 15 deletions(-) diff --git a/xrspatial/tests/test_zonal.py b/xrspatial/tests/test_zonal.py index 2989f34b..69d41f16 100644 --- a/xrspatial/tests/test_zonal.py +++ b/xrspatial/tests/test_zonal.py @@ -1900,6 +1900,95 @@ def test_crop_missing_zone_ids_raises(): crop(raster, raster) +# --------------------------------------------------------------------------- +# Regression tests for #2561: crop() must return the same shape across all +# backends when the requested zone_ids are absent (previously the dask path +# returned the full input raster while numpy/cupy returned an empty (0, 0)). +# --------------------------------------------------------------------------- + +_CROP_ARR_2561 = np.array([[0, 4, 0, 3], + [0, 4, 4, 3], + [0, 1, 1, 3], + [0, 1, 1, 3], + [0, 0, 0, 0]], dtype=np.int64) + + +def _crop_backends_2561(): + backends = ['numpy'] + if has_dask_array(): + backends.append('dask+numpy') + if has_cuda_and_cupy(): + backends.append('cupy') + if has_dask_array(): + backends.append('dask+cupy') + return backends + + +@pytest.mark.parametrize("backend", _crop_backends_2561()) +def test_crop_absent_zone_ids_returns_empty_2561(backend): + """All backends must return shape (0, 0) when no requested zone exists.""" + zones = create_test_raster(_CROP_ARR_2561, backend=backend) + values = create_test_raster(_CROP_ARR_2561, backend=backend) + + # zone_id 99 is not present anywhere in the raster. + result = crop(zones, values, zone_ids=(99,)) + + # Output array type must match input type. + assert isinstance(result.data, type(zones.data)) + assert result.shape == (0, 0) + + +@pytest.mark.parametrize("backend", _crop_backends_2561()) +def test_crop_partial_zone_ids_2561(backend): + """Mix of present and absent zone_ids crops to the present ones only.""" + zones = create_test_raster(_CROP_ARR_2561, backend=backend) + values = create_test_raster(_CROP_ARR_2561, backend=backend) + + # Only zone_id 1 is actually present here. 77 and 88 are absent. + result_partial = crop(zones, values, zone_ids=(77, 1, 88)) + result_present_only = crop(zones, values, zone_ids=(1,)) + + assert result_partial.shape == result_present_only.shape + + if backend.startswith('dask'): + partial_np = result_partial.data.compute() + present_np = result_present_only.data.compute() + else: + partial_np = result_partial.data + present_np = result_present_only.data + + if 'cupy' in backend: + partial_np = partial_np.get() + present_np = present_np.get() + + np.testing.assert_array_equal(partial_np, present_np) + + +@pytest.mark.parametrize("backend", _crop_backends_2561()) +def test_crop_happy_path_matches_numpy_2561(backend): + """All backends agree on the standard crop result.""" + zones = create_test_raster(_CROP_ARR_2561, backend=backend) + values = create_test_raster(_CROP_ARR_2561, backend=backend) + + result = crop(zones, values, zone_ids=(1, 3)) + assert result.shape == (4, 3) + + expected = np.array([[4, 0, 3], + [4, 4, 3], + [1, 1, 3], + [1, 1, 3]], dtype=np.int64) + + if backend.startswith('dask'): + result_np = result.data.compute() + else: + result_np = result.data + + if 'cupy' in backend: + result_np = result_np.get() + + np.testing.assert_array_equal(result_np, expected) + + # --------------------------------------------------------------------------- # Regression tests for #2528: dask backend must not silently drop 'majority' # from the requested stats list. diff --git a/xrspatial/zonal.py b/xrspatial/zonal.py index 714d052c..7823dad4 100644 --- a/xrspatial/zonal.py +++ b/xrspatial/zonal.py @@ -2496,16 +2496,25 @@ def trim( @ngjit def _crop(data, values): + """Scan-based crop bounds. + + Returns + ------- + top, bottom, left, right, found : ints + ``found`` is 1 if any cell in *data* matches one of *values*, else 0. + When ``found == 0`` the bounds are meaningless and the caller must + treat the result as an empty crop. + """ rows, cols = data.shape - top = -1 - bottom = -1 - left = -1 - right = -1 + top = 0 + bottom = 0 + left = 0 + right = 0 + found = 0 # find empty top rows - top = 0 scan_complete = False for y in range(rows): @@ -2519,6 +2528,7 @@ def _crop(data, values): for v in values: if v == val: scan_complete = True + found = 1 break else: continue @@ -2526,8 +2536,10 @@ def _crop(data, values): if scan_complete: break + if found == 0: + return 0, 0, 0, 0, 0 + # find empty bottom rows - bottom = 0 scan_complete = False for y in range(rows - 1, -1, -1): @@ -2549,7 +2561,6 @@ def _crop(data, values): break # find empty left cols - left = 0 scan_complete = False for x in range(cols): @@ -2571,7 +2582,6 @@ def _crop(data, values): break # find empty right cols - right = 0 scan_complete = False for x in range(cols - 1, -1, -1): if scan_complete: @@ -2589,11 +2599,20 @@ def _crop(data, values): if scan_complete: break - return top, bottom, left, right + return top, bottom, left, right, 1 def _crop_bounds_dask(data, target_values): - """Find crop bounds using lazy dask reductions (O(rows+cols) memory).""" + """Find crop bounds using lazy dask reductions (O(rows+cols) memory). + + Returns + ------- + top, bottom, left, right, found : ints + ``found`` is 1 if any cell in *data* matches one of *target_values*, + else 0. When ``found == 0`` the bounds are meaningless and the caller + must treat the result as an empty crop. Matches the contract of + :func:`_crop`. + """ matched = da.zeros_like(data, dtype=bool) for v in target_values: matched = matched | (data == v) @@ -2612,10 +2631,10 @@ def _crop_bounds_dask(data, target_values): match_cols = np.where(np.asarray(col_mask))[0] if len(match_rows) == 0 or len(match_cols) == 0: - return 0, data.shape[0] - 1, 0, data.shape[1] - 1 + return 0, 0, 0, 0, 0 return (int(match_rows[0]), int(match_rows[-1]), - int(match_cols[0]), int(match_cols[-1])) + int(match_cols[0]), int(match_cols[-1]), 1) def crop( @@ -2666,6 +2685,9 @@ def crop( Notes ----- - This operation will change the output size of the raster. + - If none of the requested ``zone_ids`` are present in ``zones``, the + returned DataArray has shape ``(0, 0)``. This behaviour is the same + across all backends (numpy, cupy, dask+numpy, dask+cupy). Examples -------- @@ -2790,12 +2812,18 @@ def crop( data = zones.data if has_dask_array() and isinstance(data, da.Array): - top, bottom, left, right = _crop_bounds_dask(data, zone_ids) + top, bottom, left, right, found = _crop_bounds_dask(data, zone_ids) else: if is_cupy_array(data): data = data.get() - top, bottom, left, right = _crop(data, np.asarray(zone_ids)) + top, bottom, left, right, found = _crop(data, np.asarray(zone_ids)) - arr = values[top: bottom + 1, left: right + 1] + if not found: + # No requested zone exists in `zones`; return an empty (0, 0) slice + # so all backends agree (see GH #2561). Slicing with `0:0` preserves + # the underlying array type (numpy/cupy/dask) and the dim names. + arr = values[0:0, 0:0] + else: + arr = values[top: bottom + 1, left: right + 1] arr.name = name return arr From ca1cae2296e58228773ee3b12326eb034f6ea2f2 Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Thu, 28 May 2026 08:29:05 -0700 Subject: [PATCH 2/2] Address review: pin expected shape and return `found` variable (#2561) - test_crop_partial_zone_ids_2561: pin `result_partial.shape == (2, 2)` so the regression bar is explicit and the test cannot pass by drifting both code paths the same way. - _crop: return the `found` local instead of the literal `1`, matching the docstring. --- xrspatial/tests/test_zonal.py | 4 ++++ xrspatial/zonal.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/xrspatial/tests/test_zonal.py b/xrspatial/tests/test_zonal.py index 69d41f16..154f8b29 100644 --- a/xrspatial/tests/test_zonal.py +++ b/xrspatial/tests/test_zonal.py @@ -1948,6 +1948,10 @@ def test_crop_partial_zone_ids_2561(backend): result_partial = crop(zones, values, zone_ids=(77, 1, 88)) result_present_only = crop(zones, values, zone_ids=(1,)) + # Pin the expected bounding box of zone_id=1 in _CROP_ARR_2561 + # (rows 2-3, cols 1-2) so a regression cannot pass by drifting + # both code paths in the same direction. + assert result_partial.shape == (2, 2) assert result_partial.shape == result_present_only.shape if backend.startswith('dask'): diff --git a/xrspatial/zonal.py b/xrspatial/zonal.py index 7823dad4..f8b20b14 100644 --- a/xrspatial/zonal.py +++ b/xrspatial/zonal.py @@ -2599,7 +2599,7 @@ def _crop(data, values): if scan_complete: break - return top, bottom, left, right, 1 + return top, bottom, left, right, found def _crop_bounds_dask(data, target_values):