Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@

### Unreleased

#### Fixed

- `polygonize(return_type='geopandas')` now auto-detects the raster's
affine transform from `attrs['transform']` (xrspatial.geotiff
convention) or `rio.transform()` (rioxarray) when the caller did
not pass an explicit `transform=` argument. Previously the function
auto-detected `attrs['crs']` and stamped it on the `GeoDataFrame`
while leaving the geometries in pixel space, so the CRS attribute
claimed projected space but the coordinates did not match. Callers
who relied on the pre-fix behaviour and expected pixel-space
geometries from a rasterio-loaded raster should pass `transform=
np.array([1, 0, 0, 0, 1, 0])` explicitly, or strip the transform
attr before calling polygonize. (#2536)


### Version 0.10.0 - 2026-05-27

Expand Down
80 changes: 78 additions & 2 deletions xrspatial/polygonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,62 @@ def _detect_raster_crs(raster: xr.DataArray):
return None


def _detect_raster_transform(raster: xr.DataArray):
"""Detect the affine transform of an input raster.

Returns a rasterio-ordered 6-tuple ``(pixel_width, 0.0, origin_x,
0.0, pixel_height, origin_y)`` -- the same layout that
``_transform_points`` consumes -- or ``None`` if no transform is
available.

Resolution order, parallel to ``_detect_raster_crs``:

1. ``raster.attrs['transform']`` (xrspatial.geotiff convention, an
already rasterio-ordered 6-tuple).
2. ``raster.rio.transform()`` (rioxarray, if installed). Returns an
``affine.Affine``; iterating it yields the rasterio-ordered 6
coefficients ``(a, b, c, d, e, f)``.
3. ``None``.

A raster carrying ``attrs['_xrspatial_no_georef']=True`` (the
xrspatial.geotiff "no georeference" marker) is treated as having
no transform even if ``rio.transform()`` is present, because that
marker explicitly opts out of georeferencing.
"""
# Honour the xrspatial.geotiff "no georeference" marker if set.
if raster.attrs.get('_xrspatial_no_georef'):
return None

transform_attr = raster.attrs.get('transform')
if transform_attr is not None:
try:
t = tuple(float(v) for v in transform_attr)
except (TypeError, ValueError):
t = None
if t is not None and len(t) == 6:
return t

try:
rio_transform = raster.rio.transform()
if rio_transform is not None:
# rasterio's Affine iterates as (a, b, c, d, e, f); the
# first 6 coefficients are the rasterio-ordered tuple. The
# 7th-9th elements are the (0, 0, 1) projective row, which
# _transform_points does not need.
t = tuple(float(v) for v in tuple(rio_transform)[:6])
# rioxarray returns the identity affine when no transform
# is available, which would silently shift outputs by (0,0)
# and look identical to "no transform". Treat the identity
# the same as None so callers get an unambiguous answer.
if t == (1.0, 0.0, 0.0, 0.0, 1.0, 0.0):
return None
return t
except Exception:
pass

return None


def _to_geopandas(
column: List[Union[int, float]],
polygon_points: List[np.ndarray],
Expand Down Expand Up @@ -1890,6 +1946,18 @@ def polygonize(
value is dropped rather than raised. The ``spatialpandas`` and
``geojson`` return types do not carry CRS metadata: spatialpandas
has no CRS slot, and GeoJSON (RFC 7946) is WGS84 only.

When ``transform`` is not supplied explicitly, the raster's affine
transform is auto-detected in this order: ``raster.attrs['transform']``
(xrspatial.geotiff convention, a rasterio-ordered 6-tuple), then
``raster.rio.transform()`` (if rioxarray is installed). An explicit
``transform=`` argument always overrides the auto-detected value.
Auto-detection is skipped when the raster carries
``attrs['_xrspatial_no_georef']=True``. This applies to all return
types -- the geometries themselves are transformed, so the
coordinates emitted in the "numpy", "awkward", "spatialpandas" and
"geojson" outputs are also in CRS coordinate space, not pixel
space, when the raster carries a transform.
"""
_validate_raster(raster, func_name='polygonize', name='raster', ndim=2)
if raster.shape[0] < 1 or raster.shape[1] < 1:
Expand Down Expand Up @@ -1917,9 +1985,17 @@ def polygonize(
f"connectivity must be either 4 or 8, not {connectivity}")
connectivity_8 = (connectivity == 8)

# Check transform.
# Check transform. When the caller did not pass an explicit
# transform, fall back to one carried on the raster (attrs[
# 'transform'] or rio.transform()). This keeps the
# ``return_type='geopandas'`` output consistent with the
# auto-detected CRS: pre-#2536, the GeoDataFrame got the raster's
# CRS but stayed in pixel space, so the metadata lied about the
# data. See _detect_raster_transform() for the resolution order.
if transform is None:
transform = _detect_raster_transform(raster)
if transform is not None:
transform = np.asarray(transform)
transform = np.asarray(transform, dtype=np.float64)
if len(transform) != 6:
raise ValueError(
f"Incorrect transform length of {len(transform)} instead of 6")
Expand Down
227 changes: 227 additions & 0 deletions xrspatial/tests/test_polygonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -1364,6 +1364,233 @@ def test_no_crs_attr_simplify_still_works(self):
assert df.crs.to_epsg() == 4326


# --- transform attr propagation tests (issue #2536) ---
#
# polygonize used to propagate attrs['crs'] but ignore attrs['transform'].
# The result was a GeoDataFrame whose .crs claimed projected space while
# the geometries were in pixel space (silent misalignment). The fix auto-
# detects attrs['transform'] (or rio.transform()) when no explicit
# transform= is passed, mirroring the CRS auto-detection.

@pytest.mark.skipif(gpd is None, reason="geopandas not installed")
class TestPolygonizeTransformPropagation:
"""polygonize auto-detects attrs['transform'] when not given one (#2536)."""

# rasterio-ordered transform: 10 m pixels, origin at (1e6, 5e6).
# _transform_points consumes this as
# x = a*col + b*row + c, y = d*col + e*row + f.
_TRANSFORM = (10.0, 0.0, 1_000_000.0, 0.0, -10.0, 5_000_000.0)

@staticmethod
def _raster(**attrs):
data = np.array([[1, 1, 2], [1, 1, 2], [2, 2, 2]], dtype=np.int32)
return xr.DataArray(
data,
dims=('y', 'x'),
coords={'y': np.arange(3), 'x': np.arange(3)},
attrs=attrs,
)

def test_transform_attr_auto_detected_numpy(self):
"""attrs['transform'] is applied even when transform= is omitted."""
raster = self._raster(transform=self._TRANSFORM)
_, polys = polygonize(raster, return_type='numpy')
# Polygons should span the CRS origin offset, not pixel space.
all_xs = np.concatenate([p[0][:, 0] for p in polys])
all_ys = np.concatenate([p[0][:, 1] for p in polys])
assert all_xs.min() >= 1_000_000.0 - 1e-6
assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6
# pixel_height is negative so y decreases from origin.
assert all_ys.max() <= 5_000_000.0 + 1e-6
assert all_ys.min() >= 5_000_000.0 - 30.0 - 1e-6

def test_transform_attr_auto_detected_geopandas(self):
"""The geopandas path applies the auto-detected transform."""
raster = self._raster(crs='EPSG:3857', transform=self._TRANSFORM)
df = polygonize(raster, return_type='geopandas')
assert df.crs is not None and df.crs.to_epsg() == 3857
bounds = df.geometry.total_bounds # [minx, miny, maxx, maxy]
assert bounds[0] >= 1_000_000.0 - 1e-6
assert bounds[2] <= 1_000_000.0 + 30.0 + 1e-6
assert bounds[1] >= 5_000_000.0 - 30.0 - 1e-6
assert bounds[3] <= 5_000_000.0 + 1e-6

def test_explicit_transform_overrides_attr(self):
"""An explicit transform= wins over attrs['transform']."""
raster = self._raster(transform=self._TRANSFORM)
# Identity-ish: pixel space, no offset. Output must be in 0..3.
identity = np.array([1.0, 0.0, 0.0, 0.0, 1.0, 0.0])
_, polys = polygonize(raster, transform=identity,
return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
all_ys = np.concatenate([p[0][:, 1] for p in polys])
assert all_xs.max() <= 3.0 + 1e-6
assert all_ys.max() <= 3.0 + 1e-6

def test_no_transform_info_yields_pixel_coords(self):
"""A raster with no transform info keeps pixel-space coords."""
raster = self._raster() # no attrs
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
all_ys = np.concatenate([p[0][:, 1] for p in polys])
assert all_xs.max() <= 3.0 + 1e-6
assert all_ys.max() <= 3.0 + 1e-6

def test_no_georef_marker_suppresses_auto_detect(self):
"""attrs['_xrspatial_no_georef']=True opts out of auto-detect even
when attrs['transform'] is also set (defensive: should not happen
in practice, but a malformed dict must not silently mis-transform).

Note: ``xrspatial.geotiff`` never produces a raster with both attrs
set simultaneously -- see ``xrspatial/geotiff/_attrs.py:782-785``,
which only writes ``_xrspatial_no_georef=True`` when
``has_georef=False`` and in that branch does NOT write
``attrs['transform']``. This test guards against a hand-built or
future-third-party attrs dict that violates that invariant."""
raster = self._raster(
transform=self._TRANSFORM, _xrspatial_no_georef=True)
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.max() <= 3.0 + 1e-6

def test_transform_attr_list_form_auto_detected(self):
"""attrs['transform'] stored as a list (not a tuple) still works."""
raster = self._raster(transform=list(self._TRANSFORM))
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.min() >= 1_000_000.0 - 1e-6

def test_transform_attr_invalid_length_falls_back(self):
"""A malformed (wrong-length) attrs['transform'] is silently
ignored, parallel to how an unparseable attrs['crs'] is dropped
rather than raised. The user did not opt in to that transform
explicitly, so falling back to pixel coords is friendlier than
breaking the call."""
raster = self._raster(transform=(1.0, 0.0, 0.0)) # only 3 elements
_, polys = polygonize(raster, return_type='numpy')
# Falls back to pixel-space coords.
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.max() <= 3.0 + 1e-6

def test_explicit_transform_invalid_length_still_raises(self):
"""An EXPLICIT transform= of wrong length must still raise --
only the attrs path is silent on malformed input."""
raster = self._raster()
with pytest.raises(ValueError, match="Incorrect transform length"):
polygonize(raster, transform=(1.0, 0.0, 0.0), return_type='numpy')

def test_transform_attr_unparseable_falls_back(self):
"""A non-numeric attrs['transform'] is ignored, not raised."""
raster = self._raster(transform=("not", "a", "transform"))
# Falls back to pixel coords; no exception.
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.max() <= 3.0 + 1e-6

def test_simplify_works_with_auto_detected_transform(self):
"""simplify_tolerance still works after auto-detected transform."""
# Larger raster to leave room for simplification.
data = np.zeros((10, 10), dtype=np.int32)
data[2:8, 2:8] = 1
raster = xr.DataArray(
data,
dims=('y', 'x'),
attrs={'crs': 'EPSG:3857', 'transform': self._TRANSFORM},
)
df = polygonize(
raster, return_type='geopandas', simplify_tolerance=1.0)
assert df.crs.to_epsg() == 3857
# Should land near the projected origin.
assert df.geometry.total_bounds[0] >= 1_000_000.0 - 1e-6

@pytest.mark.skipif(da is None, reason="dask not installed")
def test_transform_attr_auto_detected_dask(self):
"""Dask backend honours auto-detected attrs['transform']."""
data = np.array([[1, 1, 2], [1, 1, 2], [2, 2, 2]], dtype=np.int32)
dask_data = da.from_array(data, chunks=(2, 2))
raster = xr.DataArray(
dask_data,
dims=('y', 'x'),
coords={'y': np.arange(3), 'x': np.arange(3)},
attrs={'transform': self._TRANSFORM},
)
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
all_ys = np.concatenate([p[0][:, 1] for p in polys])
assert all_xs.min() >= 1_000_000.0 - 1e-6
assert all_ys.max() <= 5_000_000.0 + 1e-6

@pytest.mark.parametrize("connectivity", [4, 8])
def test_transform_attr_works_with_both_connectivities(self, connectivity):
"""Auto-detected transform applies for connectivity=4 and 8 alike.

The transform is applied inside ``_scan`` after region tracing, so
the connectivity choice should not interact with the transform
path. Parametrize to catch any future change that decouples one
connectivity from the transform call site.
"""
raster = self._raster(transform=self._TRANSFORM)
_, polys = polygonize(
raster, connectivity=connectivity, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
all_ys = np.concatenate([p[0][:, 1] for p in polys])
assert all_xs.min() >= 1_000_000.0 - 1e-6
assert all_xs.max() <= 1_000_000.0 + 30.0 + 1e-6
assert all_ys.max() <= 5_000_000.0 + 1e-6
assert all_ys.min() >= 5_000_000.0 - 30.0 - 1e-6

def test_transform_attr_with_mask(self):
"""Auto-detected transform composes with a mask= argument.

The mask filters which pixels participate in polygonization but
does not touch the transform path; the surviving polygons should
still emerge in CRS space.
"""
raster = self._raster(transform=self._TRANSFORM)
# Mask out the right column (col index 2) so only the value-1
# region in cols 0-1 remains.
mask_data = np.array(
[[True, True, False],
[True, True, False],
[False, False, False]], dtype=bool)
mask = xr.DataArray(mask_data, dims=('y', 'x'))
df = polygonize(raster, mask=mask, return_type='geopandas')
assert len(df) == 1 # only the value-1 polygon remains
# The surviving polygon should land at the CRS origin, not pixel 0.
bounds = df.geometry.total_bounds
assert bounds[0] >= 1_000_000.0 - 1e-6
assert bounds[2] <= 1_000_000.0 + 20.0 + 1e-6
assert bounds[3] <= 5_000_000.0 + 1e-6

def test_rio_transform_auto_detected(self):
"""rio.transform() is used when attrs['transform'] is absent.

rioxarray derives ``rio.transform()`` from the DataArray's x/y
coords when they are present, so the test sets coords that
encode the desired transform rather than calling
``rio.write_transform`` (whose stamped value is overridden by
any existing coords).
"""
pytest.importorskip("rioxarray")
# 10 m pixels, origin at (1e6, 5e6), pixel_height negative. The
# coord values are pixel-centre coordinates, so the implied
# upper-left corner is (1e6, 5e6).
data = np.array([[1, 1, 2], [1, 1, 2], [2, 2, 2]], dtype=np.int32)
xs = 1_000_000.0 + 10.0 * np.arange(3) + 5.0 # centres
ys = 5_000_000.0 - 10.0 * np.arange(3) - 5.0
raster = xr.DataArray(
data,
dims=('y', 'x'),
coords={'y': ys, 'x': xs},
)
# Sanity: rio derives the right transform from the coords.
derived = tuple(raster.rio.transform())[:6]
assert derived == self._TRANSFORM, derived
_, polys = polygonize(raster, return_type='numpy')
all_xs = np.concatenate([p[0][:, 0] for p in polys])
assert all_xs.min() >= 1_000_000.0 - 1e-6


# --- atol / rtol tolerance parameter tests (issue #2173) ---
#
# The float pathway in _calculate_regions groups adjacent pixels using an
Expand Down
Loading