diff --git a/.claude/sweep-test-coverage-state.csv b/.claude/sweep-test-coverage-state.csv index b6ea17498..8868380ec 100644 --- a/.claude/sweep-test-coverage-state.csv +++ b/.claude/sweep-test-coverage-state.csv @@ -8,5 +8,6 @@ reproject,2026-05-10,,HIGH,1;4;5,"Added 39 tests: LiteCRS direct coverage, itrf_ Pass 17 (2026-05-18): added test_mask_nodata_gpu_vrt_2052.py closing Cat 1 HIGH backend-coverage gap on the mask_nodata= opt-out kwarg (#2052). The kwarg was added in #2052 and wired through the four public readers (open_geotiff, read_geotiff_gpu, read_geotiff_dask, read_vrt), but test_mask_nodata_kwarg_2052.py only exercised the eager-numpy and dask+numpy branches. The pure-GPU mask gating at _backends/gpu.py:709, the dask+GPU dispatcher forwarding at _backends/gpu.py:991, the eager VRT mask gating at _backends/vrt.py:320, and the chunked VRT graph builder at _backends/vrt.py:408/588 had zero direct coverage. 19 new tests, all passing on GPU host: GPU eager + dask+GPU mask_nodata=False preserves uint16, GPU defaults still promote to float64, dispatcher thread-through for open_geotiff(gpu=True, mask_nodata=False) and open_geotiff(gpu=True, chunks=N, mask_nodata=False), VRT eager and chunked branches mirror, cross-backend parity (eager vs dask, eager vs GPU, eager vs dask+GPU, eager vs VRT) bit-exact under mask_nodata=False, direct read_geotiff_dask entry-point coverage. Fixture uses tiled+deflate compression so the pure nvCOMP decode path is exercised, not the CPU-fallback piggyback path. Mutation against gpu.py:709 (force mask_nodata=True) flipped 4 GPU tests red; mutation against vrt.py eager mask gate flipped 4 VRT tests red. Cat 1 HIGH (backend coverage on mask_nodata=False for GPU, dask+GPU, VRT eager, VRT chunked). Pass 16 (2026-05-15): added test_max_cloud_bytes_dispatcher_silent_drop_2026_05_15.py closing Cat 4 HIGH parameter-coverage gap on the open_geotiff dispatcher's max_cloud_bytes kwarg. 
The kwarg was added in #1928 (eager fsspec budget) and re-ordered into the canonical reader signature by #1957, but open_geotiff only forwards it to _read_to_array on the eager non-VRT branch (__init__.py:431). The GPU branch at line 410, the dask branch at line 422, and the VRT branch at line 362 never reference the kwarg, so open_geotiff(p, max_cloud_bytes=8, gpu=True) / open_geotiff(p, max_cloud_bytes=8, chunks=N) / open_geotiff(vrt, max_cloud_bytes=8) all silently drop the budget. Same class of dispatcher-silently-drops-backend-kwarg bug fixed by #1561 / #1605 / #1685 / #1810 for other kwargs; the two sibling kwargs on_gpu_failure (line 339) and missing_sources (line 355) already raise ValueError when used on a path where they do not apply. 11 tests: 4 xfail(strict=True) pinning the fix surface (gpu, dask, vrt, dask+gpu), 3 passing pins on the current silent-drop behaviour so the fix is visible as a diff, 4 positive pins that the eager local + file-like paths accept the kwarg (docstring no-op contract). Filed issue #1974 for the dispatcher fix (sweep is test-only). Cat 4 HIGH (silent backend-kwarg drop). Pass 15 (2026-05-15): added test_write_vrt_bool_nodata_1921.py closing Cat 1 HIGH backend-parity gap on bool nodata rejection. Issue #1911 added the isinstance(nodata, (bool, np.bool_)) -> TypeError guard at to_geotiff and build_geo_tags, but the sibling writers were left unchecked: write_vrt(nodata=True) silently emits True into the VRT XML (str(True) drops the sentinel because no reader parses 'True' as numeric); write_geotiff_gpu direct call relies on the build_geo_tags defense-in-depth rather than an entry-point check, so a future refactor moving that guard would regress the GPU writer with no test coverage. 
17 new tests: 4 xfail (strict=True) pinning the write_vrt fix surface (issue #1921), 1 passing pin on the current buggy str(True) emission so the fix is visible as a diff, 6 numeric/None happy-path tests on write_vrt, 4 GPU writer direct-call bool-reject tests (4 dtypes x 1 call), 1 to_geotiff(gpu=True) dispatcher thread-through. Filed issue #1921 for the write_vrt fix (sweep is test-only). Cat 1 HIGH (write_vrt backend parity bug) + Cat 1 MEDIUM (write_geotiff_gpu defense-in-depth pin). Pass 14 (2026-05-15): added test_dask_streaming_write_degenerate_2026_05_15.py closing Cat 3 HIGH and Cat 2 HIGH/MEDIUM gaps on the dask streaming write path (to_geotiff with dask-backed DataArray, #1084). test_streaming_write.py covered 100x100 with a NaN block plus a 2x2 small raster but had nothing 1-pixel-row, 1-pixel-column, all-NaN, all-Inf, or +/-Inf-mixed. The streaming tile-row segmenter (#1485) on a 1-pixel-tall raster and the streaming nodata-mask coercion on an all-NaN chunk were reachable only with a dask input and had no direct coverage; a regression on either would not surface from the eager numpy path or the write_geotiff_gpu path (pass 5 covered the GPU writer's degenerate shapes). 16 new tests, all passing: 1x1 chunk-matches-shape + nodata-attr round-trip + uint16, 1xN single chunk + chunks-split-columns + wide-segmented-by-buffer (#1485 streaming_buffer_bytes=1 forces the segmenter), Nx1 single chunk + chunks-split-rows, all-NaN with finite sentinel + all-NaN without sentinel, mixed NaN/+Inf/-Inf preserving Inf bit-exact + sentinel masking NaN only, all-+Inf and all--Inf, predictor=3 (float predictor) round-trip on float32 + float64 plus int-dtype ValueError. predictor=3 streaming coverage extends the small-chunk and int-rejection geometry around test_predictor_fp_write_1313.test_predictor3_streaming_dask (which already covers a 128x192 predictor=3 dask streaming write with a Predictor-tag assertion). 
Cat 3 HIGH (1x1/1xN/Nx1) + Cat 2 HIGH (all-NaN with sentinel) + Cat 2 MEDIUM (mixed-Inf, all-Inf) + Cat 4 MEDIUM (predictor=3 streaming). Pass 13 (2026-05-13): added test_size_param_validation_gpu_vrt_1776.py closing Cat 4 HIGH parameter-coverage gap on size-arg validation. Issue #1752 added tile_size validation to to_geotiff and chunks validation to read_geotiff_dask, but the matching kwargs on three sibling entry points were left unchecked: write_geotiff_gpu(tile_size=) raised ZeroDivisionError for 0, struct.error for -1, TypeError for 256.0; read_geotiff_gpu(chunks=) and read_vrt(chunks=) raised ZeroDivisionError for 0 and silently accepted negative values. Factored two shared validators (_validate_tile_size_arg, _validate_chunks_arg) and called them up front from each entry point. 34 new tests, all passing on GPU host: tile_size matrix on write_geotiff_gpu (0/-1/256.0/True/False/positive/np.int64), chunks matrix on read_geotiff_gpu and read_vrt (0/-1/(0,N)/(N,-1)/wrong-length/bool/non-int/(N,float)/positive/np.int64), dispatcher thread-through tests (open_geotiff(gpu=True, chunks=0), to_geotiff(gpu=True, tile_size=0)). Pre-existing 13 #1752 tests still pass after refactor. Filed issue #1776. Pass 12 (2026-05-12): added test_gpu_writer_overview_mode_and_compression_level_1740.py closing Cat 4 HIGH and Cat 4 MEDIUM parameter-coverage gaps. (1) write_geotiff_gpu(overview_resampling='mode') and the dedicated _block_reduce_2d_gpu mode-fallback branch (_gpu_decode.py:3051-3056) had zero direct tests; six of the seven overview_resampling modes were covered (mean/nearest by test_features, min/max/median by pass 6, cubic by test_signature_parity_1631) but mode was the odd one out -- a regression dropping the mode dispatch from _block_reduce_2d_gpu would fall through to the mean reshape branch and emit wrong overview pixels for integer rasters. 
(2) write_geotiff_gpu(compression_level=) documented as accepted-but-ignored had no test; the CPU writer rejects out-of-range levels with ValueError, the GPU writer is documented not to -- a regression wiring the GPU writer up to the CPU range validator would silently break every to_geotiff(gpu=True, compression_level=X) caller for in-range levels and noisily for out-of-range. 19 tests, all passing on GPU host: _block_reduce_2d_gpu(method='mode') CPU-parity on 4x4 deterministic + random 8x8 + dtype-preserved across u8/u16/i16/i32, write_geotiff_gpu(cog=True, overview_resampling='mode') end-to-end round trip, to_geotiff(gpu=True, ..., overview_resampling='mode') dispatcher thread-through, GPU-vs-CPU pixel parity on 8x8 input, write_geotiff_gpu(compression_level=) in-range matrix on zstd/deflate, out-of-range matrix (zstd=999/-5, deflate=50/0) accepted without raising + round-trip preserved, to_geotiff(gpu=True, compression_level=999) dispatcher thread-through, companion CPU rejects-OOR pin to lock the asymmetry. Mutation against the mode branch (drop the 'if method == mode' block in _block_reduce_2d_gpu) flipped 9 mode tests red. Filed issue #1740. Pass 11 (2026-05-12): added test_gpu_writer_cpu_fallback_codecs_2026_05_12.py closing a Cat 4 HIGH parameter-coverage gap on write_geotiff_gpu compression= modes for the CPU-fallback codecs (lzw, packbits, lz4, lerc, jpeg2000/j2k). Pass 7 (test_gpu_writer_compression_modes_2026_05_11) covered only none/deflate/zstd/jpeg; the remaining five codecs route through dedicated branches in gpu_compress_tiles (_gpu_decode.py:2974-3019) with CPU fallbacks (lerc_compress, jpeg2000_compress, cpu_compress) that had zero direct tests via write_geotiff_gpu. A regression in routing/tag-wiring/fallback dispatch would ship silently because the internal reader uses the same compression-tag table. 
17 tests, all passing on GPU host: lzw/packbits/lz4 round-trip + compression-tag pin on uint16, lerc lossless float32 + uint16 round-trip + tag pin, jpeg2000 uint8 single-band + RGB multi-band lossless round-trip + j2k-alias parity + tag pin, GPU-vs-CPU writer pixel parity for lzw/packbits, to_geotiff(gpu=True, compression=lzw/packbits) dispatcher thread-through. Mutation against compression dispatch (swap lzw bytes to zstd; swap lerc bytes to deflate) flipped round-trip tests red. Filed issue #1706. Pass 10 (2026-05-12): added test_kwarg_behaviour_2026_05_12_v2.py closing two Cat 4 HIGH parameter-coverage gaps. (1) write_geotiff_gpu(predictor=True/2/3) had zero direct tests; the GPU writer threads predictor= through normalize_predictor and gpu_compress_tiles into five CUDA encode kernels (_predictor_encode_kernel_u8/u16/u32/u64 for predictor=2, _fp_predictor_encode_kernel for predictor=3) and a regression dropping the encode-kernel calls would ship corrupt files. (2) read_vrt(window=) had no behaviour tests (only a signature pin in test_signature_annotations_1654); the kwarg is documented and _vrt.read_vrt implements full windowed-read semantics (clip, multi-source overlap, src/dst scaling, GeoTransform origin shift on coords + attrs['transform']). 23 tests, all passing on GPU host: predictor=True/2 round-trips on u8/u16/i32 + 3-band RGB samples_per_pixel stride; predictor=3 lossless round-trip on f32 and f64; predictor=3 int-dtype ValueError (CPU/GPU parity); CPU/GPU pixel-exact parity for pred=2 u16 and pred=3 f32; read_vrt(window=) subregion + full + clamp-overflow + clamp-negative + 2x1 mosaic seam straddle + offset past seam + transform-attr origin shift + y/x coords half-pixel shift + window+band + window+chunks (dask) + window+gpu (cupy) + window+gpu+chunks (dask+cupy). Mutation against the encode dispatch flipped 7 predictor tests red. Filed issue #1690. 
Pass 9 (2026-05-12): added test_kwarg_behaviour_2026_05_12.py closing three Cat 4 MEDIUM parameter-coverage gaps plus one Cat 4 LOW error path. write_vrt documented kwargs (relative/crs_wkt/nodata) had a smoke-test pinning that the kwargs are accepted but no test verified the override *effect* -- a regression dropping the override branch and silently using the default-from-first-source would ship undetected. read_geotiff_gpu(dtype=) cast had zero direct tests; the eager path has TestDtypeEager and dask has TestDtypeDask but the GPU branch had no equivalent. write_geotiff_gpu(bigtiff=) threads through to _assemble_tiff(force_bigtiff=) but no test asserted the on-disk header byte switches; the CPU writer had it via test_features::test_force_bigtiff_via_public_api. write_vrt(source_files=[]) ValueError was uncovered. 26 tests, all passing on GPU host: write_vrt relative=True/False XML attribute + path inspection + parse-back round-trip, write_vrt crs_wkt= override distinct-from-default XML check, write_vrt nodata= override + default-from-source coverage, write_vrt([]) ValueError + no-file side effect, read_geotiff_gpu dtype= matrix (float64->float32, float64->float16, uint16->int32, uint16->uint8, float-to-int raise, dtype=None preserves native), open_geotiff(gpu=True, dtype=) dispatcher, read_geotiff_gpu(chunks=, dtype=) dask+GPU branch, write_geotiff_gpu bigtiff=True/False/None header verification, to_geotiff(gpu=True, bigtiff=True) dispatcher thread-through. Pass 8 (2026-05-11): added test_lz4_compression_level_2026_05_11.py closing Cat 4 MEDIUM parameter-coverage gap on compression='lz4' + compression_level=. _LEVEL_RANGES advertises lz4: (0, 16) but only deflate (1, 9) and zstd (1, 22) had direct level boundary + round-trip + reject tests. The range check is the gatekeeper -- lz4_compress silently accepts any int level -- so a regression dropping 'lz4' from _LEVEL_RANGES would ship undetected. 
18 tests, all passing: round-trip at levels 0/1/9/16 (lossless), default-level no-arg path, higher-level-not-larger smoke check on compressible input, out-of-range reject at -1/-10/17/100 on eager path, valid-range message format pin (lz4 valid: 0-16), dask streaming round-trip at 0/1/8/16, dask streaming out-of-range reject at -1/17/50 (separate _LEVEL_RANGES call site). Pass 7 (2026-05-11): added test_gpu_writer_compression_modes_2026_05_11.py closing Cat 4 HIGH gap on write_geotiff_gpu compression= modes. The writer documents zstd (default, fastest GPU), deflate, jpeg, and none, but only deflate + none had round-trip tests; the default zstd and the jpeg (nvJPEG/Pillow) paths shipped without targeted coverage. 11 new tests, all passing on GPU host: zstd round-trip + default-codec pinning, jpeg round-trip on 3-band RGB uint8 + 1-band greyscale, TIFF compression-tag header check across none/deflate/zstd/jpeg, plain deflate + none round-trips outside the COG/sentinel paths, and a cross-codec lossless parity check (zstd/deflate/none agree pixel-exact). nvJPEG path was exercised live, not just the Pillow fallback. Pass 6 (2026-05-11): added test_overview_resampling_min_max_median_2026_05_11.py covering Cat 4 HIGH parameter-coverage gap on overview_resampling=min/max/median. CPU end-to-end paths were already covered by test_cog_overview_nodata_1613::test_cpu_cog_overview_aggregations_ignore_sentinel; the GPU end-to-end paths and the direct CPU+GPU block-reducer branches had no targeted tests, so a regression on those code paths would ship undetected. 26 tests, all passing on GPU host: block-reducer unit tests (finite + partial-NaN), end-to-end COG writes for both to_geotiff and write_geotiff_gpu, CPU/GPU parity for to_geotiff(gpu=True), CPU nodata-sentinel regression check, and ValueError error-path tests for unknown method names on both backends. 
Pass 5 (2026-05-11): added test_degenerate_shapes_backends_2026_05_11.py covering Cat 3 HIGH geometric gaps (1x1 / 1xN / Nx1 reads on dask+numpy, GPU, dask+cupy backends; 1x1 / 1xN / Nx1 writes through write_geotiff_gpu) and Cat 2 MEDIUM NaN/Inf gaps (all-NaN read on GPU + dask+cupy, Inf / -Inf reads on all non-eager backends, NaN sentinel mask on dask read path including sentinel block split across chunk boundary). 23 tests, all passing on GPU host. Prior passes still hold: pass 4 (r4) closed read_geotiff_gpu/dask name= + max_pixels= kwargs (Cat 4), pass 3 (r3) closed read_vrt GPU/dask+GPU backend dispatch (Cat 1) and dtype/name kwargs (Cat 4)." polygonize,2026-05-27,2537,MEDIUM,4,"Pass 2 (2026-05-27): added test_polygonize_atol_rtol_backend_coverage_2026_05_27.py with 15 tests, all passing on a CUDA host. Closes Cat 4 MEDIUM parameter-coverage gap on atol/rtol forwarding through the cupy and dask+cupy backends. atol/rtol were exposed by #2173 / #2194 and thread through _polygonize_cupy (polygonize.py:808) and _polygonize_dask (polygonize.py:1719); the dask path further plumbs them into dask.delayed(_polygonize_chunk)(...) at lines 1748-1754 and into _bucket_key_for_value for cross-chunk merge bucketing at lines 1757-1758. Pre-existing tests covered non-default atol/rtol only on numpy and dask+numpy. The cupy and dask+cupy dispatchers were untested -- a regression dropping the kwargs there would silently change the float polygon count and would not be caught. Same dispatcher-silently-drops-kwarg pattern fixed by #1561 / #1605 / #1685 / #1810 / #1974 on adjacent GeoTIFF surfaces. 
15 tests: cupy strict-equality + default-tolerance pin on _REPRO_2173, dask+cupy strict-equality single-chunk + multi-chunk (engages cross-chunk merge bucket) + default-tolerance multi-chunk pin, cupy intermediate-atol small/large pair, dask+cupy intermediate-atol single/multi-chunk small + single-chunk large, cupy integer atol-ignored matrix, dask+cupy integer atol-ignored single-chunk + multi-chunk, cupy rtol-only large/small matrix. Mutation against _polygonize_cupy float branch (drop atol/rtol kwargs in the _polygonize_numpy forward call at polygonize.py:823-825) flips 3 of 5 cupy tests red; mutation against dask.delayed(_polygonize_chunk)(...) at polygonize.py:1748-1754 (drop atol, rtol args) flips 2 of 6 dask+cupy tests red. Confirmed clean restore via md5sum. Source untouched. Filed issue #2537 (test-only). Cat 4 MEDIUM (parameter coverage on cupy + dask+cupy atol/rtol forwarding). Pass 1 (2026-05-19): added test_polygonize_coverage_2026_05_19.py with 58 tests, all passing on a CUDA host. Closes Cat 3 HIGH 1x1 / Nx1 single-column geometric gaps (Nx1 exercises the nx==1 padding path at polygonize.py:565 and the cupy nx==1 numpy-fallback at polygonize.py:671), Cat 3 MEDIUM 1xN single-row and all-equal-value rasters on all four backends. Closes Cat 2 HIGH NaN parity for cupy + dask+cupy (numpy/dask were already covered by test_polygonize_nan_pixels_excluded*), Cat 2 MEDIUM all-NaN raster on all four backends, Cat 2 HIGH +/-Inf pins on all four backends. Filed source-bug issue #2155: numpy/dask/dask+cupy backends silently absorb Inf cells into adjacent finite polygons because _is_close reduces abs(inf-inf) to nan; cupy backend handles Inf correctly. Pins lock the asymmetric behaviour so the fix is visible. Closes Cat 1 MEDIUM simplify_tolerance + mask= parity gaps on dask+cupy backend (numpy/cupy/dask were already covered). 
Closes Cat 4 MEDIUM column_name non-default value across geopandas/spatialpandas/geojson return types and Cat 4 MEDIUM validation error paths (bad connectivity, bad transform length, mask shape mismatch, mask underlying-type mismatch). Cat 5 N/A: polygonize returns lists/dataframes, not a DataArray with attrs to propagate." rasterize,2026-05-21,2255,HIGH,1;2;3,"Pass 2 (2026-05-21): added test_rasterize_coverage_2026_05_21.py with 58 tests, all passing on a CUDA host. Closes Cat 2 HIGH +/-Inf and NaN burn-value gaps that pass-1 left untouched: pin +Inf / -Inf / Inf+(-Inf)/NaN polygon, point, and line burn behaviour across numpy / cupy / dask+numpy / dask+cupy, plus Inf+finite under sum stays Inf, Inf+(-Inf) under sum collapses to NaN, min(Inf, 1.0) and max(-Inf, 1.0) pick the finite value, and Inf-as-bound is rejected with the same ValueError as NaN-as-bound (pass-1 only tested the NaN-bound rejection). Closes Cat 1 MEDIUM nested GeometryCollection on all four backends: a GC inside a GC has no direct test today even though rasterize.py:1995 documents recursive unpacking, and the deeply-nested-3-levels eager test pins the recursion depth limit isn't 1 or 2. Closes Cat 1 MEDIUM columns= (multi-column) parity on cupy and dask+cupy (TestMultiColumn covered numpy/dask+numpy only); pin three columns of props on GPU so the (N, P) loop survives the kernel boundary. Closes Cat 3 LOW rectangular-pixel parity with resolution=(rx, ry) across backends. Filed source-bug issue #2255: GPU max/min merge silently suppresses NaN burn values -- CPU returns NaN (1.0 > NaN is False, keeps NaN); GPU returns 1.0 because the kernel inits the output buffer to -inf for max (or +inf for min) and atomicMax/Min is NaN-suppressing under IEEE device semantics. 
Pinned both the CPU NaN-propagating behaviour and the GPU NaN-suppressing behaviour as paired tests (test_nan_burn_overlaps_max_cpu_propagates vs test_nan_burn_overlaps_max_gpu_suppresses_nan, plus test_nan_burn_single_geom_max_gpu_returns_neg_inf for the single-write-on-GPU-returns-buffer-init case) so the divergence is visible in CI until the GPU kernels are aligned. Source untouched. Pass 1 (2026-05-17): added test_rasterize_coverage_2026_05_17.py with 34 tests, all passing on a CUDA host. Closes four documented public-API gaps left after the pass-0 audit. (1) Cat 3 HIGH 1x1 single-pixel raster -- test_rasterize.py covers 1xN strips and Nx1 strips but never width=1 AND height=1, so the polygon scanline / line Bresenham / point burn kernels all ship without the single-cell degenerate case; the new TestSinglePixelRaster class pins polygon/point/line on eager numpy plus polygon parity across cupy / dask+numpy / dask+cupy. (2) Cat 4 HIGH like= template-raster parameter is documented at rasterize.py:2038 and implemented by _extract_grid_from_like (line 1930) but no test exercises it; TestLikeParameter pins dtype/bounds/coords inheritance, the three override branches (dtype, bounds, width/height), the three validation branches (not-DataArray, 3D, wrong dim names) and like= on all four backends. Mutation against the like-dtype branch (rasterize.py:2183-2184) flipped the inheritance test red. (3) Cat 4 HIGH resolution= happy path -- only the oversize-rejection error path was tested (line 304); TestResolutionParameter pins the scalar branch, the tuple branch, the ceil-and-clamp-to-1 semantics, and resolution= on all four backends. 
(4) Cat 4 HIGH non-empty GeometryCollection unpacking is documented at rasterize.py:1995 and implemented by _classify_geometries_loop (line 228) but only the empty-GC case was tested (line 269); TestGeometryCollection pins polygon+point and polygon+line+point collections on eager numpy plus parity across cupy / dask+numpy / dask+cupy so the loop classifier's polygon/line/point sub-bucketing has direct coverage. Cat 1 MEDIUM gap closed: eager cupy all_touched=True parity vs eager numpy (TestEagerCupyAllTouched) -- the existing test only covered dask+cupy all_touched, leaving the direct GPU all_touched kernel untested. Cat 2 MEDIUM gap closed: int32 dtype with default NaN fill silently casts to the int32-min sentinel (TestIntegerDtypeNanFill) -- pin the cast so any future ValueError-raises switch is visible as a code-review diff. Pre-existing 143 passing + 2 skipped tests in test_rasterize.py untouched." +reproject,2026-05-27,,MEDIUM,1,"Pass 2 (2026-05-27): added test_reproject_coverage_2026_05_27.py with 10 tests, all passing on a CUDA host. Closes Cat 1 MEDIUM backend-coverage gaps left after pass 1: (a) bounds_policy= (#2187) had numpy + dask+numpy coverage but no cupy / dask+cupy tests -- a regression dropping the kwarg from the GPU dispatchers would ship undetected; TestBoundsPolicyCupy and TestBoundsPolicyDaskCupy pin raw/clamp/bogus on both GPU backends and assert clamp-grid parity with numpy. (b) test_reproject_handles_inf_input only covered eager numpy; the dask, cupy, and dask+cupy chunk workers each ship their own bilinear/cubic resampler so a regression raising on +/-Inf in any one backend would not surface from the existing test. Four new tests close the matrix (dask+numpy, cupy, dask+cupy with scattered +/-Inf cells; cupy with all-Inf raster checking no spurious finite cells appear). Note carried forward from pass 1: _merge_arrays_cupy is imported but unused -- no cupy merge dispatch in merge(); feature gap not test gap. 
Added 39 tests: LiteCRS direct coverage, itrf_transform behaviour/roundtrip/array, itrf_frames, geoid_height numerical correctness + raster happy-path, vertical helpers (ellipsoidal<->orthometric/depth), reproject() lat/lon and latitude/longitude dim propagation. Note: _merge_arrays_cupy is imported but unused (no cupy merge dispatch in merge()); flagged as feature gap not test gap." reproject,2026-05-10,,HIGH,1;4;5,"Added 39 tests: LiteCRS direct coverage, itrf_transform behaviour/roundtrip/array, itrf_frames, geoid_height numerical correctness + raster happy-path, vertical helpers (ellipsoidal<->orthometric/depth), reproject() lat/lon and latitude/longitude dim propagation. Note: _merge_arrays_cupy is imported but unused (no cupy merge dispatch in merge()); flagged as feature gap not test gap." zonal,2026-05-27,,HIGH,1;3;4;5,"Pass 1 (2026-05-27): added test_zonal_backend_coverage_2026_05_27.py with 32 tests, all passing on a CUDA host. Closes Cat 1 HIGH backend-coverage gaps: crosstab cupy + dask+cupy (_crosstab_cupy / _crosstab_dask_cupy were dispatched but never invoked by tests), regions cupy + dask+cupy (_regions_cupy via cupyx.scipy.ndimage + _regions_dask_cupy), trim dask+numpy + cupy + dask+cupy (_trim_bounds_dask isnan path and cupy data.get() path), crop dask+numpy + cupy + dask+cupy (_crop_bounds_dask + cupy data.get() path), apply 3D cupy + dask+cupy (per-layer kernel launch over the third axis in _apply_cupy and _apply_dask_cupy). Existing test_zonal.py covered only numpy + dask+numpy for crosstab/regions/trim/crop and 2D-only for cupy apply. Closes Cat 3 MEDIUM 1x1 / 1xN / Nx1 strip edge cases for trim, crop, and regions. Closes Cat 4 LOW pins: regions(neighborhood=6) ValueError, suggest_zonal_canvas(crs='Geographic') aspect-ratio pin and invalid-crs KeyError, crosstab cupy zone_ids/cat_ids filter, crosstab cupy agg='percentage'. 
Closes Cat 5 MEDIUM: regions coords/attrs propagation across numpy + dask+numpy, trim/crop name='trim'/'crop' default + attrs preservation. Also pins the documented numpy-vs-dask trim asymmetry on NaN sentinel (numpy _trim does equality which never matches NaN; dask _trim_bounds_dask has dedicated isnan branch). Mutation against the cupy.asnumpy() conversion in _crosstab_cupy flipped test_crosstab_cupy_matches_numpy red. Source untouched." diff --git a/xrspatial/tests/test_reproject_coverage_2026_05_27.py b/xrspatial/tests/test_reproject_coverage_2026_05_27.py new file mode 100644 index 000000000..feb241daf --- /dev/null +++ b/xrspatial/tests/test_reproject_coverage_2026_05_27.py @@ -0,0 +1,332 @@ +"""Test-coverage sweep additions for ``xrspatial.reproject`` (2026-05-27). + +Closes two backend-coverage gaps left after the 2026-05-10 sweep: + +1. ``bounds_policy`` (added in #2187) only had numpy and dask+numpy + coverage. The cupy and dask+cupy backends were not exercised, so a + regression that broke the bounds-derivation heuristics on a GPU input + would ship undetected. ``bounds_policy`` is computed before the + backend split inside ``_compute_output_grid``, but the surrounding + ``reproject()`` call still routes through the backend-specific + chunk worker -- pinning the policy through cupy / dask+cupy locks in + that the policy keyword survives every dispatch. + +2. ``test_reproject_handles_inf_input`` covered only the eager numpy + path. ``+/-Inf`` pixels are common in real raster data (often as the + result of an upstream divide or a corrupt source), and the cupy, + dask, and dask+cupy chunk workers each have their own copy of the + bilinear / cubic resampler math. A regression that started raising + on Inf in any one backend would not surface from the existing test. + Together the four new tests pin that every backend completes without + crashing on +/-Inf input. 
+ +Both gaps map to **Cat 1 -- Backend coverage** in the sweep template +(``MEDIUM`` severity: a real bug could ship undetected, but the public +contract on the failure paths is "implementation-defined" so the test +asserts on geometry, not pixel values). +""" +from __future__ import annotations + +import numpy as np +import pytest +import xarray as xr + +try: + import pyproj # noqa: F401 + HAS_PYPROJ = True +except ImportError: + HAS_PYPROJ = False + +try: + import dask.array as da + HAS_DASK = True +except ImportError: + HAS_DASK = False + +try: + import cupy as cp + HAS_CUPY = True +except ImportError: + HAS_CUPY = False + + +pytestmark = pytest.mark.skipif( + not HAS_PYPROJ, reason="pyproj required for reproject tests" +) + + +# --------------------------------------------------------------------------- +# Shared fixture helpers +# --------------------------------------------------------------------------- + +def _benign_geographic(h=32, w=32, seed=0): + """Mid-latitude raster, well clear of any projection singularity.""" + data = np.random.RandomState(seed).rand(h, w).astype(np.float32) + return xr.DataArray( + data, + dims=['y', 'x'], + coords={'y': np.linspace(55, 45, h), + 'x': np.linspace(-5, 5, w)}, + attrs={'crs': 'EPSG:4326'}, + ) + + +def _global_geographic(h=50, w=100, seed=0): + """Global raster that triggers the auto blow-up heuristic when + projected to Web Mercator (polar singularity on the y axis).""" + data = np.random.RandomState(seed).rand(h, w).astype(np.float32) + return xr.DataArray( + data, + dims=['y', 'x'], + coords={'y': np.linspace(90, -90, h), + 'x': np.linspace(-180, 180, w)}, + attrs={'crs': 'EPSG:4326'}, + ) + + +def _inf_raster(h=32, w=32): + """Raster with one +Inf and one -Inf cell at fixed positions.""" + data = np.ones((h, w), dtype=np.float64) + data[0, 0] = np.inf + data[1, 1] = -np.inf + return xr.DataArray( + data, + dims=['y', 'x'], + coords={'y': np.linspace(55, 45, h), 'x': np.linspace(-5, 5, w)}, + attrs={'crs': 
'EPSG:4326', 'nodata': np.nan}, + ) + + +# --------------------------------------------------------------------------- +# Gap 1: bounds_policy on cupy / dask+cupy backends +# --------------------------------------------------------------------------- + +@pytest.mark.skipif(not HAS_CUPY, reason="cupy required") +class TestBoundsPolicyCupy: + """bounds_policy must thread through the cupy backend. + + The numpy and dask+numpy paths are already covered by + ``TestBoundsPolicy``. Without these tests, a regression that dropped + the kwarg from the cupy / dask+cupy dispatch would not surface. + """ + + def test_raw_policy_cupy_backend(self): + """bounds_policy='raw' returns a finite output on a cupy raster.""" + from xrspatial.reproject import reproject + + host = _benign_geographic() + raster = host.copy(data=cp.asarray(host.values)) + out = reproject(raster, 'EPSG:3857', bounds_policy='raw') + # Output stays on GPU. + assert isinstance(out.data, cp.ndarray), ( + "expected cupy-backed output, got " + f"{type(out.data).__name__}" + ) + # Sanity: at least one finite pixel. + arr = out.data.get() + assert np.isfinite(arr).any() + + def test_clamp_policy_cupy_matches_numpy(self): + """Bounds policy effect must match numpy within tolerance on the same + grid (the kwarg only changes ``_compute_output_grid``, not the + per-pixel resampler).""" + from xrspatial.reproject import reproject + + host = _global_geographic() + gpu = host.copy(data=cp.asarray(host.values)) + # Pin to a fixed bounds via clamp to avoid 2/98 percentile randomness + # on tiny grids. 
+ import warnings + with warnings.catch_warnings(): + warnings.simplefilter('ignore', UserWarning) + np_out = reproject(host, 'EPSG:3857', bounds_policy='clamp') + gpu_out = reproject(gpu, 'EPSG:3857', bounds_policy='clamp') + + np.testing.assert_allclose( + np_out.coords['x'].values, gpu_out.coords['x'].values, + rtol=1e-10, atol=1e-10, + ) + np.testing.assert_allclose( + np_out.coords['y'].values, gpu_out.coords['y'].values, + rtol=1e-10, atol=1e-10, + ) + np_vals = np_out.values + gpu_vals = (gpu_out.data.get() if hasattr(gpu_out.data, 'get') + else np.asarray(gpu_out.data)) + np.testing.assert_array_equal(np.isnan(np_vals), np.isnan(gpu_vals)) + finite = np.isfinite(np_vals) + if finite.any(): + np.testing.assert_allclose( + np_vals[finite], gpu_vals[finite], rtol=1e-5, atol=1e-5, + ) + + def test_invalid_policy_rejected_cupy(self): + """Validation must fire before the backend dispatch on cupy too.""" + from xrspatial.reproject import reproject + + host = _benign_geographic() + raster = host.copy(data=cp.asarray(host.values)) + with pytest.raises(ValueError, match=r"bounds_policy"): + reproject(raster, 'EPSG:3857', bounds_policy='bogus') + + +@pytest.mark.skipif( + not (HAS_CUPY and HAS_DASK), reason="cupy and dask required", +) +class TestBoundsPolicyDaskCupy: + """bounds_policy must thread through the dask+cupy backend.""" + + def test_raw_policy_dask_cupy_backend(self): + """bounds_policy='raw' works with a dask+cupy raster.""" + from xrspatial.reproject import reproject + + host = _benign_geographic() + gpu_chunked = da.from_array(cp.asarray(host.values), chunks=(16, 16)) + raster = host.copy(data=gpu_chunked) + out = reproject(raster, 'EPSG:3857', bounds_policy='raw') + # The dask+cupy fast path materializes the GPU result eagerly + # when the output fits in VRAM (see ``_reproject_dask_cupy``). + # The chunked fallback returns a lazy dask array. 
Either is + # acceptable here -- the contract is that the data stays on the + # GPU and at least one cell is finite. + if hasattr(out.data, 'dask'): + arr = out.compute().data + else: + arr = out.data + assert isinstance(arr, cp.ndarray), ( + f"expected cupy-backed output, got {type(arr).__name__}" + ) + host_arr = arr.get() + assert np.isfinite(host_arr).any() + + def test_clamp_policy_dask_cupy_matches_numpy(self): + """bounds_policy='clamp' produces the same output grid as the numpy + path on a dask+cupy raster.""" + from xrspatial.reproject import reproject + + host = _global_geographic() + gpu_chunked = da.from_array(cp.asarray(host.values), chunks=(25, 50)) + raster = host.copy(data=gpu_chunked) + + import warnings + with warnings.catch_warnings(): + warnings.simplefilter('ignore', UserWarning) + np_out = reproject(host, 'EPSG:3857', bounds_policy='clamp') + dgpu_raw = reproject(raster, 'EPSG:3857', bounds_policy='clamp') + + # Materialize the dask+cupy result if still lazy. + if hasattr(dgpu_raw.data, 'dask'): + dgpu_out = dgpu_raw.compute() + else: + dgpu_out = dgpu_raw + + np.testing.assert_allclose( + np_out.coords['x'].values, dgpu_out.coords['x'].values, + rtol=1e-10, atol=1e-10, + ) + np.testing.assert_allclose( + np_out.coords['y'].values, dgpu_out.coords['y'].values, + rtol=1e-10, atol=1e-10, + ) + + def test_invalid_policy_rejected_dask_cupy(self): + """Validation must fire before backend dispatch on dask+cupy too.""" + from xrspatial.reproject import reproject + + host = _benign_geographic() + gpu_chunked = da.from_array(cp.asarray(host.values), chunks=(16, 16)) + raster = host.copy(data=gpu_chunked) + with pytest.raises(ValueError, match=r"bounds_policy"): + reproject(raster, 'EPSG:3857', bounds_policy='bogus') + + +# --------------------------------------------------------------------------- +# Gap 2: Inf input handling across all backends +# --------------------------------------------------------------------------- + +@pytest.mark.skipif(not 
HAS_DASK, reason="dask required") +def test_reproject_inf_input_dask_backend(): + """+/-Inf input on the dask+numpy backend must not crash. + + Pins the dask map_blocks chunk worker's behaviour on Inf cells. The + output may propagate Inf or coerce to NaN -- both are acceptable -- + but the call must return a valid 2-D DataArray with the requested + output geometry. + """ + from xrspatial.reproject import reproject + + raster = _inf_raster() + raster = raster.copy(data=da.from_array(raster.values, chunks=(16, 16))) + out = reproject(raster, 'EPSG:32633') + assert hasattr(out.data, 'dask'), "expected dask-backed output" + computed = out.compute() + assert computed.ndim == 2 + assert computed.shape[0] >= 1 and computed.shape[1] >= 1 + + +@pytest.mark.skipif(not HAS_CUPY, reason="cupy required") +def test_reproject_inf_input_cupy_backend(): + """+/-Inf input on the cupy backend must not crash.""" + from xrspatial.reproject import reproject + + raster = _inf_raster() + raster = raster.copy(data=cp.asarray(raster.values)) + out = reproject(raster, 'EPSG:32633') + assert out.ndim == 2 + arr = out.data.get() if hasattr(out.data, 'get') else np.asarray(out.data) + assert arr.shape[0] >= 1 and arr.shape[1] >= 1 + + +@pytest.mark.skipif( + not (HAS_CUPY and HAS_DASK), reason="cupy and dask required", +) +def test_reproject_inf_input_dask_cupy_backend(): + """+/-Inf input on the dask+cupy backend must not crash. + + The dask+cupy fast path returns the result eagerly when it fits in + VRAM. The fallback returns a lazy dask array. Both are acceptable; + the contract checked here is that no kernel raises on +/-Inf cells. 
+ """ + from xrspatial.reproject import reproject + + raster = _inf_raster() + raster = raster.copy( + data=da.from_array(cp.asarray(raster.values), chunks=(16, 16)) + ) + out = reproject(raster, 'EPSG:32633') + if hasattr(out.data, 'dask'): + out = out.compute() + assert out.ndim == 2 + arr = (out.data.get() if hasattr(out.data, 'get') + else np.asarray(out.data)) + assert arr.shape[0] >= 1 and arr.shape[1] >= 1 + + +@pytest.mark.skipif(not HAS_CUPY, reason="cupy required") +def test_reproject_all_inf_cupy_backend(): + """All-Inf raster on cupy must produce a valid output. + + The eager numpy path already pins +/-Inf at scattered positions; + this pins the harder case where the source has no finite cells at + all and every output pixel must come from a non-finite source. + """ + from xrspatial.reproject import reproject + + data = np.full((16, 16), np.inf, dtype=np.float64) + host = xr.DataArray( + data, + dims=['y', 'x'], + coords={'y': np.linspace(55, 45, 16), + 'x': np.linspace(-5, 5, 16)}, + attrs={'crs': 'EPSG:4326', 'nodata': np.nan}, + ) + raster = host.copy(data=cp.asarray(data)) + out = reproject(raster, 'EPSG:32633') + assert out.ndim == 2 + arr = out.data.get() if hasattr(out.data, 'get') else np.asarray(out.data) + # Every cell is either Inf or NaN; no finite values can appear. + finite_mask = np.isfinite(arr) + assert not finite_mask.any(), ( + f"all-Inf input should not produce finite outputs; " + f"got {finite_mask.sum()} finite cell(s)" + )