diff --git a/doc/release_notes.rst b/doc/release_notes.rst
index 29503064..2487b2de 100644
--- a/doc/release_notes.rst
+++ b/doc/release_notes.rst
@@ -37,6 +37,10 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y
 * New ``linopy.licensed_solvers``: the subset of installed solvers that currently pass a license check. Handy in tests and for picking a solver at runtime.
 * New helpers for explicit license checks: ``linopy.solvers.check_solver_licenses("gurobi", "mosek")``, ``Gurobi.license_status()``, ``Gurobi.is_available()``. They return a ``LicenseStatus`` dataclass (``name``, ``ok``, ``message``).
 
+*Compact multi-key grouping*
+
+* ``LinearExpressionGroupby.sum`` gains a pandas-style ``observed`` parameter for grouping by a list of coordinate names: ``expr.groupby(["period", "season"]).sum(observed=True)`` keeps the result stacked over only the observed key combinations (a ``MultiIndex`` ``group`` dimension) instead of unstacking into one dimension per key, which materialises the dense cartesian grid. The default ``observed=False`` mirrors xarray. When the grid would be mostly fill values, a ``UserWarning`` points to ``observed=True``.
+
 *Constraints — CSR-backed storage*
 
 * Add ``CSRConstraint``: a memory-efficient immutable constraint representation backed by scipy CSR sparse matrices. Up to 90% memory savings for constraints with many terms and 30–120× faster matrix generation for direct solver APIs.
@@ -58,6 +62,8 @@ Most users should keep calling ``model.solve(...)``. If you want more control, y
 
 * ``add_variables`` / ``add_constraints``: extends 0.7.0's coords-as-truth rule to ``lower``, ``upper`` and ``mask`` for every bound type and dim order. Pandas ``Series`` / ``DataFrame`` bounds or masks missing a dimension are broadcast to ``coords`` instead of being silently dropped (`#709 <https://github.com/PyPSA/linopy/issues/709>`__); the variable's dimension order always follows ``coords`` (`#706 <https://github.com/PyPSA/linopy/issues/706>`__); tuple coord entries follow xarray's ``(dim_name, values)`` convention (e.g. ``coords=[("origin", origins)]``), while a bare value sequence uses a ``list``. Mismatched values or extra dims raise ``ValueError`` with a labelled message; sparse-coord masks (formerly a v0.6.3 ``FutureWarning``, #580) raise ``ValueError``, and masks with dims not in the data raise ``ValueError`` instead of ``AssertionError``.
 * Pandas inputs whose index names *levels* of a stacked-``MultiIndex`` ``coords`` dimension are now projected onto that dimension: a level subset broadcasts across the others, the full set aligns element-wise. This fixes PyPSA multi-investment arithmetic (e.g. an expression over a ``(period, timestep)`` ``snapshot`` MultiIndex times a ``period``-indexed weighting). In ``add_variables`` / ``add_constraints`` the input must provide a value for every level combination of the MultiIndex or a ``ValueError`` is raised (the error lists the missing combinations). **Implicit level projections are deprecated**: they emit an ``EvolvingAPIWarning`` everywhere — in arithmetic *and* in ``add_variables`` / ``add_constraints`` — and will raise under the upcoming v1 convention. Project the input onto the dimension explicitly (select with the dimension's level values) to keep current behavior. Aligning the full level set with full coverage stays silent. Strict validation also rejects a ``MultiIndex`` input with *unnamed* levels whose combinations don't match ``coords`` (previously a silent bypass, as such inputs can't be projected by level name).
+* ``LinearExpression.groupby`` now accepts a **non-dimension** coordinate as the key -- by name (``expr.groupby("period").sum()``, where ``period`` labels another dimension) or as the coordinate ``DataArray`` -- which previously raised ``ValueError: ... already exists``. Grouping by a dimension or a ``MultiIndex`` level already worked (`#750 <https://github.com/PyPSA/linopy/issues/750>`__).
+* ``LinearExpression.groupby`` with a **list of coordinate names** (``expr.groupby(["period", "season"]).sum()``) now takes the fast reindex path instead of silently falling back to the slow xarray implementation, returning one dimension per key as before (`#753 <https://github.com/PyPSA/linopy/issues/753>`__). See ``observed`` under Features to keep the result compact instead.
 * ``add_piecewise_formulation`` now produces a reproducible dimension order in the broadcast breakpoint array. The previous set-based expansion gave a hash-randomized order that varied between processes.
 * SOS constraints on masked variables no longer cause solver-specific failures (Gurobi ``IndexError``, Xpress ``?404 Invalid column number``, LP parse errors, silent set corruption). ``Model.solve()`` and ``Model.to_file()`` now raise a clear ``NotImplementedError`` referring users to `#688 <https://github.com/PyPSA/linopy/issues/688>`__; pass ``reformulate_sos=True`` as a workaround.
 * ``Model.solve(..., reformulate_sos=True)`` now actually reformulates SOS constraints even when the solver supports them natively. Previously it was silently ignored with a warning.
diff --git a/linopy/expressions.py b/linopy/expressions.py
index b0515ea2..ea8588d2 100644
--- a/linopy/expressions.py
+++ b/linopy/expressions.py
@@ -136,6 +136,67 @@ def _expr_unwrap(
 logger = logging.getLogger(__name__)
 
 
+def _resolve_group(group: Any, data: Dataset) -> Any:
+    """
+    Normalize a groupby key.
+
+    Unwrap a single-element key list to the scalar key, and resolve a string
+    naming a coordinate to that coordinate -- so ``groupby("name")`` behaves
+    like ``groupby(data["name"])``, mirroring xarray. Other inputs (Series,
+    DataFrame, DataArray, multi-key lists) are returned unchanged.
+    """
+    if isinstance(group, (list, tuple)) and len(group) == 1:
+        group = group[0]
+    if isinstance(group, str) and group in data.coords:
+        group = data[group]
+    return group
+
+
+def _multikey_value_frame(group: Any, data: Dataset) -> pd.DataFrame | None:
+    """
+    Gather a multi-key list of coordinate names into a value frame.
+
+    Return a DataFrame of the named coordinates when all keys are 1-D
+    coordinates sharing a single dimension -- so the list rides the fast
+    reindex path -- otherwise None.
+    """
+    is_name_list = (
+        isinstance(group, (list, tuple))
+        and len(group) > 1
+        and all(isinstance(g, str) and g in data.coords for g in group)
+    )
+    if not is_name_list:
+        return None
+    coord_dims = {data[g].dims for g in group}
+    if len(coord_dims) != 1 or len(next(iter(coord_dims))) != 1:
+        return None
+    names = list(group)
+    return data[names].to_dataframe()[names]
+
+
+def _unstack_multikey(ds: Dataset, dim: str) -> Dataset:
+    """
+    Unstack a stacked multi-key group dimension into one dimension per key.
+
+    Warn before materialising the grid when most cells would be fill values,
+    pointing to ``observed=True`` for a compact result.
+    """
+    mi = ds.indexes[dim].remove_unused_levels()
+    observed = len(mi)
+    grid = int(np.prod([len(level) for level in mi.levels]))
+    if grid > 2 * observed and grid - observed > 10_000:
+        warn(
+            f"Grouping a LinearExpression by {list(mi.names)} produces a dense "
+            f"{grid:,}-cell grid, but only {observed:,} of those combinations "
+            f"occur -- the {grid - observed:,} absent ones are materialised as "
+            f"fill values. Pass `observed=True` to keep the result compact over "
+            f"only the observed combinations.",
+            UserWarning,
+            stacklevel=3,
+        )
+    return ds.unstack(dim, fill_value=LinearExpression._fill_value)
+
+
 @dataclass
 @forward_as_properties(groupby=["dims", "groups"])
 class LinearExpressionGroupby:
@@ -158,17 +219,25 @@ def groupby(self) -> xarray.core.groupby.DatasetGroupBy:
         xarray.core.groupby.DataArrayGroupBy
             The groupby object.
         """
-        if isinstance(self.group, pd.DataFrame):
+        data = self.data
+        group = _resolve_group(self.group, data)
+
+        if isinstance(group, pd.DataFrame):
             raise ValueError(
                 "Grouping by a DataFrame only supported for `sum` operation with `use_fallback=False`."
             )
-        if isinstance(self.group, pd.Series):
-            group_name = self.group.name or "group"
-            group = DataArray(self.group, name=group_name)
-        else:
-            group = self.group  # type: ignore
+        if isinstance(group, pd.Series):
+            group = DataArray(group, name=group.name or "group")
+
+        # detach an attached free coordinate (never an indexed/level coord)
+        if (
+            isinstance(group, DataArray)
+            and group.name in set(data.coords) - set(data.dims)
+            and group.name not in data.xindexes
+        ):
+            data = data.drop_vars([group.name])
 
-        return self.data.groupby(group=group, **self.kwargs)
+        return data.groupby(group=group, **self.kwargs)
 
     def map(
         self,
@@ -200,7 +269,9 @@ def map(
             self.groupby.map(func, shortcut=shortcut, args=args, **kwargs), self.model
         )
 
-    def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression:
+    def sum(
+        self, use_fallback: bool = False, observed: bool = False, **kwargs: Any
+    ) -> LinearExpression:
         """
         Sum the groupby object.
 
@@ -218,6 +289,13 @@ def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression:
             Whether to use the fallback implementation, which is a sort of default
             xarray implementation. If set to False, the operation will be much
             faster but keyword arguments are ignored. Defaults to False.
+        observed : bool
+            Only applies when grouping by a list of coordinate names. If True,
+            keep the result stacked over the observed key combinations (a
+            ``MultiIndex`` ``group`` dimension) instead of unstacking into one
+            dimension per key, which materialises the dense cartesian grid.
+            Defaults to False, mirroring xarray. Not supported together with
+            `use_fallback`.
         **kwargs
             Arbitrary keyword arguments.
 
@@ -226,9 +304,22 @@ def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression:
         LinearExpression
             The sum of the groupby object.
         """
+        if observed and use_fallback:
+            raise ValueError(
+                "`observed=True` is not supported with `use_fallback=True`."
+            )
+
+        group = _resolve_group(self.group, self.data)
+
+        # a list of coord names rides the fast path as a value frame
+        multikey_frame = (
+            None if use_fallback else _multikey_value_frame(group, self.data)
+        )
+        if multikey_frame is not None:
+            group = multikey_frame
+
         non_fallback_types = (pd.Series, pd.DataFrame, xr.DataArray)
-        if isinstance(self.group, non_fallback_types) and not use_fallback:
-            group: pd.Series | pd.DataFrame | xr.DataArray = self.group
+        if isinstance(group, non_fallback_types) and not use_fallback:
             if isinstance(group, pd.DataFrame):
                 # dataframes do not have a name, so we need to set it
                 final_group_name = "group"
@@ -254,10 +345,12 @@ def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression:
             arrays = [group, group.groupby(group).cumcount()]
             idx = pd.MultiIndex.from_arrays(arrays, names=[GROUP_DIM, GROUPED_TERM_DIM])
             new_coords = Coordinates.from_pandas_multiindex(idx, group_dim)
-            coords = self.data.indexes[group_dim]
-            names_to_drop = [coords.name]
-            if isinstance(coords, pd.MultiIndex):
-                names_to_drop += list(coords.names)
+            # collapsing group_dim invalidates every coordinate aligned to it
+            names_to_drop = [
+                name
+                for name, coord in self.data.coords.items()
+                if group_dim in coord.dims
+            ]
             ds = self.data.drop_vars(names_to_drop).assign_coords(new_coords)
             ds = ds.unstack(group_dim, fill_value=LinearExpression._fill_value)
             ds = LinearExpression._sum(ds, dim=GROUPED_TERM_DIM)
@@ -270,6 +363,8 @@ def sum(self, use_fallback: bool = False, **kwargs: Any) -> LinearExpression:
                 ds = ds.assign_coords(new_coords)
 
             ds = ds.rename({GROUP_DIM: final_group_name})
+            if multikey_frame is not None and not observed:
+                ds = _unstack_multikey(ds, final_group_name)
             return LinearExpression(ds, self.model)
 
         def func(ds: Dataset) -> Dataset:
diff --git a/test/test_linear_expression.py b/test/test_linear_expression.py
index 0ac8bde4..5ffd7de1 100644
--- a/test/test_linear_expression.py
+++ b/test/test_linear_expression.py
@@ -7,6 +7,7 @@
 
 from __future__ import annotations
 
+import warnings
 from typing import Any
 
 import numpy as np
@@ -1384,6 +1385,288 @@ def test_linear_expression_groupby_on_same_name_as_target_dim(
     assert grouped.nterm == 10
 
 
+class TestMultiKeyFastPath:
+    """
+    Group a LinearExpression by a list of coordinate names: takes the fast
+    reindex path and returns one dimension per key, like the xarray fallback.
+    """
+
+    @staticmethod
+    def _expr(period_vals: list, season_vals: list) -> LinearExpression:
+        n = len(period_vals)
+        s = pd.RangeIndex(n, name="s")
+        m = Model()
+        x = m.add_variables(coords=[s], name="x")
+        return (1.0 * x).assign_coords(
+            period=xr.DataArray(period_vals, dims="s", coords={"s": s}, name="period"),
+            season=xr.DataArray(season_vals, dims="s", coords={"s": s}, name="season"),
+        )
+
+    @pytest.mark.parametrize("spelling", [list, tuple], ids=["list", "tuple"])
+    def test_matches_fallback(self, spelling: type) -> None:
+        # the fast path must equal the slow fallback, sparse cells included
+        expr = self._expr([2020, 2020, 2030, 2030, 2030], list("wswws"))
+        group = spelling(["period", "season"])
+
+        fast = expr.groupby(group).sum()
+        slow = expr.groupby(group).sum(use_fallback=True)
+
+        assert_linequal(fast, slow)
+
+    def test_separate_dims_not_stacked(self) -> None:
+        # built via a stacked index internally, but returns one dim per key
+        expr = self._expr([2020, 2020, 2030, 2030], list("wsws"))
+
+        grouped = expr.groupby(["period", "season"]).sum()
+
+        assert {"period", "season"} <= set(grouped.dims)
+        assert "group" not in grouped.dims
+        assert not isinstance(grouped.data.indexes.get("period"), pd.MultiIndex)
+
+    def test_sparse_combination_filled(self) -> None:
+        # (2020, "s") never occurs -> empty term in the grid
+        expr = self._expr([2020, 2020, 2030, 2030], list("wwws"))
+
+        grouped = expr.groupby(["period", "season"]).sum()
+
+        cell = grouped.sel(period=2020, season="s")
+        assert (cell.vars == -1).all()
+        assert cell.coeffs.isnull().all()
+
+    def test_dataframe_grouper_stays_compact(self) -> None:
+        # the DataFrame grouper keeps the stacked observed-only group dim
+        expr = self._expr([2020, 2020, 2030, 2030], list("wwws"))
+        df = expr.data[["period", "season"]].to_dataframe()[["period", "season"]]
+
+        grouped = expr.groupby(df).sum()
+
+        assert "group" in grouped.dims
+        assert isinstance(grouped.data.indexes["group"], pd.MultiIndex)
+        assert grouped.sizes["group"] == 3  # observed, not the 2x2=4 grid
+
+    def test_blowup_warns_when_sparse(self) -> None:
+        # 200 observed combos, 200x200 grid -> nudge toward observed=True
+        expr = self._expr(list(range(200)), list(range(200)))
+
+        with pytest.warns(UserWarning, match="dense .* grid"):
+            expr.groupby(["period", "season"]).sum()
+
+    def test_no_warning_when_dense(self) -> None:
+        expr = self._expr([2020, 2020, 2030, 2030], list("wsws"))
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            expr.groupby(["period", "season"]).sum()
+
+    def test_observed_keeps_stacked(self) -> None:
+        # observed=True skips the unstack: compact stacked MultiIndex,
+        # identical to the DataFrame grouper output
+        expr = self._expr([2020, 2020, 2030, 2030], list("wwws"))
+        df = expr.data[["period", "season"]].to_dataframe()[["period", "season"]]
+
+        grouped = expr.groupby(["period", "season"]).sum(observed=True)
+
+        assert_linequal(grouped, expr.groupby(df).sum())
+        assert grouped.sizes["group"] == 3  # observed, not the 2x2=4 grid
+
+    def test_observed_silences_blowup_warning(self) -> None:
+        expr = self._expr(list(range(200)), list(range(200)))
+
+        with warnings.catch_warnings():
+            warnings.simplefilter("error")
+            grouped = expr.groupby(["period", "season"]).sum(observed=True)
+
+        assert grouped.sizes["group"] == 200
+
+    def test_observed_with_fallback_raises(self) -> None:
+        expr = self._expr([2020, 2020], list("ws"))
+
+        with pytest.raises(ValueError, match="observed"):
+            expr.groupby(["period", "season"]).sum(use_fallback=True, observed=True)
+
+
+class TestGroupbyByAttachedCoordinate:
+    """
+    Group by an attached non-dimension coordinate.
+
+    Asserts grouping against hard-coded ``vars``/``coeffs`` to catch regressions.
+    """
+
+    @pytest.fixture
+    def t(self) -> pd.RangeIndex:
+        return pd.RangeIndex(4, name="t")
+
+    @pytest.fixture
+    def period(self, t: pd.RangeIndex) -> xr.DataArray:
+        return xr.DataArray(
+            [2020, 2020, 2030, 2030], dims="t", coords={"t": t}, name="period"
+        )
+
+    @pytest.fixture
+    def season(self, t: pd.RangeIndex) -> xr.DataArray:
+        return xr.DataArray(list("wsws"), dims="t", coords={"t": t}, name="season")
+
+    @pytest.fixture
+    def expr(
+        self, t: pd.RangeIndex, period: xr.DataArray, season: xr.DataArray
+    ) -> LinearExpression:
+        m = Model()
+        x = m.add_variables(coords=[t], name="x")
+        return (2.0 * x).assign_coords(period=period, season=season)
+
+    @pytest.mark.parametrize("use_fallback", [True, False])
+    @pytest.mark.parametrize("by", ["name", "dataarray"])
+    def test_single_key(
+        self,
+        expr: LinearExpression,
+        period: xr.DataArray,
+        by: str,
+        use_fallback: bool,
+    ) -> None:
+        group = "period" if by == "name" else period
+
+        grouped = expr.groupby(group).sum(use_fallback=use_fallback)
+
+        assert grouped.data.period.values.tolist() == [2020, 2030]
+        assert grouped.vars.transpose("period", TERM_DIM).values.tolist() == [
+            [0, 1],
+            [2, 3],
+        ]
+        assert grouped.coeffs.transpose("period", TERM_DIM).values.tolist() == [
+            [2.0, 2.0],
+            [2.0, 2.0],
+        ]
+
+    @pytest.mark.parametrize("spelling", [list, tuple], ids=["list", "tuple"])
+    def test_multi_key(self, expr: LinearExpression, spelling: type) -> None:
+        # A multi-key group always goes through the xarray fallback (a list is
+        # not a fast-path type), so there is no separate use_fallback case.
+        group = spelling(["period", "season"])
+
+        grouped = expr.groupby(group).sum()
+
+        assert dict(grouped.sizes) == {"period": 2, "season": 2, TERM_DIM: 1}
+        assert grouped.data.period.values.tolist() == [2020, 2030]
+        assert grouped.data.season.values.tolist() == ["s", "w"]
+        assert grouped.vars.transpose("period", "season", TERM_DIM).values.tolist() == [
+            [[1], [0]],
+            [[3], [2]],
+        ]
+        assert (grouped.coeffs == 2.0).all()
+
+    def test_extra_aux_coord_does_not_change_result(
+        self, t: pd.RangeIndex, period: xr.DataArray
+    ) -> None:
+        # A second auxiliary coord on the grouped dimension must neither break
+        # the reshape (it raised ``KeyError`` before the fix) nor change the sum.
+        m = Model()
+        x = m.add_variables(coords=[t], name="x")
+        timestep = xr.DataArray(
+            list("abab"), dims="t", coords={"t": t}, name="timestep"
+        )
+        expr = (2.0 * x).assign_coords(period=period, timestep=timestep)
+
+        grouped = expr.groupby("period").sum()
+
+        assert "timestep" not in grouped.coords
+        assert grouped.vars.transpose("period", TERM_DIM).values.tolist() == [
+            [0, 1],
+            [2, 3],
+        ]
+        assert (grouped.coeffs == 2.0).all()
+
+    @pytest.mark.parametrize("by", ["name", "dataarray"])
+    def test_two_dimensional(self, by: str) -> None:
+        # Grouping one dimension of a 2-D variable by an aux coord must keep the
+        # other dimension intact and pair up the right variable labels.
+        m = Model()
+        snapshot = pd.RangeIndex(4, name="snapshot")
+        gen = pd.Index(["g1", "g2"], name="gen")
+        y = m.add_variables(coords=[snapshot, gen], name="y")  # labels 0..7
+        period = xr.DataArray(
+            [2020, 2020, 2030, 2030],
+            dims="snapshot",
+            coords={"snapshot": snapshot},
+            name="period",
+        )
+        expr = (1.0 * y).assign_coords(period=period)
+        group = "period" if by == "name" else period
+
+        grouped = expr.groupby(group).sum()
+
+        assert grouped.data.period.values.tolist() == [2020, 2030]
+        assert grouped.data.gen.values.tolist() == ["g1", "g2"]
+        assert grouped.vars.transpose("period", "gen", TERM_DIM).values.tolist() == [
+            [[0, 2], [1, 3]],
+            [[4, 6], [5, 7]],
+        ]
+        assert (grouped.coeffs == 1.0).all()
+
+    @pytest.mark.parametrize("use_fallback", [True, False])
+    def test_dimension_coordinate_by_name(self, use_fallback: bool) -> None:
+        # A dimension coordinate may also be grouped by name; it collapses that
+        # dimension and keeps the other one.
+        m = Model()
+        snapshot = pd.RangeIndex(4, name="snapshot")
+        gen = pd.Index(["g1", "g2"], name="gen")
+        y = m.add_variables(coords=[snapshot, gen], name="y")  # labels 0..7
+
+        grouped = (1 * y).groupby("gen").sum(use_fallback=use_fallback)
+
+        assert grouped.data.gen.values.tolist() == ["g1", "g2"]
+        assert grouped.sizes["snapshot"] == 4
+        assert grouped.vars.transpose("gen", "snapshot", TERM_DIM).values.tolist() == [
+            [[0], [2], [4], [6]],
+            [[1], [3], [5], [7]],
+        ]
+
+    @pytest.mark.parametrize("use_fallback", [True, False])
+    def test_single_element_list_groups_like_scalar(
+        self, expr: LinearExpression, use_fallback: bool
+    ) -> None:
+        # ``groupby(["period"])`` groups like the scalar key, mirroring xarray.
+        grouped = expr.groupby(["period"]).sum(use_fallback=use_fallback)
+
+        assert grouped.data.period.values.tolist() == [2020, 2030]
+        assert grouped.vars.transpose("period", TERM_DIM).values.tolist() == [
+            [0, 1],
+            [2, 3],
+        ]
+        assert (grouped.coeffs == 2.0).all()
+
+    def test_multi_key_dataarrays_unsupported(
+        self, expr: LinearExpression, period: xr.DataArray, season: xr.DataArray
+    ) -> None:
+        # Multi-key grouping must be spelled with names; a list of DataArrays
+        # is unhashable and raises in xarray itself, so linopy mirrors that.
+        with pytest.raises(TypeError, match="unhashable"):
+            expr.groupby([period, season]).sum()
+
+    @pytest.mark.parametrize("use_fallback", [True, False])
+    @pytest.mark.parametrize(
+        "level, values, vars_",
+        [
+            ("period", [2020, 2030], [[0, 1, 2], [3, 4, 5]]),
+            ("timestep", ["t1", "t2", "t3"], [[0, 3], [1, 4], [2, 5]]),
+        ],
+    )
+    def test_multiindex_level(
+        self, level: str, values: list, vars_: list, use_fallback: bool
+    ) -> None:
+        # Grouping by a level of a real ``MultiIndex`` dimension (the
+        # pydata/xarray#6836 case, fixed upstream) works through linopy.
+        m = Model()
+        mi = pd.MultiIndex.from_product(
+            [[2020, 2030], ["t1", "t2", "t3"]], names=["period", "timestep"]
+        )
+        x = m.add_variables(coords={"snapshot": mi}, name="x")  # labels 0..5
+
+        grouped = (1 * x).groupby(level).sum(use_fallback=use_fallback)
+
+        assert grouped.data[level].values.tolist() == values
+        assert grouped.vars.transpose(level, TERM_DIM).values.tolist() == vars_
+
+
 @pytest.mark.parametrize("use_fallback", [True])
 def test_linear_expression_groupby_ndim(z: Variable, use_fallback: bool) -> None:
     # TODO: implement fallback for n-dim groupby, see https://github.com/PyPSA/linopy/issues/299