# Fromager Benchmarks

Performance benchmarks for Fromager, a tool for rebuilding complete dependency trees of Python wheels from source.

## Why Benchmarks?

Fromager recursively resolves and builds entire dependency trees from source. A typical bootstrap processes hundreds of packages, each requiring version resolution, source acquisition, patching, and wheel building. Performance regressions in Fromager's core logic compound across these operations.

**The challenge:** Wall-clock benchmarks in shared CI environments vary 10-20% due to noise. A genuine 5% regression becomes indistinguishable from CI variance. These benchmarks focus on pure Python operations where measurements are stable and regressions are detectable.

---

## Quick Start

```bash
# Install dependencies
uv sync --extra benchmark

# Run all benchmarks
uv run pytest benchmarks/

# Fast benchmarks only (skip slow and integration)
uv run pytest benchmarks/ -m "not slow and not integration"

# Integration benchmarks only
uv run pytest benchmarks/ -m "integration"

# With memory profiling
uv run pytest benchmarks/ --memray

# Compare against baseline
uv run pytest benchmarks/ --benchmark-save=baseline
# ... make changes ...
uv run pytest benchmarks/ --benchmark-compare=baseline

# Export to JSON
uv run pytest benchmarks/ --benchmark-json=results.json
```

---

## Understanding Output

```
-------------------------------- benchmark: 3 tests --------------------------------
Name                                  Mean      StdDev    Rounds
-------------------------------------------------------------------------------------
test_constraint_add_and_check         0.85ms    0.05ms    100
test_graph_serialization              1.20ms    0.08ms    100
test_python_version_matching_hot      0.12ms    0.01ms    200
-------------------------------------------------------------------------------------
```

**Key metrics:**
- **Mean** — Primary comparison metric
- **StdDev** — Low values indicate reliable measurements
- **Rounds** — More rounds = more statistical confidence

**Comparison output:**
```
Name                                  Mean (now)    Mean (base)    Ratio
-------------------------------------------------------------------------
test_constraint_add_and_check         0.87ms        0.85ms         1.02x
```

- **Ratio < 1.0** — Faster (improvement)
- **Ratio > 1.15** — Investigate before merging (this threshold can also be enforced automatically; see below)

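Rather than reviewing ratios by hand, pytest-benchmark can fail the run when results regress past a threshold relative to a saved baseline. A minimal example matching the 1.15x guideline above:

```bash
# Fail the run if any benchmark's mean regresses by more than 15%
# compared to the saved "baseline" run
uv run pytest benchmarks/ --benchmark-compare=baseline --benchmark-compare-fail=mean:15%
```
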
---

## Adding Benchmarks

Create test functions in `test_*.py` files using the `benchmark` fixture:

```python
def test_constraint_satisfaction(benchmark):
    """Benchmark Fromager's constraint checking."""
    from fromager.constraints import Constraints
    from packaging.version import Version

    constraints = Constraints()
    constraints.add_constraint("numpy>=1.20,<2.0")

    versions = [Version(v) for v in ["1.19.0", "1.25.0", "2.0.0"]]

    def check_all():
        return [constraints.is_satisfied_by("numpy", v) for v in versions]

    result = benchmark(check_all)
    assert result == [False, True, False]
```

**Guidelines:**
- Keep setup outside the benchmark function
- Assert correctness to ensure the benchmark actually works
- Mark slow benchmarks with `@pytest.mark.slow`
- Add metadata with `benchmark.extra_info["key"] = value` (this and the `slow` marker are demonstrated in the sketch below)

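The last two guidelines look like this in practice. A minimal sketch; the test name and workload are illustrative and not part of the existing suite:

```python
import pytest

from packaging.version import Version


@pytest.mark.slow
def test_version_sort_large(benchmark):
    """Illustrative slow benchmark: sort a large list of parsed versions."""
    # Setup stays outside the timed callable.
    versions = [Version(f"1.{i}.{i % 10}") for i in range(5000)]

    result = benchmark(sorted, versions)

    # Attach metadata that is stored alongside the saved/JSON results.
    benchmark.extra_info["version_count"] = len(versions)

    assert result[0] == Version("1.0.0")
```
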
---

## Advanced Features

### Benchmark Categories

| Category | File | Characteristics |
|----------|------|-----------------|
| Component | `test_resolution.py` | Fast, pure Python, no subprocess |
| Integration | `test_integration.py` | Slow, uses fixtures, network-isolated |
| Memory | Any with `--memray` | Tracks allocations and peak memory |

### Markers

- `@pytest.mark.slow` — Skip with `-m "not slow"`
- `@pytest.mark.integration` — Requires fixtures (local PyPI, uv shim)
- `@pytest.mark.memory` — Memory-focused benchmarks

### Integration Fixtures

The `fixtures/` module provides isolation for realistic benchmarks (see the sketch after this list):

- **`local_pypi`** — Session-scoped local PyPI server for network isolation
- **`configured_env`** — Configures the environment to use the local PyPI server
- **`uv_shim`** — Creates a mock uv binary for subprocess isolation
- **`subprocess_timer`** — Measures subprocess execution time and overhead

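An integration benchmark typically requests these fixtures directly. The sketch below only illustrates the shape of such a test: it assumes `local_pypi` exposes the server address as a `url` attribute, which may differ from the fixture's actual interface.

```python
import urllib.request

import pytest


@pytest.mark.integration
def test_local_index_roundtrip(benchmark, local_pypi, configured_env):
    """Illustrative integration benchmark against the local index."""

    def fetch_simple_index():
        # All traffic stays on the local PyPI server started by the fixture
        # (assumed `url` attribute; adjust to the real fixture interface).
        with urllib.request.urlopen(f"{local_pypi.url}/simple/") as response:
            return response.status

    assert benchmark(fetch_simple_index) == 200
```
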
### Memory Profiling

Memory benchmarks use pytest-memray (not available on Windows):

```bash
uv run pytest benchmarks/ --memray
uv run pytest benchmarks/ --memray --memray-bin-path=./memray-results
```

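In addition to the per-test allocation reports, pytest-memray markers can assert an allocation budget, enforced when running with `--memray`. A small sketch; the limit and workload are made up for illustration:

```python
import pytest


@pytest.mark.memory
@pytest.mark.limit_memory("50 MB")  # pytest-memray marker; fails if the test allocates more
def test_graph_dict_allocations():
    """Illustrative memory check for building a large in-memory mapping."""
    graph = {f"package-{i}": list(range(50)) for i in range(10_000)}
    assert len(graph) == 10_000
```
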
### CI Integration

Benchmarks run automatically via GitHub Actions:

- **`benchmarks.yml`** — Runs on PRs with the `run-benchmarks` label and on pushes to main
- **`benchmarks-nightly.yml`** — Nightly integration benchmarks (2 AM UTC)

CodSpeed provides noise-resistant CI measurements by counting CPU instructions rather than timing wall-clock execution.

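Assuming the pytest-codspeed plugin is available in your environment (an assumption; check the benchmark extra), the same benchmarks can also be exercised locally through its instrumentation:

```bash
# Run the suite under CodSpeed's instrumentation (pytest-codspeed plugin)
uv run pytest benchmarks/ --codspeed
```
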
---

## Directory Structure

```
benchmarks/
├── README.md              # This file
├── conftest.py            # Shared fixtures and markers
├── pytest.ini             # Benchmark configuration
├── fixtures/              # Reusable fixture modules
│   ├── __init__.py
│   ├── pypi_server.py     # Local PyPI server
│   ├── uv_shim.py         # Subprocess isolation
│   └── metrics.py         # Timing collectors
├── requirements/          # Package requirements for local PyPI
│   └── packages.txt
├── test_resolution.py     # Component benchmarks
└── test_integration.py    # Integration benchmarks (slow)
```

---

## Troubleshooting

**High variance:** Close resource-intensive applications. Increase rounds:
```bash
uv run pytest benchmarks/ --benchmark-min-rounds=20
```

**Missing module:** Install dependencies with `uv sync --extra benchmark`

**Debug without timing:** Run benchmarks as regular tests:
```bash
uv run pytest benchmarks/ --benchmark-disable
```

---

## Resources

- [pytest-benchmark documentation](https://pytest-benchmark.readthedocs.io/)
- [CodSpeed documentation](https://docs.codspeed.io/)
- [pytest-memray documentation](https://pytest-memray.readthedocs.io/)