Fix LocalBackend fork_checkpoint to overwrite initial LoRA for vLLM #506
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Prek

on:
  pull_request:
  push:
    branches: [main]

# 'contents: write' is required so jobs can read and upload uv-cache
# assets on the release tagged CI_UV_CACHE_RELEASE_TAG.
permissions:
  contents: write

env:
  # Base container image; the cache fingerprint is computed from this value,
  # so it must match the job-level 'container.image' entries below.
  CI_BASE_IMAGE: "pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel"
  CI_PYTHON_MM: "3.11"
  # Release tag and asset-name prefix under which the uv cache tarball
  # parts are stored.
  CI_UV_CACHE_RELEASE_TAG: "prek-uv-cache"
  CI_UV_CACHE_ASSET_PREFIX: "prek-uv-cache"
  # Native-build parallelism knobs (apex compile jobs, nvcc threads,
  # concurrent uv build slots).
  CI_APEX_PARALLEL_BUILD: "8"
  CI_APEX_NVCC_THREADS: "1"
  CI_UV_BUILD_SLOTS: "2"
  UV_CACHE_DIR: "/root/.cache/uv"
  UV_LINK_MODE: "copy"
  TORCH_CUDA_ARCH_LIST: "9.0"
jobs:
  # Decides whether a complete uv-cache part set already exists on the cache
  # release for the current dependency fingerprint. Exposes the fingerprint
  # and hit/miss result to downstream jobs.
  cache-status:
    runs-on: art-large-runner
    outputs:
      cache-hit: ${{ steps.check.outputs.cache-hit }}
      fingerprint: ${{ steps.fingerprint.outputs.fingerprint }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Compute expected uv cache fingerprint
        id: fingerprint
        run: |
          # The fingerprint folds in every input that can invalidate the
          # prebuilt uv cache (lockfile, base image, python version, build knobs).
          fp="$(python3 scripts/ci/compute_uv_fingerprint.py \
            --pyproject pyproject.toml \
            --uv-lock uv.lock \
            --base-image "${CI_BASE_IMAGE}" \
            --python-mm "${CI_PYTHON_MM}" \
            --torch-cuda-arch-list "${TORCH_CUDA_ARCH_LIST}" \
            --ci-apex-parallel-build "${CI_APEX_PARALLEL_BUILD}" \
            --ci-apex-nvcc-threads "${CI_APEX_NVCC_THREADS}")"
          echo "fingerprint=${fp}" >> "${GITHUB_OUTPUT}"
          echo "Expected uv cache fingerprint: ${fp}"

      - name: Check if uv cache exists
        id: check
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Pass the step output through env rather than interpolating
          # `${{ }}` directly into the script body (script-injection hardening).
          FINGERPRINT: ${{ steps.fingerprint.outputs.fingerprint }}
        run: |
          part_prefix="${CI_UV_CACHE_ASSET_PREFIX}-${FINGERPRINT}.tar.zst.part-"
          release_api="https://api.github.com/repos/${GITHUB_REPOSITORY}/releases/tags/${CI_UV_CACHE_RELEASE_TAG}"
          # `|| true`: a missing release is an expected cache miss, not an error.
          release_json="$(curl -fsSL \
            -H "Authorization: Bearer ${GITHUB_TOKEN}" \
            -H "Accept: application/vnd.github+json" \
            "${release_api}" || true)"
          if [ -z "${release_json}" ]; then
            echo "Cache release '${CI_UV_CACHE_RELEASE_TAG}' not found."
            echo "cache-hit=false" >> "${GITHUB_OUTPUT}"
            exit 0
          fi
          # A hit requires a gapless part sequence 000..NNN for this fingerprint.
          hit="$(RELEASE_JSON="${release_json}" PART_PREFIX="${part_prefix}" python3 -c "
          import json, os, re
          payload = json.loads(os.environ['RELEASE_JSON'])
          prefix = os.environ['PART_PREFIX']
          pattern = re.compile(r'^' + re.escape(prefix) + r'(\d{3})$')
          parts = sorted(
              int(m.group(1))
              for a in payload.get('assets', [])
              for m in [pattern.match(a.get('name', ''))]
              if m and a.get('id') is not None
          )
          print('true' if parts and parts == list(range(len(parts))) else 'false')
          ")"
          echo "cache-hit=${hit}" >> "${GITHUB_OUTPUT}"
          echo "Cache hit: ${hit}"
| build-cache: | |
| needs: cache-status | |
| if: needs.cache-status.outputs.cache-hit != 'true' | |
| runs-on: art-cache-builder | |
| container: | |
| image: pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel | |
| steps: | |
| - name: Install CI dependencies | |
| run: | | |
| apt-get update | |
| apt-get install -y --no-install-recommends ca-certificates curl git zstd libibverbs-dev | |
| rm -rf /var/lib/apt/lists/* | |
| curl -LsSf https://astral.sh/uv/install.sh | sh | |
| echo "/root/.local/bin" >> "${GITHUB_PATH}" | |
| - name: Install gh CLI | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| run: | | |
| GH_DL_URL="$(curl -fsSL \ | |
| -H "Authorization: Bearer ${GH_TOKEN}" \ | |
| https://api.github.com/repos/cli/cli/releases/latest \ | |
| | python3 -c "import json,sys;r=json.load(sys.stdin);print([a['browser_download_url'] for a in r['assets'] if a['name'].endswith('_linux_amd64.tar.gz')][0])")" | |
| curl -fsSL "${GH_DL_URL}" | tar xz --strip-components=1 -C /usr/local | |
| gh version | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Mark workspace as a safe git directory | |
| run: | | |
| git config --global --add safe.directory "${GITHUB_WORKSPACE}" | |
| - name: Build and upload uv cache | |
| env: | |
| GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| run: | | |
| bash scripts/ci/build_and_push_uv_cache.sh \ | |
| --base-image "${CI_BASE_IMAGE}" \ | |
| --python-mm "${CI_PYTHON_MM}" | |
| quality-checks: | |
| needs: [cache-status, build-cache] | |
| if: ${{ !failure() && !cancelled() }} | |
| runs-on: art-large-runner | |
| container: | |
| image: pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel | |
| steps: | |
| - name: Install CI dependencies | |
| run: | | |
| apt-get update | |
| apt-get install -y --no-install-recommends ca-certificates curl git zstd libibverbs-dev | |
| rm -rf /var/lib/apt/lists/* | |
| curl -LsSf https://astral.sh/uv/install.sh | sh | |
| echo "/root/.local/bin" >> "${GITHUB_PATH}" | |
| - name: Checkout code | |
| uses: actions/checkout@v4 | |
| - name: Mark workspace as a safe git directory | |
| run: | | |
| git config --global --add safe.directory "${GITHUB_WORKSPACE}" | |
| - name: Restore prebuilt uv cache | |
| env: | |
| GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
| run: | | |
| release_api="https://api.github.com/repos/${GITHUB_REPOSITORY}/releases/tags/${CI_UV_CACHE_RELEASE_TAG}" | |
| fingerprint="${{ needs.cache-status.outputs.fingerprint }}" | |
| part_prefix="${CI_UV_CACHE_ASSET_PREFIX}-${fingerprint}.tar.zst.part-" | |
| release_json="$(curl -fsSL \ | |
| -H "Authorization: Bearer ${GITHUB_TOKEN}" \ | |
| -H "Accept: application/vnd.github+json" \ | |
| "${release_api}" || true)" | |
| if [ -z "${release_json}" ]; then | |
| echo "::error::Missing cache release '${CI_UV_CACHE_RELEASE_TAG}'." | |
| exit 1 | |
| fi | |
| part_selection_file="/tmp/uv-cache-part-selection.txt" | |
| if ! RELEASE_JSON="${release_json}" PART_PREFIX="${part_prefix}" python3 -c "import json, os, re, sys; payload=json.loads(os.environ['RELEASE_JSON']); part_prefix=os.environ['PART_PREFIX']; pattern=re.compile(r'^' + re.escape(part_prefix) + r'(\\d{3})$'); parts=[]; [parts.append((int(m.group(1)), int(a.get('id')), a.get('name'))) for a in payload.get('assets', []) for m in [pattern.match(a.get('name', ''))] if m and a.get('id') is not None]; parts.sort(key=lambda x: x[0]); indices=[p[0] for p in parts]; expected=list(range(len(parts))); print('\\n'.join(f'{asset_id} {name}' for _, asset_id, name in parts)) if parts and indices == expected else (_ for _ in ()).throw(SystemExit(2 if not parts else 3))" > "${part_selection_file}"; then | |
| echo "::error::No complete uv cache part set found for prefix '${part_prefix}'." | |
| exit 1 | |
| fi | |
| part_count="$(wc -l < "${part_selection_file}" | tr -d ' ')" | |
| echo "Using uv cache part set '${part_prefix}*' (${part_count} parts)." | |
| parts_dir="/tmp/uv-cache-parts" | |
| part_paths_file="/tmp/uv-cache-part-paths.txt" | |
| rm -rf "${parts_dir}" | |
| mkdir -p "${parts_dir}" | |
| awk -v d="${parts_dir}" '{print d "/" $2}' "${part_selection_file}" > "${part_paths_file}" | |
| PARTS_DIR="${parts_dir}" GITHUB_TOKEN="${GITHUB_TOKEN}" GITHUB_REPOSITORY="${GITHUB_REPOSITORY}" \ | |
| xargs -n 2 -P 8 sh -c ' | |
| asset_id="$1" | |
| asset_name="$2" | |
| part_path="${PARTS_DIR}/${asset_name}" | |
| curl -fsSL -L \ | |
| -H "Authorization: Bearer ${GITHUB_TOKEN}" \ | |
| -H "Accept: application/octet-stream" \ | |
| "https://api.github.com/repos/${GITHUB_REPOSITORY}/releases/assets/${asset_id}" \ | |
| -o "${part_path}" | |
| ' sh < "${part_selection_file}" | |
| while IFS= read -r part_path; do | |
| [ -s "${part_path}" ] || { | |
| echo "::error::Missing or empty cache part: ${part_path}" | |
| exit 1 | |
| } | |
| done < "${part_paths_file}" | |
| rm -rf "${UV_CACHE_DIR}" | |
| mkdir -p "${UV_CACHE_DIR}" | |
| while IFS= read -r part_path; do | |
| cat "${part_path}" | |
| done < "${part_paths_file}" | zstd -d -c | tar -xf - -C "${UV_CACHE_DIR}" | |
| du -sh "${UV_CACHE_DIR}" | |
| - name: Install dependencies (with all optional extras for complete type checking) | |
| run: | | |
| original_pyproject="$(mktemp)" | |
| cp pyproject.toml "${original_pyproject}" | |
| cleanup() { | |
| mv "${original_pyproject}" pyproject.toml | |
| } | |
| trap cleanup EXIT | |
| py_mm="$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')" | |
| cudnn_path="${GITHUB_WORKSPACE}/.venv/lib/python${py_mm}/site-packages/nvidia/cudnn" | |
| export CUDNN_PATH="${cudnn_path}" | |
| export CUDNN_HOME="${cudnn_path}" | |
| export CUDNN_INCLUDE_PATH="${cudnn_path}/include" | |
| export CUDNN_LIBRARY_PATH="${cudnn_path}/lib" | |
| export CPLUS_INCLUDE_PATH="${CUDNN_INCLUDE_PATH}${CPLUS_INCLUDE_PATH:+:${CPLUS_INCLUDE_PATH}}" | |
| export LIBRARY_PATH="${CUDNN_LIBRARY_PATH}${LIBRARY_PATH:+:${LIBRARY_PATH}}" | |
| export LD_LIBRARY_PATH="${CUDNN_LIBRARY_PATH}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" | |
| export UV_CONCURRENT_BUILDS="${CI_UV_BUILD_SLOTS}" | |
| export CMAKE_BUILD_PARALLEL_LEVEL="${CI_APEX_PARALLEL_BUILD}" | |
| export MAX_JOBS="${CI_APEX_PARALLEL_BUILD}" | |
| export NINJAFLAGS="-j${CI_APEX_PARALLEL_BUILD}" | |
| python3 scripts/ci/apply_ci_uv_build_overrides.py \ | |
| --pyproject pyproject.toml \ | |
| --apex-parallel-build "${CI_APEX_PARALLEL_BUILD}" \ | |
| --apex-nvcc-threads "${CI_APEX_NVCC_THREADS}" | |
| echo "CI uv build overrides: APEX_PARALLEL_BUILD=${CI_APEX_PARALLEL_BUILD}, NVCC_APPEND_FLAGS=--threads ${CI_APEX_NVCC_THREADS}, UV_CONCURRENT_BUILDS=${CI_UV_BUILD_SLOTS}" | |
| uv --version | |
| uv sync --all-extras --group dev --frozen | |
| - name: Run prek hooks (lint, format, typecheck, uv.lock, tests) | |
| run: | | |
| uv run --no-sync prek run --all-files | |
| - name: Run unit tests (via prek) | |
| run: | | |
| uv run --no-sync prek run pytest |