forked from JamePeng/llama-cpp-python
-
Notifications
You must be signed in to change notification settings - Fork 0
132 lines (109 loc) · 5.67 KB
/
build-wheels-cu124-linux.yml
File metadata and controls
132 lines (109 loc) · 5.67 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
---
name: Build Wheels(CU124) for Linux # Workflow name

on:
  workflow_dispatch: # Manual trigger

permissions:
  contents: write # required so the release step can create tags/releases

jobs:
  build_wheels:
    name: Build Wheel ${{ matrix.os }} ${{ matrix.pyver }} ${{ matrix.cuda }} ${{ matrix.releasetag == 'wheels' && 'AVX2' || matrix.releasetag }}
    runs-on: ubuntu-22.04
    container: nvidia/cuda:12.4.1-cudnn-devel-ubuntu22.04
    strategy:
      matrix: # Define the build matrix directly here
        os: ["ubuntu-22.04"]
        pyver: ["3.10", "3.11", "3.12", "3.13", "3.14"] # Python versions (quoted: 3.10 would otherwise parse as 3.1)
        cuda: ["12.4.1"]
        releasetag: ["Basic"] # Controls CMAKE_ARGS for CPU features (even in CUDA build)
        cudaarch: ["all"] # Controls target CUDA architectures for nvcc
    defaults:
      run:
        shell: bash
    env:
      CUDAVER: ${{ matrix.cuda }}
      AVXVER: ${{ matrix.releasetag }}
      # NOTE(review): CUDAARCHVER is never read below — CMAKE_CUDA_ARCHITECTURES in the
      # build step is hard-coded; confirm whether it should use this value instead.
      CUDAARCHVER: ${{ matrix.cudaarch }}
    steps:
      - name: Install dependencies
        run: |
          apt update
          apt install -y build-essential ccache cmake curl git libgomp1 libjpeg-dev libssl-dev

      - uses: actions/checkout@v4 # Checkout code
        with:
          submodules: "recursive"

      # from astral-sh/setup-uv
      - name: Install the latest version of uv and set the python version
        uses: astral-sh/setup-uv@v6
        with:
          python-version: ${{ matrix.pyver }}
          activate-environment: true
          enable-cache: true

      - run: nvcc -V

      - name: Build Wheel With Cmake # Main build step: configures and builds the wheel
        env:
          # NOTE(review): GitHub Actions does NOT shell-expand "${LD_LIBRARY_PATH}" in env:
          # values — the trailing segment ends up as literal text in the path. Harmless
          # here, but confirm it is not relied upon.
          LD_LIBRARY_PATH: "/usr/local/cuda/lib64:/usr/local/cuda/compat:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
          VERBOSE: 1 # Enable verbose build output
          CUDA_HOME: "/usr/local/cuda/" # Set CUDA_HOME
          # Fixed: was "${PATH}" — Actions env values are literal, so CUDA_PATH became the
          # literal string "${PATH}" rather than the CUDA install root. Point it at the
          # toolkit root, consistent with CUDA_HOME / CUDA_TOOLKIT_ROOT_DIR.
          CUDA_PATH: "/usr/local/cuda/"
          CUDA_TOOLKIT_ROOT_DIR: "/usr/local/cuda/" # Set CUDA_TOOLKIT_ROOT_DIR
        run: |
          echo "VERBOSE=1" >> $GITHUB_ENV # Enable verbose build output for troubleshooting
          find /usr/ -name 'libcuda.so.*'
          find /usr/ -name 'libcudart.so.*'
          echo $LD_LIBRARY_PATH
          # Add project-specific and feature flags
          CMAKE_ARGS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES='70-real;75-real;80-real;86-real;87-real;89-real'"
          CMAKE_ARGS="-DGGML_CUDA_FORCE_MMQ=on ${CMAKE_ARGS}"
          CMAKE_ARGS="${CMAKE_ARGS} -DLLAMA_CURL=off -DLLAMA_OPENSSL=on"
          if [ "${AVXVER}" = "AVX" ]; then
            CMAKE_ARGS="${CMAKE_ARGS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off"
          fi
          if [ "${AVXVER}" = "AVX2" ]; then
            CMAKE_ARGS="${CMAKE_ARGS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off"
          fi
          if [ "${AVXVER}" = "AVXVNNI" ]; then
            CMAKE_ARGS="${CMAKE_ARGS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX_VNNI=on"
          fi
          # if [ "${AVXVER}" = "AVX512" ]; then
          #   CMAKE_ARGS="${CMAKE_ARGS} -DGGML_AVX512=on"
          # fi
          # Basic options for compiling without AVX instructions
          if [ "${AVXVER}" = "Basic" ]; then
            CMAKE_ARGS="${CMAKE_ARGS} -DGGML_NATIVE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX_VNNI=off -DGGML_AVX512=off -DGGML_AVX512_VBMI=off -DGGML_AVX512_VNNI=off -DGGML_AVX512_BF16=off -DGGML_FMA=off -DGGML_F16C=off"
          fi
          # Export CMAKE_ARGS environment variable so the python -m build command can use it
          echo ${CMAKE_ARGS}
          echo "CMAKE_ARGS=${CMAKE_ARGS}" >> $GITHUB_ENV
          # Run the Python build command to generate the wheel
          uv pip install build setuptools wheel packaging
          CMAKE_ARGS=${CMAKE_ARGS} uv build --wheel
          # --- Post-build steps to get info for rename wheel file and release tag ---
          cuda_ver_short=$(echo "${CUDAVER}" | cut -d'.' -f 1,2 | sed 's/\.//g')
          avx_ver=$(echo "${AVXVER}" | tr '[:upper:]' '[:lower:]')
          wheel_path=$(ls dist/*.whl | head -n 1)
          filename=$(basename "$wheel_path")
          # Split wheel filename
          IFS='-' read -r dist_name version py_tag abi_tag plat_tag <<< "$filename"
          new_version="${version}+cu${cuda_ver_short}.${avx_ver}"
          new_filename="${dist_name}-${new_version}-${py_tag}-${abi_tag}-${plat_tag}"
          # Rename wheel file
          mv "$wheel_path" "dist/$new_filename"
          echo "Renamed wheel to: $new_filename"
          echo "CUDA_VERSION=$cuda_ver_short" >> $GITHUB_ENV # Store short CUDA version in env
          echo "TAG_VERSION=$version" >> $GITHUB_ENV # Store version in env for release step

      - name: Get Current Date # Step to get current date for the release tag
        id: get-date
        run: |
          # Get date in YYYYMMDD format using bash date command
          currentDate=$(date +%Y%m%d)
          # Store the date in environment variable for the release step
          echo "BUILD_DATE=$currentDate" >> $GITHUB_ENV

      - uses: softprops/action-gh-release@v2.2.2 # Action to create a GitHub Release
        with:
          files: dist/* # Upload the generated wheel files from the dist directory
          # Define the release tag name using the collected environment variables
          # Format: v<package_version>-cu<short_cuda_version>-<avx_tag>-linux-<build_date>
          tag_name: v${{ env.TAG_VERSION }}-cu${{ env.CUDA_VERSION }}-${{ env.AVXVER }}-linux-${{ env.BUILD_DATE }} # Release tag format for Linux
          # Note: This action will create a new release tag if it doesn't exist,
          # or upload assets to an existing tag. Be mindful of potential tag name conflicts.
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} # Use the secret provided by GitHub Actions for authentication