Skip to content

Commit beba265

Browse files
committed
Update dependencies and megatron for transformers==5.2.0.
1 parent df04d37 commit beba265

3 files changed

Lines changed: 1445 additions & 992 deletions

File tree

pyproject.toml

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ megatron = [
4646
"transformer-engine==2.11.0",
4747
"transformer-engine-cu12==2.11.0",
4848
"transformer-engine-torch==2.11.0",
49-
"megatron-core==0.15.2",
50-
"megatron-bridge==0.2.0rc6",
49+
"megatron-core==0.16.0rc0",
50+
"pybind11>=2.13.6",
51+
"megatron-bridge",
5152
"nvidia-ml-py==13.580.82",
5253
"ml-dtypes>=0.5.0 ; python_full_version < '3.13'",
5354
]
@@ -122,14 +123,26 @@ required-version = ">=0.6.15"
122123
# Override numpy to <2.0 for compatibility with megatron-core in the training
123124
# environment. vLLM 0.15.1 pulls opencv-python-headless>=4.13 which wants
124125
# numpy>=2 on Python 3.9+, but megatron-core requires numpy<2.
125-
override-dependencies = ["transformer-engine>=2.11.0", "numpy<2"]
126-
# Keep apex build isolation enabled so uv can inject torch from
127-
# `extra-build-dependencies` during lock/sync on non-GPU client machines.
128-
no-build-isolation-package = ["transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
126+
override-dependencies = [
127+
"transformer-engine==2.11.0",
128+
"numpy<2",
129+
# 0.5.0 only ships manylinux_2_39 wheels (no sdist), which fails on
130+
# manylinux_2_35 hosts used by some dev/CI environments.
131+
"nvidia-resiliency-ext<0.5",
132+
# Keep flashinfer aligned with vLLM; Megatron's dev extra pins <0.6 but ART
133+
# does not use flashinfer through Megatron runtime paths.
134+
"flashinfer-python==0.6.1",
135+
# Override unsloth's overly strict constraint on transformers — v5.x
136+
"is confirmed working per the unsloth February-2026 release notes."
137+
"transformers==5.2.0",
138+
]
139+
exclude-dependencies = ["pynvml", "emerging-optimizers"]
140+
no-build-isolation-package = ["apex", "transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
129141

130142
[tool.uv.extra-build-dependencies]
131143
apex = ["torch>=2.8.0"]
132144
transformer-engine-torch = ["torch>=2.8.0"]
145+
megatron-core = ["pybind11"]
133146

134147
[tool.uv.extra-build-variables]
135148
apex = { APEX_CPP_EXT = "1", APEX_CUDA_EXT = "1", APEX_FAST_LAYER_NORM = "1", APEX_PARALLEL_BUILD = "16", NVCC_APPEND_FLAGS = "--threads 4" }
@@ -210,3 +223,4 @@ dev = [
210223
[tool.uv.sources]
211224
panza = { git = "https://github.com/corbt/panza.git" }
212225
apex = { git = "https://github.com/NVIDIA/apex.git", branch = "25.09" }
226+
megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "75f2c5ad4afb702b57b4781a00f5291a66bcf183" }

src/art/megatron/provider.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,5 @@ def _flex_attention_layer_spec(
6464
provider.moe_router_dtype = "fp32"
6565
if provider.tensor_model_parallel_size > 1:
6666
provider.sequence_parallel = True
67+
provider.finalize()
6768
return provider

0 commit comments

Comments
 (0)