Skip to content

Commit f9c98bc

Browse files
committed
Update dependencies and megatron for transformers==5.2.0.
1 parent 0a1b6fe commit f9c98bc

3 files changed

Lines changed: 588 additions & 77 deletions

File tree

pyproject.toml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ megatron = [
     "transformer-engine==2.11.0",
     "transformer-engine-cu12==2.11.0",
     "transformer-engine-torch==2.11.0",
-    "megatron-core==0.15.2",
-    "megatron-bridge==0.2.0rc6",
+    "megatron-core==0.16.0rc0",
+    "pybind11>=2.13.6",
+    "megatron-bridge",
     "nvidia-ml-py==13.580.82",
     "ml-dtypes>=0.5.0 ; python_full_version < '3.13'",
 ]
@@ -125,16 +126,20 @@ required-version = ">=0.6.15"
 override-dependencies = [
     "transformer-engine>=2.11.0",
     "numpy<2",
+    # Keep flashinfer aligned with vLLM; Megatron's dev extra pins <0.6 but ART
+    # does not use flashinfer through Megatron runtime paths.
+    "flashinfer-python==0.6.1",
     # Override unsloth's overly strict constraint on transformers — v5.x
     # is confirmed working per unsloth February-2026 release notes
     "transformers==5.2.0",
 ]
-exclude-dependencies = ["pynvml"]
+exclude-dependencies = ["pynvml", "emerging-optimizers"]
 no-build-isolation-package = ["apex", "transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]

 [tool.uv.extra-build-dependencies]
 apex = ["torch>=2.8.0"]
 transformer-engine-torch = ["torch>=2.8.0"]
+megatron-core = ["pybind11"]

 [tool.uv.extra-build-variables]
 apex = { APEX_CPP_EXT = "1", APEX_CUDA_EXT = "1", APEX_FAST_LAYER_NORM = "1", APEX_PARALLEL_BUILD = "16", NVCC_APPEND_FLAGS = "--threads 4" }
@@ -210,3 +215,4 @@ dev = [
 [tool.uv.sources]
 panza = { git = "https://github.com/corbt/panza.git" }
 apex = { git = "https://github.com/NVIDIA/apex.git", branch = "25.09" }
+megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "75f2c5ad4afb702b57b4781a00f5291a66bcf183" }

src/art/megatron/provider.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,5 @@ def _flex_attention_layer_spec(
     provider.moe_router_dtype = "fp32"
     if provider.tensor_model_parallel_size > 1:
         provider.sequence_parallel = True
+    provider.finalize()
     return provider

0 commit comments

Comments (0)