Skip to content

Commit beba265

Browse files
committed
Update dependencies and megatron for transformers==5.2.0.
1 parent df04d37 commit beba265

3 files changed

Lines changed: 1445 additions & 992 deletions

File tree

pyproject.toml

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ megatron = [
4646
"transformer-engine==2.11.0",
4747
"transformer-engine-cu12==2.11.0",
4848
"transformer-engine-torch==2.11.0",
49-
"megatron-core==0.15.2",
50-
"megatron-bridge==0.2.0rc6",
49+
"megatron-core==0.16.0rc0",
50+
"pybind11>=2.13.6",
51+
"megatron-bridge",
5152
"nvidia-ml-py==13.580.82",
5253
"ml-dtypes>=0.5.0 ; python_full_version < '3.13'",
5354
]
@@ -122,14 +123,26 @@ required-version = ">=0.6.15"
122123
# Override numpy to <2.0 for compatibility with megatron-core in the training
123124
# environment. vLLM 0.15.1 pulls opencv-python-headless>=4.13 which wants
124125
# numpy>=2 on Python 3.9+, but megatron-core requires numpy<2.
125-
override-dependencies = ["transformer-engine>=2.11.0", "numpy<2"]
126-
# Keep apex build isolation enabled so uv can inject torch from
127-
# `extra-build-dependencies` during lock/sync on non-GPU client machines.
128-
no-build-isolation-package = ["transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
126+
override-dependencies = [
127+
"transformer-engine==2.11.0",
128+
"numpy<2",
129+
# 0.5.0 only ships manylinux_2_39 wheels (no sdist), which fails on
130+
# manylinux_2_35 hosts used by some dev/CI environments.
131+
"nvidia-resiliency-ext<0.5",
132+
# Keep flashinfer aligned with vLLM; Megatron's dev extra pins <0.6 but ART
133+
# does not use flashinfer through Megatron runtime paths.
134+
"flashinfer-python==0.6.1",
135+
# Override unsloth's overly strict constraint on transformers — v5.x
136+
"is confirmed working per the unsloth February-2026 release notes."
137+
"transformers==5.2.0",
138+
]
139+
exclude-dependencies = ["pynvml", "emerging-optimizers"]
140+
no-build-isolation-package = ["apex", "transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]
129141

130142
[tool.uv.extra-build-dependencies]
131143
apex = ["torch>=2.8.0"]
132144
transformer-engine-torch = ["torch>=2.8.0"]
145+
megatron-core = ["pybind11"]
133146

134147
[tool.uv.extra-build-variables]
135148
apex = { APEX_CPP_EXT = "1", APEX_CUDA_EXT = "1", APEX_FAST_LAYER_NORM = "1", APEX_PARALLEL_BUILD = "16", NVCC_APPEND_FLAGS = "--threads 4" }
@@ -210,3 +223,4 @@ dev = [
210223
[tool.uv.sources]
211224
panza = { git = "https://github.com/corbt/panza.git" }
212225
apex = { git = "https://github.com/NVIDIA/apex.git", branch = "25.09" }
226+
megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "75f2c5ad4afb702b57b4781a00f5291a66bcf183" }

src/art/megatron/provider.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,5 @@ def _flex_attention_layer_spec(
6464
provider.moe_router_dtype = "fp32"
6565
if provider.tensor_model_parallel_size > 1:
6666
provider.sequence_parallel = True
67+
provider.finalize()
6768
return provider

0 commit comments

Comments
 (0)