Skip to content

Commit f9c98bc

Browse files
committed
Update dependencies and megatron for transformers==5.2.0.
1 parent 0a1b6fe commit f9c98bc

3 files changed

Lines changed: 588 additions & 77 deletions

File tree

pyproject.toml

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,9 @@ megatron = [
     "transformer-engine==2.11.0",
     "transformer-engine-cu12==2.11.0",
     "transformer-engine-torch==2.11.0",
-    "megatron-core==0.15.2",
-    "megatron-bridge==0.2.0rc6",
+    "megatron-core==0.16.0rc0",
+    "pybind11>=2.13.6",
+    "megatron-bridge",
     "nvidia-ml-py==13.580.82",
     "ml-dtypes>=0.5.0 ; python_full_version < '3.13'",
 ]
@@ -125,16 +126,20 @@ required-version = ">=0.6.15"
 override-dependencies = [
     "transformer-engine>=2.11.0",
     "numpy<2",
+    # Keep flashinfer aligned with vLLM; Megatron's dev extra pins <0.6 but ART
+    # does not use flashinfer through Megatron runtime paths.
+    "flashinfer-python==0.6.1",
     # Override unsloth's overly strict constraint on transformers — v5.x
     # is confirmed working per unsloth February-2026 release notes
     "transformers==5.2.0",
 ]
-exclude-dependencies = ["pynvml"]
+exclude-dependencies = ["pynvml", "emerging-optimizers"]
 no-build-isolation-package = ["apex", "transformer-engine", "transformer-engine-cu12", "transformer-engine-torch", "megatron-core", "megatron-bridge", "nv-grouped-gemm", "mamba-ssm", "causal-conv1d"]

 [tool.uv.extra-build-dependencies]
 apex = ["torch>=2.8.0"]
 transformer-engine-torch = ["torch>=2.8.0"]
+megatron-core = ["pybind11"]

 [tool.uv.extra-build-variables]
 apex = { APEX_CPP_EXT = "1", APEX_CUDA_EXT = "1", APEX_FAST_LAYER_NORM = "1", APEX_PARALLEL_BUILD = "16", NVCC_APPEND_FLAGS = "--threads 4" }
@@ -210,3 +215,4 @@ dev = [
 [tool.uv.sources]
 panza = { git = "https://github.com/corbt/panza.git" }
 apex = { git = "https://github.com/NVIDIA/apex.git", branch = "25.09" }
+megatron-bridge = { git = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git", rev = "75f2c5ad4afb702b57b4781a00f5291a66bcf183" }

src/art/megatron/provider.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,4 +64,5 @@ def _flex_attention_layer_spec(
     provider.moe_router_dtype = "fp32"
     if provider.tensor_model_parallel_size > 1:
         provider.sequence_parallel = True
+    provider.finalize()
     return provider

0 commit comments

Comments (0)