From e0b0d80d728a05e682e1645a5b2804f8b6364b6b Mon Sep 17 00:00:00 2001 From: Matt Norris Date: Mon, 6 Apr 2026 22:21:12 -0400 Subject: [PATCH 1/2] build(.gitignore): ignore output dirs --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 8388388..ec2110c 100644 --- a/.gitignore +++ b/.gitignore @@ -271,3 +271,8 @@ Pulumi.*.yaml # Ignore Pulumi local backend state files. .pulumi/ + + +# Output files +# ----------------------------------------------------------------------------- +output/ From 9594e71688e1731cdf1f15201a0938d5c75b3121 Mon Sep 17 00:00:00 2001 From: Matt Norris Date: Tue, 7 Apr 2026 16:43:16 -0400 Subject: [PATCH 2/2] refactor(langsmith-hosting): create langsmith-network to decouple --- .../python/langsmith-network/pyproject.toml | 13 + .../src/langsmith_network/__init__.py | 9 + .../src/langsmith_network/langsmith.py | 36 ++ pyproject.toml | 1 + tools/python/langsmith-hosting/README.md | 3 +- .../langsmith-hosting/docs/architecture.md | 26 +- .../docs/cidr-entry-points.md | 94 +++++ .../docs/deploying-langsmith-hybrid-on-aws.md | 389 ------------------ tools/python/langsmith-hosting/pyproject.toml | 5 + .../src/langsmith_hosting/__main__.py | 76 +++- .../src/langsmith_hosting/cidrs.py | 110 +++++ .../src/langsmith_hosting/config.py | 33 +- .../src/langsmith_hosting/constants.py | 32 +- .../src/langsmith_hosting/dataplane.py | 29 +- .../src/langsmith_hosting/eks.py | 205 +++++---- .../src/langsmith_hosting/postgres.py | 9 +- .../src/langsmith_hosting/redis.py | 9 +- .../src/langsmith_hosting/s3.py | 39 +- .../src/langsmith_hosting/test_cidrs.py | 89 ++++ .../src/langsmith_hosting/test_config.py | 42 ++ .../src/langsmith_hosting/vpc.py | 10 +- uv.lock | 59 +++ 22 files changed, 783 insertions(+), 535 deletions(-) create mode 100644 packages/python/langsmith-network/pyproject.toml create mode 100644 packages/python/langsmith-network/src/langsmith_network/__init__.py create mode 100644 packages/python/langsmith-network/src/langsmith_network/langsmith.py create mode 100644 tools/python/langsmith-hosting/docs/cidr-entry-points.md delete mode 100644 tools/python/langsmith-hosting/docs/deploying-langsmith-hybrid-on-aws.md create mode 100644 tools/python/langsmith-hosting/src/langsmith_hosting/cidrs.py create mode 100644 tools/python/langsmith-hosting/src/langsmith_hosting/test_cidrs.py create mode 100644 tools/python/langsmith-hosting/src/langsmith_hosting/test_config.py diff --git a/packages/python/langsmith-network/pyproject.toml b/packages/python/langsmith-network/pyproject.toml new file mode 100644 index 0000000..272d0c8 --- /dev/null +++ b/packages/python/langsmith-network/pyproject.toml @@ -0,0 +1,13 @@ +[project] +name = "langsmith-network" +version = "0.1.0" +description = "LangSmith Cloud NAT gateway IP addresses for firewall rules" +requires-python = ">=3.12,<3.13" +dependencies = [] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/langsmith_network"] diff --git a/packages/python/langsmith-network/src/langsmith_network/__init__.py b/packages/python/langsmith-network/src/langsmith_network/__init__.py new file mode 100644 index 0000000..23d0631 --- /dev/null +++ b/packages/python/langsmith-network/src/langsmith_network/__init__.py @@ -0,0 +1,9 @@ +"""LangSmith Cloud NAT gateway IP addresses for firewall rules.""" + +from .langsmith import EU, LANGSMITH, US + +__all__ = [ + "EU", + "LANGSMITH", + "US", +] diff --git a/packages/python/langsmith-network/src/langsmith_network/langsmith.py b/packages/python/langsmith-network/src/langsmith_network/langsmith.py new file mode 100644 index 0000000..d8327e9 --- /dev/null +++ b/packages/python/langsmith-network/src/langsmith_network/langsmith.py @@ -0,0 +1,36 @@ +"""LangSmith Cloud NAT gateway IP addresses. + +Source: https://docs.langchain.com/langsmith/deploy-to-cloud#allowlist-ip-addresses +""" + +US: tuple[str, ...] = ( + "34.9.99.224", + "34.19.34.50", + "34.19.93.202", + "34.31.121.70", + "34.41.178.137", + "34.59.244.194", + "34.68.27.146", + "34.82.222.17", + "34.121.166.52", + "34.123.151.210", + "34.135.61.140", + "34.145.102.123", + "34.169.45.153", + "34.169.88.30", + "35.197.29.146", + "35.227.171.135", +) + +EU: tuple[str, ...] = ( + "34.13.244.114", + "34.32.141.108", + "34.32.145.240", + "34.32.180.189", + "34.34.69.108", + "34.90.157.44", + "34.90.213.236", + "34.141.242.180", +) + +LANGSMITH: tuple[str, ...] = US + EU diff --git a/pyproject.toml b/pyproject.toml index 44020aa..5b92674 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,6 +23,7 @@ members = [ "tools/python/langsmith-hosting", "tools/python/pulumi-utils", "packages/python/langsmith-client", + "packages/python/langsmith-network", "packages/python/azure-ai", ] diff --git a/tools/python/langsmith-hosting/README.md b/tools/python/langsmith-hosting/README.md index 0812d99..f802f01 100644 --- a/tools/python/langsmith-hosting/README.md +++ b/tools/python/langsmith-hosting/README.md @@ -12,7 +12,7 @@ Pulumi project that provisions AWS infrastructure for LangSmith Hybrid. ## Prerequisites -- AWS CLI configured with a named profile (set `awsProfile` in your stack config) +- AWS CLI configured with an appropriate profile - [uv](https://docs.astral.sh/uv/) installed - Pulumi CLI installed @@ -39,3 +39,4 @@ Stack configuration lives in `Pulumi.dev.yaml`. Key settings: | `postgresInstanceClass` | RDS instance class | `db.t3.medium` | | `redisNodeType` | ElastiCache node type | `cache.t3.micro` | | `s3BucketPrefix` | S3 bucket name prefix | `langsmith` | +| `extraPublicAccessCidrs` | Comma-separated CIDRs to add to the EKS API server allowlist | _(none)_ | diff --git a/tools/python/langsmith-hosting/docs/architecture.md b/tools/python/langsmith-hosting/docs/architecture.md index bded4b0..7d8389f 100644 --- a/tools/python/langsmith-hosting/docs/architecture.md +++ b/tools/python/langsmith-hosting/docs/architecture.md @@ -11,13 +11,15 @@ langsmith-hosting/ ├── pyproject.toml # Dependencies (pulumi, pulumi-aws, pulumi-eks, ...) ├── README.md ├── docs/ -│ └── architecture.md # This file +│ ├── architecture.md # This file +│ └── cidr-entry-points.md # CIDR plugin discovery mechanism └── src/ └── langsmith_hosting/ ├── __init__.py ├── __main__.py # Entry point: wires all modules, exports outputs + ├── cidrs.py # CIDR utilities (get_cidrs, collapse_cidrs) ├── config.py # Typed config loading from Pulumi stack config - ├── constants.py # PROJECT_NAME, TAGS + ├── constants.py # PROJECT_NAME, get_tags() ├── vpc.py # VPC + subnets ├── eks.py # EKS cluster + node group + addons ├── postgres.py # RDS PostgreSQL @@ -52,11 +54,21 @@ Orchestrates all modules in order: 1. Loads `.env` and Pulumi stack config via `config.py` 2. Gets AWS caller identity and region 3. Creates VPC -4. Creates EKS cluster (depends on VPC) -5. Creates PostgreSQL (depends on VPC + random password) -6. Creates Redis (depends on VPC) -7. Creates S3 bucket (depends on VPC) -8. Exports stack outputs +4. Builds the EKS API server CIDR allowlist (LangSmith IPs + [entry point plugins](cidr-entry-points.md) + manual overrides) +5. Creates EKS cluster (depends on VPC) +6. Creates PostgreSQL (depends on VPC + random password) +7. Creates Redis (depends on VPC) +8. Creates S3 bucket (depends on VPC) +9. Creates data plane (listener + KEDA + langgraph-dataplane Helm chart) +10. Exports stack outputs + +### `cidrs.py` -- CIDR Utilities + +Helper functions for IP/CIDR manipulation: + +- `get_cidrs()` -- appends `/32` (IPv4) or `/128` (IPv6) to bare IP addresses +- `collapse_cidrs()` -- best-effort collapse of a CIDR list toward a target count (AWS EKS allows at most 40 public access CIDRs) +- `AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS` -- the 40-entry limit constant ### `config.py` -- Configuration diff --git a/tools/python/langsmith-hosting/docs/cidr-entry-points.md b/tools/python/langsmith-hosting/docs/cidr-entry-points.md new file mode 100644 index 0000000..3669837 --- /dev/null +++ b/tools/python/langsmith-hosting/docs/cidr-entry-points.md @@ -0,0 +1,94 @@ +# CIDR Entry Points + +The EKS API server allowlist is built from three sources, merged and +deduplicated at deploy time: + +```python +_all_cidrs = list( + dict.fromkeys( + get_cidrs(LANGSMITH) # 1. Built-in LangSmith IPs + + tuple(_org_cidrs) # 2. Entry point plugins + + cfg.extra_public_access_cidrs # 3. Manual overrides + ) +) +``` + +Source 2 uses [Python entry points](https://packaging.python.org/en/latest/guides/creating-and-discovering-plugins/#using-package-metadata), +the PyPA-standard plugin discovery mechanism (`importlib.metadata`). Any +installed package can provide CIDRs by declaring an entry point in the +`langsmith_hosting.cidrs` group. + +## How it works + +### Consumer side (langsmith-hosting) + +`__main__.py` discovers all registered CIDR providers at runtime: + +```python +from importlib.metadata import entry_points + +_org_cidrs: list[str] = [] +for _ep in entry_points(group="langsmith_hosting.cidrs"): + _org_cidrs.extend(_ep.load()) +``` + +`entry_points(group=...)` scans every installed package's metadata for +entries in that group. `ep.load()` performs the import and attribute +lookup, returning the CIDR tuple. + +### Provider side (any package) + +A provider declares entry points in its `pyproject.toml`: + +```toml +[project.entry-points."langsmith_hosting.cidrs"] +my-corp = "my_network.corporate:CORPORATE_CIDRS" +``` + +Each entry follows the format `name = "module.path:ATTRIBUTE"`: + +| Part | Meaning | +| --- | --- | +| `my-corp` | Human-readable name (used for inspection, not code) | +| `my_network.corporate` | Python module to import | +| `CORPORATE_CIDRS` | Attribute on that module — must be an iterable of CIDR strings | + +### Concrete example + +An internal networking package could declare: + +```toml +[project.entry-points."langsmith_hosting.cidrs"] +corporate = "my_network.corporate:CORPORATE_CIDRS" +``` + +When that package is installed (e.g., via `uv sync --all-packages`), its +CIDRs are automatically discovered and included. When it is absent, +`entry_points()` returns nothing and only the LangSmith IPs + manual +overrides are used. + +## Inspecting registered entry points + +```bash +uv run python -c " +from importlib.metadata import entry_points +for ep in entry_points(group='langsmith_hosting.cidrs'): + print(f'{ep.name}: {ep.load()}') +" +``` + +## Adding a new CIDR provider + +1. Create a Python package with a module that exports a tuple of CIDR + strings (e.g., `my_network/firewalls.py` with `SCANNER_IPS`). +2. Add the entry point to the package's `pyproject.toml`: + + ```toml + [project.entry-points."langsmith_hosting.cidrs"] + scanner = "my_network.firewalls:SCANNER_IPS" + ``` + +3. Install the package in the same environment as `langsmith-hosting` + (or add it to the uv workspace). +4. Run `uv sync --all-packages` to register the entry point. +5. `pulumi preview` will now include the new CIDRs. diff --git a/tools/python/langsmith-hosting/docs/deploying-langsmith-hybrid-on-aws.md b/tools/python/langsmith-hosting/docs/deploying-langsmith-hybrid-on-aws.md deleted file mode 100644 index c307a7e..0000000 --- a/tools/python/langsmith-hosting/docs/deploying-langsmith-hybrid-on-aws.md +++ /dev/null @@ -1,389 +0,0 @@ -# Deploying LangSmith Hybrid on AWS EKS: A Practitioner's Guide - -*February 2026* - -This post documents the end-to-end journey of deploying LangSmith Hybrid on -AWS EKS, building custom tooling around it, and debugging every failure along -the way. It is written as a practical reference for anyone repeating this -work -- whether a person or an automated agent. - ---- - -## Background - -Our team needed a way to run LLM agents in production on our own -infrastructure while keeping the LangSmith control plane managed by -LangChain. LangSmith Hybrid gives you exactly this: the control plane -(UI, tracing, deployment management) lives in LangChain's cloud, while -the **data plane** (the actual agent pods, Redis, and supporting services) -runs in your own Kubernetes cluster. - -The project started as a Terraform deployment, evolved into a Pulumi-managed -stack, and grew to include a custom Python CLI for building, pushing, and -deploying agent images. - -**Assumption:** The agents in this guide use Azure OpenAI for chat model -inference. The `azure-ai` Pulumi package provisions the Azure OpenAI -account, model deployment, and firewall rules. If you use a different LLM -provider, Phase 4 and the Azure-specific firewall steps won't apply, but -the rest of the guide (EKS infrastructure, build tooling, debugging) is -provider-agnostic. - ---- - -## Phase 1: Terraform Foundation - -We started with a pure Terraform approach using LangChain's official modules -from `github.com/langchain-ai/terraform`: - -| Component | Module | Purpose | -|-------------|-------------------------|---------------------------------------| -| VPC | `modules/aws/vpc` | Public/private subnets, NAT gateway | -| EKS | `modules/aws/eks` | Kubernetes cluster | -| PostgreSQL | `modules/aws/postgres` | RDS database | -| Redis | `modules/aws/redis` | ElastiCache | -| S3 | `modules/aws/s3` | Object storage with VPC endpoint | - -Terraform taught us the shape of the infrastructure, but we hit friction -immediately: - -- **Module variable mismatches.** LangChain modules use different variable - names than standard Terraform AWS modules (e.g., `instance_class` vs. - `instance_type`). We had to inspect each module's `variables.tf` manually. -- **Circular dependencies.** Root-level Kubernetes/Helm providers created a - cycle with the EKS module. The EKS module manages its own providers - internally -- you have to let it. -- **Orphaned resources.** Failed deployments left behind KMS aliases, - CloudWatch log groups, RDS subnet groups, ElastiCache subnet groups, and S3 - buckets that conflicted with fresh runs. -- **SSO timeouts.** EKS deployments take 15-20 minutes, often outlasting the - AWS SSO session. - -We built shell scripts (`deploy.sh`, `list-aws-resources.sh`, -`destroy-aws-resources.sh`) to manage targeted applies and dependency-ordered -destroys, but the Terraform workflow remained brittle for our use case. - ---- - -## Phase 2: Migrating to Pulumi - -We moved the infrastructure to Pulumi (Python) to get first-class -programmability and to share configuration logic with the rest of the -monorepo. The Pulumi stack lives in `tools/python/langsmith-hosting/` and -provisions: - -1. **VPC** with public and private subnets and a NAT gateway -2. **EKS cluster** with managed node groups, Pod Identity, EBS CSI driver, - cluster autoscaler, and an AWS Load Balancer Controller -3. **S3 bucket** for LangSmith blob storage -4. **RDS PostgreSQL** instance -5. **ElastiCache Redis** cluster -6. **Data plane** (KEDA + the `langgraph-dataplane` Helm chart) - -### Configuration as code - -Rather than duplicating values across `Pulumi.dev.yaml` and Python, we -extracted sensible defaults into module-level constants in `config.py`: - -```python -_VPC_CIDR = "10.0.0.0/16" -_EKS_CLUSTER_VERSION = "1.31" -_EKS_NODE_INSTANCE_TYPE = "m5.xlarge" -_POSTGRES_INSTANCE_CLASS = "db.t3.medium" -_REDIS_NODE_TYPE = "cache.t3.micro" -``` - -The stack config file shrank from 16 keys to 5 essentials: `environment`, -`eksClusterName`, `s3BucketPrefix`, `langsmithWorkspaceId`, and -`langsmithApiKey`. Everything else uses the Python defaults unless -explicitly overridden. - -### Listener as a dynamic resource - -The LangSmith Hybrid data plane requires a **listener** registered with the -control plane API. We built a Pulumi dynamic resource (`LangSmithListener`) -that calls the LangSmith API to create, read, and delete listeners as part -of `pulumi up` / `pulumi destroy`. This eliminated the manual -`manage_listeners.py` step from the Terraform era. - -### Dataplane Helm release - -The data plane itself is a single Helm release (`langgraph-dataplane`) that -installs the listener agent, operator, and a Redis StatefulSet. The Helm -values are wired to Pulumi outputs (API key, workspace ID, listener ID) so -everything stays in sync: - -```python -k8s.helm.v3.Release( - f"{cluster_name}-dataplane", - name="dataplane", - chart="langgraph-dataplane", - values={ - "config": { - "langsmithApiKey": langsmith_api_key, - "langgraphListenerId": listener.listener_id, - "enableLGPDeploymentHealthCheck": _ENABLE_HEALTH_CHECK, - }, - ... - }, - opts=pulumi.ResourceOptions(depends_on=[keda, listener]), -) -``` - ---- - -## Phase 3: Build and Deploy Tooling - -We created a Python CLI package (`langsmith-client`) that wraps the -build-push-deploy cycle into repeatable commands. - -### `langsmith-build` - -Builds a Docker image using `langgraph build` with an auto-generated tag -derived from `pyproject.toml`. The key insight was appending the **Git SHA** -to every image tag: - -``` -hello-world-graph:0.1.0-ff95149 -``` - -This solved a persistent Kubernetes caching problem. With a static tag like -`hello-world-graph:0.1.0`, the default `imagePullPolicy: IfNotPresent` -caused nodes to skip pulling the updated image. Unique tags forced a fresh -pull on every deployment. - -The implementation is a small private helper in `build.py`: - -```python -def _get_git_sha() -> str | None: - try: - result = subprocess.run( - ["git", "rev-parse", "--short", "HEAD"], - capture_output=True, text=True, timeout=5, - ) - if result.returncode == 0: - return result.stdout.strip() - except Exception: - pass - return None -``` - -If Git is unavailable (e.g., inside a CI container without the repo), it -falls back to `name:version`. - -### `langsmith-deploy-docker` - -Creates or updates a deployment through the LangSmith API, specifying the -ECR image URI, environment variables (Azure OpenAI secrets), and the target -listener. This replaced manual deployments through the LangSmith UI. - ---- - -## Phase 4: Azure OpenAI Integration - -The agent uses Azure OpenAI (`gpt-5.1-chat`) via `AzureChatOpenAI` from -LangChain. Getting this to work from inside EKS required solving two -distinct problems. - -### Problem: 403 Forbidden from Azure - -Azure OpenAI has Virtual Network / firewall restrictions. The EKS cluster's -outbound traffic exits through a NAT gateway, and that IP was not in the -Azure allowlist. - -**Diagnosis:** - -```bash -kubectl exec -- \ - python3 -c "from urllib.request import urlopen; print(urlopen('https://ifconfig.me').read().decode())" -``` - -**Fix:** Add the NAT gateway's public IP to the Azure OpenAI resource's -firewall rules. We later automated this with the `azure-ai` Pulumi package -(`packages/python/azure-ai/`) that manages the Azure OpenAI resource and -its firewall rules, accepting NAT IPs as input from the hosting stack. - -### Problem: Temperature incompatibility - -`gpt-5.1-chat` only accepts `temperature=1`. LangChain's `create_agent` -internally binds `temperature=0` for deterministic behavior, which the model -silently rejects. The symptom was an `httpx.UnsupportedProtocol` error in -the health check -- deeply misleading, because the real error was buried in -the Azure API response. - -**Fix:** - -```python -model = AzureChatOpenAI( - azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"), - azure_deployment=os.environ.get("AZURE_OPENAI_DEPLOYMENT"), - api_version=os.environ.get("AZURE_OPENAI_API_VERSION", "2025-01-01-preview"), - temperature=1, -) -``` - ---- - -## Phase 5: Debugging in Production - -Every deployment revealed a new class of failure. Here are the ones that -cost the most time. - -### Stale images on Kubernetes nodes - -**Symptom:** Deployment succeeds, pod is `Running`, but runs old code. - -**Root cause:** Same image tag + `imagePullPolicy: IfNotPresent` = cached -stale image. - -**Fix:** Git SHA in every tag (see Phase 3). - -### Port-forward dies on pod restart - -**Symptom:** `error: lost connection to pod` after deploying a new revision. - -**Root cause:** Port-forward targets a specific pod. When LangSmith replaces -the pod with a new revision, the old pod is terminated. - -**Fix:** Port-forward via the Kubernetes *service* instead: - -```bash -lsof -ti :8000 | xargs kill -9 2>/dev/null -kubectl port-forward svc/ 8000:8000 -``` - -The service routes to whichever pods are currently healthy. - -### 404 Not Found from the LangGraph SDK - -**Symptom:** `langgraph_sdk.errors.NotFoundError: 404 Not Found` - -**Root cause:** The agent API lives under a mount prefix -(`/lgp//`). A stale port-forward or wrong prefix causes -every API call to 404. - -**Fix:** Query the pod's `MOUNT_PREFIX` environment variable: - -```bash -kubectl get pod \ - -o jsonpath='{range .spec.containers[0].env[*]}{.name}={.value}{"\n"}{end}' \ - | grep MOUNT -``` - -Then pass that prefix to the SDK client. - -### Health check fails with `httpx.UnsupportedProtocol` - -**Symptom:** The LangSmith UI shows deployment failure with a protocol error. - -**Root cause:** The dataplane constructs a health check URL from the -`ingress.hostname` Helm value. Without a hostname, the URL has no scheme. - -**Fix (workaround):** Disable health checks until an ingress is configured: - -```python -_ENABLE_HEALTH_CHECK = False -``` - -**Fix (permanent):** Configure the ingress hostname so the health check URL -is well-formed. - ---- - -## Phase 6: Refactoring for Maintainability - -With the infrastructure working, we cleaned up the codebase: - -- **Extracted constants.** Moved 11 infrastructure defaults from YAML config - into Python module-level constants. The stack config file shrank to only - the values that are genuinely environment-specific. -- **Shortened resource names.** Renamed `langgraph-dataplane` to `dataplane` - throughout, aligning with the LangSmith branding. -- **Shared packages.** `azure-ai` lives in `packages/python/azure-ai/` as a - composable library that other stacks can import via `deploy_stack(extra_ips=...)`. -- **Organized IAM policies.** Moved inline policy dicts (like the cluster - autoscaler policy) to module-level constants for readability. -- **Created a kubectl runbook.** Documented every production debugging - session into a structured runbook with copy-paste commands, organized by - problem and solution. - ---- - -## Architecture Summary - -``` -tools/python/ -├── langsmith-hosting/ # Pulumi: VPC, EKS, S3, RDS, Redis, dataplane -└── pulumi-utils/ # Shared Pulumi naming helpers - -packages/python/ -├── azure-ai/ # Pulumi: Azure OpenAI account, model, firewall -└── langsmith-client/ # CLI: build, deploy, list workspaces/listeners -``` - -The data flow: - -1. `langsmith-build` reads `pyproject.toml`, appends the Git SHA, and runs - `langgraph build` to produce a Docker image. -2. The image is tagged and pushed to ECR. -3. `langsmith-deploy-docker` calls the LangSmith API to create/update a - deployment, specifying the ECR image URI and environment secrets. -4. The LangSmith control plane tells the dataplane listener in the EKS - cluster to pull the image and create pods. -5. The agent pods run, connecting to Azure OpenAI through the NAT gateway. - ---- - -## Lessons Learned - -1. **Tag images uniquely.** Never reuse the same tag for different builds. - Git SHAs are free and make debugging trivial. - -2. **Port-forward to services, not pods.** Pods are ephemeral. Services - survive restarts. - -3. **Errors lie.** `httpx.UnsupportedProtocol` was really a temperature - validation failure three layers deep. Always check pod logs before - trusting error messages. - -4. **Extract defaults into code.** YAML config files should contain only - what varies between environments. Everything else belongs in typed Python - constants with clear names and comments. - -5. **Automate the listener lifecycle.** Managing listeners manually was a - constant source of drift. Making it a Pulumi dynamic resource eliminated - an entire class of errors. - -6. **Document while debugging.** The kubectl runbook we wrote during - production incidents has already saved hours. Debugging commands are - worth more than architecture diagrams. - -7. **Model APIs have quirks.** `gpt-5.1-chat` rejecting `temperature=0` is - not documented anywhere obvious. When an agent fails in production, check - the model's parameter constraints first. - -8. **Firewall rules follow the NAT.** If your cluster uses a NAT gateway, - every external API with IP restrictions needs that NAT IP in its - allowlist. Export it as a stack output and wire it into downstream stacks. - ---- - -## What's Next - -- **Ingress hostname and TLS.** Configure a DNS record pointing to the ALB - so the health check works and LangSmith Studio can reach the agent - directly. -- **CI/CD pipeline.** Automate the build-push-deploy cycle so agents deploy - on merge to main. -- **Tracing project management.** Build a utility to clean up stale LangSmith - tracing projects. -- **Production stack.** Stand up a second environment with stricter IAM, - larger nodes, and multi-AZ RDS. - ---- - -## References - -- [LangSmith Hybrid Deployment Docs](https://docs.langchain.com/langsmith/deploy-with-control-plane) -- [LangSmith Troubleshooting (Kubernetes)](https://docs.langchain.com/langsmith/troubleshooting#kubernetes) -- [Architecture](architecture.md) -- infrastructure overview diff --git a/tools/python/langsmith-hosting/pyproject.toml b/tools/python/langsmith-hosting/pyproject.toml index 25bdad0..0cd1d66 100644 --- a/tools/python/langsmith-hosting/pyproject.toml +++ b/tools/python/langsmith-hosting/pyproject.toml @@ -5,6 +5,8 @@ description = "LangSmith Hybrid infrastructure on AWS provisioned with Pulumi" readme = "README.md" requires-python = ">=3.12,<3.13" dependencies = [ + "boto3==1.42.85", + "langsmith-network", "pulumi>=3.0.0", "pulumi-aws>=6.0.0", "pulumi-awsx>=2.0.0", @@ -15,6 +17,9 @@ dependencies = [ "requests>=2.31.0", ] +[tool.uv.sources] +langsmith-network = { workspace = true } + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/__main__.py b/tools/python/langsmith-hosting/src/langsmith_hosting/__main__.py index 43d828d..66584fb 100644 --- a/tools/python/langsmith-hosting/src/langsmith_hosting/__main__.py +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/__main__.py @@ -17,10 +17,18 @@ EKS -> KEDA (Helm) -> langgraph-dataplane (Helm) """ +from importlib.metadata import entry_points + import pulumi import pulumi_aws as aws from dotenv import load_dotenv +from langsmith_network import LANGSMITH +from langsmith_hosting.cidrs import ( + AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS, + collapse_cidrs, + get_cidrs, +) from langsmith_hosting.config import load_config from langsmith_hosting.dataplane import create_dataplane from langsmith_hosting.eks import create_eks_cluster @@ -48,19 +56,50 @@ cidr_block=cfg.vpc_cidr, ) +# ============================================================================= +# EKS API server allowlist +# ============================================================================= +_org_cidrs: list[str] = [] +for _ep in sorted( + entry_points(group="langsmith_hosting.cidrs"), + key=lambda ep: (ep.name, ep.value), +): + try: + _org_cidrs.extend(_ep.load()) + except (KeyboardInterrupt, SystemExit): + raise + except Exception as exc: # noqa: BLE001 — any plugin failure must name the offending entry point + _dist = getattr(_ep, "dist", None) + _dist_info = f" from distribution {_dist}" if _dist is not None else "" + raise RuntimeError( + f"Failed to load langsmith_hosting.cidrs entry point " + f"{_ep.name!r}{_dist_info}" + ) from exc + +_all_cidrs = list( + dict.fromkeys( + get_cidrs(LANGSMITH) + tuple(_org_cidrs) + cfg.extra_public_access_cidrs + ) +) +public_access_cidrs = collapse_cidrs( + _all_cidrs, max_count=AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS +) + +if len(public_access_cidrs) > AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS: + raise ValueError( + f"EKS allows at most {AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS} public access CIDRs, " + f"got {len(public_access_cidrs)}" + ) + # ============================================================================= # EKS (Steps 2-7 in apply-targeted.sh) # ============================================================================= eks = create_eks_cluster( - cluster_name=cfg.eks_cluster_name, - cluster_version=cfg.eks_cluster_version, + cfg=cfg, vpc_id=vpc.vpc_id, private_subnet_ids=vpc.private_subnet_ids, public_subnet_ids=vpc.public_subnet_ids, - node_instance_type=cfg.eks_node_instance_type, - node_min_size=cfg.eks_node_min_size, - node_max_size=cfg.eks_node_max_size, - node_desired_size=cfg.eks_node_desired_size, + public_access_cidrs=public_access_cidrs, ) # ============================================================================= @@ -103,10 +142,9 @@ # Data Plane (Listener + KEDA + langgraph-dataplane) # ============================================================================= dataplane = create_dataplane( - cluster_name=cfg.eks_cluster_name, + cfg=cfg, k8s_provider=eks.k8s_provider, - langsmith_api_key=cfg.langsmith_api_key, - langsmith_workspace_id=cfg.langsmith_workspace_id, + depends_on=[eks.alb_controller], ) # ============================================================================= @@ -134,13 +172,13 @@ pulumi.export("langsmith_listener_id", dataplane.listener_id) # kubectl configuration command -_kubectl_parts = [ - "aws eks update-kubeconfig --region ", - region.region, - " --name ", - eks.cluster_name, -] -if cfg.aws_profile: - _kubectl_parts.extend([" --profile ", cfg.aws_profile]) - -pulumi.export("kubectl_config_command", pulumi.Output.concat(*_kubectl_parts)) +pulumi.export( + "kubectl_config_command", + pulumi.Output.concat( + "aws eks update-kubeconfig --region ", + region.region, + " --name ", + eks.cluster_name, + " --profile ", + ), +) diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/cidrs.py b/tools/python/langsmith-hosting/src/langsmith_hosting/cidrs.py new file mode 100644 index 0000000..24663a7 --- /dev/null +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/cidrs.py @@ -0,0 +1,110 @@ +"""CIDR utilities.""" + +import ipaddress +from collections import defaultdict + +_IPV6 = 6 +_MIN_COLLAPSIBLE_GROUP = 2 +AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS = 40 + + +def get_cidrs(ips: tuple[str, ...]) -> tuple[str, ...]: + """Append a host prefix to bare IP addresses to produce CIDRs. + + IPv4 addresses get ``/32``; IPv6 addresses get ``/128``. + Addresses that already contain a ``/`` are validated and passed through. + + Args: + ips: Bare IP addresses or CIDR strings. + + Returns: + Tuple of valid CIDR strings. + + Raises: + ValueError: If any entry is empty, not a valid IP, or not a valid CIDR. + """ + results: list[str] = [] + for ip in ips: + if "/" in ip: + ipaddress.ip_network(ip, strict=False) + results.append(ip) + continue + addr = ipaddress.ip_address(ip) + suffix = "/128" if addr.version == _IPV6 else "/32" + results.append(f"{ip}{suffix}") + return tuple(results) + + +def collapse_cidrs( + cidrs: list[str], + *, + max_count: int = AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS, +) -> list[str]: + """Best-effort collapse of a CIDR list toward *max_count* entries. + + Strategy: + + 1. Lossless merge via :func:`ipaddress.collapse_addresses` (per IP version). + 2. If still over *max_count*, repeatedly find the longest-prefix (narrowest) + entries that share a common parent prefix and replace each group with + its covering supernet, widening by one bit at a time. + + The result may still exceed *max_count* when the remaining networks have + no collapsible sibling pairs (e.g., unrelated ``/32`` hosts across + different subnets, or mixed IPv4/IPv6). Callers should check the length + if a hard limit is required. + + Args: + cidrs: CIDR strings (e.g. ``["10.0.0.0/8", "192.168.1.1/32"]``). + max_count: Target maximum number of CIDRs in the result. + + Returns: + Collapsed list of CIDR strings, ideally ``<= max_count``. + + Raises: + ValueError: If *max_count* < 1, or if any entry is not a valid CIDR. + """ + if max_count < 1: + raise ValueError(f"max_count must be >= 1, got {max_count}") + + networks = [ipaddress.ip_network(c, strict=False) for c in cidrs] + + v4 = [n for n in networks if n.version != _IPV6] + v6 = [n for n in networks if n.version == _IPV6] + collapsed = list(ipaddress.collapse_addresses(v4)) + list( + ipaddress.collapse_addresses(v6) + ) + + while len(collapsed) > max_count: + best_parent: ipaddress.IPv4Network | ipaddress.IPv6Network | None = None + member_indices: set[int] | None = None + + for candidate_prefix in sorted( + {n.prefixlen for n in collapsed if n.prefixlen > 0}, reverse=True + ): + groups: dict[ipaddress.IPv4Network | ipaddress.IPv6Network, list[int]] = ( + defaultdict(list) + ) + for idx, net in enumerate(collapsed): + if net.prefixlen == candidate_prefix: + parent = net.supernet(prefixlen_diff=1) + groups[parent].append(idx) + + collapsible = { + p: idxs + for p, idxs in groups.items() + if len(idxs) >= _MIN_COLLAPSIBLE_GROUP + } + if collapsible: + best_parent = max(collapsible, key=lambda p: len(collapsible[p])) + member_indices = set(collapsible[best_parent]) + break + + if best_parent is None or member_indices is None: + break + + replaced = [n for i, n in enumerate(collapsed) if i not in member_indices] + replaced.append(best_parent) + collapsed = list(ipaddress.collapse_addresses(replaced)) + + return [str(n) for n in collapsed] diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/config.py b/tools/python/langsmith-hosting/src/langsmith_hosting/config.py index b6ab141..6f67f67 100644 --- a/tools/python/langsmith-hosting/src/langsmith_hosting/config.py +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/config.py @@ -7,7 +7,7 @@ Override any value by adding the corresponding key to your Pulumi..yaml. """ -import os +import ipaddress from dataclasses import dataclass import pulumi @@ -21,8 +21,8 @@ _EKS_CLUSTER_VERSION = "1.31" _EKS_NODE_INSTANCE_TYPE = "m5.xlarge" _EKS_NODE_MIN_SIZE = 2 -_EKS_NODE_MAX_SIZE = 5 -_EKS_NODE_DESIRED_SIZE = 2 +_EKS_NODE_MAX_SIZE = 8 +_EKS_NODE_DESIRED_SIZE = 3 _POSTGRES_INSTANCE_CLASS = "db.t3.medium" _POSTGRES_ENGINE_VERSION = "16.6" @@ -33,7 +33,22 @@ _S3_BUCKET_PREFIX = "langsmith" -_AWS_PROFILE = os.environ.get("AWS_PROFILE", "") + +def _parse_cidrs(raw: str | None) -> tuple[str, ...]: + """Split a comma-separated CIDR string into a tuple, dropping blanks. + + Raises: + ValueError: If any entry is not a valid CIDR. + """ + if not raw: + return () + cidrs: list[str] = [] + for raw_entry in raw.split(","): + raw_cidr = raw_entry.strip() + if not raw_cidr: + continue + cidrs.append(str(ipaddress.ip_network(raw_cidr, strict=False))) + return tuple(cidrs) @dataclass(frozen=True) @@ -66,13 +81,13 @@ class LangSmithConfig: # S3 s3_bucket_prefix: str + # EKS API server access + extra_public_access_cidrs: tuple[str, ...] + # LangSmith Control Plane langsmith_api_key: pulumi.Output[str] langsmith_workspace_id: str - # AWS - aws_profile: str - def load_config() -> LangSmithConfig: """Load and validate all configuration values from the Pulumi stack config. @@ -109,9 +124,9 @@ def load_config() -> LangSmithConfig: redis_node_type=cfg.get("redisNodeType") or _REDIS_NODE_TYPE, # S3 s3_bucket_prefix=cfg.get("s3BucketPrefix") or _S3_BUCKET_PREFIX, + # EKS API server access + extra_public_access_cidrs=_parse_cidrs(cfg.get("extraPublicAccessCidrs")), # LangSmith Control Plane langsmith_api_key=cfg.require_secret("langsmithApiKey"), langsmith_workspace_id=cfg.require("langsmithWorkspaceId"), - # AWS - aws_profile=cfg.get("awsProfile") or _AWS_PROFILE, ) diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/constants.py b/tools/python/langsmith-hosting/src/langsmith_hosting/constants.py index bf85513..321bc17 100644 --- a/tools/python/langsmith-hosting/src/langsmith_hosting/constants.py +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/constants.py @@ -1,8 +1,38 @@ """Shared constants for LangSmith Hosting infrastructure.""" +from functools import lru_cache +from importlib.metadata import entry_points + PROJECT_NAME = "langsmith-hosting" -TAGS = { +_BASE_TAGS: dict[str, str] = { "Project": PROJECT_NAME, "ManagedBy": "pulumi", } + + +@lru_cache +def _build_tags(resource: str = "") -> dict[str, str]: + global_tags: dict[str, str] = {} + for ep in entry_points(group="langsmith_hosting.tags"): + global_tags.update(ep.load()) + + resource_tags: dict[str, str] = {} + if resource: + for ep in entry_points(group=f"langsmith_hosting.tags.{resource}"): + resource_tags.update(ep.load()) + + return {**_BASE_TAGS, **global_tags, **resource_tags} + + +def get_tags(resource: str = "") -> dict[str, str]: + """Build the tag dict for a resource, merging base + plugin tags. + + Args: + resource: Resource type key (e.g. "eks", "s3"). When empty, + only base and global plugin tags are returned. + + Returns: + Shallow copy of the cached tag dict (safe to mutate). + """ + return dict(_build_tags(resource)) diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/dataplane.py b/tools/python/langsmith-hosting/src/langsmith_hosting/dataplane.py index 8fc53de..c42b0f6 100644 --- a/tools/python/langsmith-hosting/src/langsmith_hosting/dataplane.py +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/dataplane.py @@ -6,9 +6,9 @@ 3. langgraph-dataplane Helm chart — connects the cluster to LangSmith Dependency chain: - EKS cluster ready + EKS node group ready (via ebs_csi_addon) ├── Listener (API call, outputs listener_id) - └── KEDA (Helm) + └── KEDA (Helm, depends on nodes being schedulable) └── dataplane (Helm, depends on Listener + KEDA) """ @@ -17,6 +17,7 @@ import pulumi import pulumi_kubernetes as k8s +from langsmith_hosting.config import LangSmithConfig from langsmith_hosting.listener import LangSmithListener _HOST_BACKEND_URL = "https://api.host.langchain.com" @@ -28,6 +29,8 @@ # Disabled until an ingress hostname is configured. Once a DNS record points to # the ALB, set to True and pass the hostname via ingress.hostname in the Helm # values. See docs/ingress-hostname-setup.md in the Terraform project. +_WATCH_NAMESPACES = "default" + _ENABLE_HEALTH_CHECK = False _REDIS_CPU_REQUEST = "1000m" @@ -44,26 +47,26 @@ class DataplaneOutputs: def create_dataplane( - cluster_name: str, + cfg: LangSmithConfig, k8s_provider: k8s.Provider, - langsmith_api_key: pulumi.Output[str], - langsmith_workspace_id: str, - watch_namespaces: str = "default", + depends_on: list[pulumi.Resource] | None = None, ) -> DataplaneOutputs: """Install the LangSmith data plane on an EKS cluster. Args: - cluster_name: EKS cluster name, also used as the listener compute_id. + cfg: Typed stack configuration (cluster name, API key, workspace ID). k8s_provider: Kubernetes provider for Helm releases. - langsmith_api_key: LangSmith API key (Pulumi secret). - langsmith_workspace_id: LangSmith workspace UUID. - watch_namespaces: Comma-separated K8s namespaces for the data plane - to monitor for deployment pods. + depends_on: Resources that must be ready before Helm charts are + installed (e.g. EBS CSI addon, which implies the node group). Returns: DataplaneOutputs with the listener ID. """ - namespaces = [ns.strip() for ns in watch_namespaces.split(",")] + cluster_name = cfg.eks_cluster_name + langsmith_api_key = cfg.langsmith_api_key + langsmith_workspace_id = cfg.langsmith_workspace_id + + namespaces = [ns.strip() for ns in _WATCH_NAMESPACES.split(",")] # ========================================================================= # 1. Register a listener with the LangSmith Control Plane API @@ -81,6 +84,7 @@ def create_dataplane( # ========================================================================= keda = k8s.helm.v3.Release( f"{cluster_name}-keda", + name="keda", chart="keda", version=_KEDA_CHART_VERSION, namespace="keda", @@ -90,6 +94,7 @@ def create_dataplane( ), opts=pulumi.ResourceOptions( provider=k8s_provider, + depends_on=depends_on or [], custom_timeouts=pulumi.CustomTimeouts(create="10m", update="10m"), ), ) diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/eks.py b/tools/python/langsmith-hosting/src/langsmith_hosting/eks.py index 63e329c..b263a4d 100644 --- a/tools/python/langsmith-hosting/src/langsmith_hosting/eks.py +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/eks.py @@ -4,8 +4,20 @@ GP3 default storage class, and Helm-based addons (AWS Load Balancer Controller, metrics-server, cluster-autoscaler). -Dependency chain (from apply-targeted.sh): - VPC -> EKS cluster -> EBS CSI addon -> Helm addons -> GP3 storage class +Dependency chain:: + + VPC + ├── EKS cluster → node group → pod_identity_addon + │ ├── ebs_csi_pod_identity → ebs_csi_addon + │ ├── lbc_pod_identity ──────┐ + │ └── autoscaler_pod_identity┤ + │ ebs_csi_addon ─────────────────────────────────────── ├── ALB controller + │ ├── GP3 StorageClass ├── metrics-server + │ └── cluster-autoscaler ────────────────┘ + │ ALB controller → KEDA → dataplane (wired in dataplane.py) + ├── Postgres (parallel) + ├── Redis (parallel) + └── S3 (parallel) """ import importlib.resources @@ -17,7 +29,8 @@ import pulumi_eks as eks import pulumi_kubernetes as k8s -from langsmith_hosting.constants import TAGS +from langsmith_hosting.config import LangSmithConfig +from langsmith_hosting.constants import get_tags # IAM policy for AWS Load Balancer Controller (v2.7.x). # Source: https://github.com/kubernetes-sigs/aws-load-balancer-controller/blob/v2.7.2/docs/install/iam_policy.json @@ -47,35 +60,37 @@ class EksOutputs: oidc_provider_arn: pulumi.Output[str] oidc_provider_url: pulumi.Output[str] k8s_provider: k8s.Provider + alb_controller: k8s.helm.v3.Release -def create_eks_cluster( # noqa: PLR0913 - cluster_name: str, - cluster_version: str, +def create_eks_cluster( + cfg: LangSmithConfig, vpc_id: pulumi.Output[str], private_subnet_ids: pulumi.Output[list[str]], public_subnet_ids: pulumi.Output[list[str]], - node_instance_type: str, - node_min_size: int, - node_max_size: int, - node_desired_size: int, + public_access_cidrs: list[str] | None = None, ) -> EksOutputs: """Create an EKS cluster with managed node group and addons. Args: - cluster_name: Name of the EKS cluster. - cluster_version: Kubernetes version (e.g. "1.31"). + cfg: Typed stack configuration (cluster name, version, node sizing). vpc_id: VPC ID to deploy into. private_subnet_ids: Private subnet IDs for worker nodes. public_subnet_ids: Public subnet IDs for load balancers. - node_instance_type: EC2 instance type for nodes (e.g. "m5.xlarge"). - node_min_size: Minimum number of nodes. - node_max_size: Maximum number of nodes. - node_desired_size: Desired number of nodes. + public_access_cidrs: CIDR blocks allowed to reach the public API + endpoint. When provided, the private endpoint is also enabled + so in-VPC traffic stays internal. Returns: EksOutputs with cluster details and Kubernetes provider. """ + tags = get_tags("eks") + cluster_name = cfg.eks_cluster_name + cluster_version = cfg.eks_cluster_version + node_instance_type = cfg.eks_node_instance_type + node_min_size = cfg.eks_node_min_size + node_max_size = cfg.eks_node_max_size + node_desired_size = cfg.eks_node_desired_size # ========================================================================= # IAM role for worker nodes # ========================================================================= @@ -93,7 +108,7 @@ def create_eks_cluster( # noqa: PLR0913 ], } ), - tags=TAGS, + tags=tags, ) for policy_name, policy_arn in [ @@ -119,9 +134,12 @@ def create_eks_cluster( # noqa: PLR0913 public_subnet_ids=public_subnet_ids, private_subnet_ids=private_subnet_ids, authentication_mode=eks.AuthenticationMode.API, + endpoint_private_access=bool(public_access_cidrs), + endpoint_public_access=True, + public_access_cidrs=public_access_cidrs, skip_default_node_group=True, create_oidc_provider=True, - tags=TAGS, + tags=tags, ) # ========================================================================= @@ -147,18 +165,82 @@ def create_eks_cluster( # noqa: PLR0913 kubeconfig=cluster.kubeconfig_json, ) + # ========================================================================= + # EKS Pod Identity agent addon + # ========================================================================= + # Required for Pod Identity credential injection into pods. Must be + # present on nodes before any PodIdentityAssociation can take effect. + pod_identity_addon = aws.eks.Addon( + f"{cluster_name}-pod-identity-agent", + cluster_name=cluster.eks_cluster.name, + addon_name="eks-pod-identity-agent", + resolve_conflicts_on_create="OVERWRITE", + resolve_conflicts_on_update="OVERWRITE", + opts=pulumi.ResourceOptions( + depends_on=[node_group], + custom_timeouts=pulumi.CustomTimeouts(create="10m", update="10m"), + ), + ) + + # ========================================================================= + # IAM role for EBS CSI controller (Pod Identity) + # ========================================================================= + # The EBS CSI controller runs in a Deployment (not a DaemonSet), so it + # cannot rely on the node role via IMDS (hop limit = 1 blocks pod access). + # Pod Identity injects credentials directly without IMDS. + # Registered BEFORE the addon so credentials are available on first boot. + ebs_csi_role = aws.iam.Role( + f"{cluster_name}-ebs-csi-role", + assume_role_policy=json.dumps( + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": {"Service": "pods.eks.amazonaws.com"}, + "Action": ["sts:AssumeRole", "sts:TagSession"], + } + ], + } + ), + tags=tags, + ) + + ebs_csi_role_policy = aws.iam.RolePolicyAttachment( + f"{cluster_name}-ebs-csi-role-policy", + role=ebs_csi_role.name, + policy_arn="arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy", + ) + + # Policy must be attached before the association so the role has + # permissions when pods first assume it. IAM propagation may still + # lag briefly; pods should tolerate transient AccessDenied on startup. + ebs_csi_pod_identity = aws.eks.PodIdentityAssociation( + f"{cluster_name}-ebs-csi-pod-identity", + cluster_name=cluster.eks_cluster.name, + namespace="kube-system", + service_account="ebs-csi-controller-sa", + role_arn=ebs_csi_role.arn, + opts=pulumi.ResourceOptions( + depends_on=[pod_identity_addon, ebs_csi_role_policy], + ), + ) + # ========================================================================= # EBS CSI driver addon # ========================================================================= - # Depends on node_group (not just cluster) so nodes are ready to schedule - # the addon's DaemonSet pods before we wait for ACTIVE state. + # Depends on node_group and ebs_csi_pod_identity so credentials are + # registered before the controller pods start. ebs_csi_addon = aws.eks.Addon( f"{cluster_name}-ebs-csi", cluster_name=cluster.eks_cluster.name, addon_name="aws-ebs-csi-driver", resolve_conflicts_on_create="OVERWRITE", resolve_conflicts_on_update="OVERWRITE", - opts=pulumi.ResourceOptions(depends_on=[node_group]), + opts=pulumi.ResourceOptions( + depends_on=[node_group, ebs_csi_pod_identity], + custom_timeouts=pulumi.CustomTimeouts(create="10m", update="10m"), + ), ) # ========================================================================= @@ -184,20 +266,6 @@ def create_eks_cluster( # noqa: PLR0913 ), ) - # ========================================================================= - # EKS Pod Identity agent addon - # ========================================================================= - # Required for Pod Identity credential injection into pods. Must be - # present on nodes before any PodIdentityAssociation can take effect. - pod_identity_addon = aws.eks.Addon( - f"{cluster_name}-pod-identity-agent", - cluster_name=cluster.eks_cluster.name, - addon_name="eks-pod-identity-agent", - resolve_conflicts_on_create="OVERWRITE", - resolve_conflicts_on_update="OVERWRITE", - opts=pulumi.ResourceOptions(depends_on=[node_group]), - ) - # ========================================================================= # IAM role for AWS Load Balancer Controller (Pod Identity) # ========================================================================= @@ -216,63 +284,28 @@ def create_eks_cluster( # noqa: PLR0913 ], } ), - tags=TAGS, + tags=tags, ) - aws.iam.RolePolicy( + lbc_policy = aws.iam.RolePolicy( f"{cluster_name}-lbc-policy", role=lbc_role.id, policy=json.dumps(_LBC_IAM_POLICY), ) - # Bind the role to the LBC service account via Pod Identity (no SA annotation). - # Captured so the Helm release can depend on it — pods must not start until the - # association is registered with the EKS control plane. + # Policy must be attached before the association so the role has + # permissions when pods first assume it. Captured so the Helm release + # can depend on it — pods must not start until the association is + # registered with the EKS control plane. lbc_pod_identity = aws.eks.PodIdentityAssociation( f"{cluster_name}-lbc-pod-identity", cluster_name=cluster.eks_cluster.name, namespace="kube-system", service_account="aws-load-balancer-controller", role_arn=lbc_role.arn, - opts=pulumi.ResourceOptions(depends_on=[pod_identity_addon]), - ) - - # ========================================================================= - # IAM role for EBS CSI controller (Pod Identity) - # ========================================================================= - # The EBS CSI controller runs in a Deployment (not a DaemonSet), so it - # cannot rely on the node role via IMDS (hop limit = 1 blocks pod access). - # Pod Identity injects credentials directly without IMDS. - ebs_csi_role = aws.iam.Role( - f"{cluster_name}-ebs-csi-role", - assume_role_policy=json.dumps( - { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Principal": {"Service": "pods.eks.amazonaws.com"}, - "Action": ["sts:AssumeRole", "sts:TagSession"], - } - ], - } + opts=pulumi.ResourceOptions( + depends_on=[pod_identity_addon, lbc_policy], ), - tags=TAGS, - ) - - aws.iam.RolePolicyAttachment( - f"{cluster_name}-ebs-csi-role-policy", - role=ebs_csi_role.name, - policy_arn="arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy", - ) - - aws.eks.PodIdentityAssociation( - f"{cluster_name}-ebs-csi-pod-identity", - cluster_name=cluster.eks_cluster.name, - namespace="kube-system", - service_account="ebs-csi-controller-sa", - role_arn=ebs_csi_role.arn, - opts=pulumi.ResourceOptions(depends_on=[pod_identity_addon, ebs_csi_addon]), ) # ========================================================================= @@ -292,29 +325,34 @@ def create_eks_cluster( # noqa: PLR0913 ], } ), - tags=TAGS, + tags=tags, ) - aws.iam.RolePolicy( + autoscaler_policy = aws.iam.RolePolicy( f"{cluster_name}-autoscaler-policy", role=autoscaler_role.id, policy=json.dumps(_AUTOSCALER_POLICY), ) + # Policy must be attached before the association so the role has + # permissions when pods first assume it. autoscaler_pod_identity = aws.eks.PodIdentityAssociation( f"{cluster_name}-autoscaler-pod-identity", cluster_name=cluster.eks_cluster.name, namespace="kube-system", service_account="cluster-autoscaler", role_arn=autoscaler_role.arn, - opts=pulumi.ResourceOptions(depends_on=[pod_identity_addon]), + opts=pulumi.ResourceOptions( + depends_on=[pod_identity_addon, autoscaler_policy], + ), ) # ========================================================================= # Helm addons: ALB Controller, metrics-server, cluster-autoscaler # ========================================================================= - k8s.helm.v3.Release( + alb_controller = k8s.helm.v3.Release( f"{cluster_name}-aws-lb-controller", + name="aws-load-balancer-controller", chart="aws-load-balancer-controller", version="1.7.2", namespace="kube-system", @@ -339,6 +377,7 @@ def create_eks_cluster( # noqa: PLR0913 k8s.helm.v3.Release( f"{cluster_name}-metrics-server", + name="metrics-server", chart="metrics-server", version="3.12.0", namespace="kube-system", @@ -353,6 +392,7 @@ def create_eks_cluster( # noqa: PLR0913 k8s.helm.v3.Release( f"{cluster_name}-cluster-autoscaler", + name="cluster-autoscaler", chart="cluster-autoscaler", version="9.36.0", namespace="kube-system", @@ -386,4 +426,5 @@ def create_eks_cluster( # noqa: PLR0913 oidc_provider_arn=cluster.oidc_provider_arn, oidc_provider_url=cluster.oidc_provider_url, k8s_provider=k8s_provider, + alb_controller=alb_controller, ) diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/postgres.py b/tools/python/langsmith-hosting/src/langsmith_hosting/postgres.py index 5714593..8283911 100644 --- a/tools/python/langsmith-hosting/src/langsmith_hosting/postgres.py +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/postgres.py @@ -10,7 +10,7 @@ import pulumi_aws as aws import pulumi_random as random -from langsmith_hosting.constants import TAGS +from langsmith_hosting.constants import get_tags @dataclass @@ -51,6 +51,7 @@ def create_postgres( # noqa: PLR0913 Returns: PostgresOutputs with connection details. """ + tags = get_tags("postgres") # ========================================================================= # Random password for PostgreSQL # ========================================================================= @@ -67,7 +68,7 @@ def create_postgres( # noqa: PLR0913 f"{name}-subnet-group", name=name, subnet_ids=subnet_ids, - tags={**TAGS, "Name": f"{name}-subnet-group"}, + tags={**tags, "Name": f"{name}-subnet-group"}, ) # ========================================================================= @@ -96,7 +97,7 @@ def create_postgres( # noqa: PLR0913 description="Allow all outbound", ) ], - tags={**TAGS, "Name": f"{name}-sg"}, + tags={**tags, "Name": f"{name}-sg"}, ) # ========================================================================= @@ -118,7 +119,7 @@ def create_postgres( # noqa: PLR0913 skip_final_snapshot=True, publicly_accessible=False, storage_encrypted=True, - tags={**TAGS, "Name": name}, + tags={**tags, "Name": name}, ) # Build connection URL diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/redis.py b/tools/python/langsmith-hosting/src/langsmith_hosting/redis.py index 3051913..d6b9e62 100644 --- a/tools/python/langsmith-hosting/src/langsmith_hosting/redis.py +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/redis.py @@ -9,7 +9,7 @@ import pulumi import pulumi_aws as aws -from langsmith_hosting.constants import TAGS +from langsmith_hosting.constants import get_tags @dataclass @@ -41,6 +41,7 @@ def create_redis( Returns: RedisOutputs with cluster connection details. """ + tags = get_tags("redis") # ========================================================================= # Cache subnet group # ========================================================================= @@ -48,7 +49,7 @@ def create_redis( f"{name}-subnet-group", name=name, subnet_ids=subnet_ids, - tags={**TAGS, "Name": f"{name}-subnet-group"}, + tags={**tags, "Name": f"{name}-subnet-group"}, ) # ========================================================================= @@ -77,7 +78,7 @@ def create_redis( description="Allow all outbound", ) ], - tags={**TAGS, "Name": f"{name}-sg"}, + tags={**tags, "Name": f"{name}-sg"}, ) # ========================================================================= @@ -92,7 +93,7 @@ def create_redis( num_cache_nodes=1, subnet_group_name=subnet_group.name, security_group_ids=[sg.id], - tags={**TAGS, "Name": name}, + tags={**tags, "Name": name}, ) return RedisOutputs( diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/s3.py b/tools/python/langsmith-hosting/src/langsmith_hosting/s3.py index 6956872..107410e 100644 --- a/tools/python/langsmith-hosting/src/langsmith_hosting/s3.py +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/s3.py @@ -6,12 +6,37 @@ """ import json +import logging from dataclasses import dataclass +from http import HTTPStatus +import boto3 import pulumi import pulumi_aws as aws +from botocore.exceptions import ClientError -from langsmith_hosting.constants import TAGS +from langsmith_hosting.constants import get_tags + +logger = logging.getLogger(__name__) + + +def _bucket_exists(name: str) -> bool: + """Check if an S3 bucket exists and is owned by this account. + + Used to decide whether Pulumi should import an existing bucket into + state or create a new one (adopt-or-create pattern). + + A 403 (e.g. from a VPC endpoint policy) still means the bucket + exists — only a 404 confirms it does not. + """ + try: + boto3.client("s3").head_bucket(Bucket=name) + return True + except ClientError as exc: + code = exc.response.get("Error", {}).get("Code") + if code == str(HTTPStatus.NOT_FOUND): + return False + return True @dataclass @@ -38,13 +63,21 @@ def create_s3( Returns: S3Outputs with bucket and endpoint details. """ + tags = get_tags("s3") # ========================================================================= # S3 bucket # ========================================================================= + if _bucket_exists(bucket_name): + logger.info("Bucket %s already exists; importing into state", bucket_name) + bucket_opts = pulumi.ResourceOptions(import_=bucket_name) + else: + bucket_opts = None + bucket = aws.s3.Bucket( bucket_name, bucket=bucket_name, - tags={**TAGS, "Name": bucket_name}, + tags={**tags, "Name": bucket_name}, + opts=bucket_opts, ) # Block all public access @@ -91,7 +124,7 @@ def create_s3( service_name=f"com.amazonaws.{region}.s3", vpc_endpoint_type="Gateway", route_table_ids=route_tables.ids, - tags={**TAGS, "Name": f"{bucket_name}-s3-endpoint"}, + tags={**tags, "Name": f"{bucket_name}-s3-endpoint"}, ) # ========================================================================= diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/test_cidrs.py b/tools/python/langsmith-hosting/src/langsmith_hosting/test_cidrs.py new file mode 100644 index 0000000..84913e2 --- /dev/null +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/test_cidrs.py @@ -0,0 +1,89 @@ +"""Tests for CIDR utilities.""" + +import pytest + +from langsmith_hosting.cidrs import ( + AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS, + collapse_cidrs, + get_cidrs, +) + + +class TestGetCidrs: + def test_ipv4_bare_addresses(self): + result = get_cidrs(("10.0.0.1", "192.168.1.1")) + assert result == ("10.0.0.1/32", "192.168.1.1/32") + + def test_ipv6_bare_address(self): + result = get_cidrs(("2001:db8::1",)) + assert result == ("2001:db8::1/128",) + + def test_passthrough_cidr(self): + result = get_cidrs(("10.0.0.0/24", "192.168.0.0/16")) + assert result == ("10.0.0.0/24", "192.168.0.0/16") + + def test_mixed_bare_and_cidr(self): + result = get_cidrs(("10.0.0.1", "172.16.0.0/12", "2001:db8::1")) + assert result == ("10.0.0.1/32", "172.16.0.0/12", "2001:db8::1/128") + + def test_empty_input(self): + assert get_cidrs(()) == () + + def test_invalid_ip_raises(self): + with pytest.raises(ValueError): + get_cidrs(("not-an-ip",)) + + def test_invalid_cidr_raises(self): + with pytest.raises(ValueError): + get_cidrs(("999.999.999.999/32",)) + + +class TestCollapseCidrs: + def test_lossless_merge_adjacent(self): + result = collapse_cidrs(["10.0.0.0/25", "10.0.0.128/25"], max_count=1) + assert result == ["10.0.0.0/24"] + + def test_already_under_limit(self): + cidrs = ["10.0.0.0/24", "192.168.0.0/24"] + result = collapse_cidrs(cidrs, max_count=10) + assert result == cidrs + + def test_lossy_collapse_reduces_count(self): + cidrs = [f"10.0.0.{i}/32" for i in range(50)] + result = collapse_cidrs(cidrs, max_count=AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS) + assert len(result) <= AWS_EKS_MAX_PUBLIC_ACCESS_CIDRS + + def test_max_count_validation(self): + with pytest.raises(ValueError, match="max_count must be >= 1"): + collapse_cidrs(["10.0.0.0/24"], max_count=0) + + def test_invalid_cidr_raises(self): + with pytest.raises(ValueError): + collapse_cidrs(["not-a-cidr"]) + + def test_single_entry(self): + assert collapse_cidrs(["10.0.0.1/32"], max_count=1) == ["10.0.0.1/32"] + + def test_empty_list(self): + assert collapse_cidrs([], max_count=5) == [] + + def test_mixed_ipv4_ipv6_collapses_ipv4_when_ipv6_is_lone(self): + """IPv4 entries should still collapse even when a lone IPv6 /128 is present.""" + max_count = 2 + ipv4_siblings = ["10.0.0.0/32", "10.0.0.1/32"] + ipv6_lone = ["2001:db8::1/128"] + cidrs = ipv4_siblings + ipv6_lone + result = collapse_cidrs(cidrs, max_count=max_count) + assert len(result) <= max_count + assert "2001:db8::1/128" in result + + def test_ipv6_only_collapse(self): + result = collapse_cidrs(["2001:db8::0/128", "2001:db8::1/128"], max_count=1) + assert result == ["2001:db8::/127"] + + def test_unrelated_hosts_may_exceed_limit(self): + """Completely unrelated /32 hosts cannot be collapsed -- result exceeds max.""" + max_count = 2 + cidrs = [f"{i}.0.0.1/32" for i in range(1, 5)] + result = collapse_cidrs(cidrs, max_count=max_count) + assert len(result) >= max_count diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/test_config.py b/tools/python/langsmith-hosting/src/langsmith_hosting/test_config.py new file mode 100644 index 0000000..bd58339 --- /dev/null +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/test_config.py @@ -0,0 +1,42 @@ +"""Tests for config utilities.""" + +import pytest + +from langsmith_hosting.config import _parse_cidrs + + +class TestParseCidrs: + """Tests for _parse_cidrs().""" + + def test_none_returns_empty(self): + assert _parse_cidrs(None) == () + + def test_empty_string_returns_empty(self): + assert _parse_cidrs("") == () + + def test_single_cidr(self): + assert _parse_cidrs("10.0.0.0/24") == ("10.0.0.0/24",) + + def test_comma_separated(self): + result = _parse_cidrs("10.0.0.0/24, 192.168.1.0/24") + assert result == ("10.0.0.0/24", "192.168.1.0/24") + + def test_blank_entries_dropped(self): + result = _parse_cidrs("10.0.0.0/24, , ,192.168.1.0/24") + assert result == ("10.0.0.0/24", "192.168.1.0/24") + + def test_bare_ip_normalized_to_cidr(self): + result = _parse_cidrs("10.0.0.1") + assert result == ("10.0.0.1/32",) + + def test_host_bits_normalized(self): + result = _parse_cidrs("10.0.0.5/24") + assert result == ("10.0.0.0/24",) + + def test_invalid_cidr_raises(self): + with pytest.raises(ValueError): + _parse_cidrs("not-a-cidr") + + def test_ipv6_normalized(self): + result = _parse_cidrs("2001:db8::1") + assert result == ("2001:db8::1/128",) diff --git a/tools/python/langsmith-hosting/src/langsmith_hosting/vpc.py b/tools/python/langsmith-hosting/src/langsmith_hosting/vpc.py index d76d771..aa94a77 100644 --- a/tools/python/langsmith-hosting/src/langsmith_hosting/vpc.py +++ b/tools/python/langsmith-hosting/src/langsmith_hosting/vpc.py @@ -9,7 +9,7 @@ import pulumi import pulumi_awsx as awsx -from langsmith_hosting.constants import TAGS +from langsmith_hosting.constants import get_tags @dataclass @@ -36,9 +36,11 @@ def create_vpc( Returns: VpcOutputs with VPC ID, subnet IDs, and CIDR block. """ + tags = get_tags("vpc") vpc = awsx.ec2.Vpc( f"{cluster_name}-vpc", cidr_block=cidr_block, + enable_dns_hostnames=True, number_of_availability_zones=3, nat_gateways=awsx.ec2.NatGatewayConfigurationArgs( strategy=awsx.ec2.NatGatewayStrategy.SINGLE, @@ -49,7 +51,7 @@ def create_vpc( type=awsx.ec2.SubnetType.PRIVATE, cidr_mask=19, tags={ - **TAGS, + **tags, f"kubernetes.io/cluster/{cluster_name}": "shared", "kubernetes.io/role/internal-elb": "1", }, @@ -58,13 +60,13 @@ def create_vpc( type=awsx.ec2.SubnetType.PUBLIC, cidr_mask=20, tags={ - **TAGS, + **tags, f"kubernetes.io/cluster/{cluster_name}": "shared", "kubernetes.io/role/elb": "1", }, ), ], - tags=TAGS, + tags=tags, ) nat_gateway_public_ips = pulumi.Output.all(vpc.eips).apply( diff --git a/uv.lock b/uv.lock index ea94d28..9753d74 100644 --- a/uv.lock +++ b/uv.lock @@ -10,6 +10,7 @@ members = [ "gcp-gemini", "langsmith-client", "langsmith-hosting", + "langsmith-network", "pulumi-utils", ] @@ -116,6 +117,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/68/11/21331aed19145a952ad28fca2756a1433ee9308079bd03bd898e903a2e53/black-25.12.0-py3-none-any.whl", hash = "sha256:48ceb36c16dbc84062740049eef990bb2ce07598272e673c17d1a7720c71c828", size = 206191, upload-time = "2025-12-08T01:40:50.963Z" }, ] +[[package]] +name = "boto3" +version = "1.42.85" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/31/9d/a9a7b5a9351e3ff0baae01136f71ba6fc4652fe0dc2da3b0a8ebdfc1be44/boto3-1.42.85.tar.gz", hash = "sha256:1cd3dcbfaba85c6071ba9397c1804b6a94a1a97031b8f1993fdba27c0c5d6eba", size = 112769, upload-time = "2026-04-07T19:40:53.834Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/ab/3167b8ec3cf1d87ad08d2ad5f15823a22945cae7870798274c283c3a18f1/boto3-1.42.85-py3-none-any.whl", hash = "sha256:4f6ac066e41d18ec33f532253fac0f35e0fdca373724458f983ce3d531340b7a", size = 140556, upload-time = "2026-04-07T19:40:52.186Z" }, +] + +[[package]] +name = "botocore" +version = "1.42.85" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/0a/ac/7f14b05cf43e4baae99f4570b02e10b2aebf242dfd86245523340390c834/botocore-1.42.85.tar.gz", hash = "sha256:2ee61f80b7724a143e16d0a85408ef5fa20b99dce7a3c8ec5d25cc8dced164c1", size = 15159562, upload-time = "2026-04-07T19:40:43.831Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/16/f3/c1fbaff4c509c616fd01f44357283a8992f10b3a05d932b22e602aa3a221/botocore-1.42.85-py3-none-any.whl", hash = "sha256:828b67722caeb7e240eefedee74050e803d1fa102958ead9c4009101eefd5381", size = 14839741, upload-time = "2026-04-07T19:40:40.733Z" }, +] + [[package]] name = "certifi" version = "2026.2.25" @@ -714,6 +743,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/cc/9b681a170efab4868a032631dea1e8446d8ec718a7f657b94d49d1a12643/isort-6.1.0-py3-none-any.whl", hash = "sha256:58d8927ecce74e5087aef019f778d4081a3b6c98f15a80ba35782ca8a2097784", size = 94329, upload-time = "2025-10-01T16:26:43.291Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "langsmith-client" version = "0.1.0" @@ -740,6 +778,8 @@ name = "langsmith-hosting" version = "0.1.0" source = { editable = "tools/python/langsmith-hosting" } dependencies = [ + { name = "boto3" }, + { name = "langsmith-network" }, { name = "pulumi" }, { name = "pulumi-aws" }, { name = "pulumi-awsx" }, @@ -752,6 +792,8 @@ dependencies = [ [package.metadata] requires-dist = [ + { name = "boto3", specifier = "==1.42.85" }, + { name = "langsmith-network", editable = "packages/python/langsmith-network" }, { name = "pulumi", specifier = ">=3.0.0" }, { name = "pulumi-aws", specifier = ">=6.0.0" }, { name = "pulumi-awsx", specifier = ">=2.0.0" }, @@ -762,6 +804,11 @@ requires-dist = [ { name = "requests", specifier = ">=2.31.0" }, ] +[[package]] +name = "langsmith-network" +version = "0.1.0" +source = { editable = "packages/python/langsmith-network" } + [[package]] name = "markdown-it-py" version = "4.0.0" @@ -1444,6 +1491,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, ] +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, +] + [[package]] name = "semver" version = "3.0.4"