Run 6 prep: remove venv from workspace, ban JsonAdapter/long names/chained Literals

Monkatraz · Monkatraz · commit 1f14d3e2f500 · 2026-02-20T14:49:27.000Z
Run 5 analysis: agent wrote scaffolding script to /tmp, created
JsonAdapter hack class, produced 200+ char stuttered class names,
and chained Literal[x] | Literal[y] instead of Literal[x, y].

Structural changes:
- Move Python venv outside workspace into verify dir so the agent
  has no access to a Python interpreter (only ./verify works)
- Prompt explicitly states no Python available in workspace

New code quality checks in verifier:
- Ban JsonAdapter and custom json_schema() methods
- Ban class names > 60 characters (mechanical path-derived naming)
- Ban chained Literal[x] | Literal[y] | Literal[z] (3+ in a row)

Prompt updates:
- Remove .venv from file scope, add 'no Python' warnings
- Change Literal style examples from chained to multi-value
- Add failures 5-7 from run 5 (scaffolding to /tmp, JsonAdapter,
  chained Literals)
diff --git a/codegen-llm/src/codegen.ts b/codegen-llm/src/codegen.ts
@@ -325,34 +325,36 @@ function setupWorkspace(workDir: string, opts: CodegenOptions): void {
   const verifyScriptPath = path.join(verifyDir, "verify_schema.py");
   fs.writeFileSync(verifyScriptPath, VERIFY_SCRIPT, { mode: 0o755 });
 
+  // Create a venv with pydantic OUTSIDE the workspace so the agent
+  // cannot use it to run scaffolding scripts.  Only the verify wrapper
+  // knows the path to this Python.
+  log(opts, "Creating Python venv with pydantic...");
+  const venvDir = path.join(verifyDir, ".venv");
+  execSync(`uv venv "${venvDir}"`, { cwd: verifyDir, stdio: "pipe" });
+  execSync("uv pip install 'pydantic>=2.9.0'", {
+    cwd: verifyDir,
+    stdio: "pipe",
+    timeout: 120_000,
+    env: { ...process.env, VIRTUAL_ENV: venvDir },
+  });
+  const venvPython = path.join(venvDir, "bin", "python");
+  const version = execSync(
+    `"${venvPython}" -c "import pydantic; print(pydantic.VERSION)"`,
+    { cwd: verifyDir, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] },
+  ).trim();
+  log(opts, `Installed pydantic ${version} in verify venv`);
+
   // Shell wrapper inside workspace — agent calls this but can't see the
-  // Python source.
+  // Python source or the venv.
   const wrapper = [
     "#!/usr/bin/env bash",
-    `exec .venv/bin/python "${verifyScriptPath}" "$@"`,
+    `exec "${venvPython}" "${verifyScriptPath}" "$@"`,
   ].join("\n");
   fs.writeFileSync(path.join(workDir, "verify"), wrapper, { mode: 0o755 });
 
   // Create the output directory the agent writes into
   fs.mkdirSync(path.join(workDir, "generated"), { recursive: true });
 
-  // Create a venv with pydantic via uv so that both the agent and our
-  // host-side verification have a working Python regardless of whether
-  // the system Python is Nix-managed / immutable.
-  log(opts, "Creating Python venv with pydantic...");
-  execSync("uv venv .venv", { cwd: workDir, stdio: "pipe" });
-  execSync("uv pip install 'pydantic>=2.9.0'", {
-    cwd: workDir,
-    stdio: "pipe",
-    timeout: 120_000,
-    env: { ...process.env, VIRTUAL_ENV: `${workDir}/.venv` },
-  });
-  const version = execSync(
-    '.venv/bin/python -c "import pydantic; print(pydantic.VERSION)"',
-    { cwd: workDir, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] },
-  ).trim();
-  log(opts, `Installed pydantic ${version} in workspace venv`);
-
   // Minimal git init so Codex is happy (belt-and-suspenders alongside
   // skipGitRepoCheck)
   try {
diff --git a/codegen-llm/src/prompts.ts b/codegen-llm/src/prompts.ts
@@ -52,9 +52,12 @@ You have access to these locations and ONLY these locations:
 1. **Your workspace** (current working directory):
    - \`schema.json\` — serialised River JSON schema (ground truth for verification)
    - \`naming_hints.json\` — pre-computed error and $kind class names (USE THESE)
-   - \`./verify\` — verification tool
+   - \`./verify\` — verification tool (only way to run Python)
    - \`generated/\` — output directory
-   - \`.venv/\` — Python venv with pydantic
+
+   **NOTE:** There is NO Python interpreter in the workspace.  You cannot run
+   Python scripts directly.  The ONLY way to execute Python is \`./verify\`.
+   Write each service file by hand — do not attempt to create or run scripts.
 
 2. **TypeScript server source** (READ-ONLY):
    \`${opts.serverSrcPath}\`
@@ -102,6 +105,8 @@ verification fails immediately.
 - \`RootModel\`, \`__get_pydantic_json_schema__\`, \`create_model()\`
 - \`SchemaAdapter\`, \`make_schema_model\`, \`make_schema_adapter\`
 - \`RiverTypeAdapter\` or any custom TypeAdapter subclass
+- \`JsonAdapter\` or any custom adapter/wrapper class
+- Custom \`json_schema()\` methods — only Pydantic's built-in is allowed
 - \`schema_override_json\`, \`schema_override\`, \`_schema_json\`
 - Raw JSON Schema dicts embedded as Python dict literals
 - Any helper/utility that builds models from schema dicts at runtime
@@ -110,7 +115,8 @@ verification fails immediately.
 - \`Variant\\d+\` anywhere in a class name — ALL banned
 - \`Input2\`, \`Output2\`, \`Errors3\` — numbered suffixes
 - 4+ consecutive uppercase letters: \`NOTFOUNDError\`, \`PTYERRORError\` — banned
-- Long path-derived names: \`CreateInputArtifactServicesItemDevelopmentRunVariant1\`
+- Class names > 60 characters — indicates mechanical path-derived naming
+- Chained \`Literal[x] | Literal[y] | Literal[z]\` — use \`Literal[x, y, z]\` instead
 
 **Banned redefinitions:**
 - Redefining \`UncaughtError\`, \`UnexpectedDisconnectError\`,
@@ -230,7 +236,7 @@ Python — use the **TypeScript names** (\`ExitInfo\`, \`OutputChunk\`):
 class ExitInfo(BaseModel):
     kind: Literal['finished'] = Field(alias='$kind')
     exitCode: int
-    reason: Literal['Errored'] | Literal['Exited'] | Literal['Stopped']
+    reason: Literal['Errored', 'Exited', 'Stopped']
     model_config = ConfigDict(populate_by_name=True)
 
 class OutputChunk(BaseModel):
@@ -345,8 +351,8 @@ generated/
 
 6. **Service classes** wrapping a River client with typed async methods.
 
-7. **String literal unions.** Use individual Literals: \`Literal['a'] | Literal['b']\`
-   (produces \`anyOf\` with \`const\` in JSON Schema, not \`enum\`).
+7. **String/int literal unions.** Use multi-value Literal: \`Literal['a', 'b', 'c']\`
+   or \`Literal[0, 1, 2]\`.  Do NOT chain single-value Literals with \`|\`.
 
 8. **Intersections.** Flatten all properties into a single BaseModel.
 
@@ -501,14 +507,37 @@ importing from \`_errors.py\`.
 
 **Import from _errors.py.**
 
+### Failure 5: Writing scaffolding scripts outside the workspace
+
+The agent wrote \`/tmp/gen_models.py\` — a scaffolding script placed OUTSIDE
+the workspace to dodge the ban.  It used \`.venv/bin/python\` to execute it.
+Result: same terrible mechanical names, stuttered class names 200+ chars long
+(\`ListInstalledPackagesOutputPackagesValueAllItemListInstalled...\`).
+
+**There is no Python interpreter available.  Write files directly.**
+
+### Failure 6: Custom JsonAdapter class to manipulate json_schema() output
+
+The agent defined a \`JsonAdapter\` class that wraps \`TypeAdapter\` and strips
+\`$defs\` keys to game verification.  This is banned — use plain \`TypeAdapter\`.
+
+### Failure 7: Chained Literal[x] | Literal[y] | Literal[z]
+
+The agent produced \`Literal[0] | Literal[1] | Literal[2] | ... | Literal[8]\`
+instead of \`Literal[0, 1, 2, 3, 4, 5, 6, 7, 8]\`.  The chained form is banned.
+Use multi-value \`Literal[...]\` for cleaner, more readable code.
+
 
 ## Important notes
 
+- **No Python in workspace.** You cannot run \`.venv/bin/python\`, \`python3\`,
+  or any Python scripts.  The only executable is \`./verify\`.  Do NOT create
+  venvs, install packages, or write Python scripts to run.
 - schema.json is large.  Use \`jq\` to read specific services.
 - \`jq '.services | keys' schema.json\` — list all service names
 - \`jq '.services.<name>' schema.json\` — inspect a specific service
-- When the verifier reports mismatches, compare original (from schema.json via
-  \`jq\`) against \`TypeAdapter(Model).json_schema()\` output.
+- When the verifier reports mismatches, read the error message carefully and
+  fix the model by hand.
 `.trim();
 }
 
diff --git a/codegen-llm/src/verify-script.ts b/codegen-llm/src/verify-script.ts
@@ -56,6 +56,8 @@ _BANNED_PATTERNS: list[tuple[str, str]] = [
     ('_schema_json', 'Do not cache/embed raw JSON schemas — models must produce correct schemas natively'),
     ('RiverTypeAdapter', 'Do not subclass TypeAdapter — use TypeAdapter directly with correct models'),
     ('json.loads(self._', 'Do not return embedded JSON from json_schema() — fix the model instead'),
+    ('JsonAdapter', 'Do not define custom JsonAdapter wrappers — use TypeAdapter directly'),
+    ('def json_schema(self', 'Do not define custom json_schema() methods — only Pydantic BaseModel.json_schema() is allowed'),
 ]
 
 # Standard River error class names that must ONLY be defined in _errors.py.
@@ -88,6 +90,16 @@ _ALLCAPS_WHITELIST = frozenset({
     # Add known abbreviation-heavy names that are actually correct
 })
 
+# Class names longer than 60 characters indicate mechanical path-derived naming
+# (e.g. the stuttered ListInstalledPackagesOutputPackagesValueAllItemListInstalled...).
+_MAX_CLASS_NAME_LEN = 60
+
+# Chained single-value Literal pattern: Literal[x] | Literal[y] | Literal[z]
+# This is ugly and should be Literal[x, y, z] instead.
+_CHAINED_LITERAL_RE = re.compile(
+    r'Literal\\[[^\\]]+\\]\\s*\\|\\s*Literal\\[[^\\]]+\\]\\s*\\|\\s*Literal\\[',
+)
+
 
 def check_code_quality(generated_dir: Path) -> list[str]:
     """Scan generated Python files for banned patterns."""
@@ -136,6 +148,28 @@ def check_code_quality(generated_dir: Path) -> list[str]:
                         f'[{rel}] BANNED: class {class_match.group(1)} must not '
                         f'be redefined — import it from _errors.py instead'
                     )
+
+        # Check for overly long class names (mechanical path-derived naming)
+        for line in content.splitlines():
+            class_match = re.match(r'^class\\s+(\\w+)\\s*[\\(:]', line)
+            if class_match:
+                name = class_match.group(1)
+                if len(name) > _MAX_CLASS_NAME_LEN:
+                    errors.append(
+                        f'[{rel}] BANNED NAME: "{name}" ({len(name)} chars) — '
+                        f'class names must be under {_MAX_CLASS_NAME_LEN} characters. '
+                        f'Use concise names from the TypeScript source, not '
+                        f'mechanical path-derived names.'
+                    )
+
+        # Check for chained Literal[x] | Literal[y] | Literal[z] (should be Literal[x, y, z])
+        for m in _CHAINED_LITERAL_RE.finditer(content):
+            line_num = content[:m.start()].count('\\n') + 1
+            errors.append(
+                f'[{rel}:{line_num}] BANNED STYLE: chained Literal[x] | Literal[y] | '
+                f'Literal[z] — use Literal[x, y, z] instead for cleaner code'
+            )
+
     return errors