Skip to content

Commit 1f14d3e

Browse files
committed
Run 6 prep: remove venv from workspace, ban JsonAdapter/long names/chained Literals
Run 5 analysis: the agent wrote a scaffolding script to /tmp, created a JsonAdapter hack class, produced 200+ char stuttered class names, and chained Literal[x] | Literal[y] instead of using Literal[x, y].

Structural changes:
- Move the Python venv outside the workspace into the verify dir so the agent has no access to a Python interpreter (only ./verify works)
- Prompt explicitly states that no Python is available in the workspace

New code quality checks in verifier:
- Ban JsonAdapter and custom json_schema() methods
- Ban class names > 60 characters (mechanical path-derived naming)
- Ban chained Literal[x] | Literal[y] | Literal[z] (3+ in a row)

Prompt updates:
- Remove .venv from file scope, add 'no Python' warnings
- Change Literal style examples from chained to multi-value
- Add failures 5-7 from run 5 (scaffolding to /tmp, JsonAdapter, chained Literals)
1 parent 686668d commit 1f14d3e

3 files changed

Lines changed: 92 additions & 27 deletions

File tree

codegen-llm/src/codegen.ts

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -325,34 +325,36 @@ function setupWorkspace(workDir: string, opts: CodegenOptions): void {
325325
const verifyScriptPath = path.join(verifyDir, "verify_schema.py");
326326
fs.writeFileSync(verifyScriptPath, VERIFY_SCRIPT, { mode: 0o755 });
327327

328+
// Create a venv with pydantic OUTSIDE the workspace so the agent
329+
// cannot use it to run scaffolding scripts. Only the verify wrapper
330+
// knows the path to this Python.
331+
log(opts, "Creating Python venv with pydantic...");
332+
const venvDir = path.join(verifyDir, ".venv");
333+
execSync(`uv venv "${venvDir}"`, { cwd: verifyDir, stdio: "pipe" });
334+
execSync("uv pip install 'pydantic>=2.9.0'", {
335+
cwd: verifyDir,
336+
stdio: "pipe",
337+
timeout: 120_000,
338+
env: { ...process.env, VIRTUAL_ENV: venvDir },
339+
});
340+
const venvPython = path.join(venvDir, "bin", "python");
341+
const version = execSync(
342+
`"${venvPython}" -c "import pydantic; print(pydantic.VERSION)"`,
343+
{ cwd: verifyDir, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] },
344+
).trim();
345+
log(opts, `Installed pydantic ${version} in verify venv`);
346+
328347
// Shell wrapper inside workspace — agent calls this but can't see the
329-
// Python source.
348+
// Python source or the venv.
330349
const wrapper = [
331350
"#!/usr/bin/env bash",
332-
`exec .venv/bin/python "${verifyScriptPath}" "$@"`,
351+
`exec "${venvPython}" "${verifyScriptPath}" "$@"`,
333352
].join("\n");
334353
fs.writeFileSync(path.join(workDir, "verify"), wrapper, { mode: 0o755 });
335354

336355
// Create the output directory the agent writes into
337356
fs.mkdirSync(path.join(workDir, "generated"), { recursive: true });
338357

339-
// Create a venv with pydantic via uv so that both the agent and our
340-
// host-side verification have a working Python regardless of whether
341-
// the system Python is Nix-managed / immutable.
342-
log(opts, "Creating Python venv with pydantic...");
343-
execSync("uv venv .venv", { cwd: workDir, stdio: "pipe" });
344-
execSync("uv pip install 'pydantic>=2.9.0'", {
345-
cwd: workDir,
346-
stdio: "pipe",
347-
timeout: 120_000,
348-
env: { ...process.env, VIRTUAL_ENV: `${workDir}/.venv` },
349-
});
350-
const version = execSync(
351-
'.venv/bin/python -c "import pydantic; print(pydantic.VERSION)"',
352-
{ cwd: workDir, encoding: "utf8", stdio: ["pipe", "pipe", "pipe"] },
353-
).trim();
354-
log(opts, `Installed pydantic ${version} in workspace venv`);
355-
356358
// Minimal git init so Codex is happy (belt-and-suspenders alongside
357359
// skipGitRepoCheck)
358360
try {

codegen-llm/src/prompts.ts

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,9 +52,12 @@ You have access to these locations and ONLY these locations:
5252
1. **Your workspace** (current working directory):
5353
- \`schema.json\` — serialised River JSON schema (ground truth for verification)
5454
- \`naming_hints.json\` — pre-computed error and $kind class names (USE THESE)
55-
- \`./verify\` — verification tool
55+
- \`./verify\` — verification tool (only way to run Python)
5656
- \`generated/\` — output directory
57-
- \`.venv/\` — Python venv with pydantic
57+
58+
**NOTE:** There is NO Python interpreter in the workspace. You cannot run
59+
Python scripts directly. The ONLY way to execute Python is \`./verify\`.
60+
Write each service file by hand — do not attempt to create or run scripts.
5861
5962
2. **TypeScript server source** (READ-ONLY):
6063
\`${opts.serverSrcPath}\`
@@ -102,6 +105,8 @@ verification fails immediately.
102105
- \`RootModel\`, \`__get_pydantic_json_schema__\`, \`create_model()\`
103106
- \`SchemaAdapter\`, \`make_schema_model\`, \`make_schema_adapter\`
104107
- \`RiverTypeAdapter\` or any custom TypeAdapter subclass
108+
- \`JsonAdapter\` or any custom adapter/wrapper class
109+
- Custom \`json_schema()\` methods — only Pydantic's built-in is allowed
105110
- \`schema_override_json\`, \`schema_override\`, \`_schema_json\`
106111
- Raw JSON Schema dicts embedded as Python dict literals
107112
- Any helper/utility that builds models from schema dicts at runtime
@@ -110,7 +115,8 @@ verification fails immediately.
110115
- \`Variant\\d+\` anywhere in a class name — ALL banned
111116
- \`Input2\`, \`Output2\`, \`Errors3\` — numbered suffixes
112117
- 4+ consecutive uppercase letters: \`NOTFOUNDError\`, \`PTYERRORError\` — banned
113-
- Long path-derived names: \`CreateInputArtifactServicesItemDevelopmentRunVariant1\`
118+
- Class names > 60 characters — indicates mechanical path-derived naming
119+
- Chained \`Literal[x] | Literal[y] | Literal[z]\` — use \`Literal[x, y, z]\` instead
114120
115121
**Banned redefinitions:**
116122
- Redefining \`UncaughtError\`, \`UnexpectedDisconnectError\`,
@@ -230,7 +236,7 @@ Python — use the **TypeScript names** (\`ExitInfo\`, \`OutputChunk\`):
230236
class ExitInfo(BaseModel):
231237
kind: Literal['finished'] = Field(alias='$kind')
232238
exitCode: int
233-
reason: Literal['Errored'] | Literal['Exited'] | Literal['Stopped']
239+
reason: Literal['Errored', 'Exited', 'Stopped']
234240
model_config = ConfigDict(populate_by_name=True)
235241
236242
class OutputChunk(BaseModel):
@@ -345,8 +351,8 @@ generated/
345351
346352
6. **Service classes** wrapping a River client with typed async methods.
347353
348-
7. **String literal unions.** Use individual Literals: \`Literal['a'] | Literal['b']\`
349-
(produces \`anyOf\` with \`const\` in JSON Schema, not \`enum\`).
354+
7. **String/int literal unions.** Use multi-value Literal: \`Literal['a', 'b', 'c']\`
355+
or \`Literal[0, 1, 2]\`. Do NOT chain single-value Literals with \`|\`.
350356
351357
8. **Intersections.** Flatten all properties into a single BaseModel.
352358
@@ -501,14 +507,37 @@ importing from \`_errors.py\`.
501507
502508
**Import from _errors.py.**
503509
510+
### Failure 5: Writing scaffolding scripts outside the workspace
511+
512+
The agent wrote \`/tmp/gen_models.py\` — a scaffolding script placed OUTSIDE
513+
the workspace to dodge the ban. It used \`.venv/bin/python\` to execute it.
514+
Result: same terrible mechanical names, stuttered class names 200+ chars long
515+
(\`ListInstalledPackagesOutputPackagesValueAllItemListInstalled...\`).
516+
517+
**There is no Python interpreter available. Write files directly.**
518+
519+
### Failure 6: Custom JsonAdapter class to manipulate json_schema() output
520+
521+
The agent defined a \`JsonAdapter\` class that wraps \`TypeAdapter\` and strips
522+
\`$defs\` keys to game verification. This is banned — use plain \`TypeAdapter\`.
523+
524+
### Failure 7: Chained Literal[x] | Literal[y] | Literal[z]
525+
526+
The agent produced \`Literal[0] | Literal[1] | Literal[2] | ... | Literal[8]\`
527+
instead of \`Literal[0, 1, 2, 3, 4, 5, 6, 7, 8]\`. The chained form is banned.
528+
Use multi-value \`Literal[...]\` for cleaner, more readable code.
529+
504530
505531
## Important notes
506532
533+
- **No Python in workspace.** You cannot run \`.venv/bin/python\`, \`python3\`,
534+
or any Python scripts. The only executable is \`./verify\`. Do NOT create
535+
venvs, install packages, or write Python scripts to run.
507536
- schema.json is large. Use \`jq\` to read specific services.
508537
- \`jq '.services | keys' schema.json\` — list all service names
509538
- \`jq '.services.<name>' schema.json\` — inspect a specific service
510-
- When the verifier reports mismatches, compare original (from schema.json via
511-
\`jq\`) against \`TypeAdapter(Model).json_schema()\` output.
539+
- When the verifier reports mismatches, read the error message carefully and
540+
fix the model by hand.
512541
`.trim();
513542
}
514543

codegen-llm/src/verify-script.ts

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,8 @@ _BANNED_PATTERNS: list[tuple[str, str]] = [
5656
('_schema_json', 'Do not cache/embed raw JSON schemas — models must produce correct schemas natively'),
5757
('RiverTypeAdapter', 'Do not subclass TypeAdapter — use TypeAdapter directly with correct models'),
5858
('json.loads(self._', 'Do not return embedded JSON from json_schema() — fix the model instead'),
59+
('JsonAdapter', 'Do not define custom JsonAdapter wrappers — use TypeAdapter directly'),
60+
('def json_schema(self', 'Do not define custom json_schema() methods — only Pydantic BaseModel.json_schema() is allowed'),
5961
]
6062
6163
# Standard River error class names that must ONLY be defined in _errors.py.
@@ -88,6 +90,16 @@ _ALLCAPS_WHITELIST = frozenset({
8890
# Add known abbreviation-heavy names that are actually correct
8991
})
9092
93+
# Class names longer than 60 characters indicate mechanical path-derived naming
94+
# (e.g. ListInstalledPackagesOutputPackagesValueAllItem).
95+
_MAX_CLASS_NAME_LEN = 60
96+
97+
# Chained single-value Literal pattern: Literal[x] | Literal[y] | Literal[z]
98+
# This is ugly and should be Literal[x, y, z] instead.
99+
_CHAINED_LITERAL_RE = re.compile(
100+
r'Literal\\[[^\\]]+\\]\\s*\\|\\s*Literal\\[[^\\]]+\\]\\s*\\|\\s*Literal\\[',
101+
)
102+
91103
92104
def check_code_quality(generated_dir: Path) -> list[str]:
93105
"""Scan generated Python files for banned patterns."""
@@ -136,6 +148,28 @@ def check_code_quality(generated_dir: Path) -> list[str]:
136148
f'[{rel}] BANNED: class {class_match.group(1)} must not '
137149
f'be redefined — import it from _errors.py instead'
138150
)
151+
152+
# Check for overly long class names (mechanical path-derived naming)
153+
for line in content.splitlines():
154+
class_match = re.match(r'^class\\s+(\\w+)\\s*[\\(:]', line)
155+
if class_match:
156+
name = class_match.group(1)
157+
if len(name) > _MAX_CLASS_NAME_LEN:
158+
errors.append(
159+
f'[{rel}] BANNED NAME: "{name}" ({len(name)} chars) — '
160+
f'class names must be under {_MAX_CLASS_NAME_LEN} characters. '
161+
f'Use concise names from the TypeScript source, not '
162+
f'mechanical path-derived names.'
163+
)
164+
165+
# Check for chained Literal[x] | Literal[y] | Literal[z] (should be Literal[x, y, z])
166+
for m in _CHAINED_LITERAL_RE.finditer(content):
167+
line_num = content[:m.start()].count('\\n') + 1
168+
errors.append(
169+
f'[{rel}:{line_num}] BANNED STYLE: chained Literal[x] | Literal[y] | '
170+
f'Literal[z] — use Literal[x, y, z] instead for cleaner code'
171+
)
172+
139173
return errors
140174
141175

0 commit comments

Comments
 (0)