Skip to content

Commit 72e28ba

Browse files
authored
feat(extend): add Extend AI document processing integration (#3869)
* feat(extend): add Extend AI document processing integration * fix(extend): cast json response to fix type error * fix(extend): correct API request body structure per Extend docs * fix(extend): address PR review comments * fix(extend): sync integrations.json bgColor to #000000 * lint
1 parent d99dd86 commit 72e28ba

File tree

14 files changed

+822
-1
lines changed

14 files changed

+822
-1
lines changed

apps/docs/components/icons.tsx

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2080,6 +2080,19 @@ export function Mem0Icon(props: SVGProps<SVGSVGElement>) {
20802080
)
20812081
}
20822082

2083+
/**
 * Extend logo rendered as an inline SVG.
 *
 * Incoming props are spread onto the root `<svg>` first; the `xmlns`,
 * `viewBox` and `fill` attributes are written after the spread, so they
 * override same-named props. The single path is filled with `currentColor`.
 */
export function ExtendIcon(props: SVGProps<SVGSVGElement>) {
  // Outline of the logo, drawn inside the 33x18 viewBox below.
  const logoPath =
    'M16.2893 0C16.6984 1.91708e-05 17.1074 0.0970011 17.5103 0.293745C22.3018 2.63326 27.0841 4.98521 31.8693 7.33722C32.3003 7.54649 32.5721 7.9868 32.5721 8.46461V9.51422C32.5721 9.99522 32.3004 10.4357 31.8693 10.645C31.8693 10.645 19.5816 16.6732 17.5542 17.6634C17.1357 17.8696 16.692 17.9727 16.2859 17.9727C15.8799 17.9727 15.4707 17.8758 15.0615 17.6759C12.8124 16.5795 1.9646 11.2604 0.705842 10.6419C0.274826 10.4295 2.31482e-05 9.99216 0 9.51117V8.46461C4.59913e-05 7.98366 0.271816 7.54656 0.702792 7.33417C5.8977 4.7819 15.0599 0.301869 15.1021 0.281239C15.4957 0.0938275 15.8801 0 16.2893 0ZM16.2859 2.96124C16.1516 2.96126 16.0173 2.98909 15.8924 3.05153L4.28874 8.77696C4.11382 8.86442 4.11382 9.10831 4.28874 9.19577L15.8924 14.9209C16.0173 14.9802 16.1516 15.0115 16.2859 15.0115C16.4202 15.0115 16.5548 14.9802 16.6797 14.9209L28.2864 9.19577C28.4582 9.10831 28.4582 8.86442 28.2864 8.77696L16.6797 3.05153C16.5548 2.98906 16.4202 2.96124 16.2859 2.96124Z'

  return (
    <svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 33 18' fill='none'>
      <path fill='currentColor' fillRule='evenodd' clipRule='evenodd' d={logoPath} />
    </svg>
  )
}
2095+
20832096
export function EvernoteIcon(props: SVGProps<SVGSVGElement>) {
20842097
return (
20852098
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 32 32' fill='#7fce2c'>

apps/docs/components/ui/icon-mapping.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import {
4545
EnrichSoIcon,
4646
EvernoteIcon,
4747
ExaAIIcon,
48+
ExtendIcon,
4849
EyeIcon,
4950
FathomIcon,
5051
FirecrawlIcon,
@@ -223,6 +224,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
223224
enrich: EnrichSoIcon,
224225
evernote: EvernoteIcon,
225226
exa: ExaAIIcon,
227+
extend_v2: ExtendIcon,
226228
fathom: FathomIcon,
227229
file_v3: DocumentIcon,
228230
firecrawl: FirecrawlIcon,
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
---
2+
title: Extend
3+
description: Parse and extract content from documents
4+
---
5+
6+
import { BlockInfoCard } from "@/components/ui/block-info-card"
7+
8+
<BlockInfoCard
9+
type="extend_v2"
10+
color="#000000"
11+
/>
12+
13+
## Usage Instructions
14+
15+
Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.
16+
17+
18+
19+
## Tools
20+
21+
### `extend_parser`
22+
23+
#### Input
24+
25+
| Parameter | Type | Required | Description |
26+
| --------- | ---- | -------- | ----------- |
27+
| `filePath` | string | No | URL to a document to be processed |
28+
| `file` | file | No | Document file to be processed |
29+
| `fileUpload` | object | No | File upload data from file-upload component |
30+
| `outputFormat` | string | No | Target output format \(markdown or spatial\). Defaults to markdown. |
31+
| `chunking` | string | No | Chunking strategy \(page, document, or section\). Defaults to page. |
32+
| `engine` | string | No | Parsing engine \(parse_performance or parse_light\). Defaults to parse_performance. |
33+
| `apiKey` | string | Yes | Extend API key |
34+
35+
#### Output
36+
37+
This tool does not produce any outputs.
38+
39+

apps/docs/content/docs/en/tools/meta.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"enrich",
4040
"evernote",
4141
"exa",
42+
"extend",
4243
"fathom",
4344
"file",
4445
"firecrawl",

apps/sim/app/(landing)/integrations/data/icon-mapping.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import {
4545
EnrichSoIcon,
4646
EvernoteIcon,
4747
ExaAIIcon,
48+
ExtendIcon,
4849
EyeIcon,
4950
FathomIcon,
5051
FirecrawlIcon,
@@ -223,6 +224,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
223224
enrich: EnrichSoIcon,
224225
evernote: EvernoteIcon,
225226
exa: ExaAIIcon,
227+
extend_v2: ExtendIcon,
226228
fathom: FathomIcon,
227229
file_v3: DocumentIcon,
228230
firecrawl: FirecrawlIcon,

apps/sim/app/(landing)/integrations/data/integrations.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2978,6 +2978,24 @@
29782978
"integrationType": "search",
29792979
"tags": ["web-scraping", "enrichment"]
29802980
},
2981+
{
2982+
"type": "extend_v2",
2983+
"slug": "extend",
2984+
"name": "Extend",
2985+
"description": "Parse and extract content from documents",
2986+
"longDescription": "Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.",
2987+
"bgColor": "#000000",
2988+
"iconName": "ExtendIcon",
2989+
"docsUrl": "https://docs.sim.ai/tools/extend",
2990+
"operations": [],
2991+
"operationCount": 0,
2992+
"triggers": [],
2993+
"triggerCount": 0,
2994+
"authType": "api-key",
2995+
"category": "tools",
2996+
"integrationType": "ai",
2997+
"tags": ["document-processing", "ocr"]
2998+
},
29812999
{
29823000
"type": "fathom",
29833001
"slug": "fathom",
Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
import { createLogger } from '@sim/logger'
2+
import { type NextRequest, NextResponse } from 'next/server'
3+
import { z } from 'zod'
4+
import { checkInternalAuth } from '@/lib/auth/hybrid'
5+
import {
6+
secureFetchWithPinnedIP,
7+
validateUrlWithDNS,
8+
} from '@/lib/core/security/input-validation.server'
9+
import { generateRequestId } from '@/lib/core/utils/request'
10+
import { RawFileInputSchema } from '@/lib/uploads/utils/file-schemas'
11+
import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils'
12+
import { resolveFileInputToUrl } from '@/lib/uploads/utils/file-utils.server'
13+
14+
// Next.js route segment config for this handler.
export const dynamic = 'force-dynamic'

const logger = createLogger('ExtendParseAPI')

// Literal values accepted for the optional parse settings.
const OUTPUT_FORMATS = ['markdown', 'spatial'] as const
const CHUNKING_STRATEGIES = ['page', 'document', 'section'] as const
const PARSE_ENGINES = ['parse_performance', 'parse_light'] as const

/**
 * Request body accepted by the Extend parse route.
 *
 * Only `apiKey` is mandatory at the schema level; `file` and `filePath` are
 * both optional here.
 */
const ExtendParseSchema = z.object({
  apiKey: z.string().min(1, 'API key is required'),
  filePath: z.string().optional(),
  file: RawFileInputSchema.optional(),
  outputFormat: z.enum(OUTPUT_FORMATS).optional(),
  chunking: z.enum(CHUNKING_STRATEGIES).optional(),
  engine: z.enum(PARSE_ENGINES).optional(),
})
26+
27+
/**
 * Handles `POST` requests that send a document to Extend's parse endpoint.
 *
 * Flow: authenticate the caller, validate the JSON body against
 * `ExtendParseSchema`, resolve the `file` / `filePath` input to a URL, then
 * forward that URL (plus any optional parse config) to the Extend API.
 *
 * Responses:
 * - 401 when internal auth fails or carries no user id
 * - 400 when the body fails schema validation or no file URL could be resolved
 * - the status reported by `resolveFileInputToUrl` when resolution fails
 * - 502 when the Extend endpoint fails URL/DNS validation
 * - Extend's own status code when Extend answers with a non-OK response
 * - 500 for any other thrown error
 * - 200 with `{ success: true, output }` on success
 *
 * @param request - Incoming request whose JSON body matches `ExtendParseSchema`.
 * @returns A JSON response with `success` and either `output` or `error`.
 */
export async function POST(request: NextRequest) {
  const requestId = generateRequestId()

  try {
    const authResult = await checkInternalAuth(request, { requireWorkflowId: false })

    if (!authResult.success || !authResult.userId) {
      logger.warn(`[${requestId}] Unauthorized Extend parse attempt`, {
        error: authResult.error || 'Missing userId',
      })
      return NextResponse.json(
        {
          success: false,
          error: authResult.error || 'Unauthorized',
        },
        { status: 401 }
      )
    }

    const userId = authResult.userId
    const body = await request.json()
    // Throws ZodError on invalid input; handled in the catch block below.
    const validatedData = ExtendParseSchema.parse(body)

    logger.info(`[${requestId}] Extend parse request`, {
      fileName: validatedData.file?.name,
      filePath: validatedData.filePath,
      isWorkspaceFile: validatedData.filePath ? isInternalFileUrl(validatedData.filePath) : false,
      userId,
    })

    const resolution = await resolveFileInputToUrl({
      file: validatedData.file,
      filePath: validatedData.filePath,
      userId,
      requestId,
      logger,
    })

    if (resolution.error) {
      return NextResponse.json(
        { success: false, error: resolution.error.message },
        { status: resolution.error.status }
      )
    }

    const fileUrl = resolution.fileUrl
    if (!fileUrl) {
      return NextResponse.json({ success: false, error: 'File input is required' }, { status: 400 })
    }

    const extendBody: Record<string, unknown> = {
      file: { fileUrl },
    }

    // Only the options the caller actually supplied are forwarded.
    const config: Record<string, unknown> = {}

    if (validatedData.outputFormat) {
      config.target = validatedData.outputFormat
    }

    if (validatedData.chunking) {
      config.chunkingStrategy = { type: validatedData.chunking }
    }

    if (validatedData.engine) {
      config.engine = validatedData.engine
    }

    if (Object.keys(config).length > 0) {
      extendBody.config = config
    }

    const extendEndpoint = 'https://api.extend.ai/parse'
    const extendValidation = await validateUrlWithDNS(extendEndpoint, 'Extend API URL')
    const resolvedIP = extendValidation.resolvedIP
    // Guard the resolved IP at runtime instead of asserting it is non-null:
    // the pinned fetch below must never be called without one.
    if (!extendValidation.isValid || !resolvedIP) {
      logger.error(`[${requestId}] Extend API URL validation failed`, {
        error: extendValidation.error,
      })
      return NextResponse.json(
        {
          success: false,
          error: 'Failed to reach Extend API',
        },
        { status: 502 }
      )
    }

    const extendResponse = await secureFetchWithPinnedIP(extendEndpoint, resolvedIP, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Accept: 'application/json',
        Authorization: `Bearer ${validatedData.apiKey}`,
        'x-extend-api-version': '2025-04-21',
      },
      body: JSON.stringify(extendBody),
    })

    if (!extendResponse.ok) {
      const errorText = await extendResponse.text()
      logger.error(`[${requestId}] Extend API error:`, errorText)
      const clientError = extractExtendErrorMessage(
        errorText,
        `Extend API error: ${extendResponse.statusText || extendResponse.status}`
      )
      return NextResponse.json(
        {
          success: false,
          error: clientError,
        },
        { status: extendResponse.status }
      )
    }

    const extendData = (await extendResponse.json()) as Record<string, unknown>

    logger.info(`[${requestId}] Extend parse successful`)

    return NextResponse.json({
      success: true,
      output: {
        id: extendData.id ?? null,
        status: extendData.status ?? 'PROCESSED',
        chunks: extendData.chunks ?? [],
        blocks: extendData.blocks ?? [],
        // Accept both camelCase and snake_case field names from the response.
        pageCount: extendData.pageCount ?? extendData.page_count ?? null,
        creditsUsed: extendData.creditsUsed ?? extendData.credits_used ?? null,
      },
    })
  } catch (error) {
    if (error instanceof z.ZodError) {
      logger.warn(`[${requestId}] Invalid request data`, { errors: error.errors })
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid request data',
          details: error.errors,
        },
        { status: 400 }
      )
    }

    logger.error(`[${requestId}] Error in Extend parse:`, error)

    return NextResponse.json(
      {
        success: false,
        error: error instanceof Error ? error.message : 'Internal server error',
      },
      { status: 500 }
    )
  }
}

/**
 * Picks a client-facing message out of an Extend error response body.
 *
 * Returns the first non-empty string found in the body's `message` or `error`
 * field (in that order). Falls back to `fallback` when the body is not JSON,
 * is not an object, or neither field holds a non-empty string, so the caller
 * never receives an empty or non-string error.
 *
 * @param errorText - Raw response body text.
 * @param fallback - Generic message used when nothing usable is found.
 * @returns The message to send back to the client.
 */
function extractExtendErrorMessage(errorText: string, fallback: string): string {
  try {
    const parsed: unknown = JSON.parse(errorText)
    if (parsed !== null && typeof parsed === 'object') {
      const { message, error } = parsed as { message?: unknown; error?: unknown }
      for (const candidate of [message, error]) {
        if (typeof candidate === 'string' && candidate.length > 0) {
          return candidate
        }
      }
    }
  } catch {
    // errorText is not JSON; keep generic message
  }
  return fallback
}

0 commit comments

Comments
 (0)