Skip to content

Commit 7be011e

Browse files
committed
feat(extend): add Extend AI document processing integration
1 parent 7d0fdef commit 7be011e

File tree

14 files changed

+831
-1
lines changed

14 files changed

+831
-1
lines changed

apps/docs/components/icons.tsx

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2041,6 +2041,15 @@ export function Mem0Icon(props: SVGProps<SVGSVGElement>) {
20412041
)
20422042
}
20432043

2044+
/**
 * Icon for the Extend integration: a dark rounded tile with three white
 * horizontal strokes, the last one shorter than the others.
 *
 * @param props - SVG attributes spread onto the root `<svg>`. The `xmlns`,
 *   `viewBox` and `fill` attributes are written after the spread, so they
 *   win over same-named values in `props`.
 */
export function ExtendIcon(props: SVGProps<SVGSVGElement>) {
  const icon = (
    <svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 24 24' fill='none'>
      {/* Background tile */}
      <rect width='24' height='24' rx='4' fill='#1A1A2E' />
      {/* Three strokes: two of length 10, one of length 6 */}
      <path
        d='M7 8h10M7 12h10M7 16h6'
        stroke='#FFFFFF'
        strokeWidth='2'
        strokeLinecap='round'
      />
    </svg>
  )

  return icon
}
2052+
20442053
export function EvernoteIcon(props: SVGProps<SVGSVGElement>) {
20452054
return (
20462055
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 32 32' fill='#7fce2c'>

apps/docs/components/ui/icon-mapping.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import {
4545
EnrichSoIcon,
4646
EvernoteIcon,
4747
ExaAIIcon,
48+
ExtendIcon,
4849
EyeIcon,
4950
FathomIcon,
5051
FirecrawlIcon,
@@ -221,6 +222,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
221222
enrich: EnrichSoIcon,
222223
evernote: EvernoteIcon,
223224
exa: ExaAIIcon,
225+
extend_v2: ExtendIcon,
224226
fathom: FathomIcon,
225227
file_v3: DocumentIcon,
226228
firecrawl: FirecrawlIcon,
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
---
2+
title: Extend
3+
description: Parse and extract content from documents using Extend AI
4+
---
5+
6+
import { BlockInfoCard } from "@/components/ui/block-info-card"
7+
8+
<BlockInfoCard
9+
type="extend_v2"
10+
color="#000000"
11+
/>
12+
13+
## Description
14+
15+
The Extend block connects to [Extend AI](https://www.extend.ai/) to parse and extract structured content from documents. It supports a wide range of file formats including PDFs, images (JPEG, PNG, TIFF, GIF, BMP, WebP), and Office documents (Word, PowerPoint, Excel).
16+
17+
Extend uses advanced document understanding to convert unstructured documents into clean, structured output — returning parsed chunks and block-level elements with content type classification and spatial metadata.
18+
19+
### Key Capabilities
20+
21+
- **Document Parsing**: Extract text, tables, figures, and structured content from uploaded documents or URLs.
22+
- **Multiple Output Formats**: Choose between Markdown (default) for clean text output and Spatial for layout-preserving extraction.
23+
- **Chunking Strategies**: Split output by page, document, or section depending on your downstream use case.
24+
- **Engine Selection**: Use the default `Performance` engine for best quality, or `Light` for faster processing on simpler documents.
25+
26+
### Authentication
27+
28+
An Extend API key is required. You can generate one from the [Extend Developer Dashboard](https://dashboard.extend.ai).
29+
30+
### Supported File Types
31+
32+
PDF, JPEG, PNG, TIFF, GIF, BMP, WebP, HEIC/HEIF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx), XML, HTML, CSV, TXT.
33+
34+
## Tools
35+
36+
### `extend_parser`
37+
38+
#### Input
39+
40+
| Parameter | Type | Required | Description |
41+
| --------- | ---- | -------- | ----------- |
42+
| `filePath` | string | No | URL to a document to be processed |
43+
| `file` | file | No | Document file to be processed |
44+
| `fileUpload` | object | No | File upload data from file-upload component |
45+
| `outputFormat` | string | No | Target output format \(markdown or spatial\). Defaults to markdown. |
46+
| `chunking` | string | No | Chunking strategy \(page, document, or section\). Defaults to page. |
47+
| `engine` | string | No | Parsing engine \(parse_performance or parse_light\). Defaults to parse_performance. |
48+
| `apiKey` | string | Yes | Extend API key |
49+
50+
#### Output
51+
52+
| Field | Type | Description |
53+
| ----- | ---- | ----------- |
54+
| `id` | string | Unique identifier for the parser run |
55+
| `status` | string | Processing status |
56+
| `chunks` | json | Parsed document content chunks |
57+
| `blocks` | json | Block-level document elements with type and content |
58+
| `pageCount` | number | Number of pages processed |
59+
| `creditsUsed` | number | API credits consumed |
60+
61+

apps/docs/content/docs/en/tools/meta.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
"enrich",
4040
"evernote",
4141
"exa",
42+
"extend",
4243
"fathom",
4344
"file",
4445
"firecrawl",

apps/sim/app/(landing)/integrations/data/icon-mapping.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ import {
4545
EnrichSoIcon,
4646
EvernoteIcon,
4747
ExaAIIcon,
48+
ExtendIcon,
4849
EyeIcon,
4950
FathomIcon,
5051
FirecrawlIcon,
@@ -221,6 +222,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
221222
enrich: EnrichSoIcon,
222223
evernote: EvernoteIcon,
223224
exa: ExaAIIcon,
225+
extend_v2: ExtendIcon,
224226
fathom: FathomIcon,
225227
file_v3: DocumentIcon,
226228
firecrawl: FirecrawlIcon,

apps/sim/app/(landing)/integrations/data/integrations.json

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2939,6 +2939,24 @@
29392939
"integrationType": "search",
29402940
"tags": ["web-scraping", "enrichment"]
29412941
},
2942+
{
2943+
"type": "extend_v2",
2944+
"slug": "extend",
2945+
"name": "Extend",
2946+
"description": "Parse and extract content from documents",
2947+
"longDescription": "Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.",
2948+
"bgColor": "#1A1A2E",
2949+
"iconName": "ExtendIcon",
2950+
"docsUrl": "https://docs.sim.ai/tools/extend",
2951+
"operations": [],
2952+
"operationCount": 0,
2953+
"triggers": [],
2954+
"triggerCount": 0,
2955+
"authType": "api-key",
2956+
"category": "tools",
2957+
"integrationType": "ai",
2958+
"tags": ["document-processing", "ocr"]
2959+
},
29422960
{
29432961
"type": "fathom",
29442962
"slug": "fathom",
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
import { createLogger } from '@sim/logger'
2+
import { type NextRequest, NextResponse } from 'next/server'
3+
import { z } from 'zod'
4+
import { checkInternalAuth } from '@/lib/auth/hybrid'
5+
import {
6+
secureFetchWithPinnedIP,
7+
validateUrlWithDNS,
8+
} from '@/lib/core/security/input-validation.server'
9+
import { generateRequestId } from '@/lib/core/utils/request'
10+
import { RawFileInputSchema } from '@/lib/uploads/utils/file-schemas'
11+
import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils'
12+
import { resolveFileInputToUrl } from '@/lib/uploads/utils/file-utils.server'
13+
14+
/** Next.js route segment option: this handler is rendered dynamically. */
export const dynamic = 'force-dynamic'

const logger = createLogger('ExtendParseAPI')

/** Values accepted for the `outputFormat` request field. */
const OUTPUT_FORMATS = ['markdown', 'spatial'] as const

/** Values accepted for the `chunking` request field. */
const CHUNKING_STRATEGIES = ['page', 'document', 'section'] as const

/** Values accepted for the `engine` request field. */
const PARSE_ENGINES = ['parse_performance', 'parse_light'] as const

/**
 * Validation schema for the JSON body of the parse request.
 *
 * Only `apiKey` is mandatory at the schema level; every other field is
 * optional and, when present, must match the listed type or enum.
 */
const ExtendParseSchema = z.object({
  apiKey: z.string().min(1, 'API key is required'),
  filePath: z.string().optional(),
  file: RawFileInputSchema.optional(),
  outputFormat: z.enum(OUTPUT_FORMATS).optional(),
  chunking: z.enum(CHUNKING_STRATEGIES).optional(),
  engine: z.enum(PARSE_ENGINES).optional(),
})
26+
27+
/**
 * Builds the JSON payload for the Extend parse endpoint.
 *
 * Only options the caller supplied are copied into `config`; when none were
 * supplied the `config` key is left out of the payload altogether.
 */
function buildExtendRequestBody(
  fileUrl: string,
  options: Pick<z.infer<typeof ExtendParseSchema>, 'outputFormat' | 'chunking' | 'engine'>
): Record<string, unknown> {
  const config: Record<string, unknown> = {}

  if (options.outputFormat) {
    config.targetFormat = options.outputFormat
  }

  if (options.chunking) {
    config.chunking = { strategy: options.chunking }
  }

  if (options.engine) {
    config.engine = options.engine
  }

  const payload: Record<string, unknown> = { fileUrl }

  if (Object.keys(config).length > 0) {
    payload.config = config
  }

  return payload
}

/**
 * Handles a document parse request by forwarding it to the Extend API.
 *
 * Steps: authenticate the caller, validate the JSON body against
 * `ExtendParseSchema`, resolve the file input to a URL, POST to Extend, and
 * return a normalized `{ success, output }` object.
 *
 * Responses:
 * - 401 when internal auth fails or yields no user id.
 * - 400 when the body is not valid JSON, fails schema validation, or no file
 *   URL could be resolved.
 * - The status reported by `resolveFileInputToUrl` when file resolution fails.
 * - 502 when the Extend endpoint fails URL/DNS validation.
 * - The upstream status code when Extend answers with a non-OK response.
 * - 500 for any other error.
 *
 * @param request - Incoming request carrying the JSON parse parameters.
 * @returns A JSON response; on success `output` holds `id`, `status`,
 *   `chunks`, `blocks`, `pageCount` and `creditsUsed`.
 */
export async function POST(request: NextRequest) {
  const requestId = generateRequestId()

  try {
    const authResult = await checkInternalAuth(request, { requireWorkflowId: false })

    if (!authResult.success || !authResult.userId) {
      logger.warn(`[${requestId}] Unauthorized Extend parse attempt`, {
        error: authResult.error || 'Missing userId',
      })
      return NextResponse.json(
        {
          success: false,
          error: authResult.error || 'Unauthorized',
        },
        { status: 401 }
      )
    }

    const userId = authResult.userId

    // A body that cannot be parsed as JSON is a client error. Handle it here so
    // it is reported as 400 instead of falling through to the generic 500 path.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      logger.warn(`[${requestId}] Malformed JSON body in Extend parse request`)
      return NextResponse.json(
        { success: false, error: 'Invalid JSON in request body' },
        { status: 400 }
      )
    }

    // Throws ZodError on invalid input; translated to a 400 in the catch below.
    const validatedData = ExtendParseSchema.parse(body)

    logger.info(`[${requestId}] Extend parse request`, {
      fileName: validatedData.file?.name,
      filePath: validatedData.filePath,
      isWorkspaceFile: validatedData.filePath ? isInternalFileUrl(validatedData.filePath) : false,
      userId,
    })

    const resolution = await resolveFileInputToUrl({
      file: validatedData.file,
      filePath: validatedData.filePath,
      userId,
      requestId,
      logger,
    })

    if (resolution.error) {
      return NextResponse.json(
        { success: false, error: resolution.error.message },
        { status: resolution.error.status }
      )
    }

    const fileUrl = resolution.fileUrl
    if (!fileUrl) {
      return NextResponse.json({ success: false, error: 'File input is required' }, { status: 400 })
    }

    const extendBody = buildExtendRequestBody(fileUrl, validatedData)

    const extendEndpoint = 'https://api.extend.ai/parse'
    const extendValidation = await validateUrlWithDNS(extendEndpoint, 'Extend API URL')

    // Guard on the resolved IP as well, so the pinned fetch below never
    // receives an undefined address (replaces a non-null assertion).
    if (!extendValidation.isValid || !extendValidation.resolvedIP) {
      logger.error(`[${requestId}] Extend API URL validation failed`, {
        error: extendValidation.error,
      })
      return NextResponse.json(
        {
          success: false,
          error: 'Failed to reach Extend API',
        },
        { status: 502 }
      )
    }

    const extendResponse = await secureFetchWithPinnedIP(
      extendEndpoint,
      extendValidation.resolvedIP,
      {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          Accept: 'application/json',
          Authorization: `Bearer ${validatedData.apiKey}`,
          'x-extend-api-version': '2025-04-21',
        },
        body: JSON.stringify(extendBody),
      }
    )

    if (!extendResponse.ok) {
      const errorText = await extendResponse.text()
      logger.error(`[${requestId}] Extend API error:`, errorText)
      return NextResponse.json(
        {
          success: false,
          error: `Extend API error: ${extendResponse.statusText}`,
        },
        { status: extendResponse.status }
      )
    }

    const extendData = await extendResponse.json()

    logger.info(`[${requestId}] Extend parse successful`)

    // Accept both camelCase and snake_case field names from the upstream
    // payload and fall back to neutral defaults when a field is absent.
    return NextResponse.json({
      success: true,
      output: {
        id: extendData.id ?? null,
        status: extendData.status ?? 'PROCESSED',
        chunks: extendData.chunks ?? [],
        blocks: extendData.blocks ?? [],
        pageCount: extendData.pageCount ?? extendData.page_count ?? null,
        creditsUsed: extendData.creditsUsed ?? extendData.credits_used ?? null,
      },
    })
  } catch (error) {
    if (error instanceof z.ZodError) {
      logger.warn(`[${requestId}] Invalid request data`, { errors: error.errors })
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid request data',
          details: error.errors,
        },
        { status: 400 }
      )
    }

    logger.error(`[${requestId}] Error in Extend parse:`, error)

    return NextResponse.json(
      {
        success: false,
        error: error instanceof Error ? error.message : 'Internal server error',
      },
      { status: 500 }
    )
  }
}

0 commit comments

Comments
 (0)