Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions apps/docs/components/icons.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2041,6 +2041,15 @@ export function Mem0Icon(props: SVGProps<SVGSVGElement>) {
)
}

/**
 * Extend icon: a dark rounded square with three white horizontal strokes,
 * the last one shorter than the other two.
 *
 * Caller props are spread first, so the `xmlns`, `viewBox` and `fill`
 * attributes set here always take precedence over same-named props.
 */
export function ExtendIcon(props: SVGProps<SVGSVGElement>) {
  return (
    <svg
      {...props}
      xmlns='http://www.w3.org/2000/svg'
      viewBox='0 0 24 24'
      fill='none'
    >
      {/* Background tile */}
      <rect width='24' height='24' rx='4' fill='#1A1A2E' />
      {/* Three text-like lines */}
      <path
        d='M7 8h10M7 12h10M7 16h6'
        stroke='#FFFFFF'
        strokeWidth='2'
        strokeLinecap='round'
      />
    </svg>
  )
}

export function EvernoteIcon(props: SVGProps<SVGSVGElement>) {
return (
<svg {...props} xmlns='http://www.w3.org/2000/svg' viewBox='0 0 32 32' fill='#7fce2c'>
Expand Down
2 changes: 2 additions & 0 deletions apps/docs/components/ui/icon-mapping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import {
EnrichSoIcon,
EvernoteIcon,
ExaAIIcon,
ExtendIcon,
EyeIcon,
FathomIcon,
FirecrawlIcon,
Expand Down Expand Up @@ -222,6 +223,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
enrich: EnrichSoIcon,
evernote: EvernoteIcon,
exa: ExaAIIcon,
extend_v2: ExtendIcon,
fathom: FathomIcon,
file_v3: DocumentIcon,
firecrawl: FirecrawlIcon,
Expand Down
61 changes: 61 additions & 0 deletions apps/docs/content/docs/en/tools/extend.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
---
title: Extend
description: Parse and extract content from documents using Extend AI
---

import { BlockInfoCard } from "@/components/ui/block-info-card"

<BlockInfoCard
type="extend_v2"
color="#000000"
/>

## Description

The Extend block connects to [Extend AI](https://www.extend.ai/) to parse and extract structured content from documents. It supports a wide range of file formats including PDFs, images (JPEG, PNG, TIFF, GIF, BMP, WebP), and Office documents (Word, PowerPoint, Excel).

Extend uses advanced document understanding to convert unstructured documents into clean, structured output — returning parsed chunks and block-level elements with content type classification and spatial metadata.

### Key Capabilities

- **Document Parsing**: Extract text, tables, figures, and structured content from uploaded documents or URLs.
- **Multiple Output Formats**: Choose between Markdown (default) for clean text output, or Spatial for layout-preserving extraction.
- **Chunking Strategies**: Split output by page, document, or section depending on your downstream use case.
- **Engine Selection**: Use the default Performance engine (`parse_performance`) for best quality, or the Light engine (`parse_light`) for faster processing on simpler documents.

### Authentication

An Extend API key is required. You can generate one from the [Extend Developer Dashboard](https://dashboard.extend.ai).

### Supported File Types

PDF, JPEG, PNG, TIFF, GIF, BMP, WebP, HEIC/HEIF, Word (.docx), PowerPoint (.pptx), Excel (.xlsx), XML, HTML, CSV, TXT.

## Tools

### `extend_parser`

#### Input

| Parameter | Type | Required | Description |
| --------- | ---- | -------- | ----------- |
| `filePath` | string | No | URL to a document to be processed |
| `file` | file | No | Document file to be processed |
| `fileUpload` | object | No | File upload data from file-upload component |
| `outputFormat` | string | No | Target output format \(markdown or spatial\). Defaults to markdown. |
| `chunking` | string | No | Chunking strategy \(page, document, or section\). Defaults to page. |
| `engine` | string | No | Parsing engine \(parse_performance or parse_light\). Defaults to parse_performance. |
| `apiKey` | string | Yes | Extend API key |

#### Output

| Field | Type | Description |
| ----- | ---- | ----------- |
| `id` | string | Unique identifier for the parser run |
| `status` | string | Processing status |
| `chunks` | json | Parsed document content chunks |
| `blocks` | json | Block-level document elements with type and content |
| `pageCount` | number | Number of pages processed |
| `creditsUsed` | number | API credits consumed |


1 change: 1 addition & 0 deletions apps/docs/content/docs/en/tools/meta.json
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
"enrich",
"evernote",
"exa",
"extend",
"fathom",
"file",
"firecrawl",
Expand Down
2 changes: 2 additions & 0 deletions apps/sim/app/(landing)/integrations/data/icon-mapping.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import {
EnrichSoIcon,
EvernoteIcon,
ExaAIIcon,
ExtendIcon,
EyeIcon,
FathomIcon,
FirecrawlIcon,
Expand Down Expand Up @@ -222,6 +223,7 @@ export const blockTypeToIconMap: Record<string, IconComponent> = {
enrich: EnrichSoIcon,
evernote: EvernoteIcon,
exa: ExaAIIcon,
extend_v2: ExtendIcon,
fathom: FathomIcon,
file_v3: DocumentIcon,
firecrawl: FirecrawlIcon,
Expand Down
18 changes: 18 additions & 0 deletions apps/sim/app/(landing)/integrations/data/integrations.json
Original file line number Diff line number Diff line change
Expand Up @@ -2978,6 +2978,24 @@
"integrationType": "search",
"tags": ["web-scraping", "enrichment"]
},
{
"type": "extend_v2",
"slug": "extend",
"name": "Extend",
"description": "Parse and extract content from documents",
"longDescription": "Integrate Extend AI into the workflow. Parse and extract structured content from documents or file references.",
"bgColor": "#1A1A2E",
Comment thread
waleedlatif1 marked this conversation as resolved.
Outdated
"iconName": "ExtendIcon",
"docsUrl": "https://docs.sim.ai/tools/extend",
"operations": [],
"operationCount": 0,
"triggers": [],
"triggerCount": 0,
"authType": "api-key",
"category": "tools",
"integrationType": "ai",
"tags": ["document-processing", "ocr"]
},
{
"type": "fathom",
"slug": "fathom",
Expand Down
179 changes: 179 additions & 0 deletions apps/sim/app/api/tools/extend/parse/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import { createLogger } from '@sim/logger'
import { type NextRequest, NextResponse } from 'next/server'
import { z } from 'zod'
import { checkInternalAuth } from '@/lib/auth/hybrid'
import {
secureFetchWithPinnedIP,
validateUrlWithDNS,
} from '@/lib/core/security/input-validation.server'
import { generateRequestId } from '@/lib/core/utils/request'
import { RawFileInputSchema } from '@/lib/uploads/utils/file-schemas'
import { isInternalFileUrl } from '@/lib/uploads/utils/file-utils'
import { resolveFileInputToUrl } from '@/lib/uploads/utils/file-utils.server'

// Next.js route segment config: always evaluate this handler at request time.
export const dynamic = 'force-dynamic'

// Logger for this route, created under the name 'ExtendParseAPI'.
const logger = createLogger('ExtendParseAPI')

/** Output formats accepted for `outputFormat`. */
const EXTEND_OUTPUT_FORMATS = ['markdown', 'spatial'] as const

/** Chunking strategies accepted for `chunking`. */
const EXTEND_CHUNKING_STRATEGIES = ['page', 'document', 'section'] as const

/** Parsing engines accepted for `engine`. */
const EXTEND_ENGINES = ['parse_performance', 'parse_light'] as const

/**
 * Request body accepted by the Extend parse route.
 *
 * Only `apiKey` is mandatory at the schema level; `file` and `filePath` are
 * both optional here, and the handler rejects the request later if neither
 * resolves to a usable file URL.
 */
const ExtendParseSchema = z.object({
  apiKey: z.string().min(1, 'API key is required'),
  filePath: z.string().optional(),
  file: RawFileInputSchema.optional(),
  outputFormat: z.enum(EXTEND_OUTPUT_FORMATS).optional(),
  chunking: z.enum(EXTEND_CHUNKING_STRATEGIES).optional(),
  engine: z.enum(EXTEND_ENGINES).optional(),
})

/**
 * Proxies a document-parse request to the Extend API.
 *
 * Flow: authenticate the internal caller, validate the JSON body against
 * `ExtendParseSchema`, resolve the file input to a URL, forward the request to
 * Extend, and return a normalized `{ success, output }` payload.
 *
 * Responses:
 * - 401 when internal auth fails or carries no user id
 * - 400 when the body is not valid JSON, fails schema validation, or yields no file URL
 * - the status reported by file resolution when that step returns an error
 * - 502 when the Extend endpoint cannot be validated/resolved
 * - the upstream status when Extend answers with a non-OK response
 * - 500 for any other unexpected failure
 *
 * @param request - Incoming request whose JSON body matches `ExtendParseSchema`.
 * @returns A JSON response; on success `output` holds `id`, `status`, `chunks`,
 *   `blocks`, `pageCount` and `creditsUsed`.
 */
export async function POST(request: NextRequest) {
  const requestId = generateRequestId()

  try {
    const authResult = await checkInternalAuth(request, { requireWorkflowId: false })

    if (!authResult.success || !authResult.userId) {
      logger.warn(`[${requestId}] Unauthorized Extend parse attempt`, {
        error: authResult.error || 'Missing userId',
      })
      return NextResponse.json(
        {
          success: false,
          error: authResult.error || 'Unauthorized',
        },
        { status: 401 }
      )
    }

    const userId = authResult.userId

    // A body that is not valid JSON is a client error. Handle it here so it is
    // reported as 400 instead of falling through to the generic 500 handler.
    let body: unknown
    try {
      body = await request.json()
    } catch {
      logger.warn(`[${requestId}] Request body is not valid JSON`)
      return NextResponse.json(
        { success: false, error: 'Invalid JSON in request body' },
        { status: 400 }
      )
    }

    // Throws ZodError on invalid input; handled in the outer catch.
    const validatedData = ExtendParseSchema.parse(body)

    logger.info(`[${requestId}] Extend parse request`, {
      fileName: validatedData.file?.name,
      filePath: validatedData.filePath,
      isWorkspaceFile: validatedData.filePath ? isInternalFileUrl(validatedData.filePath) : false,
      userId,
    })

    const resolution = await resolveFileInputToUrl({
      file: validatedData.file,
      filePath: validatedData.filePath,
      userId,
      requestId,
      logger,
    })

    if (resolution.error) {
      return NextResponse.json(
        { success: false, error: resolution.error.message },
        { status: resolution.error.status }
      )
    }

    const fileUrl = resolution.fileUrl
    if (!fileUrl) {
      return NextResponse.json({ success: false, error: 'File input is required' }, { status: 400 })
    }

    // NOTE(review): confirm these field names (`fileUrl`, `config.targetFormat`,
    // `config.chunking.strategy`, `config.engine`) against the Extend Parse API
    // reference for the pinned API version below.
    const extendBody: Record<string, unknown> = {
      fileUrl,
    }

    // Only forward options the caller actually set so Extend applies its own defaults.
    const config: Record<string, unknown> = {}

    if (validatedData.outputFormat) {
      config.targetFormat = validatedData.outputFormat
    }

    if (validatedData.chunking) {
      config.chunking = { strategy: validatedData.chunking }
    }

    if (validatedData.engine) {
      config.engine = validatedData.engine
    }

    if (Object.keys(config).length > 0) {
      extendBody.config = config
    }

    const extendEndpoint = 'https://api.extend.ai/parse'
    const extendValidation = await validateUrlWithDNS(extendEndpoint, 'Extend API URL')
    const resolvedIP = extendValidation.resolvedIP

    // Treat a missing resolved IP like a failed validation rather than
    // asserting it away: without it there is nothing to pin the request to.
    if (!extendValidation.isValid || !resolvedIP) {
      logger.error(`[${requestId}] Extend API URL validation failed`, {
        error: extendValidation.error,
      })
      return NextResponse.json(
        {
          success: false,
          error: 'Failed to reach Extend API',
        },
        { status: 502 }
      )
    }

    const extendResponse = await secureFetchWithPinnedIP(extendEndpoint, resolvedIP, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Accept: 'application/json',
        Authorization: `Bearer ${validatedData.apiKey}`,
        'x-extend-api-version': '2025-04-21',
      },
      body: JSON.stringify(extendBody),
    })

    if (!extendResponse.ok) {
      const errorText = await extendResponse.text()
      logger.error(`[${requestId}] Extend API error:`, errorText)

      // The status text can be empty (e.g. over HTTP/2); fall back to the
      // numeric status so the caller never receives a blank error reason.
      const reason = extendResponse.statusText || `HTTP ${extendResponse.status}`
      return NextResponse.json(
        {
          success: false,
          error: `Extend API error: ${reason}`,
        },
        { status: extendResponse.status }
      )
    }

    const extendData = (await extendResponse.json()) as Record<string, unknown>

    logger.info(`[${requestId}] Extend parse successful`)

    // Normalize the upstream payload; both camelCase and snake_case count
    // fields are accepted, and absent fields get explicit defaults.
    return NextResponse.json({
      success: true,
      output: {
        id: extendData.id ?? null,
        status: extendData.status ?? 'PROCESSED',
        chunks: extendData.chunks ?? [],
        blocks: extendData.blocks ?? [],
        pageCount: extendData.pageCount ?? extendData.page_count ?? null,
        creditsUsed: extendData.creditsUsed ?? extendData.credits_used ?? null,
      },
    })
  } catch (error) {
    if (error instanceof z.ZodError) {
      logger.warn(`[${requestId}] Invalid request data`, { errors: error.errors })
      return NextResponse.json(
        {
          success: false,
          error: 'Invalid request data',
          details: error.errors,
        },
        { status: 400 }
      )
    }

    logger.error(`[${requestId}] Error in Extend parse:`, error)

    return NextResponse.json(
      {
        success: false,
        error: error instanceof Error ? error.message : 'Internal server error',
      },
      { status: 500 }
    )
  }
}
Loading
Loading