Skip to content

Commit f1d8b81

Browse files
committed
Add PyBuffer implementation files
- c_src/py_buffer.h: Header with resource struct and function declarations - src/py_buffer.erl: Erlang API module (new/0,1, write/2, close/1) - c_src/py_convert.c: Auto-conversion of buffer refs to PyBuffer objects - c_src/py_nif.c: NIF registration and resource type initialization - src/py_nif.erl: NIF function exports
1 parent c744ef4 commit f1d8b81

5 files changed

Lines changed: 376 additions & 2 deletions

File tree

c_src/py_buffer.h

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
/*
2+
* Copyright 2026 Benoit Chesneau
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
/**
18+
* @file py_buffer.h
19+
* @brief Zero-copy WSGI input buffer support
20+
* @author Benoit Chesneau
21+
*
22+
* This module provides a PyBuffer Python type that wraps a NIF-allocated
23+
* buffer resource and exposes it via the buffer protocol. Erlang can write
24+
* HTTP request body chunks to the buffer while Python reads them with
25+
* file-like methods (read, readline, readlines) or direct buffer access.
26+
*
27+
* The buffer supports blocking reads that release the GIL while waiting
28+
* for data from Erlang.
29+
*
30+
* Key features:
31+
* - Buffer protocol (memoryview(buf) works for zero-copy access)
32+
* - File-like interface (read, readline, readlines, seek, tell)
33+
* - Blocking reads with GIL released (uses pthread_cond)
34+
* - Iterator protocol for line-by-line reading
35+
*/
36+
37+
#ifndef PY_BUFFER_H
38+
#define PY_BUFFER_H
39+
40+
#include <Python.h>
41+
#include <erl_nif.h>
42+
#include <stdbool.h>
43+
#include <pthread.h>
44+
45+
/* ============================================================================
46+
* Configuration
47+
* ============================================================================ */
48+
49+
/**
50+
* @def PY_BUFFER_DEFAULT_CAPACITY
51+
* @brief Default buffer capacity when content_length is unknown (chunked)
52+
*/
53+
#define PY_BUFFER_DEFAULT_CAPACITY 65536
54+
55+
/**
56+
* @def PY_BUFFER_GROW_FACTOR
57+
* @brief Growth factor when buffer needs to expand
58+
*/
59+
#define PY_BUFFER_GROW_FACTOR 2
60+
61+
/* ============================================================================
62+
* Buffer Resource Type
63+
* ============================================================================ */
64+
65+
/**
66+
* @brief Resource type for zero-copy input buffers
67+
*/
68+
extern ErlNifResourceType *PY_BUFFER_RESOURCE_TYPE;
69+
70+
/**
71+
* @struct py_buffer_resource_t
72+
* @brief NIF resource that holds streaming input buffer data
73+
*
74+
* The buffer is written by Erlang (producer) and read by Python (consumer).
75+
* Uses pthread mutex/cond for thread-safe blocking reads.
76+
*/
77+
typedef struct {
78+
unsigned char *data; /**< Buffer data */
79+
size_t capacity; /**< Allocated capacity */
80+
size_t write_pos; /**< Current write position (producer) */
81+
size_t read_pos; /**< Current read position (consumer) */
82+
ssize_t content_length; /**< Expected total size, -1 for chunked */
83+
pthread_mutex_t mutex; /**< Mutex for thread-safe access */
84+
pthread_cond_t data_ready; /**< Condition for blocking reads */
85+
bool eof; /**< End of data flag (close called) */
86+
bool closed; /**< Buffer closed flag */
87+
int view_count; /**< Active Python buffer view count */
88+
} py_buffer_resource_t;
89+
90+
/* ============================================================================
91+
* Python Type
92+
* ============================================================================ */
93+
94+
/**
95+
* @brief The PyBuffer Python type object
96+
*/
97+
extern PyTypeObject PyBufferType;
98+
99+
/**
100+
* @struct PyBufferObject
101+
* @brief Python object wrapping a py_buffer resource
102+
*
103+
* Provides file-like interface and buffer protocol for zero-copy access.
104+
*/
105+
typedef struct {
106+
PyObject_HEAD
107+
py_buffer_resource_t *resource; /**< NIF resource (we hold a reference) */
108+
void *resource_ref; /**< For releasing the resource */
109+
} PyBufferObject;
110+
111+
/* ============================================================================
112+
* Function Declarations - NIF Resource Management
113+
* ============================================================================ */
114+
115+
/**
116+
* @brief Allocate a new buffer resource
117+
*
118+
* @param content_length Expected size, or -1 for chunked encoding
119+
* @return New resource, or NULL on error
120+
*/
121+
py_buffer_resource_t *py_buffer_alloc(ssize_t content_length);
122+
123+
/**
124+
* @brief Resource destructor
125+
*/
126+
void py_buffer_resource_dtor(ErlNifEnv *env, void *obj);
127+
128+
/**
129+
* @brief Write data to the buffer (Erlang producer side)
130+
*
131+
* Appends data to the buffer, expanding if necessary.
132+
* Signals waiting readers when data is available.
133+
*
134+
* @param buf Buffer resource
135+
* @param data Data to write
136+
* @param size Size of data
137+
* @return 0 on success, -1 on error (buffer closed or alloc failure)
138+
*/
139+
int py_buffer_write(py_buffer_resource_t *buf, const unsigned char *data, size_t size);
140+
141+
/**
142+
* @brief Close the buffer (Erlang producer side)
143+
*
144+
* Sets EOF flag and wakes up any waiting readers.
145+
*
146+
* @param buf Buffer resource
147+
*/
148+
void py_buffer_close(py_buffer_resource_t *buf);
149+
150+
/* ============================================================================
151+
* Function Declarations - Python Type
152+
* ============================================================================ */
153+
154+
/**
155+
* @brief Initialize the PyBuffer type
156+
*
157+
* Must be called during Python initialization with the GIL held.
158+
*
159+
* @return 0 on success, -1 on error
160+
*/
161+
int PyBuffer_init_type(void);
162+
163+
/**
164+
* @brief Register PyBuffer with erlang module
165+
*
166+
* Makes PyBuffer accessible from Python.
167+
*
168+
* @return 0 on success, -1 on error
169+
*
170+
* @pre GIL must be held
171+
* @pre PyBuffer_init_type() must have been called
172+
* @pre erlang module must exist
173+
*/
174+
int PyBuffer_register_with_module(void);
175+
176+
/**
177+
* @brief Create a PyBuffer from a NIF resource
178+
*
179+
* @param resource The buffer resource
180+
* @param resource_ref Resource reference (for enif_release_resource)
181+
* @return New PyBuffer object, or NULL on error
182+
*
183+
* @pre GIL must be held
184+
*/
185+
PyObject *PyBuffer_from_resource(py_buffer_resource_t *resource,
186+
void *resource_ref);
187+
188+
#endif /* PY_BUFFER_H */

c_src/py_convert.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,14 @@ static PyObject *term_to_py(ErlNifEnv *env, ERL_NIF_TERM term) {
595595
return capsule;
596596
}
597597

598+
/* Check for py_buffer resource - wrap in PyBufferObject for WSGI input */
599+
py_buffer_resource_t *pybuf;
600+
if (enif_get_resource(env, term, PY_BUFFER_RESOURCE_TYPE, (void **)&pybuf)) {
601+
/* Wrap the buffer resource in a PyBufferObject.
602+
* PyBuffer_from_resource increments the resource refcount. */
603+
return PyBuffer_from_resource(pybuf, pybuf);
604+
}
605+
598606
/* Fallback: return None for unknown types */
599607
Py_RETURN_NONE;
600608
}

c_src/py_nif.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#include "py_wsgi.h"
4242
#include "py_event_loop.h"
4343
#include "py_channel.h"
44+
#include "py_buffer.h"
4445

4546
/* ============================================================================
4647
* Global state definitions
@@ -301,6 +302,7 @@ static int is_inline_schedule_marker(PyObject *obj);
301302
#include "py_subinterp_thread.c"
302303
#include "py_reactor_buffer.c"
303304
#include "py_channel.c"
305+
#include "py_buffer.c"
304306

305307
/* ============================================================================
306308
* Resource callbacks
@@ -1170,6 +1172,20 @@ static ERL_NIF_TERM nif_py_init(ErlNifEnv *env, int argc, const ERL_NIF_TERM arg
11701172
return make_error(env, "reactor_buffer_register_failed");
11711173
}
11721174

1175+
/* Initialize PyBuffer Python type for zero-copy WSGI input */
1176+
if (PyBuffer_init_type() < 0) {
1177+
Py_Finalize();
1178+
atomic_store(&g_runtime_state, PY_STATE_STOPPED);
1179+
return make_error(env, "py_buffer_init_failed");
1180+
}
1181+
1182+
/* Register PyBuffer type with erlang module */
1183+
if (PyBuffer_register_with_module() < 0) {
1184+
Py_Finalize();
1185+
atomic_store(&g_runtime_state, PY_STATE_STOPPED);
1186+
return make_error(env, "py_buffer_register_failed");
1187+
}
1188+
11731189
/* Create a default event loop so Python asyncio always has one available */
11741190
if (create_default_event_loop(env) < 0) {
11751191
Py_Finalize();
@@ -4797,6 +4813,16 @@ static int load(ErlNifEnv *env, void **priv_data, ERL_NIF_TERM load_info) {
47974813
return -1;
47984814
}
47994815

4816+
/* PyBuffer resource type for zero-copy WSGI input */
4817+
PY_BUFFER_RESOURCE_TYPE = enif_open_resource_type(
4818+
env, NULL, "py_buffer",
4819+
py_buffer_resource_dtor,
4820+
ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER, NULL);
4821+
4822+
if (PY_BUFFER_RESOURCE_TYPE == NULL) {
4823+
return -1;
4824+
}
4825+
48004826
/* Initialize channel module atoms */
48014827
if (channel_init(env) < 0) {
48024828
return -1;
@@ -5059,7 +5085,12 @@ static ErlNifFunc nif_funcs[] = {
50595085
{"channel_info", 1, nif_channel_info, 0},
50605086
{"channel_wait", 3, nif_channel_wait, 0},
50615087
{"channel_cancel_wait", 2, nif_channel_cancel_wait, 0},
5062-
{"channel_register_sync_waiter", 1, nif_channel_register_sync_waiter, 0}
5088+
{"channel_register_sync_waiter", 1, nif_channel_register_sync_waiter, 0},
5089+
5090+
/* PyBuffer API - zero-copy WSGI input */
5091+
{"py_buffer_create", 1, nif_py_buffer_create, 0},
5092+
{"py_buffer_write", 2, nif_py_buffer_write, 0},
5093+
{"py_buffer_close", 1, nif_py_buffer_close, 0}
50635094
};
50645095

50655096
ERL_NIF_INIT(py_nif, nif_funcs, load, NULL, upgrade, unload)

src/py_buffer.erl

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
%% Copyright 2026 Benoit Chesneau
2+
%%
3+
%% Licensed under the Apache License, Version 2.0 (the "License");
4+
%% you may not use this file except in compliance with the License.
5+
%% You may obtain a copy of the License at
6+
%%
7+
%% http://www.apache.org/licenses/LICENSE-2.0
8+
%%
9+
%% Unless required by applicable law or agreed to in writing, software
10+
%% distributed under the License is distributed on an "AS IS" BASIS,
11+
%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
%% See the License for the specific language governing permissions and
13+
%% limitations under the License.
14+
15+
%%% @doc Zero-copy WSGI input buffer for streaming HTTP bodies.
16+
%%%
17+
%%% This module provides a buffer that can be written by Erlang and read
18+
%%% by Python with zero-copy semantics. The buffer is suitable for use
19+
%%% as wsgi.input in WSGI applications.
20+
%%%
21+
%%% == Usage ==
22+
%%%
23+
%%% ```
24+
%%% %% Create a buffer (chunked encoding - unknown size)
25+
%%% {ok, Buf} = py_buffer:new(),
26+
%%%
27+
%%% %% Or with known content length
28+
%%% {ok, Buf} = py_buffer:new(1024),
29+
%%%
30+
%%% %% Write HTTP body chunks
31+
%%% ok = py_buffer:write(Buf, <<"chunk1">>),
32+
%%% ok = py_buffer:write(Buf, <<"chunk2">>),
33+
%%%
34+
%%% %% Signal end of data
35+
%%% ok = py_buffer:close(Buf),
36+
%%%
37+
%%% %% Pass to Python WSGI - buffer is automatically converted
38+
%%% py_context:call(Ctx, <<"myapp">>, <<"handle">>,
39+
%%% [#{<<"wsgi.input">> => Buf}], #{}).
40+
%%% '''
41+
%%%
42+
%%% On the Python side, the buffer provides a file-like interface:
43+
%%%
44+
%%% ```python
45+
%%% def handle(environ):
46+
%%% body = environ['wsgi.input'].read() # Blocks until data ready
47+
%%% # Or use readline(), readlines(), iteration
48+
%%% for line in environ['wsgi.input']:
49+
%%% process(line)
50+
%%% '''
51+
%%%
52+
%%% @end
53+
-module(py_buffer).
54+
55+
-export([
56+
new/0,
57+
new/1,
58+
write/2,
59+
close/1
60+
]).
61+
62+
%% @doc Create a new buffer for chunked/streaming data.
63+
%%
64+
%% Use this when the content length is unknown (chunked transfer encoding).
65+
%% The buffer will grow as needed.
66+
%%
67+
%% @returns {ok, BufferRef} | {error, Reason}
68+
-spec new() -> {ok, reference()} | {error, term()}.
69+
new() ->
70+
py_nif:py_buffer_create(undefined).
71+
72+
%% @doc Create a new buffer with known content length.
73+
%%
74+
%% Pre-allocates the buffer to the specified size for better performance.
75+
%%
76+
%% @param ContentLength Expected total size in bytes, or `undefined' for chunked
77+
%% @returns {ok, BufferRef} | {error, Reason}
78+
-spec new(non_neg_integer() | undefined) -> {ok, reference()} | {error, term()}.
79+
new(undefined) ->
80+
py_nif:py_buffer_create(undefined);
81+
new(ContentLength) when is_integer(ContentLength), ContentLength >= 0 ->
82+
py_nif:py_buffer_create(ContentLength).
83+
84+
%% @doc Write data to the buffer.
85+
%%
86+
%% Appends data to the buffer and signals any waiting Python readers.
87+
%% This function is safe to call from multiple processes, but typically
88+
%% only one process should write to a buffer.
89+
%%
90+
%% @param Ref Buffer reference from new/0 or new/1
91+
%% @param Data Binary data to append
92+
%% @returns ok | {error, Reason}
93+
-spec write(reference(), binary()) -> ok | {error, term()}.
94+
write(Ref, Data) when is_binary(Data) ->
95+
py_nif:py_buffer_write(Ref, Data).
96+
97+
%% @doc Close the buffer (signal end of data).
98+
%%
99+
%% Sets the EOF flag and wakes up any Python threads waiting for data.
100+
%% After calling close, no more data can be written, and Python's read()
101+
%% will return any remaining buffered data followed by empty bytes.
102+
%%
103+
%% @param Ref Buffer reference
104+
%% @returns ok
105+
-spec close(reference()) -> ok.
106+
close(Ref) ->
107+
py_nif:py_buffer_close(Ref).

0 commit comments

Comments
 (0)