Skip to content

Commit f3a9ff5

Browse files
authored
Expose expressions for C API (#7190)
Expose a subset of expressions to test filtering in Scan API: root, column, get_item, select, or, not, is_null, and binary operators, including shortcuts like eq. Add vx_array_apply to test C-side expression application. This patch does not include C++ FFI tests because we can't create vx_arrays yet (#7148). The next steps are exposing literal expressions and dynamic expressions. Signed-off-by: Mikhail Kot <mikhail@spiraldb.com>
1 parent c85f7a2 commit f3a9ff5

File tree

6 files changed

+838
-16
lines changed

6 files changed

+838
-16
lines changed

.github/workflows/ci.yml

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -386,10 +386,10 @@ jobs:
386386
fail-fast: false
387387
matrix:
388388
include:
389+
# We don't run memory sanitizer as it provides many false positives
390+
# for std
389391
- sanitizer: asan
390-
sanitizer_flags: "-Zsanitizer=address -Zsanitize=leak"
391-
- sanitizer: msan
392-
sanitizer_flags: "-Zsanitizer=memory"
392+
sanitizer_flags: "-Zsanitizer=address,leak"
393393
- sanitizer: tsan
394394
sanitizer_flags: "-Zsanitizer=thread"
395395
name: "Rust tests (${{ matrix.sanitizer }})"
@@ -421,15 +421,16 @@ jobs:
421421
run: |
422422
rustup toolchain install $NIGHTLY_TOOLCHAIN
423423
rustup component add --toolchain $NIGHTLY_TOOLCHAIN rust-src rustfmt clippy llvm-tools-preview
424-
export RUSTFLAGS="${RUSTFLAGS} ${{ matrix.sanitizer_flags }}"
425424
- name: Build tests with sanitizer
426425
run: |
426+
RUSTFLAGS="${RUSTFLAGS} ${{ matrix.sanitizer_flags }}" \
427427
cargo +$NIGHTLY_TOOLCHAIN build --locked --all-features \
428428
--target x86_64-unknown-linux-gnu -Zbuild-std \
429429
-p vortex-buffer -p vortex-fastlanes -p vortex-fsst -p vortex-alp -p vortex-array
430430
431431
- name: Run tests with sanitizer
432432
run: |
433+
RUSTFLAGS="${RUSTFLAGS} ${{ matrix.sanitizer_flags }}" \
433434
cargo +$NIGHTLY_TOOLCHAIN nextest run --locked --all-features \
434435
--target x86_64-unknown-linux-gnu --no-fail-fast -Zbuild-std \
435436
-p vortex-buffer -p vortex-fastlanes -p vortex-fsst -p vortex-alp -p vortex-array
@@ -440,6 +441,7 @@ jobs:
440441
# TODO(myrrc): remove --no-default-features once we make Mimalloc opt-in
441442
- name: Run vortex-ffi tests with sanitizer
442443
run: |
444+
RUSTFLAGS="${RUSTFLAGS} ${{ matrix.sanitizer_flags }}" \
443445
cargo +$NIGHTLY_TOOLCHAIN test --locked --no-default-features \
444446
--target x86_64-unknown-linux-gnu --no-fail-fast -Zbuild-std \
445447
-p vortex-ffi -- --no-capture
@@ -452,7 +454,7 @@ jobs:
452454
# We don't run memory sanitizer as it's clang-only and provides many
453455
# false positives for Catch2
454456
- sanitizer: asan
455-
sanitizer_flags: "-Zsanitizer=address -Zsanitize=leak"
457+
sanitizer_flags: "-Zsanitizer=address,leak"
456458
- sanitizer: tsan
457459
sanitizer_flags: "-Zsanitizer=thread"
458460
name: "Rust/C++ FFI tests (${{ matrix.sanitizer }})"
@@ -486,15 +488,13 @@ jobs:
486488
run: |
487489
rustup toolchain install $NIGHTLY_TOOLCHAIN
488490
rustup component add --toolchain $NIGHTLY_TOOLCHAIN rust-src rustfmt clippy llvm-tools-preview
489-
490-
# Export flags here so that rustfilt won't be built with sanitizers
491-
export RUSTFLAGS="-A warnings -Cunsafe-allow-abi-mismatch=sanitizer \
492-
--cfg disable_loom --cfg vortex_nightly -C debuginfo=2 \
493-
-C opt-level=0 -C strip=none -Zexternal-clangrt \
494-
${{ matrix.sanitizer_flags }}"
495491
- name: Build FFI library
496492
run: |
497493
# TODO(myrrc): remove --no-default-features
494+
RUSTFLAGS="-A warnings -Cunsafe-allow-abi-mismatch=sanitizer \
495+
--cfg disable_loom --cfg vortex_nightly -C debuginfo=2 \
496+
-C opt-level=0 -C strip=none -Zexternal-clangrt \
497+
${{ matrix.sanitizer_flags }}" \
498498
cargo +$NIGHTLY_TOOLCHAIN build --locked --no-default-features \
499499
--target x86_64-unknown-linux-gnu -Zbuild-std \
500500
-p vortex-ffi

vortex-ffi/cinclude/vortex.h

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,66 @@ typedef enum {
114114
DTYPE_FIXED_SIZE_LIST = 9,
115115
} vx_dtype_variant;
116116

117+
/**
118+
* Equalities, inequalities, and boolean operations over possibly null values.
119+
* For most operations, if either side is null, the result is null.
120+
* VX_OPERATOR_KLEENE_AND, VX_OPERATOR_KLEENE_OR obey Kleene (three-valued)
121+
* logic
122+
*/
123+
typedef enum {
124+
/**
125+
* Expressions are equal.
126+
*/
127+
VX_OPERATOR_EQ = 0,
128+
/**
129+
* Expressions are not equal.
130+
*/
131+
VX_OPERATOR_NOT_EQ = 1,
132+
/**
133+
* Expression is greater than another
134+
*/
135+
VX_OPERATOR_GT = 2,
136+
/**
137+
* Expression is greater than or equal to another
138+
*/
139+
VX_OPERATOR_GTE = 3,
140+
/**
141+
* Expression is less than another
142+
*/
143+
VX_OPERATOR_LT = 4,
144+
/**
145+
* Expression is less than or equal to another
146+
*/
147+
VX_OPERATOR_LTE = 5,
148+
/**
149+
* Boolean AND /\.
150+
*/
151+
VX_OPERATOR_KLEENE_AND = 6,
152+
/**
153+
* Boolean OR \/.
154+
*/
155+
VX_OPERATOR_KLEENE_OR = 7,
156+
/**
157+
* The sum of the arguments.
158+
* Errors at runtime if the sum would overflow or underflow.
159+
*/
160+
VX_OPERATOR_ADD = 8,
161+
/**
162+
* The difference between the arguments.
163+
* Errors at runtime if the difference would overflow or underflow.
164+
* The result is null at any index where either input is null.
165+
*/
166+
VX_OPERATOR_SUB = 9,
167+
/**
168+
* Multiply two numbers
169+
*/
170+
VX_OPERATOR_MUL = 10,
171+
/**
172+
* Divide the left side by the right side
173+
*/
174+
VX_OPERATOR_DIV = 11,
175+
} vx_binary_operator;
176+
117177
/**
118178
* Log levels for the Vortex library.
119179
*/
@@ -297,6 +357,22 @@ typedef struct vx_dtype vx_dtype;
297357
*/
298358
typedef struct vx_error vx_error;
299359

360+
/**
361+
* A node in a Vortex expression tree.
362+
*
363+
* Expressions represent scalar computations that can be performed on
364+
* data. Each expression consists of an encoding (vtable), heap-allocated
365+
* metadata, and child expressions.
366+
*
367+
* Unless stated explicitly, all expressions returned are owned and must
368+
* be freed by the caller.
369+
* Unless stated explicitly, if an operation on const vx_expression* is
370+
* passed NULL, NULL is returned.
371+
* Operations on expressions don't take ownership of input values, and so
372+
* input values must be freed by the caller.
373+
*/
374+
typedef struct vx_expression vx_expression;
375+
300376
/**
301377
* A handle to a Vortex file encapsulating the footer and logic for instantiating a reader.
302378
*/
@@ -478,6 +554,13 @@ const vx_string *vx_array_get_utf8(const vx_array *array, uint32_t index);
478554
*/
479555
const vx_binary *vx_array_get_binary(const vx_array *array, uint32_t index);
480556

557+
/**
558+
* Apply the expression to the array, wrapping it with a ScalarFnArray.
559+
* This operation takes constant time as it doesn't execute the underlying
560+
* array. Executing the underlying array still takes O(n) time.
561+
*/
562+
const vx_array *vx_array_apply(const vx_array *array, const vx_expression *expression, vx_error **error);
563+
481564
/**
482565
* Free an owned [`vx_array_iterator`] object.
483566
*/
@@ -677,6 +760,115 @@ void vx_error_free(vx_error *ptr);
677760
*/
678761
const vx_string *vx_error_get_message(const vx_error *error);
679762

763+
/**
764+
* Free an owned [`vx_expression`] object.
765+
*/
766+
void vx_expression_free(vx_expression *ptr);
767+
768+
/**
769+
* Create a root expression. A root expression, applied to an array in
770+
* vx_array_apply, takes the array itself as opposed to functions like
771+
* vx_expression_column or vx_expression_select which take the array's parts.
772+
*
773+
* Example:
774+
*
775+
* const vx_array* array = ...;
776+
* vx_expression* root = vx_expression_root();
777+
* vx_error* error = NULL;
778+
* const vx_array* applied_array = vx_array_apply(array, root, &error);
779+
* // array and applied_array are identical
780+
* vx_array_free(applied_array);
781+
* vx_expression_free(root);
782+
* vx_array_free(array);
783+
*
784+
*/
785+
vx_expression *vx_expression_root(void);
786+
787+
/**
788+
* Create an expression that selects (includes) specific fields from a child
789+
* expression. Child expression must have a DTYPE_STRUCT dtype. Errors in
790+
* vx_array_apply if the child expression doesn't have a specified field.
791+
*
792+
* Example:
793+
*
794+
* vx_expression* root = vx_expression_root();
795+
* const char* names[] = {"name", "age"};
796+
* vx_expression* select = vx_expression_select(names, 2, root);
797+
* vx_expression_free(select);
798+
* vx_expression_free(root);
799+
*
800+
*/
801+
vx_expression *vx_expression_select(const char *const *names, size_t len, const vx_expression *child);
802+
803+
/**
804+
* Create an AND expression for multiple child expressions.
805+
* If there are no input expressions, returns NULL.
806+
*/
807+
vx_expression *vx_expression_and(const vx_expression *const *expressions, size_t len);
808+
809+
/**
810+
* Create an OR disjunction expression for multiple child expressions.
811+
* If there are no input expressions, returns NULL.
812+
*/
813+
vx_expression *vx_expression_or(const vx_expression *const *expressions, size_t len);
814+
815+
/**
816+
* Create a binary expression for two expressions of form lhs OP rhs.
817+
* If either input is NULL, returns NULL.
818+
*
819+
* Example for a binary sum:
820+
*
821+
* vx_expression* age = vx_expression_column("age");
822+
* vx_expression* height = vx_expression_column("height");
823+
* vx_expression* sum = vx_expression_binary(VX_OPERATOR_ADD, age, height);
824+
* vx_expression_free(sum);
825+
* vx_expression_free(height);
826+
* vx_expression_free(age);
827+
*
828+
* Example for a binary equality function:
829+
*
830+
* vx_expression* vx_expression_eq(
831+
* const vx_expression* lhs,
832+
* const vx_expression* rhs
833+
* ) {
834+
* return vx_expression_binary(VX_OPERATOR_EQ, lhs, rhs);
835+
* }
836+
*
837+
*/
838+
vx_expression *
839+
vx_expression_binary(vx_binary_operator operator_, const vx_expression *lhs, const vx_expression *rhs);
840+
841+
/**
842+
* Create a logical NOT of the child expression.
843+
*
844+
* Returns the logical negation of the input boolean expression.
845+
*/
846+
/* Returns an owned expression, like the other constructors; release it with vx_expression_free. */
vx_expression *vx_expression_not(const vx_expression *child);
847+
848+
/**
849+
* Create an expression that checks for null values.
850+
*
851+
* Returns a boolean array indicating which positions contain null values.
852+
*/
853+
vx_expression *vx_expression_is_null(const vx_expression *child);
854+
855+
/**
856+
* Create an expression that extracts a named field from a struct expression.
857+
* Child expression must have a DTYPE_STRUCT dtype.
858+
* Errors in vx_array_apply if the root array doesn't have a specified field.
859+
*
860+
* Accesses the specified field from the result of the child expression.
861+
* Equivalent to select(&item, 1, child).
862+
*/
863+
vx_expression *vx_expression_get_item(const char *item, const vx_expression *child);
864+
865+
/**
866+
* Create an expression that checks if a value is contained in a list.
867+
*
868+
* Returns a boolean array indicating whether the value appears in each list.
869+
*/
870+
vx_expression *vx_expression_list_contains(const vx_expression *list, const vx_expression *value);
871+
680872
/**
681873
* Clone a borrowed [`vx_file`], returning an owned [`vx_file`].
682874
*

0 commit comments

Comments
 (0)