@@ -114,6 +114,66 @@ typedef enum {
114114 DTYPE_FIXED_SIZE_LIST = 9 ,
115115} vx_dtype_variant ;
116116
117+ /**
118+ * Comparison, boolean, and arithmetic binary operators over possibly null values.
119+ * For most operations, if either side is null, the result is null.
120+ * VX_OPERATOR_KLEENE_AND and VX_OPERATOR_KLEENE_OR instead obey Kleene
121+ * (three-valued) logic.
122+ */
123+ typedef enum {
124+ /**
125+ * Expressions are equal.
126+ */
127+ VX_OPERATOR_EQ = 0 ,
128+ /**
129+ * Expressions are not equal.
130+ */
131+ VX_OPERATOR_NOT_EQ = 1 ,
132+ /**
133+ * The left expression is greater than the right expression.
134+ */
135+ VX_OPERATOR_GT = 2 ,
136+ /**
137+ * The left expression is greater than or equal to the right expression.
138+ */
139+ VX_OPERATOR_GTE = 3 ,
140+ /**
141+ * The left expression is less than the right expression.
142+ */
143+ VX_OPERATOR_LT = 4 ,
144+ /**
145+ * The left expression is less than or equal to the right expression.
146+ */
147+ VX_OPERATOR_LTE = 5 ,
148+ /**
149+ * Boolean AND (/\) under Kleene (three-valued) logic.
150+ */
151+ VX_OPERATOR_KLEENE_AND = 6 ,
152+ /**
153+ * Boolean OR (\/) under Kleene (three-valued) logic.
154+ */
155+ VX_OPERATOR_KLEENE_OR = 7 ,
156+ /**
157+ * The sum of the arguments.
158+ * Errors at runtime if the sum would overflow or underflow.
159+ */
160+ VX_OPERATOR_ADD = 8 ,
161+ /**
162+ * The difference between the arguments.
163+ * Errors at runtime if the difference would overflow or underflow.
164+ * The result is null at any index where either input is null.
165+ */
166+ VX_OPERATOR_SUB = 9 ,
167+ /**
168+ * The product of the arguments.
169+ */
170+ VX_OPERATOR_MUL = 10 ,
171+ /**
172+ * The left side divided by the right side.
173+ */
174+ VX_OPERATOR_DIV = 11 ,
175+ } vx_binary_operator ;
176+
117177/**
118178 * Log levels for the Vortex library.
119179 */
@@ -297,6 +357,22 @@ typedef struct vx_dtype vx_dtype;
297357 */
298358typedef struct vx_error vx_error ;
299359
360+ /**
361+ * A node in a Vortex expression tree.
362+ *
363+ * Expressions represent scalar computations that can be performed on
364+ * data. Each expression consists of an encoding (vtable), heap-allocated
365+ * metadata, and child expressions.
366+ *
367+ * Unless stated otherwise, every expression returned by this API is owned
368+ * by the caller and must be released with vx_expression_free.
369+ * Unless stated otherwise, an operation that is passed NULL for a
370+ * const vx_expression* argument returns NULL.
371+ * Operations on expressions do not take ownership of their inputs, so the
372+ * caller remains responsible for freeing input values.
373+ */
374+ typedef struct vx_expression vx_expression ;
375+
300376/**
301377 * A handle to a Vortex file encapsulating the footer and logic for instantiating a reader.
302378 */
@@ -478,6 +554,13 @@ const vx_string *vx_array_get_utf8(const vx_array *array, uint32_t index);
478554 */
479555const vx_binary * vx_array_get_binary (const vx_array * array , uint32_t index );
480556
557+ /**
558+ * Apply the expression to the array, wrapping it with a ScalarFnArray.
559+ * Constant time: the underlying array is not executed here; executing it later still takes O(n) time.
560+ * NOTE(review): failures are presumably reported through `error` - confirm against the implementation.
561+ */
562+ const vx_array * vx_array_apply (const vx_array * array , const vx_expression * expression , vx_error * * error );
563+
481564/**
482565 * Free an owned [`vx_array_iterator`] object.
483566 */
@@ -677,6 +760,115 @@ void vx_error_free(vx_error *ptr);
677760 */
678761const vx_string * vx_error_get_message (const vx_error * error );
679762
763+ /**
764+ * Free an owned [`vx_expression`] object, such as one returned by vx_expression_root.
765+ */
766+ void vx_expression_free (vx_expression * ptr );
767+
768+ /**
769+ * Create a root expression. When applied to an array with vx_array_apply,
770+ * a root expression takes the array itself, as opposed to functions like
771+ * vx_expression_column or vx_expression_select which take the array's parts.
772+ *
773+ * Example:
774+ *
775+ * const vx_array* array = ...;
776+ * vx_expression* root = vx_expression_root();
777+ * vx_error* error = NULL;
778+ * const vx_array* applied_array = vx_array_apply(array, root, &error);
779+ * // array and applied_array are identical
780+ * vx_array_free(applied_array);
781+ * vx_expression_free(root);
782+ * vx_array_free(array);
783+ *
784+ */
785+ vx_expression * vx_expression_root (void );
786+
787+ /**
788+ * Create an expression that selects (includes) specific fields from a child
789+ * expression. The child expression must have a DTYPE_STRUCT dtype. Errors in
790+ * vx_array_apply if the child expression lacks one of the specified fields.
791+ *
792+ * Example:
793+ *
794+ * vx_expression* root = vx_expression_root();
795+ * const char* names[] = {"name", "age"};
796+ * vx_expression* select = vx_expression_select(names, 2, root);
797+ * vx_expression_free(select);
798+ * vx_expression_free(root);
799+ *
800+ */
801+ vx_expression * vx_expression_select (const char * const * names , size_t len , const vx_expression * child );
802+
803+ /**
804+ * Create an AND conjunction expression for multiple child expressions.
805+ * If there are no input expressions, returns NULL.
806+ */
807+ vx_expression * vx_expression_and (const vx_expression * const * expressions , size_t len );
808+
809+ /**
810+ * Create an OR disjunction expression for multiple child expressions.
811+ * If there are no input expressions, returns NULL.
812+ */
813+ vx_expression * vx_expression_or (const vx_expression * const * expressions , size_t len );
814+
815+ /**
816+ * Create a binary expression of the form lhs OP rhs, where OP is operator_.
817+ * If either input is NULL, returns NULL.
818+ *
819+ * Example for a binary sum:
820+ *
821+ * vx_expression* age = vx_expression_column("age");
822+ * vx_expression* height = vx_expression_column("height");
823+ * vx_expression* sum = vx_expression_binary(VX_OPERATOR_ADD, age, height);
824+ * vx_expression_free(sum);
825+ * vx_expression_free(height);
826+ * vx_expression_free(age);
827+ *
828+ * Example for a binary equality function:
829+ *
830+ * vx_expression* vx_expression_eq(
831+ * const vx_expression* lhs,
832+ * const vx_expression* rhs
833+ * ) {
834+ * return vx_expression_binary(VX_OPERATOR_EQ, lhs, rhs);
835+ * }
836+ *
837+ */
838+ vx_expression *
839+ vx_expression_binary (vx_binary_operator operator_ , const vx_expression * lhs , const vx_expression * rhs );
840+
841+ /**
842+ * Create a logical NOT of the child expression.
843+ *
844+ * Returns an owned expression that is the logical negation of the input boolean expression.
845+ */
846+ vx_expression * vx_expression_not (const vx_expression * child );
847+
848+ /**
849+ * Create an expression that checks for null values.
850+ *
851+ * Evaluates to a boolean array indicating which positions contain null values.
852+ */
853+ vx_expression * vx_expression_is_null (const vx_expression * child );
854+
855+ /**
856+ * Create an expression that extracts a named field from a struct expression.
857+ * The child expression must have a DTYPE_STRUCT dtype.
858+ * Errors in vx_array_apply if the root array doesn't have the specified field.
859+ *
860+ * Accesses the specified field from the result of the child expression.
861+ * Equivalent to vx_expression_select(&item, 1, child).
862+ */
863+ vx_expression * vx_expression_get_item (const char * item , const vx_expression * child );
864+
865+ /**
866+ * Create an expression that checks if a value is contained in a list.
867+ *
868+ * Evaluates to a boolean array indicating whether the value appears in each list.
869+ */
870+ vx_expression * vx_expression_list_contains (const vx_expression * list , const vx_expression * value );
871+
680872/**
681873 * Clone a borrowed [`vx_file`], returning an owned [`vx_file`].
682874 *
0 commit comments