Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion cosmosdb/CosmosDBLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ UDF_SYMBOL: 'UDF';
WHERE_SYMBOL: 'WHERE';
AND_SYMBOL: 'AND';
OR_SYMBOL: 'OR';
// Keyword tokens for predicates (IN, BETWEEN, EXISTS, LIKE), projection
// modifiers (TOP, VALUE) and the GROUP BY / HAVING / ORDER BY / OFFSET LIMIT /
// JOIN clauses.
// NOTE(review): when two lexer rules match the same text, ANTLR picks the one
// defined first, so these presumably need to stay above IDENTIFIER — confirm.
IN_SYMBOL: 'IN';
BETWEEN_SYMBOL: 'BETWEEN';
TOP_SYMBOL: 'TOP';
VALUE_SYMBOL: 'VALUE';
ORDER_SYMBOL: 'ORDER';
BY_SYMBOL: 'BY';
GROUP_SYMBOL: 'GROUP';
OFFSET_SYMBOL: 'OFFSET';
LIMIT_SYMBOL: 'LIMIT';
ASC_SYMBOL: 'ASC';
DESC_SYMBOL: 'DESC';
EXISTS_SYMBOL: 'EXISTS';
LIKE_SYMBOL: 'LIKE';
HAVING_SYMBOL: 'HAVING';
JOIN_SYMBOL: 'JOIN';

AT_SYMBOL: '@';
LC_BRACKET_SYMBOL: '{';
Expand Down Expand Up @@ -77,10 +92,11 @@ GREATER_THAN_EQUAL_OPERATOR: '>=';
LEFT_SHIFT_OPERATOR: '<<';
RIGHT_SHIFT_OPERATOR: '>>';
ZERO_FILL_RIGHT_SHIFT_OPERATOR: '>>>';
// Inequality comparison. Cosmos DB accepts both spellings, `!=` and `<>`.
// The lexer's longest-match rule makes `<>` win over a lone `<` followed by `>`.
NOT_EQUAL_OPERATOR: '!=' | '<>';


/* Identifiers */
IDENTIFIER: [a-z] [a-z_0-9]*;
IDENTIFIER: [a-z_] [a-z_0-9]*;

// White space handling
WHITESPACE:
Expand Down
204 changes: 122 additions & 82 deletions cosmosdb/CosmosDBParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -6,137 +6,155 @@ options {

// Entry rule: exactly one SELECT statement, then end of input.
root:
    select EOF;

select: select_clause from_clause where_clause?;
select:
select_clause from_clause? where_clause? group_by_clause? having_clause? order_by_clause?
offset_limit_clause?;

select_clause: SELECT_SYMBOL select_specification;
select_clause: SELECT_SYMBOL top_clause? select_specification;

// TOP <count>: limits how many items the query returns.
// The count may be an integer literal or a query parameter
// (e.g. `SELECT TOP @n * FROM c`), so parameterised queries parse too.
// For literal counts the parse tree is unchanged: DECIMAL stays a direct child.
top_clause: TOP_SYMBOL (DECIMAL | parameter_name);

select_specification:
MULTIPLY_OPERATOR
| DISTINCT_SYMBOL? object_property_list;
| DISTINCT_SYMBOL? VALUE_SYMBOL? object_property_list;

// FROM keyword followed by the source specification.
from_clause:
    FROM_SYMBOL from_specification;

where_clause: WHERE_SYMBOL scalar_expression_in_where;
where_clause: WHERE_SYMBOL scalar_expression;

// GROUP BY followed by one or more comma-separated grouping expressions.
group_by_clause:
    GROUP_SYMBOL BY_SYMBOL
    scalar_expression (COMMA_SYMBOL scalar_expression)*;

// HAVING followed by a single filter expression.
having_clause:
    HAVING_SYMBOL scalar_expression;

// ORDER BY followed by one or more comma-separated sort keys.
order_by_clause:
    ORDER_SYMBOL BY_SYMBOL
    sort_expression (COMMA_SYMBOL sort_expression)*;

// A single sort key: an expression with an optional ASC / DESC direction.
sort_expression:
    scalar_expression (
        ASC_SYMBOL
        | DESC_SYMBOL
    )?;

// OFFSET <skip> LIMIT <take>: both counts are required.
// Each count may be an integer literal or a query parameter
// (e.g. `OFFSET @skip LIMIT @take`), so parameterised paging queries parse.
// For literal-only input the parse tree is unchanged. When a parameter is used,
// consumers should inspect the child after OFFSET / LIMIT rather than indexing
// DECIMAL positionally, since one of the two DECIMAL tokens may be absent.
offset_limit_clause:
    OFFSET_SYMBOL (DECIMAL | parameter_name)
    LIMIT_SYMBOL (DECIMAL | parameter_name);

// The FROM specification is currently just a single source.
from_specification:
    from_source;

from_source: container_expression;
from_source: container_expression (join_clause)*;

container_expression: container_name (AS_SYMBOL? IDENTIFIER)?;
container_expression: container_name (AS_SYMBOL? identifier)?;

container_name: IDENTIFIER;
// JOIN <identifier> IN <expression>: binds an identifier to the expression
// that follows IN.
join_clause:
    JOIN_SYMBOL identifier
    IN_SYMBOL scalar_expression;

container_name: identifier;

object_property_list:
object_property (COMMA_SYMBOL object_property)*;

object_property: scalar_expression (AS_SYMBOL? property_alias)?;
object_property:
scalar_expression (AS_SYMBOL? property_alias)?;

property_alias: IDENTIFIER;
property_alias: identifier;

// scalar_expression: https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/scalar-expressions
// Unified scalar_expression - used in both SELECT projections and WHERE clause.
// https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/scalar-expressions
// Alternatives are ordered from highest precedence (first) to lowest (last) per ANTLR4 semantics.
scalar_expression:
input_alias
| scalar_expression DOT_SYMBOL property_name
| scalar_expression LS_BRACKET_SYMBOL (
(DOUBLE_QUOTE_STRING_LITERAL)
| (array_index)
) RS_BRACKET_SYMBOL
| unary_operator scalar_expression;

// TODO(zp): Merge scalar_expression and scalar_expression_in_where while supporting the project
// fully. https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/scalar-expressions
scalar_expression_in_where:
constant
| input_alias
| parameter_name
| scalar_expression_in_where AND_SYMBOL scalar_expression_in_where
| scalar_expression_in_where OR_SYMBOL scalar_expression_in_where
| scalar_expression_in_where DOT_SYMBOL property_name
| scalar_expression_in_where LS_BRACKET_SYMBOL (
(DOUBLE_QUOTE_STRING_LITERAL)
| (array_index)
) RS_BRACKET_SYMBOL
| unary_operator scalar_expression_in_where
| scalar_expression_in_where binary_operator scalar_expression_in_where
| scalar_expression_in_where QUESTION_MARK_SYMBOL scalar_expression_in_where COLON_SYMBOL
scalar_expression_in_where
| scalar_function_expression
| create_object_expression
| create_array_expression
| LR_BRACKET_SYMBOL scalar_expression_in_where RR_BRACKET_SYMBOL;

create_array_expression: array_constant;
| LR_BRACKET_SYMBOL scalar_expression RR_BRACKET_SYMBOL
| LR_BRACKET_SYMBOL select RR_BRACKET_SYMBOL
| EXISTS_SYMBOL LR_BRACKET_SYMBOL select RR_BRACKET_SYMBOL
| scalar_expression DOT_SYMBOL property_name
| scalar_expression LS_BRACKET_SYMBOL (
DOUBLE_QUOTE_STRING_LITERAL
| SINGLE_QUOTE_STRING_LITERAL
| array_index
) RS_BRACKET_SYMBOL
| unary_operator scalar_expression
| NOT_SYMBOL scalar_expression
| scalar_expression multiplicative_operator scalar_expression
| scalar_expression additive_operator scalar_expression
| scalar_expression shift_operator scalar_expression
| scalar_expression BIT_AND_SYMBOL scalar_expression
| scalar_expression BIT_XOR_SYMBOL scalar_expression
| scalar_expression BIT_OR_SYMBOL scalar_expression
| scalar_expression DOUBLE_BAR_SYMBOL scalar_expression
| scalar_expression comparison_operator scalar_expression
| scalar_expression NOT_SYMBOL? IN_SYMBOL LR_BRACKET_SYMBOL (
scalar_expression (COMMA_SYMBOL scalar_expression)*
)? RR_BRACKET_SYMBOL
| scalar_expression NOT_SYMBOL? BETWEEN_SYMBOL scalar_expression AND_SYMBOL scalar_expression
| scalar_expression NOT_SYMBOL? LIKE_SYMBOL scalar_expression
| scalar_expression AND_SYMBOL scalar_expression
| scalar_expression OR_SYMBOL scalar_expression
| scalar_expression QUESTION_MARK_SYMBOL scalar_expression COLON_SYMBOL scalar_expression;

// Array constructor: `[` optional comma-separated expressions `]`.
// An empty list (`[]`) is accepted.
create_array_expression:
    LS_BRACKET_SYMBOL
    (scalar_expression (COMMA_SYMBOL scalar_expression)*)?
    RS_BRACKET_SYMBOL;

// Object constructor: `{` optional comma-separated field pairs `}`.
// An empty object (`{}`) is accepted.
create_object_expression:
    LC_BRACKET_SYMBOL
    (object_field_pair (COMMA_SYMBOL object_field_pair)*)?
    RC_BRACKET_SYMBOL;

create_object_expression: object_constant;
// One `key: value` entry of an object constructor. The key is either a quoted
// string literal or a bare property name; the value is any scalar expression.
object_field_pair:
    (
        string_literal
        | property_name
    ) COLON_SYMBOL scalar_expression;

// A function call: a user-defined function (tried first) or a built-in one.
scalar_function_expression:
    udf_scalar_function_expression | builtin_function_expression;

udf_scalar_function_expression:
UDF_SYMBOL DOT_SYMBOL IDENTIFIER LR_BRACKET_SYMBOL (
scalar_expression_in_where (
COMMA_SYMBOL scalar_expression_in_where
)*
) RR_BRACKET_SYMBOL;
UDF_SYMBOL DOT_SYMBOL identifier LR_BRACKET_SYMBOL (
scalar_expression (COMMA_SYMBOL scalar_expression)*
)? RR_BRACKET_SYMBOL;

builtin_function_expression:
IDENTIFIER LR_BRACKET_SYMBOL (
scalar_expression_in_where (
COMMA_SYMBOL scalar_expression_in_where
identifier LR_BRACKET_SYMBOL (
(MULTIPLY_OPERATOR | scalar_expression) (
COMMA_SYMBOL scalar_expression
)*
) RR_BRACKET_SYMBOL;
)? RR_BRACKET_SYMBOL;

binary_operator:
multiplicative_operator:
MULTIPLY_OPERATOR
| DIVIDE_SYMBOL
| MODULO_SYMBOL
| PLUS_SYMBOL
| MINUS_SYMBOL
| BIT_AND_SYMBOL
| BIT_XOR_SYMBOL
| BIT_OR_SYMBOL
| DOUBLE_BAR_SYMBOL
| EQUAL_SYMBOL
| MODULO_SYMBOL;

// Binary addition / subtraction operator tokens.
additive_operator:
    PLUS_SYMBOL
    | MINUS_SYMBOL;

// Bit-shift operator tokens: `<<`, `>>` and zero-fill `>>>`.
shift_operator:
    LEFT_SHIFT_OPERATOR | RIGHT_SHIFT_OPERATOR | ZERO_FILL_RIGHT_SHIFT_OPERATOR;

comparison_operator:
EQUAL_SYMBOL
| NOT_EQUAL_OPERATOR
| LESS_THAN_OPERATOR
| LESS_THAN_EQUAL_OPERATOR
| GREATER_THAN_OPERATOR
| GREATER_THAN_EQUAL_OPERATOR
| LEFT_SHIFT_OPERATOR
| RIGHT_SHIFT_OPERATOR
| ZERO_FILL_RIGHT_SHIFT_OPERATOR
;
| GREATER_THAN_EQUAL_OPERATOR;

// Prefix operator tokens: bitwise NOT, unary plus, unary minus.
unary_operator:
    BIT_NOT_SYMBOL
    | PLUS_SYMBOL
    | MINUS_SYMBOL;

parameter_name: AT_SYMBOL IDENTIFIER;
parameter_name: AT_SYMBOL identifier;

// https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/query/constants
constant:
undefined_constant
| null_constant
| boolean_constant
| number_constant
| string_constant
| array_constant
| object_constant;

object_constant:
LC_BRACKET_SYMBOL (
object_constant_field_pair (
COMMA_SYMBOL object_constant_field_pair
)*
) RC_BRACKET_SYMBOL;

object_constant_field_pair: (
property_name
| (DOUBLE_QUOTE_SYMBOL property_name DOUBLE_QUOTE_SYMBOL)
) COMMA_SYMBOL constant;

array_constant:
LS_BRACKET_SYMBOL (constant (COMMA_SYMBOL constant)*)? RS_BRACKET_SYMBOL;

string_constant: string_literal;
| string_constant;

// The `undefined` literal.
undefined_constant:
    UNDEFINED_SYMBOL;

Expand All @@ -146,6 +164,8 @@ boolean_constant: TRUE_SYMBOL | FALSE_SYMBOL;

// A numeric literal in decimal or hexadecimal notation.
number_constant:
    decimal_literal
    | hexadecimal_literal;

string_constant: string_literal;

// A quoted string; single- and double-quoted forms are both accepted.
string_literal:
    SINGLE_QUOTE_STRING_LITERAL | DOUBLE_QUOTE_STRING_LITERAL;
Expand All @@ -154,8 +174,28 @@ decimal_literal: DECIMAL | REAL | FLOAT;

// A hexadecimal numeric literal (single HEXADECIMAL token).
hexadecimal_literal:
    HEXADECIMAL;

property_name: IDENTIFIER;
// Parser-level identifier: a plain IDENTIFIER token or one of the keyword
// tokens listed below. CosmosDB allows keywords as property names, so rules
// for names and aliases reference this rule rather than the raw IDENTIFIER
// token. Only the keywords enumerated here are accepted.
identifier:
    IDENTIFIER
    // predicate keywords
    | IN_SYMBOL
    | BETWEEN_SYMBOL
    // projection modifiers
    | TOP_SYMBOL
    | VALUE_SYMBOL
    // clause keywords
    | ORDER_SYMBOL
    | BY_SYMBOL
    | GROUP_SYMBOL
    | OFFSET_SYMBOL
    | LIMIT_SYMBOL
    | ASC_SYMBOL
    | DESC_SYMBOL
    // remaining predicate / clause keywords
    | EXISTS_SYMBOL
    | LIKE_SYMBOL
    | HAVING_SYMBOL
    | JOIN_SYMBOL;

property_name: identifier;

// Index used inside `[...]` element access: a single DECIMAL token.
array_index:
    DECIMAL;

input_alias: IDENTIFIER;
input_alias: identifier;
Loading
Loading