-
Notifications
You must be signed in to change notification settings - Fork 713
ClickHouse: Support scalar expressions in WITH clause #2333
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -754,8 +754,9 @@ pub struct With { | |||||||||||||||||||
| pub with_token: AttachedToken, | ||||||||||||||||||||
| /// Whether the `WITH` is recursive (`WITH RECURSIVE`). | ||||||||||||||||||||
| pub recursive: bool, | ||||||||||||||||||||
| /// The list of CTEs declared by this `WITH` clause. | ||||||||||||||||||||
| pub cte_tables: Vec<Cte>, | ||||||||||||||||||||
| /// The items declared by this `WITH` clause: traditional CTEs and, | ||||||||||||||||||||
| /// for dialects that support it, named expressions. | ||||||||||||||||||||
| pub items: Vec<WithItem>, | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| impl fmt::Display for With { | ||||||||||||||||||||
|
|
@@ -764,11 +765,41 @@ impl fmt::Display for With { | |||||||||||||||||||
| if self.recursive { | ||||||||||||||||||||
| f.write_str("RECURSIVE ")?; | ||||||||||||||||||||
| } | ||||||||||||||||||||
| display_comma_separated(&self.cte_tables).fmt(f)?; | ||||||||||||||||||||
| display_comma_separated(&self.items).fmt(f)?; | ||||||||||||||||||||
| Ok(()) | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| /// A single item in a `WITH` clause. | ||||||||||||||||||||
| #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] | ||||||||||||||||||||
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||||||||||||||||||||
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] | ||||||||||||||||||||
| pub enum WithItem { | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. repr wise can we do something like? pub enum WithExpression {
Cte(...),
Cse(ExprWithAlias), // can we reuse exprwithalias?
}
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||||||||||||||
| /// A traditional common table expression: `name [(cols)] AS [MATERIALIZED] (query)`. | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||||||||||||||
| Cte(Cte), | ||||||||||||||||||||
| /// `<expr> AS <alias>` — binds an expression (literal, scalar subquery, | ||||||||||||||||||||
| /// lambda, …) to a name visible in the surrounding query. | ||||||||||||||||||||
| /// | ||||||||||||||||||||
| /// See ClickHouse's [common scalar expressions][1]. | ||||||||||||||||||||
| /// | ||||||||||||||||||||
| /// [1]: https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions | ||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||
| Named { | ||||||||||||||||||||
| /// The expression bound to the alias. | ||||||||||||||||||||
| expr: Expr, | ||||||||||||||||||||
| /// The name the expression is bound to. | ||||||||||||||||||||
| alias: Ident, | ||||||||||||||||||||
| }, | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| impl fmt::Display for WithItem { | ||||||||||||||||||||
| fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { | ||||||||||||||||||||
| match self { | ||||||||||||||||||||
| WithItem::Cte(cte) => cte.fmt(f), | ||||||||||||||||||||
| WithItem::Named { expr, alias } => write!(f, "{expr} AS {alias}"), | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
| } | ||||||||||||||||||||
|
|
||||||||||||||||||||
| #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] | ||||||||||||||||||||
| #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] | ||||||||||||||||||||
| #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] | ||||||||||||||||||||
|
|
||||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1745,6 +1745,21 @@ pub trait Dialect: Debug + Any { | |||||||||||||||||||||||||||||||||||||||
| false | ||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||
| /// Returns true if the dialect allows a `WITH` clause item to bind a | ||||||||||||||||||||||||||||||||||||||||
| /// scalar (or otherwise non-CTE) expression to a name, with the form | ||||||||||||||||||||||||||||||||||||||||
| /// `<expression> AS <identifier>` — alongside or instead of the | ||||||||||||||||||||||||||||||||||||||||
| /// traditional `<identifier> AS (<subquery>)` CTE form. | ||||||||||||||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||||||||||||||
| /// For example, in ClickHouse: | ||||||||||||||||||||||||||||||||||||||||
| /// ```sql | ||||||||||||||||||||||||||||||||||||||||
| /// WITH 42 AS answer SELECT answer FROM t | ||||||||||||||||||||||||||||||||||||||||
| /// ``` | ||||||||||||||||||||||||||||||||||||||||
| /// | ||||||||||||||||||||||||||||||||||||||||
| /// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions) | ||||||||||||||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||||||||||||||
| fn supports_with_clause_scalar_expression(&self) -> bool { | ||||||||||||||||||||||||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||||||||||||||||||||||||||||||||||||
| false | ||||||||||||||||||||||||||||||||||||||||
| } | ||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||
| /// Returns true if the dialect supports parenthesized multi-column | ||||||||||||||||||||||||||||||||||||||||
| /// aliases in SELECT items. For example: | ||||||||||||||||||||||||||||||||||||||||
| /// ```sql | ||||||||||||||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||||||||||||||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -14105,7 +14105,7 @@ impl<'a> Parser<'a> { | |||||||||||||||||
| Some(With { | ||||||||||||||||||
| with_token: with_token.clone().into(), | ||||||||||||||||||
| recursive: self.parse_keyword(Keyword::RECURSIVE), | ||||||||||||||||||
| cte_tables: self.parse_comma_separated(Parser::parse_cte)?, | ||||||||||||||||||
| items: self.parse_comma_separated(Parser::parse_with_item)?, | ||||||||||||||||||
| }) | ||||||||||||||||||
| } else { | ||||||||||||||||||
| None | ||||||||||||||||||
|
|
@@ -14639,6 +14639,33 @@ impl<'a> Parser<'a> { | |||||||||||||||||
| Ok(cte) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| /// Parse a single item in a `WITH` clause. | ||||||||||||||||||
| /// | ||||||||||||||||||
| /// In standard SQL this is always a CTE (`name [(cols)] AS (query)`). | ||||||||||||||||||
| /// Dialects that enable [`Dialect::supports_with_clause_scalar_expression`] | ||||||||||||||||||
| /// — currently only ClickHouse — also accept the reversed form | ||||||||||||||||||
| /// `<expression> AS <identifier>`, which can be freely interleaved with | ||||||||||||||||||
| /// CTEs in the same comma-separated list. | ||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed and minor renaming |
||||||||||||||||||
| pub fn parse_with_item(&mut self) -> Result<WithItem, ParserError> { | ||||||||||||||||||
| if !self.dialect.supports_with_clause_scalar_expression() { | ||||||||||||||||||
| return self.parse_cte().map(WithItem::Cte); | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // CTE form must start with an identifier. If the leading token | ||||||||||||||||||
| // can't begin one (e.g. `42`, `(SELECT …)`, `(x, y) -> …`), this | ||||||||||||||||||
| // is unambiguously the named-expression form. | ||||||||||||||||||
| if matches!(self.peek_token().token, Token::Word(_)) { | ||||||||||||||||||
| if let Some(cte) = self.maybe_parse(|p| p.parse_cte())? { | ||||||||||||||||||
| return Ok(WithItem::Cte(cte)); | ||||||||||||||||||
| } | ||||||||||||||||||
| } | ||||||||||||||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can this be simplified to use that we do
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tbh, not sure i fully understand this. with that, will WITH 42 AS answer SELECT answer FROM t parse? peek_subquery_or_cte_start returns false on 42 (not (SELECT / (WITH), so we'd go to parse_cte, which then errors on 42 not being an identifier?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah that would work right or? if we error on 42 its because the syntax was invalid and we expected a cte but got 42 instead?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. right, i was mixing things up. can you confirm this
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the main goal with the comment was to simplify the cases a bit, so that ideally we have one branch handling the cte case, and another handling the cse case. Another way (there might be others) to accomplish that would be to try to parse a cte and if that fails fallback to cse e.g. if let Some(cte) = self.maybe_parse(|parser| parser...)? else {
return cte
} else if self.supprots() {
return self.parse_cse
} else {
self.expected("CTE or CSE")
}maybe above woul be straightforward in that the cases that are covered is clearer? |
||||||||||||||||||
|
|
||||||||||||||||||
| let expr = self.parse_expr()?; | ||||||||||||||||||
| self.expect_keyword(Keyword::AS)?; | ||||||||||||||||||
| let alias = self.parse_identifier()?; | ||||||||||||||||||
| Ok(WithItem::Named { expr, alias }) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| /// Parse a "query body", which is an expression with roughly the | ||||||||||||||||||
| /// following grammar: | ||||||||||||||||||
| /// ```sql | ||||||||||||||||||
|
|
||||||||||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1845,6 +1845,70 @@ fn parse_inner_array_join() { | |
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn parse_with_clause_named_expression() { | ||
| // Plain literal scalar. | ||
| clickhouse().verified_stmt("WITH 42 AS answer SELECT answer FROM t"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for the tests, lets use
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| // String literal scalar from the ClickHouse docs. | ||
| clickhouse().verified_stmt( | ||
| "WITH '2019-08-01 15:23:00' AS ts_upper_bound SELECT * FROM hits \ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. for the line breaks can we use concat like e.g. here to format the sql strings?
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. fixed 4f69357 |
||
| WHERE EventDate = toDate(ts_upper_bound) AND EventTime <= ts_upper_bound", | ||
| ); | ||
|
|
||
| // Aggregate function call as a named expression. | ||
| clickhouse().verified_stmt( | ||
| "WITH sum(bytes) AS s SELECT formatReadableSize(s), \"table\" \ | ||
| FROM system.parts GROUP BY \"table\" ORDER BY s", | ||
| ); | ||
|
|
||
| // Scalar subquery as the bound expression. | ||
| clickhouse().verified_stmt( | ||
| "WITH (SELECT sum(bytes) FROM system.parts WHERE active) AS total_disk_usage \ | ||
| SELECT (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage, \"table\" \ | ||
| FROM system.parts GROUP BY \"table\" ORDER BY table_disk_usage DESC LIMIT 10", | ||
| ); | ||
|
|
||
| // Bare-identifier scalar — disambiguation case (`name AS alias` looks like | ||
| // a CTE prefix but the missing `(` after `AS` makes it a named expression). | ||
| clickhouse().verified_stmt("WITH user_id AS uid SELECT uid FROM t"); | ||
|
|
||
| // Mixing a named expression with a real CTE in the same WITH list. | ||
| clickhouse().verified_stmt("WITH 1 AS one, cte AS (SELECT 1) SELECT one FROM cte"); | ||
|
|
||
| // Lambda as the bound expression (also taken from the docs). | ||
| clickhouse().verified_stmt( | ||
| "WITH '.txt' AS extension, (id, extension) -> concat(lower(id), extension) AS gen_name \ | ||
| SELECT gen_name('test', '.sql') AS file_name", | ||
| ); | ||
| } | ||
|
|
||
| #[test] | ||
| fn parse_with_clause_named_expression_ast() { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we can merge this with the test above since they cover the same feature
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
| let query = clickhouse().verified_query("WITH 42 AS answer SELECT answer FROM t"); | ||
| let with = query.with.as_ref().unwrap(); | ||
| assert!(!with.recursive); | ||
| assert_eq!(with.items.len(), 1); | ||
| match &with.items[0] { | ||
| WithItem::Named { expr, alias } => { | ||
| assert_eq!(alias.value, "answer"); | ||
| assert!(matches!(expr, Expr::Value(_))); | ||
| } | ||
| other => panic!("expected a named expression, got {other:?}"), | ||
| } | ||
| } | ||
|
|
||
| #[test] | ||
| fn parse_with_clause_named_expression_unsupported_in_other_dialects() { | ||
| // The named-expression form is only enabled for ClickHouse; other | ||
| // dialects should still reject `WITH 42 AS answer …`. | ||
| let res = sqlparser::parser::Parser::parse_sql( | ||
| &GenericDialect {}, | ||
| "WITH 42 AS answer SELECT answer FROM t", | ||
| ); | ||
| assert!(res.is_err(), "expected parse error, got {res:?}"); | ||
| } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think we can skip this, no need to test what the parser does for an invalid query for unsupported dialects
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
|
|
||
| fn clickhouse() -> TestedDialects { | ||
| TestedDialects::new(vec![Box::new(ClickHouseDialect {})]) | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
fixed
3b86842#diff-ff51937f34d0076928ac31baffb41b26e4cbc0ec6dd8496105a7f61188f92ed1R757-R758