Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ pub use self::query::{
TableSampleKind, TableSampleMethod, TableSampleModifier, TableSampleQuantity, TableSampleSeed,
TableSampleSeedModifier, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity,
UpdateTableFromKind, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill,
XmlNamespaceDefinition, XmlPassingArgument, XmlPassingClause, XmlTableColumn,
WithItem, XmlNamespaceDefinition, XmlPassingArgument, XmlPassingClause, XmlTableColumn,
XmlTableColumnOption,
};

Expand Down
37 changes: 34 additions & 3 deletions src/ast/query.rs
Original file line number Diff line number Diff line change
Expand Up @@ -754,8 +754,9 @@ pub struct With {
pub with_token: AttachedToken,
/// Whether the `WITH` is recursive (`WITH RECURSIVE`).
pub recursive: bool,
/// The list of CTEs declared by this `WITH` clause.
pub cte_tables: Vec<Cte>,
/// The items declared by this `WITH` clause: traditional CTEs and,
/// for dialects that support it, named expressions.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// The items declared by this `WITH` clause: traditional CTEs and,
/// for dialects that support it, named expressions.
/// The expressions declared by this `WITH` clause.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pub items: Vec<WithItem>,
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
pub items: Vec<WithItem>,
pub exprs: Vec<WithExpression>,

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}

impl fmt::Display for With {
Expand All @@ -764,11 +765,41 @@ impl fmt::Display for With {
if self.recursive {
f.write_str("RECURSIVE ")?;
}
display_comma_separated(&self.cte_tables).fmt(f)?;
display_comma_separated(&self.items).fmt(f)?;
Ok(())
}
}

/// A single item in a `WITH` clause.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
pub enum WithItem {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

repr wise can we do something like?

pub enum WithExpression {
    Cte(...),
    Cse(ExprWithAlias), // can we reuse exprwithalias?
}

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

/// A traditional common table expression: `name [(cols)] AS [MATERIALIZED] (query)`.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// A traditional common table expression: `name [(cols)] AS [MATERIALIZED] (query)`.
/// A common table expression.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Cte(Cte),
/// `<expr> AS <alias>` — binds an expression (literal, scalar subquery,
/// lambda, …) to a name visible in the surrounding query.
///
/// See ClickHouse's [common scalar expressions][1].
///
/// [1]: https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// `<expr> AS <alias>` — binds an expression (literal, scalar subquery,
/// lambda, …) to a name visible in the surrounding query.
///
/// See ClickHouse's [common scalar expressions][1].
///
/// [1]: https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions
/// A common scalar expression.
///
/// [Clickhouse]: https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions

Named {
/// The expression bound to the alias.
expr: Expr,
/// The name the expression is bound to.
alias: Ident,
},
}

impl fmt::Display for WithItem {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
WithItem::Cte(cte) => cte.fmt(f),
WithItem::Named { expr, alias } => write!(f, "{expr} AS {alias}"),
}
}
}

#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))]
Expand Down
27 changes: 20 additions & 7 deletions src/ast/spans.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ use super::{
ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript,
SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, TableFactor, TableObject,
TableOptionsClustered, TableWithJoins, Update, UpdateTableFromKind, Use, Values, ViewColumnDef,
WhileStatement, WildcardAdditionalOptions, With, WithFill,
WhileStatement, WildcardAdditionalOptions, With, WithFill, WithItem,
};

/// Given an iterator of spans, return the [Span::union] of all spans.
Expand Down Expand Up @@ -185,12 +185,21 @@ impl Spanned for With {
let With {
with_token,
recursive: _, // bool
cte_tables,
items,
} = self;

union_spans(
core::iter::once(with_token.0.span).chain(cte_tables.iter().map(|item| item.span())),
)
union_spans(core::iter::once(with_token.0.span).chain(items.iter().map(|item| item.span())))
}
}

impl Spanned for WithItem {
fn span(&self) -> Span {
match self {
WithItem::Cte(cte) => cte.span(),
WithItem::Named { expr, alias } => {
union_spans(core::iter::once(expr.span()).chain(core::iter::once(alias.span)))
}
}
}
}

Expand Down Expand Up @@ -2716,8 +2725,12 @@ pub mod tests {
);

let query = test.0.parse_query().unwrap();
let cte_span = query.clone().with.unwrap().cte_tables[0].span();
let cte_query_span = query.clone().with.unwrap().cte_tables[0].query.span();
let cte = match &query.with.as_ref().unwrap().items[0] {
WithItem::Cte(cte) => cte,
_ => panic!("expected a CTE"),
};
let cte_span = cte.span();
let cte_query_span = cte.query.span();
let body_span = query.body.span();

// the WITH keyboard is part of the query
Expand Down
5 changes: 5 additions & 0 deletions src/dialect/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,4 +153,9 @@ impl Dialect for ClickHouseDialect {
fn supports_comma_separated_trim(&self) -> bool {
true
}

/// See <https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions>
fn supports_with_clause_scalar_expression(&self) -> bool {
true
}
}
15 changes: 15 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1745,6 +1745,21 @@ pub trait Dialect: Debug + Any {
false
}

/// Returns true if the dialect allows a `WITH` clause item to bind a
/// scalar (or otherwise non-CTE) expression to a name, with the form
/// `<expression> AS <identifier>` — alongside or instead of the
/// traditional `<identifier> AS (<subquery>)` CTE form.
///
/// For example, in ClickHouse:
/// ```sql
/// WITH 42 AS answer SELECT answer FROM t
/// ```
///
/// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// Returns true if the dialect allows a `WITH` clause item to bind a
/// scalar (or otherwise non-CTE) expression to a name, with the form
/// `<expression> AS <identifier>` — alongside or instead of the
/// traditional `<identifier> AS (<subquery>)` CTE form.
///
/// For example, in ClickHouse:
/// ```sql
/// WITH 42 AS answer SELECT answer FROM t
/// ```
///
/// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions)
/// Returns true if the dialect supports Common Scalar Expressions in a `WITH` clause.
///
/// For example:
/// ```sql
/// WITH 42 AS answer SELECT answer FROM t
/// ```
///
/// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/with#common-scalar-expressions)

fn supports_with_clause_scalar_expression(&self) -> bool {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
fn supports_with_clause_scalar_expression(&self) -> bool {
fn supports_common_scalar_expressions(&self) -> bool {

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

false
}

/// Returns true if the dialect supports parenthesized multi-column
/// aliases in SELECT items. For example:
/// ```sql
Expand Down
29 changes: 28 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14105,7 +14105,7 @@ impl<'a> Parser<'a> {
Some(With {
with_token: with_token.clone().into(),
recursive: self.parse_keyword(Keyword::RECURSIVE),
cte_tables: self.parse_comma_separated(Parser::parse_cte)?,
items: self.parse_comma_separated(Parser::parse_with_item)?,
})
} else {
None
Expand Down Expand Up @@ -14639,6 +14639,33 @@ impl<'a> Parser<'a> {
Ok(cte)
}

/// Parse a single item in a `WITH` clause.
///
/// In standard SQL this is always a CTE (`name [(cols)] AS (query)`).
/// Dialects that enable [`Dialect::supports_with_clause_scalar_expression`]
/// — currently only ClickHouse — also accept the reversed form
/// `<expression> AS <identifier>`, which can be freely interleaved with
/// CTEs in the same comma-separated list.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/// Parse a single item in a `WITH` clause.
///
/// In standard SQL this is always a CTE (`name [(cols)] AS (query)`).
/// Dialects that enable [`Dialect::supports_with_clause_scalar_expression`]
/// — currently only ClickHouse — also accept the reversed form
/// `<expression> AS <identifier>`, which can be freely interleaved with
/// CTEs in the same comma-separated list.
/// Parse a single item in a `WITH` clause.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

pub fn parse_with_item(&mut self) -> Result<WithItem, ParserError> {
if !self.dialect.supports_with_clause_scalar_expression() {
return self.parse_cte().map(WithItem::Cte);
}

// CTE form must start with an identifier. If the leading token
// can't begin one (e.g. `42`, `(SELECT …)`, `(x, y) -> …`), this
// is unambiguously the named-expression form.
if matches!(self.peek_token().token, Token::Word(_)) {
if let Some(cte) = self.maybe_parse(|p| p.parse_cte())? {
return Ok(WithItem::Cte(cte));
}
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be simplified to use peek_subquery_or_cte_start? e.g.

that we do if self.dialect.supports() and !self.peek_subquery_or_cte_start { parse expr } else { parse cte }

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tbh, not sure i fully understand this. with that, will WITH 42 AS answer SELECT answer FROM t parse? peek_subquery_or_cte_start returns false on 42 (not (SELECT / (WITH), so we'd go to parse_cte, which then errors on 42 not being an identifier?

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah that would work right or? if we error on 42 its because the syntax was invalid and we expected a cte but got 42 instead?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, i was mixing things up. can you confirm this WITH (SELECT max(x) FROM t) AS m SELECT m will work with your approach? i tried and seems that the first tokens (SELECT make peek_subquery_or_cte_start return true, which routes to parse_cte, but parse_cte immediately fails on ( since it expects an identifier.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the main goal with the comment was to simplify the cases a bit, so that ideally we have one branch handling the cte case, and another handling the cse case. Another way (there might be others) to accomplish that would be to try to parse a cte and if that fails fallback to cse e.g.

if let Some(cte) = self.maybe_parse(|parser| parser...)? else {
	return cte
} else if self.supprots() {
	return self.parse_cse
} else {
	self.expected("CTE or CSE")
}

maybe above woul be straightforward in that the cases that are covered is clearer?


let expr = self.parse_expr()?;
self.expect_keyword(Keyword::AS)?;
let alias = self.parse_identifier()?;
Ok(WithItem::Named { expr, alias })
}

/// Parse a "query body", which is an expression with roughly the
/// following grammar:
/// ```sql
Expand Down
64 changes: 64 additions & 0 deletions tests/sqlparser_clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1845,6 +1845,70 @@ fn parse_inner_array_join() {
}
}

#[test]
fn parse_with_clause_named_expression() {
// Plain literal scalar.
clickhouse().verified_stmt("WITH 42 AS answer SELECT answer FROM t");
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for the tests, lets use all_dialects_where instead of hardcoding it to clickhouse

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


// String literal scalar from the ClickHouse docs.
clickhouse().verified_stmt(
"WITH '2019-08-01 15:23:00' AS ts_upper_bound SELECT * FROM hits \
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

for the line breaks can we use concat like e.g. here to format the sql strings?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed 4f69357

WHERE EventDate = toDate(ts_upper_bound) AND EventTime <= ts_upper_bound",
);

// Aggregate function call as a named expression.
clickhouse().verified_stmt(
"WITH sum(bytes) AS s SELECT formatReadableSize(s), \"table\" \
FROM system.parts GROUP BY \"table\" ORDER BY s",
);

// Scalar subquery as the bound expression.
clickhouse().verified_stmt(
"WITH (SELECT sum(bytes) FROM system.parts WHERE active) AS total_disk_usage \
SELECT (sum(bytes) / total_disk_usage) * 100 AS table_disk_usage, \"table\" \
FROM system.parts GROUP BY \"table\" ORDER BY table_disk_usage DESC LIMIT 10",
);

// Bare-identifier scalar — disambiguation case (`name AS alias` looks like
// a CTE prefix but the missing `(` after `AS` makes it a named expression).
clickhouse().verified_stmt("WITH user_id AS uid SELECT uid FROM t");

// Mixing a named expression with a real CTE in the same WITH list.
clickhouse().verified_stmt("WITH 1 AS one, cte AS (SELECT 1) SELECT one FROM cte");

// Lambda as the bound expression (also taken from the docs).
clickhouse().verified_stmt(
"WITH '.txt' AS extension, (id, extension) -> concat(lower(id), extension) AS gen_name \
SELECT gen_name('test', '.sql') AS file_name",
);
}

#[test]
fn parse_with_clause_named_expression_ast() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we can merge this with the test above since they cover the same feature

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let query = clickhouse().verified_query("WITH 42 AS answer SELECT answer FROM t");
let with = query.with.as_ref().unwrap();
assert!(!with.recursive);
assert_eq!(with.items.len(), 1);
match &with.items[0] {
WithItem::Named { expr, alias } => {
assert_eq!(alias.value, "answer");
assert!(matches!(expr, Expr::Value(_)));
}
other => panic!("expected a named expression, got {other:?}"),
}
}

#[test]
fn parse_with_clause_named_expression_unsupported_in_other_dialects() {
// The named-expression form is only enabled for ClickHouse; other
// dialects should still reject `WITH 42 AS answer …`.
let res = sqlparser::parser::Parser::parse_sql(
&GenericDialect {},
"WITH 42 AS answer SELECT answer FROM t",
);
assert!(res.is_err(), "expected parse error, got {res:?}");
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can skip this, no need to test what the parser does for an invalid query for unsupported dialects

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


fn clickhouse() -> TestedDialects {
TestedDialects::new(vec![Box::new(ClickHouseDialect {})])
}
Expand Down
42 changes: 24 additions & 18 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7854,18 +7854,21 @@ fn parse_ctes() {

fn assert_ctes_in_select(expected: &[&str], sel: &Query) {
for (i, exp) in expected.iter().enumerate() {
let Cte { alias, query, .. } = &sel.with.as_ref().unwrap().cte_tables[i];
assert_eq!(*exp, query.to_string());
assert_eq!(false, alias.explicit);
let cte = match &sel.with.as_ref().unwrap().items[i] {
WithItem::Cte(cte) => cte,
other => panic!("expected a CTE, got {other:?}"),
};
assert_eq!(*exp, cte.query.to_string());
assert_eq!(false, cte.alias.explicit);
assert_eq!(
if i == 0 {
Ident::new("a")
} else {
Ident::new("b")
},
alias.name
cte.alias.name
);
assert!(alias.columns.is_empty());
assert!(cte.alias.columns.is_empty());
}
}

Expand Down Expand Up @@ -7898,26 +7901,29 @@ fn parse_ctes() {
// CTE in a CTE...
let sql = &format!("WITH outer_cte AS ({with}) SELECT * FROM outer_cte");
let select = verified_query(sql);
assert_ctes_in_select(&cte_sqls, &only(&select.with.unwrap().cte_tables).query);
let with = select.with.as_ref().unwrap();
let outer_cte = match only(&with.items) {
WithItem::Cte(cte) => cte,
other => panic!("expected a CTE, got {other:?}"),
};
assert_ctes_in_select(&cte_sqls, &outer_cte.query);
}

#[test]
fn parse_cte_renamed_columns() {
let sql = "WITH cte (col1, col2) AS (SELECT foo, bar FROM baz) SELECT * FROM cte";
let query = all_dialects().verified_query(sql);
let with = query.with.unwrap();
let cte = match with.items.first().unwrap() {
WithItem::Cte(cte) => cte,
other => panic!("expected a CTE, got {other:?}"),
};
assert_eq!(
vec![
TableAliasColumnDef::from_name("col1"),
TableAliasColumnDef::from_name("col2")
],
query
.with
.unwrap()
.cte_tables
.first()
.unwrap()
.alias
.columns
cte.alias.columns
);
}

Expand All @@ -7931,8 +7937,8 @@ fn parse_recursive_cte() {

let with = query.with.as_ref().unwrap();
assert!(with.recursive);
assert_eq!(with.cte_tables.len(), 1);
let expected = Cte {
assert_eq!(with.items.len(), 1);
let expected = WithItem::Cte(Cte {
alias: TableAlias {
explicit: false,
name: Ident {
Expand All @@ -7947,8 +7953,8 @@ fn parse_recursive_cte() {
from: None,
materialized: None,
closing_paren_token: AttachedToken::empty(),
};
assert_eq!(with.cte_tables.first().unwrap(), &expected);
});
assert_eq!(with.items.first().unwrap(), &expected);
}

#[test]
Expand Down