From 5a3b63bf419ff80e6acdb0b3bbdbcf664083f1d5 Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Tue, 2 Dec 2025 21:14:34 -0800 Subject: [PATCH 001/121] Fixing location for extrenal tables (#2108) --- src/ast/ddl.rs | 4 +++- tests/sqlparser_hive.rs | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 0df53c144a..ad3191ebcf 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -2803,7 +2803,9 @@ impl fmt::Display for CreateTable { if let Some(file_format) = self.file_format { write!(f, " STORED AS {file_format}")?; } - write!(f, " LOCATION '{}'", self.location.as_ref().unwrap())?; + if let Some(location) = &self.location { + write!(f, " LOCATION '{location}'")?; + } } match &self.table_options { diff --git a/tests/sqlparser_hive.rs b/tests/sqlparser_hive.rs index 386bab7f04..1b09485185 100644 --- a/tests/sqlparser_hive.rs +++ b/tests/sqlparser_hive.rs @@ -34,10 +34,12 @@ fn parse_table_create() { let sql = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS ORC LOCATION 's3://...' TBLPROPERTIES ("prop" = "2", "asdf" = '1234', 'asdf' = "1234", "asdf" = 2)"#; let iof = r#"CREATE TABLE IF NOT EXISTS db.table (a BIGINT, b STRING, c TIMESTAMP) PARTITIONED BY (d STRING, e TIMESTAMP) STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' LOCATION 's3://...'"#; let serdeproperties = r#"CREATE EXTERNAL TABLE IF NOT EXISTS db.table (a STRING, b STRING, c STRING) PARTITIONED BY (d STRING, e STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde.config' WITH SERDEPROPERTIES ('prop_a' = 'a', 'prop_b' = 'b') STORED AS TEXTFILE LOCATION 's3://...' 
TBLPROPERTIES ('prop_c' = 'c')"#; + let externaltable = r#"CREATE EXTERNAL TABLE t (c INT)"#; hive().verified_stmt(sql); hive().verified_stmt(iof); hive().verified_stmt(serdeproperties); + hive().verified_stmt(externaltable); } #[test] From effaac5c394137343a1d2e87429e68a7e88c2e07 Mon Sep 17 00:00:00 2001 From: Martin Grigorov Date: Wed, 3 Dec 2025 22:01:51 +0200 Subject: [PATCH 002/121] fix: Set the current folder as a "primary" for the `find` command (#2120) --- dev/release/verify-release-candidate.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev/release/verify-release-candidate.sh b/dev/release/verify-release-candidate.sh index 089a6ffc7f..4e97c6e2a1 100755 --- a/dev/release/verify-release-candidate.sh +++ b/dev/release/verify-release-candidate.sh @@ -124,7 +124,7 @@ test_source_distribution() { cargo build cargo test --all-features - if ( find -iname 'Cargo.toml' | xargs grep SNAPSHOT ); then + if ( find . -iname 'Cargo.toml' | xargs grep SNAPSHOT ); then echo "Cargo.toml version should not contain SNAPSHOT for releases" exit 1 fi From 326f1118081807933160cb530e69dfdbf7a84b22 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Thu, 4 Dec 2025 14:17:31 +0100 Subject: [PATCH 003/121] Added support for `ALTER OPERATOR` syntax (#2114) --- src/ast/ddl.rs | 133 +++++++++--- src/ast/helpers/stmt_data_loading.rs | 12 +- src/ast/mod.rs | 32 +-- src/ast/spans.rs | 2 + src/keywords.rs | 9 + src/lib.rs | 1 + src/parser/mod.rs | 289 +++++++++++++++++++++------ tests/sqlparser_postgres.rs | 249 ++++++++++++++++++++--- 8 files changed, 585 insertions(+), 142 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index ad3191ebcf..3516c64a1b 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -995,6 +995,103 @@ impl fmt::Display for AlterTypeOperation { } } +/// `ALTER OPERATOR` statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = 
"visitor", derive(Visit, VisitMut))] +pub struct AlterOperator { + /// Operator name (can be schema-qualified) + pub name: ObjectName, + /// Left operand type (`None` if no left operand) + pub left_type: Option, + /// Right operand type + pub right_type: DataType, + /// The operation to perform + pub operation: AlterOperatorOperation, +} + +/// An [AlterOperator] operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterOperatorOperation { + /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + OwnerTo(Owner), + /// `SET SCHEMA new_schema` + SetSchema { schema_name: ObjectName }, + /// `SET ( options )` + Set { + /// List of operator options to set + options: Vec, + }, +} + +/// Option for `ALTER OPERATOR SET` operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorOption { + /// `RESTRICT = { res_proc | NONE }` + Restrict(Option), + /// `JOIN = { join_proc | NONE }` + Join(Option), + /// `COMMUTATOR = com_op` + Commutator(ObjectName), + /// `NEGATOR = neg_op` + Negator(ObjectName), + /// `HASHES` + Hashes, + /// `MERGES` + Merges, +} + +impl fmt::Display for AlterOperator { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ALTER OPERATOR {} (", self.name)?; + if let Some(left_type) = &self.left_type { + write!(f, "{}", left_type)?; + } else { + write!(f, "NONE")?; + } + write!(f, ", {}) {}", self.right_type, self.operation) + } +} + +impl fmt::Display for AlterOperatorOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::OwnerTo(owner) => write!(f, "OWNER TO {}", owner), + Self::SetSchema { schema_name } => write!(f, "SET SCHEMA {}", schema_name), + Self::Set { options } => { + 
write!(f, "SET (")?; + for (i, option) in options.iter().enumerate() { + if i > 0 { + write!(f, ", ")?; + } + write!(f, "{}", option)?; + } + write!(f, ")") + } + } + } +} + +impl fmt::Display for OperatorOption { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Restrict(Some(proc_name)) => write!(f, "RESTRICT = {}", proc_name), + Self::Restrict(None) => write!(f, "RESTRICT = NONE"), + Self::Join(Some(proc_name)) => write!(f, "JOIN = {}", proc_name), + Self::Join(None) => write!(f, "JOIN = NONE"), + Self::Commutator(op_name) => write!(f, "COMMUTATOR = {}", op_name), + Self::Negator(op_name) => write!(f, "NEGATOR = {}", op_name), + Self::Hashes => write!(f, "HASHES"), + Self::Merges => write!(f, "MERGES"), + } + } +} + /// An `ALTER COLUMN` (`Statement::AlterTable`) operation #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -1790,7 +1887,7 @@ impl fmt::Display for ColumnOption { GeneratedAs::Always => "ALWAYS", GeneratedAs::ByDefault => "BY DEFAULT", // ExpStored goes with an expression, handled above - GeneratedAs::ExpStored => unreachable!(), + GeneratedAs::ExpStored => "", }; write!(f, "GENERATED {when} AS IDENTITY")?; if sequence_options.is_some() { @@ -3981,18 +4078,8 @@ pub struct CreateOperator { pub left_arg: Option, /// RIGHTARG parameter (right operand type) pub right_arg: Option, - /// COMMUTATOR parameter (commutator operator) - pub commutator: Option, - /// NEGATOR parameter (negator operator) - pub negator: Option, - /// RESTRICT parameter (restriction selectivity function) - pub restrict: Option, - /// JOIN parameter (join selectivity function) - pub join: Option, - /// HASHES flag - pub hashes: bool, - /// MERGES flag - pub merges: bool, + /// Operator options (COMMUTATOR, NEGATOR, RESTRICT, JOIN, HASHES, MERGES) + pub options: Vec, } /// CREATE OPERATOR FAMILY statement @@ -4044,23 +4131,9 @@ impl fmt::Display for CreateOperator { if let 
Some(right_arg) = &self.right_arg { params.push(format!("RIGHTARG = {}", right_arg)); } - if let Some(commutator) = &self.commutator { - params.push(format!("COMMUTATOR = {}", commutator)); - } - if let Some(negator) = &self.negator { - params.push(format!("NEGATOR = {}", negator)); - } - if let Some(restrict) = &self.restrict { - params.push(format!("RESTRICT = {}", restrict)); - } - if let Some(join) = &self.join { - params.push(format!("JOIN = {}", join)); - } - if self.hashes { - params.push("HASHES".to_string()); - } - if self.merges { - params.push("MERGES".to_string()); + + for option in &self.options { + params.push(option.to_string()); } write!(f, "{}", params.join(", "))?; diff --git a/src/ast/helpers/stmt_data_loading.rs b/src/ast/helpers/stmt_data_loading.rs index 92a7272799..62ee77ce32 100644 --- a/src/ast/helpers/stmt_data_loading.rs +++ b/src/ast/helpers/stmt_data_loading.rs @@ -99,15 +99,15 @@ impl fmt::Display for StageParamsObject { impl fmt::Display for StageLoadSelectItem { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if self.alias.is_some() { - write!(f, "{}.", self.alias.as_ref().unwrap())?; + if let Some(alias) = &self.alias { + write!(f, "{alias}.")?; } write!(f, "${}", self.file_col_num)?; - if self.element.is_some() { - write!(f, ":{}", self.element.as_ref().unwrap())?; + if let Some(element) = &self.element { + write!(f, ":{element}")?; } - if self.item_as.is_some() { - write!(f, " AS {}", self.item_as.as_ref().unwrap())?; + if let Some(item_as) = &self.item_as { + write!(f, " AS {item_as}")?; } Ok(()) } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 44d50c1390..b9d947e022 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -59,23 +59,23 @@ pub use self::dcl::{ AlterRoleOperation, CreateRole, ResetConfig, RoleOption, SecondaryRoles, SetConfigValue, Use, }; pub use self::ddl::{ - Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, - AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, 
AlterTableAlgorithm, - AlterTableLock, AlterTableOperation, AlterTableType, AlterType, AlterTypeAddValue, - AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, - ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, - ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, + Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, AlterOperator, + AlterOperatorOperation, AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, + AlterTableAlgorithm, AlterTableLock, AlterTableOperation, AlterTableType, AlterType, + AlterTypeAddValue, AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, + AlterTypeRenameValue, ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, + ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, CreateExtension, CreateFunction, CreateIndex, CreateOperator, CreateOperatorClass, CreateOperatorFamily, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial, DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, DropOperatorFamily, DropOperatorSignature, DropTrigger, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, - OperatorArgTypes, OperatorClassItem, OperatorPurpose, Owner, Partition, ProcedureParam, - ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, - Truncate, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, - UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, - UserDefinedTypeStorage, ViewColumnDef, + OperatorArgTypes, OperatorClassItem, OperatorOption, OperatorPurpose, Owner, Partition, + ProcedureParam, ReferentialAction, 
RenameTableNameKind, ReplicaIdentity, TagsColumnOption, + TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef, + UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, + UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, }; pub use self::dml::{Delete, Insert, Update}; pub use self::operator::{BinaryOperator, UnaryOperator}; @@ -3396,6 +3396,11 @@ pub enum Statement { /// ``` AlterType(AlterType), /// ```sql + /// ALTER OPERATOR + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteroperator.html) + AlterOperator(AlterOperator), + /// ```sql /// ALTER ROLE /// ``` AlterRole { @@ -4971,6 +4976,7 @@ impl fmt::Display for Statement { Statement::AlterType(AlterType { name, operation }) => { write!(f, "ALTER TYPE {name} {operation}") } + Statement::AlterOperator(alter_operator) => write!(f, "{alter_operator}"), Statement::AlterRole { name, operation } => { write!(f, "ALTER ROLE {name} {operation}") } @@ -9814,8 +9820,8 @@ impl fmt::Display for ShowCharset { } else { write!(f, " CHARACTER SET")?; } - if self.filter.is_some() { - write!(f, " {}", self.filter.as_ref().unwrap())?; + if let Some(filter) = &self.filter { + write!(f, " {filter}")?; } Ok(()) } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 684cc5b028..20a0525856 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -252,6 +252,7 @@ impl Spanned for Values { /// - [Statement::CreateSecret] /// - [Statement::CreateRole] /// - [Statement::AlterType] +/// - [Statement::AlterOperator] /// - [Statement::AlterRole] /// - [Statement::AttachDatabase] /// - [Statement::AttachDuckDBDatabase] @@ -401,6 +402,7 @@ impl Spanned for Statement { ), // These statements need to be implemented Statement::AlterType { .. } => Span::empty(), + Statement::AlterOperator { .. } => Span::empty(), Statement::AlterRole { .. } => Span::empty(), Statement::AlterSession { .. } => Span::empty(), Statement::AttachDatabase { .. 
} => Span::empty(), diff --git a/src/keywords.rs b/src/keywords.rs index 827df1cee6..f06842ec6a 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -67,6 +67,15 @@ macro_rules! define_keywords { pub const ALL_KEYWORDS: &[&str] = &[ $($ident),* ]; + + impl core::fmt::Display for Keyword { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { + match self { + Keyword::NoKeyword => write!(f, "NoKeyword"), + $(Keyword::$ident => write!(f, "{}", $ident),)* + } + } + } }; } diff --git a/src/lib.rs b/src/lib.rs index dbfd1791a7..4050173caa 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -153,6 +153,7 @@ // Splitting complex nodes (expressions, statements, types) into separate types // would bloat the API and hide intent. Extra memory is a worthwhile tradeoff. #![allow(clippy::large_enum_variant)] +#![forbid(clippy::unreachable)] // Allow proc-macros to find this crate extern crate self as sqlparser; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index b2fa3b169d..ce83adaf0d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1194,7 +1194,11 @@ impl<'a> Parser<'a> { let mut id_parts: Vec = vec![match t { Token::Word(w) => w.into_ident(next_token.span), Token::SingleQuotedString(s) => Ident::with_quote('\'', s), - _ => unreachable!(), // We matched above + _ => { + return Err(ParserError::ParserError( + "Internal parser error: unexpected token type".to_string(), + )) + } }]; while self.consume_token(&Token::Period) { @@ -1641,7 +1645,11 @@ impl<'a> Parser<'a> { Token::PGSquareRoot => UnaryOperator::PGSquareRoot, Token::PGCubeRoot => UnaryOperator::PGCubeRoot, Token::AtSign => UnaryOperator::PGAbs, - _ => unreachable!(), + _ => { + return Err(ParserError::ParserError( + "Internal parser error: unexpected unary operator token".to_string(), + )) + } }; Ok(Expr::UnaryOp { op, @@ -1709,18 +1717,22 @@ impl<'a> Parser<'a> { Ok(Expr::Value(self.parse_value()?)) } Token::LParen => { - let expr = if let Some(expr) = self.try_parse_expr_sub_query()? 
{ - expr - } else if let Some(lambda) = self.try_parse_lambda()? { - return Ok(lambda); - } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; - match exprs.len() { - 0 => unreachable!(), // parse_comma_separated ensures 1 or more - 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), - _ => Expr::Tuple(exprs), - } - }; + let expr = + if let Some(expr) = self.try_parse_expr_sub_query()? { + expr + } else if let Some(lambda) = self.try_parse_lambda()? { + return Ok(lambda); + } else { + let exprs = self.parse_comma_separated(Parser::parse_expr)?; + match exprs.len() { + 0 => return Err(ParserError::ParserError( + "Internal parser error: parse_comma_separated returned empty list" + .to_string(), + )), + 1 => Expr::Nested(Box::new(exprs.into_iter().next().unwrap())), + _ => Expr::Tuple(exprs), + } + }; self.expect_token(&Token::RParen)?; Ok(expr) } @@ -3591,7 +3603,9 @@ impl<'a> Parser<'a> { right: Box::new(right), is_some: keyword == Keyword::SOME, }, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{ALL, ANY, SOME}}, got {unexpected_keyword:?}"), + )), }) } else { Ok(Expr::BinaryOp { @@ -5590,13 +5604,14 @@ impl<'a> Parser<'a> { } else { None }; - let option = self - .parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) - .map(|keyword| match keyword { - Keyword::CASCADE => ReferentialAction::Cascade, - Keyword::RESTRICT => ReferentialAction::Restrict, - _ => unreachable!(), - }); + let option = match self.parse_one_of_keywords(&[Keyword::CASCADE, Keyword::RESTRICT]) { + Some(Keyword::CASCADE) => Some(ReferentialAction::Cascade), + Some(Keyword::RESTRICT) => Some(ReferentialAction::Restrict), + Some(unexpected_keyword) => return Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{CASCADE, RESTRICT}}, got {unexpected_keyword:?}"), + )), + None => None, + }; Ok(Statement::DropTrigger(DropTrigger { if_exists, 
trigger_name, @@ -5646,7 +5661,9 @@ impl<'a> Parser<'a> { match self.expect_one_of_keywords(&[Keyword::ROW, Keyword::STATEMENT])? { Keyword::ROW => TriggerObject::Row, Keyword::STATEMENT => TriggerObject::Statement, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in ROW/STATEMENT"), + )), }; Some(if include_each { @@ -5709,7 +5726,9 @@ impl<'a> Parser<'a> { Keyword::INSTEAD => self .expect_keyword_is(Keyword::OF) .map(|_| TriggerPeriod::InsteadOf)?, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in trigger period"), + )), }, ) } @@ -5733,7 +5752,9 @@ impl<'a> Parser<'a> { } Keyword::DELETE => TriggerEvent::Delete, Keyword::TRUNCATE => TriggerEvent::Truncate, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in trigger event"), + )), }, ) } @@ -5767,7 +5788,9 @@ impl<'a> Parser<'a> { { Keyword::FUNCTION => TriggerExecBodyType::Function, Keyword::PROCEDURE => TriggerExecBodyType::Procedure, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in trigger exec body"), + )), }, func_desc: self.parse_function_desc()?, }) @@ -6284,7 +6307,9 @@ impl<'a> Parser<'a> { Some(Keyword::CURRENT_USER) => Owner::CurrentUser, Some(Keyword::CURRENT_ROLE) => Owner::CurrentRole, Some(Keyword::SESSION_USER) => Owner::SessionUser, - Some(_) => unreachable!(), + Some(unexpected_keyword) => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in owner"), + )), None => { match self.parse_identifier() { Ok(ident) => Owner::Ident(ident), @@ -6346,7 +6371,9 @@ impl<'a> Parser<'a> { Some(match keyword 
{ Keyword::PERMISSIVE => CreatePolicyType::Permissive, Keyword::RESTRICTIVE => CreatePolicyType::Restrictive, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in policy type"), + )), }) } else { None @@ -6366,7 +6393,9 @@ impl<'a> Parser<'a> { Keyword::INSERT => CreatePolicyCommand::Insert, Keyword::UPDATE => CreatePolicyCommand::Update, Keyword::DELETE => CreatePolicyCommand::Delete, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in policy command"), + )), }) } else { None @@ -6479,12 +6508,7 @@ impl<'a> Parser<'a> { let mut is_procedure = false; let mut left_arg: Option = None; let mut right_arg: Option = None; - let mut commutator: Option = None; - let mut negator: Option = None; - let mut restrict: Option = None; - let mut join: Option = None; - let mut hashes = false; - let mut merges = false; + let mut options: Vec = Vec::new(); loop { let keyword = self.expect_one_of_keywords(&[ @@ -6501,11 +6525,11 @@ impl<'a> Parser<'a> { ])?; match keyword { - Keyword::HASHES if !hashes => { - hashes = true; + Keyword::HASHES if !options.iter().any(|o| matches!(o, OperatorOption::Hashes)) => { + options.push(OperatorOption::Hashes); } - Keyword::MERGES if !merges => { - merges = true; + Keyword::MERGES if !options.iter().any(|o| matches!(o, OperatorOption::Merges)) => { + options.push(OperatorOption::Merges); } Keyword::FUNCTION | Keyword::PROCEDURE if function.is_none() => { self.expect_token(&Token::Eq)?; @@ -6520,33 +6544,49 @@ impl<'a> Parser<'a> { self.expect_token(&Token::Eq)?; right_arg = Some(self.parse_data_type()?); } - Keyword::COMMUTATOR if commutator.is_none() => { + Keyword::COMMUTATOR + if !options + .iter() + .any(|o| matches!(o, OperatorOption::Commutator(_))) => + { self.expect_token(&Token::Eq)?; if 
self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; - commutator = Some(self.parse_operator_name()?); + let op = self.parse_operator_name()?; self.expect_token(&Token::RParen)?; + options.push(OperatorOption::Commutator(op)); } else { - commutator = Some(self.parse_operator_name()?); + options.push(OperatorOption::Commutator(self.parse_operator_name()?)); } } - Keyword::NEGATOR if negator.is_none() => { + Keyword::NEGATOR + if !options + .iter() + .any(|o| matches!(o, OperatorOption::Negator(_))) => + { self.expect_token(&Token::Eq)?; if self.parse_keyword(Keyword::OPERATOR) { self.expect_token(&Token::LParen)?; - negator = Some(self.parse_operator_name()?); + let op = self.parse_operator_name()?; self.expect_token(&Token::RParen)?; + options.push(OperatorOption::Negator(op)); } else { - negator = Some(self.parse_operator_name()?); + options.push(OperatorOption::Negator(self.parse_operator_name()?)); } } - Keyword::RESTRICT if restrict.is_none() => { + Keyword::RESTRICT + if !options + .iter() + .any(|o| matches!(o, OperatorOption::Restrict(_))) => + { self.expect_token(&Token::Eq)?; - restrict = Some(self.parse_object_name(false)?); + options.push(OperatorOption::Restrict(Some( + self.parse_object_name(false)?, + ))); } - Keyword::JOIN if join.is_none() => { + Keyword::JOIN if !options.iter().any(|o| matches!(o, OperatorOption::Join(_))) => { self.expect_token(&Token::Eq)?; - join = Some(self.parse_object_name(false)?); + options.push(OperatorOption::Join(Some(self.parse_object_name(false)?))); } _ => { return Err(ParserError::ParserError(format!( @@ -6575,12 +6615,7 @@ impl<'a> Parser<'a> { is_procedure, left_arg, right_arg, - commutator, - negator, - restrict, - join, - hashes, - merges, + options, })) } @@ -6997,7 +7032,9 @@ impl<'a> Parser<'a> { match keyword { Keyword::WITH => Some(true), Keyword::WITHOUT => Some(false), - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser 
error: unexpected keyword `{unexpected_keyword}` in cursor hold"), + )), } } None => None, @@ -9764,7 +9801,9 @@ impl<'a> Parser<'a> { Keyword::PART => Ok(Partition::Part(self.parse_expr()?)), Keyword::PARTITION => Ok(Partition::Expr(self.parse_expr()?)), // unreachable because expect_one_of_keywords used above - _ => unreachable!(), + unexpected_keyword => Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{PART, PARTITION}}, got {unexpected_keyword:?}"), + )), } } @@ -9780,6 +9819,7 @@ impl<'a> Parser<'a> { Keyword::ICEBERG, Keyword::SCHEMA, Keyword::USER, + Keyword::OPERATOR, ])?; match object_type { Keyword::SCHEMA => { @@ -9812,12 +9852,15 @@ impl<'a> Parser<'a> { operation, }) } + Keyword::OPERATOR => self.parse_alter_operator(), Keyword::ROLE => self.parse_alter_role(), Keyword::POLICY => self.parse_alter_policy(), Keyword::CONNECTOR => self.parse_alter_connector(), Keyword::USER => self.parse_alter_user(), // unreachable because expect_one_of_keywords used above - _ => unreachable!(), + unexpected_keyword => Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{VIEW, TYPE, TABLE, INDEX, ROLE, POLICY, CONNECTOR, ICEBERG, SCHEMA, USER, OPERATOR}}, got {unexpected_keyword:?}"), + )), } } @@ -9931,6 +9974,116 @@ impl<'a> Parser<'a> { } } + /// Parse a [Statement::AlterOperator] + /// + /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-alteroperator.html) + pub fn parse_alter_operator(&mut self) -> Result { + let name = self.parse_operator_name()?; + + // Parse (left_type, right_type) + self.expect_token(&Token::LParen)?; + + let left_type = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_data_type()?) 
+ }; + + self.expect_token(&Token::Comma)?; + let right_type = self.parse_data_type()?; + self.expect_token(&Token::RParen)?; + + // Parse the operation + let operation = if self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) { + let owner = if self.parse_keyword(Keyword::CURRENT_ROLE) { + Owner::CurrentRole + } else if self.parse_keyword(Keyword::CURRENT_USER) { + Owner::CurrentUser + } else if self.parse_keyword(Keyword::SESSION_USER) { + Owner::SessionUser + } else { + Owner::Ident(self.parse_identifier()?) + }; + AlterOperatorOperation::OwnerTo(owner) + } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) { + let schema_name = self.parse_object_name(false)?; + AlterOperatorOperation::SetSchema { schema_name } + } else if self.parse_keyword(Keyword::SET) { + self.expect_token(&Token::LParen)?; + + let mut options = Vec::new(); + loop { + let keyword = self.expect_one_of_keywords(&[ + Keyword::RESTRICT, + Keyword::JOIN, + Keyword::COMMUTATOR, + Keyword::NEGATOR, + Keyword::HASHES, + Keyword::MERGES, + ])?; + + match keyword { + Keyword::RESTRICT => { + self.expect_token(&Token::Eq)?; + let proc_name = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_object_name(false)?) + }; + options.push(OperatorOption::Restrict(proc_name)); + } + Keyword::JOIN => { + self.expect_token(&Token::Eq)?; + let proc_name = if self.parse_keyword(Keyword::NONE) { + None + } else { + Some(self.parse_object_name(false)?) 
+ }; + options.push(OperatorOption::Join(proc_name)); + } + Keyword::COMMUTATOR => { + self.expect_token(&Token::Eq)?; + let op_name = self.parse_operator_name()?; + options.push(OperatorOption::Commutator(op_name)); + } + Keyword::NEGATOR => { + self.expect_token(&Token::Eq)?; + let op_name = self.parse_operator_name()?; + options.push(OperatorOption::Negator(op_name)); + } + Keyword::HASHES => { + options.push(OperatorOption::Hashes); + } + Keyword::MERGES => { + options.push(OperatorOption::Merges); + } + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in operator option"), + )), + } + + if !self.consume_token(&Token::Comma) { + break; + } + } + + self.expect_token(&Token::RParen)?; + AlterOperatorOperation::Set { options } + } else { + return self.expected_ref( + "OWNER TO, SET SCHEMA, or SET after ALTER OPERATOR", + self.peek_token_ref(), + ); + }; + + Ok(Statement::AlterOperator(AlterOperator { + name, + left_type, + right_type, + operation, + })) + } + // Parse a [Statement::AlterSchema] // ALTER SCHEMA [ IF EXISTS ] schema_name pub fn parse_alter_schema(&mut self) -> Result { @@ -14175,7 +14328,9 @@ impl<'a> Parser<'a> { table = match kw { Keyword::PIVOT => self.parse_pivot_table_factor(table)?, Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in pivot/unpivot"), + )), } } return Ok(table); @@ -14433,7 +14588,9 @@ impl<'a> Parser<'a> { table = match kw { Keyword::PIVOT => self.parse_pivot_table_factor(table)?, Keyword::UNPIVOT => self.parse_unpivot_table_factor(table)?, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in pivot/unpivot"), + )), } } @@ -15532,7 +15689,9 @@ impl<'a> Parser<'a> { } 
} Some(Keyword::TABLE) | None => Some(GrantObjects::Tables(objects?)), - _ => unreachable!(), + Some(unexpected_keyword) => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in grant objects"), + )), } } } else { @@ -16402,7 +16561,9 @@ impl<'a> Parser<'a> { let kind = match self.expect_one_of_keywords(&[Keyword::MIN, Keyword::MAX])? { Keyword::MIN => HavingBoundKind::Min, Keyword::MAX => HavingBoundKind::Max, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: unexpected keyword `{unexpected_keyword}` in having bound"), + )), }; clauses.push(FunctionArgumentClause::Having(HavingBound( kind, @@ -16930,7 +17091,9 @@ impl<'a> Parser<'a> { let lock_type = match self.expect_one_of_keywords(&[Keyword::UPDATE, Keyword::SHARE])? { Keyword::UPDATE => LockType::Update, Keyword::SHARE => LockType::Share, - _ => unreachable!(), + unexpected_keyword => return Err(ParserError::ParserError( + format!("Internal parser error: expected any of {{UPDATE, SHARE}}, got {unexpected_keyword:?}"), + )), }; let of = if self.parse_keyword(Keyword::OF) { Some(self.parse_object_name(false)?) 
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 96e0414571..11512cf803 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6715,24 +6715,26 @@ fn parse_create_operator() { length: 255, unit: None }))), - commutator: Some(ObjectName::from(vec![ - Ident::new("schema"), - Ident::new(">") - ])), - negator: Some(ObjectName::from(vec![ - Ident::new("schema"), - Ident::new("<=") - ])), - restrict: Some(ObjectName::from(vec![ - Ident::new("myschema"), - Ident::new("sel_func") - ])), - join: Some(ObjectName::from(vec![ - Ident::new("myschema"), - Ident::new("join_func") - ])), - hashes: true, - merges: true, + options: vec![ + OperatorOption::Commutator(ObjectName::from(vec![ + Ident::new("schema"), + Ident::new(">") + ])), + OperatorOption::Negator(ObjectName::from(vec![ + Ident::new("schema"), + Ident::new("<=") + ])), + OperatorOption::Restrict(Some(ObjectName::from(vec![ + Ident::new("myschema"), + Ident::new("sel_func") + ]))), + OperatorOption::Join(Some(ObjectName::from(vec![ + Ident::new("myschema"), + Ident::new("join_func") + ]))), + OperatorOption::Hashes, + OperatorOption::Merges, + ], }) ); @@ -6748,12 +6750,7 @@ fn parse_create_operator() { is_procedure: false, left_arg: None, right_arg: None, - commutator: None, - negator: None, - restrict: None, - join: None, - hashes: false, - merges: false, + options: vec![], }) ); } @@ -6778,13 +6775,9 @@ fn parse_create_operator() { ), ] { match pg().verified_stmt(&format!("CREATE OPERATOR {name} (FUNCTION = f)")) { - Statement::CreateOperator(CreateOperator { - name, - hashes: false, - merges: false, - .. - }) => { + Statement::CreateOperator(CreateOperator { name, options, .. 
}) => { assert_eq!(name, expected_name); + assert!(options.is_empty()); } _ => unreachable!(), } @@ -6920,6 +6913,202 @@ fn parse_drop_operator() { assert!(pg().parse_sql_statements(sql).is_err()); } +#[test] +fn parse_alter_operator() { + use sqlparser::ast::{AlterOperator, AlterOperatorOperation, OperatorOption, Owner}; + + // Test ALTER OPERATOR ... OWNER TO with different owner types + for (owner_sql, owner_ast) in [ + ("joe", Owner::Ident(Ident::new("joe"))), + ("CURRENT_USER", Owner::CurrentUser), + ("CURRENT_ROLE", Owner::CurrentRole), + ("SESSION_USER", Owner::SessionUser), + ] { + for (op_name, op_name_ast, left_type_sql, left_type_ast, right_type_sql, right_type_ast) in [ + ( + "+", + ObjectName::from(vec![Ident::new("+")]), + "INTEGER", + Some(DataType::Integer(None)), + "INTEGER", + DataType::Integer(None), + ), + ( + "~", + ObjectName::from(vec![Ident::new("~")]), + "NONE", + None, + "BIT", + DataType::Bit(None), + ), + ( + "@@", + ObjectName::from(vec![Ident::new("@@")]), + "TEXT", + Some(DataType::Text), + "TEXT", + DataType::Text, + ), + ] { + let sql = format!( + "ALTER OPERATOR {} ({}, {}) OWNER TO {}", + op_name, left_type_sql, right_type_sql, owner_sql + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: op_name_ast.clone(), + left_type: left_type_ast.clone(), + right_type: right_type_ast.clone(), + operation: AlterOperatorOperation::OwnerTo(owner_ast.clone()), + }) + ); + } + } + + // Test ALTER OPERATOR ... 
SET SCHEMA + for (op_name, op_name_ast, schema_name, schema_name_ast) in [ + ( + "+", + ObjectName::from(vec![Ident::new("+")]), + "new_schema", + ObjectName::from(vec![Ident::new("new_schema")]), + ), + ( + "myschema.@@", + ObjectName::from(vec![Ident::new("myschema"), Ident::new("@@")]), + "other_schema", + ObjectName::from(vec![Ident::new("other_schema")]), + ), + ] { + let sql = format!( + "ALTER OPERATOR {} (TEXT, TEXT) SET SCHEMA {}", + op_name, schema_name + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: op_name_ast, + left_type: Some(DataType::Text), + right_type: DataType::Text, + operation: AlterOperatorOperation::SetSchema { + schema_name: schema_name_ast, + }, + }) + ); + } + + // Test ALTER OPERATOR ... SET with RESTRICT and JOIN + for (restrict_val, restrict_ast, join_val, join_ast) in [ + ( + "_int_contsel", + Some(ObjectName::from(vec![Ident::new("_int_contsel")])), + "_int_contjoinsel", + Some(ObjectName::from(vec![Ident::new("_int_contjoinsel")])), + ), + ( + "NONE", + None, + "my_joinsel", + Some(ObjectName::from(vec![Ident::new("my_joinsel")])), + ), + ( + "my_sel", + Some(ObjectName::from(vec![Ident::new("my_sel")])), + "NONE", + None, + ), + ] { + let sql = format!( + "ALTER OPERATOR && (TEXT, TEXT) SET (RESTRICT = {}, JOIN = {})", + restrict_val, join_val + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new("&&")]), + left_type: Some(DataType::Text), + right_type: DataType::Text, + operation: AlterOperatorOperation::Set { + options: vec![ + OperatorOption::Restrict(restrict_ast), + OperatorOption::Join(join_ast), + ], + }, + }) + ); + } + + // Test ALTER OPERATOR ... 
SET with COMMUTATOR and NEGATOR + for (operator, commutator, negator) in [("&&", "&&", ">"), ("+", "+", "-"), ("<", "<", ">=")] { + let sql = format!( + "ALTER OPERATOR {} (INTEGER, INTEGER) SET (COMMUTATOR = {}, NEGATOR = {})", + operator, commutator, negator + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new(operator)]), + left_type: Some(DataType::Integer(None)), + right_type: DataType::Integer(None), + operation: AlterOperatorOperation::Set { + options: vec![ + OperatorOption::Commutator(ObjectName::from(vec![Ident::new(commutator)])), + OperatorOption::Negator(ObjectName::from(vec![Ident::new(negator)])), + ], + }, + }) + ); + } + + // Test ALTER OPERATOR ... SET with HASHES and MERGES (individually and combined) + for (operator, options_sql, options_ast) in [ + ("=", "HASHES", vec![OperatorOption::Hashes]), + ("<", "MERGES", vec![OperatorOption::Merges]), + ( + "<=", + "HASHES, MERGES", + vec![OperatorOption::Hashes, OperatorOption::Merges], + ), + ] { + let sql = format!( + "ALTER OPERATOR {} (INTEGER, INTEGER) SET ({})", + operator, options_sql + ); + assert_eq!( + pg_and_generic().verified_stmt(&sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new(operator)]), + left_type: Some(DataType::Integer(None)), + right_type: DataType::Integer(None), + operation: AlterOperatorOperation::Set { + options: options_ast + }, + }) + ); + } + + // Test ALTER OPERATOR ... 
SET with multiple options combined + let sql = + "ALTER OPERATOR + (INTEGER, INTEGER) SET (COMMUTATOR = +, NEGATOR = -, HASHES, MERGES)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperator(AlterOperator { + name: ObjectName::from(vec![Ident::new("+")]), + left_type: Some(DataType::Integer(None)), + right_type: DataType::Integer(None), + operation: AlterOperatorOperation::Set { + options: vec![ + OperatorOption::Commutator(ObjectName::from(vec![Ident::new("+")])), + OperatorOption::Negator(ObjectName::from(vec![Ident::new("-")])), + OperatorOption::Hashes, + OperatorOption::Merges, + ], + }, + }) + ); +} + #[test] fn parse_drop_operator_family() { for if_exists in [true, false] { From ca2d333dff851d344ce5b307c6ec4911a707703e Mon Sep 17 00:00:00 2001 From: xitep Date: Sat, 6 Dec 2025 08:59:50 +0100 Subject: [PATCH 004/121] Oracle: Support for MERGE predicates (#2101) Co-authored-by: Ifeanyi Ubah --- src/ast/dml.rs | 340 +++++++++++++++++++++++++++++++++++- src/ast/mod.rs | 301 ++----------------------------- src/ast/spans.rs | 119 +++++++++---- src/dialect/mod.rs | 2 +- src/parser/merge.rs | 242 +++++++++++++++++++++++++ src/parser/mod.rs | 190 +------------------- tests/sqlparser_bigquery.rs | 39 +++-- tests/sqlparser_common.rs | 60 ++++++- 8 files changed, 759 insertions(+), 534 deletions(-) create mode 100644 src/parser/merge.rs diff --git a/src/ast/dml.rs b/src/ast/dml.rs index d6009ce8ab..d740b140e8 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -24,13 +24,16 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::display_utils::{indented_list, Indent, SpaceOrNewline}; +use crate::{ + ast::display_separated, + display_utils::{indented_list, Indent, SpaceOrNewline}, +}; use super::{ display_comma_separated, helpers::attached_token::AttachedToken, query::InputFormatClause, Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, - 
OrderByExpr, Query, SelectItem, Setting, SqliteOnConflict, TableObject, TableWithJoins, - UpdateTableFromKind, + OrderByExpr, Query, SelectInto, SelectItem, Setting, SqliteOnConflict, TableFactor, + TableObject, TableWithJoins, UpdateTableFromKind, Values, }; /// INSERT statement. @@ -310,3 +313,334 @@ impl Display for Update { Ok(()) } } + +/// A `MERGE` statement. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Merge { + /// The `MERGE` token that starts the statement. + pub merge_token: AttachedToken, + /// optional INTO keyword + pub into: bool, + /// Specifies the table to merge + pub table: TableFactor, + /// Specifies the table or subquery to join with the target table + pub source: TableFactor, + /// Specifies the expression on which to join the target table and source + pub on: Box, + /// Specifies the actions to perform when values match or do not match. 
+ pub clauses: Vec, + // Specifies the output to save changes in MSSQL + pub output: Option, +} + +impl Display for Merge { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "MERGE{int} {table} USING {source} ", + int = if self.into { " INTO" } else { "" }, + table = self.table, + source = self.source, + )?; + write!(f, "ON {on} ", on = self.on)?; + write!(f, "{}", display_separated(&self.clauses, " "))?; + if let Some(ref output) = self.output { + write!(f, " {output}")?; + } + Ok(()) + } +} + +/// A `WHEN` clause within a `MERGE` Statement +/// +/// Example: +/// ```sql +/// WHEN NOT MATCHED BY SOURCE AND product LIKE '%washer%' THEN DELETE +/// ``` +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeClause { + /// The `WHEN` token that starts the sub-expression. + pub when_token: AttachedToken, + pub clause_kind: MergeClauseKind, + pub predicate: Option, + pub action: MergeAction, +} + +impl Display for MergeClause { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let MergeClause { + when_token: _, + clause_kind, + predicate, + action, + } = self; + + write!(f, "WHEN {clause_kind}")?; + if let Some(pred) = predicate { + write!(f, " AND {pred}")?; + } + write!(f, " THEN {action}") + } +} + +/// Variant of `WHEN` clause used within a `MERGE` Statement. 
+/// +/// Example: +/// ```sql +/// MERGE INTO T USING U ON FALSE WHEN MATCHED THEN DELETE +/// ``` +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeClauseKind { + /// `WHEN MATCHED` + Matched, + /// `WHEN NOT MATCHED` + NotMatched, + /// `WHEN MATCHED BY TARGET` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + NotMatchedByTarget, + /// `WHEN MATCHED BY SOURCE` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + NotMatchedBySource, +} + +impl Display for MergeClauseKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeClauseKind::Matched => write!(f, "MATCHED"), + MergeClauseKind::NotMatched => write!(f, "NOT MATCHED"), + MergeClauseKind::NotMatchedByTarget => write!(f, "NOT MATCHED BY TARGET"), + MergeClauseKind::NotMatchedBySource => write!(f, "NOT MATCHED BY SOURCE"), + } + } +} + +/// Underlying statement of a `WHEN` clause within a `MERGE` Statement +/// +/// Example +/// ```sql +/// INSERT (product, quantity) VALUES(product, quantity) +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeAction { + /// An `INSERT` clause + /// + /// Example: + /// 
```sql + /// INSERT (product, quantity) VALUES(product, quantity) + /// ``` + Insert(MergeInsertExpr), + /// An `UPDATE` clause + /// + /// Example: + /// ```sql + /// UPDATE SET quantity = T.quantity + S.quantity + /// ``` + Update(MergeUpdateExpr), + /// A plain `DELETE` clause + Delete { + /// The `DELETE` token that starts the sub-expression. + delete_token: AttachedToken, + }, +} + +impl Display for MergeAction { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeAction::Insert(insert) => { + write!(f, "INSERT {insert}") + } + MergeAction::Update(update) => { + write!(f, "UPDATE {update}") + } + MergeAction::Delete { .. } => { + write!(f, "DELETE") + } + } + } +} + +/// The type of expression used to insert rows within a `MERGE` statement. +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MergeInsertKind { + /// The insert expression is defined from an explicit `VALUES` clause + /// + /// Example: + /// ```sql + /// INSERT VALUES(product, quantity) + /// ``` + Values(Values), + /// The insert expression is defined using only the `ROW` keyword. + /// + /// Example: + /// ```sql + /// INSERT ROW + /// ``` + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) + Row, +} + +impl Display for MergeInsertKind { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + MergeInsertKind::Values(values) => { + write!(f, "{values}") + } + MergeInsertKind::Row => { + write!(f, "ROW") + } + } + } +} + +/// The expression used to insert rows within a `MERGE` statement. 
+/// +/// Examples +/// ```sql +/// INSERT (product, quantity) VALUES(product, quantity) +/// INSERT ROW +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeInsertExpr { + /// The `INSERT` token that starts the sub-expression. + pub insert_token: AttachedToken, + /// Columns (if any) specified by the insert. + /// + /// Example: + /// ```sql + /// INSERT (product, quantity) VALUES(product, quantity) + /// INSERT (product, quantity) ROW + /// ``` + pub columns: Vec, + /// The token, `[VALUES | ROW]` starting `kind`. + pub kind_token: AttachedToken, + /// The insert type used by the statement. + pub kind: MergeInsertKind, + /// An optional condition to restrict the insertion (Oracle specific) + pub insert_predicate: Option, +} + +impl Display for MergeInsertExpr { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if !self.columns.is_empty() { + write!(f, "({}) ", display_comma_separated(self.columns.as_slice()))?; + } + write!(f, "{}", self.kind)?; + if let Some(predicate) = self.insert_predicate.as_ref() { + write!(f, " WHERE {}", predicate)?; + } + Ok(()) + } +} + +/// The expression used to update rows within a `MERGE` statement. 
+/// +/// Examples +/// ```sql +/// UPDATE SET quantity = T.quantity + S.quantity +/// ``` +/// +/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) +/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) +/// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/MERGE.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MergeUpdateExpr { + /// The `UPDATE` token that starts the sub-expression. + pub update_token: AttachedToken, + /// The update assiment expressions + pub assignments: Vec, + /// `where_clause` for the update (Oralce specific) + pub update_predicate: Option, + /// `delete_clause` for the update "delete where" (Oracle specific) + pub delete_predicate: Option, +} + +impl Display for MergeUpdateExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "SET {}", display_comma_separated(&self.assignments))?; + if let Some(predicate) = self.update_predicate.as_ref() { + write!(f, " WHERE {predicate}")?; + } + if let Some(predicate) = self.delete_predicate.as_ref() { + write!(f, " DELETE WHERE {predicate}")?; + } + Ok(()) + } +} + +/// A `OUTPUT` Clause in the end of a `MERGE` Statement +/// +/// Example: +/// OUTPUT $action, deleted.* INTO dbo.temp_products; +/// [mssql](https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OutputClause { + Output { + output_token: AttachedToken, + select_items: Vec, + into_table: Option, + }, + Returning { + returning_token: AttachedToken, + select_items: Vec, + }, +} + +impl fmt::Display for OutputClause { + fn fmt(&self, f: &mut 
fmt::Formatter) -> fmt::Result { + match self { + OutputClause::Output { + output_token: _, + select_items, + into_table, + } => { + f.write_str("OUTPUT ")?; + display_comma_separated(select_items).fmt(f)?; + if let Some(into_table) = into_table { + f.write_str(" ")?; + into_table.fmt(f)?; + } + Ok(()) + } + OutputClause::Returning { + returning_token: _, + select_items, + } => { + f.write_str("RETURNING ")?; + display_comma_separated(select_items).fmt(f) + } + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index b9d947e022..6cb4c33605 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -77,7 +77,10 @@ pub use self::ddl::{ UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, }; -pub use self::dml::{Delete, Insert, Update}; +pub use self::dml::{ + Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, + MergeInsertKind, MergeUpdateExpr, OutputClause, Update, +}; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, @@ -344,6 +347,12 @@ impl From> for ObjectName { } } +impl From for ObjectName { + fn from(ident: Ident) -> Self { + ObjectName(vec![ObjectNamePart::Identifier(ident)]) + } +} + impl fmt::Display for ObjectName { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "{}", display_separated(&self.0, ".")) @@ -4087,22 +4096,7 @@ pub enum Statement { /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) /// [MSSQL](https://learn.microsoft.com/en-us/sql/t-sql/statements/merge-transact-sql?view=sql-server-ver16) - Merge { - /// The `MERGE` token that starts the statement. 
- merge_token: AttachedToken, - /// optional INTO keyword - into: bool, - /// Specifies the table to merge - table: TableFactor, - /// Specifies the table or subquery to join with the target table - source: TableFactor, - /// Specifies the expression on which to join the target table and source - on: Box, - /// Specifies the actions to perform when values match or do not match. - clauses: Vec, - // Specifies the output to save changes in MSSQL - output: Option, - }, + Merge(Merge), /// ```sql /// CACHE [ FLAG ] TABLE [ OPTIONS('K1' = 'V1', 'K2' = V2) ] [ AS ] [ ] /// ``` @@ -5520,27 +5514,7 @@ impl fmt::Display for Statement { Statement::ReleaseSavepoint { name } => { write!(f, "RELEASE SAVEPOINT {name}") } - Statement::Merge { - merge_token: _, - into, - table, - source, - on, - clauses, - output, - } => { - write!( - f, - "MERGE{int} {table} USING {source} ", - int = if *into { " INTO" } else { "" } - )?; - write!(f, "ON {on} ")?; - write!(f, "{}", display_separated(clauses, " "))?; - if let Some(output) = output { - write!(f, " {output}")?; - } - Ok(()) - } + Statement::Merge(merge) => merge.fmt(f), Statement::Cache { table_name, table_flag, @@ -8565,257 +8539,6 @@ impl fmt::Display for CopyLegacyCsvOption { } } -/// Variant of `WHEN` clause used within a `MERGE` Statement. 
-/// -/// Example: -/// ```sql -/// MERGE INTO T USING U ON FALSE WHEN MATCHED THEN DELETE -/// ``` -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeClauseKind { - /// `WHEN MATCHED` - Matched, - /// `WHEN NOT MATCHED` - NotMatched, - /// `WHEN MATCHED BY TARGET` - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - NotMatchedByTarget, - /// `WHEN MATCHED BY SOURCE` - /// - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - NotMatchedBySource, -} - -impl Display for MergeClauseKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeClauseKind::Matched => write!(f, "MATCHED"), - MergeClauseKind::NotMatched => write!(f, "NOT MATCHED"), - MergeClauseKind::NotMatchedByTarget => write!(f, "NOT MATCHED BY TARGET"), - MergeClauseKind::NotMatchedBySource => write!(f, "NOT MATCHED BY SOURCE"), - } - } -} - -/// The type of expression used to insert rows within a `MERGE` statement. 
-/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeInsertKind { - /// The insert expression is defined from an explicit `VALUES` clause - /// - /// Example: - /// ```sql - /// INSERT VALUES(product, quantity) - /// ``` - Values(Values), - /// The insert expression is defined using only the `ROW` keyword. - /// - /// Example: - /// ```sql - /// INSERT ROW - /// ``` - /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) - Row, -} - -impl Display for MergeInsertKind { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeInsertKind::Values(values) => { - write!(f, "{values}") - } - MergeInsertKind::Row => { - write!(f, "ROW") - } - } - } -} - -/// The expression used to insert rows within a `MERGE` statement. -/// -/// Examples -/// ```sql -/// INSERT (product, quantity) VALUES(product, quantity) -/// INSERT ROW -/// ``` -/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MergeInsertExpr { - /// The `INSERT` token that starts the sub-expression. - pub insert_token: AttachedToken, - /// Columns (if any) specified by the insert. - /// - /// Example: - /// ```sql - /// INSERT (product, quantity) VALUES(product, quantity) - /// INSERT (product, quantity) ROW - /// ``` - pub columns: Vec, - /// The token, `[VALUES | ROW]` starting `kind`. 
- pub kind_token: AttachedToken, - /// The insert type used by the statement. - pub kind: MergeInsertKind, -} - -impl Display for MergeInsertExpr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - if !self.columns.is_empty() { - write!(f, "({}) ", display_comma_separated(self.columns.as_slice()))?; - } - write!(f, "{}", self.kind) - } -} - -/// Underlying statement of a when clause within a `MERGE` Statement -/// -/// Example -/// ```sql -/// INSERT (product, quantity) VALUES(product, quantity) -/// ``` -/// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum MergeAction { - /// An `INSERT` clause - /// - /// Example: - /// ```sql - /// INSERT (product, quantity) VALUES(product, quantity) - /// ``` - Insert(MergeInsertExpr), - /// An `UPDATE` clause - /// - /// Example: - /// ```sql - /// UPDATE SET quantity = T.quantity + S.quantity - /// ``` - Update { - /// The `UPDATE` token that starts the sub-expression. - update_token: AttachedToken, - assignments: Vec, - }, - /// A plain `DELETE` clause - Delete { - /// The `DELETE` token that starts the sub-expression. - delete_token: AttachedToken, - }, -} - -impl Display for MergeAction { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - MergeAction::Insert(insert) => { - write!(f, "INSERT {insert}") - } - MergeAction::Update { assignments, .. } => { - write!(f, "UPDATE SET {}", display_comma_separated(assignments)) - } - MergeAction::Delete { .. 
} => { - write!(f, "DELETE") - } - } - } -} - -/// A when clause within a `MERGE` Statement -/// -/// Example: -/// ```sql -/// WHEN NOT MATCHED BY SOURCE AND product LIKE '%washer%' THEN DELETE -/// ``` -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) -/// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct MergeClause { - /// The `WHEN` token that starts the sub-expression. - pub when_token: AttachedToken, - pub clause_kind: MergeClauseKind, - pub predicate: Option, - pub action: MergeAction, -} - -impl Display for MergeClause { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let MergeClause { - when_token: _, - clause_kind, - predicate, - action, - } = self; - - write!(f, "WHEN {clause_kind}")?; - if let Some(pred) = predicate { - write!(f, " AND {pred}")?; - } - write!(f, " THEN {action}") - } -} - -/// A Output Clause in the end of a 'MERGE' Statement -/// -/// Example: -/// OUTPUT $action, deleted.* INTO dbo.temp_products; -/// [mssql](https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub enum OutputClause { - Output { - output_token: AttachedToken, - select_items: Vec, - into_table: Option, - }, - Returning { - returning_token: AttachedToken, - select_items: Vec, - }, -} - -impl fmt::Display for OutputClause { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - OutputClause::Output { - output_token: _, - select_items, - into_table, - } => { - f.write_str("OUTPUT ")?; - display_comma_separated(select_items).fmt(f)?; - if let Some(into_table) = 
into_table { - f.write_str(" ")?; - into_table.fmt(f)?; - } - Ok(()) - } - OutputClause::Returning { - returning_token: _, - select_items, - } => { - f.write_str("RETURNING ")?; - display_comma_separated(select_items).fmt(f) - } - } - } -} - #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 20a0525856..d63ed62b4c 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -38,15 +38,15 @@ use super::{ FunctionArgumentClause, FunctionArgumentList, FunctionArguments, GroupByExpr, HavingBound, IfStatement, IlikeSelectItem, IndexColumn, Insert, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, LimitClause, - MatchRecognizePattern, Measure, MergeAction, MergeClause, MergeInsertExpr, MergeInsertKind, - NamedParenthesizedList, NamedWindowDefinition, ObjectName, ObjectNamePart, Offset, OnConflict, - OnConflictAction, OnInsert, OpenStatement, OrderBy, OrderByExpr, OrderByKind, OutputClause, - Partition, PivotValueSource, ProjectionSelect, Query, RaiseStatement, RaiseStatementValue, - ReferentialAction, RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, - SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, - TableAliasColumnDef, TableConstraint, TableFactor, TableObject, TableOptionsClustered, - TableWithJoins, Update, UpdateTableFromKind, Use, Value, Values, ViewColumnDef, WhileStatement, - WildcardAdditionalOptions, With, WithFill, + MatchRecognizePattern, Measure, Merge, MergeAction, MergeClause, MergeInsertExpr, + MergeInsertKind, MergeUpdateExpr, NamedParenthesizedList, NamedWindowDefinition, ObjectName, + ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OpenStatement, OrderBy, + OrderByExpr, OrderByKind, OutputClause, Partition, PivotValueSource, 
ProjectionSelect, Query, + RaiseStatement, RaiseStatementValue, ReferentialAction, RenameSelectItem, ReplaceSelectElement, + ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, + SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, TableFactor, TableObject, + TableOptionsClustered, TableWithJoins, Update, UpdateTableFromKind, Use, Value, Values, + ViewColumnDef, WhileStatement, WildcardAdditionalOptions, With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. @@ -453,20 +453,7 @@ impl Spanned for Statement { Statement::Explain { .. } => Span::empty(), Statement::Savepoint { .. } => Span::empty(), Statement::ReleaseSavepoint { .. } => Span::empty(), - Statement::Merge { - merge_token, - into: _, - table: _, - source: _, - on, - clauses, - output, - } => union_spans( - [merge_token.0.span, on.span()] - .into_iter() - .chain(clauses.iter().map(Spanned::span)) - .chain(output.iter().map(Spanned::span)), - ), + Statement::Merge(merge) => merge.span(), Statement::Cache { .. } => Span::empty(), Statement::UNCache { .. } => Span::empty(), Statement::CreateSequence { .. 
} => Span::empty(), @@ -927,6 +914,17 @@ impl Spanned for Update { } } +impl Spanned for Merge { + fn span(&self) -> Span { + union_spans( + [self.merge_token.0.span, self.on.span()] + .into_iter() + .chain(self.clauses.iter().map(Spanned::span)) + .chain(self.output.iter().map(Spanned::span)), + ) + } +} + impl Spanned for FromTable { fn span(&self) -> Span { match self { @@ -2421,12 +2419,7 @@ impl Spanned for MergeAction { fn span(&self) -> Span { match self { MergeAction::Insert(expr) => expr.span(), - MergeAction::Update { - update_token, - assignments, - } => union_spans( - core::iter::once(update_token.0.span).chain(assignments.iter().map(Spanned::span)), - ), + MergeAction::Update(expr) => expr.span(), MergeAction::Delete { delete_token } => delete_token.0.span, } } @@ -2444,7 +2437,19 @@ impl Spanned for MergeInsertExpr { }, ] .into_iter() - .chain(self.columns.iter().map(|i| i.span)), + .chain(self.insert_predicate.iter().map(Spanned::span)) + .chain(self.columns.iter().map(|i| i.span())), + ) + } +} + +impl Spanned for MergeUpdateExpr { + fn span(&self) -> Span { + union_spans( + core::iter::once(self.update_token.0.span) + .chain(self.assignments.iter().map(Spanned::span)) + .chain(self.update_predicate.iter().map(Spanned::span)) + .chain(self.delete_predicate.iter().map(Spanned::span)), ) } } @@ -2768,7 +2773,7 @@ WHERE id = 1 assert_eq!(stmt_span.end, (16, 67).into()); // ~ individual tokens within the statement - let Statement::Merge { + let Statement::Merge(Merge { merge_token, into: _, table: _, @@ -2776,7 +2781,7 @@ WHERE id = 1 on: _, clauses, output, - } = &r[0] + }) = &r[0] else { panic!("not a MERGE statement"); }; @@ -2814,10 +2819,12 @@ WHERE id = 1 clauses[1].when_token.0.span, Span::new(Location::new(12, 17), Location::new(12, 21)) ); - if let MergeAction::Update { + if let MergeAction::Update(MergeUpdateExpr { update_token, assignments: _, - } = &clauses[1].action + update_predicate: _, + delete_predicate: _, + }) = &clauses[1].action { 
assert_eq!( update_token.0.span, @@ -2890,7 +2897,7 @@ WHERE id = 1 ); // ~ individual tokens within the statement - if let Statement::Merge { output, .. } = &r[0] { + if let Statement::Merge(Merge { output, .. }) = &r[0] { if let Some(OutputClause::Returning { returning_token, .. }) = output @@ -2924,7 +2931,7 @@ WHERE id = 1 ); // ~ individual tokens within the statement - if let Statement::Merge { output, .. } = &r[0] { + if let Statement::Merge(Merge { output, .. }) = &r[0] { if let Some(OutputClause::Output { output_token, .. }) = output { assert_eq!( output_token.0.span, @@ -2937,4 +2944,44 @@ WHERE id = 1 panic!("not a MERGE statement"); }; } + + #[test] + fn test_merge_statement_spans_with_update_predicates() { + let sql = r#" + MERGE INTO a USING b ON a.id = b.id + WHEN MATCHED THEN + UPDATE set a.x = a.x + b.x + WHERE b.x != 2 + DELETE WHERE a.x <> 3"#; + + let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + + // ~ assert the span of the whole statement + let stmt_span = r[0].span(); + assert_eq!( + stmt_span, + Span::new(Location::new(2, 8), Location::new(6, 36)) + ); + } + + #[test] + fn test_merge_statement_spans_with_insert_predicate() { + let sql = r#" + MERGE INTO a USING b ON a.id = b.id + WHEN NOT MATCHED THEN + INSERT VALUES (b.x, b.y) WHERE b.x != 2 +-- qed +"#; + + let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); + assert_eq!(1, r.len()); + + // ~ assert the span of the whole statement + let stmt_span = r[0].span(); + assert_eq!( + stmt_span, + Span::new(Location::new(2, 8), Location::new(4, 52)) + ); + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 83c6da4821..1d99d86319 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -603,7 +603,7 @@ pub trait Dialect: Debug + Any { false } - /// Return true if the dialect supports specifying multiple options + /// Returns true if the dialect supports specifying multiple options /// in a `CREATE TABLE` 
statement for the structure of the new table. For example: /// `CREATE TABLE t (a INT, b INT) AS SELECT 1 AS b, 2 AS a` fn supports_create_table_multi_schema_info_sources(&self) -> bool { diff --git a/src/parser/merge.rs b/src/parser/merge.rs new file mode 100644 index 0000000000..b2283b6716 --- /dev/null +++ b/src/parser/merge.rs @@ -0,0 +1,242 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! SQL Parser for a `MERGE` statement + +#[cfg(not(feature = "std"))] +use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; + +use crate::{ + ast::{ + Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, + MergeUpdateExpr, ObjectName, OutputClause, SetExpr, Statement, + }, + dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, + keywords::Keyword, + parser::IsOptional, + tokenizer::TokenWithSpan, +}; + +use super::{Parser, ParserError}; + +impl Parser<'_> { + /// Parse a `MERGE` statement, returning a `Box`ed SetExpr + /// + /// This is used to reduce the size of the stack frames in debug builds + pub(super) fn parse_merge_setexpr_boxed( + &mut self, + merge_token: TokenWithSpan, + ) -> Result, ParserError> { + Ok(Box::new(SetExpr::Merge(self.parse_merge(merge_token)?))) + } + + pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { + let into = self.parse_keyword(Keyword::INTO); + + let table = self.parse_table_factor()?; + + self.expect_keyword_is(Keyword::USING)?; + let source = 
self.parse_table_factor()?; + self.expect_keyword_is(Keyword::ON)?; + let on = self.parse_expr()?; + let clauses = self.parse_merge_clauses()?; + let output = match self.parse_one_of_keywords(&[Keyword::OUTPUT, Keyword::RETURNING]) { + Some(keyword) => Some(self.parse_output(keyword, self.get_current_token().clone())?), + None => None, + }; + + Ok(Statement::Merge(Merge { + merge_token: merge_token.into(), + into, + table, + source, + on: Box::new(on), + clauses, + output, + })) + } + + fn parse_merge_clauses(&mut self) -> Result, ParserError> { + let mut clauses = vec![]; + loop { + if !(self.parse_keyword(Keyword::WHEN)) { + break; + } + let when_token = self.get_current_token().clone(); + + let mut clause_kind = MergeClauseKind::Matched; + if self.parse_keyword(Keyword::NOT) { + clause_kind = MergeClauseKind::NotMatched; + } + self.expect_keyword_is(Keyword::MATCHED)?; + + if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) + { + clause_kind = MergeClauseKind::NotMatchedBySource; + } else if matches!(clause_kind, MergeClauseKind::NotMatched) + && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) + { + clause_kind = MergeClauseKind::NotMatchedByTarget; + } + + let predicate = if self.parse_keyword(Keyword::AND) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + self.expect_keyword_is(Keyword::THEN)?; + + let merge_clause = match self.parse_one_of_keywords(&[ + Keyword::UPDATE, + Keyword::INSERT, + Keyword::DELETE, + ]) { + Some(Keyword::UPDATE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("UPDATE is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + } + + let update_token = self.get_current_token().clone(); + self.expect_keyword_is(Keyword::SET)?; + let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + let update_predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + let delete_predicate = if self.parse_keyword(Keyword::DELETE) { + let _ = self.expect_keyword(Keyword::WHERE)?; + Some(self.parse_expr()?) + } else { + None + }; + MergeAction::Update(MergeUpdateExpr { + update_token: update_token.into(), + assignments, + update_predicate, + delete_predicate, + }) + } + Some(Keyword::DELETE) => { + if matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("DELETE is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + }; + + let delete_token = self.get_current_token().clone(); + MergeAction::Delete { + delete_token: delete_token.into(), + } + } + Some(Keyword::INSERT) => { + if !matches!( + clause_kind, + MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget + ) { + return parser_err!( + format_args!("INSERT is not allowed in a {clause_kind} merge clause"), + self.get_current_token().span.start + ); + }; + + let insert_token = self.get_current_token().clone(); + let is_mysql = dialect_of!(self is MySqlDialect); + + let columns = self.parse_merge_clause_insert_columns(is_mysql)?; + let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect) 
+ && self.parse_keyword(Keyword::ROW) + { + (MergeInsertKind::Row, self.get_current_token().clone()) + } else { + self.expect_keyword_is(Keyword::VALUES)?; + let values_token = self.get_current_token().clone(); + let values = self.parse_values(is_mysql, false)?; + (MergeInsertKind::Values(values), values_token) + }; + let insert_predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) + } else { + None + }; + + MergeAction::Insert(MergeInsertExpr { + insert_token: insert_token.into(), + columns, + kind_token: kind_token.into(), + kind, + insert_predicate, + }) + } + _ => { + return parser_err!( + "expected UPDATE, DELETE or INSERT in merge clause", + self.peek_token_ref().span.start + ); + } + }; + clauses.push(MergeClause { + when_token: when_token.into(), + clause_kind, + predicate, + action: merge_clause, + }); + } + Ok(clauses) + } + + fn parse_merge_clause_insert_columns( + &mut self, + allow_empty: bool, + ) -> Result, ParserError> { + self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty) + } + + fn parse_output( + &mut self, + start_keyword: Keyword, + start_token: TokenWithSpan, + ) -> Result { + let select_items = self.parse_projection()?; + let into_table = if start_keyword == Keyword::OUTPUT && self.peek_keyword(Keyword::INTO) { + self.expect_keyword_is(Keyword::INTO)?; + Some(self.parse_select_into()?) 
+ } else { + None + }; + + Ok(if start_keyword == Keyword::OUTPUT { + OutputClause::Output { + output_token: start_token.into(), + select_items, + into_table, + } + } else { + OutputClause::Returning { + returning_token: start_token.into(), + select_items, + } + }) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ce83adaf0d..c499829cdf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -45,8 +45,6 @@ use crate::keywords::{Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; use sqlparser::parser::ParserState::ColumnDefinition; -mod alter; - #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParserError { TokenizerError(String), @@ -61,6 +59,9 @@ macro_rules! parser_err { }; } +mod alter; +mod merge; + #[cfg(feature = "std")] /// Implementation [`RecursionCounter`] if std is available mod recursion { @@ -11882,7 +11883,7 @@ impl<'a> Parser<'a> { token => { return Err(ParserError::ParserError(format!( "Unexpected token in identifier: {token}" - )))? + )))?; } } } @@ -12386,16 +12387,6 @@ impl<'a> Parser<'a> { Ok(Box::new(SetExpr::Delete(self.parse_delete(delete_token)?))) } - /// Parse a MERGE statement, returning a `Box`ed SetExpr - /// - /// This is used to reduce the size of the stack frames in debug builds - fn parse_merge_setexpr_boxed( - &mut self, - merge_token: TokenWithSpan, - ) -> Result, ParserError> { - Ok(Box::new(SetExpr::Merge(self.parse_merge(merge_token)?))) - } - pub fn parse_delete(&mut self, delete_token: TokenWithSpan) -> Result { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. 
@@ -17476,153 +17467,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_merge_clauses(&mut self) -> Result, ParserError> { - let mut clauses = vec![]; - loop { - if !(self.parse_keyword(Keyword::WHEN)) { - break; - } - let when_token = self.get_current_token().clone(); - - let mut clause_kind = MergeClauseKind::Matched; - if self.parse_keyword(Keyword::NOT) { - clause_kind = MergeClauseKind::NotMatched; - } - self.expect_keyword_is(Keyword::MATCHED)?; - - if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::SOURCE]) - { - clause_kind = MergeClauseKind::NotMatchedBySource; - } else if matches!(clause_kind, MergeClauseKind::NotMatched) - && self.parse_keywords(&[Keyword::BY, Keyword::TARGET]) - { - clause_kind = MergeClauseKind::NotMatchedByTarget; - } - - let predicate = if self.parse_keyword(Keyword::AND) { - Some(self.parse_expr()?) - } else { - None - }; - - self.expect_keyword_is(Keyword::THEN)?; - - let merge_clause = match self.parse_one_of_keywords(&[ - Keyword::UPDATE, - Keyword::INSERT, - Keyword::DELETE, - ]) { - Some(Keyword::UPDATE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("UPDATE is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - } - - let update_token = self.get_current_token().clone(); - self.expect_keyword_is(Keyword::SET)?; - MergeAction::Update { - update_token: update_token.into(), - assignments: self.parse_comma_separated(Parser::parse_assignment)?, - } - } - Some(Keyword::DELETE) => { - if matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("DELETE is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - }; - - let delete_token = self.get_current_token().clone(); - MergeAction::Delete { - delete_token: delete_token.into(), - } - } - 
Some(Keyword::INSERT) => { - if !matches!( - clause_kind, - MergeClauseKind::NotMatched | MergeClauseKind::NotMatchedByTarget - ) { - return parser_err!( - format_args!("INSERT is not allowed in a {clause_kind} merge clause"), - self.get_current_token().span.start - ); - }; - - let insert_token = self.get_current_token().clone(); - let is_mysql = dialect_of!(self is MySqlDialect); - - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; - let (kind, kind_token) = if dialect_of!(self is BigQueryDialect | GenericDialect) - && self.parse_keyword(Keyword::ROW) - { - (MergeInsertKind::Row, self.get_current_token().clone()) - } else { - self.expect_keyword_is(Keyword::VALUES)?; - let values_token = self.get_current_token().clone(); - let values = self.parse_values(is_mysql, false)?; - (MergeInsertKind::Values(values), values_token) - }; - MergeAction::Insert(MergeInsertExpr { - insert_token: insert_token.into(), - columns, - kind_token: kind_token.into(), - kind, - }) - } - _ => { - return parser_err!( - "expected UPDATE, DELETE or INSERT in merge clause", - self.peek_token_ref().span.start - ); - } - }; - clauses.push(MergeClause { - when_token: when_token.into(), - clause_kind, - predicate, - action: merge_clause, - }); - } - Ok(clauses) - } - - fn parse_output( - &mut self, - start_keyword: Keyword, - start_token: TokenWithSpan, - ) -> Result { - let select_items = self.parse_projection()?; - let into_table = if start_keyword == Keyword::OUTPUT && self.peek_keyword(Keyword::INTO) { - self.expect_keyword_is(Keyword::INTO)?; - Some(self.parse_select_into()?) 
- } else { - None - }; - - Ok(if start_keyword == Keyword::OUTPUT { - OutputClause::Output { - output_token: start_token.into(), - select_items, - into_table, - } - } else { - OutputClause::Returning { - returning_token: start_token.into(), - select_items, - } - }) - } - fn parse_select_into(&mut self) -> Result { let temporary = self .parse_one_of_keywords(&[Keyword::TEMP, Keyword::TEMPORARY]) @@ -17639,32 +17483,6 @@ impl<'a> Parser<'a> { }) } - pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { - let into = self.parse_keyword(Keyword::INTO); - - let table = self.parse_table_factor()?; - - self.expect_keyword_is(Keyword::USING)?; - let source = self.parse_table_factor()?; - self.expect_keyword_is(Keyword::ON)?; - let on = self.parse_expr()?; - let clauses = self.parse_merge_clauses()?; - let output = match self.parse_one_of_keywords(&[Keyword::OUTPUT, Keyword::RETURNING]) { - Some(keyword) => Some(self.parse_output(keyword, self.get_current_token().clone())?), - None => None, - }; - - Ok(Statement::Merge { - merge_token: merge_token.into(), - into, - table, - source, - on: Box::new(on), - clauses, - output, - }) - } - fn parse_pragma_value(&mut self) -> Result { match self.parse_value()?.value { v @ Value::SingleQuotedString(_) => Ok(v), diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index f2b9f2affe..24b9efcaa6 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1806,15 +1806,16 @@ fn parse_merge() { ); let insert_action = MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity")], + columns: vec![Ident::new("product").into(), Ident::new("quantity").into()], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, explicit_row: false, rows: vec![vec![Expr::value(number("1")), Expr::value(number("2"))]], }), + insert_predicate: None, }); - let update_action = 
MergeAction::Update { + let update_action = MergeAction::Update(MergeUpdateExpr { update_token: AttachedToken::empty(), assignments: vec![ Assignment { @@ -1826,17 +1827,19 @@ fn parse_merge() { value: Expr::value(number("2")), }, ], - }; + update_predicate: None, + delete_predicate: None, + }); match bigquery_and_generic().verified_stmt(sql) { - Statement::Merge { + Statement::Merge(Merge { into, table, source, on, clauses, .. - } => { + }) => { assert!(!into); assert_eq!( TableFactor::Table { @@ -1917,9 +1920,13 @@ fn parse_merge() { predicate: Some(Expr::value(number("1"))), action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity"),], + columns: vec![ + Ident::new("product").into(), + Ident::new("quantity").into(), + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1928,9 +1935,13 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("product"), Ident::new("quantity"),], + columns: vec![ + Ident::new("product").into(), + Ident::new("quantity").into(), + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1941,7 +1952,8 @@ fn parse_merge() { insert_token: AttachedToken::empty(), columns: vec![], kind_token: AttachedToken::empty(), - kind: MergeInsertKind::Row + kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1952,7 +1964,8 @@ fn parse_merge() { insert_token: AttachedToken::empty(), columns: vec![], kind_token: AttachedToken::empty(), - kind: MergeInsertKind::Row + kind: MergeInsertKind::Row, + insert_predicate: None, }) }, MergeClause { @@ -1975,7 +1988,7 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("a"), 
Ident::new("b"),], + columns: vec![Ident::new("a").into(), Ident::new("b").into(),], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, @@ -1984,7 +1997,8 @@ fn parse_merge() { Expr::value(number("1")), Expr::Identifier(Ident::new("DEFAULT")), ]] - }) + }), + insert_predicate: None, }) }, MergeClause { @@ -2002,7 +2016,8 @@ fn parse_merge() { Expr::value(number("1")), Expr::Identifier(Ident::new("DEFAULT")), ]] - }) + }), + insert_predicate: None, }) }, ], diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ccad67e39c..cfee262141 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -9793,22 +9793,22 @@ fn parse_merge() { let sql_no_into = "MERGE s.bar AS dest USING (SELECT * FROM s.foo) AS stg ON dest.D = stg.D AND dest.E = stg.E WHEN NOT MATCHED THEN INSERT (A, B, C) VALUES (stg.A, stg.B, stg.C) WHEN MATCHED AND dest.A = 'a' THEN UPDATE SET dest.F = stg.F, dest.G = stg.G WHEN MATCHED THEN DELETE"; match (verified_stmt(sql), verified_stmt(sql_no_into)) { ( - Statement::Merge { + Statement::Merge(Merge { into, table, source, on, clauses, .. - }, - Statement::Merge { + }), + Statement::Merge(Merge { into: no_into, table: table_no_into, source: source_no_into, on: on_no_into, clauses: clauses_no_into, .. 
- }, + }), ) => { assert!(into); assert!(!no_into); @@ -9921,7 +9921,11 @@ fn parse_merge() { predicate: None, action: MergeAction::Insert(MergeInsertExpr { insert_token: AttachedToken::empty(), - columns: vec![Ident::new("A"), Ident::new("B"), Ident::new("C")], + columns: vec![ + Ident::new("A").into(), + Ident::new("B").into(), + Ident::new("C").into() + ], kind_token: AttachedToken::empty(), kind: MergeInsertKind::Values(Values { value_keyword: false, @@ -9941,6 +9945,7 @@ fn parse_merge() { ]), ]] }), + insert_predicate: None, }), }, MergeClause { @@ -9956,7 +9961,7 @@ fn parse_merge() { (Value::SingleQuotedString("a".to_string())).with_empty_span() )), }), - action: MergeAction::Update { + action: MergeAction::Update(MergeUpdateExpr { update_token: AttachedToken::empty(), assignments: vec![ Assignment { @@ -9980,7 +9985,9 @@ fn parse_merge() { ]), }, ], - }, + update_predicate: None, + delete_predicate: None, + }), }, MergeClause { when_token: AttachedToken::empty(), @@ -9999,6 +10006,45 @@ fn parse_merge() { let sql = "MERGE INTO s.bar AS dest USING newArrivals AS S ON (1 > 1) WHEN NOT MATCHED THEN INSERT VALUES (stg.A, stg.B, stg.C)"; verified_stmt(sql); + + // MERGE with predicates + let sql = "\ +MERGE INTO FOO \ +USING FOO_IMPORT \ +ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN MATCHED THEN \ +UPDATE SET FOO.NAME = FOO_IMPORT.NAME \ +WHERE 1 = 1 \ +DELETE WHERE FOO.NAME LIKE '%.DELETE' \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (FOO_IMPORT.ID, UPPER(FOO_IMPORT.NAME)) \ +WHERE NOT FOO_IMPORT.NAME LIKE '%.DO_NOT_INSERT'"; + all_dialects().verified_stmt(sql); + + // MERGE with simple insert columns + let sql = "\ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (ID, NAME) \ +VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); + + // MERGE with qualified insert columns + let sql = "\ +MERGE INTO FOO USING FOO_IMPORT ON (FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (FOO.ID, FOO.NAME) \ 
+VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); + + // MERGE with schema qualified insert columns + let sql = "\ +MERGE INTO PLAYGROUND.FOO USING FOO_IMPORT ON (PLAYGROUND.FOO.ID = FOO_IMPORT.ID) \ +WHEN NOT MATCHED THEN \ +INSERT (PLAYGROUND.FOO.ID, PLAYGROUND.FOO.NAME) \ +VALUES (1, 'abc')"; + all_dialects().verified_stmt(sql); } #[test] From adbfc46177aad76b7d67d85fba0dea38a4f777cb Mon Sep 17 00:00:00 2001 From: xitep Date: Wed, 10 Dec 2025 12:50:26 +0100 Subject: [PATCH 005/121] [Oracle] Lower StringConcat precedence (#2115) --- src/dialect/oracle.rs | 19 ++++++- tests/sqlparser_oracle.rs | 105 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 tests/sqlparser_oracle.rs diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index 0d6aee5e60..f8bb0e155e 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -15,7 +15,14 @@ // specific language governing permissions and limitations // under the License. -use super::Dialect; +use log::debug; + +use crate::{ + parser::{Parser, ParserError}, + tokenizer::Token, +}; + +use super::{Dialect, Precedence}; /// A [`Dialect`] for [Oracle Databases](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/index.html) #[derive(Debug)] @@ -75,6 +82,16 @@ impl Dialect for OracleDialect { true } + fn get_next_precedence(&self, parser: &Parser) -> Option> { + let t = parser.peek_token(); + debug!("get_next_precedence() {t:?}"); + + match t.token { + Token::StringConcat => Some(Ok(self.prec_value(Precedence::PlusMinus))), + _ => None, + } + } + fn supports_group_by_expr(&self) -> bool { true } diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs new file mode 100644 index 0000000000..09fd41912d --- /dev/null +++ b/tests/sqlparser_oracle.rs @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Test SQL syntax, specific to [sqlparser::dialect::OracleDialect]. + +#[cfg(test)] +use pretty_assertions::assert_eq; + +use sqlparser::{ + ast::{BinaryOperator, Expr, Value, ValueWithSpan}, + dialect::OracleDialect, + tokenizer::Span, +}; +use test_utils::{expr_from_projection, number, TestedDialects}; + +mod test_utils; + +fn oracle() -> TestedDialects { + TestedDialects::new(vec![Box::new(OracleDialect)]) +} + +/// Oracle: `||` has a lower precedence than `*` and `/` +#[test] +fn muldiv_have_higher_precedence_than_strconcat() { + // ............... A .. B ...... C .. D ........... 
+ let sql = "SELECT 3 / 5 || 'asdf' || 7 * 9 FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + expr_from_projection(&select.projection[0]), + // (C || D) + &Expr::BinaryOp { + // (A || B) + left: Box::new(Expr::BinaryOp { + // A + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("3").into())), + op: BinaryOperator::Divide, + right: Box::new(Expr::Value(number("5").into())), + }), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Value(ValueWithSpan { + value: Value::SingleQuotedString("asdf".into()), + span: Span::empty(), + })), + }), + op: BinaryOperator::StringConcat, + // D + right: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("7").into())), + op: BinaryOperator::Multiply, + right: Box::new(Expr::Value(number("9").into())), + }), + } + ); +} + +/// Oracle: `+`, `-`, and `||` have the same precedence and parse from left-to-right +#[test] +fn plusminus_have_same_precedence_as_strconcat() { + // ................ A .. B .... C .. D ............ 
+ let sql = "SELECT 3 + 5 || '.3' || 7 - 9 FROM dual"; + let select = oracle().verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + expr_from_projection(&select.projection[0]), + // D + &Expr::BinaryOp { + left: Box::new(Expr::BinaryOp { + // B + left: Box::new(Expr::BinaryOp { + // A + left: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Value(number("3").into())), + op: BinaryOperator::Plus, + right: Box::new(Expr::Value(number("5").into())), + }), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Value(ValueWithSpan { + value: Value::SingleQuotedString(".3".into()), + span: Span::empty(), + })), + }), + op: BinaryOperator::StringConcat, + right: Box::new(Expr::Value(number("7").into())), + }), + op: BinaryOperator::Minus, + right: Box::new(Expr::Value(number("9").into())) + } + ); +} From 048bc8f09d569d223db36db577b18c7466d55005 Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Wed, 10 Dec 2025 03:51:44 -0800 Subject: [PATCH 006/121] Added alter external table support for snowflake (#2122) --- src/ast/ddl.rs | 25 +++++++++++++++----- src/ast/spans.rs | 2 +- src/dialect/snowflake.rs | 46 +++++++++++++++++++++++++++++++++++- tests/sqlparser_snowflake.rs | 9 +++++++ 4 files changed, 74 insertions(+), 8 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 3516c64a1b..8ccd533cfb 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -371,10 +371,15 @@ pub enum AlterTableOperation { DropClusteringKey, SuspendRecluster, ResumeRecluster, - /// `REFRESH` + /// `REFRESH [ '' ]` /// - /// Note: this is Snowflake specific for dynamic tables - Refresh, + /// Note: this is Snowflake specific for dynamic/external tables + /// + /// + Refresh { + /// Optional subpath for external table refresh + subpath: Option, + }, /// `SUSPEND` /// /// Note: this is Snowflake specific for dynamic tables @@ -863,8 +868,12 @@ impl fmt::Display for AlterTableOperation { write!(f, "RESUME RECLUSTER")?; Ok(()) } - AlterTableOperation::Refresh 
=> { - write!(f, "REFRESH") + AlterTableOperation::Refresh { subpath } => { + write!(f, "REFRESH")?; + if let Some(path) = subpath { + write!(f, " '{path}'")?; + } + Ok(()) } AlterTableOperation::Suspend => { write!(f, "SUSPEND") @@ -3977,8 +3986,11 @@ pub enum AlterTableType { /// Iceberg, /// Dynamic table type - /// + /// Dynamic, + /// External table type + /// + External, } /// ALTER TABLE statement @@ -4008,6 +4020,7 @@ impl fmt::Display for AlterTable { match &self.table_type { Some(AlterTableType::Iceberg) => write!(f, "ALTER ICEBERG TABLE ")?, Some(AlterTableType::Dynamic) => write!(f, "ALTER DYNAMIC TABLE ")?, + Some(AlterTableType::External) => write!(f, "ALTER EXTERNAL TABLE ")?, None => write!(f, "ALTER TABLE ")?, } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index d63ed62b4c..de8fba75b2 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1145,7 +1145,7 @@ impl Spanned for AlterTableOperation { AlterTableOperation::DropClusteringKey => Span::empty(), AlterTableOperation::SuspendRecluster => Span::empty(), AlterTableOperation::ResumeRecluster => Span::empty(), - AlterTableOperation::Refresh => Span::empty(), + AlterTableOperation::Refresh { .. } => Span::empty(), AlterTableOperation::Suspend => Span::empty(), AlterTableOperation::Resume => Span::empty(), AlterTableOperation::Algorithm { .. 
} => Span::empty(), diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 4cfaddceb3..ed01c128b9 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -221,6 +221,11 @@ impl Dialect for SnowflakeDialect { return Some(parse_alter_dynamic_table(parser)); } + if parser.parse_keywords(&[Keyword::ALTER, Keyword::EXTERNAL, Keyword::TABLE]) { + // ALTER EXTERNAL TABLE + return Some(parse_alter_external_table(parser)); + } + if parser.parse_keywords(&[Keyword::ALTER, Keyword::SESSION]) { // ALTER SESSION let set = match parser.parse_one_of_keywords(&[Keyword::SET, Keyword::UNSET]) { @@ -619,7 +624,7 @@ fn parse_alter_dynamic_table(parser: &mut Parser) -> Result Result +fn parse_alter_external_table(parser: &mut Parser) -> Result { + let if_exists = parser.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); + let table_name = parser.parse_object_name(true)?; + + // Parse the operation (REFRESH for now) + let operation = if parser.parse_keyword(Keyword::REFRESH) { + // Optional subpath for refreshing specific partitions + let subpath = match parser.peek_token().token { + Token::SingleQuotedString(s) => { + parser.next_token(); + Some(s) + } + _ => None, + }; + AlterTableOperation::Refresh { subpath } + } else { + return parser.expected("REFRESH after ALTER EXTERNAL TABLE", parser.peek_token()); + }; + + let end_token = if parser.peek_token_ref().token == Token::SemiColon { + parser.peek_token_ref().clone() + } else { + parser.get_current_token().clone() + }; + + Ok(Statement::AlterTable(AlterTable { + name: table_name, + if_exists, + only: false, + operations: vec![operation], + location: None, + on_cluster: None, + table_type: Some(AlterTableType::External), + end_token: AttachedToken(end_token), + })) +} + /// Parse snowflake alter session. 
/// fn parse_alter_session(parser: &mut Parser, set: bool) -> Result { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 22a6326660..37e9f8cb4b 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -4635,3 +4635,12 @@ fn test_alter_dynamic_table() { snowflake().verified_stmt("ALTER DYNAMIC TABLE my_dyn_table SUSPEND"); snowflake().verified_stmt("ALTER DYNAMIC TABLE my_dyn_table RESUME"); } + +#[test] +fn test_alter_external_table() { + snowflake().verified_stmt("ALTER EXTERNAL TABLE some_table REFRESH"); + snowflake().verified_stmt("ALTER EXTERNAL TABLE some_table REFRESH 'year=2025/month=12/'"); + snowflake().verified_stmt("ALTER EXTERNAL TABLE IF EXISTS some_table REFRESH"); + snowflake() + .verified_stmt("ALTER EXTERNAL TABLE IF EXISTS some_table REFRESH 'year=2025/month=12/'"); +} From 0b1e0c35d956edd51bfa00c3a18c5c44ccd8343f Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Wed, 10 Dec 2025 06:53:22 -0500 Subject: [PATCH 007/121] MSSQL: Add support for parenthesized stored procedure name in EXEC (#2126) --- src/parser/mod.rs | 4 ++++ tests/sqlparser_common.rs | 2 ++ 2 files changed, 6 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c499829cdf..dc52994554 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17368,7 +17368,11 @@ impl<'a> Parser<'a> { { None } else { + let has_parentheses = self.consume_token(&Token::LParen); let name = self.parse_object_name(false)?; + if has_parentheses { + self.expect_token(&Token::RParen)?; + } Some(name) }; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index cfee262141..da8e7b495a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -12120,6 +12120,8 @@ fn parse_execute_stored_procedure() { } _ => unreachable!(), } + // Test optional parentheses around procedure name + ms_and_generic().one_statement_parses_to("EXEC ('name')", "EXECUTE 'name'"); } #[test] 
From 1b842d3b6a76eadd0a8dc9bfecc8cb1dcd0bd3c0 Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Wed, 10 Dec 2025 07:04:22 -0500 Subject: [PATCH 008/121] MSSQL: Parse IF/ELSE without semicolon delimiters (#2128) --- src/dialect/mssql.rs | 22 +++++++++++++++++----- src/parser/mod.rs | 13 +++++++------ tests/sqlparser_mssql.rs | 39 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 62 insertions(+), 12 deletions(-) diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index e1902b3896..faf3402c24 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -21,14 +21,12 @@ use crate::ast::{ GranteesType, IfStatement, Statement, }; use crate::dialect::Dialect; -use crate::keywords::{self, Keyword}; +use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; #[cfg(not(feature = "std"))] use alloc::{vec, vec::Vec}; -const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[Keyword::IF, Keyword::ELSE]; - /// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) #[derive(Debug)] pub struct MsSqlDialect {} @@ -128,8 +126,22 @@ impl Dialect for MsSqlDialect { &[GranteesType::Public] } - fn is_column_alias(&self, kw: &Keyword, _parser: &mut Parser) -> bool { - !keywords::RESERVED_FOR_COLUMN_ALIAS.contains(kw) && !RESERVED_FOR_COLUMN_ALIAS.contains(kw) + fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + match kw { + // List of keywords that cannot be used as select item aliases in MSSQL + // regardless of whether the alias is explicit or implicit + Keyword::IF | Keyword::ELSE => false, + _ => explicit || self.is_column_alias(kw, parser), + } + } + + fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + match kw { + // List of keywords that cannot be used as table aliases in MSSQL + // regardless of whether the alias is explicit or implicit + Keyword::IF | Keyword::ELSE => 
false, + _ => explicit || self.is_table_alias(kw, parser), + } } fn parse_statement(&self, parser: &mut Parser) -> Option> { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index dc52994554..54fb327374 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11503,16 +11503,17 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { - // By default, if a word is located after the `AS` keyword we consider it an alias - // as long as it's not reserved. + // Accepts a keyword as an alias if the AS keyword explicitly indicate an alias or if the + // caller provided a list of reserved keywords and the keyword is not on that list. Token::Word(w) - if after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword)) => + if reserved_kwds.is_some() + && (after_as || reserved_kwds.is_some_and(|x| !x.contains(&w.keyword))) => { Ok(Some(w.into_ident(next_token.span))) } - // This pattern allows for customizing the acceptance of words as aliases based on the caller's - // context, such as to what SQL element this word is a potential alias of (select item alias, table name - // alias, etc.) or dialect-specific logic that goes beyond a simple list of reserved keywords. + // Accepts a keyword as alias based on the caller's context, such as to what SQL element + // this word is a potential alias of using the validator call-back. This allows for + // dialect-specific logic. Token::Word(w) if validator(after_as, &w.keyword, self) => { Ok(Some(w.into_ident(next_token.span))) } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 37e8e962f3..70e0aab491 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2501,8 +2501,45 @@ fn test_tsql_no_semicolon_delimiter() { DECLARE @X AS NVARCHAR(MAX)='x' DECLARE @Y AS NVARCHAR(MAX)='y' "#; - let stmts = tsql().parse_sql_statements(sql).unwrap(); assert_eq!(stmts.len(), 2); assert!(stmts.iter().all(|s| matches!(s, Statement::Declare { .. 
}))); + + let sql = r#" +SELECT col FROM tbl +IF x=1 + SELECT 1 +ELSE + SELECT 2 + "#; + let stmts = tsql().parse_sql_statements(sql).unwrap(); + assert_eq!(stmts.len(), 2); + assert!(matches!(&stmts[0], Statement::Query(_))); + assert!(matches!(&stmts[1], Statement::If(_))); +} + +#[test] +fn test_sql_keywords_as_table_aliases() { + // Some keywords that should not be parsed as an alias implicitly or explicitly + let reserved_kws = vec!["IF", "ELSE"]; + for kw in reserved_kws { + for explicit in &["", "AS "] { + assert!(tsql() + .parse_sql_statements(&format!("SELECT * FROM tbl {explicit}{kw}")) + .is_err()); + } + } +} + +#[test] +fn test_sql_keywords_as_column_aliases() { + // Some keywords that should not be parsed as an alias implicitly or explicitly + let reserved_kws = vec!["IF", "ELSE"]; + for kw in reserved_kws { + for explicit in &["", "AS "] { + assert!(tsql() + .parse_sql_statements(&format!("SELECT col {explicit}{kw} FROM tbl")) + .is_err()); + } + } } From 9b8a2d1e226a024758a4dbbaaf47fafe67a9619d Mon Sep 17 00:00:00 2001 From: xitep Date: Tue, 16 Dec 2025 12:30:30 +0100 Subject: [PATCH 009/121] Extract source comments (#2107) Co-authored-by: Ifeanyi Ubah --- src/ast/comments.rs | 329 ++++++++++++++++++++++++++++++++++++ src/ast/mod.rs | 1 + src/ast/spans.rs | 8 +- src/parser/mod.rs | 53 +++++- tests/sqlparser_comments.rs | 75 ++++++++ 5 files changed, 459 insertions(+), 7 deletions(-) create mode 100644 src/ast/comments.rs create mode 100644 tests/sqlparser_comments.rs diff --git a/src/ast/comments.rs b/src/ast/comments.rs new file mode 100644 index 0000000000..1f5b3102d3 --- /dev/null +++ b/src/ast/comments.rs @@ -0,0 +1,329 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Provides a representation of source code comments in parsed SQL code. +//! +//! See [Comments::find] for an example. + +#[cfg(not(feature = "std"))] +use alloc::{string::String, vec::Vec}; + +use core::{ + ops::{Bound, Deref, RangeBounds}, + slice, +}; + +use crate::tokenizer::{Location, Span}; + +/// An opaque container for comments from a parse SQL source code. +#[derive(Default, Debug)] +pub struct Comments(Vec); + +impl Comments { + /// Accepts `comment` if its the first or is located strictly after the + /// last accepted comment. In other words, this method will skip the + /// comment if its comming out of order (as encountered in the parsed + /// source code.) + pub(crate) fn offer(&mut self, comment: CommentWithSpan) { + if self + .0 + .last() + .map(|last| last.span < comment.span) + .unwrap_or(true) + { + self.0.push(comment); + } + } + + /// Finds comments starting within the given location range. The order of + /// iterator reflects the order of the comments as encountered in the parsed + /// source code. + /// + /// # Example + /// ```rust + /// use sqlparser::{dialect::GenericDialect, parser::Parser, tokenizer::Location}; + /// + /// let sql = r#"/* + /// header comment ... + /// ... 
spanning multiple lines + /// */ + /// + /// -- first statement + /// SELECT 'hello' /* world */ FROM DUAL; + /// + /// -- second statement + /// SELECT 123 FROM DUAL; + /// + /// -- trailing comment + /// "#; + /// + /// let (ast, comments) = Parser::parse_sql_with_comments(&GenericDialect, sql).unwrap(); + /// + /// // all comments appearing before line seven, i.e. before the first statement itself + /// assert_eq!( + /// &comments.find(..Location::new(7, 1)).map(|c| c.as_str()).collect::>(), + /// &["\n header comment ...\n ... spanning multiple lines\n", " first statement\n"]); + /// + /// // all comments appearing within the first statement + /// assert_eq!( + /// &comments.find(Location::new(7, 1)..Location::new(8,1)).map(|c| c.as_str()).collect::>(), + /// &[" world "]); + /// + /// // all comments appearing within or after the first statement + /// assert_eq!( + /// &comments.find(Location::new(7, 1)..).map(|c| c.as_str()).collect::>(), + /// &[" world ", " second statement\n", " trailing comment\n"]); + /// ``` + /// + /// The [Spanned](crate::ast::Spanned) trait allows you to access location + /// information for certain AST nodes. + pub fn find>(&self, range: R) -> Iter<'_> { + let (start, end) = ( + self.start_index(range.start_bound()), + self.end_index(range.end_bound()), + ); + debug_assert!((0..=self.0.len()).contains(&start)); + debug_assert!((0..=self.0.len()).contains(&end)); + // in case the user specified a reverse range + Iter(if start <= end { + self.0[start..end].iter() + } else { + self.0[0..0].iter() + }) + } + + /// Find the index of the first comment starting "before" the given location. + /// + /// The returned index is _inclusive_ and within the range of `0..=self.0.len()`. 
+ fn start_index(&self, location: Bound<&Location>) -> usize { + match location { + Bound::Included(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i, + Err(i) => i, + } + } + Bound::Excluded(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i + 1, + Err(i) => i, + } + } + Bound::Unbounded => 0, + } + } + + /// Find the index of the first comment starting "after" the given location. + /// + /// The returned index is _exclusive_ and within the range of `0..=self.0.len()`. + fn end_index(&self, location: Bound<&Location>) -> usize { + match location { + Bound::Included(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i + 1, + Err(i) => i, + } + } + Bound::Excluded(location) => { + match self.0.binary_search_by(|c| c.span.start.cmp(location)) { + Ok(i) => i, + Err(i) => i, + } + } + Bound::Unbounded => self.0.len(), + } + } +} + +impl From for Vec { + fn from(comments: Comments) -> Self { + comments.0 + } +} + +/// A source code comment with information of its entire span. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct CommentWithSpan { + /// The source code comment iself + pub comment: Comment, + /// The span of the comment including its markers + pub span: Span, +} + +impl Deref for CommentWithSpan { + type Target = Comment; + + fn deref(&self) -> &Self::Target { + &self.comment + } +} + +/// A unified type of the different source code comment formats. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Comment { + /// A single line comment, typically introduced with a prefix and spanning + /// until end-of-line or end-of-file in the source code. + /// + /// Note: `content` will include the terminating new-line character, if any. + SingleLine { content: String, prefix: String }, + + /// A multi-line comment, typically enclosed in `/* .. */` markers. The + /// string represents the content excluding the markers. 
+ MultiLine(String), +} + +impl Comment { + /// Retrieves the content of the comment as string slice. + pub fn as_str(&self) -> &str { + match self { + Comment::SingleLine { content, prefix: _ } => content.as_str(), + Comment::MultiLine(content) => content.as_str(), + } + } +} + +impl Deref for Comment { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +/// An opaque iterator implementation over comments served by [Comments::find]. +pub struct Iter<'a>(slice::Iter<'a, CommentWithSpan>); + +impl<'a> Iterator for Iter<'a> { + type Item = &'a CommentWithSpan; + + fn next(&mut self) -> Option { + self.0.next() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_find() { + let comments = { + // ``` + // -- abc + // /* hello */--, world + // /* def + // ghi + // jkl + // */ + // ``` + let mut c = Comments(Vec::new()); + c.offer(CommentWithSpan { + comment: Comment::SingleLine { + content: " abc".into(), + prefix: "--".into(), + }, + span: Span::new((1, 1).into(), (1, 7).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::MultiLine(" hello ".into()), + span: Span::new((2, 3).into(), (2, 14).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::SingleLine { + content: ", world".into(), + prefix: "--".into(), + }, + span: Span::new((2, 14).into(), (2, 21).into()), + }); + c.offer(CommentWithSpan { + comment: Comment::MultiLine(" def\n ghi\n jkl\n".into()), + span: Span::new((3, 3).into(), (7, 1).into()), + }); + c + }; + + fn find>(comments: &Comments, range: R) -> Vec<&str> { + comments.find(range).map(|c| c.as_str()).collect::>() + } + + // ~ end-points only -------------------------------------------------- + assert_eq!(find(&comments, ..Location::new(0, 0)), Vec::<&str>::new()); + assert_eq!(find(&comments, ..Location::new(2, 1)), vec![" abc"]); + assert_eq!(find(&comments, ..Location::new(2, 3)), vec![" abc"]); + assert_eq!( + find(&comments, ..=Location::new(2, 3)), + vec![" abc", " hello "] 
+ ); + assert_eq!( + find(&comments, ..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, ..Location::new(2, 15)), + vec![" abc", " hello ", ", world"] + ); + + // ~ start-points only ------------------------------------------------ + assert_eq!( + find(&comments, Location::new(1000, 1000)..), + Vec::<&str>::new() + ); + assert_eq!( + find(&comments, Location::new(2, 14)..), + vec![", world", " def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(2, 15)..), + vec![" def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(0, 0)..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + + // ~ ranges ----------------------------------------------------------- + assert_eq!( + find(&comments, Location::new(2, 1)..Location::new(1, 1)), + Vec::<&str>::new() + ); + assert_eq!( + find(&comments, Location::new(1, 1)..Location::new(2, 3)), + vec![" abc"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 3)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 10)), + vec![" abc", " hello "] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..=Location::new(2, 14)), + vec![" abc", " hello ", ", world"] + ); + assert_eq!( + find(&comments, Location::new(1, 1)..Location::new(2, 15)), + vec![" abc", " hello ", ", world"] + ); + + // ~ find everything -------------------------------------------------- + assert_eq!( + find(&comments, ..), + vec![" abc", " hello ", ", world", " def\n ghi\n jkl\n"] + ); + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6cb4c33605..23cde478b8 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -136,6 +136,7 @@ mod query; mod spans; pub use spans::Spanned; +pub mod comments; mod trigger; mod value; diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 
de8fba75b2..2ec797db47 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -28,7 +28,7 @@ use core::iter; use crate::tokenizer::Span; use super::{ - dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation, + comments, dcl::SecondaryRoles, value::ValueWithSpan, AccessExpr, AlterColumnOperation, AlterIndexOperation, AlterTableOperation, Analyze, Array, Assignment, AssignmentTarget, AttachedToken, BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements, @@ -2477,6 +2477,12 @@ impl Spanned for OutputClause { } } +impl Spanned for comments::CommentWithSpan { + fn span(&self) -> Span { + self.span + } +} + #[cfg(test)] pub mod tests { use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect}; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 54fb327374..2b82d00956 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -32,14 +32,17 @@ use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; -use crate::ast::helpers::{ - key_value_options::{ - KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter, - }, - stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}, -}; use crate::ast::Statement::CreatePolicy; use crate::ast::*; +use crate::ast::{ + comments, + helpers::{ + key_value_options::{ + KeyValueOption, KeyValueOptionKind, KeyValueOptions, KeyValueOptionsDelimiter, + }, + stmt_create_table::{CreateTableBuilder, CreateTableConfiguration}, + }, +}; use crate::dialect::*; use crate::keywords::{Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; @@ -530,6 +533,44 @@ impl<'a> Parser<'a> { Parser::new(dialect).try_with_sql(sql)?.parse_statements() } + /// Parses the given `sql` into an Abstract Syntax Tree (AST), returning + /// also encountered source code comments. + /// + /// See [Parser::parse_sql]. 
+ pub fn parse_sql_with_comments( + dialect: &'a dyn Dialect, + sql: &str, + ) -> Result<(Vec, comments::Comments), ParserError> { + let mut p = Parser::new(dialect).try_with_sql(sql)?; + p.parse_statements().map(|stmts| (stmts, p.into_comments())) + } + + /// Consumes this parser returning comments from the parsed token stream. + fn into_comments(self) -> comments::Comments { + let mut comments = comments::Comments::default(); + for t in self.tokens.into_iter() { + match t.token { + Token::Whitespace(Whitespace::SingleLineComment { comment, prefix }) => { + comments.offer(comments::CommentWithSpan { + comment: comments::Comment::SingleLine { + content: comment, + prefix, + }, + span: t.span, + }); + } + Token::Whitespace(Whitespace::MultiLineComment(comment)) => { + comments.offer(comments::CommentWithSpan { + comment: comments::Comment::MultiLine(comment), + span: t.span, + }); + } + _ => {} + } + } + comments + } + /// Parse a single top-level statement (such as SELECT, INSERT, CREATE, etc.), /// stopping before the statement separator, if any. pub fn parse_statement(&mut self) -> Result { diff --git a/tests/sqlparser_comments.rs b/tests/sqlparser_comments.rs new file mode 100644 index 0000000000..34442ca3e0 --- /dev/null +++ b/tests/sqlparser_comments.rs @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +#![warn(clippy::all)] +//! Test comment extraction from SQL source code. + +#[cfg(test)] +use pretty_assertions::assert_eq; + +use sqlparser::{ + ast::comments::{Comment, CommentWithSpan}, + dialect::GenericDialect, + parser::Parser, + tokenizer::Span, +}; + +#[test] +fn parse_sql_with_comments() { + let sql = r#" +-- second line comment +select * from /* inline comment after `from` */ dual; + +/*select +some +more*/ + + -- end-of-script-with-no-newline"#; + + let comments = match Parser::parse_sql_with_comments(&GenericDialect, sql) { + Ok((_, comments)) => comments, + Err(e) => panic!("Invalid sql script: {e}"), + }; + + assert_eq!( + Vec::from(comments), + vec![ + CommentWithSpan { + comment: Comment::SingleLine { + content: " second line comment\n".into(), + prefix: "--".into() + }, + span: Span::new((2, 1).into(), (3, 1).into()), + }, + CommentWithSpan { + comment: Comment::MultiLine(" inline comment after `from` ".into()), + span: Span::new((3, 15).into(), (3, 48).into()), + }, + CommentWithSpan { + comment: Comment::MultiLine("select\nsome\nmore".into()), + span: Span::new((5, 1).into(), (7, 7).into()) + }, + CommentWithSpan { + comment: Comment::SingleLine { + content: " end-of-script-with-no-newline".into(), + prefix: "--".into() + }, + span: Span::new((9, 3).into(), (9, 35).into()), + } + ] + ); +} From cdeed32294609e31779c13b48bd19e505bf65ea8 Mon Sep 17 00:00:00 2001 From: Denis Goncharenko Date: Tue, 16 Dec 2025 13:07:11 +0100 Subject: [PATCH 010/121] PostgreSQL: Support schema-qualified operator classes in CREATE INDEX (#2131) --- src/ast/ddl.rs | 2 +- src/parser/mod.rs | 6 ++--- tests/sqlparser_postgres.rs | 44 +++++++++++++++++++++++++++++++++---- 3 files changed, 44 insertions(+), 8 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 8ccd533cfb..d0aed448e7 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -61,7 +61,7 @@ use 
crate::tokenizer::{Span, Token}; #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IndexColumn { pub column: OrderByExpr, - pub operator_class: Option, + pub operator_class: Option, } impl From for IndexColumn { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2b82d00956..3ba4ba5711 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16933,10 +16933,10 @@ impl<'a> Parser<'a> { fn parse_order_by_expr_inner( &mut self, with_operator_class: bool, - ) -> Result<(OrderByExpr, Option), ParserError> { + ) -> Result<(OrderByExpr, Option), ParserError> { let expr = self.parse_expr()?; - let operator_class: Option = if with_operator_class { + let operator_class: Option = if with_operator_class { // We check that if non of the following keywords are present, then we parse an // identifier as operator class. if self @@ -16945,7 +16945,7 @@ impl<'a> Parser<'a> { { None } else { - self.maybe_parse(|parser| parser.parse_identifier())? + self.maybe_parse(|parser| parser.parse_object_name(false))? 
} } else { None diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 11512cf803..d595a0a26e 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2572,11 +2572,17 @@ fn parse_create_indices_with_operator_classes() { IndexType::SPGiST, IndexType::Custom("CustomIndexType".into()), ]; - let operator_classes: [Option; 4] = [ + let operator_classes: [Option; 4] = [ None, - Some("gin_trgm_ops".into()), - Some("gist_trgm_ops".into()), - Some("totally_not_valid".into()), + Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new( + "gin_trgm_ops", + ))])), + Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new( + "gist_trgm_ops", + ))])), + Some(ObjectName(vec![ObjectNamePart::Identifier(Ident::new( + "totally_not_valid", + ))])), ]; for expected_index_type in indices { @@ -2713,6 +2719,36 @@ fn parse_create_indices_with_operator_classes() { } } +#[test] +fn parse_create_index_with_schema_qualified_operator_class() { + let sql = "CREATE INDEX my_index ON my_table USING HNSW (embedding public.vector_cosine_ops)"; + + match pg().verified_stmt(sql) { + Statement::CreateIndex(CreateIndex { columns, .. 
}) => { + assert_eq!(1, columns.len()); + let idx_col = &columns[0]; + + // Verify the column name + match &idx_col.column.expr { + Expr::Identifier(ident) => { + assert_eq!("embedding", ident.value); + } + _ => panic!("Expected identifier expression"), + } + + // Verify the schema-qualified operator class + assert_eq!( + Some(ObjectName(vec![ + ObjectNamePart::Identifier(Ident::new("public")), + ObjectNamePart::Identifier(Ident::new("vector_cosine_ops")), + ])), + idx_col.operator_class + ); + } + _ => unreachable!(), + } +} + #[test] fn parse_create_bloom() { let sql = From f84887d0049105c7b84621d65b71e1ee640e18e9 Mon Sep 17 00:00:00 2001 From: xitep Date: Tue, 16 Dec 2025 19:04:11 +0100 Subject: [PATCH 011/121] Oracle: Support for quote delimited strings (#2130) --- src/ast/mod.rs | 2 +- src/ast/value.rs | 32 ++++++ src/dialect/generic.rs | 4 + src/dialect/mod.rs | 7 ++ src/dialect/oracle.rs | 4 + src/parser/merge.rs | 2 +- src/parser/mod.rs | 10 ++ src/tokenizer.rs | 92 ++++++++++++++- tests/sqlparser_oracle.rs | 234 +++++++++++++++++++++++++++++++++++++- 9 files changed, 381 insertions(+), 6 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 23cde478b8..f1e79b0d26 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -110,7 +110,7 @@ pub use self::trigger::{ pub use self::value::{ escape_double_quote_string, escape_quoted_string, DateTimeField, DollarQuotedString, - NormalizationForm, TrimWhereField, Value, ValueWithSpan, + NormalizationForm, QuoteDelimitedString, TrimWhereField, Value, ValueWithSpan, }; use crate::ast::helpers::key_value_options::KeyValueOptions; diff --git a/src/ast/value.rs b/src/ast/value.rs index fdfa6a6748..ccbb12a332 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -167,6 +167,12 @@ pub enum Value { TripleDoubleQuotedRawStringLiteral(String), /// N'string value' NationalStringLiteral(String), + /// Quote delimited literal. 
Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), + /// "National" quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// X'hex value' HexStringLiteral(String), @@ -207,6 +213,8 @@ impl Value { | Value::NationalStringLiteral(s) | Value::HexStringLiteral(s) => Some(s), Value::DollarQuotedString(s) => Some(s.value), + Value::QuoteDelimitedStringLiteral(s) => Some(s.value), + Value::NationalQuoteDelimitedStringLiteral(s) => Some(s.value), _ => None, } } @@ -242,6 +250,8 @@ impl fmt::Display for Value { Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::UnicodeStringLiteral(v) => write!(f, "U&'{}'", escape_unicode_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{v}'"), + Value::QuoteDelimitedStringLiteral(v) => v.fmt(f), + Value::NationalQuoteDelimitedStringLiteral(v) => write!(f, "N{v}"), Value::HexStringLiteral(v) => write!(f, "X'{v}'"), Value::Boolean(v) => write!(f, "{v}"), Value::SingleQuotedByteStringLiteral(v) => write!(f, "B'{v}'"), @@ -279,6 +289,28 @@ impl fmt::Display for DollarQuotedString { } } +/// A quote delimited string literal, e.g. `Q'_abc_'`. +/// +/// See [Value::QuoteDelimitedStringLiteral] and/or +/// [Value::NationalQuoteDelimitedStringLiteral]. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct QuoteDelimitedString { + /// the quote start character; i.e. 
the character _after_ the opening `Q'` + pub start_quote: char, + /// the string literal value itself + pub value: String, + /// the quote end character; i.e. the character _before_ the closing `'` + pub end_quote: char, +} + +impl fmt::Display for QuoteDelimitedString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Q'{}{}{}'", self.start_quote, self.value, self.end_quote) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index dffc5b5276..bbedbc0592 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -195,4 +195,8 @@ impl Dialect for GenericDialect { fn supports_interval_options(&self) -> bool { true } + + fn supports_quote_delimited_string(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 1d99d86319..1a416e4df6 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1209,6 +1209,13 @@ pub trait Dialect: Debug + Any { fn supports_semantic_view_table_factor(&self) -> bool { false } + + /// Support quote delimited string literals, e.g. 
`Q'{...}'` + /// + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + fn supports_quote_delimited_string(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index f8bb0e155e..54c2ace5fb 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -95,4 +95,8 @@ impl Dialect for OracleDialect { fn supports_group_by_expr(&self) -> bool { true } + + fn supports_quote_delimited_string(&self) -> bool { + true + } } diff --git a/src/parser/merge.rs b/src/parser/merge.rs index b2283b6716..2bc1544f00 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -13,7 +13,7 @@ //! SQL Parser for a `MERGE` statement #[cfg(not(feature = "std"))] -use alloc::{boxed::Box, format, string::ToString, vec, vec::Vec}; +use alloc::{boxed::Box, format, vec, vec::Vec}; use crate::{ ast::{ diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3ba4ba5711..ade3c250f0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1754,6 +1754,8 @@ impl<'a> Parser<'a> { | Token::TripleSingleQuotedRawStringLiteral(_) | Token::TripleDoubleQuotedRawStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); Ok(Expr::Value(self.parse_value()?)) @@ -2770,6 +2772,8 @@ impl<'a> Parser<'a> { | Token::EscapedStringLiteral(_) | Token::UnicodeStringLiteral(_) | Token::NationalStringLiteral(_) + | Token::QuoteDelimitedStringLiteral(_) + | Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), _ => self.expected( "either filler, WITH, or WITHOUT in LISTAGG", @@ -10697,6 +10701,12 @@ impl<'a> Parser<'a> { Token::NationalStringLiteral(ref s) => { ok_value(Value::NationalStringLiteral(s.to_string())) } + 
Token::QuoteDelimitedStringLiteral(v) => { + ok_value(Value::QuoteDelimitedStringLiteral(v)) + } + Token::NationalQuoteDelimitedStringLiteral(v) => { + ok_value(Value::NationalQuoteDelimitedStringLiteral(v)) + } Token::EscapedStringLiteral(ref s) => { ok_value(Value::EscapedStringLiteral(s.to_string())) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 54a158c1fd..2ae17cf4a4 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -29,10 +29,10 @@ use alloc::{ vec, vec::Vec, }; -use core::iter::Peekable; use core::num::NonZeroU8; use core::str::Chars; use core::{cmp, fmt}; +use core::{iter::Peekable, str}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -46,7 +46,10 @@ use crate::dialect::{ SnowflakeDialect, }; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; -use crate::{ast::DollarQuotedString, dialect::HiveDialect}; +use crate::{ + ast::{DollarQuotedString, QuoteDelimitedString}, + dialect::HiveDialect, +}; /// SQL Token enumeration #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] @@ -98,6 +101,12 @@ pub enum Token { TripleDoubleQuotedRawStringLiteral(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), + /// Quote delimited literal. Examples `Q'{ab'c}'`, `Q'|ab'c|'`, `Q'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + QuoteDelimitedStringLiteral(QuoteDelimitedString), + /// "Nationa" quote delimited literal. 
Examples `NQ'{ab'c}'`, `NQ'|ab'c|'`, `NQ'|ab|c|'` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Literals.html#GUID-1824CBAA-6E16-4921-B2A6-112FB02248DA) + NationalQuoteDelimitedStringLiteral(QuoteDelimitedString), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' EscapedStringLiteral(String), /// Unicode string literal: i.e: U&'first \000A second' @@ -292,6 +301,8 @@ impl fmt::Display for Token { Token::TripleDoubleQuotedString(ref s) => write!(f, "\"\"\"{s}\"\"\""), Token::DollarQuotedString(ref s) => write!(f, "{s}"), Token::NationalStringLiteral(ref s) => write!(f, "N'{s}'"), + Token::QuoteDelimitedStringLiteral(ref s) => s.fmt(f), + Token::NationalQuoteDelimitedStringLiteral(ref s) => write!(f, "N{s}"), Token::EscapedStringLiteral(ref s) => write!(f, "E'{s}'"), Token::UnicodeStringLiteral(ref s) => write!(f, "U&'{s}'"), Token::HexStringLiteral(ref s) => write!(f, "X'{s}'"), @@ -1032,6 +1043,18 @@ impl<'a> Tokenizer<'a> { self.tokenize_single_quoted_string(chars, '\'', backslash_escape)?; Ok(Some(Token::NationalStringLiteral(s))) } + Some(&q @ 'q') | Some(&q @ 'Q') + if self.dialect.supports_quote_delimited_string() => + { + chars.next(); // consume and check the next char + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[n, q]) + .map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s))) + } else { + let s = self.tokenize_word(String::from_iter([n, q]), chars); + Ok(Some(Token::make_word(&s, None))) + } + } _ => { // regular identifier starting with an "N" let s = self.tokenize_word(n, chars); @@ -1039,6 +1062,16 @@ impl<'a> Tokenizer<'a> { } } } + q @ 'Q' | q @ 'q' if self.dialect.supports_quote_delimited_string() => { + chars.next(); // consume and check the next char + if let Some('\'') = chars.peek() { + self.tokenize_quote_delimited_string(chars, &[q]) + .map(|s| Some(Token::QuoteDelimitedStringLiteral(s))) + } 
else { + let s = self.tokenize_word(q, chars); + Ok(Some(Token::make_word(&s, None))) + } + } // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. x @ 'e' | x @ 'E' if self.dialect.supports_string_escape_constant() => { let starting_loc = chars.location(); @@ -1994,6 +2027,61 @@ impl<'a> Tokenizer<'a> { ) } + /// Reads a quote delimited string expecting `chars.next()` to deliver a quote. + /// + /// See + fn tokenize_quote_delimited_string( + &self, + chars: &mut State, + // the prefix that introduced the possible literal or word, + // e.g. "Q" or "nq" + literal_prefix: &[char], + ) -> Result { + let literal_start_loc = chars.location(); + chars.next(); + + let start_quote_loc = chars.location(); + let (start_quote, end_quote) = match chars.next() { + None | Some(' ') | Some('\t') | Some('\r') | Some('\n') => { + return self.tokenizer_error( + start_quote_loc, + format!( + "Invalid space, tab, newline, or EOF after '{}''", + String::from_iter(literal_prefix) + ), + ); + } + Some(c) => ( + c, + match c { + '[' => ']', + '{' => '}', + '<' => '>', + '(' => ')', + c => c, + }, + ), + }; + + // read the string literal until the "quote character" following a by literal quote + let mut value = String::new(); + while let Some(ch) = chars.next() { + if ch == end_quote { + if let Some('\'') = chars.peek() { + chars.next(); // ~ consume the quote + return Ok(QuoteDelimitedString { + start_quote, + value, + end_quote, + }); + } + } + value.push(ch); + } + + self.tokenizer_error(literal_start_loc, "Unterminated string literal") + } + /// Read a quoted string. 
fn tokenize_quoted_string( &self, diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 09fd41912d..683660369a 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -21,11 +21,12 @@ use pretty_assertions::assert_eq; use sqlparser::{ - ast::{BinaryOperator, Expr, Value, ValueWithSpan}, + ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan}, dialect::OracleDialect, + parser::ParserError, tokenizer::Span, }; -use test_utils::{expr_from_projection, number, TestedDialects}; +use test_utils::{all_dialects_where, expr_from_projection, number, TestedDialects}; mod test_utils; @@ -33,6 +34,19 @@ fn oracle() -> TestedDialects { TestedDialects::new(vec![Box::new(OracleDialect)]) } +/// Convenience constructor for [QuoteDelimitedstring]. +fn quote_delimited_string( + start_quote: char, + value: &'static str, + end_quote: char, +) -> QuoteDelimitedString { + QuoteDelimitedString { + start_quote, + value: value.into(), + end_quote, + } +} + /// Oracle: `||` has a lower precedence than `*` and `/` #[test] fn muldiv_have_higher_precedence_than_strconcat() { @@ -103,3 +117,219 @@ fn plusminus_have_same_precedence_as_strconcat() { } ); } + +#[test] +fn parse_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT Q'.abc.', \ + Q'Xab'cX', \ + Q'|abc'''|', \ + Q'{abc}d}', \ + Q'[]abc[]', \ + Q'', \ + Q'<<', \ + Q'('abc'('abc)', \ + Q'(abc'def))', \ + Q'(abc'def)))' \ + FROM dual"; + let select = dialect.verified_only_select(sql); + assert_eq!(10, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('X', "ab'c", 'X'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + 
&Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('|', "abc'''", '|'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[2]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('{', "abc}d", '}'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[3]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('[', "]abc[", ']'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[4]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "a'bc", '>'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[5]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('<', "<'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[6]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "'abc'('abc", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[7]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def)", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[8]) + ); + assert_eq!( + &Expr::Value( + (Value::QuoteDelimitedStringLiteral(quote_delimited_string('(', "abc'def))", ')'))) + .with_empty_span() + ), + expr_from_projection(&select.projection[9]) + ); +} + +#[test] +fn parse_invalid_quote_delimited_strings() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + // ~ invalid quote delimiter + for q in [' ', '\t', '\r', '\n'] { + assert_eq!( + dialect.parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")), + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with quote char {q:?}" + ); + } + // ~ invalid eof after quote + assert_eq!( + 
dialect.parse_sql_statements("SELECT Q'"), + Err(ParserError::TokenizerError( + "Invalid space, tab, newline, or EOF after 'Q'' at Line: 1, Column: 10".into() + )), + "with EOF quote char" + ); + // ~ unterminated string + assert_eq!( + dialect.parse_sql_statements("SELECT Q'|asdfa...."), + Err(ParserError::TokenizerError( + "Unterminated string literal at Line: 1, Column: 9".into() + )), + "with EOF quote char" + ); +} + +#[test] +fn parse_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "select q'!a'b'c!d!' from dual"; + let select = dialect.verified_only_select_with_canonical(sql, "SELECT Q'!a'b'c!d!' FROM dual"); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::QuoteDelimitedStringLiteral(quote_delimited_string('!', "a'b'c!d", '!')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT q, quux, q.abc FROM dual q"; + let select = dialect.verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "q")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "quux")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "q"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} + +#[test] +fn parse_national_quote_delimited_string() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT NQ'.abc.' 
FROM dual"; + let select = dialect.verified_only_select(sql); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string('.', "abc", '.')) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); +} + +#[test] +fn parse_national_quote_delimited_string_lowercase() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + for prefix in ["nq", "Nq", "nQ", "NQ"] { + let select = dialect.verified_only_select_with_canonical( + &format!("select {prefix}'!a'b'c!d!' from dual"), + "SELECT NQ'!a'b'c!d!' FROM dual", + ); + assert_eq!(1, select.projection.len()); + assert_eq!( + &Expr::Value( + Value::NationalQuoteDelimitedStringLiteral(quote_delimited_string( + '!', "a'b'c!d", '!' + )) + .with_empty_span() + ), + expr_from_projection(&select.projection[0]) + ); + } +} + +#[test] +fn parse_national_quote_delimited_string_but_is_a_word() { + let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); + let sql = "SELECT nq, nqoo, nq.abc FROM dual q"; + let select = dialect.verified_only_select(sql); + assert_eq!(3, select.projection.len()); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nq")), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Identifier(Ident::with_span(Span::empty(), "nqoo")), + expr_from_projection(&select.projection[1]) + ); + assert_eq!( + &Expr::CompoundIdentifier(vec![ + Ident::with_span(Span::empty(), "nq"), + Ident::with_span(Span::empty(), "abc") + ]), + expr_from_projection(&select.projection[2]) + ); +} From d78dbc97a1f65ce7ee353fea181a6c3bb15a5050 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Thu, 18 Dec 2025 05:34:48 +0100 Subject: [PATCH 012/121] Added support for `ALTER OPERATOR FAMILY` syntax (#2125) --- src/ast/ddl.rs | 198 +++++++++++++++++- src/ast/mod.rs | 30 ++- src/ast/spans.rs | 1 + src/parser/mod.rs | 176 +++++++++++++++- tests/sqlparser_postgres.rs 
| 398 +++++++++++++++++++++++++++++++++++- 5 files changed, 778 insertions(+), 25 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index d0aed448e7..4e042a365e 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -4198,25 +4198,25 @@ impl fmt::Display for OperatorArgTypes { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OperatorClassItem { - /// OPERATOR clause + /// `OPERATOR` clause Operator { - strategy_number: u32, + strategy_number: u64, operator_name: ObjectName, /// Optional operator argument types op_types: Option, - /// FOR SEARCH or FOR ORDER BY + /// `FOR SEARCH` or `FOR ORDER BY` purpose: Option, }, - /// FUNCTION clause + /// `FUNCTION` clause Function { - support_number: u32, + support_number: u64, /// Optional function argument types for the operator class op_types: Option>, function_name: ObjectName, /// Function argument types argument_types: Vec, }, - /// STORAGE clause + /// `STORAGE` clause Storage { storage_type: DataType }, } @@ -4413,3 +4413,189 @@ impl Spanned for DropOperatorClass { Span::empty() } } + +/// An item in an ALTER OPERATOR FAMILY ADD statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorFamilyItem { + /// `OPERATOR` clause + Operator { + strategy_number: u64, + operator_name: ObjectName, + /// Operator argument types + op_types: Vec, + /// `FOR SEARCH` or `FOR ORDER BY` + purpose: Option, + }, + /// `FUNCTION` clause + Function { + support_number: u64, + /// Optional operator argument types for the function + op_types: Option>, + function_name: ObjectName, + /// Function argument types + argument_types: Vec, + }, +} + +/// An item in an ALTER OPERATOR FAMILY DROP statement +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", 
derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OperatorFamilyDropItem { + /// `OPERATOR` clause + Operator { + strategy_number: u64, + /// Operator argument types + op_types: Vec, + }, + /// `FUNCTION` clause + Function { + support_number: u64, + /// Operator argument types for the function + op_types: Vec, + }, +} + +impl fmt::Display for OperatorFamilyItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorFamilyItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + } => { + write!( + f, + "OPERATOR {strategy_number} {operator_name} ({})", + display_comma_separated(op_types) + )?; + if let Some(purpose) = purpose { + write!(f, " {purpose}")?; + } + Ok(()) + } + OperatorFamilyItem::Function { + support_number, + op_types, + function_name, + argument_types, + } => { + write!(f, "FUNCTION {support_number}")?; + if let Some(types) = op_types { + write!(f, " ({})", display_comma_separated(types))?; + } + write!(f, " {function_name}")?; + if !argument_types.is_empty() { + write!(f, "({})", display_comma_separated(argument_types))?; + } + Ok(()) + } + } + } +} + +impl fmt::Display for OperatorFamilyDropItem { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + OperatorFamilyDropItem::Operator { + strategy_number, + op_types, + } => { + write!( + f, + "OPERATOR {strategy_number} ({})", + display_comma_separated(op_types) + ) + } + OperatorFamilyDropItem::Function { + support_number, + op_types, + } => { + write!( + f, + "FUNCTION {support_number} ({})", + display_comma_separated(op_types) + ) + } + } + } +} + +/// `ALTER OPERATOR FAMILY` statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterOperatorFamily { + /// Operator family name (can be schema-qualified) + pub name: 
ObjectName, + /// Index method (btree, hash, gist, gin, etc.) + pub using: Ident, + /// The operation to perform + pub operation: AlterOperatorFamilyOperation, +} + +/// An [AlterOperatorFamily] operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterOperatorFamilyOperation { + /// `ADD { OPERATOR ... | FUNCTION ... } [, ...]` + Add { + /// List of operator family items to add + items: Vec, + }, + /// `DROP { OPERATOR ... | FUNCTION ... } [, ...]` + Drop { + /// List of operator family items to drop + items: Vec, + }, + /// `RENAME TO new_name` + RenameTo { new_name: ObjectName }, + /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + OwnerTo(Owner), + /// `SET SCHEMA new_schema` + SetSchema { schema_name: ObjectName }, +} + +impl fmt::Display for AlterOperatorFamily { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ALTER OPERATOR FAMILY {} USING {}", + self.name, self.using + )?; + write!(f, " {}", self.operation) + } +} + +impl fmt::Display for AlterOperatorFamilyOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AlterOperatorFamilyOperation::Add { items } => { + write!(f, "ADD {}", display_comma_separated(items)) + } + AlterOperatorFamilyOperation::Drop { items } => { + write!(f, "DROP {}", display_comma_separated(items)) + } + AlterOperatorFamilyOperation::RenameTo { new_name } => { + write!(f, "RENAME TO {new_name}") + } + AlterOperatorFamilyOperation::OwnerTo(owner) => { + write!(f, "OWNER TO {owner}") + } + AlterOperatorFamilyOperation::SetSchema { schema_name } => { + write!(f, "SET SCHEMA {schema_name}") + } + } + } +} + +impl Spanned for AlterOperatorFamily { + fn span(&self) -> Span { + Span::empty() + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f1e79b0d26..4676786027 100644 --- a/src/ast/mod.rs +++ 
b/src/ast/mod.rs @@ -60,22 +60,24 @@ pub use self::dcl::{ }; pub use self::ddl::{ Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, AlterOperator, - AlterOperatorOperation, AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, - AlterTableAlgorithm, AlterTableLock, AlterTableOperation, AlterTableType, AlterType, - AlterTypeAddValue, AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, - AlterTypeRenameValue, ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, - ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, + AlterOperatorFamily, AlterOperatorFamilyOperation, AlterOperatorOperation, + AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, AlterTableAlgorithm, + AlterTableLock, AlterTableOperation, AlterTableType, AlterType, AlterTypeAddValue, + AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, + ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, + ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, CreateExtension, CreateFunction, CreateIndex, CreateOperator, CreateOperatorClass, CreateOperatorFamily, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial, DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, DropOperatorFamily, DropOperatorSignature, DropTrigger, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, - OperatorArgTypes, OperatorClassItem, OperatorOption, OperatorPurpose, Owner, Partition, - ProcedureParam, ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption, - TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef, - UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, 
UserDefinedTypeRepresentation, - UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, + OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem, + OperatorOption, OperatorPurpose, Owner, Partition, ProcedureParam, ReferentialAction, + RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, Truncate, + UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, + UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, + UserDefinedTypeStorage, ViewColumnDef, }; pub use self::dml::{ Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, @@ -3411,6 +3413,11 @@ pub enum Statement { /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteroperator.html) AlterOperator(AlterOperator), /// ```sql + /// ALTER OPERATOR FAMILY + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteropfamily.html) + AlterOperatorFamily(AlterOperatorFamily), + /// ```sql /// ALTER ROLE /// ``` AlterRole { @@ -4972,6 +4979,9 @@ impl fmt::Display for Statement { write!(f, "ALTER TYPE {name} {operation}") } Statement::AlterOperator(alter_operator) => write!(f, "{alter_operator}"), + Statement::AlterOperatorFamily(alter_operator_family) => { + write!(f, "{alter_operator_family}") + } Statement::AlterRole { name, operation } => { write!(f, "ALTER ROLE {name} {operation}") } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 2ec797db47..d4e843157a 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -403,6 +403,7 @@ impl Spanned for Statement { // These statements need to be implemented Statement::AlterType { .. } => Span::empty(), Statement::AlterOperator { .. } => Span::empty(), + Statement::AlterOperatorFamily { .. } => Span::empty(), Statement::AlterRole { .. } => Span::empty(), Statement::AlterSession { .. } => Span::empty(), Statement::AttachDatabase { .. 
} => Span::empty(), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index ade3c250f0..74b06ec8d0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6701,7 +6701,7 @@ impl<'a> Parser<'a> { let mut items = vec![]; loop { if self.parse_keyword(Keyword::OPERATOR) { - let strategy_number = self.parse_literal_uint()? as u32; + let strategy_number = self.parse_literal_uint()?; let operator_name = self.parse_operator_name()?; // Optional operator argument types @@ -6736,7 +6736,7 @@ impl<'a> Parser<'a> { purpose, }); } else if self.parse_keyword(Keyword::FUNCTION) { - let support_number = self.parse_literal_uint()? as u32; + let support_number = self.parse_literal_uint()?; // Optional operator types let op_types = @@ -9898,7 +9898,13 @@ impl<'a> Parser<'a> { operation, }) } - Keyword::OPERATOR => self.parse_alter_operator(), + Keyword::OPERATOR => { + if self.parse_keyword(Keyword::FAMILY) { + self.parse_alter_operator_family() + } else { + self.parse_alter_operator() + } + } Keyword::ROLE => self.parse_alter_role(), Keyword::POLICY => self.parse_alter_policy(), Keyword::CONNECTOR => self.parse_alter_connector(), @@ -10130,6 +10136,170 @@ impl<'a> Parser<'a> { })) } + /// Parse an operator item for ALTER OPERATOR FAMILY ADD operations + fn parse_operator_family_add_operator(&mut self) -> Result { + let strategy_number = self.parse_literal_uint()?; + let operator_name = self.parse_operator_name()?; + + // Operator argument types (required for ALTER OPERATOR FAMILY) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + // Optional purpose + let purpose = if self.parse_keyword(Keyword::FOR) { + if self.parse_keyword(Keyword::SEARCH) { + Some(OperatorPurpose::ForSearch) + } else if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { + let sort_family = self.parse_object_name(false)?; + Some(OperatorPurpose::ForOrderBy { sort_family }) + } else { + return 
self.expected("SEARCH or ORDER BY after FOR", self.peek_token()); + } + } else { + None + }; + + Ok(OperatorFamilyItem::Operator { + strategy_number, + operator_name, + op_types, + purpose, + }) + } + + /// Parse a function item for ALTER OPERATOR FAMILY ADD operations + fn parse_operator_family_add_function(&mut self) -> Result { + let support_number = self.parse_literal_uint()?; + + // Optional operator types + let op_types = if self.consume_token(&Token::LParen) && self.peek_token() != Token::RParen { + let types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + Some(types) + } else if self.consume_token(&Token::LParen) { + self.expect_token(&Token::RParen)?; + Some(vec![]) + } else { + None + }; + + let function_name = self.parse_object_name(false)?; + + // Function argument types + let argument_types = if self.consume_token(&Token::LParen) { + if self.peek_token() == Token::RParen { + self.expect_token(&Token::RParen)?; + vec![] + } else { + let types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + types + } + } else { + vec![] + }; + + Ok(OperatorFamilyItem::Function { + support_number, + op_types, + function_name, + argument_types, + }) + } + + /// Parse an operator item for ALTER OPERATOR FAMILY DROP operations + fn parse_operator_family_drop_operator( + &mut self, + ) -> Result { + let strategy_number = self.parse_literal_uint()?; + + // Operator argument types (required for DROP) + self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + Ok(OperatorFamilyDropItem::Operator { + strategy_number, + op_types, + }) + } + + /// Parse a function item for ALTER OPERATOR FAMILY DROP operations + fn parse_operator_family_drop_function( + &mut self, + ) -> Result { + let support_number = self.parse_literal_uint()?; + + // Operator types (required for DROP) + 
self.expect_token(&Token::LParen)?; + let op_types = self.parse_comma_separated(Parser::parse_data_type)?; + self.expect_token(&Token::RParen)?; + + Ok(OperatorFamilyDropItem::Function { + support_number, + op_types, + }) + } + + /// Parse an operator family item for ADD operations (dispatches to operator or function parsing) + fn parse_operator_family_add_item(&mut self) -> Result { + if self.parse_keyword(Keyword::OPERATOR) { + self.parse_operator_family_add_operator() + } else if self.parse_keyword(Keyword::FUNCTION) { + self.parse_operator_family_add_function() + } else { + self.expected("OPERATOR or FUNCTION", self.peek_token()) + } + } + + /// Parse an operator family item for DROP operations (dispatches to operator or function parsing) + fn parse_operator_family_drop_item(&mut self) -> Result { + if self.parse_keyword(Keyword::OPERATOR) { + self.parse_operator_family_drop_operator() + } else if self.parse_keyword(Keyword::FUNCTION) { + self.parse_operator_family_drop_function() + } else { + self.expected("OPERATOR or FUNCTION", self.peek_token()) + } + } + + /// Parse a [Statement::AlterOperatorFamily] + /// See + pub fn parse_alter_operator_family(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::USING)?; + let using = self.parse_identifier()?; + + let operation = if self.parse_keyword(Keyword::ADD) { + let items = self.parse_comma_separated(Parser::parse_operator_family_add_item)?; + AlterOperatorFamilyOperation::Add { items } + } else if self.parse_keyword(Keyword::DROP) { + let items = self.parse_comma_separated(Parser::parse_operator_family_drop_item)?; + AlterOperatorFamilyOperation::Drop { items } + } else if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) { + let new_name = self.parse_object_name(false)?; + AlterOperatorFamilyOperation::RenameTo { new_name } + } else if self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) { + let owner = self.parse_owner()?; + 
AlterOperatorFamilyOperation::OwnerTo(owner) + } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) { + let schema_name = self.parse_object_name(false)?; + AlterOperatorFamilyOperation::SetSchema { schema_name } + } else { + return self.expected_ref( + "ADD, DROP, RENAME TO, OWNER TO, or SET SCHEMA after ALTER OPERATOR FAMILY", + self.peek_token_ref(), + ); + }; + + Ok(Statement::AlterOperatorFamily(AlterOperatorFamily { + name, + using, + operation, + })) + } + // Parse a [Statement::AlterSchema] // ALTER SCHEMA [ IF EXISTS ] schema_name pub fn parse_alter_schema(&mut self) -> Result { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index d595a0a26e..9f4564ef2c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -23,15 +23,11 @@ mod test_utils; use helpers::attached_token::AttachedToken; -use sqlparser::ast::{ - DataType, DropBehavior, DropOperator, DropOperatorClass, DropOperatorSignature, -}; -use sqlparser::tokenizer::Span; -use test_utils::*; - use sqlparser::ast::*; use sqlparser::dialect::{GenericDialect, PostgreSqlDialect}; use sqlparser::parser::ParserError; +use sqlparser::tokenizer::Span; +use test_utils::*; #[test] fn parse_create_table_generated_always_as_identity() { @@ -7145,6 +7141,396 @@ fn parse_alter_operator() { ); } +#[test] +fn parse_alter_operator_family() { + // Test ALTER OPERATOR FAMILY ... 
ADD OPERATOR + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD OPERATOR 1 < (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Operator { + strategy_number: 1, + operator_name: ObjectName::from(vec![Ident::new("<")]), + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + purpose: None, + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... ADD OPERATOR with FOR SEARCH + let sql = + "ALTER OPERATOR FAMILY text_ops USING btree ADD OPERATOR 1 @@ (TEXT, TEXT) FOR SEARCH"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("text_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Operator { + strategy_number: 1, + operator_name: ObjectName::from(vec![Ident::new("@@")]), + op_types: vec![DataType::Text, DataType::Text], + purpose: Some(OperatorPurpose::ForSearch), + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... ADD FUNCTION + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree ADD FUNCTION 1 btint42cmp(INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Add { + items: vec![OperatorFamilyItem::Function { + support_number: 1, + op_types: None, + function_name: ObjectName::from(vec![Ident::new("btint42cmp")]), + argument_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... 
DROP OPERATOR + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP OPERATOR 1 (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Drop { + items: vec![OperatorFamilyDropItem::Operator { + strategy_number: 1, + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... DROP FUNCTION + let sql = "ALTER OPERATOR FAMILY integer_ops USING btree DROP FUNCTION 1 (INT4, INT2)"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("integer_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::Drop { + items: vec![OperatorFamilyDropItem::Function { + support_number: 1, + op_types: vec![DataType::Int4(None), DataType::Int2(None)], + }], + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... RENAME TO + let sql = "ALTER OPERATOR FAMILY old_ops USING btree RENAME TO new_ops"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("old_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::RenameTo { + new_name: ObjectName::from(vec![Ident::new("new_ops")]), + }, + }) + ); + + // Test ALTER OPERATOR FAMILY ... OWNER TO + let sql = "ALTER OPERATOR FAMILY my_ops USING btree OWNER TO joe"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("my_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::OwnerTo(Owner::Ident(Ident::new("joe"))), + }) + ); + + // Test ALTER OPERATOR FAMILY ... 
SET SCHEMA + let sql = "ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA new_schema"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorFamily(AlterOperatorFamily { + name: ObjectName::from(vec![Ident::new("my_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorFamilyOperation::SetSchema { + schema_name: ObjectName::from(vec![Ident::new("new_schema")]), + }, + }) + ); + + // Test error cases + // Missing USING clause + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops ADD OPERATOR 1 < (INT4, INT2)") + .is_err()); + + // Invalid operation + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree INVALID_OPERATION") + .is_err()); + + // Missing operator name in ADD OPERATOR + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 (INT4, INT2)" + ) + .is_err()); + + // Missing function name in ADD FUNCTION + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2)" + ) + .is_err()); + + // Missing parentheses in DROP OPERATOR + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 INT4, INT2") + .is_err()); + + // Invalid operator name (empty) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 (INT4, INT2)" + ) + .is_err()); + + // Invalid operator name (special characters) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 @#$ (INT4, INT2)" + ) + .is_err()); + + // Negative strategy number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR -1 < (INT4, INT2)" + ) + .is_err()); + + // Non-integer strategy number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1.5 < (INT4, INT2)" + ) + .is_err()); + + // Missing closing parenthesis in operator types + assert!(pg() + 
.parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2" + ) + .is_err()); + + // Missing opening parenthesis in operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < INT4, INT2)" + ) + .is_err()); + + // Empty operator types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < ()") + .is_err()); + + // Invalid data type (using punctuation) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (@#$%, INT2)" + ) + .is_err()); + + // Incomplete FOR clause + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR" + ) + .is_err()); + + // Invalid FOR clause keyword + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR INVALID" + ) + .is_err()); + + // FOR ORDER BY without sort family + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER BY" + ) + .is_err()); + + // Missing function name in ADD FUNCTION + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4, INT2)" + ) + .is_err()); + + // Invalid function name + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 123invalid(INT4, INT2)" + ) + .is_err()); + + // Negative support number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION -1 func(INT4, INT2)" + ) + .is_err()); + + // Non-integer support number + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1.5 func(INT4, INT2)" + ) + .is_err()); + + // Missing closing parenthesis in function operator types + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 
(INT4, INT2 func()" + ) + .is_err()); + + // Missing closing parenthesis in function arguments + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(INT4, INT2" + ) + .is_err()); + + // Invalid data type in function arguments + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 func(@#$%, INT2)" + ) + .is_err()); + + // DROP OPERATOR with FOR clause (not allowed) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 (INT4, INT2) FOR SEARCH" + ) + .is_err()); + + // DROP FUNCTION with function arguments (not allowed) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 (INT4, INT2) func(INT4)" + ) + .is_err()); + + // Multiple ADD items with error in middle + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2), INVALID_ITEM" + ) + .is_err()); + + // Multiple DROP items with error in middle + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 (INT4, INT2), INVALID_ITEM" + ) + .is_err()); + + // RENAME TO with invalid new name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree RENAME TO 123invalid") + .is_err()); + + // OWNER TO with invalid owner + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree OWNER TO 123invalid") + .is_err()); + + // SET SCHEMA with invalid schema name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree SET SCHEMA 123invalid") + .is_err()); + + // Schema-qualified operator family name with invalid schema + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY 123invalid.my_ops USING btree ADD OPERATOR 1 < (INT4, INT2)" + ) + .is_err()); + + // Missing operator family name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY USING btree ADD OPERATOR 1 
< (INT4, INT2)") + .is_err()); + + // Extra tokens at end + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) EXTRA" + ) + .is_err()); + + // Incomplete statement + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD") + .is_err()); + + // Very long numbers + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 999999999999999999999 < (INT4, INT2)") + .is_err()); + + // Multiple FOR clauses + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH FOR ORDER BY sort_family") + .is_err()); + + // FOR SEARCH with extra tokens + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR SEARCH EXTRA") + .is_err()); + + // FOR ORDER BY with invalid sort family + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD OPERATOR 1 < (INT4, INT2) FOR ORDER BY 123invalid") + .is_err()); + + // Function with empty operator types but missing function args parens + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 () func") + .is_err()); + + // Function with mismatched parentheses + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR FAMILY my_ops USING btree ADD FUNCTION 1 (INT4 func(INT2" + ) + .is_err()); + + // DROP with empty types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP OPERATOR 1 ()") + .is_err()); + + // DROP FUNCTION with empty types + assert!(pg() + .parse_sql_statements("ALTER OPERATOR FAMILY my_ops USING btree DROP FUNCTION 1 ()") + .is_err()); +} + #[test] fn parse_drop_operator_family() { for if_exists in [true, false] { From 39418cfebbfe0c028d780614e8b60ef8f0c98ce7 Mon Sep 17 00:00:00 2001 From: jnlt3 Date: Thu, 18 Dec 2025 15:06:55 +0300 Subject: [PATCH 013/121] PostgreSQL Tokenization: Fix 
unexpected characters after question mark being silently ignored (#2129) --- src/tokenizer.rs | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 2ae17cf4a4..8666563ace 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1717,7 +1717,7 @@ impl<'a> Tokenizer<'a> { } } Some('#') => self.consume_and_return(chars, Token::QuestionMarkSharp), - _ => self.consume_and_return(chars, Token::Question), + _ => Ok(Some(Token::Question)), } } '?' => { @@ -4147,4 +4147,23 @@ mod tests { panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}"); } } + + #[test] + fn tokenize_question_mark() { + let dialect = PostgreSqlDialect {}; + let sql = "SELECT x ? y"; + let tokens = Tokenizer::new(&dialect, sql).tokenize().unwrap(); + compare( + tokens, + vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::make_word("x", None), + Token::Whitespace(Whitespace::Space), + Token::Question, + Token::Whitespace(Whitespace::Space), + Token::make_word("y", None), + ], + ) + } } From 355a3bfd90a43c9cda04f578827d353d8dac04c8 Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Thu, 18 Dec 2025 20:09:33 -0800 Subject: [PATCH 014/121] Support parsing parenthesized wildcard `(*)` (#2123) --- src/parser/mod.rs | 9 +++++++++ tests/sqlparser_common.rs | 19 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 74b06ec8d0..d1c4fe05b4 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1268,6 +1268,15 @@ impl<'a> Parser<'a> { Token::Mul => { return Ok(Expr::Wildcard(AttachedToken(next_token))); } + // Handle parenthesized wildcard: (*) + Token::LParen => { + let [maybe_mul, maybe_rparen] = self.peek_tokens_ref(); + if maybe_mul.token == Token::Mul && maybe_rparen.token == Token::RParen { + let mul_token = self.next_token(); // consume Mul + self.next_token(); // consume RParen + return 
Ok(Expr::Wildcard(AttachedToken(mul_token))); + } + } _ => (), }; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index da8e7b495a..9f549e4d0e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -17953,3 +17953,22 @@ fn test_parse_set_session_authorization() { })) ); } + +#[test] +fn parse_select_parenthesized_wildcard() { + // Test SELECT DISTINCT(*) which uses a parenthesized wildcard + // The parentheses are syntactic sugar and get normalized to just * + let sql = "SELECT DISTINCT (*) FROM table1"; + let canonical = "SELECT DISTINCT * FROM table1"; + let select = all_dialects().verified_only_select_with_canonical(sql, canonical); + assert_eq!(select.distinct, Some(Distinct::Distinct)); + assert_eq!(select.projection.len(), 1); + assert!(matches!(select.projection[0], SelectItem::Wildcard(_))); + + // Also test without spaces: SELECT DISTINCT(*) + let sql_no_spaces = "SELECT DISTINCT(*) FROM table1"; + let select2 = all_dialects().verified_only_select_with_canonical(sql_no_spaces, canonical); + assert_eq!(select2.distinct, Some(Distinct::Distinct)); + assert_eq!(select2.projection.len(), 1); + assert!(matches!(select2.projection[0], SelectItem::Wildcard(_))); +} From 14703f022fd8c5b5a58d3e23d3839d956c1b11ee Mon Sep 17 00:00:00 2001 From: xitep Date: Wed, 24 Dec 2025 13:55:23 +0100 Subject: [PATCH 015/121] Make benchmark statement valid (#2139) --- sqlparser_bench/benches/sqlparser_bench.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index 6132ee4329..9637a98f7a 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -51,7 +51,7 @@ fn basic_queries(c: &mut Criterion) { let tables = (0..1000) .map(|n| format!("TABLE_{n}")) .collect::>() - .join(" JOIN "); + .join(" CROSS JOIN "); let where_condition = (0..1000) .map(|n| format!("COL_{n} = {n}")) .collect::>() From 
0cf85d3b3d84596f4f773a2d23fbee2661fda23c Mon Sep 17 00:00:00 2001 From: jnlt3 Date: Tue, 30 Dec 2025 17:15:24 +0300 Subject: [PATCH 016/121] Fix parse_identifiers not taking semicolons into account (#2137) --- src/parser/mod.rs | 2 +- tests/sqlparser_postgres.rs | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d1c4fe05b4..f07e8919a5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12024,7 +12024,7 @@ impl<'a> Parser<'a> { Token::Word(w) => { idents.push(w.clone().into_ident(self.peek_token_ref().span)); } - Token::EOF | Token::Eq => break, + Token::EOF | Token::Eq | Token::SemiColon => break, _ => {} } self.advance_token(); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9f4564ef2c..70f27a13da 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -7914,3 +7914,11 @@ fn parse_create_operator_class() { ) .is_err()); } + +#[test] +fn parse_identifiers_semicolon_handling() { + let statement = "SHOW search_path; SELECT 1"; + pg_and_generic().statements_parse_to(statement, statement); + let statement = "SHOW search_path; SHOW ALL; SHOW ALL"; + pg_and_generic().statements_parse_to(statement, statement); +} From 4de1ac95a4168b60c8487a888e93123d45d92bef Mon Sep 17 00:00:00 2001 From: Filipe Guerreiro Date: Tue, 6 Jan 2026 20:12:56 +0900 Subject: [PATCH 017/121] Add PostgreSQL PARTITION OF syntax support (#2127) --- src/ast/ddl.rs | 119 +++++++++- src/ast/helpers/stmt_create_table.rs | 24 +- src/ast/mod.rs | 74 +++++- src/ast/spans.rs | 62 +++-- src/keywords.rs | 2 + src/parser/mod.rs | 126 +++++++++++ tests/sqlparser_bigquery.rs | 2 + tests/sqlparser_duckdb.rs | 2 + tests/sqlparser_mssql.rs | 8 + tests/sqlparser_postgres.rs | 327 +++++++++++++++++++++++++++ 10 files changed, 712 insertions(+), 34 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 4e042a365e..2a24741f37 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -43,13 
+43,14 @@ use crate::ast::{ }, ArgMode, AttachedToken, CommentDef, ConditionalStatements, CreateFunctionBody, CreateFunctionUsing, CreateTableLikeKind, CreateTableOptions, CreateViewParams, DataType, Expr, - FileFormat, FunctionBehavior, FunctionCalledOnNull, FunctionDesc, FunctionDeterminismSpecifier, - FunctionParallel, HiveDistributionStyle, HiveFormat, HiveIOFormat, HiveRowFormat, - HiveSetLocation, Ident, InitializeKind, MySQLColumnPosition, ObjectName, OnCommit, - OneOrManyWithParens, OperateFunctionArg, OrderByExpr, ProjectionSelect, Query, RefreshModeKind, - RowAccessPolicy, SequenceOptions, Spanned, SqlOption, StorageSerializationPolicy, TableVersion, - Tag, TriggerEvent, TriggerExecBody, TriggerObject, TriggerPeriod, TriggerReferencing, Value, - ValueWithSpan, WrappedCollection, + FileFormat, FunctionBehavior, FunctionCalledOnNull, FunctionDefinitionSetParam, FunctionDesc, + FunctionDeterminismSpecifier, FunctionParallel, FunctionSecurity, HiveDistributionStyle, + HiveFormat, HiveIOFormat, HiveRowFormat, HiveSetLocation, Ident, InitializeKind, + MySQLColumnPosition, ObjectName, OnCommit, OneOrManyWithParens, OperateFunctionArg, + OrderByExpr, ProjectionSelect, Query, RefreshModeKind, RowAccessPolicy, SequenceOptions, + Spanned, SqlOption, StorageSerializationPolicy, TableVersion, Tag, TriggerEvent, + TriggerExecBody, TriggerObject, TriggerPeriod, TriggerReferencing, Value, ValueWithSpan, + WrappedCollection, }; use crate::display_utils::{DisplayCommaSeparated, Indent, NewLine, SpaceOrNewline}; use crate::keywords::Keyword; @@ -2697,6 +2698,14 @@ pub struct CreateTable { /// /// pub inherits: Option>, + /// PostgreSQL `PARTITION OF` clause to create a partition of a parent table. + /// Contains the parent table name. + /// + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub partition_of: Option, + /// PostgreSQL partition bound specification for PARTITION OF. + /// + pub for_values: Option, /// SQLite "STRICT" clause. 
/// if the "STRICT" table-option keyword is added to the end, after the closing ")", /// then strict typing rules apply to that table. @@ -2792,6 +2801,9 @@ impl fmt::Display for CreateTable { dynamic = if self.dynamic { "DYNAMIC " } else { "" }, name = self.name, )?; + if let Some(partition_of) = &self.partition_of { + write!(f, " PARTITION OF {partition_of}")?; + } if let Some(on_cluster) = &self.on_cluster { write!(f, " ON CLUSTER {on_cluster}")?; } @@ -2806,12 +2818,19 @@ impl fmt::Display for CreateTable { Indent(DisplayCommaSeparated(&self.constraints)).fmt(f)?; NewLine.fmt(f)?; f.write_str(")")?; - } else if self.query.is_none() && self.like.is_none() && self.clone.is_none() { + } else if self.query.is_none() + && self.like.is_none() + && self.clone.is_none() + && self.partition_of.is_none() + { // PostgreSQL allows `CREATE TABLE t ();`, but requires empty parens f.write_str(" ()")?; } else if let Some(CreateTableLikeKind::Parenthesized(like_in_columns_list)) = &self.like { write!(f, " ({like_in_columns_list})")?; } + if let Some(for_values) = &self.for_values { + write!(f, " {for_values}")?; + } // Hive table comment should be after column definitions, please refer to: // [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) @@ -3053,6 +3072,76 @@ impl fmt::Display for CreateTable { } } +/// PostgreSQL partition bound specification for `PARTITION OF`. +/// +/// Specifies partition bounds for a child partition table. +/// +/// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createtable.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum ForValues { + /// `FOR VALUES IN (expr, ...)` + In(Vec), + /// `FOR VALUES FROM (expr|MINVALUE|MAXVALUE, ...) 
TO (expr|MINVALUE|MAXVALUE, ...)` + From { + from: Vec, + to: Vec, + }, + /// `FOR VALUES WITH (MODULUS n, REMAINDER r)` + With { modulus: u64, remainder: u64 }, + /// `DEFAULT` + Default, +} + +impl fmt::Display for ForValues { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + ForValues::In(values) => { + write!(f, "FOR VALUES IN ({})", display_comma_separated(values)) + } + ForValues::From { from, to } => { + write!( + f, + "FOR VALUES FROM ({}) TO ({})", + display_comma_separated(from), + display_comma_separated(to) + ) + } + ForValues::With { modulus, remainder } => { + write!( + f, + "FOR VALUES WITH (MODULUS {modulus}, REMAINDER {remainder})" + ) + } + ForValues::Default => write!(f, "DEFAULT"), + } + } +} + +/// A value in a partition bound specification. +/// +/// Used in RANGE partition bounds where values can be expressions, +/// MINVALUE (negative infinity), or MAXVALUE (positive infinity). +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum PartitionBoundValue { + Expr(Expr), + MinValue, + MaxValue, +} + +impl fmt::Display for PartitionBoundValue { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + PartitionBoundValue::Expr(expr) => write!(f, "{expr}"), + PartitionBoundValue::MinValue => write!(f, "MINVALUE"), + PartitionBoundValue::MaxValue => write!(f, "MAXVALUE"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -3138,6 +3227,14 @@ pub struct CreateFunction { /// /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) pub parallel: Option, + /// SECURITY { DEFINER | INVOKER } + /// + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) + pub security: Option, + 
/// SET configuration_parameter clauses + /// + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) + pub set_params: Vec, /// USING ... (Hive only) pub using: Option, /// Language used in a UDF definition. @@ -3204,6 +3301,12 @@ impl fmt::Display for CreateFunction { if let Some(parallel) = &self.parallel { write!(f, " {parallel}")?; } + if let Some(security) = &self.security { + write!(f, " {security}")?; + } + for set_param in &self.set_params { + write!(f, " {set_param}")?; + } if let Some(remote_connection) = &self.remote_connection { write!(f, " REMOTE WITH CONNECTION {remote_connection}")?; } diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index fe950c909c..62dbbbcba0 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -26,8 +26,8 @@ use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ ClusteredBy, ColumnDef, CommentDef, CreateTable, CreateTableLikeKind, CreateTableOptions, Expr, - FileFormat, HiveDistributionStyle, HiveFormat, Ident, InitializeKind, ObjectName, OnCommit, - OneOrManyWithParens, Query, RefreshModeKind, RowAccessPolicy, Statement, + FileFormat, ForValues, HiveDistributionStyle, HiveFormat, Ident, InitializeKind, ObjectName, + OnCommit, OneOrManyWithParens, Query, RefreshModeKind, RowAccessPolicy, Statement, StorageSerializationPolicy, TableConstraint, TableVersion, Tag, WrappedCollection, }; @@ -94,6 +94,8 @@ pub struct CreateTableBuilder { pub cluster_by: Option>>, pub clustered_by: Option, pub inherits: Option>, + pub partition_of: Option, + pub for_values: Option, pub strict: bool, pub copy_grants: bool, pub enable_schema_evolution: Option, @@ -150,6 +152,8 @@ impl CreateTableBuilder { cluster_by: None, clustered_by: None, inherits: None, + partition_of: None, + for_values: None, strict: false, copy_grants: false, enable_schema_evolution: None, @@ -317,6 +321,16 @@ impl CreateTableBuilder { self } + pub fn 
partition_of(mut self, partition_of: Option) -> Self { + self.partition_of = partition_of; + self + } + + pub fn for_values(mut self, for_values: Option) -> Self { + self.for_values = for_values; + self + } + pub fn strict(mut self, strict: bool) -> Self { self.strict = strict; self @@ -463,6 +477,8 @@ impl CreateTableBuilder { cluster_by: self.cluster_by, clustered_by: self.clustered_by, inherits: self.inherits, + partition_of: self.partition_of, + for_values: self.for_values, strict: self.strict, copy_grants: self.copy_grants, enable_schema_evolution: self.enable_schema_evolution, @@ -527,6 +543,8 @@ impl TryFrom for CreateTableBuilder { cluster_by, clustered_by, inherits, + partition_of, + for_values, strict, copy_grants, enable_schema_evolution, @@ -577,6 +595,8 @@ impl TryFrom for CreateTableBuilder { cluster_by, clustered_by, inherits, + partition_of, + for_values, strict, iceberg, copy_grants, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4676786027..c8d9c6be38 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -69,15 +69,15 @@ pub use self::ddl::{ CreateExtension, CreateFunction, CreateIndex, CreateOperator, CreateOperatorClass, CreateOperatorFamily, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial, DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, DropOperatorFamily, - DropOperatorSignature, DropTrigger, GeneratedAs, GeneratedExpressionMode, IdentityParameters, - IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, - IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, - OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem, - OperatorOption, OperatorPurpose, Owner, Partition, ProcedureParam, ReferentialAction, - RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, Truncate, - UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, - UserDefinedTypeRangeOption, 
UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, - UserDefinedTypeStorage, ViewColumnDef, + DropOperatorSignature, DropTrigger, ForValues, GeneratedAs, GeneratedExpressionMode, + IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, + IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, + NullsDistinctOption, OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, + OperatorFamilyItem, OperatorOption, OperatorPurpose, Owner, Partition, PartitionBoundValue, + ProcedureParam, ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption, + TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef, + UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, + UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, }; pub use self::dml::{ Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, @@ -8781,6 +8781,62 @@ impl fmt::Display for FunctionBehavior { } } +/// Security attribute for functions: SECURITY DEFINER or SECURITY INVOKER. +/// +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FunctionSecurity { + Definer, + Invoker, +} + +impl fmt::Display for FunctionSecurity { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FunctionSecurity::Definer => write!(f, "SECURITY DEFINER"), + FunctionSecurity::Invoker => write!(f, "SECURITY INVOKER"), + } + } +} + +/// Value for a SET configuration parameter in a CREATE FUNCTION statement. 
+/// +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FunctionSetValue { + /// SET param = value1, value2, ... + Values(Vec), + /// SET param FROM CURRENT + FromCurrent, +} + +/// A SET configuration_parameter clause in a CREATE FUNCTION statement. +/// +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct FunctionDefinitionSetParam { + pub name: Ident, + pub value: FunctionSetValue, +} + +impl fmt::Display for FunctionDefinitionSetParam { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "SET {} ", self.name)?; + match &self.value { + FunctionSetValue::Values(values) => { + write!(f, "= {}", display_comma_separated(values)) + } + FunctionSetValue::FromCurrent => write!(f, "FROM CURRENT"), + } + } +} + /// These attributes describe the behavior of the function when called with a null argument. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/ast/spans.rs b/src/ast/spans.rs index d4e843157a..f88b302965 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -34,19 +34,20 @@ use super::{ ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements, ConflictTarget, ConnectBy, ConstraintCharacteristics, CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, ExceptSelectItem, ExcludeSelectItem, Expr, - ExprWithAlias, Fetch, FromTable, Function, FunctionArg, FunctionArgExpr, + ExprWithAlias, Fetch, ForValues, FromTable, Function, FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, GroupByExpr, HavingBound, IfStatement, IlikeSelectItem, IndexColumn, Insert, Interpolate, InterpolateExpr, Join, JoinConstraint, JoinOperator, JsonPath, JsonPathElem, LateralView, LimitClause, MatchRecognizePattern, Measure, Merge, MergeAction, MergeClause, MergeInsertExpr, MergeInsertKind, MergeUpdateExpr, NamedParenthesizedList, NamedWindowDefinition, ObjectName, ObjectNamePart, Offset, OnConflict, OnConflictAction, OnInsert, OpenStatement, OrderBy, - OrderByExpr, OrderByKind, OutputClause, Partition, PivotValueSource, ProjectionSelect, Query, - RaiseStatement, RaiseStatementValue, ReferentialAction, RenameSelectItem, ReplaceSelectElement, - ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, - SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, TableFactor, TableObject, - TableOptionsClustered, TableWithJoins, Update, UpdateTableFromKind, Use, Value, Values, - ViewColumnDef, WhileStatement, WildcardAdditionalOptions, With, WithFill, + OrderByExpr, OrderByKind, OutputClause, Partition, PartitionBoundValue, PivotValueSource, + ProjectionSelect, Query, RaiseStatement, RaiseStatementValue, ReferentialAction, + RenameSelectItem, ReplaceSelectElement, 
ReplaceSelectItem, Select, SelectInto, SelectItem, + SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, + TableConstraint, TableFactor, TableObject, TableOptionsClustered, TableWithJoins, Update, + UpdateTableFromKind, Use, Value, Values, ViewColumnDef, WhileStatement, + WildcardAdditionalOptions, With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. @@ -547,13 +548,15 @@ impl Spanned for CreateTable { clone, comment: _, // todo, no span on_commit: _, - on_cluster: _, // todo, clickhouse specific - primary_key: _, // todo, clickhouse specific - order_by: _, // todo, clickhouse specific - partition_by: _, // todo, BigQuery specific - cluster_by: _, // todo, BigQuery specific - clustered_by: _, // todo, Hive specific - inherits: _, // todo, PostgreSQL specific + on_cluster: _, // todo, clickhouse specific + primary_key: _, // todo, clickhouse specific + order_by: _, // todo, clickhouse specific + partition_by: _, // todo, BigQuery specific + cluster_by: _, // todo, BigQuery specific + clustered_by: _, // todo, Hive specific + inherits: _, // todo, PostgreSQL specific + partition_of, + for_values, strict: _, // bool copy_grants: _, // bool enable_schema_evolution: _, // bool @@ -584,7 +587,9 @@ impl Spanned for CreateTable { .chain(columns.iter().map(|i| i.span())) .chain(constraints.iter().map(|i| i.span())) .chain(query.iter().map(|i| i.span())) - .chain(clone.iter().map(|i| i.span())), + .chain(clone.iter().map(|i| i.span())) + .chain(partition_of.iter().map(|i| i.span())) + .chain(for_values.iter().map(|i| i.span())), ) } } @@ -622,6 +627,33 @@ impl Spanned for TableConstraint { } } +impl Spanned for PartitionBoundValue { + fn span(&self) -> Span { + match self { + PartitionBoundValue::Expr(expr) => expr.span(), + // MINVALUE and MAXVALUE are keywords without tracked spans + PartitionBoundValue::MinValue => Span::empty(), + PartitionBoundValue::MaxValue => Span::empty(), + } + } +} + 
+impl Spanned for ForValues { + fn span(&self) -> Span { + match self { + ForValues::In(exprs) => union_spans(exprs.iter().map(|e| e.span())), + ForValues::From { from, to } => union_spans( + from.iter() + .map(|v| v.span()) + .chain(to.iter().map(|v| v.span())), + ), + // WITH (MODULUS n, REMAINDER r) - u64 values have no spans + ForValues::With { .. } => Span::empty(), + ForValues::Default => Span::empty(), + } + } +} + impl Spanned for CreateIndex { fn span(&self) -> Span { let CreateIndex { diff --git a/src/keywords.rs b/src/keywords.rs index f06842ec6a..87c77379c2 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -637,6 +637,7 @@ define_keywords!( MODIFIES, MODIFY, MODULE, + MODULUS, MONITOR, MONTH, MONTHS, @@ -837,6 +838,7 @@ define_keywords!( RELAY, RELEASE, RELEASES, + REMAINDER, REMOTE, REMOVE, REMOVEQUOTES, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f07e8919a5..373076f120 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5260,8 +5260,10 @@ impl<'a> Parser<'a> { function_body: Option, called_on_null: Option, parallel: Option, + security: Option, } let mut body = Body::default(); + let mut set_params: Vec = Vec::new(); loop { fn ensure_not_set(field: &Option, name: &str) -> Result<(), ParserError> { if field.is_some() { @@ -5326,6 +5328,27 @@ impl<'a> Parser<'a> { } else { return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); } + } else if self.parse_keyword(Keyword::SECURITY) { + ensure_not_set(&body.security, "SECURITY { DEFINER | INVOKER }")?; + if self.parse_keyword(Keyword::DEFINER) { + body.security = Some(FunctionSecurity::Definer); + } else if self.parse_keyword(Keyword::INVOKER) { + body.security = Some(FunctionSecurity::Invoker); + } else { + return self.expected("DEFINER or INVOKER", self.peek_token()); + } + } else if self.parse_keyword(Keyword::SET) { + let name = self.parse_identifier()?; + let value = if self.parse_keywords(&[Keyword::FROM, Keyword::CURRENT]) { + FunctionSetValue::FromCurrent 
+ } else { + if !self.consume_token(&Token::Eq) && !self.parse_keyword(Keyword::TO) { + return self.expected("= or TO", self.peek_token()); + } + let values = self.parse_comma_separated(Parser::parse_expr)?; + FunctionSetValue::Values(values) + }; + set_params.push(FunctionDefinitionSetParam { name, value }); } else if self.parse_keyword(Keyword::RETURN) { ensure_not_set(&body.function_body, "RETURN")?; body.function_body = Some(CreateFunctionBody::Return(self.parse_expr()?)); @@ -5344,6 +5367,8 @@ impl<'a> Parser<'a> { behavior: body.behavior, called_on_null: body.called_on_null, parallel: body.parallel, + security: body.security, + set_params, language: body.language, function_body: body.function_body, if_not_exists: false, @@ -5381,6 +5406,8 @@ impl<'a> Parser<'a> { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], language: None, determinism_specifier: None, options: None, @@ -5463,6 +5490,8 @@ impl<'a> Parser<'a> { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], })) } @@ -5552,6 +5581,8 @@ impl<'a> Parser<'a> { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], })) } @@ -7887,6 +7918,22 @@ impl<'a> Parser<'a> { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name(allow_unquoted_hyphen)?; + // PostgreSQL PARTITION OF for child partition tables + // Note: This is a PostgreSQL-specific feature, but the dialect check was intentionally + // removed to allow GenericDialect and other dialects to parse this syntax. This enables + // multi-dialect SQL tools to work with PostgreSQL-specific DDL statements. + // + // PARTITION OF can be combined with other table definition clauses in the AST, + // though PostgreSQL itself prohibits PARTITION OF with AS SELECT or LIKE clauses. 
+ // The parser accepts these combinations for flexibility; semantic validation + // is left to downstream tools. + // Child partitions can have their own constraints and indexes. + let partition_of = if self.parse_keywords(&[Keyword::PARTITION, Keyword::OF]) { + Some(self.parse_object_name(allow_unquoted_hyphen)?) + } else { + None + }; + // Clickhouse has `ON CLUSTER 'cluster'` syntax for DDLs let on_cluster = self.parse_optional_on_cluster()?; @@ -7911,6 +7958,20 @@ impl<'a> Parser<'a> { None }; + // PostgreSQL PARTITION OF: partition bound specification + let for_values = if partition_of.is_some() { + if self.peek_keyword(Keyword::FOR) || self.peek_keyword(Keyword::DEFAULT) { + Some(self.parse_partition_for_values()?) + } else { + return self.expected( + "FOR VALUES or DEFAULT after PARTITION OF", + self.peek_token(), + ); + } + } else { + None + }; + // SQLite supports `WITHOUT ROWID` at the end of `CREATE TABLE` let without_rowid = self.parse_keywords(&[Keyword::WITHOUT, Keyword::ROWID]); @@ -7988,6 +8049,8 @@ impl<'a> Parser<'a> { .partition_by(create_table_config.partition_by) .cluster_by(create_table_config.cluster_by) .inherits(create_table_config.inherits) + .partition_of(partition_of) + .for_values(for_values) .table_options(create_table_config.table_options) .primary_key(primary_key) .strict(strict) @@ -8047,6 +8110,69 @@ impl<'a> Parser<'a> { } } + /// Parse [ForValues] of a `PARTITION OF` clause. + /// + /// Parses: `FOR VALUES partition_bound_spec | DEFAULT` + /// + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createtable.html) + fn parse_partition_for_values(&mut self) -> Result { + if self.parse_keyword(Keyword::DEFAULT) { + return Ok(ForValues::Default); + } + + self.expect_keywords(&[Keyword::FOR, Keyword::VALUES])?; + + if self.parse_keyword(Keyword::IN) { + // FOR VALUES IN (expr, ...) 
+ self.expect_token(&Token::LParen)?; + if self.peek_token() == Token::RParen { + return self.expected("at least one value", self.peek_token()); + } + let values = self.parse_comma_separated(Parser::parse_expr)?; + self.expect_token(&Token::RParen)?; + Ok(ForValues::In(values)) + } else if self.parse_keyword(Keyword::FROM) { + // FOR VALUES FROM (...) TO (...) + self.expect_token(&Token::LParen)?; + if self.peek_token() == Token::RParen { + return self.expected("at least one value", self.peek_token()); + } + let from = self.parse_comma_separated(Parser::parse_partition_bound_value)?; + self.expect_token(&Token::RParen)?; + self.expect_keyword(Keyword::TO)?; + self.expect_token(&Token::LParen)?; + if self.peek_token() == Token::RParen { + return self.expected("at least one value", self.peek_token()); + } + let to = self.parse_comma_separated(Parser::parse_partition_bound_value)?; + self.expect_token(&Token::RParen)?; + Ok(ForValues::From { from, to }) + } else if self.parse_keyword(Keyword::WITH) { + // FOR VALUES WITH (MODULUS n, REMAINDER r) + self.expect_token(&Token::LParen)?; + self.expect_keyword(Keyword::MODULUS)?; + let modulus = self.parse_literal_uint()?; + self.expect_token(&Token::Comma)?; + self.expect_keyword(Keyword::REMAINDER)?; + let remainder = self.parse_literal_uint()?; + self.expect_token(&Token::RParen)?; + Ok(ForValues::With { modulus, remainder }) + } else { + self.expected("IN, FROM, or WITH after FOR VALUES", self.peek_token()) + } + } + + /// Parse a single [PartitionBoundValue]. + fn parse_partition_bound_value(&mut self) -> Result { + if self.parse_keyword(Keyword::MINVALUE) { + Ok(PartitionBoundValue::MinValue) + } else if self.parse_keyword(Keyword::MAXVALUE) { + Ok(PartitionBoundValue::MaxValue) + } else { + Ok(PartitionBoundValue::Expr(self.parse_expr()?)) + } + } + /// Parse configuration like inheritance, partitioning, clustering information during the table creation. 
/// /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#syntax_2) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 24b9efcaa6..2bdeba912f 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2294,6 +2294,8 @@ fn test_bigquery_create_function() { remote_connection: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], }) ); diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 73a1afe260..4a2f29e151 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -755,6 +755,8 @@ fn test_duckdb_union_datatype() { cluster_by: Default::default(), clustered_by: Default::default(), inherits: Default::default(), + partition_of: Default::default(), + for_values: Default::default(), strict: Default::default(), copy_grants: Default::default(), enable_schema_evolution: Default::default(), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 70e0aab491..1927b864e0 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -266,6 +266,8 @@ fn parse_create_function() { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], using: None, language: None, determinism_specifier: None, @@ -439,6 +441,8 @@ fn parse_create_function_parameter_default_values() { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], using: None, language: None, determinism_specifier: None, @@ -1897,6 +1901,8 @@ fn parse_create_table_with_valid_options() { cluster_by: None, clustered_by: None, inherits: None, + partition_of: None, + for_values: None, strict: false, iceberg: false, copy_grants: false, @@ -2064,6 +2070,8 @@ fn parse_create_table_with_identity_column() { cluster_by: None, clustered_by: None, inherits: None, + partition_of: None, + for_values: None, strict: false, copy_grants: false, enable_schema_evolution: None, diff --git 
a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 70f27a13da..24707604ad 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4294,6 +4294,8 @@ $$"#; behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF str1 <> str2 THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() @@ -4335,6 +4337,8 @@ $$"#; behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF int1 <> 0 THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() @@ -4380,6 +4384,8 @@ $$"#; behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF a <> b THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() @@ -4425,6 +4431,8 @@ $$"#; behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString {value: "\nBEGIN\n IF int1 <> int2 THEN\n RETURN TRUE;\n ELSE\n RETURN FALSE;\n END IF;\nEND;\n".to_owned(), tag: None})).with_empty_span() @@ -4463,6 +4471,8 @@ $$"#; behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::DollarQuotedString(DollarQuotedString { @@ -4504,6 +4514,8 @@ fn parse_create_function() { 
behavior: Some(FunctionBehavior::Immutable), called_on_null: Some(FunctionCalledOnNull::Strict), parallel: Some(FunctionParallel::Safe), + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::SingleQuotedString("select $1 + $2;".into())).with_empty_span() @@ -4534,6 +4546,61 @@ fn parse_create_function_detailed() { ); } +#[test] +fn parse_create_function_with_security() { + let sql = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SECURITY DEFINER AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateFunction(CreateFunction { security, .. }) => { + assert_eq!(security, Some(FunctionSecurity::Definer)); + } + _ => panic!("Expected CreateFunction"), + } + + let sql2 = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SECURITY INVOKER AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql2) { + Statement::CreateFunction(CreateFunction { security, .. }) => { + assert_eq!(security, Some(FunctionSecurity::Invoker)); + } + _ => panic!("Expected CreateFunction"), + } +} + +#[test] +fn parse_create_function_with_set_params() { + let sql = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SET search_path = auth, pg_temp, public AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateFunction(CreateFunction { set_params, .. }) => { + assert_eq!(set_params.len(), 1); + assert_eq!(set_params[0].name.to_string(), "search_path"); + } + _ => panic!("Expected CreateFunction"), + } + + // Test multiple SET params + let sql2 = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SET search_path = public SET statement_timeout = '5s' AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql2) { + Statement::CreateFunction(CreateFunction { set_params, .. 
}) => { + assert_eq!(set_params.len(), 2); + } + _ => panic!("Expected CreateFunction"), + } + + // Test FROM CURRENT + let sql3 = + "CREATE FUNCTION test_fn() RETURNS void LANGUAGE sql SET search_path FROM CURRENT AS $$ SELECT 1 $$"; + match pg_and_generic().verified_stmt(sql3) { + Statement::CreateFunction(CreateFunction { set_params, .. }) => { + assert_eq!(set_params.len(), 1); + assert!(matches!(set_params[0].value, FunctionSetValue::FromCurrent)); + } + _ => panic!("Expected CreateFunction"), + } +} + #[test] fn parse_incorrect_create_function_parallel() { let sql = "CREATE FUNCTION add(INTEGER, INTEGER) RETURNS INTEGER LANGUAGE SQL PARALLEL BLAH AS 'select $1 + $2;'"; @@ -4562,6 +4629,8 @@ fn parse_create_function_c_with_module_pathname() { behavior: Some(FunctionBehavior::Immutable), called_on_null: None, parallel: Some(FunctionParallel::Safe), + security: None, + set_params: vec![], function_body: Some(CreateFunctionBody::AsBeforeOptions { body: Expr::Value( (Value::SingleQuotedString("MODULE_PATHNAME".into())).with_empty_span() @@ -6130,6 +6199,8 @@ fn parse_trigger_related_functions() { cluster_by: None, clustered_by: None, inherits: None, + partition_of: None, + for_values: None, strict: false, copy_grants: false, enable_schema_evolution: None, @@ -6185,6 +6256,8 @@ fn parse_trigger_related_functions() { behavior: None, called_on_null: None, parallel: None, + security: None, + set_params: vec![], using: None, language: Some(Ident::new("plpgsql")), determinism_specifier: None, @@ -7922,3 +7995,257 @@ fn parse_identifiers_semicolon_handling() { let statement = "SHOW search_path; SHOW ALL; SHOW ALL"; pg_and_generic().statements_parse_to(statement, statement); } + +#[test] +fn parse_create_table_partition_of_range() { + // RANGE partition with FROM ... 
TO + let sql = "CREATE TABLE measurement_y2006m02 PARTITION OF measurement FOR VALUES FROM ('2006-02-01') TO ('2006-03-01')"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("measurement_y2006m02", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("measurement")])), + create_table.partition_of + ); + match create_table.for_values { + Some(ForValues::From { from, to }) => { + assert_eq!(1, from.len()); + assert_eq!(1, to.len()); + match &from[0] { + PartitionBoundValue::Expr(Expr::Value(v)) => { + assert_eq!("'2006-02-01'", v.to_string()); + } + _ => panic!("Expected Expr value in from"), + } + match &to[0] { + PartitionBoundValue::Expr(Expr::Value(v)) => { + assert_eq!("'2006-03-01'", v.to_string()); + } + _ => panic!("Expected Expr value in to"), + } + } + _ => panic!("Expected ForValues::From"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_range_with_minvalue_maxvalue() { + // RANGE partition with MINVALUE/MAXVALUE + let sql = + "CREATE TABLE orders_old PARTITION OF orders FOR VALUES FROM (MINVALUE) TO ('2020-01-01')"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_old", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + match create_table.for_values { + Some(ForValues::From { from, to }) => { + assert_eq!(PartitionBoundValue::MinValue, from[0]); + match &to[0] { + PartitionBoundValue::Expr(Expr::Value(v)) => { + assert_eq!("'2020-01-01'", v.to_string()); + } + _ => panic!("Expected Expr value in to"), + } + } + _ => panic!("Expected ForValues::From"), + } + } + _ => panic!("Expected CreateTable"), + } + + // With MAXVALUE + let sql = + "CREATE TABLE orders_new PARTITION OF orders FOR VALUES FROM ('2024-01-01') TO (MAXVALUE)"; + match pg_and_generic().verified_stmt(sql) { + 
Statement::CreateTable(create_table) => match create_table.for_values { + Some(ForValues::From { from, to }) => { + match &from[0] { + PartitionBoundValue::Expr(Expr::Value(v)) => { + assert_eq!("'2024-01-01'", v.to_string()); + } + _ => panic!("Expected Expr value in from"), + } + assert_eq!(PartitionBoundValue::MaxValue, to[0]); + } + _ => panic!("Expected ForValues::From"), + }, + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_list() { + // LIST partition + let sql = "CREATE TABLE orders_us PARTITION OF orders FOR VALUES IN ('US', 'CA', 'MX')"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_us", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + match create_table.for_values { + Some(ForValues::In(values)) => { + assert_eq!(3, values.len()); + } + _ => panic!("Expected ForValues::In"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_hash() { + // HASH partition + let sql = "CREATE TABLE orders_p0 PARTITION OF orders FOR VALUES WITH (MODULUS 4, REMAINDER 0)"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_p0", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + match create_table.for_values { + Some(ForValues::With { modulus, remainder }) => { + assert_eq!(4, modulus); + assert_eq!(0, remainder); + } + _ => panic!("Expected ForValues::With"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_default() { + // DEFAULT partition + let sql = "CREATE TABLE orders_default PARTITION OF orders DEFAULT"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_default", 
create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + assert_eq!(Some(ForValues::Default), create_table.for_values); + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_multicolumn_range() { + // Multi-column RANGE partition + let sql = "CREATE TABLE sales_2023_q1 PARTITION OF sales FOR VALUES FROM ('2023-01-01', 1) TO ('2023-04-01', 1)"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("sales_2023_q1", create_table.name.to_string()); + match create_table.for_values { + Some(ForValues::From { from, to }) => { + assert_eq!(2, from.len()); + assert_eq!(2, to.len()); + } + _ => panic!("Expected ForValues::From"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_with_constraint() { + // With table constraint (not column constraint which has different syntax in PARTITION OF) + let sql = "CREATE TABLE orders_2023 PARTITION OF orders (\ +CONSTRAINT check_date CHECK (order_date >= '2023-01-01')\ +) FOR VALUES FROM ('2023-01-01') TO ('2024-01-01')"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateTable(create_table) => { + assert_eq!("orders_2023", create_table.name.to_string()); + assert_eq!( + Some(ObjectName::from(vec![Ident::new("orders")])), + create_table.partition_of + ); + // Check that table constraint was parsed + assert_eq!(1, create_table.constraints.len()); + match create_table.for_values { + Some(ForValues::From { .. 
}) => {} + _ => panic!("Expected ForValues::From"), + } + } + _ => panic!("Expected CreateTable"), + } +} + +#[test] +fn parse_create_table_partition_of_errors() { + let sql = "CREATE TABLE p PARTITION OF parent"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("FOR VALUES or DEFAULT"), + "Expected error about FOR VALUES, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent WITH (fillfactor = 70)"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("FOR VALUES or DEFAULT"), + "Expected error about FOR VALUES, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES RANGE (1, 10)"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("IN, FROM, or WITH"), + "Expected error about invalid keyword after FOR VALUES, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES FROM (1)"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("TO"), + "Expected error about missing TO clause, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES IN ()"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("at least one value"), + "Expected error about empty value list in IN clause, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES FROM () TO (10)"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("at least one value"), + "Expected error about 
empty FROM list, got: {err}" + ); + + let sql = "CREATE TABLE p PARTITION OF parent FOR VALUES FROM (1) TO ()"; + let result = pg_and_generic().parse_sql_statements(sql); + assert!(result.is_err()); + let err = result.unwrap_err().to_string(); + assert!( + err.contains("at least one value"), + "Expected error about empty TO list, got: {err}" + ); +} From ce74e7fe217ac5d2cb5392237a03c7d252a3ea6a Mon Sep 17 00:00:00 2001 From: James Vorderbruggen Date: Wed, 7 Jan 2026 05:33:10 -0600 Subject: [PATCH 018/121] Databricks: Support Timetravel With "TIMESTAMP AS OF" (#2134) Co-authored-by: Ifeanyi Ubah --- src/ast/query.rs | 5 +++++ src/dialect/databricks.rs | 5 +++++ src/parser/mod.rs | 3 +++ tests/sqlparser_bigquery.rs | 2 +- tests/sqlparser_databricks.rs | 14 ++++++++++++++ 5 files changed, 28 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 16fc9ec0e6..efec56ffd4 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2241,6 +2241,10 @@ pub enum TableVersion { /// When the table version is defined using `FOR SYSTEM_TIME AS OF`. /// For example: `SELECT * FROM tbl FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)` ForSystemTimeAsOf(Expr), + /// When the table version is defined using `TIMESTAMP AS OF`. + /// Databricks supports this syntax. + /// For example: `SELECT * FROM tbl TIMESTAMP AS OF CURRENT_TIMESTAMP() - INTERVAL 1 HOUR` + TimestampAsOf(Expr), /// When the table version is defined using a function. 
/// For example: `SELECT * FROM tbl AT(TIMESTAMP => '2020-08-14 09:30:00')` Function(Expr), @@ -2250,6 +2254,7 @@ impl Display for TableVersion { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { TableVersion::ForSystemTimeAsOf(e) => write!(f, "FOR SYSTEM_TIME AS OF {e}")?, + TableVersion::TimestampAsOf(e) => write!(f, "TIMESTAMP AS OF {e}")?, TableVersion::Function(func) => write!(f, "{func}")?, } Ok(()) diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index c5d5f97401..ec866295d1 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -47,6 +47,11 @@ impl Dialect for DatabricksDialect { true } + /// + fn supports_timestamp_versioning(&self) -> bool { + true + } + fn supports_lambda_functions(&self) -> bool { true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 373076f120..4e914df73e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15493,6 +15493,9 @@ impl<'a> Parser<'a> { let func_name = self.parse_object_name(true)?; let func = self.parse_function(func_name)?; return Ok(Some(TableVersion::Function(func))); + } else if self.parse_keywords(&[Keyword::TIMESTAMP, Keyword::AS, Keyword::OF]) { + let expr = self.parse_expr()?; + return Ok(Some(TableVersion::TimestampAsOf(expr))); } } Ok(None) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 2bdeba912f..d8c3ada1d1 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -1739,7 +1739,7 @@ fn parse_table_time_travel() { args: None, with_hints: vec![], version: Some(TableVersion::ForSystemTimeAsOf(Expr::Value( - Value::SingleQuotedString(version).with_empty_span() + Value::SingleQuotedString(version.clone()).with_empty_span() ))), partitions: vec![], with_ordinality: false, diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 065e8f9e70..9a9a73fe6b 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -366,3 +366,17 @@ fn 
data_type_timestamp_ntz() { s => panic!("Unexpected statement: {s:?}"), } } + +#[test] +fn parse_table_time_travel() { + all_dialects_where(|d| d.supports_timestamp_versioning()) + .verified_only_select("SELECT 1 FROM t1 TIMESTAMP AS OF '2018-10-18T22:15:12.013Z'"); + + all_dialects_where(|d| d.supports_timestamp_versioning()).verified_only_select( + "SELECT 1 FROM t1 TIMESTAMP AS OF CURRENT_TIMESTAMP() - INTERVAL 12 HOURS", + ); + + assert!(databricks() + .parse_sql_statements("SELECT 1 FROM t1 FOR TIMESTAMP AS OF 'some_timestamp'") + .is_err()); +} From 3af9988c432e3dae1a2343d60e64394f3ad9fa42 Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Thu, 8 Jan 2026 03:00:15 -0800 Subject: [PATCH 019/121] MySQL: Parse bitwise shift left/right operators (#2152) --- src/dialect/duckdb.rs | 4 ++++ src/dialect/generic.rs | 4 ++++ src/dialect/mod.rs | 5 +++++ src/dialect/mysql.rs | 4 ++++ src/dialect/postgresql.rs | 4 ++++ src/dialect/redshift.rs | 4 ++++ src/parser/mod.rs | 4 ++-- tests/sqlparser_common.rs | 23 +++++++++++++++++++++++ 8 files changed, 50 insertions(+), 2 deletions(-) diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index f08d827b94..ea09901318 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -43,6 +43,10 @@ impl Dialect for DuckDbDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + fn supports_named_fn_args_with_eq_operator(&self) -> bool { true } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index bbedbc0592..f3a0903a40 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -132,6 +132,10 @@ impl Dialect for GenericDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + fn supports_comment_on(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 1a416e4df6..0b6212974b 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -894,6 +894,11 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if 
the dialect supports `<<` and `>>` shift operators. + fn supports_bitwise_shift_operators(&self) -> bool { + false + } + /// Returns true if the dialect supports nested comments /// e.g. `/* /* nested */ */` fn supports_nested_comments(&self) -> bool { diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 8d2a5ad4bd..53a30f1840 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -84,6 +84,10 @@ impl Dialect for MySqlDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + fn parse_infix( &self, parser: &mut crate::parser::Parser, diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index e861cc5153..02bab0e064 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -199,6 +199,10 @@ impl Dialect for PostgreSqlDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + /// see fn supports_comment_on(&self) -> bool { true diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 1cd6098a6c..43c0646ce3 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -120,6 +120,10 @@ impl Dialect for RedshiftSqlDialect { true } + fn supports_bitwise_shift_operators(&self) -> bool { + true + } + fn supports_array_typedef_with_brackets(&self) -> bool { true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4e914df73e..3294acf6ae 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3485,10 +3485,10 @@ impl<'a> Parser<'a> { Token::DuckIntDiv if dialect_is!(dialect is DuckDbDialect | GenericDialect) => { Some(BinaryOperator::DuckIntegerDivide) } - Token::ShiftLeft if dialect_is!(dialect is PostgreSqlDialect | DuckDbDialect | GenericDialect | RedshiftSqlDialect) => { + Token::ShiftLeft if dialect.supports_bitwise_shift_operators() => { Some(BinaryOperator::PGBitwiseShiftLeft) } - Token::ShiftRight if dialect_is!(dialect is PostgreSqlDialect | DuckDbDialect | GenericDialect | RedshiftSqlDialect) => { + Token::ShiftRight if 
dialect.supports_bitwise_shift_operators() => { Some(BinaryOperator::PGBitwiseShiftRight) } Token::Sharp if dialect_is!(dialect is PostgreSqlDialect | RedshiftSqlDialect) => { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9f549e4d0e..365bddb0fc 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2369,6 +2369,29 @@ fn parse_bitwise_ops() { } } +#[test] +fn parse_bitwise_shift_ops() { + let dialects = all_dialects_where(|d| d.supports_bitwise_shift_operators()); + let sql = "SELECT 1 << 2, 3 >> 4"; + let select = dialects.verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Value((number("1")).with_empty_span())), + op: BinaryOperator::PGBitwiseShiftLeft, + right: Box::new(Expr::Value((number("2")).with_empty_span())), + }), + select.projection[0] + ); + assert_eq!( + SelectItem::UnnamedExpr(Expr::BinaryOp { + left: Box::new(Expr::Value((number("3")).with_empty_span())), + op: BinaryOperator::PGBitwiseShiftRight, + right: Box::new(Expr::Value((number("4")).with_empty_span())), + }), + select.projection[1] + ); +} + #[test] fn parse_binary_any() { let select = verified_only_select("SELECT a = ANY(b)"); From d80c0b9b6c8bcbfe880e60daa1957674f0fcaa09 Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Fri, 9 Jan 2026 10:20:21 +0100 Subject: [PATCH 020/121] Redshift: Add support for optional JSON format in copy option (#2141) --- src/ast/mod.rs | 12 +++++++++--- src/parser/mod.rs | 10 +++++++++- tests/sqlparser_common.rs | 6 ++++++ 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c8d9c6be38..114dee11e3 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -8296,8 +8296,8 @@ pub enum CopyLegacyOption { IamRole(IamRoleKind), /// IGNOREHEADER \[ AS \] number_rows IgnoreHeader(u64), - /// JSON - Json, + /// JSON \[ AS \] 'json_option' + Json(Option), /// MANIFEST \[ VERBOSE \] 
Manifest { verbose: bool }, /// MAXFILESIZE \[ AS \] max-size \[ MB | GB \] @@ -8388,7 +8388,13 @@ impl fmt::Display for CopyLegacyOption { Header => write!(f, "HEADER"), IamRole(role) => write!(f, "IAM_ROLE {role}"), IgnoreHeader(num_rows) => write!(f, "IGNOREHEADER {num_rows}"), - Json => write!(f, "JSON"), + Json(opt) => { + write!(f, "JSON")?; + if let Some(opt) = opt { + write!(f, " AS '{}'", value::escape_single_quote_string(opt))?; + } + Ok(()) + } Manifest { verbose } => write!(f, "MANIFEST{}", if *verbose { " VERBOSE" } else { "" }), MaxFileSize(file_size) => write!(f, "MAXFILESIZE {file_size}"), Null(string) => write!(f, "NULL '{}'", value::escape_single_quote_string(string)), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3294acf6ae..3a31d92587 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10747,7 +10747,15 @@ impl<'a> Parser<'a> { let num_rows = self.parse_literal_uint()?; CopyLegacyOption::IgnoreHeader(num_rows) } - Some(Keyword::JSON) => CopyLegacyOption::Json, + Some(Keyword::JSON) => { + let _ = self.parse_keyword(Keyword::AS); + let fmt = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + Some(self.parse_literal_string()?) 
+ } else { + None + }; + CopyLegacyOption::Json(fmt) + } Some(Keyword::MANIFEST) => { let verbose = self.parse_keyword(Keyword::VERBOSE); CopyLegacyOption::Manifest { verbose } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 365bddb0fc..9ed59eac98 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -17421,6 +17421,9 @@ fn parse_copy_options() { "EMPTYASNULL ", "IAM_ROLE DEFAULT ", "IGNOREHEADER AS 1 ", + "JSON ", + "JSON 'auto' ", + "JSON AS 'auto' ", "TIMEFORMAT AS 'auto' ", "TRUNCATECOLUMNS ", "REMOVEQUOTES ", @@ -17446,6 +17449,9 @@ fn parse_copy_options() { "EMPTYASNULL ", "IAM_ROLE DEFAULT ", "IGNOREHEADER 1 ", + "JSON ", + "JSON AS 'auto' ", + "JSON AS 'auto' ", "TIMEFORMAT 'auto' ", "TRUNCATECOLUMNS ", "REMOVEQUOTES ", From 00da3d71fc8e0e11ed14dffadc18ef349f371007 Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Fri, 9 Jan 2026 10:35:03 +0100 Subject: [PATCH 021/121] MySQL: Add missing support for TREE explain format (#2145) --- src/keywords.rs | 1 + src/parser/mod.rs | 1 + tests/sqlparser_common.rs | 36 ++++++++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+) diff --git a/src/keywords.rs b/src/keywords.rs index 87c77379c2..845d710291 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -1040,6 +1040,7 @@ define_keywords!( TRANSLATE_REGEX, TRANSLATION, TREAT, + TREE, TRIGGER, TRIM, TRIM_ARRAY, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3a31d92587..e436a7afc2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6006,6 +6006,7 @@ impl<'a> Parser<'a> { Keyword::TEXT => Ok(AnalyzeFormat::TEXT), Keyword::GRAPHVIZ => Ok(AnalyzeFormat::GRAPHVIZ), Keyword::JSON => Ok(AnalyzeFormat::JSON), + Keyword::TREE => Ok(AnalyzeFormat::TREE), _ => self.expected("fileformat", next_token), }, _ => self.expected("fileformat", next_token), diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 9ed59eac98..d3f85affab 100644 --- 
a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -5442,6 +5442,42 @@ fn parse_explain_analyze_with_simple_select() { Some(AnalyzeFormatKind::Keyword(AnalyzeFormat::TEXT)), None, ); + + run_explain_analyze( + all_dialects(), + "EXPLAIN FORMAT=TEXT SELECT sqrt(id) FROM foo", + false, + false, + Some(AnalyzeFormatKind::Assignment(AnalyzeFormat::TEXT)), + None, + ); + + run_explain_analyze( + all_dialects(), + "EXPLAIN FORMAT=GRAPHVIZ SELECT sqrt(id) FROM foo", + false, + false, + Some(AnalyzeFormatKind::Assignment(AnalyzeFormat::GRAPHVIZ)), + None, + ); + + run_explain_analyze( + all_dialects(), + "EXPLAIN FORMAT=JSON SELECT sqrt(id) FROM foo", + false, + false, + Some(AnalyzeFormatKind::Assignment(AnalyzeFormat::JSON)), + None, + ); + + run_explain_analyze( + all_dialects(), + "EXPLAIN FORMAT=TREE SELECT sqrt(id) FROM foo", + false, + false, + Some(AnalyzeFormatKind::Assignment(AnalyzeFormat::TREE)), + None, + ); } #[test] From 24e12839d8a990639246e182f41fff0e6c91903c Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Fri, 9 Jan 2026 11:47:49 +0100 Subject: [PATCH 022/121] MySQL: Add support for && as boolean AND (#2144) --- src/dialect/mod.rs | 5 +++++ src/dialect/mysql.rs | 5 +++++ src/parser/mod.rs | 3 +++ tests/sqlparser_common.rs | 6 ++++++ 4 files changed, 19 insertions(+) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 0b6212974b..4c7173d544 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1221,6 +1221,11 @@ pub trait Dialect: Debug + Any { fn supports_quote_delimited_string(&self) -> bool { false } + + /// Returns true if the dialect considers the `&&` operator as a boolean AND operator. 
+ fn supports_double_ampersand_operator(&self) -> bool { + false + } } /// This represents the operators for which precedence must be defined diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 53a30f1840..60385c5bce 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -171,6 +171,11 @@ impl Dialect for MySqlDialect { fn supports_cross_join_constraint(&self) -> bool { true } + + /// See: + fn supports_double_ampersand_operator(&self) -> bool { + true + } } /// `LOCK TABLES` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e436a7afc2..08ce9f1cc6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3500,6 +3500,9 @@ impl<'a> Parser<'a> { Token::Overlap if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) => { Some(BinaryOperator::PGOverlap) } + Token::Overlap if dialect.supports_double_ampersand_operator() => { + Some(BinaryOperator::And) + } Token::CaretAt if dialect_is!(dialect is PostgreSqlDialect | GenericDialect) => { Some(BinaryOperator::PGStartsWith) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index d3f85affab..dd95315b76 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18037,3 +18037,9 @@ fn parse_select_parenthesized_wildcard() { assert_eq!(select2.projection.len(), 1); assert!(matches!(select2.projection[0], SelectItem::Wildcard(_))); } + +#[test] +fn parse_overlap_as_bool_and() { + let dialects = all_dialects_where(|d| d.supports_double_ampersand_operator()); + dialects.one_statement_parses_to("SELECT x && y", "SELECT x AND y"); +} From ab76a07bc292ba653e9d241e5c760ca2341a82a5 Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Fri, 9 Jan 2026 12:22:51 +0100 Subject: [PATCH 023/121] PostgreSQL: ALTER USER password option (#2142) --- src/ast/mod.rs | 37 +++++++++++++++++++++++++++++++++++-- src/parser/alter.rs | 21 +++++++++++++++++++-- tests/sqlparser_common.rs | 9 +++++++++ 3 files changed, 63 insertions(+), 4 
deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 114dee11e3..ad8147f968 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -10108,12 +10108,15 @@ impl fmt::Display for CreateUser { /// Modifies the properties of a user /// -/// Syntax: +/// [Snowflake Syntax:](https://docs.snowflake.com/en/sql-reference/sql/alter-user) /// ```sql /// ALTER USER [ IF EXISTS ] [ ] [ OPTIONS ] /// ``` /// -/// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/alter-user) +/// [PostgreSQL Syntax:](https://www.postgresql.org/docs/current/sql-alteruser.html) +/// ```sql +/// ALTER USER [ WITH ] option [ ... ] +/// ``` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -10137,6 +10140,8 @@ pub struct AlterUser { pub unset_tag: Vec, pub set_props: KeyValueOptions, pub unset_props: Vec, + /// The following options are PostgreSQL-specific: + pub password: Option, } /// ```sql @@ -10313,6 +10318,34 @@ impl fmt::Display for AlterUser { if !self.unset_props.is_empty() { write!(f, " UNSET {}", display_comma_separated(&self.unset_props))?; } + if let Some(password) = &self.password { + write!(f, " {}", password)?; + } + Ok(()) + } +} + +/// ```sql +/// ALTER USER [ WITH ] PASSWORD { 'password' | NULL }`` +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterUserPassword { + pub encrypted: bool, + pub password: Option, +} + +impl Display for AlterUserPassword { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.encrypted { + write!(f, "ENCRYPTED ")?; + } + write!(f, "PASSWORD")?; + match &self.password { + None => write!(f, " NULL")?, + Some(password) => write!(f, " '{}'", value::escape_single_quote_string(password))?, + } Ok(()) } } diff --git a/src/parser/alter.rs 
b/src/parser/alter.rs index b3e3c99e64..01b5ca30d1 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -21,8 +21,8 @@ use crate::{ helpers::key_value_options::{KeyValueOptions, KeyValueOptionsDelimiter}, AlterConnectorOwner, AlterPolicyOperation, AlterRoleOperation, AlterUser, AlterUserAddMfaMethodOtp, AlterUserAddRoleDelegation, AlterUserModifyMfaMethod, - AlterUserRemoveRoleDelegation, AlterUserSetPolicy, Expr, MfaMethodKind, Password, - ResetConfig, RoleOption, SetConfigValue, Statement, UserPolicyKind, + AlterUserPassword, AlterUserRemoveRoleDelegation, AlterUserSetPolicy, Expr, MfaMethodKind, + Password, ResetConfig, RoleOption, SetConfigValue, Statement, UserPolicyKind, }, dialect::{MsSqlDialect, PostgreSqlDialect}, keywords::Keyword, @@ -150,6 +150,7 @@ impl Parser<'_> { pub fn parse_alter_user(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let name = self.parse_identifier()?; + let _ = self.parse_keyword(Keyword::WITH); let rename_to = if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) { Some(self.parse_identifier()?) } else { @@ -292,6 +293,21 @@ impl Parser<'_> { vec![] }; + let encrypted = self.parse_keyword(Keyword::ENCRYPTED); + let password = if self.parse_keyword(Keyword::PASSWORD) { + let password = if self.parse_keyword(Keyword::NULL) { + None + } else { + Some(self.parse_literal_string()?) 
+ }; + Some(AlterUserPassword { + encrypted, + password, + }) + } else { + None + }; + Ok(Statement::AlterUser(AlterUser { if_exists, name, @@ -311,6 +327,7 @@ impl Parser<'_> { unset_tag, set_props, unset_props, + password, })) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index dd95315b76..4c3babd65e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -17950,6 +17950,15 @@ fn test_parse_alter_user() { _ => unreachable!(), } verified_stmt("ALTER USER u1 SET DEFAULT_SECONDARY_ROLES=('ALL'), PASSWORD='secret', WORKLOAD_IDENTITY=(TYPE=AWS, ARN='arn:aws:iam::123456789:r1/')"); + + verified_stmt("ALTER USER u1 PASSWORD 'AAA'"); + verified_stmt("ALTER USER u1 ENCRYPTED PASSWORD 'AAA'"); + verified_stmt("ALTER USER u1 PASSWORD NULL"); + + one_statement_parses_to( + "ALTER USER u1 WITH PASSWORD 'AAA'", + "ALTER USER u1 PASSWORD 'AAA'", + ); } #[test] From d4fb5c3e6ee0099218dcef4a801d3fb4c0add7b3 Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Fri, 9 Jan 2026 12:43:38 +0100 Subject: [PATCH 024/121] Key Value Options: add support for trailing semicolon (#2140) --- src/parser/mod.rs | 9 +++++++-- tests/sqlparser_common.rs | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 08ce9f1cc6..d0a774b591 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -18780,7 +18780,7 @@ impl<'a> Parser<'a> { return self.expected(" another option or EOF", self.peek_token()); } } - Token::EOF => break, + Token::EOF | Token::SemiColon => break, Token::Comma => { delimiter = KeyValueOptionsDelimiter::Comma; continue; @@ -18792,7 +18792,12 @@ impl<'a> Parser<'a> { self.prev_token(); break; } - _ => return self.expected("another option, EOF, Comma or ')'", self.peek_token()), + _ => { + return self.expected( + "another option, EOF, SemiColon, Comma or ')'", + self.peek_token(), + ) + } }; } diff --git a/tests/sqlparser_common.rs 
b/tests/sqlparser_common.rs index 4c3babd65e..208a56e236 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18052,3 +18052,11 @@ fn parse_overlap_as_bool_and() { let dialects = all_dialects_where(|d| d.supports_double_ampersand_operator()); dialects.one_statement_parses_to("SELECT x && y", "SELECT x AND y"); } + +#[test] +fn test_parse_key_value_options_trailing_semicolon() { + one_statement_parses_to( + "CREATE USER u1 option1='value1' option2='value2';", + "CREATE USER u1 option1='value1' option2='value2'", + ); +} From aa5c6b3238679ef96b180df877da5f24fe947989 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Fri, 9 Jan 2026 16:13:53 +0100 Subject: [PATCH 025/121] Added support for `ALTER OPERATOR CLASS` syntax (#2135) --- src/ast/comments.rs | 11 +- src/ast/data_type.rs | 45 +- src/ast/dcl.rs | 80 +- src/ast/ddl.rs | 467 +++++++- src/ast/dml.rs | 13 +- src/ast/helpers/key_value_options.rs | 13 + src/ast/helpers/mod.rs | 6 + src/ast/helpers/stmt_create_database.rs | 45 + src/ast/helpers/stmt_create_table.rs | 154 ++- src/ast/helpers/stmt_data_loading.rs | 16 + src/ast/mod.rs | 1344 +++++++++++++++++++++-- src/ast/query.rs | 434 ++++++-- src/ast/spans.rs | 1 + src/ast/table_constraints.rs | 21 +- src/ast/trigger.rs | 21 +- src/ast/value.rs | 87 +- src/ast/visitor.rs | 9 + src/dialect/clickhouse.rs | 2 +- src/dialect/mod.rs | 34 +- src/keywords.rs | 16 +- src/lib.rs | 2 + src/parser/alter.rs | 1 + src/parser/merge.rs | 1 + src/parser/mod.rs | 257 ++++- src/tokenizer.rs | 36 +- tests/sqlparser_postgres.rs | 150 +++ 26 files changed, 2946 insertions(+), 320 deletions(-) diff --git a/src/ast/comments.rs b/src/ast/comments.rs index 1f5b3102d3..b398474b38 100644 --- a/src/ast/comments.rs +++ b/src/ast/comments.rs @@ -174,7 +174,16 @@ pub enum Comment { /// until end-of-line or end-of-file in the source code. /// /// Note: `content` will include the terminating new-line character, if any. 
- SingleLine { content: String, prefix: String }, + /// A single-line comment, typically introduced with a prefix and spanning + /// until end-of-line or end-of-file in the source code. + /// + /// Note: `content` will include the terminating new-line character, if any. + SingleLine { + /// The content of the comment (including trailing newline, if any). + content: String, + /// The prefix introducing the comment (e.g. `--`, `#`). + prefix: String, + }, /// A multi-line comment, typically enclosed in `/* .. */` markers. The /// string represents the content excluding the markers. diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 6da6a90d06..535a52323b 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -32,7 +32,9 @@ use super::{value::escape_single_quote_string, ColumnDef}; #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A member of an ENUM type. pub enum EnumMember { + /// Just a name. Name(String), /// ClickHouse allows to specify an integer value for each enum value. 
/// @@ -957,18 +959,31 @@ impl fmt::Display for TimezoneInfo { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum IntervalFields { + /// `YEAR` field Year, + /// `MONTH` field Month, + /// `DAY` field Day, + /// `HOUR` field Hour, + /// `MINUTE` field Minute, + /// `SECOND` field Second, + /// `YEAR TO MONTH` field YearToMonth, + /// `DAY TO HOUR` field DayToHour, + /// `DAY TO MINUTE` field DayToMinute, + /// `DAY TO SECOND` field DayToSecond, + /// `HOUR TO MINUTE` field HourToMinute, + /// `HOUR TO SECOND` field HourToSecond, + /// `MINUTE TO SECOND` field MinuteToSecond, } @@ -1000,11 +1015,11 @@ impl fmt::Display for IntervalFields { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ExactNumberInfo { - /// No additional information, e.g. `DECIMAL` + /// No additional information, e.g. `DECIMAL`. None, - /// Only precision information, e.g. `DECIMAL(10)` + /// Only precision information, e.g. `DECIMAL(10)`. Precision(u64), - /// Precision and scale information, e.g. `DECIMAL(10,2)` + /// Precision and scale information, e.g. `DECIMAL(10,2)`. PrecisionAndScale(u64, i64), } @@ -1031,13 +1046,14 @@ impl fmt::Display for ExactNumberInfo { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CharacterLength { + /// Integer length with optional unit (e.g. `CHAR(10)` or `VARCHAR(10 CHARACTERS)`). IntegerLength { /// Default (if VARYING) or maximum (if not VARYING) length length: u64, /// Optional unit. If not informed, the ANSI handles it as CHARACTERS implicitly unit: Option, }, - /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Microsoft SQL Server) + /// VARCHAR(MAX) or NVARCHAR(MAX), used in T-SQL (Microsoft SQL Server). 
Max, } @@ -1087,12 +1103,16 @@ impl fmt::Display for CharLengthUnits { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Information about [binary length][1], including length and possibly unit. +/// +/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#binary-length pub enum BinaryLength { + /// Integer length for binary types (e.g. `VARBINARY(100)`). IntegerLength { /// Default (if VARYING) length: u64, }, - /// VARBINARY(MAX) used in T-SQL (Microsoft SQL Server) + /// VARBINARY(MAX) used in T-SQL (Microsoft SQL Server). Max, } @@ -1118,13 +1138,13 @@ impl fmt::Display for BinaryLength { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ArrayElemTypeDef { - /// `ARRAY` + /// Use `ARRAY` style without an explicit element type. None, - /// `ARRAY` + /// Angle-bracket style, e.g. `ARRAY`. AngleBracket(Box), - /// `INT[]` or `INT[2]` + /// Square-bracket style, e.g. `INT[]` or `INT[2]`. SquareBracket(Box, Option), - /// `Array(Int64)` + /// Parenthesis style, e.g. `Array(Int64)`. 
Parenthesis(Box), } @@ -1136,12 +1156,19 @@ pub enum ArrayElemTypeDef { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GeometricTypeKind { + /// Point geometry Point, + /// Line geometry Line, + /// Line segment geometry LineSegment, + /// Box geometry GeometricBox, + /// Path geometry GeometricPath, + /// Polygon geometry Polygon, + /// Circle geometry Circle, } diff --git a/src/ast/dcl.rs b/src/ast/dcl.rs index d04875a733..7183bc3fb7 100644 --- a/src/ast/dcl.rs +++ b/src/ast/dcl.rs @@ -39,15 +39,25 @@ use crate::tokenizer::Span; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum RoleOption { + /// Enable or disable BYPASSRLS. BypassRLS(bool), + /// Connection limit expression. ConnectionLimit(Expr), + /// CREATEDB flag. CreateDB(bool), + /// CREATEROLE flag. CreateRole(bool), + /// INHERIT flag. Inherit(bool), + /// LOGIN flag. Login(bool), + /// Password value or NULL password. Password(Password), + /// Replication privilege flag. Replication(bool), + /// SUPERUSER flag. SuperUser(bool), + /// `VALID UNTIL` expression. ValidUntil(Expr), } @@ -104,8 +114,11 @@ impl fmt::Display for RoleOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SetConfigValue { + /// Use the default value. Default, + /// Use the current value (`FROM CURRENT`). FromCurrent, + /// Set to the provided expression value. Value(Expr), } @@ -116,7 +129,9 @@ pub enum SetConfigValue { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ResetConfig { + /// Reset all configuration parameters. ALL, + /// Reset the named configuration parameter. 
ConfigName(ObjectName), } @@ -127,28 +142,48 @@ pub enum ResetConfig { pub enum AlterRoleOperation { /// Generic RenameRole { + /// Role name to rename. role_name: Ident, }, /// MS SQL Server /// AddMember { + /// Member name to add to the role. member_name: Ident, }, + /// MS SQL Server + /// + /// DropMember { + /// Member name to remove from the role. member_name: Ident, }, /// PostgreSQL /// WithOptions { + /// Role options to apply. options: Vec, }, + /// PostgreSQL + /// + /// + /// `SET configuration_parameter { TO | = } { value | DEFAULT }` Set { + /// Configuration name to set. config_name: ObjectName, + /// Value to assign to the configuration. config_value: SetConfigValue, + /// Optional database scope for the setting. in_database: Option, }, + /// PostgreSQL + /// + /// + /// `RESET configuration_parameter` | `RESET ALL` Reset { + /// Configuration to reset. config_name: ResetConfig, + /// Optional database scope for the reset. in_database: Option, }, } @@ -205,14 +240,22 @@ impl fmt::Display for AlterRoleOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Use { - Catalog(ObjectName), // e.g. `USE CATALOG foo.bar` - Schema(ObjectName), // e.g. `USE SCHEMA foo.bar` - Database(ObjectName), // e.g. `USE DATABASE foo.bar` - Warehouse(ObjectName), // e.g. `USE WAREHOUSE foo.bar` - Role(ObjectName), // e.g. `USE ROLE PUBLIC` - SecondaryRoles(SecondaryRoles), // e.g. `USE SECONDARY ROLES ALL` - Object(ObjectName), // e.g. `USE foo.bar` - Default, // e.g. `USE DEFAULT` + /// Switch to the given catalog (e.g. `USE CATALOG ...`). + Catalog(ObjectName), + /// Switch to the given schema (e.g. `USE SCHEMA ...`). + Schema(ObjectName), + /// Switch to the given database (e.g. `USE DATABASE ...`). + Database(ObjectName), + /// Switch to the given warehouse (e.g. `USE WAREHOUSE ...`). + Warehouse(ObjectName), + /// Switch to the given role (e.g. `USE ROLE ...`). 
+ Role(ObjectName), + /// Use secondary roles specification (e.g. `USE SECONDARY ROLES ...`). + SecondaryRoles(SecondaryRoles), + /// Use the specified object (e.g. `USE foo.bar`). + Object(ObjectName), + /// Reset to default (e.g. `USE DEFAULT`). + Default, } impl fmt::Display for Use { @@ -239,8 +282,11 @@ impl fmt::Display for Use { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SecondaryRoles { + /// Use all secondary roles. All, + /// Use no secondary roles. None, + /// Explicit list of secondary roles. List(Vec), } @@ -260,25 +306,43 @@ impl fmt::Display for SecondaryRoles { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateRole { + /// Role names to create. pub names: Vec, + /// Whether `IF NOT EXISTS` was specified. pub if_not_exists: bool, // Postgres + /// Whether `LOGIN` was specified. pub login: Option, + /// Whether `INHERIT` was specified. pub inherit: Option, + /// Whether `BYPASSRLS` was specified. pub bypassrls: Option, + /// Optional password for the role. pub password: Option, + /// Whether `SUPERUSER` was specified. pub superuser: Option, + /// Whether `CREATEDB` was specified. pub create_db: Option, + /// Whether `CREATEROLE` was specified. pub create_role: Option, + /// Whether `REPLICATION` privilege was specified. pub replication: Option, + /// Optional connection limit expression. pub connection_limit: Option, + /// Optional account validity expression. pub valid_until: Option, + /// Members of `IN ROLE` clause. pub in_role: Vec, + /// Members of `IN GROUP` clause. pub in_group: Vec, + /// Roles listed in `ROLE` clause. pub role: Vec, + /// Users listed in `USER` clause. pub user: Vec, + /// Admin users listed in `ADMIN` clause. pub admin: Vec, // MSSQL + /// Optional authorization owner. 
pub authorization_owner: Option, } diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 2a24741f37..6c83144211 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -61,7 +61,9 @@ use crate::tokenizer::{Span, Token}; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IndexColumn { + /// The indexed column expression. pub column: OrderByExpr, + /// Optional operator class (index operator name). pub operator_class: Option, } @@ -97,9 +99,13 @@ impl fmt::Display for IndexColumn { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ReplicaIdentity { + /// No replica identity (`REPLICA IDENTITY NONE`). None, + /// Full replica identity (`REPLICA IDENTITY FULL`). Full, + /// Default replica identity (`REPLICA IDENTITY DEFAULT`). Default, + /// Use the given index as replica identity (`REPLICA IDENTITY USING INDEX`). Index(Ident), } @@ -121,7 +127,9 @@ impl fmt::Display for ReplicaIdentity { pub enum AlterTableOperation { /// `ADD [NOT VALID]` AddConstraint { + /// The table constraint to add. constraint: TableConstraint, + /// Whether the constraint should be marked `NOT VALID`. not_valid: bool, }, /// `ADD [COLUMN] [IF NOT EXISTS] ` @@ -140,8 +148,11 @@ pub enum AlterTableOperation { /// Note: this is a ClickHouse-specific operation. /// Please refer to [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/projection#add-projection) AddProjection { + /// Whether `IF NOT EXISTS` was specified. if_not_exists: bool, + /// Name of the projection to add. name: Ident, + /// The projection's select clause. select: ProjectionSelect, }, /// `DROP PROJECTION [IF EXISTS] name` @@ -149,7 +160,9 @@ pub enum AlterTableOperation { /// Note: this is a ClickHouse-specific operation. 
/// Please refer to [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/projection#drop-projection) DropProjection { + /// Whether `IF EXISTS` was specified. if_exists: bool, + /// Name of the projection to drop. name: Ident, }, /// `MATERIALIZE PROJECTION [IF EXISTS] name [IN PARTITION partition_name]` @@ -157,8 +170,11 @@ pub enum AlterTableOperation { /// Note: this is a ClickHouse-specific operation. /// Please refer to [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/projection#materialize-projection) MaterializeProjection { + /// Whether `IF EXISTS` was specified. if_exists: bool, + /// Name of the projection to materialize. name: Ident, + /// Optional partition name to operate on. partition: Option, }, /// `CLEAR PROJECTION [IF EXISTS] name [IN PARTITION partition_name]` @@ -166,8 +182,11 @@ pub enum AlterTableOperation { /// Note: this is a ClickHouse-specific operation. /// Please refer to [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/projection#clear-projection) ClearProjection { + /// Whether `IF EXISTS` was specified. if_exists: bool, + /// Name of the projection to clear. name: Ident, + /// Optional partition name to operate on. partition: Option, }, /// `DISABLE ROW LEVEL SECURITY` @@ -178,25 +197,34 @@ pub enum AlterTableOperation { /// /// Note: this is a PostgreSQL-specific operation. DisableRule { + /// Name of the rule to disable. name: Ident, }, /// `DISABLE TRIGGER [ trigger_name | ALL | USER ]` /// /// Note: this is a PostgreSQL-specific operation. DisableTrigger { + /// Name of the trigger to disable (or ALL/USER). name: Ident, }, /// `DROP CONSTRAINT [ IF EXISTS ] ` DropConstraint { + /// `IF EXISTS` flag for dropping the constraint. if_exists: bool, + /// Name of the constraint to drop. name: Ident, + /// Optional drop behavior (`CASCADE`/`RESTRICT`). drop_behavior: Option, }, /// `DROP [ COLUMN ] [ IF EXISTS ] [ , , ... 
] [ CASCADE ]` DropColumn { + /// Whether the `COLUMN` keyword was present. has_column_keyword: bool, + /// Names of columns to drop. column_names: Vec, + /// Whether `IF EXISTS` was specified for the columns. if_exists: bool, + /// Optional drop behavior for the column removal. drop_behavior: Option, }, /// `ATTACH PART|PARTITION ` @@ -205,6 +233,7 @@ pub enum AlterTableOperation { AttachPartition { // PART is not a short form of PARTITION, it's a separate keyword // which represents a physical file on disk and partition is a logical entity. + /// Partition expression to attach. partition: Partition, }, /// `DETACH PART|PARTITION ` @@ -212,20 +241,25 @@ pub enum AlterTableOperation { /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#detach-partitionpart) DetachPartition { // See `AttachPartition` for more details + /// Partition expression to detach. partition: Partition, }, /// `FREEZE PARTITION ` /// Note: this is a ClickHouse-specific operation, please refer to /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#freeze-partition) FreezePartition { + /// Partition to freeze. partition: Partition, + /// Optional name for the freeze operation. with_name: Option, }, /// `UNFREEZE PARTITION ` /// Note: this is a ClickHouse-specific operation, please refer to /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#unfreeze-partition) UnfreezePartition { + /// Partition to unfreeze. partition: Partition, + /// Optional name associated with the unfreeze operation. with_name: Option, }, /// `DROP PRIMARY KEY` @@ -233,6 +267,7 @@ pub enum AlterTableOperation { /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/alter-table.html) /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constraints-drop) DropPrimaryKey { + /// Optional drop behavior for the primary key (`CASCADE`/`RESTRICT`). 
drop_behavior: Option, }, /// `DROP FOREIGN KEY ` @@ -240,37 +275,44 @@ pub enum AlterTableOperation { /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/alter-table.html) /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constraints-drop) DropForeignKey { + /// Foreign key symbol/name to drop. name: Ident, + /// Optional drop behavior for the foreign key. drop_behavior: Option, }, /// `DROP INDEX ` /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html DropIndex { + /// Name of the index to drop. name: Ident, }, /// `ENABLE ALWAYS RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. EnableAlwaysRule { + /// Name of the rule to enable. name: Ident, }, /// `ENABLE ALWAYS TRIGGER trigger_name` /// /// Note: this is a PostgreSQL-specific operation. EnableAlwaysTrigger { + /// Name of the trigger to enable. name: Ident, }, /// `ENABLE REPLICA RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. EnableReplicaRule { + /// Name of the replica rule to enable. name: Ident, }, /// `ENABLE REPLICA TRIGGER trigger_name` /// /// Note: this is a PostgreSQL-specific operation. EnableReplicaTrigger { + /// Name of the replica trigger to enable. name: Ident, }, /// `ENABLE ROW LEVEL SECURITY` @@ -281,17 +323,21 @@ pub enum AlterTableOperation { /// /// Note: this is a PostgreSQL-specific operation. EnableRule { + /// Name of the rule to enable. name: Ident, }, /// `ENABLE TRIGGER [ trigger_name | ALL | USER ]` /// /// Note: this is a PostgreSQL-specific operation. EnableTrigger { + /// Name of the trigger to enable (or ALL/USER). name: Ident, }, /// `RENAME TO PARTITION (partition=val)` RenamePartitions { + /// Old partition expressions to be renamed. old_partitions: Vec, + /// New partition expressions corresponding to the old ones. 
new_partitions: Vec, }, /// REPLICA IDENTITY { DEFAULT | USING INDEX index_name | FULL | NOTHING } @@ -299,78 +345,109 @@ pub enum AlterTableOperation { /// Note: this is a PostgreSQL-specific operation. /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) ReplicaIdentity { + /// Replica identity setting to apply. identity: ReplicaIdentity, }, /// Add Partitions AddPartitions { + /// Whether `IF NOT EXISTS` was present when adding partitions. if_not_exists: bool, + /// New partitions to add. new_partitions: Vec, }, + /// `DROP PARTITIONS ...` / drop partitions from the table. DropPartitions { + /// Partitions to drop (expressions). partitions: Vec, + /// Whether `IF EXISTS` was specified for dropping partitions. if_exists: bool, }, /// `RENAME [ COLUMN ] TO ` RenameColumn { + /// Existing column name to rename. old_column_name: Ident, + /// New column name. new_column_name: Ident, }, /// `RENAME TO ` RenameTable { + /// The new table name or renaming kind. table_name: RenameTableNameKind, }, // CHANGE [ COLUMN ] [ ] + /// Change an existing column's name, type, and options. ChangeColumn { + /// Old column name. old_name: Ident, + /// New column name. new_name: Ident, + /// New data type for the column. data_type: DataType, + /// Column options to apply after the change. options: Vec, - /// MySQL `ALTER TABLE` only [FIRST | AFTER column_name] + /// MySQL-specific column position (`FIRST`/`AFTER`). column_position: Option, }, // CHANGE [ COLUMN ] [ ] + /// Modify an existing column's type and options. ModifyColumn { + /// Column name to modify. col_name: Ident, + /// New data type for the column. data_type: DataType, + /// Column options to set. options: Vec, - /// MySQL `ALTER TABLE` only [FIRST | AFTER column_name] + /// MySQL-specific column position (`FIRST`/`AFTER`). column_position: Option, }, /// `RENAME CONSTRAINT TO ` /// /// Note: this is a PostgreSQL-specific operation. 
+ /// Rename a constraint on the table. RenameConstraint { + /// Existing constraint name. old_name: Ident, + /// New constraint name. new_name: Ident, }, /// `ALTER [ COLUMN ]` + /// Alter a specific column with the provided operation. AlterColumn { + /// The column to alter. column_name: Ident, + /// Operation to apply to the column. op: AlterColumnOperation, }, /// 'SWAP WITH ' /// /// Note: this is Snowflake specific SwapWith { + /// Table name to swap with. table_name: ObjectName, }, /// 'SET TBLPROPERTIES ( { property_key [ = ] property_val } [, ...] )' SetTblProperties { + /// Table properties specified as SQL options. table_properties: Vec, }, /// `OWNER TO { | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` /// /// Note: this is PostgreSQL-specific OwnerTo { + /// The new owner to assign to the table. new_owner: Owner, }, /// Snowflake table clustering options /// ClusterBy { + /// Expressions used for clustering the table. exprs: Vec, }, + /// Remove the clustering key from the table. DropClusteringKey, + /// Suspend background reclustering operations. SuspendRecluster, + /// Resume background reclustering operations. ResumeRecluster, /// `REFRESH [ '' ]` /// @@ -395,7 +472,9 @@ pub enum AlterTableOperation { /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html Algorithm { + /// Whether the `=` sign was used (`ALGORITHM = ...`). equals: bool, + /// The algorithm to use for the alter operation (MySQL-specific). algorithm: AlterTableAlgorithm, }, @@ -405,7 +484,9 @@ pub enum AlterTableOperation { /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html Lock { + /// Whether the `=` sign was used (`LOCK = ...`). equals: bool, + /// The locking behavior to apply (MySQL-specific). 
lock: AlterTableLock, }, /// `AUTO_INCREMENT [=] ` @@ -414,11 +495,14 @@ pub enum AlterTableOperation { /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html AutoIncrement { + /// Whether the `=` sign was used (`AUTO_INCREMENT = ...`). equals: bool, + /// Value to set for the auto-increment counter. value: ValueWithSpan, }, /// `VALIDATE CONSTRAINT ` ValidateConstraint { + /// Name of the constraint to validate. name: Ident, }, /// Arbitrary parenthesized `SET` options. @@ -429,6 +513,7 @@ pub enum AlterTableOperation { /// ``` /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-altertable.html) SetOptionsParens { + /// Parenthesized options supplied to `SET (...)`. options: Vec, }, } @@ -440,12 +525,18 @@ pub enum AlterTableOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AlterPolicyOperation { + /// Rename the policy to `new_name`. Rename { + /// The new identifier for the policy. new_name: Ident, }, + /// Apply/modify policy properties. Apply { + /// Optional list of owners the policy applies to. to: Option>, + /// Optional `USING` expression for the policy. using: Option, + /// Optional `WITH CHECK` expression for the policy. with_check: Option, }, } @@ -482,10 +573,15 @@ impl fmt::Display for AlterPolicyOperation { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Algorithm option for `ALTER TABLE` operations (MySQL-specific). pub enum AlterTableAlgorithm { + /// Default algorithm selection. Default, + /// `INSTANT` algorithm. Instant, + /// `INPLACE` algorithm. Inplace, + /// `COPY` algorithm. 
Copy, } @@ -506,10 +602,15 @@ impl fmt::Display for AlterTableAlgorithm { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Locking behavior for `ALTER TABLE` (MySQL-specific). pub enum AlterTableLock { + /// `DEFAULT` lock behavior. Default, + /// `NONE` lock. None, + /// `SHARED` lock. Shared, + /// `EXCLUSIVE` lock. Exclusive, } @@ -527,10 +628,15 @@ impl fmt::Display for AlterTableLock { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// New owner specification for `ALTER TABLE ... OWNER TO ...` pub enum Owner { + /// A specific user/role identifier. Ident(Ident), + /// `CURRENT_ROLE` keyword. CurrentRole, + /// `CURRENT_USER` keyword. CurrentUser, + /// `SESSION_USER` keyword. SessionUser, } @@ -548,8 +654,11 @@ impl fmt::Display for Owner { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// New connector owner specification for `ALTER CONNECTOR ... OWNER TO ...` pub enum AlterConnectorOwner { + /// `USER ` connector owner. User(Ident), + /// `ROLE ` connector owner. Role(Ident), } @@ -565,8 +674,13 @@ impl fmt::Display for AlterConnectorOwner { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Alterations that can be applied to an index. pub enum AlterIndexOperation { - RenameIndex { index_name: ObjectName }, + /// Rename the index to `index_name`. + RenameIndex { + /// The new name for the index. 
+ index_name: ObjectName, + }, } impl fmt::Display for AlterTableOperation { @@ -921,7 +1035,9 @@ impl fmt::Display for AlterIndexOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterType { + /// Name of the type being altered (may be schema-qualified). pub name: ObjectName, + /// The specific alteration operation to perform. pub operation: AlterTypeOperation, } @@ -930,8 +1046,11 @@ pub struct AlterType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AlterTypeOperation { + /// Rename the type. Rename(AlterTypeRename), + /// Add a new value to the type (for enum-like types). AddValue(AlterTypeAddValue), + /// Rename an existing value of the type. RenameValue(AlterTypeRenameValue), } @@ -940,6 +1059,7 @@ pub enum AlterTypeOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterTypeRename { + /// The new name for the type. pub new_name: Ident, } @@ -948,8 +1068,11 @@ pub struct AlterTypeRename { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterTypeAddValue { + /// If true, do not error when the value already exists (`IF NOT EXISTS`). pub if_not_exists: bool, + /// The identifier for the new value to add. pub value: Ident, + /// Optional relative position for the new value (`BEFORE` / `AFTER`). pub position: Option, } @@ -958,7 +1081,9 @@ pub struct AlterTypeAddValue { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AlterTypeAddValuePosition { + /// Place the new value before the given neighbor value. Before(Ident), + /// Place the new value after the given neighbor value. 
After(Ident), } @@ -967,7 +1092,9 @@ pub enum AlterTypeAddValuePosition { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterTypeRenameValue { + /// Existing value identifier to rename. pub from: Ident, + /// New identifier for the value. pub to: Ident, } @@ -1029,7 +1156,11 @@ pub enum AlterOperatorOperation { /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` OwnerTo(Owner), /// `SET SCHEMA new_schema` - SetSchema { schema_name: ObjectName }, + /// Set the operator's schema name. + SetSchema { + /// New schema name for the operator + schema_name: ObjectName, + }, /// `SET ( options )` Set { /// List of operator options to set @@ -1112,15 +1243,20 @@ pub enum AlterColumnOperation { /// `DROP NOT NULL` DropNotNull, /// `SET DEFAULT ` - SetDefault { value: Expr }, + /// Set the column default value. + SetDefault { + /// Expression representing the new default value. + value: Expr, + }, /// `DROP DEFAULT` DropDefault, /// `[SET DATA] TYPE [USING ]` SetDataType { + /// Target data type for the column. data_type: DataType, - /// PostgreSQL specific + /// PostgreSQL-specific `USING ` expression for conversion. using: Option, - /// Set to true if the statement includes the `SET DATA TYPE` keywords + /// Set to true if the statement includes the `SET DATA TYPE` keywords. had_set: bool, }, @@ -1128,7 +1264,9 @@ pub enum AlterColumnOperation { /// /// Note: this is a PostgreSQL-specific operation. AddGenerated { + /// Optional `GENERATED AS` specifier (e.g. `ALWAYS` or `BY DEFAULT`). generated_as: Option, + /// Optional sequence options for identity generation. sequence_options: Option>, }, } @@ -1204,6 +1342,7 @@ pub enum KeyOrIndexDisplay { } impl KeyOrIndexDisplay { + /// Check if this is the `None` variant. 
pub fn is_none(self) -> bool { matches!(self, Self::None) } @@ -1243,12 +1382,19 @@ impl fmt::Display for KeyOrIndexDisplay { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum IndexType { + /// B-Tree index (commonly default for many databases). BTree, + /// Hash index. Hash, + /// Generalized Inverted Index (GIN). GIN, + /// Generalized Search Tree (GiST) index. GiST, + /// Space-partitioned GiST (SPGiST) index. SPGiST, + /// Block Range Index (BRIN). BRIN, + /// Bloom filter based index. Bloom, /// Users may define their own index types, which would /// not be covered by the above variants. @@ -1324,10 +1470,15 @@ impl fmt::Display for NullsDistinctOption { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A parameter of a stored procedure or function declaration. pub struct ProcedureParam { + /// Parameter name. pub name: Ident, + /// Parameter data type. pub data_type: DataType, + /// Optional mode (`IN`, `OUT`, `INOUT`, etc.). pub mode: Option, + /// Optional default expression for the parameter. pub default: Option, } @@ -1352,8 +1503,11 @@ impl fmt::Display for ProcedureParam { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ColumnDef { + /// Column name. pub name: Ident, + /// Column data type. pub data_type: DataType, + /// Column options (defaults, constraints, generated, etc.). pub options: Vec, } @@ -1391,20 +1545,27 @@ impl fmt::Display for ColumnDef { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ViewColumnDef { + /// Column identifier. pub name: Ident, + /// Optional data type for the column. 
pub data_type: Option, + /// Optional column options (defaults, comments, etc.). pub options: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Representation of how multiple `ColumnOption`s are grouped for a column. pub enum ColumnOptions { + /// Options separated by comma: `OPTIONS(a, b, c)`. CommaSeparated(Vec), + /// Options separated by spaces: `OPTION_A OPTION_B`. SpaceSeparated(Vec), } impl ColumnOptions { + /// Get the column options as a slice. pub fn as_slice(&self) -> &[ColumnOption] { match self { ColumnOptions::CommaSeparated(options) => options.as_slice(), @@ -1453,7 +1614,9 @@ impl fmt::Display for ViewColumnDef { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ColumnOptionDef { + /// Optional name of the constraint. pub name: Option, + /// The actual column option (e.g. `NOT NULL`, `DEFAULT`, `GENERATED`, ...). pub option: ColumnOption, } @@ -1514,11 +1677,14 @@ impl fmt::Display for IdentityPropertyKind { } } +/// Properties for the `IDENTITY` / `AUTOINCREMENT` column option. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IdentityProperty { + /// Optional parameters specifying seed/increment for the identity column. pub parameters: Option, + /// Optional ordering specifier (`ORDER` / `NOORDER`). pub order: Option, } @@ -1573,11 +1739,14 @@ impl fmt::Display for IdentityPropertyFormatKind { } } } +/// Parameters specifying seed and increment for identity columns. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IdentityParameters { + /// The initial seed expression for the identity column. pub seed: Expr, + /// The increment expression for the identity column. pub increment: Expr, } @@ -1591,7 +1760,9 @@ pub struct IdentityParameters { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum IdentityPropertyOrder { + /// `ORDER` - preserve ordering for generated values (where supported). Order, + /// `NOORDER` - do not enforce ordering for generated values. NoOrder, } @@ -1615,7 +1786,9 @@ impl fmt::Display for IdentityPropertyOrder { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ColumnPolicy { + /// `MASKING POLICY ()` MaskingPolicy(ColumnPolicyProperty), + /// `PROJECTION POLICY ()` ProjectionPolicy(ColumnPolicyProperty), } @@ -1639,6 +1812,7 @@ impl fmt::Display for ColumnPolicy { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Properties describing a column policy (masking or projection). pub struct ColumnPolicyProperty { /// This flag indicates that the column policy option is declared using the `WITH` prefix. /// Example @@ -1647,7 +1821,9 @@ pub struct ColumnPolicyProperty { /// ``` /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table pub with: bool, + /// The name of the policy to apply to the column. pub policy_name: ObjectName, + /// Optional list of column identifiers referenced by the policy. 
pub using_columns: Option>, } @@ -1668,6 +1844,7 @@ pub struct TagsColumnOption { /// ``` /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table pub with: bool, + /// List of tags to attach to the column. pub tags: Vec, } @@ -1726,16 +1903,24 @@ pub enum ColumnOption { /// - MySQL's `AUTO_INCREMENT` or SQLite's `AUTOINCREMENT` /// - ... DialectSpecific(Vec), + /// `CHARACTER SET ` column option CharacterSet(ObjectName), + /// `COLLATE ` column option Collation(ObjectName), + /// `COMMENT ''` column option Comment(String), + /// `ON UPDATE ` column option OnUpdate(Expr), /// `Generated`s are modifiers that follow a column definition in a `CREATE /// TABLE` statement. Generated { + /// How the column is generated (e.g. `GENERATED ALWAYS`, `BY DEFAULT`, or expression-stored). generated_as: GeneratedAs, + /// Sequence/identity options when generation is backed by a sequence. sequence_options: Option>, + /// Optional expression used to generate the column value. generation_expr: Option, + /// Mode of the generated expression (`VIRTUAL` or `STORED`) when `generation_expr` is present. 
generation_expr_mode: Option, /// false if 'GENERATED ALWAYS' is skipped (option starts with AS) generated_keyword: bool, @@ -1947,8 +2132,11 @@ impl fmt::Display for ColumnOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GeneratedAs { + /// `GENERATED ALWAYS` Always, + /// `GENERATED BY DEFAULT` ByDefault, + /// Expression-based generated column that is stored (used internally for expression-stored columns) ExpStored, } @@ -1958,7 +2146,9 @@ pub enum GeneratedAs { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GeneratedExpressionMode { + /// `VIRTUAL` generated expression Virtual, + /// `STORED` generated expression Stored, } @@ -2021,6 +2211,7 @@ pub struct ConstraintCharacteristics { pub enforced: Option, } +/// Initial setting for deferrable constraints (`INITIALLY IMMEDIATE` or `INITIALLY DEFERRED`). #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -2092,10 +2283,15 @@ impl fmt::Display for ConstraintCharacteristics { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ReferentialAction { + /// `RESTRICT` - disallow action if it would break referential integrity. Restrict, + /// `CASCADE` - propagate the action to referencing rows. Cascade, + /// `SET NULL` - set referencing columns to NULL. SetNull, + /// `NO ACTION` - no action at the time; may be deferred. NoAction, + /// `SET DEFAULT` - set referencing columns to their default values. 
SetDefault, } @@ -2118,7 +2314,9 @@ impl fmt::Display for ReferentialAction { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum DropBehavior { + /// `RESTRICT` - refuse to drop if there are any dependent objects. Restrict, + /// `CASCADE` - automatically drop objects that depend on the object being dropped. Cascade, } @@ -2138,16 +2336,22 @@ impl fmt::Display for DropBehavior { pub enum UserDefinedTypeRepresentation { /// Composite type: `CREATE TYPE name AS (attributes)` Composite { + /// List of attributes for the composite type. attributes: Vec, }, /// Enum type: `CREATE TYPE name AS ENUM (labels)` /// /// Note: this is PostgreSQL-specific. See - Enum { labels: Vec }, + /// Enum type: `CREATE TYPE name AS ENUM (labels)` + Enum { + /// Labels that make up the enum type. + labels: Vec, + }, /// Range type: `CREATE TYPE name AS RANGE (options)` /// /// Note: this is PostgreSQL-specific. See Range { + /// Options for the range type definition. options: Vec, }, /// Base type (SQL definition): `CREATE TYPE name (options)` @@ -2156,6 +2360,7 @@ pub enum UserDefinedTypeRepresentation { /// /// Note: this is PostgreSQL-specific. See SqlDefinition { + /// Options for SQL definition of the user-defined type. options: Vec, }, } @@ -2184,8 +2389,11 @@ impl fmt::Display for UserDefinedTypeRepresentation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct UserDefinedTypeCompositeAttributeDef { + /// Attribute name. pub name: Ident, + /// Attribute data type. pub data_type: DataType, + /// Optional collation for the attribute. 
pub collation: Option, } @@ -2488,11 +2696,14 @@ impl fmt::Display for UserDefinedTypeSqlDefinitionOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Partition { + /// ClickHouse supports PARTITION ID 'partition_id' syntax. Identifier(Ident), + /// ClickHouse supports PARTITION expr syntax. Expr(Expr), /// ClickHouse supports PART expr which represents physical partition in disk. /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#attach-partitionpart) Part(Expr), + /// Hive supports multiple partitions in PARTITION (part1, part2, ...) syntax. Partitions(Vec), } @@ -2515,7 +2726,9 @@ impl fmt::Display for Partition { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Deduplicate { + /// DEDUPLICATE ALL All, + /// DEDUPLICATE BY expr ByExpression(Expr), } @@ -2536,8 +2749,11 @@ impl fmt::Display for Deduplicate { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ClusteredBy { + /// columns used for clustering pub columns: Vec, + /// optional sorted by expressions pub sorted_by: Option>, + /// number of buckets pub num_buckets: Value, } @@ -2563,19 +2779,28 @@ pub struct CreateIndex { /// index name pub name: Option, #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + /// table name pub table_name: ObjectName, /// Index type used in the statement. Can also be found inside [`CreateIndex::index_options`] /// depending on the position of the option within the statement. 
pub using: Option, + /// columns included in the index pub columns: Vec, + /// whether the index is unique pub unique: bool, + /// whether the index is created concurrently pub concurrently: bool, + /// IF NOT EXISTS clause pub if_not_exists: bool, + /// INCLUDE clause: pub include: Vec, + /// NULLS DISTINCT / NOT DISTINCT clause: pub nulls_distinct: Option, /// WITH clause: pub with: Vec, + /// WHERE clause: pub predicate: Option, + /// Index options: pub index_options: Vec, /// [MySQL] allows a subset of options normally used for `ALTER TABLE`: /// @@ -2642,35 +2867,57 @@ impl fmt::Display for CreateIndex { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateTable { + /// `OR REPLACE` clause pub or_replace: bool, + /// `TEMP` or `TEMPORARY` clause pub temporary: bool, + /// `EXTERNAL` clause pub external: bool, + /// `DYNAMIC` clause pub dynamic: bool, + /// `GLOBAL` clause pub global: Option, + /// `IF NOT EXISTS` clause pub if_not_exists: bool, + /// `TRANSIENT` clause pub transient: bool, + /// `VOLATILE` clause pub volatile: bool, + /// `ICEBERG` clause pub iceberg: bool, /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub name: ObjectName, - /// Optional schema + /// Column definitions pub columns: Vec, + /// Table constraints pub constraints: Vec, + /// Hive-specific distribution style pub hive_distribution: HiveDistributionStyle, + /// Hive-specific formats like `ROW FORMAT DELIMITED` or `ROW FORMAT SERDE 'serde_class' WITH SERDEPROPERTIES (...)` pub hive_formats: Option, + /// Table options pub table_options: CreateTableOptions, + /// General comment for the table pub file_format: Option, + /// Location of the table data pub location: Option, + /// Query used to populate the table pub query: Option>, + /// If the table should be created without a rowid (SQLite) pub without_rowid: bool, + /// `LIKE` clause pub like: Option, + /// `CLONE` 
clause pub clone: Option, + /// Table version (for systems that support versioned tables) pub version: Option, - // For Hive dialect, the table comment is after the column definitions without `=`, - // so the `comment` field is optional and different than the comment field in the general options list. - // [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) + /// For Hive dialect, the table comment is after the column definitions without `=`, + /// so the `comment` field is optional and different than the comment field in the general options list. + /// [Hive](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL#LanguageManualDDL-CreateTable) pub comment: Option, + /// ClickHouse "ON COMMIT" clause: + /// pub on_commit: Option, /// ClickHouse "ON CLUSTER" clause: /// @@ -3085,11 +3332,18 @@ pub enum ForValues { In(Vec), /// `FOR VALUES FROM (expr|MINVALUE|MAXVALUE, ...) TO (expr|MINVALUE|MAXVALUE, ...)` From { + /// The lower bound values for the partition. from: Vec, + /// The upper bound values for the partition. to: Vec, }, /// `FOR VALUES WITH (MODULUS n, REMAINDER r)` - With { modulus: u64, remainder: u64 }, + With { + /// The modulus value for hash partitioning. + modulus: u64, + /// The remainder value for hash partitioning. + remainder: u64, + }, /// `DEFAULT` Default, } @@ -3127,8 +3381,11 @@ impl fmt::Display for ForValues { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum PartitionBoundValue { + /// An expression representing a partition bound value. Expr(Expr), + /// Represents negative infinity in partition bounds. MinValue, + /// Represents positive infinity in partition bounds. 
MaxValue, } @@ -3194,16 +3451,23 @@ impl fmt::Display for CreateDomain { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// CREATE FUNCTION statement pub struct CreateFunction { /// True if this is a `CREATE OR ALTER FUNCTION` statement /// /// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-function-transact-sql?view=sql-server-ver16#or-alter) pub or_alter: bool, + /// True if this is a `CREATE OR REPLACE FUNCTION` statement pub or_replace: bool, + /// True if this is a `CREATE TEMPORARY FUNCTION` statement pub temporary: bool, + /// True if this is a `CREATE IF NOT EXISTS FUNCTION` statement pub if_not_exists: bool, + /// Name of the function to be created. pub name: ObjectName, + /// List of arguments for the function. pub args: Option>, + /// The return type of the function. pub return_type: Option, /// The expression that defines the function. /// @@ -3359,11 +3623,17 @@ impl fmt::Display for CreateFunction { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateConnector { + /// The name of the connector to be created. pub name: Ident, + /// Whether `IF NOT EXISTS` was specified. pub if_not_exists: bool, + /// The type of the connector. pub connector_type: Option, + /// The URL of the connector. pub url: Option, + /// The comment for the connector. pub comment: Option, + /// The DC properties for the connector. pub with_dcproperties: Option>, } @@ -3412,23 +3682,36 @@ impl fmt::Display for CreateConnector { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum AlterSchemaOperation { + /// Set the default collation for the schema. SetDefaultCollate { + /// The collation to set as default. collate: Expr, }, + /// Add a replica to the schema. 
AddReplica { + /// The replica to add. replica: Ident, + /// Optional options for the replica. options: Option>, }, + /// Drop a replica from the schema. DropReplica { + /// The replica to drop. replica: Ident, }, + /// Set options for the schema. SetOptionsParens { + /// The options to set. options: Vec, }, + /// Rename the schema. Rename { + /// The new name for the schema. name: ObjectName, }, + /// Change the owner of the schema. OwnerTo { + /// The new owner of the schema. owner: Owner, }, } @@ -3464,7 +3747,9 @@ impl fmt::Display for AlterSchemaOperation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum RenameTableNameKind { + /// `AS new_table_name` As(ObjectName), + /// `TO new_table_name` To(ObjectName), } @@ -3480,9 +3765,13 @@ impl fmt::Display for RenameTableNameKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An `ALTER SCHEMA` (`Statement::AlterSchema`) statement. pub struct AlterSchema { + /// The schema name to alter. pub name: ObjectName, + /// Whether `IF EXISTS` was specified. pub if_exists: bool, + /// The list of operations to perform on the schema. pub operations: Vec, } @@ -3890,7 +4179,9 @@ pub struct CreateView { /// /// [MsSql](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-view-transact-sql) pub or_alter: bool, + /// The `OR REPLACE` clause is used to re-create the view if it already exists. pub or_replace: bool, + /// if true, has MATERIALIZED view modifier pub materialized: bool, /// Snowflake: SECURE view modifier /// @@ -3908,9 +4199,13 @@ pub struct CreateView { /// CREATE VIEW IF NOT EXISTS myview AS SELECT 1` /// ``` pub name_before_not_exists: bool, + /// Optional column definitions pub columns: Vec, + /// The query that defines the view. 
pub query: Box, + /// Table options (e.g., WITH (..), OPTIONS (...)) pub options: CreateTableOptions, + /// BigQuery: CLUSTER BY columns pub cluster_by: Vec, /// Snowflake: Views can have comments in Snowflake. /// @@ -3999,10 +4294,15 @@ impl fmt::Display for CreateView { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateExtension { + /// Extension name pub name: Ident, + /// Whether `IF NOT EXISTS` was specified for the CREATE EXTENSION. pub if_not_exists: bool, + /// Whether `CASCADE` was specified for the CREATE EXTENSION. pub cascade: bool, + /// Optional schema name for the extension. pub schema: Option, + /// Optional version for the extension. pub version: Option, } @@ -4053,9 +4353,11 @@ impl Spanned for CreateExtension { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DropExtension { + /// One or more extension names to drop pub names: Vec, + /// Whether `IF EXISTS` was specified for the DROP EXTENSION. pub if_exists: bool, - /// `CASCADE` or `RESTRICT` + /// `CASCADE` or `RESTRICT` behaviour for the drop. pub cascade_or_restrict: Option, } @@ -4104,9 +4406,13 @@ pub struct AlterTable { /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub name: ObjectName, + /// Whether `IF EXISTS` was specified for the `ALTER TABLE`. pub if_exists: bool, + /// Whether the `ONLY` keyword was used (restrict scope to the named table). pub only: bool, + /// List of `ALTER TABLE` operations to apply. pub operations: Vec, + /// Optional Hive `SET LOCATION` clause for the alter operation. 
pub location: Option, /// ClickHouse dialect supports `ON CLUSTER` clause for ALTER TABLE /// For example: `ALTER TABLE table_name ON CLUSTER cluster_name ADD COLUMN c UInt32` @@ -4150,6 +4456,7 @@ impl fmt::Display for AlterTable { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DropFunction { + /// Whether to include the `IF EXISTS` clause. pub if_exists: bool, /// One or more functions to drop pub func_desc: Vec, @@ -4286,7 +4593,9 @@ impl fmt::Display for CreateOperatorClass { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct OperatorArgTypes { + /// Left-hand operand data type for the operator. pub left: DataType, + /// Right-hand operand data type for the operator. pub right: DataType, } @@ -4301,26 +4610,33 @@ impl fmt::Display for OperatorArgTypes { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OperatorClassItem { - /// `OPERATOR` clause + /// `OPERATOR` clause describing a specific operator implementation. Operator { + /// Strategy number identifying the operator position in the opclass. strategy_number: u64, + /// The operator name referenced by this clause. operator_name: ObjectName, - /// Optional operator argument types + /// Optional operator argument types. op_types: Option, - /// `FOR SEARCH` or `FOR ORDER BY` + /// Optional purpose such as `FOR SEARCH` or `FOR ORDER BY`. purpose: Option, }, - /// `FUNCTION` clause + /// `FUNCTION` clause describing a support function for the operator class. Function { + /// Support function number for this entry. support_number: u64, - /// Optional function argument types for the operator class + /// Optional function argument types for the operator class. op_types: Option>, + /// The function name implementing the support function. 
function_name: ObjectName, - /// Function argument types + /// Function argument types for the support function. argument_types: Vec, }, - /// `STORAGE` clause - Storage { storage_type: DataType }, + /// `STORAGE` clause specifying the storage type. + Storage { + /// The storage data type. + storage_type: DataType, + }, } /// Purpose of an operator in an operator class @@ -4328,8 +4644,13 @@ pub enum OperatorClassItem { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OperatorPurpose { + /// Purpose: used for index/search operations. ForSearch, - ForOrderBy { sort_family: ObjectName }, + /// Purpose: used for ORDER BY; optionally includes a sort family name. + ForOrderBy { + /// Optional sort family object name. + sort_family: ObjectName, + }, } impl fmt::Display for OperatorClassItem { @@ -4522,22 +4843,26 @@ impl Spanned for DropOperatorClass { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OperatorFamilyItem { - /// `OPERATOR` clause + /// `OPERATOR` clause in an operator family modification. Operator { + /// Strategy number for the operator. strategy_number: u64, + /// Operator name referenced by this entry. operator_name: ObjectName, - /// Operator argument types + /// Operator argument types. op_types: Vec, - /// `FOR SEARCH` or `FOR ORDER BY` + /// Optional purpose such as `FOR SEARCH` or `FOR ORDER BY`. purpose: Option, }, - /// `FUNCTION` clause + /// `FUNCTION` clause in an operator family modification. Function { + /// Support function number. support_number: u64, - /// Optional operator argument types for the function + /// Optional operator argument types for the function. op_types: Option>, + /// Function name for the support function. function_name: ObjectName, - /// Function argument types + /// Function argument types. 
argument_types: Vec, }, } @@ -4547,16 +4872,18 @@ pub enum OperatorFamilyItem { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OperatorFamilyDropItem { - /// `OPERATOR` clause + /// `OPERATOR` clause for DROP within an operator family. Operator { + /// Strategy number for the operator. strategy_number: u64, - /// Operator argument types + /// Operator argument types. op_types: Vec, }, - /// `FUNCTION` clause + /// `FUNCTION` clause for DROP within an operator family. Function { + /// Support function number. support_number: u64, - /// Operator argument types for the function + /// Operator argument types for the function. op_types: Vec, }, } @@ -4657,11 +4984,17 @@ pub enum AlterOperatorFamilyOperation { items: Vec, }, /// `RENAME TO new_name` - RenameTo { new_name: ObjectName }, + RenameTo { + /// The new name for the operator family. + new_name: ObjectName, + }, /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` OwnerTo(Owner), /// `SET SCHEMA new_schema` - SetSchema { schema_name: ObjectName }, + SetSchema { + /// The target schema name. + schema_name: ObjectName, + }, } impl fmt::Display for AlterOperatorFamily { @@ -4702,3 +5035,67 @@ impl Spanned for AlterOperatorFamily { Span::empty() } } + +/// `ALTER OPERATOR CLASS` statement +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterOperatorClass { + /// Operator class name (can be schema-qualified) + pub name: ObjectName, + /// Index method (btree, hash, gist, gin, etc.) 
+ pub using: Ident, + /// The operation to perform + pub operation: AlterOperatorClassOperation, +} + +/// An [AlterOperatorClass] operation +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum AlterOperatorClassOperation { + /// `RENAME TO new_name` + /// Rename the operator class to a new name. + RenameTo { + /// The new name for the operator class. + new_name: ObjectName, + }, + /// `OWNER TO { new_owner | CURRENT_ROLE | CURRENT_USER | SESSION_USER }` + OwnerTo(Owner), + /// `SET SCHEMA new_schema` + /// Set the schema for the operator class. + SetSchema { + /// The target schema name. + schema_name: ObjectName, + }, +} + +impl fmt::Display for AlterOperatorClass { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "ALTER OPERATOR CLASS {} USING {}", self.name, self.using)?; + write!(f, " {}", self.operation) + } +} + +impl fmt::Display for AlterOperatorClassOperation { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + AlterOperatorClassOperation::RenameTo { new_name } => { + write!(f, "RENAME TO {new_name}") + } + AlterOperatorClassOperation::OwnerTo(owner) => { + write!(f, "OWNER TO {owner}") + } + AlterOperatorClassOperation::SetSchema { schema_name } => { + write!(f, "SET SCHEMA {schema_name}") + } + } + } +} + +impl Spanned for AlterOperatorClass { + fn span(&self) -> Span { + Span::empty() + } +} diff --git a/src/ast/dml.rs b/src/ast/dml.rs index d740b140e8..150a548920 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -68,6 +68,7 @@ pub struct Insert { pub after_columns: Vec, /// whether the insert has the table keyword (Hive) pub has_table_keyword: bool, + /// ON INSERT pub on: Option, /// RETURNING pub returning: Option>, @@ -331,7 +332,7 @@ pub struct Merge { pub on: Box, /// Specifies the actions to perform when values match or do not match. 
pub clauses: Vec, - // Specifies the output to save changes in MSSQL + /// Specifies the output to save changes in MSSQL pub output: Option, } @@ -367,8 +368,11 @@ impl Display for Merge { pub struct MergeClause { /// The `WHEN` token that starts the sub-expression. pub when_token: AttachedToken, + /// The type of `WHEN` clause. pub clause_kind: MergeClauseKind, + /// An optional predicate to further restrict the clause. pub predicate: Option, + /// The action to perform when the clause is matched. pub action: MergeAction, } @@ -607,13 +611,20 @@ impl Display for MergeUpdateExpr { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OutputClause { + /// `OUTPUT` clause Output { + /// The `OUTPUT` token that starts the sub-expression. output_token: AttachedToken, + /// The select items to output select_items: Vec, + /// Optional `INTO` table to direct the output into_table: Option, }, + /// `RETURNING` clause Returning { + /// The `RETURNING` token that starts the sub-expression. returning_token: AttachedToken, + /// The select items to return select_items: Vec, }, } diff --git a/src/ast/helpers/key_value_options.rs b/src/ast/helpers/key_value_options.rs index 745c3a65af..e8e543b01d 100644 --- a/src/ast/helpers/key_value_options.rs +++ b/src/ast/helpers/key_value_options.rs @@ -34,24 +34,33 @@ use crate::ast::{display_comma_separated, display_separated, Value}; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A collection of key-value options. pub struct KeyValueOptions { + /// The list of key-value options. pub options: Vec, + /// The delimiter used between options. 
pub delimiter: KeyValueOptionsDelimiter, } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The delimiter used between key-value options. pub enum KeyValueOptionsDelimiter { + /// Options are separated by spaces. Space, + /// Options are separated by commas. Comma, } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single key-value option. pub struct KeyValueOption { + /// The name of the option. pub option_name: String, + /// The value of the option. pub option_value: KeyValueOptionKind, } @@ -63,9 +72,13 @@ pub struct KeyValueOption { #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The kind of value for a key-value option. pub enum KeyValueOptionKind { + /// A single value. Single(Value), + /// Multiple values. Multi(Vec), + /// A nested list of key-value options. KeyValueOptions(Box), } diff --git a/src/ast/helpers/mod.rs b/src/ast/helpers/mod.rs index 3efbcf7b05..344895dc15 100644 --- a/src/ast/helpers/mod.rs +++ b/src/ast/helpers/mod.rs @@ -14,8 +14,14 @@ // KIND, either express or implied. See the License for the // specific language governing permissions and limitations // under the License. + +/// Helper utilities for attached tokens used by AST helpers. pub mod attached_token; +/// Utilities for parsing key/value style options in helper statements. pub mod key_value_options; +/// Helpers for `CREATE DATABASE` statement construction/parsing. pub mod stmt_create_database; +/// Helpers for `CREATE TABLE` statement construction/parsing. pub mod stmt_create_table; +/// Helpers for data loading/unloading related statements (stages, PUT, COPY INTO). 
pub mod stmt_data_loading; diff --git a/src/ast/helpers/stmt_create_database.rs b/src/ast/helpers/stmt_create_database.rs index 58a7b0906b..c718dbce19 100644 --- a/src/ast/helpers/stmt_create_database.rs +++ b/src/ast/helpers/stmt_create_database.rs @@ -55,29 +55,54 @@ use crate::parser::ParserError; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateDatabaseBuilder { + /// The database name to create. pub db_name: ObjectName, + /// Whether `IF NOT EXISTS` was specified. pub if_not_exists: bool, + /// Optional storage location for the database. pub location: Option, + /// Optional managed storage location. pub managed_location: Option, + /// Whether `OR REPLACE` was specified. pub or_replace: bool, + /// Whether the database is `TRANSIENT`. pub transient: bool, + /// Optional `CLONE` source object name. pub clone: Option, + /// Optional data retention time in days. pub data_retention_time_in_days: Option, + /// Optional max data extension time in days. pub max_data_extension_time_in_days: Option, + /// Optional external volume identifier. pub external_volume: Option, + /// Optional catalog name. pub catalog: Option, + /// Whether to replace invalid characters. pub replace_invalid_characters: Option, + /// Optional default DDL collation. pub default_ddl_collation: Option, + /// Optional storage serialization policy. pub storage_serialization_policy: Option, + /// Optional comment attached to the database. pub comment: Option, + /// Optional catalog sync configuration. pub catalog_sync: Option, + /// Optional catalog sync namespace mode. pub catalog_sync_namespace_mode: Option, + /// Optional namespace flatten delimiter for catalog sync. pub catalog_sync_namespace_flatten_delimiter: Option, + /// Optional tags attached to the database. pub with_tags: Option>, + /// Optional contact entries associated with the database. 
pub with_contacts: Option>, } impl CreateDatabaseBuilder { + /// Create a new `CreateDatabaseBuilder` with the given database name. + /// + /// # Arguments + /// + /// * `name` - The name of the database to be created. pub fn new(name: ObjectName) -> Self { Self { db_name: name, @@ -103,41 +128,49 @@ impl CreateDatabaseBuilder { } } + /// Set the location for the database. pub fn location(mut self, location: Option) -> Self { self.location = location; self } + /// Set the managed location for the database. pub fn managed_location(mut self, managed_location: Option) -> Self { self.managed_location = managed_location; self } + /// Set whether this is an `OR REPLACE` operation. pub fn or_replace(mut self, or_replace: bool) -> Self { self.or_replace = or_replace; self } + /// Set whether this is a transient database. pub fn transient(mut self, transient: bool) -> Self { self.transient = transient; self } + /// Set whether to use `IF NOT EXISTS`. pub fn if_not_exists(mut self, if_not_exists: bool) -> Self { self.if_not_exists = if_not_exists; self } + /// Set the clone clause for the database. pub fn clone_clause(mut self, clone: Option) -> Self { self.clone = clone; self } + /// Set the data retention time in days. pub fn data_retention_time_in_days(mut self, data_retention_time_in_days: Option) -> Self { self.data_retention_time_in_days = data_retention_time_in_days; self } + /// Set the maximum data extension time in days. pub fn max_data_extension_time_in_days( mut self, max_data_extension_time_in_days: Option, @@ -146,26 +179,31 @@ impl CreateDatabaseBuilder { self } + /// Set the external volume for the database. pub fn external_volume(mut self, external_volume: Option) -> Self { self.external_volume = external_volume; self } + /// Set the catalog for the database. pub fn catalog(mut self, catalog: Option) -> Self { self.catalog = catalog; self } + /// Set whether to replace invalid characters. 
pub fn replace_invalid_characters(mut self, replace_invalid_characters: Option) -> Self { self.replace_invalid_characters = replace_invalid_characters; self } + /// Set the default DDL collation. pub fn default_ddl_collation(mut self, default_ddl_collation: Option) -> Self { self.default_ddl_collation = default_ddl_collation; self } + /// Set the storage serialization policy. pub fn storage_serialization_policy( mut self, storage_serialization_policy: Option, @@ -174,16 +212,19 @@ impl CreateDatabaseBuilder { self } + /// Set the comment for the database. pub fn comment(mut self, comment: Option) -> Self { self.comment = comment; self } + /// Set the catalog sync for the database. pub fn catalog_sync(mut self, catalog_sync: Option) -> Self { self.catalog_sync = catalog_sync; self } + /// Set the catalog sync namespace mode for the database. pub fn catalog_sync_namespace_mode( mut self, catalog_sync_namespace_mode: Option, @@ -192,6 +233,7 @@ impl CreateDatabaseBuilder { self } + /// Set the catalog sync namespace flatten delimiter for the database. pub fn catalog_sync_namespace_flatten_delimiter( mut self, catalog_sync_namespace_flatten_delimiter: Option, @@ -200,16 +242,19 @@ impl CreateDatabaseBuilder { self } + /// Set the tags for the database. pub fn with_tags(mut self, with_tags: Option>) -> Self { self.with_tags = with_tags; self } + /// Set the contacts for the database. pub fn with_contacts(mut self, with_contacts: Option>) -> Self { self.with_contacts = with_contacts; self } + /// Build the `CREATE DATABASE` statement. 
pub fn build(self) -> Statement { Statement::CreateDatabase { db_name: self.db_name, diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 62dbbbcba0..94af03481c 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -64,62 +64,116 @@ use crate::parser::ParserError; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateTableBuilder { + /// Whether the statement uses `OR REPLACE`. pub or_replace: bool, + /// Whether the table is `TEMPORARY`. pub temporary: bool, + /// Whether the table is `EXTERNAL`. pub external: bool, + /// Optional `GLOBAL` flag for dialects that support it. pub global: Option, + /// Whether `IF NOT EXISTS` was specified. pub if_not_exists: bool, + /// Whether `TRANSIENT` was specified. pub transient: bool, + /// Whether `VOLATILE` was specified. pub volatile: bool, + /// Iceberg-specific table flag. pub iceberg: bool, + /// Whether `DYNAMIC` table option is set. pub dynamic: bool, + /// The table name. pub name: ObjectName, + /// Column definitions for the table. pub columns: Vec, + /// Table-level constraints. pub constraints: Vec, + /// Hive distribution style. pub hive_distribution: HiveDistributionStyle, + /// Optional Hive format settings. pub hive_formats: Option, + /// Optional file format for storage. pub file_format: Option, + /// Optional storage location. pub location: Option, + /// Optional `AS SELECT` query for the table. pub query: Option>, + /// Whether `WITHOUT ROWID` is set. pub without_rowid: bool, + /// Optional `LIKE` clause kind. pub like: Option, + /// Optional `CLONE` source object name. pub clone: Option, + /// Optional table version. pub version: Option, + /// Optional table comment. pub comment: Option, + /// Optional `ON COMMIT` behavior. pub on_commit: Option, + /// Optional cluster identifier. 
pub on_cluster: Option, + /// Optional primary key expression. pub primary_key: Option>, + /// Optional `ORDER BY` for clustering/sorting. pub order_by: Option>, + /// Optional `PARTITION BY` expression. pub partition_by: Option>, + /// Optional `CLUSTER BY` expressions. pub cluster_by: Option>>, + /// Optional `CLUSTERED BY` clause. pub clustered_by: Option, + /// Optional parent tables (`INHERITS`). pub inherits: Option>, + /// Optional partitioned table (`PARTITION OF`) pub partition_of: Option, + /// Range of values associated with the partition (`FOR VALUES`) pub for_values: Option, + /// `STRICT` table flag. pub strict: bool, + /// Whether to copy grants from the source. pub copy_grants: bool, + /// Optional flag for schema evolution support. pub enable_schema_evolution: Option, + /// Optional change tracking flag. pub change_tracking: Option, + /// Optional data retention time in days. pub data_retention_time_in_days: Option, + /// Optional max data extension time in days. pub max_data_extension_time_in_days: Option, + /// Optional default DDL collation. pub default_ddl_collation: Option, + /// Optional aggregation policy object name. pub with_aggregation_policy: Option, + /// Optional row access policy applied to the table. pub with_row_access_policy: Option, + /// Optional tags/labels attached to the table metadata. pub with_tags: Option>, + /// Optional base location for staged data. pub base_location: Option, + /// Optional external volume identifier. pub external_volume: Option, + /// Optional catalog name. pub catalog: Option, + /// Optional catalog synchronization option. pub catalog_sync: Option, + /// Optional storage serialization policy. pub storage_serialization_policy: Option, + /// Parsed table options from the statement. pub table_options: CreateTableOptions, + /// Optional target lag configuration. pub target_lag: Option, + /// Optional warehouse identifier. pub warehouse: Option, + /// Optional refresh mode for materialized tables. 
pub refresh_mode: Option, + /// Optional initialization kind for the table. pub initialize: Option, + /// Whether operations require a user identity. pub require_user: bool, } impl CreateTableBuilder { + /// Create a new `CreateTableBuilder` for the given table name. pub fn new(name: ObjectName) -> Self { Self { or_replace: false, @@ -177,185 +231,191 @@ impl CreateTableBuilder { require_user: false, } } + /// Set `OR REPLACE` for the CREATE TABLE statement. pub fn or_replace(mut self, or_replace: bool) -> Self { self.or_replace = or_replace; self } - + /// Mark the table as `TEMPORARY`. pub fn temporary(mut self, temporary: bool) -> Self { self.temporary = temporary; self } - + /// Mark the table as `EXTERNAL`. pub fn external(mut self, external: bool) -> Self { self.external = external; self } - + /// Set optional `GLOBAL` flag (dialect-specific). pub fn global(mut self, global: Option) -> Self { self.global = global; self } - + /// Set `IF NOT EXISTS`. pub fn if_not_exists(mut self, if_not_exists: bool) -> Self { self.if_not_exists = if_not_exists; self } - + /// Set `TRANSIENT` flag. pub fn transient(mut self, transient: bool) -> Self { self.transient = transient; self } - + /// Set `VOLATILE` flag. pub fn volatile(mut self, volatile: bool) -> Self { self.volatile = volatile; self } - + /// Enable Iceberg table semantics. pub fn iceberg(mut self, iceberg: bool) -> Self { self.iceberg = iceberg; self } - + /// Set `DYNAMIC` table option. pub fn dynamic(mut self, dynamic: bool) -> Self { self.dynamic = dynamic; self } - + /// Set the table column definitions. pub fn columns(mut self, columns: Vec) -> Self { self.columns = columns; self } - + /// Set table-level constraints. pub fn constraints(mut self, constraints: Vec) -> Self { self.constraints = constraints; self } - + /// Set Hive distribution style. 
pub fn hive_distribution(mut self, hive_distribution: HiveDistributionStyle) -> Self { self.hive_distribution = hive_distribution; self } - + /// Set Hive-specific formats. pub fn hive_formats(mut self, hive_formats: Option) -> Self { self.hive_formats = hive_formats; self } - + /// Set file format for the table (e.g., PARQUET). pub fn file_format(mut self, file_format: Option) -> Self { self.file_format = file_format; self } + /// Set storage `location` for the table. pub fn location(mut self, location: Option) -> Self { self.location = location; self } - + /// Set an underlying `AS SELECT` query for the table. pub fn query(mut self, query: Option>) -> Self { self.query = query; self } + /// Set `WITHOUT ROWID` option. pub fn without_rowid(mut self, without_rowid: bool) -> Self { self.without_rowid = without_rowid; self } - + /// Set `LIKE` clause for the table. pub fn like(mut self, like: Option) -> Self { self.like = like; self } - // Different name to allow the object to be cloned + /// Set `CLONE` source object name. pub fn clone_clause(mut self, clone: Option) -> Self { self.clone = clone; self } - + /// Set table `VERSION`. pub fn version(mut self, version: Option) -> Self { self.version = version; self } - + /// Set a comment for the table or following column definitions. pub fn comment_after_column_def(mut self, comment: Option) -> Self { self.comment = comment; self } - + /// Set `ON COMMIT` behavior for temporary tables. pub fn on_commit(mut self, on_commit: Option) -> Self { self.on_commit = on_commit; self } - + /// Set cluster identifier for the table. pub fn on_cluster(mut self, on_cluster: Option) -> Self { self.on_cluster = on_cluster; self } - + /// Set a primary key expression for the table. pub fn primary_key(mut self, primary_key: Option>) -> Self { self.primary_key = primary_key; self } - + /// Set `ORDER BY` clause for clustered/sorted tables. 
pub fn order_by(mut self, order_by: Option>) -> Self { self.order_by = order_by; self } - + /// Set `PARTITION BY` expression. pub fn partition_by(mut self, partition_by: Option>) -> Self { self.partition_by = partition_by; self } - + /// Set `CLUSTER BY` expression(s). pub fn cluster_by(mut self, cluster_by: Option>>) -> Self { self.cluster_by = cluster_by; self } - + /// Set `CLUSTERED BY` clause. pub fn clustered_by(mut self, clustered_by: Option) -> Self { self.clustered_by = clustered_by; self } - + /// Set parent tables via `INHERITS`. pub fn inherits(mut self, inherits: Option>) -> Self { self.inherits = inherits; self } + /// Sets the table which is partitioned to create the current table. pub fn partition_of(mut self, partition_of: Option) -> Self { self.partition_of = partition_of; self } + /// Sets the range of values associated with the partition. pub fn for_values(mut self, for_values: Option) -> Self { self.for_values = for_values; self } + /// Set `STRICT` option. pub fn strict(mut self, strict: bool) -> Self { self.strict = strict; self } - + /// Enable copying grants from source object. pub fn copy_grants(mut self, copy_grants: bool) -> Self { self.copy_grants = copy_grants; self } - + /// Enable or disable schema evolution features. pub fn enable_schema_evolution(mut self, enable_schema_evolution: Option) -> Self { self.enable_schema_evolution = enable_schema_evolution; self } - + /// Enable or disable change tracking. pub fn change_tracking(mut self, change_tracking: Option) -> Self { self.change_tracking = change_tracking; self } - + /// Set data retention time (in days). pub fn data_retention_time_in_days(mut self, data_retention_time_in_days: Option) -> Self { self.data_retention_time_in_days = data_retention_time_in_days; self } - + /// Set maximum data extension time (in days). 
pub fn max_data_extension_time_in_days( mut self, max_data_extension_time_in_days: Option, @@ -363,17 +423,17 @@ impl CreateTableBuilder { self.max_data_extension_time_in_days = max_data_extension_time_in_days; self } - + /// Set default DDL collation. pub fn default_ddl_collation(mut self, default_ddl_collation: Option) -> Self { self.default_ddl_collation = default_ddl_collation; self } - + /// Set aggregation policy object. pub fn with_aggregation_policy(mut self, with_aggregation_policy: Option) -> Self { self.with_aggregation_policy = with_aggregation_policy; self } - + /// Attach a row access policy to the table. pub fn with_row_access_policy( mut self, with_row_access_policy: Option, @@ -381,32 +441,32 @@ impl CreateTableBuilder { self.with_row_access_policy = with_row_access_policy; self } - + /// Attach tags/labels to the table metadata. pub fn with_tags(mut self, with_tags: Option>) -> Self { self.with_tags = with_tags; self } - + /// Set a base storage location for staged data. pub fn base_location(mut self, base_location: Option) -> Self { self.base_location = base_location; self } - + /// Set an external volume identifier. pub fn external_volume(mut self, external_volume: Option) -> Self { self.external_volume = external_volume; self } - + /// Set the catalog name for the table. pub fn catalog(mut self, catalog: Option) -> Self { self.catalog = catalog; self } - + /// Set catalog synchronization option. pub fn catalog_sync(mut self, catalog_sync: Option) -> Self { self.catalog_sync = catalog_sync; self } - + /// Set a storage serialization policy. pub fn storage_serialization_policy( mut self, storage_serialization_policy: Option, @@ -414,37 +474,37 @@ impl CreateTableBuilder { self.storage_serialization_policy = storage_serialization_policy; self } - + /// Set arbitrary table options parsed from the statement. 
pub fn table_options(mut self, table_options: CreateTableOptions) -> Self { self.table_options = table_options; self } - + /// Set a target lag configuration (dialect-specific). pub fn target_lag(mut self, target_lag: Option) -> Self { self.target_lag = target_lag; self } - + /// Associate the table with a warehouse identifier. pub fn warehouse(mut self, warehouse: Option) -> Self { self.warehouse = warehouse; self } - + /// Set refresh mode for materialized/managed tables. pub fn refresh_mode(mut self, refresh_mode: Option) -> Self { self.refresh_mode = refresh_mode; self } - + /// Set initialization mode for the table. pub fn initialize(mut self, initialize: Option) -> Self { self.initialize = initialize; self } - + /// Require a user identity for table operations. pub fn require_user(mut self, require_user: bool) -> Self { self.require_user = require_user; self } - + /// Consume the builder and produce a `Statement::CreateTable`. pub fn build(self) -> Statement { CreateTable { or_replace: self.or_replace, diff --git a/src/ast/helpers/stmt_data_loading.rs b/src/ast/helpers/stmt_data_loading.rs index 62ee77ce32..dfc1f4b0bf 100644 --- a/src/ast/helpers/stmt_data_loading.rs +++ b/src/ast/helpers/stmt_data_loading.rs @@ -34,11 +34,17 @@ use sqlparser_derive::{Visit, VisitMut}; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Parameters for a named stage object used in data loading/unloading. pub struct StageParamsObject { + /// Optional URL for the stage. pub url: Option, + /// Encryption-related key/value options. pub encryption: KeyValueOptions, + /// Optional endpoint string. pub endpoint: Option, + /// Optional storage integration identifier. pub storage_integration: Option, + /// Credentials for accessing the stage. 
pub credentials: KeyValueOptions, } @@ -48,7 +54,9 @@ pub struct StageParamsObject { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum StageLoadSelectItemKind { + /// A standard SQL select item expression. SelectItem(SelectItem), + /// A Snowflake-specific select item used for stage loading. StageLoadSelectItem(StageLoadSelectItem), } @@ -64,10 +72,15 @@ impl fmt::Display for StageLoadSelectItemKind { #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single item in the `SELECT` list for data loading from staged files. pub struct StageLoadSelectItem { + /// Optional alias for the input source. pub alias: Option, + /// Column number within the staged file (1-based). pub file_col_num: i32, + /// Optional element identifier following the column reference. pub element: Option, + /// Optional alias for the item (AS clause). pub item_as: Option, } @@ -116,9 +129,12 @@ impl fmt::Display for StageLoadSelectItem { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A command to stage files to a named stage. pub struct FileStagingCommand { + /// The stage to which files are being staged. #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub stage: ObjectName, + /// Optional file matching `PATTERN` expression. 
pub pattern: Option, } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ad8147f968..9115bb29d9 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -60,22 +60,22 @@ pub use self::dcl::{ }; pub use self::ddl::{ Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, AlterOperator, - AlterOperatorFamily, AlterOperatorFamilyOperation, AlterOperatorOperation, - AlterPolicyOperation, AlterSchema, AlterSchemaOperation, AlterTable, AlterTableAlgorithm, - AlterTableLock, AlterTableOperation, AlterTableType, AlterType, AlterTypeAddValue, - AlterTypeAddValuePosition, AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, - ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, - ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, CreateDomain, - CreateExtension, CreateFunction, CreateIndex, CreateOperator, CreateOperatorClass, - CreateOperatorFamily, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial, - DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, DropOperatorFamily, - DropOperatorSignature, DropTrigger, ForValues, GeneratedAs, GeneratedExpressionMode, - IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, - IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, - NullsDistinctOption, OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, - OperatorFamilyItem, OperatorOption, OperatorPurpose, Owner, Partition, PartitionBoundValue, - ProcedureParam, ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption, - TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef, + AlterOperatorClass, AlterOperatorClassOperation, AlterOperatorFamily, + AlterOperatorFamilyOperation, AlterOperatorOperation, AlterPolicyOperation, AlterSchema, + AlterSchemaOperation, AlterTable, AlterTableAlgorithm, AlterTableLock, AlterTableOperation, + AlterTableType, AlterType, AlterTypeAddValue, 
AlterTypeAddValuePosition, AlterTypeOperation, + AlterTypeRename, AlterTypeRenameValue, ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, + ColumnOptions, ColumnPolicy, ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, + CreateDomain, CreateExtension, CreateFunction, CreateIndex, CreateOperator, + CreateOperatorClass, CreateOperatorFamily, CreateTable, CreateTrigger, CreateView, Deduplicate, + DeferrableInitial, DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, + DropOperatorFamily, DropOperatorSignature, DropTrigger, ForValues, GeneratedAs, + GeneratedExpressionMode, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, + IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, + KeyOrIndexDisplay, Msck, NullsDistinctOption, OperatorArgTypes, OperatorClassItem, + OperatorFamilyDropItem, OperatorFamilyItem, OperatorOption, OperatorPurpose, Owner, Partition, + PartitionBoundValue, ProcedureParam, ReferentialAction, RenameTableNameKind, ReplicaIdentity, + TagsColumnOption, TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, }; @@ -127,6 +127,7 @@ mod data_type; mod dcl; mod ddl; mod dml; +/// Helper modules for building and manipulating AST nodes. pub mod helpers; pub mod table_constraints; pub use table_constraints::{ @@ -145,6 +146,7 @@ mod value; #[cfg(feature = "visitor")] mod visitor; +/// Helper used to format a slice using a separator string (e.g., `", "`). pub struct DisplaySeparated<'a, T> where T: fmt::Display, @@ -290,6 +292,7 @@ impl Ident { } } + /// Create an `Ident` with the given `span` and `value` (unquoted). pub fn with_span(span: Span, value: S) -> Self where S: Into, @@ -301,6 +304,7 @@ impl Ident { } } + /// Create a quoted `Ident` with the given `quote` and `span`. 
pub fn with_quote_and_span(quote: char, span: Span, value: S) -> Self where S: Into, @@ -367,11 +371,14 @@ impl fmt::Display for ObjectName { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ObjectNamePart { + /// A single identifier part, e.g. `schema` or `table`. Identifier(Ident), + /// A function that returns an identifier (dialect-specific). Function(ObjectNamePartFunction), } impl ObjectNamePart { + /// Return the identifier if this is an `Identifier` variant. pub fn as_ident(&self) -> Option<&Ident> { match self { ObjectNamePart::Identifier(ident) => Some(ident), @@ -397,7 +404,9 @@ impl fmt::Display for ObjectNamePart { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ObjectNamePartFunction { + /// The function name that produces the object name part. pub name: Ident, + /// Function arguments used to compute the identifier. pub args: Vec, } @@ -444,14 +453,17 @@ impl fmt::Display for Array { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Interval { + /// The interval value expression (commonly a string literal). pub value: Box, + /// Optional leading time unit (e.g., `HOUR`, `MINUTE`). pub leading_field: Option, + /// Optional leading precision for the leading field. pub leading_precision: Option, + /// Optional trailing time unit for a range (e.g., `SECOND`). pub last_field: Option, - /// The seconds precision can be specified in SQL source as - /// `INTERVAL '__' SECOND(_, x)` (in which case the `leading_field` - /// will be `Second` and the `last_field` will be `None`), - /// or as `__ TO SECOND(x)`. + /// The fractional seconds precision, when specified. + /// + /// See SQL `SECOND(n)` or `SECOND(m, n)` forms. 
pub fractional_seconds_precision: Option, } @@ -503,9 +515,11 @@ impl fmt::Display for Interval { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct StructField { + /// Optional name of the struct field. pub field_name: Option, + /// The field data type. pub field_type: DataType, - /// Struct field options. + /// Struct field options (e.g., `OPTIONS(...)` on BigQuery). /// See [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language#column_name_and_column_schema) pub options: Option>, } @@ -532,7 +546,9 @@ impl fmt::Display for StructField { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct UnionField { + /// Name of the union field. pub field_name: Ident, + /// Type of the union field. pub field_type: DataType, } @@ -549,7 +565,9 @@ impl fmt::Display for UnionField { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DictionaryField { + /// Dictionary key identifier. pub key: Ident, + /// Value expression for the dictionary entry. pub value: Box, } @@ -564,6 +582,7 @@ impl fmt::Display for DictionaryField { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Map { + /// Entries of the map as key/value pairs. pub entries: Vec, } @@ -580,7 +599,9 @@ impl Display for Map { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct MapEntry { + /// Key expression of the map entry. pub key: Box, + /// Value expression of the map entry. 
pub value: Box, } @@ -596,7 +617,9 @@ impl fmt::Display for MapEntry { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CastFormat { + /// A simple cast format specified by a `Value`. Value(Value), + /// A cast format with an explicit time zone: `(format, timezone)`. ValueAtTimeZone(Value, Value), } @@ -608,12 +631,20 @@ pub enum JsonPathElem { /// Accesses an object field using dot notation, e.g. `obj:foo.bar.baz`. /// /// See . - Dot { key: String, quoted: bool }, + Dot { + /// The object key text (without quotes). + key: String, + /// `true` when the key was quoted in the source. + quoted: bool, + }, /// Accesses an object field or array element using bracket notation, /// e.g. `obj['foo']`. /// /// See . - Bracket { key: Expr }, + Bracket { + /// The expression used as the bracket key (string or numeric expression). + key: Expr, + }, } /// A JSON path. @@ -624,6 +655,7 @@ pub enum JsonPathElem { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct JsonPath { + /// Sequence of path elements that form the JSON path. pub path: Vec, } @@ -738,7 +770,9 @@ pub enum CeilFloorKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CaseWhen { + /// The `WHEN` condition expression. pub condition: Expr, + /// The expression returned when `condition` matches. pub result: Expr, } @@ -801,7 +835,9 @@ pub enum Expr { /// `CompoundFieldAccess(Ident('a'), vec![Subscript('field1'), Ident('field2')]` /// - If a struct access likes `a.field1.field2`, it will be represented by CompoundIdentifier([a, field1, field2]) CompoundFieldAccess { + /// The base expression being accessed. root: Box, + /// Sequence of access operations (subscript or identifier accesses). 
access_chain: Vec, }, /// Access data nested in a value containing semi-structured data, such as @@ -837,95 +873,138 @@ pub enum Expr { IsNotDistinctFrom(Box, Box), /// ` IS [ NOT ] [ form ] NORMALIZED` IsNormalized { + /// Expression being tested. expr: Box, + /// Optional normalization `form` (e.g., NFC, NFD). form: Option, + /// `true` when `NOT` is present. negated: bool, }, /// `[ NOT ] IN (val1, val2, ...)` InList { + /// Left-hand expression to test for membership. expr: Box, + /// Literal list of expressions to check against. list: Vec, + /// `true` when the `NOT` modifier is present. negated: bool, }, /// `[ NOT ] IN (SELECT ...)` InSubquery { + /// Left-hand expression to test for membership. expr: Box, + /// The subquery providing the candidate values. subquery: Box, + /// `true` when the `NOT` modifier is present. negated: bool, }, /// `[ NOT ] IN UNNEST(array_expression)` InUnnest { + /// Left-hand expression to test for membership. expr: Box, + /// Array expression being unnested. array_expr: Box, + /// `true` when the `NOT` modifier is present. negated: bool, }, /// ` [ NOT ] BETWEEN AND ` Between { + /// Expression being compared. expr: Box, + /// `true` when the `NOT` modifier is present. negated: bool, + /// Lower bound. low: Box, + /// Upper bound. high: Box, }, /// Binary operation e.g. `1 + 1` or `foo > bar` BinaryOp { + /// Left operand. left: Box, + /// Operator between operands. op: BinaryOperator, + /// Right operand. right: Box, }, /// `[NOT] LIKE [ESCAPE ]` Like { + /// `true` when `NOT` is present. negated: bool, - // Snowflake supports the ANY keyword to match against a list of patterns - // https://docs.snowflake.com/en/sql-reference/functions/like_any + /// Snowflake supports the ANY keyword to match against a list of patterns + /// any: bool, + /// Expression to match. expr: Box, + /// Pattern expression. pattern: Box, + /// Optional escape character. 
escape_char: Option, }, /// `ILIKE` (case-insensitive `LIKE`) ILike { + /// `true` when `NOT` is present. negated: bool, - // Snowflake supports the ANY keyword to match against a list of patterns - // https://docs.snowflake.com/en/sql-reference/functions/like_any + /// Snowflake supports the ANY keyword to match against a list of patterns + /// any: bool, + /// Expression to match. expr: Box, + /// Pattern expression. pattern: Box, + /// Optional escape character. escape_char: Option, }, - /// SIMILAR TO regex + /// `SIMILAR TO` regex SimilarTo { + /// `true` when `NOT` is present. negated: bool, + /// Expression to test. expr: Box, + /// Pattern expression. pattern: Box, + /// Optional escape character. escape_char: Option, }, - /// MySQL: RLIKE regex or REGEXP regex + /// MySQL: `RLIKE` regex or `REGEXP` regex RLike { + /// `true` when `NOT` is present. negated: bool, + /// Expression to test. expr: Box, + /// Pattern expression. pattern: Box, - // true for REGEXP, false for RLIKE (no difference in semantics) + /// true for REGEXP, false for RLIKE (no difference in semantics) regexp: bool, }, /// `ANY` operation e.g. `foo > ANY(bar)`, comparison operator is one of `[=, >, <, =>, =<, !=]` /// AnyOp { + /// Left operand. left: Box, + /// Comparison operator. compare_op: BinaryOperator, + /// Right-hand subquery expression. right: Box, - // ANY and SOME are synonymous: https://docs.cloudera.com/cdw-runtime/cloud/using-hiveql/topics/hive_comparison_predicates.html + /// ANY and SOME are synonymous: is_some: bool, }, /// `ALL` operation e.g. `foo > ALL(bar)`, comparison operator is one of `[=, >, <, =>, =<, !=]` /// AllOp { + /// Left operand. left: Box, + /// Comparison operator. compare_op: BinaryOperator, + /// Right-hand subquery expression. right: Box, }, + /// Unary operation e.g. `NOT foo` UnaryOp { + /// The unary operator (e.g., `NOT`, `-`). op: UnaryOperator, + /// Operand expression. 
expr: Box, }, /// CONVERT a value to a different data type or character encoding. e.g. `CONVERT(foo USING utf8mb4)` @@ -933,13 +1012,13 @@ pub enum Expr { /// CONVERT (false) or TRY_CONVERT (true) /// is_try: bool, - /// The expression to convert + /// The expression to convert. expr: Box, - /// The target data type + /// The target data type, if provided. data_type: Option, - /// The target character encoding + /// Optional target character encoding (e.g., `utf8mb4`). charset: Option, - /// whether the target comes before the expr (MSSQL syntax) + /// `true` when target precedes the value (MSSQL syntax). target_before_value: bool, /// How to translate the expression. /// @@ -948,8 +1027,11 @@ pub enum Expr { }, /// `CAST` an expression to a different data type e.g. `CAST(foo AS VARCHAR(123))` Cast { + /// The cast kind (e.g., `CAST`, `TRY_CAST`). kind: CastKind, + /// Expression being cast. expr: Box, + /// Target data type. data_type: DataType, /// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by [BigQuery] /// @@ -958,7 +1040,9 @@ pub enum Expr { }, /// AT a timestamp to a different timezone e.g. `FROM_UNIXTIME(0) AT TIME ZONE 'UTC-06:00'` AtTimeZone { + /// Timestamp expression to shift. timestamp: Box, + /// Time zone expression to apply. time_zone: Box, }, /// Extract a field from a timestamp e.g. `EXTRACT(MONTH FROM foo)` @@ -969,8 +1053,11 @@ pub enum Expr { /// EXTRACT(DateTimeField FROM ) | EXTRACT(DateTimeField, ) /// ``` Extract { + /// Which datetime field is being extracted. field: DateTimeField, + /// Syntax variant used (`From` or `Comma`). syntax: ExtractSyntax, + /// Expression to extract from. expr: Box, }, /// ```sql @@ -980,7 +1067,9 @@ pub enum Expr { /// CEIL( [, ] ) /// ``` Ceil { + /// Expression to ceil. expr: Box, + /// The CEIL/FLOOR kind (datetime field or scale). field: CeilFloorKind, }, /// ```sql @@ -990,14 +1079,18 @@ pub enum Expr { /// FLOOR( [, ] ) /// Floor { + /// Expression to floor. 
expr: Box, + /// The CEIL/FLOOR kind (datetime field or scale). field: CeilFloorKind, }, /// ```sql /// POSITION( in ) /// ``` Position { + /// Expression to search for. expr: Box, + /// Expression to search in. r#in: Box, }, /// ```sql @@ -1008,8 +1101,11 @@ pub enum Expr { /// SUBSTRING(, , ) /// ``` Substring { + /// Source expression. expr: Box, + /// Optional `FROM` expression. substring_from: Option>, + /// Optional `FOR` expression. substring_for: Option>, /// false if the expression is represented using the `SUBSTRING(expr [FROM start] [FOR len])` syntax @@ -1027,24 +1123,33 @@ pub enum Expr { /// TRIM(, [, characters]) -- only Snowflake or Bigquery /// ``` Trim { + /// The expression to trim from. expr: Box, - // ([BOTH | LEADING | TRAILING] + /// Which side to trim: `BOTH`, `LEADING`, or `TRAILING`. trim_where: Option, + /// Optional expression specifying what to trim from the value. trim_what: Option>, + /// Optional list of characters to trim (dialect-specific). trim_characters: Option>, }, /// ```sql /// OVERLAY( PLACING FROM [ FOR ] /// ``` Overlay { + /// The target expression being overlayed. expr: Box, + /// The expression to place into the target. overlay_what: Box, + /// The `FROM` position expression indicating where to start overlay. overlay_from: Box, + /// Optional `FOR` length expression limiting the overlay span. overlay_for: Option>, }, /// `expr COLLATE collation` Collate { + /// The expression being collated. expr: Box, + /// The collation name to apply to the expression. collation: ObjectName, }, /// Nested expression e.g. `(foo > bar)` or `(1)` @@ -1055,8 +1160,9 @@ pub enum Expr { /// /// Prefixed { + /// The prefix identifier (introducer or projection prefix). prefix: Ident, - /// The value of the constant. + /// The value expression being prefixed. /// Hint: you can unwrap the string value using `value.into_string()`. 
value: Box, }, @@ -1072,16 +1178,23 @@ pub enum Expr { /// not `< 0` nor `1, 2, 3` as allowed in a `` per /// Case { + /// The attached `CASE` token (keeps original spacing/comments). case_token: AttachedToken, + /// The attached `END` token (keeps original spacing/comments). end_token: AttachedToken, + /// Optional operand expression after `CASE` (for simple CASE). operand: Option>, + /// The `WHEN ... THEN` conditions and results. conditions: Vec, + /// Optional `ELSE` result expression. else_result: Option>, }, /// An exists expression `[ NOT ] EXISTS(SELECT ...)`, used in expressions like /// `WHERE [ NOT ] EXISTS (SELECT ...)`. Exists { + /// The subquery checked by `EXISTS`. subquery: Box, + /// Whether the `EXISTS` is negated (`NOT EXISTS`). negated: bool, }, /// A parenthesized subquery `(SELECT ...)`, used in expression like @@ -1117,7 +1230,9 @@ pub enum Expr { /// ``` /// [1]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#struct_type Named { + /// The expression being named. expr: Box, + /// The assigned identifier name for the expression. name: Ident, }, /// `DuckDB` specific `Struct` literal expression [1] @@ -1158,6 +1273,7 @@ pub enum Expr { /// `` opt_search_modifier: Option, }, + /// An unqualified `*` wildcard token (e.g. `*`). Wildcard(AttachedToken), /// Qualified wildcard, e.g. `alias.*` or `schema.table.*`. /// (Same caveats apply to `QualifiedWildcard` as to `Wildcard`.) @@ -1207,7 +1323,10 @@ impl Expr { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Subscript { /// Accesses the element of the array at the given index. - Index { index: Expr }, + Index { + /// The index expression used to access the array element. + index: Expr, + }, /// Accesses a slice of an array on PostgreSQL, e.g. /// @@ -1231,8 +1350,11 @@ pub enum Subscript { /// {1,3,5} /// ``` Slice { + /// Optional lower bound for the slice (inclusive). lower_bound: Option, + /// Optional upper bound for the slice (inclusive). 
upper_bound: Option, + /// Optional stride for the slice (step size). stride: Option, }, } @@ -1982,11 +2104,21 @@ impl fmt::Display for Expr { } } +/// The type of a window used in `OVER` clauses. +/// +/// A window can be either an inline specification (`WindowSpec`) or a +/// reference to a previously defined named window. +/// +/// - `WindowSpec(WindowSpec)`: An inline window specification, e.g. +/// `OVER (PARTITION BY ... ORDER BY ...)`. +/// - `NamedWindow(Ident)`: A reference to a named window declared elsewhere. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum WindowType { + /// An inline window specification. WindowSpec(WindowSpec), + /// A reference to a previously defined named window. NamedWindow(Ident), } @@ -2081,7 +2213,9 @@ impl fmt::Display for WindowSpec { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WindowFrame { + /// Units for the frame (e.g. `ROWS`, `RANGE`, `GROUPS`). pub units: WindowFrameUnits, + /// The start bound of the window frame. pub start_bound: WindowFrameBound, /// The right bound of the `BETWEEN .. AND` clause. The end bound of `None` /// indicates the shorthand form (e.g. `ROWS 1 PRECEDING`), which must @@ -2106,9 +2240,13 @@ impl Default for WindowFrame { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Units used to describe the window frame scope. pub enum WindowFrameUnits { + /// `ROWS` unit. Rows, + /// `RANGE` unit. Range, + /// `GROUPS` unit. 
Groups, } @@ -2128,8 +2266,11 @@ impl fmt::Display for WindowFrameUnits { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// How NULL values are treated in certain window functions. pub enum NullTreatment { + /// Ignore NULL values (e.g. `IGNORE NULLS`). IgnoreNulls, + /// Respect NULL values (e.g. `RESPECT NULLS`). RespectNulls, } @@ -2170,9 +2311,13 @@ impl fmt::Display for WindowFrameBound { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Indicates partition operation type for partition management statements. pub enum AddDropSync { + /// Add partitions. ADD, + /// Drop partitions. DROP, + /// Sync partitions. SYNC, } @@ -2189,12 +2334,19 @@ impl fmt::Display for AddDropSync { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Object kinds supported by `SHOW CREATE` statements. pub enum ShowCreateObject { + /// An event object for `SHOW CREATE EVENT`. Event, + /// A function object for `SHOW CREATE FUNCTION`. Function, + /// A procedure object for `SHOW CREATE PROCEDURE`. Procedure, + /// A table object for `SHOW CREATE TABLE`. Table, + /// A trigger object for `SHOW CREATE TRIGGER`. Trigger, + /// A view object for `SHOW CREATE VIEW`. View, } @@ -2214,13 +2366,21 @@ impl fmt::Display for ShowCreateObject { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Objects that can be targeted by a `COMMENT` statement. pub enum CommentObject { + /// A table column. Column, + /// A table. 
Table, + /// An extension. Extension, + /// A schema. Schema, + /// A database. Database, + /// A user. User, + /// A role. Role, } @@ -2241,8 +2401,11 @@ impl fmt::Display for CommentObject { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Password specification variants used in user-related statements. pub enum Password { + /// A concrete password expression. Password(Expr), + /// Represents a `NULL` password. NullPassword, } @@ -2268,8 +2431,11 @@ pub enum Password { pub struct CaseStatement { /// The `CASE` token that starts the statement. pub case_token: AttachedToken, + /// Optional expression to match against in `CASE ... WHEN`. pub match_expr: Option, + /// The `WHEN ... THEN` blocks of the `CASE` statement. pub when_blocks: Vec, + /// Optional `ELSE` block for the `CASE` statement. pub else_block: Option, /// The last token of the statement (`END` or `CASE`). pub end_case_token: AttachedToken, @@ -2336,9 +2502,13 @@ impl fmt::Display for CaseStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IfStatement { + /// The initial `IF` block containing the condition and statements. pub if_block: ConditionalStatementBlock, + /// Additional `ELSEIF` blocks. pub elseif_blocks: Vec, + /// Optional `ELSE` block. pub else_block: Option, + /// Optional trailing `END` token for the `IF` statement. pub end_token: Option, } @@ -2384,6 +2554,7 @@ impl fmt::Display for IfStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct WhileStatement { + /// Block executed while the condition holds. 
pub while_block: ConditionalStatementBlock, } @@ -2423,13 +2594,18 @@ impl fmt::Display for WhileStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ConditionalStatementBlock { + /// Token representing the start of the block (e.g., WHEN/IF/WHILE). pub start_token: AttachedToken, + /// Optional condition expression for the block. pub condition: Option, + /// Optional token for the `THEN` keyword. pub then_token: Option, + /// The statements contained in this conditional block. pub conditional_statements: ConditionalStatements, } impl ConditionalStatementBlock { + /// Get the statements in this conditional block. pub fn statements(&self) -> &Vec { self.conditional_statements.statements() } @@ -2466,14 +2642,19 @@ impl fmt::Display for ConditionalStatementBlock { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Statements used inside conditional blocks (`IF`, `WHEN`, `WHILE`). pub enum ConditionalStatements { - /// SELECT 1; SELECT 2; SELECT 3; ... - Sequence { statements: Vec }, - /// BEGIN SELECT 1; SELECT 2; SELECT 3; ... END + /// Simple sequence of statements (no `BEGIN`/`END`). + Sequence { + /// The statements in the sequence. + statements: Vec, + }, + /// Block enclosed by `BEGIN` and `END`. BeginEnd(BeginEndStatements), } impl ConditionalStatements { + /// Get the statements in this conditional statements block. pub fn statements(&self) -> &Vec { match self { ConditionalStatements::Sequence { statements } => statements, @@ -2508,8 +2689,11 @@ impl fmt::Display for ConditionalStatements { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct BeginEndStatements { + /// Token representing the `BEGIN` keyword (may include span info). 
pub begin_token: AttachedToken, + /// Statements contained within the block. pub statements: Vec, + /// Token representing the `END` keyword (may include span info). pub end_token: AttachedToken, } @@ -2549,6 +2733,7 @@ impl fmt::Display for BeginEndStatements { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct RaiseStatement { + /// Optional value provided to the RAISE statement. pub value: Option, } @@ -2806,33 +2991,18 @@ impl fmt::Display for Declare { #[derive(Debug, Default, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Options allowed within a `CREATE TABLE` statement. pub enum CreateTableOptions { + /// No options specified. #[default] None, - /// Options specified using the `WITH` keyword. - /// e.g. `WITH (description = "123")` - /// - /// - /// - /// MSSQL supports more specific options that's not only key-value pairs. - /// - /// WITH ( - /// DISTRIBUTION = ROUND_ROBIN, - /// CLUSTERED INDEX (column_a DESC, column_b) - /// ) - /// - /// + /// Options specified using the `WITH` keyword, e.g. `WITH (k = v)`. With(Vec), - /// Options specified using the `OPTIONS` keyword. - /// e.g. `OPTIONS(description = "123")` - /// - /// + /// Options specified using the `OPTIONS(...)` clause. Options(Vec), - - /// Plain options, options which are not part on any declerative statement e.g. WITH/OPTIONS/... - /// + /// Plain space-separated options. Plain(Vec), - + /// Table properties (e.g., TBLPROPERTIES / storage properties). TableProperties(Vec), } @@ -2893,8 +3063,11 @@ impl Display for FromTable { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Type of `CREATE POLICY` (permissive or restrictive). 
pub enum CreatePolicyType { + /// Policy allows operations unless explicitly denied. Permissive, + /// Policy denies operations unless explicitly allowed. Restrictive, } @@ -2906,35 +3079,54 @@ pub enum CreatePolicyType { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Commands that a policy can apply to (FOR clause). pub enum CreatePolicyCommand { + /// Applies to all commands. All, + /// Applies to SELECT. Select, + /// Applies to INSERT. Insert, + /// Applies to UPDATE. Update, + /// Applies to DELETE. Delete, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Variants for the `SET` family of statements. pub enum Set { /// SQL Standard-style /// SET a = 1; + /// `SET var = value` (standard SQL-style assignment). SingleAssignment { + /// Optional scope modifier (`SESSION` / `LOCAL`). scope: Option, + /// Whether this is a Hive-style `HIVEVAR:` assignment. hivevar: bool, + /// Variable name to assign. variable: ObjectName, + /// Values assigned to the variable. values: Vec, }, /// Snowflake-style /// SET (a, b, ..) = (1, 2, ..); + /// `SET (a, b) = (1, 2)` (tuple assignment syntax). ParenthesizedAssignments { + /// Variables being assigned in tuple form. variables: Vec, + /// Corresponding values for the variables. values: Vec, }, /// MySQL-style /// SET a = 1, b = 2, ..; - MultipleAssignments { assignments: Vec }, + /// `SET a = 1, b = 2` (MySQL-style comma-separated assignments). + MultipleAssignments { + /// List of `SET` assignments (MySQL-style comma-separated). 
+ assignments: Vec, + }, /// Session authorization for Postgres/Redshift /// /// ```sql @@ -2971,12 +3163,21 @@ pub enum Set { /// Note: this is a PostgreSQL-specific statements /// `SET TIME ZONE ` is an alias for `SET timezone TO ` in PostgreSQL /// However, we allow it for all dialects. - SetTimeZone { local: bool, value: Expr }, + /// `SET TIME ZONE` statement. `local` indicates the `LOCAL` keyword. + /// `SET TIME ZONE ` statement. + SetTimeZone { + /// Whether the `LOCAL` keyword was specified. + local: bool, + /// Time zone expression value. + value: Expr, + }, /// ```sql /// SET NAMES 'charset_name' [COLLATE 'collation_name'] /// ``` SetNames { + /// Character set name to set. charset_name: Ident, + /// Optional collation name. collation_name: Option, }, /// ```sql @@ -2989,8 +3190,11 @@ pub enum Set { /// SET TRANSACTION ... /// ``` SetTransaction { + /// Transaction modes (e.g., ISOLATION LEVEL, READ ONLY). modes: Vec, + /// Optional snapshot value for transaction snapshot control. snapshot: Option, + /// `true` when the `SESSION` keyword was used. session: bool, }, } @@ -3091,7 +3295,9 @@ impl Display for Set { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ExceptionWhen { + /// Identifiers that trigger this branch (error conditions). pub idents: Vec, + /// Statements to execute when the condition matches. pub statements: Vec, } @@ -3118,13 +3324,21 @@ impl Display for ExceptionWhen { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Analyze { #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + /// Name of the table to analyze. pub table_name: ObjectName, + /// Optional partition expressions to restrict the analysis. pub partitions: Option>, + /// `true` when analyzing specific columns. pub for_columns: bool, + /// Columns to analyze when `for_columns` is `true`. pub columns: Vec, + /// Whether to cache metadata before analyzing. 
pub cache_metadata: bool, + /// Whether to skip scanning the table. pub noscan: bool, + /// Whether to compute statistics during analysis. pub compute_statistics: bool, + /// Whether the `TABLE` keyword was present. pub has_table_keyword: bool, } @@ -3180,6 +3394,7 @@ pub enum Statement { /// ``` /// Analyze (Hive) Analyze(Analyze), + /// `SET` statements (session, transaction, timezone, etc.). Set(Set), /// ```sql /// TRUNCATE @@ -3214,11 +3429,17 @@ pub enum Statement { extension_name: Ident, }, // TODO: Support ROW FORMAT + /// LOAD DATA from a directory or query source. Directory { + /// Whether to overwrite existing files. overwrite: bool, + /// Whether the directory is local to the server. local: bool, + /// Path to the directory or files. path: String, + /// Optional file format for the data. file_format: Option, + /// Source query providing data to load. source: Box, }, /// A `CASE` statement. @@ -3262,19 +3483,33 @@ pub enum Statement { /// in different enums. This can be refactored later once custom dialects /// are allowed to have custom Statements. CopyIntoSnowflake { + /// Kind of COPY INTO operation (table or location). kind: CopyIntoSnowflakeKind, + /// Target object for the COPY INTO operation. into: ObjectName, + /// Optional list of target columns. into_columns: Option>, + /// Optional source object name (staged data). from_obj: Option, + /// Optional alias for the source object. from_obj_alias: Option, + /// Stage-specific parameters (e.g., credentials, path). stage_params: StageParamsObject, + /// Optional list of transformations applied when loading. from_transformations: Option>, + /// Optional source query instead of a staged object. from_query: Option>, + /// Optional list of specific file names to load. files: Option>, + /// Optional filename matching pattern. pattern: Option, + /// File format options. file_format: KeyValueOptions, + /// Additional copy options. copy_options: KeyValueOptions, + /// Optional validation mode string. 
validation_mode: Option, + /// Optional partition expression for loading. partition: Option>, }, /// ```sql @@ -3312,9 +3547,13 @@ pub enum Statement { /// Sqlite specific statement CreateVirtualTable { #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + /// Name of the virtual table module instance. name: ObjectName, + /// `true` when `IF NOT EXISTS` was specified. if_not_exists: bool, + /// Module name used by the virtual table. module_name: Ident, + /// Arguments passed to the module. module_args: Vec, }, /// ```sql @@ -3331,12 +3570,19 @@ pub enum Statement { /// ``` /// See [DuckDB](https://duckdb.org/docs/sql/statements/create_secret.html) CreateSecret { + /// `true` when `OR REPLACE` was specified. or_replace: bool, + /// Optional `TEMPORARY` flag. temporary: Option, + /// `true` when `IF NOT EXISTS` was present. if_not_exists: bool, + /// Optional secret name. name: Option, + /// Optional storage specifier identifier. storage_specifier: Option, + /// The secret type identifier. secret_type: Ident, + /// Additional secret options. options: Vec, }, /// A `CREATE SERVER` statement. @@ -3346,13 +3592,20 @@ pub enum Statement { /// ``` /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) CreatePolicy { + /// Name of the policy. name: Ident, + /// Table the policy is defined on. #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, + /// Optional policy type (e.g., `PERMISSIVE` / `RESTRICTIVE`). policy_type: Option, + /// Optional command the policy applies to (e.g., `SELECT`). command: Option, + /// Optional list of grantee owners. to: Option>, + /// Optional expression for the `USING` clause. using: Option, + /// Optional expression for the `WITH CHECK` clause. with_check: Option, }, /// ```sql @@ -3388,18 +3641,23 @@ pub enum Statement { /// ALTER INDEX /// ``` AlterIndex { + /// Name of the index to alter. name: ObjectName, + /// The operation to perform on the index. 
operation: AlterIndexOperation, }, /// ```sql /// ALTER VIEW /// ``` AlterView { - /// View name + /// View name being altered. #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] name: ObjectName, + /// Optional new column list for the view. columns: Vec, + /// Replacement query for the view definition. query: Box, + /// Additional WITH options for the view. with_options: Vec, }, /// ```sql @@ -3418,10 +3676,17 @@ pub enum Statement { /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteropfamily.html) AlterOperatorFamily(AlterOperatorFamily), /// ```sql + /// ALTER OPERATOR CLASS + /// ``` + /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-alteropclass.html) + AlterOperatorClass(AlterOperatorClass), + /// ```sql /// ALTER ROLE /// ``` AlterRole { + /// Role name being altered. name: Ident, + /// Operation to perform on the role. operation: AlterRoleOperation, }, /// ```sql @@ -3429,9 +3694,12 @@ pub enum Statement { /// ``` /// (Postgresql-specific) AlterPolicy { + /// Policy name to alter. name: Ident, + /// Target table name the policy is defined on. #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, + /// Optional operation specific to the policy alteration. operation: AlterPolicyOperation, }, /// ```sql @@ -3443,9 +3711,13 @@ pub enum Statement { /// ``` /// (Hive-specific) AlterConnector { + /// Name of the connector to alter. name: Ident, + /// Optional connector properties to set. properties: Option>, + /// Optional new URL for the connector. url: Option, + /// Optional new owner specification. owner: Option, }, /// ```sql @@ -3477,12 +3749,15 @@ pub enum Statement { /// ``` /// See AttachDuckDBDatabase { + /// `true` when `IF NOT EXISTS` was present. if_not_exists: bool, - /// true if the syntax is 'ATTACH DATABASE', false if it's just 'ATTACH' + /// `true` if the syntax used `ATTACH DATABASE` rather than `ATTACH`. 
database: bool, - /// An expression that indicates the path to the database file + /// The path identifier to the database file being attached. database_path: Ident, + /// Optional alias assigned to the attached database. database_alias: Option, + /// Dialect-specific attach options (e.g., `READ_ONLY`). attach_options: Vec, }, /// (DuckDB-specific) @@ -3491,9 +3766,11 @@ pub enum Statement { /// ``` /// See DetachDuckDBDatabase { + /// `true` when `IF EXISTS` was present. if_exists: bool, - /// true if the syntax is 'DETACH DATABASE', false if it's just 'DETACH' + /// `true` if the syntax used `DETACH DATABASE` rather than `DETACH`. database: bool, + /// Alias of the database to detach. database_alias: Ident, }, /// ```sql @@ -3537,19 +3814,24 @@ pub enum Statement { /// DROP PROCEDURE /// ``` DropProcedure { + /// `true` when `IF EXISTS` was present. if_exists: bool, - /// One or more function to drop + /// One or more functions/procedures to drop. proc_desc: Vec, - /// `CASCADE` or `RESTRICT` + /// Optional drop behavior (`CASCADE` or `RESTRICT`). drop_behavior: Option, }, /// ```sql /// DROP SECRET /// ``` DropSecret { + /// `true` when `IF EXISTS` was present. if_exists: bool, + /// Optional `TEMPORARY` marker. temporary: Option, + /// Name of the secret to drop. name: Ident, + /// Optional storage specifier identifier. storage_specifier: Option, }, ///```sql @@ -3557,9 +3839,13 @@ pub enum Statement { /// ``` /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-droppolicy.html) DropPolicy { + /// `true` when `IF EXISTS` was present. if_exists: bool, + /// Name of the policy to drop. name: Ident, + /// Name of the table the policy applies to. table_name: ObjectName, + /// Optional drop behavior (`CASCADE` or `RESTRICT`). 
drop_behavior: Option, }, /// ```sql @@ -3567,7 +3853,9 @@ pub enum Statement { /// ``` /// See [Hive](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362034#LanguageManualDDL-DropConnector) DropConnector { + /// `true` when `IF EXISTS` was present. if_exists: bool, + /// Name of the connector to drop. name: Ident, }, /// ```sql @@ -3578,6 +3866,7 @@ pub enum Statement { /// Note: this is a PostgreSQL-specific statement, /// but may also compatible with other SQL. Declare { + /// Cursor declaration statements collected by `DECLARE`. stmts: Vec, }, /// ```sql @@ -3623,9 +3912,11 @@ pub enum Statement { Fetch { /// Cursor name name: Ident, + /// The fetch direction (e.g., `FORWARD`, `BACKWARD`). direction: FetchDirection, + /// The fetch position (e.g., `ALL`, `NEXT`, `ABSOLUTE`). position: FetchPosition, - /// Optional, It's possible to fetch rows form cursor to the table + /// Optional target table to fetch rows into. into: Option, }, /// ```sql @@ -3635,11 +3926,17 @@ pub enum Statement { /// Note: this is a Mysql-specific statement, /// but may also compatible with other SQL. Flush { + /// The specific flush option or object to flush. object_type: FlushType, + /// Optional flush location (dialect-specific). location: Option, + /// Optional channel name used for flush operations. channel: Option, + /// Whether a read lock was requested. read_lock: bool, + /// Whether this is an export flush operation. export: bool, + /// Optional list of tables involved in the flush. tables: Vec, }, /// ```sql @@ -3649,12 +3946,14 @@ pub enum Statement { /// Note: this is a PostgreSQL-specific statement, /// but may also compatible with other SQL. Discard { + /// The kind of object(s) to discard (ALL, PLANS, etc.). object_type: DiscardObject, }, /// `SHOW FUNCTIONS` /// /// Note: this is a Presto-specific statement. ShowFunctions { + /// Optional filter for which functions to display. 
filter: Option, }, /// ```sql @@ -3663,6 +3962,7 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement. ShowVariable { + /// Variable name as one or more identifiers. variable: Vec, }, /// ```sql @@ -3671,8 +3971,11 @@ pub enum Statement { /// /// Note: this is a MySQL-specific statement. ShowStatus { + /// Optional filter for which status entries to display. filter: Option, + /// `true` when `GLOBAL` scope was requested. global: bool, + /// `true` when `SESSION` scope was requested. session: bool, }, /// ```sql @@ -3681,8 +3984,11 @@ pub enum Statement { /// /// Note: this is a MySQL-specific statement. ShowVariables { + /// Optional filter for which variables to display. filter: Option, + /// `true` when `GLOBAL` scope was requested. global: bool, + /// `true` when `SESSION` scope was requested. session: bool, }, /// ```sql @@ -3691,31 +3997,42 @@ pub enum Statement { /// /// Note: this is a MySQL-specific statement. ShowCreate { + /// The kind of object being shown (TABLE, VIEW, etc.). obj_type: ShowCreateObject, + /// The name of the object to show create statement for. obj_name: ObjectName, }, /// ```sql /// SHOW COLUMNS /// ``` ShowColumns { + /// `true` when extended column information was requested. extended: bool, + /// `true` when full column details were requested. full: bool, + /// Additional options for `SHOW COLUMNS`. show_options: ShowStatementOptions, }, /// ```sql /// SHOW DATABASES /// ``` ShowDatabases { + /// `true` when terse output format was requested. terse: bool, + /// `true` when history information was requested. history: bool, + /// Additional options for `SHOW DATABASES`. show_options: ShowStatementOptions, }, /// ```sql /// SHOW SCHEMAS /// ``` ShowSchemas { + /// `true` when terse (compact) output was requested. terse: bool, + /// `true` when history information was requested. history: bool, + /// Additional options for `SHOW SCHEMAS`. 
show_options: ShowStatementOptions, }, // ```sql @@ -3723,6 +4040,7 @@ pub enum Statement { // ``` // [MySQL]: // + /// Show the available character sets (alias `CHARSET`). ShowCharset(ShowCharset), /// ```sql /// SHOW OBJECTS LIKE 'line%' IN mydb.public @@ -3734,19 +4052,28 @@ pub enum Statement { /// SHOW TABLES /// ``` ShowTables { + /// `true` when terse output format was requested (compact listing). terse: bool, + /// `true` when history rows are requested. history: bool, + /// `true` when extended information should be shown. extended: bool, + /// `true` when a full listing was requested. full: bool, + /// `true` when external tables should be included. external: bool, + /// Additional options for `SHOW` statements. show_options: ShowStatementOptions, }, /// ```sql /// SHOW VIEWS /// ``` ShowViews { + /// `true` when terse output format was requested. terse: bool, + /// `true` when materialized views should be included. materialized: bool, + /// Additional options for `SHOW` statements. show_options: ShowStatementOptions, }, /// ```sql @@ -3755,6 +4082,7 @@ pub enum Statement { /// /// Note: this is a MySQL-specific statement. ShowCollation { + /// Optional filter for which collations to display. filter: Option, }, /// ```sql @@ -3771,9 +4099,13 @@ pub enum Statement { /// ``` /// If `begin` is true StartTransaction { + /// Transaction modes such as `ISOLATION LEVEL` or `READ WRITE`. modes: Vec, + /// `true` when this was parsed as `BEGIN` instead of `START`. begin: bool, + /// Optional specific keyword used: `TRANSACTION` or `WORK`. transaction: Option, + /// Optional transaction modifier (e.g., `AND NO CHAIN`). modifier: Option, /// List of statements belonging to the `BEGIN` block. /// Example: @@ -3807,8 +4139,11 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement. Comment { + /// Type of object being commented (table, column, etc.). object_type: CommentObject, + /// Name of the object the comment applies to. 
object_name: ObjectName, + /// Optional comment text (None to remove comment). comment: Option, /// An optional `IF EXISTS` clause. (Non-standard.) /// See @@ -3824,15 +4159,20 @@ pub enum Statement { /// ``` /// If `end` is true Commit { + /// `true` when `AND [ NO ] CHAIN` was present. chain: bool, + /// `true` when this `COMMIT` was parsed as an `END` block terminator. end: bool, + /// Optional transaction modifier for commit semantics. modifier: Option, }, /// ```sql /// ROLLBACK [ TRANSACTION | WORK ] [ AND [ NO ] CHAIN ] [ TO [ SAVEPOINT ] savepoint_name ] /// ``` Rollback { + /// `true` when `AND [ NO ] CHAIN` was present. chain: bool, + /// Optional savepoint name to roll back to. savepoint: Option, }, /// ```sql @@ -3841,6 +4181,7 @@ pub enum Statement { CreateSchema { /// ` | AUTHORIZATION | AUTHORIZATION ` schema_name: SchemaName, + /// `true` when `IF NOT EXISTS` was present. if_not_exists: bool, /// Schema properties. /// @@ -3881,25 +4222,45 @@ pub enum Statement { /// See: /// CreateDatabase { + /// Database name. db_name: ObjectName, + /// `IF NOT EXISTS` flag. if_not_exists: bool, + /// Optional location URI. location: Option, + /// Optional managed location. managed_location: Option, + /// `OR REPLACE` flag. or_replace: bool, + /// `TRANSIENT` flag. transient: bool, + /// Optional clone source. clone: Option, + /// Optional data retention time in days. data_retention_time_in_days: Option, + /// Optional maximum data extension time in days. max_data_extension_time_in_days: Option, + /// Optional external volume identifier. external_volume: Option, + /// Optional catalog name. catalog: Option, + /// Whether to replace invalid characters. replace_invalid_characters: Option, + /// Default DDL collation string. default_ddl_collation: Option, + /// Storage serialization policy. storage_serialization_policy: Option, + /// Optional comment. comment: Option, + /// Optional catalog sync identifier. catalog_sync: Option, + /// Catalog sync namespace mode. 
catalog_sync_namespace_mode: Option, + /// Optional flatten delimiter for namespace sync. catalog_sync_namespace_flatten_delimiter: Option, + /// Optional tags for the database. with_tags: Option>, + /// Optional contact entries for the database. with_contacts: Option>, }, /// ```sql @@ -3920,10 +4281,15 @@ pub enum Statement { /// CREATE PROCEDURE /// ``` CreateProcedure { + /// `OR ALTER` flag. or_alter: bool, + /// Procedure name. name: ObjectName, + /// Optional procedure parameters. params: Option>, + /// Optional language identifier. language: Option, + /// Procedure body statements. body: ConditionalStatements, }, /// ```sql @@ -3933,10 +4299,15 @@ pub enum Statement { /// Supported variants: /// 1. [DuckDB](https://duckdb.org/docs/sql/statements/create_macro) CreateMacro { + /// `OR REPLACE` flag. or_replace: bool, + /// Whether macro is temporary. temporary: bool, + /// Macro name. name: ObjectName, + /// Optional macro arguments. args: Option>, + /// Macro definition body. definition: MacroDefinition, }, /// ```sql @@ -3944,33 +4315,51 @@ pub enum Statement { /// ``` /// See CreateStage { + /// `OR REPLACE` flag for stage. or_replace: bool, + /// Whether stage is temporary. temporary: bool, + /// `IF NOT EXISTS` flag. if_not_exists: bool, + /// Stage name. name: ObjectName, + /// Stage parameters. stage_params: StageParamsObject, + /// Directory table parameters. directory_table_params: KeyValueOptions, + /// File format options. file_format: KeyValueOptions, + /// Copy options for stage. copy_options: KeyValueOptions, + /// Optional comment. comment: Option, }, /// ```sql /// ASSERT [AS ] /// ``` Assert { + /// Assertion condition expression. condition: Expr, + /// Optional message expression. message: Option, }, /// ```sql /// GRANT privileges ON objects TO grantees /// ``` Grant { + /// Privileges being granted. privileges: Privileges, + /// Optional objects the privileges apply to. objects: Option, + /// List of grantees receiving the privileges. 
grantees: Vec, + /// Whether `WITH GRANT OPTION` is present. with_grant_option: bool, + /// Optional `AS GRANTOR` identifier. as_grantor: Option, + /// Optional `GRANTED BY` identifier. granted_by: Option, + /// Optional `CURRENT GRANTS` modifier. current_grants: Option, }, /// ```sql @@ -3981,10 +4370,15 @@ pub enum Statement { /// REVOKE privileges ON objects FROM grantees /// ``` Revoke { + /// Privileges to revoke. privileges: Privileges, + /// Optional objects from which to revoke. objects: Option, + /// Grantees affected by the revoke. grantees: Vec, + /// Optional `GRANTED BY` identifier. granted_by: Option, + /// Optional `CASCADE`/`RESTRICT` behavior. cascade: Option, }, /// ```sql @@ -3993,7 +4387,9 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement. Deallocate { + /// Name to deallocate (or `ALL`). name: Ident, + /// Whether `PREPARE` keyword was present. prepare: bool, }, /// ```sql @@ -4005,12 +4401,17 @@ pub enum Statement { /// BigQuery: /// Snowflake: Execute { + /// Optional function/procedure name. name: Option, + /// Parameter expressions passed to execute. parameters: Vec, + /// Whether parentheses were present. has_parentheses: bool, - /// Is this an `EXECUTE IMMEDIATE` + /// Is this an `EXECUTE IMMEDIATE`. immediate: bool, + /// Identifiers to capture results into. into: Vec, + /// `USING` expressions with optional aliases. using: Vec, /// Whether the last parameter is the return value of the procedure /// MSSQL: @@ -4025,8 +4426,11 @@ pub enum Statement { /// /// Note: this is a PostgreSQL-specific statement. Prepare { + /// Name of the prepared statement. name: Ident, + /// Optional data types for parameters. data_types: Vec, + /// Statement being prepared. statement: Box, }, /// ```sql @@ -4036,8 +4440,10 @@ pub enum Statement { /// See /// See Kill { + /// Optional kill modifier (CONNECTION, QUERY, MUTATION). modifier: Option, // processlist_id + /// The id of the process to kill. 
id: u64, }, /// ```sql @@ -4066,7 +4472,7 @@ pub enum Statement { describe_alias: DescribeAlias, /// Carry out the command and show actual run times and other statistics. analyze: bool, - // Display additional information regarding the plan. + /// Display additional information regarding the plan. verbose: bool, /// `EXPLAIN QUERY PLAN` /// Display the query plan without running the query. @@ -4088,12 +4494,14 @@ pub enum Statement { /// ``` /// Define a new savepoint within the current transaction Savepoint { + /// Name of the savepoint being defined. name: Ident, }, /// ```sql /// RELEASE [ SAVEPOINT ] savepoint_name /// ``` ReleaseSavepoint { + /// Name of the savepoint to release. name: Ident, }, /// A `MERGE` statement. @@ -4118,6 +4526,7 @@ pub enum Statement { /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, + /// `true` if `AS` keyword was present before the query. has_as: bool, /// Table confs options: Vec, @@ -4131,6 +4540,7 @@ pub enum Statement { /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] table_name: ObjectName, + /// `true` when `IF EXISTS` was present. if_exists: bool, }, /// ```sql @@ -4138,11 +4548,17 @@ pub enum Statement { /// ``` /// Define a new sequence: CreateSequence { + /// Whether the sequence is temporary. temporary: bool, + /// `IF NOT EXISTS` flag. if_not_exists: bool, + /// Sequence name. name: ObjectName, + /// Optional data type for the sequence. data_type: Option, + /// Sequence options (INCREMENT, MINVALUE, etc.). sequence_options: Vec, + /// Optional `OWNED BY` target. owned_by: Option, }, /// A `CREATE DOMAIN` statement. @@ -4151,15 +4567,20 @@ pub enum Statement { /// CREATE TYPE /// ``` CreateType { + /// Type name to create. name: ObjectName, + /// Optional type representation details. representation: Option, }, /// ```sql /// PRAGMA . = /// ``` Pragma { + /// Pragma name (possibly qualified). 
name: ObjectName, + /// Optional pragma value. value: Option, + /// Whether the pragma used `=`. is_eq: bool, }, /// ```sql @@ -4167,6 +4588,7 @@ pub enum Statement { /// ``` /// Note: this is a MySQL-specific statement. See LockTables { + /// List of tables to lock with modes. tables: Vec, }, /// ```sql @@ -4186,11 +4608,17 @@ pub enum Statement { /// UNLOAD('statement') TO [ OPTIONS ] /// ``` Unload { + /// Optional query AST to unload. query: Option>, + /// Optional original query text. query_text: Option, + /// Destination identifier. to: Ident, + /// Optional IAM role/auth information. auth: Option, + /// Additional `WITH` options. with: Vec, + /// Legacy copy-style options. options: Vec, }, /// ```sql @@ -4199,10 +4627,15 @@ pub enum Statement { /// /// See ClickHouse OptimizeTable { + /// Table name to optimize. name: ObjectName, + /// Optional cluster identifier. on_cluster: Option, + /// Optional partition spec. partition: Option, + /// Whether `FINAL` was specified. include_final: bool, + /// Optional deduplication settings. deduplicate: Option, }, /// ```sql @@ -4212,6 +4645,7 @@ pub enum Statement { /// /// See Postgres LISTEN { + /// Notification channel identifier. channel: Ident, }, /// ```sql @@ -4221,6 +4655,7 @@ pub enum Statement { /// /// See Postgres UNLISTEN { + /// Notification channel identifier. channel: Ident, }, /// ```sql @@ -4230,7 +4665,9 @@ pub enum Statement { /// /// See Postgres NOTIFY { + /// Notification channel identifier. channel: Ident, + /// Optional payload string. payload: Option, }, /// ```sql @@ -4242,11 +4679,17 @@ pub enum Statement { /// /// See Hive LoadData { + /// Whether `LOCAL` is present. local: bool, + /// Input path for files to load. inpath: String, + /// Whether `OVERWRITE` was specified. overwrite: bool, + /// Target table name to load into. table_name: ObjectName, + /// Optional partition specification. partitioned: Option>, + /// Optional table format information. 
table_format: Option, }, /// ```sql @@ -4269,10 +4712,15 @@ pub enum Statement { /// [ WITH option [ , ...n ] ] /// See RaisError { + /// Error message expression or identifier. message: Box, + /// Severity expression. severity: Box, + /// State expression. state: Box, + /// Substitution arguments for the message. arguments: Vec, + /// Additional `WITH` options for RAISERROR. options: Vec, }, /// ```sql @@ -4350,7 +4798,9 @@ impl From for Statement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CurrentGrantsKind { + /// `COPY CURRENT GRANTS` (copy current grants to target). CopyCurrentGrants, + /// `REVOKE CURRENT GRANTS` (revoke current grants from target). RevokeCurrentGrants, } @@ -4366,9 +4816,14 @@ impl fmt::Display for CurrentGrantsKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `RAISERROR` options +/// See pub enum RaisErrorOption { + /// Log the error. Log, + /// Do not wait for completion. NoWait, + /// Set the error state. SetError, } @@ -4982,6 +5437,9 @@ impl fmt::Display for Statement { Statement::AlterOperatorFamily(alter_operator_family) => { write!(f, "{alter_operator_family}") } + Statement::AlterOperatorClass(alter_operator_class) => { + write!(f, "{alter_operator_class}") + } Statement::AlterRole { name, operation } => { write!(f, "ALTER ROLE {name} {operation}") } @@ -5831,11 +6289,17 @@ impl fmt::Display for Statement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SequenceOptions { + /// `INCREMENT [BY] ` option; second value indicates presence of `BY` keyword. IncrementBy(Expr, bool), + /// `MINVALUE ` or `NO MINVALUE`. MinValue(Option), + /// `MAXVALUE ` or `NO MAXVALUE`. 
MaxValue(Option), + /// `START [WITH] `; second value indicates presence of `WITH`. StartWith(Expr, bool), + /// `CACHE ` option. Cache(Expr), + /// `CYCLE` or `NO CYCLE` option. Cycle(bool), } @@ -5885,8 +6349,11 @@ impl fmt::Display for SequenceOptions { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct SetAssignment { + /// Optional context scope (e.g., SESSION or LOCAL). pub scope: Option, + /// Assignment target name. pub name: ObjectName, + /// Assigned expression value. pub value: Expr, } @@ -5933,7 +6400,9 @@ impl fmt::Display for TruncateTableTarget { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TruncateIdentityOption { + /// Restart identity values (RESTART IDENTITY). Restart, + /// Continue identity values (CONTINUE IDENTITY). Continue, } @@ -5943,7 +6412,9 @@ pub enum TruncateIdentityOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CascadeOption { + /// Apply cascading action (e.g., CASCADE). Cascade, + /// Restrict the action (e.g., RESTRICT). Restrict, } @@ -5961,7 +6432,9 @@ impl Display for CascadeOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum BeginTransactionKind { + /// Standard `TRANSACTION` keyword. Transaction, + /// Alternate `WORK` keyword. Work, } @@ -5980,11 +6453,11 @@ impl Display for BeginTransactionKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum MinMaxValue { - // clause is not specified + /// Clause is not specified. Empty, - // NO MINVALUE/NO MAXVALUE + /// NO MINVALUE / NO MAXVALUE. None, - // MINVALUE / MAXVALUE + /// `MINVALUE ` / `MAXVALUE `. 
Some(Expr), } @@ -5992,6 +6465,7 @@ pub enum MinMaxValue { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[non_exhaustive] +/// Behavior to apply for `INSERT` when a conflict occurs. pub enum OnInsert { /// ON DUPLICATE KEY UPDATE (MySQL when the key already exists, then execute an update instead) DuplicateKeyUpdate(Vec), @@ -6002,40 +6476,53 @@ pub enum OnInsert { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Optional aliases for `INSERT` targets: row alias and optional column aliases. pub struct InsertAliases { + /// Row alias (table-style alias) for the inserted values. pub row_alias: ObjectName, + /// Optional list of column aliases for the inserted values. pub col_aliases: Option>, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `ON CONFLICT` clause representation. pub struct OnConflict { + /// Optional conflict target specifying columns or constraint. pub conflict_target: Option, + /// Action to take when a conflict occurs. pub action: OnConflictAction, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Target specification for an `ON CONFLICT` clause. pub enum ConflictTarget { + /// Target specified as a list of columns. Columns(Vec), + /// Target specified as a named constraint. OnConstraint(ObjectName), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Action to perform when an `ON CONFLICT` target is matched. 
pub enum OnConflictAction { + /// Do nothing on conflict. DoNothing, + /// Perform an update on conflict. DoUpdate(DoUpdate), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Details for `DO UPDATE` action of an `ON CONFLICT` clause. pub struct DoUpdate { - /// Column assignments + /// Column assignments to perform on update. pub assignments: Vec, - /// WHERE + /// Optional WHERE clause limiting the update. pub selection: Option, } @@ -6132,21 +6619,48 @@ impl fmt::Display for Privileges { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FetchDirection { - Count { limit: Value }, + /// Fetch a specific count of rows. + Count { + /// The limit value for the count. + limit: Value, + }, + /// Fetch the next row. Next, + /// Fetch the prior row. Prior, + /// Fetch the first row. First, + /// Fetch the last row. Last, - Absolute { limit: Value }, - Relative { limit: Value }, + /// Fetch an absolute row by index. + Absolute { + /// The absolute index value. + limit: Value, + }, + /// Fetch a row relative to the current position. + Relative { + /// The relative offset value. + limit: Value, + }, + /// Fetch all rows. All, // FORWARD // FORWARD count - Forward { limit: Option }, + /// Fetch forward by an optional limit. + Forward { + /// Optional forward limit. + limit: Option, + }, + /// Fetch all forward rows. ForwardAll, // BACKWARD // BACKWARD count - Backward { limit: Option }, + /// Fetch backward by an optional limit. + Backward { + /// Optional backward limit. + limit: Option, + }, + /// Fetch all backward rows. 
BackwardAll, } @@ -6198,7 +6712,9 @@ impl fmt::Display for FetchDirection { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FetchPosition { + /// Use `FROM ` position specifier. From, + /// Use `IN ` position specifier. In, } @@ -6218,71 +6734,125 @@ impl fmt::Display for FetchPosition { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Action { + /// Add a search optimization. AddSearchOptimization, + /// Apply an `APPLY` operation with a specific type. Apply { + /// The type of apply operation. apply_type: ActionApplyType, }, + /// Apply a budget operation. ApplyBudget, + /// Attach a listing. AttachListing, + /// Attach a policy. AttachPolicy, + /// Audit operation. Audit, + /// Bind a service endpoint. BindServiceEndpoint, + /// Connect permission. Connect, + /// Create action, optionally specifying an object type. Create { + /// Optional object type to create. obj_type: Option, }, + /// Actions related to database roles. DatabaseRole { + /// The role name. role: ObjectName, }, + /// Delete permission. Delete, + /// Drop permission. Drop, + /// Evolve schema permission. EvolveSchema, + /// Exec action (execute) with optional object type. Exec { + /// Optional execute object type. obj_type: Option, }, + /// Execute action with optional object type. Execute { + /// Optional execute object type. obj_type: Option, }, + /// Failover operation. Failover, + /// Use imported privileges. ImportedPrivileges, + /// Import a share. ImportShare, + /// Insert rows with optional column list. Insert { + /// Optional list of target columns for insert. columns: Option>, }, + /// Manage operation with a specific manage type. Manage { + /// The specific manage sub-type. manage_type: ActionManageType, }, + /// Manage releases. ManageReleases, + /// Manage versions. 
ManageVersions, + /// Modify operation with an optional modify type. Modify { + /// The optional modify sub-type. modify_type: Option, }, + /// Monitor operation with an optional monitor type. Monitor { + /// The optional monitor sub-type. monitor_type: Option, }, + /// Operate permission. Operate, + /// Override share restrictions. OverrideShareRestrictions, + /// Ownership permission. Ownership, + /// Purchase a data exchange listing. PurchaseDataExchangeListing, + + /// Read access. Read, + /// Read session-level access. ReadSession, + /// References with optional column list. References { + /// Optional list of referenced column identifiers. columns: Option>, }, + /// Replication permission. Replicate, + /// Resolve all references. ResolveAll, + /// Role-related permission with target role name. Role { + /// The target role name. role: ObjectName, }, + /// Select permission with optional column list. Select { + /// Optional list of selected columns. columns: Option>, }, + /// Temporary object permission. Temporary, + /// Trigger-related permission. Trigger, + /// Truncate permission. Truncate, + /// Update permission with optional affected columns. Update { + /// Optional list of columns affected by update. columns: Option>, }, + /// Usage permission. Usage, } @@ -6376,22 +6946,39 @@ impl fmt::Display for Action { /// See /// under `globalPrivileges` in the `CREATE` privilege. pub enum ActionCreateObjectType { + /// An account-level object. Account, + /// An application object. Application, + /// An application package object. ApplicationPackage, + /// A compute pool object. ComputePool, + /// A data exchange listing. DataExchangeListing, + /// A database object. Database, + /// An external volume object. ExternalVolume, + /// A failover group object. FailoverGroup, + /// An integration object. Integration, + /// A network policy object. NetworkPolicy, + /// An organization listing. OrganiationListing, + /// A replication group object. 
ReplicationGroup, + /// A role object. Role, + /// A schema object. Schema, + /// A share object. Share, + /// A user object. User, + /// A warehouse object. Warehouse, } @@ -6425,15 +7012,25 @@ impl fmt::Display for ActionCreateObjectType { /// See /// under `globalPrivileges` in the `APPLY` privilege. pub enum ActionApplyType { + /// Apply an aggregation policy. AggregationPolicy, + /// Apply an authentication policy. AuthenticationPolicy, + /// Apply a join policy. JoinPolicy, + /// Apply a masking policy. MaskingPolicy, + /// Apply a packages policy. PackagesPolicy, + /// Apply a password policy. PasswordPolicy, + /// Apply a projection policy. ProjectionPolicy, + /// Apply a row access policy. RowAccessPolicy, + /// Apply a session policy. SessionPolicy, + /// Apply a tag. Tag, } @@ -6460,10 +7057,15 @@ impl fmt::Display for ActionApplyType { /// See /// under `globalPrivileges` in the `EXECUTE` privilege. pub enum ActionExecuteObjectType { + /// Alert object. Alert, + /// Data metric function object. DataMetricFunction, + /// Managed alert object. ManagedAlert, + /// Managed task object. ManagedTask, + /// Task object. Task, } @@ -6485,12 +7087,19 @@ impl fmt::Display for ActionExecuteObjectType { /// See /// under `globalPrivileges` in the `MANAGE` privilege. pub enum ActionManageType { + /// Account support cases management. AccountSupportCases, + /// Event sharing management. EventSharing, + /// Grants management. Grants, + /// Listing auto-fulfillment management. ListingAutoFulfillment, + /// Organization support cases management. OrganizationSupportCases, + /// User support cases management. UserSupportCases, + /// Warehouses management. Warehouses, } @@ -6514,9 +7123,13 @@ impl fmt::Display for ActionManageType { /// See /// under `globalPrivileges` in the `MODIFY` privilege. pub enum ActionModifyType { + /// Modify log level. LogLevel, + /// Modify trace level. TraceLevel, + /// Modify session log level. 
SessionLogLevel, + /// Modify session trace level. SessionTraceLevel, } @@ -6537,8 +7150,11 @@ impl fmt::Display for ActionModifyType { /// See /// under `globalPrivileges` in the `MONITOR` privilege. pub enum ActionMonitorType { + /// Monitor execution. Execution, + /// Monitor security. Security, + /// Monitor usage. Usage, } @@ -6557,7 +7173,9 @@ impl fmt::Display for ActionMonitorType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Grantee { + /// The category/type of grantee (role, user, share, etc.). pub grantee_type: GranteesType, + /// Optional name of the grantee (identifier or user@host). pub name: Option, } @@ -6600,15 +7218,25 @@ impl fmt::Display for Grantee { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The kind of principal receiving privileges. pub enum GranteesType { + /// A role principal. Role, + /// A share principal. Share, + /// A user principal. User, + /// A group principal. Group, + /// The public principal. Public, + /// A database role principal. DatabaseRole, + /// An application principal. Application, + /// An application role principal. ApplicationRole, + /// No specific principal (e.g. `NONE`). None, } @@ -6620,7 +7248,12 @@ pub enum GranteeName { /// A bare identifier ObjectName(ObjectName), /// A MySQL user/host pair such as 'root'@'%' - UserHost { user: Ident, host: Ident }, + UserHost { + /// The user identifier portion. + user: Ident, + /// The host identifier portion. 
+ host: Ident, + }, } impl fmt::Display for GranteeName { @@ -6640,29 +7273,65 @@ impl fmt::Display for GranteeName { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GrantObjects { /// Grant privileges on `ALL SEQUENCES IN SCHEMA [, ...]` - AllSequencesInSchema { schemas: Vec }, + AllSequencesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL TABLES IN SCHEMA [, ...]` - AllTablesInSchema { schemas: Vec }, + AllTablesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL VIEWS IN SCHEMA [, ...]` - AllViewsInSchema { schemas: Vec }, + AllViewsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL MATERIALIZED VIEWS IN SCHEMA [, ...]` - AllMaterializedViewsInSchema { schemas: Vec }, + AllMaterializedViewsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL EXTERNAL TABLES IN SCHEMA [, ...]` - AllExternalTablesInSchema { schemas: Vec }, + AllExternalTablesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `ALL FUNCTIONS IN SCHEMA [, ...]` - AllFunctionsInSchema { schemas: Vec }, + AllFunctionsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `FUTURE SCHEMAS IN DATABASE [, ...]` - FutureSchemasInDatabase { databases: Vec }, + FutureSchemasInDatabase { + /// The target database names. + databases: Vec, + }, /// Grant privileges on `FUTURE TABLES IN SCHEMA [, ...]` - FutureTablesInSchema { schemas: Vec }, + FutureTablesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `FUTURE VIEWS IN SCHEMA [, ...]` - FutureViewsInSchema { schemas: Vec }, + FutureViewsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `FUTURE EXTERNAL TABLES IN SCHEMA [, ...]` - FutureExternalTablesInSchema { schemas: Vec }, + FutureExternalTablesInSchema { + /// The target schema names. 
+ schemas: Vec, + }, /// Grant privileges on `FUTURE MATERIALIZED VIEWS IN SCHEMA [, ...]` - FutureMaterializedViewsInSchema { schemas: Vec }, + FutureMaterializedViewsInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on `FUTURE SEQUENCES IN SCHEMA [, ...]` - FutureSequencesInSchema { schemas: Vec }, + FutureSequencesInSchema { + /// The target schema names. + schemas: Vec, + }, /// Grant privileges on specific databases Databases(Vec), /// Grant privileges on specific schemas @@ -6697,7 +7366,9 @@ pub enum GrantObjects { /// For example: /// `GRANT USAGE ON PROCEDURE foo(varchar) TO ROLE role1` Procedure { + /// The procedure name. name: ObjectName, + /// Optional argument types for overloaded procedures. arg_types: Vec, }, @@ -6707,7 +7378,9 @@ pub enum GrantObjects { /// For example: /// `GRANT USAGE ON FUNCTION foo(varchar) TO ROLE role1` Function { + /// The function name. name: ObjectName, + /// Optional argument types for overloaded functions. arg_types: Vec, }, } @@ -6866,10 +7539,15 @@ impl fmt::Display for GrantObjects { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DenyStatement { + /// The privileges to deny. pub privileges: Privileges, + /// The objects the privileges apply to. pub objects: GrantObjects, + /// The grantees (users/roles) to whom the denial applies. pub grantees: Vec, + /// Optional identifier of the principal that performed the grant. pub granted_by: Option, + /// Optional cascade option controlling dependent objects. pub cascade: Option, } @@ -6895,7 +7573,9 @@ impl fmt::Display for DenyStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Assignment { + /// The left-hand side of the assignment. pub target: AssignmentTarget, + /// The expression assigned to the target. 
pub value: Expr, } @@ -6930,11 +7610,13 @@ impl fmt::Display for AssignmentTarget { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Expression forms allowed as a function argument. pub enum FunctionArgExpr { + /// A normal expression argument. Expr(Expr), /// Qualified wildcard, e.g. `alias.*` or `schema.table.*`. QualifiedWildcard(ObjectName), - /// An unqualified `*` + /// An unqualified `*` wildcard. Wildcard, } @@ -6990,23 +7672,31 @@ impl fmt::Display for FunctionArgOperator { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Forms of function arguments (named, expression-named, or positional). pub enum FunctionArg { /// `name` is identifier /// /// Enabled when `Dialect::supports_named_fn_args_with_expr_name` returns 'false' Named { + /// The identifier name of the argument. name: Ident, + /// The argument expression or wildcard form. arg: FunctionArgExpr, + /// The operator separating name and value. operator: FunctionArgOperator, }, /// `name` is arbitrary expression /// /// Enabled when `Dialect::supports_named_fn_args_with_expr_name` returns 'true' ExprNamed { + /// The expression used as the argument name. name: Expr, + /// The argument expression or wildcard form. arg: FunctionArgExpr, + /// The operator separating name and value. operator: FunctionArgOperator, }, + /// An unnamed argument (positional), given by expression or wildcard. Unnamed(FunctionArgExpr), } @@ -7031,9 +7721,15 @@ impl fmt::Display for FunctionArg { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Which cursor(s) to close. pub enum CloseCursor { + /// Close all cursors. 
All, - Specific { name: Ident }, + /// Close a specific cursor by name. + Specific { + /// The name of the cursor to close. + name: Ident, + }, } impl fmt::Display for CloseCursor { @@ -7065,6 +7761,7 @@ pub struct DropDomain { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TypedString { + /// The data type of the typed string (e.g. DATE, TIME, TIMESTAMP). pub data_type: DataType, /// The value of the constant. /// Hint: you can unwrap the string value using `value.into_string()`. @@ -7109,6 +7806,7 @@ impl fmt::Display for TypedString { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Function { + /// The function name (may be qualified). pub name: ObjectName, /// Flags whether this function call uses the [ODBC syntax]. /// @@ -7250,6 +7948,7 @@ impl fmt::Display for FunctionArgumentList { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Clauses that can appear inside a function argument list. pub enum FunctionArgumentClause { /// Indicates how `NULL`s should be handled in the calculation, e.g. in `FIRST_VALUE` on [BigQuery]. /// @@ -7321,8 +8020,10 @@ impl fmt::Display for FunctionArgumentClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Method { + /// The expression on which the method is invoked. pub expr: Box, // always non-empty + /// The sequence of chained method calls. 
pub method_chain: Vec, } @@ -7340,8 +8041,9 @@ impl fmt::Display for Method { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// How duplicate values are treated inside function argument lists. pub enum DuplicateTreatment { - /// Perform the calculation only unique values. + /// Consider only unique values. Distinct, /// Retain all duplicate values (the default). All, @@ -7359,10 +8061,11 @@ impl fmt::Display for DuplicateTreatment { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// How the `ANALYZE`/`EXPLAIN ANALYZE` format is specified. pub enum AnalyzeFormatKind { - /// e.g. `EXPLAIN ANALYZE FORMAT JSON SELECT * FROM tbl` + /// Format provided as a keyword, e.g. `FORMAT JSON`. Keyword(AnalyzeFormat), - /// e.g. `EXPLAIN ANALYZE FORMAT=JSON SELECT * FROM tbl` + /// Format provided as an assignment, e.g. `FORMAT=JSON`. Assignment(AnalyzeFormat), } @@ -7378,11 +8081,17 @@ impl fmt::Display for AnalyzeFormatKind { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Output formats supported for `ANALYZE`/`EXPLAIN ANALYZE`. pub enum AnalyzeFormat { + /// Plain text format. TEXT, + /// Graphviz DOT format. GRAPHVIZ, + /// JSON format. JSON, + /// Traditional explain output. TRADITIONAL, + /// Tree-style explain output. TREE, } @@ -7403,12 +8112,19 @@ impl fmt::Display for AnalyzeFormat { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FileFormat { + /// Text file format. TEXTFILE, + /// Sequence file format. SEQUENCEFILE, + /// ORC file format. 
ORC, + /// Parquet file format. PARQUET, + /// Avro file format. AVRO, + /// RCFile format. RCFILE, + /// JSON file format. JSONFILE, } @@ -7437,7 +8153,9 @@ pub enum ListAggOnOverflow { /// `ON OVERFLOW TRUNCATE [ ] WITH[OUT] COUNT` Truncate { + /// Optional filler expression used when truncating. filler: Option>, + /// Whether to include a count when truncating. with_count: bool, }, } @@ -7478,8 +8196,11 @@ impl fmt::Display for HavingBound { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Which bound is used in a HAVING clause for ANY_VALUE on BigQuery. pub enum HavingBoundKind { + /// The minimum bound. Min, + /// The maximum bound. Max, } @@ -7495,18 +8216,31 @@ impl fmt::Display for HavingBoundKind { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Types of database objects referenced by DDL statements. pub enum ObjectType { + /// A table. Table, + /// A view. View, + /// A materialized view. MaterializedView, + /// An index. Index, + /// A schema. Schema, + /// A database. Database, + /// A role. Role, + /// A sequence. Sequence, + /// A stage. Stage, + /// A type definition. Type, + /// A user. User, + /// A stream. Stream, } @@ -7532,9 +8266,13 @@ impl fmt::Display for ObjectType { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Types supported by `KILL` statements. pub enum KillType { + /// Kill a connection. Connection, + /// Kill a running query. Query, + /// Kill a mutation (ClickHouse). 
Mutation, } @@ -7553,39 +8291,62 @@ impl fmt::Display for KillType { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Distribution style options for Hive tables. pub enum HiveDistributionStyle { + /// Partitioned distribution with the given columns. PARTITIONED { + /// Columns used for partitioning. columns: Vec, }, + /// Skewed distribution definition. SKEWED { + /// Columns participating in the skew definition. columns: Vec, + /// Columns listed in the `ON` clause for skewing. on: Vec, + /// Whether skewed data is stored as directories. stored_as_directories: bool, }, + /// No distribution style specified. NONE, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Row format specification for Hive tables (SERDE or DELIMITED). pub enum HiveRowFormat { - SERDE { class: String }, - DELIMITED { delimiters: Vec }, + /// SerDe class specification with the implementing class name. + SERDE { + /// The SerDe implementation class name. + class: String, + }, + /// Delimited row format with one or more delimiter specifications. + DELIMITED { + /// The list of delimiters used for delimiting fields/lines. + delimiters: Vec, + }, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Format specification for `LOAD DATA` Hive operations. pub struct HiveLoadDataFormat { + /// SerDe expression used for the table. pub serde: Expr, + /// Input format expression. 
pub input_format: Expr, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single row delimiter specification for Hive `ROW FORMAT`. pub struct HiveRowDelimiter { + /// The delimiter kind (fields/lines/etc.). pub delimiter: HiveDelimiter, + /// The delimiter character identifier. pub char: Ident, } @@ -7599,12 +8360,19 @@ impl fmt::Display for HiveRowDelimiter { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Kind of delimiter used in Hive `ROW FORMAT` definitions. pub enum HiveDelimiter { + /// Fields terminated by a delimiter. FieldsTerminatedBy, + /// Fields escaped by a character. FieldsEscapedBy, + /// Collection items terminated by a delimiter. CollectionItemsTerminatedBy, + /// Map keys terminated by a delimiter. MapKeysTerminatedBy, + /// Lines terminated by a delimiter. LinesTerminatedBy, + /// Null represented by a specific token. NullDefinedAs, } @@ -7625,8 +8393,11 @@ impl fmt::Display for HiveDelimiter { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Describe output format options for Hive `DESCRIBE`/`EXPLAIN`. pub enum HiveDescribeFormat { + /// Extended describe output. Extended, + /// Formatted describe output. Formatted, } @@ -7643,9 +8414,13 @@ impl fmt::Display for HiveDescribeFormat { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Aliases accepted for describe-style commands. pub enum DescribeAlias { + /// `DESCRIBE` alias. Describe, + /// `EXPLAIN` alias. 
Explain, + /// `DESC` alias. Desc, } @@ -7664,12 +8439,18 @@ impl fmt::Display for DescribeAlias { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[allow(clippy::large_enum_variant)] +/// Hive input/output format specification used in `CREATE TABLE`. pub enum HiveIOFormat { + /// Generic IO format with separate input and output expressions. IOF { + /// Expression for the input format. input_format: Expr, + /// Expression for the output format. output_format: Expr, }, + /// File format wrapper referencing a `FileFormat` variant. FileFormat { + /// The file format used for storage. format: FileFormat, }, } @@ -7677,18 +8458,26 @@ pub enum HiveIOFormat { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Hive table format and storage-related options. pub struct HiveFormat { + /// Optional row format specification. pub row_format: Option, + /// Optional SerDe properties expressed as SQL options. pub serde_properties: Option>, + /// Optional input/output storage format details. pub storage: Option, + /// Optional location (URI or path) for table data. pub location: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A clustered index column specification. pub struct ClusteredIndex { + /// Column identifier for the clustered index entry. pub name: Ident, + /// Optional sort direction: `Some(true)` for ASC, `Some(false)` for DESC, `None` for unspecified. 
pub asc: Option, } @@ -7706,9 +8495,13 @@ impl fmt::Display for ClusteredIndex { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Clustered options used for `CREATE TABLE` clustered/indexed storage. pub enum TableOptionsClustered { + /// Use a columnstore index. ColumnstoreIndex, + /// Columnstore index with an explicit ordering of columns. ColumnstoreIndexOrder(Vec), + /// A named clustered index with one or more columns. Index(Vec), } @@ -7737,13 +8530,16 @@ impl fmt::Display for TableOptionsClustered { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum PartitionRangeDirection { + /// LEFT range direction. Left, + /// RIGHT range direction. Right, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// SQL option syntax used in table and server definitions. pub enum SqlOption { /// Clustered represents the clustered version of table storage for MSSQL. /// @@ -7756,7 +8552,12 @@ pub enum SqlOption { /// Any option that consists of a key value pair where the value is an expression. e.g. /// /// WITH(DISTRIBUTION = ROUND_ROBIN) - KeyValue { key: Ident, value: Expr }, + KeyValue { + /// The option key identifier. + key: Ident, + /// The expression value for the option. + value: Expr, + }, /// One or more table partitions and represents which partition the boundary values belong to, /// e.g. /// @@ -7764,8 +8565,11 @@ pub enum SqlOption { /// /// Partition { + /// The partition column name. column_name: Ident, + /// Optional direction for the partition range (LEFT/RIGHT). range_direction: Option, + /// Values that define the partition boundaries. 
for_values: Vec, }, /// Comment parameter (supports `=` and no `=` syntax) @@ -7844,8 +8648,11 @@ impl fmt::Display for SqlOption { #[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Storage type options for a tablespace. pub enum StorageType { + /// Store on disk. Disk, + /// Store in memory. Memory, } @@ -7855,15 +8662,20 @@ pub enum StorageType { /// MySql TableSpace option /// pub struct TablespaceOption { + /// Name of the tablespace. pub name: String, + /// Optional storage type for the tablespace. pub storage: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A key/value identifier pair used for secret or key-based options. pub struct SecretOption { + /// The option key identifier. pub key: Ident, + /// The option value identifier. pub value: Ident, } @@ -7880,11 +8692,17 @@ impl fmt::Display for SecretOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateServerStatement { + /// The server name. pub name: ObjectName, + /// Whether `IF NOT EXISTS` was specified. pub if_not_exists: bool, + /// Optional server type identifier. pub server_type: Option, + /// Optional server version identifier. pub version: Option, + /// Foreign-data wrapper object name. pub foreign_data_wrapper: ObjectName, + /// Optional list of server options. pub options: Option>, } @@ -7923,11 +8741,14 @@ impl fmt::Display for CreateServerStatement { } } +/// A key/value option for `CREATE SERVER`. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateServerOption { + /// Option key identifier. pub key: Ident, + /// Option value identifier. pub value: Ident, } @@ -7940,8 +8761,11 @@ impl fmt::Display for CreateServerOption { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Options supported by DuckDB for `ATTACH DATABASE`. pub enum AttachDuckDBDatabaseOption { + /// READ_ONLY option, optional boolean value. ReadOnly(Option), + /// TYPE option specifying a database type identifier. Type(Ident), } @@ -7959,8 +8783,11 @@ impl fmt::Display for AttachDuckDBDatabaseOption { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Mode for transactions: access mode or isolation level. pub enum TransactionMode { + /// Access mode for a transaction (e.g. `READ ONLY` / `READ WRITE`). AccessMode(TransactionAccessMode), + /// Isolation level for a transaction (e.g. `SERIALIZABLE`). IsolationLevel(TransactionIsolationLevel), } @@ -7977,8 +8804,11 @@ impl fmt::Display for TransactionMode { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Transaction access mode (READ ONLY / READ WRITE). pub enum TransactionAccessMode { + /// READ ONLY access mode. ReadOnly, + /// READ WRITE access mode. 
ReadWrite, } @@ -7995,11 +8825,17 @@ impl fmt::Display for TransactionAccessMode { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Transaction isolation levels. pub enum TransactionIsolationLevel { + /// READ UNCOMMITTED isolation level. ReadUncommitted, + /// READ COMMITTED isolation level. ReadCommitted, + /// REPEATABLE READ isolation level. RepeatableRead, + /// SERIALIZABLE isolation level. Serializable, + /// SNAPSHOT isolation level. Snapshot, } @@ -8024,10 +8860,15 @@ impl fmt::Display for TransactionIsolationLevel { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TransactionModifier { + /// DEFERRED transaction modifier. Deferred, + /// IMMEDIATE transaction modifier. Immediate, + /// EXCLUSIVE transaction modifier. Exclusive, + /// TRY block modifier (MS-SQL style TRY/CATCH). Try, + /// CATCH block modifier (MS-SQL style TRY/CATCH). Catch, } @@ -8047,10 +8888,15 @@ impl fmt::Display for TransactionModifier { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Filter forms usable in SHOW statements. pub enum ShowStatementFilter { + /// Filter using LIKE pattern. Like(String), + /// Filter using ILIKE pattern. ILike(String), + /// Filter using a WHERE expression. Where(Expr), + /// Filter provided without a keyword (raw string). NoKeyword(String), } @@ -8069,8 +8915,11 @@ impl fmt::Display for ShowStatementFilter { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Clause types used with SHOW ... IN/FROM. pub enum ShowStatementInClause { + /// Use the `IN` clause. 
IN, + /// Use the `FROM` clause. FROM, } @@ -8092,10 +8941,15 @@ impl fmt::Display for ShowStatementInClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SqliteOnConflict { + /// Use ROLLBACK on conflict. Rollback, + /// Use ABORT on conflict. Abort, + /// Use FAIL on conflict. Fail, + /// Use IGNORE on conflict. Ignore, + /// Use REPLACE on conflict. Replace, } @@ -8121,8 +8975,11 @@ impl fmt::Display for SqliteOnConflict { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum MysqlInsertPriority { + /// LOW_PRIORITY modifier for INSERT/REPLACE. LowPriority, + /// DELAYED modifier for INSERT/REPLACE. Delayed, + /// HIGH_PRIORITY modifier for INSERT/REPLACE. HighPriority, } @@ -8140,7 +8997,9 @@ impl fmt::Display for crate::ast::MysqlInsertPriority { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Source for the `COPY` command: a table or a query. pub enum CopySource { + /// Copy from a table with optional column list. Table { /// The name of the table to copy from. table_name: ObjectName, @@ -8148,19 +9007,25 @@ pub enum CopySource { /// are copied. columns: Vec, }, + /// Copy from the results of a query. Query(Box), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Target for the `COPY` command: STDIN, STDOUT, a file, or a program. pub enum CopyTarget { + /// Use standard input as the source. Stdin, + /// Use standard output as the target. Stdout, + /// Read from or write to a file. File { /// The path name of the input or output file. filename: String, }, + /// Use a program as the source or target (shell command). 
Program { /// A command to execute command: String, @@ -8186,9 +9051,13 @@ impl fmt::Display for CopyTarget { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Action to take `ON COMMIT` for temporary tables. pub enum OnCommit { + /// Delete rows on commit. DeleteRows, + /// Preserve rows on commit. PreserveRows, + /// Drop the table on commit. Drop, } @@ -8271,7 +9140,12 @@ pub enum CopyLegacyOption { /// CLEANPATH CleanPath, /// COMPUPDATE [ PRESET | { ON | TRUE } | { OFF | FALSE } ] - CompUpdate { preset: bool, enabled: Option }, + CompUpdate { + /// Whether the COMPUPDATE PRESET option was used. + preset: bool, + /// Optional enabled flag for COMPUPDATE. + enabled: Option, + }, /// CSV ... Csv(Vec), /// DATEFORMAT \[ AS \] {'dateformat_string' | 'auto' } @@ -8280,8 +9154,11 @@ pub enum CopyLegacyOption { Delimiter(char), /// EMPTYASNULL EmptyAsNull, - /// ENCRYPTED \[ AUTO \] - Encrypted { auto: bool }, + /// `ENCRYPTED \[ AUTO \]` + Encrypted { + /// Whether `AUTO` was specified for encryption. + auto: bool, + }, /// ESCAPE Escape, /// EXTENSION 'extension-name' @@ -8299,12 +9176,15 @@ pub enum CopyLegacyOption { /// JSON \[ AS \] 'json_option' Json(Option), /// MANIFEST \[ VERBOSE \] - Manifest { verbose: bool }, + Manifest { + /// Whether the MANIFEST is verbose. + verbose: bool, + }, /// MAXFILESIZE \[ AS \] max-size \[ MB | GB \] MaxFileSize(FileSize), - /// NULL \[ AS \] 'null_string' + /// `NULL \[ AS \] 'null_string'` Null(String), - /// PARALLEL [ { ON | TRUE } | { OFF | FALSE } ] + /// `PARALLEL [ { ON | TRUE } | { OFF | FALSE } ]` Parallel(Option), /// PARQUET Parquet, @@ -8445,7 +9325,9 @@ impl fmt::Display for CopyLegacyOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FileSize { + /// Numeric size value. 
pub size: Value, + /// Optional unit for the size (MB or GB). pub unit: Option, } @@ -8459,11 +9341,14 @@ impl fmt::Display for FileSize { } } +/// Units for `FileSize` (MB or GB). #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FileSizeUnit { + /// Megabytes. MB, + /// Gigabytes. GB, } @@ -8485,7 +9370,9 @@ impl fmt::Display for FileSizeUnit { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct UnloadPartitionBy { + /// Columns used to partition the unload output. pub columns: Vec, + /// Whether to include the partition in the output. pub include: bool, } @@ -8556,13 +9443,18 @@ impl fmt::Display for CopyLegacyCsvOption { } } +/// Objects that can be discarded with `DISCARD`. #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum DiscardObject { + /// Discard all session state. ALL, + /// Discard cached plans. PLANS, + /// Discard sequence values. SEQUENCES, + /// Discard temporary objects. TEMP, } @@ -8577,22 +9469,36 @@ impl fmt::Display for DiscardObject { } } +/// Types of flush operations supported by `FLUSH`. #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FlushType { + /// Flush binary logs. BinaryLogs, + /// Flush engine logs. EngineLogs, + /// Flush error logs. ErrorLogs, + /// Flush general logs. GeneralLogs, + /// Flush hosts information. Hosts, + /// Flush logs. Logs, + /// Flush privileges. Privileges, + /// Flush optimizer costs. OptimizerCosts, + /// Flush relay logs. RelayLogs, + /// Flush slow logs. SlowLogs, + /// Flush status. 
Status, + /// Flush user resources. UserResources, + /// Flush table data. Tables, } @@ -8616,11 +9522,14 @@ impl fmt::Display for FlushType { } } +/// Location modifier for flush commands. #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FlushLocation { + /// Do not write changes to the binary log. NoWriteToBinlog, + /// Apply flush locally. Local, } @@ -8666,7 +9575,9 @@ impl fmt::Display for ContextModifier { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum DropFunctionOption { + /// `RESTRICT` option for DROP FUNCTION. Restrict, + /// `CASCADE` option for DROP FUNCTION. Cascade, } @@ -8684,7 +9595,9 @@ impl fmt::Display for DropFunctionOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FunctionDesc { + /// The function name. pub name: ObjectName, + /// Optional list of function arguments. pub args: Option>, } @@ -8703,9 +9616,13 @@ impl fmt::Display for FunctionDesc { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct OperateFunctionArg { + /// Optional argument mode (`IN`, `OUT`, `INOUT`). pub mode: Option, + /// Optional argument identifier/name. pub name: Option, + /// The data type of the argument. pub data_type: DataType, + /// Optional default expression for the argument. pub default_expr: Option, } @@ -8752,8 +9669,11 @@ impl fmt::Display for OperateFunctionArg { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ArgMode { + /// `IN` mode. In, + /// `OUT` mode. Out, + /// `INOUT` mode. 
InOut, } @@ -8772,8 +9692,11 @@ impl fmt::Display for ArgMode { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FunctionBehavior { + /// Function is immutable. Immutable, + /// Function is stable. Stable, + /// Function is volatile. Volatile, } @@ -8794,7 +9717,9 @@ impl fmt::Display for FunctionBehavior { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FunctionSecurity { + /// Execute the function with the privileges of the user who defined it. Definer, + /// Execute the function with the privileges of the user who invokes it. Invoker, } @@ -8827,7 +9752,9 @@ pub enum FunctionSetValue { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FunctionDefinitionSetParam { + /// The name of the configuration parameter. pub name: Ident, + /// The value to set for the parameter. pub value: FunctionSetValue, } @@ -8848,8 +9775,11 @@ impl fmt::Display for FunctionDefinitionSetParam { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FunctionCalledOnNull { + /// Function is called even when inputs are null. CalledOnNullInput, + /// Function returns null when any input is null. ReturnsNullOnNullInput, + /// Function is strict about null inputs. Strict, } @@ -8868,8 +9798,11 @@ impl fmt::Display for FunctionCalledOnNull { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FunctionParallel { + /// The function is not safe to run in parallel. Unsafe, + /// The function is restricted for parallel execution. Restricted, + /// The function is safe to run in parallel. 
Safe, } @@ -8890,7 +9823,9 @@ impl fmt::Display for FunctionParallel { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FunctionDeterminismSpecifier { + /// Function is deterministic. Deterministic, + /// Function is not deterministic. NotDeterministic, } @@ -9010,9 +9945,13 @@ pub enum CreateFunctionBody { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `USING` clause options for `CREATE FUNCTION` (e.g., JAR, FILE, ARCHIVE). pub enum CreateFunctionUsing { + /// Use a JAR file located at the given URI. Jar(String), + /// Use a file located at the given URI. File(String), + /// Use an archive located at the given URI. Archive(String), } @@ -9035,7 +9974,9 @@ impl fmt::Display for CreateFunctionUsing { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct MacroArg { + /// The argument name. pub name: Ident, + /// Optional default expression for the argument. pub default_expr: Option, } @@ -9062,8 +10003,11 @@ impl fmt::Display for MacroArg { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Definition for a DuckDB macro: either an expression or a table-producing query. pub enum MacroDefinition { + /// The macro is defined as an expression. Expr(Expr), + /// The macro is defined as a table (query). Table(Box), } @@ -9146,12 +10090,16 @@ impl fmt::Display for SearchModifier { } } +/// Represents a `LOCK TABLE` clause with optional alias and lock type. 
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct LockTable { + /// The table identifier to lock. pub table: Ident, + /// Optional alias for the table. pub alias: Option, + /// The type of lock to apply to the table. pub lock_type: LockTableType, } @@ -9175,9 +10123,18 @@ impl fmt::Display for LockTable { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The type of lock used in `LOCK TABLE` statements. pub enum LockTableType { - Read { local: bool }, - Write { low_priority: bool }, + /// Shared/read lock. If `local` is true, it's a local read lock. + Read { + /// Whether the read lock is local. + local: bool, + }, + /// Exclusive/write lock. If `low_priority` is true, the write is low priority. + Write { + /// Whether the write lock is low priority. + low_priority: bool, + }, } impl fmt::Display for LockTableType { @@ -9204,8 +10161,11 @@ impl fmt::Display for LockTableType { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Hive-specific `SET LOCATION` helper used in some `LOAD DATA` statements. pub struct HiveSetLocation { + /// Whether the `SET` keyword was present. pub has_set: bool, + /// The location identifier. pub location: Ident, } @@ -9223,8 +10183,11 @@ impl fmt::Display for HiveSetLocation { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MySQL `ALTER TABLE` column position specifier: `FIRST` or `AFTER `. pub enum MySQLColumnPosition { + /// Place the column first in the table. 
First, + /// Place the column after the specified identifier. After(Ident), } @@ -9244,9 +10207,13 @@ impl Display for MySQLColumnPosition { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MySQL `CREATE VIEW` algorithm options. pub enum CreateViewAlgorithm { + /// `UNDEFINED` algorithm. Undefined, + /// `MERGE` algorithm. Merge, + /// `TEMPTABLE` algorithm. TempTable, } @@ -9263,8 +10230,11 @@ impl Display for CreateViewAlgorithm { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MySQL `CREATE VIEW` SQL SECURITY options. pub enum CreateViewSecurity { + /// The view runs with the privileges of the definer. Definer, + /// The view runs with the privileges of the invoker. Invoker, } @@ -9284,8 +10254,11 @@ impl Display for CreateViewSecurity { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateViewParams { + /// Optional view algorithm (e.g., MERGE, TEMPTABLE). pub algorithm: Option, + /// Optional definer (the security principal that will own the view). pub definer: Option, + /// Optional SQL SECURITY setting for the view. pub security: Option, } @@ -9320,8 +10293,11 @@ impl Display for CreateViewParams { /// ENGINE = SummingMergeTree([columns]) /// ``` pub struct NamedParenthesizedList { + /// The option key (identifier) for this named list. pub key: Ident, + /// Optional secondary name associated with the key. pub name: Option, + /// The list of identifier values for the key. 
pub values: Vec, } @@ -9333,11 +10309,14 @@ pub struct NamedParenthesizedList { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct RowAccessPolicy { + /// The fully-qualified policy object name. pub policy: ObjectName, + /// Identifiers for the columns or objects the policy applies to. pub on: Vec, } impl RowAccessPolicy { + /// Create a new `RowAccessPolicy` for the given `policy` and `on` identifiers. pub fn new(policy: ObjectName, on: Vec) -> Self { Self { policy, on } } @@ -9361,11 +10340,14 @@ impl Display for RowAccessPolicy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Tag { + /// The tag key (can be qualified). pub key: ObjectName, + /// The tag value as a string. pub value: String, } impl Tag { + /// Create a new `Tag` with the given key and value. pub fn new(key: ObjectName, value: String) -> Self { Self { key, value } } @@ -9384,7 +10366,9 @@ impl Display for Tag { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ContactEntry { + /// The purpose label for the contact entry. pub purpose: String, + /// The contact information associated with the purpose. pub contact: String, } @@ -9402,6 +10386,7 @@ pub enum CommentDef { /// Includes `=` when printing the comment, as `COMMENT = 'comment'` /// Does not include `=` when printing the comment, as `COMMENT 'comment'` WithEq(String), + /// Comment variant that omits the `=` when displayed. WithoutEq(String), } @@ -9480,7 +10465,9 @@ where #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct UtilityOption { + /// The option name (identifier). pub name: Ident, + /// Optional argument for the option (number, string, keyword, etc.). 
pub arg: Option, } @@ -9501,10 +10488,15 @@ impl Display for UtilityOption { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ShowStatementOptions { + /// Optional scope to show in (for example: TABLE, SCHEMA). pub show_in: Option, + /// Optional `STARTS WITH` filter value. pub starts_with: Option, + /// Optional `LIMIT` expression. pub limit: Option, + /// Optional `FROM` value used with `LIMIT`. pub limit_from: Option, + /// Optional filter position (infix or suffix) for `LIKE`/`FILTER`. pub filter_position: Option, } @@ -9546,19 +10538,28 @@ impl Display for ShowStatementOptions { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Where a `SHOW` filter appears relative to the main clause. pub enum ShowStatementFilterPosition { + /// Put the filter in an infix position (e.g. `SHOW COLUMNS LIKE '%name%' IN TABLE tbl`). Infix(ShowStatementFilter), // For example: SHOW COLUMNS LIKE '%name%' IN TABLE tbl + /// Put the filter in a suffix position (e.g. `SHOW COLUMNS IN tbl LIKE '%name%'`). Suffix(ShowStatementFilter), // For example: SHOW COLUMNS IN tbl LIKE '%name%' } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Parent object types usable with `SHOW ... IN ` clauses. pub enum ShowStatementInParentType { + /// ACCOUNT parent type for SHOW statements. Account, + /// DATABASE parent type for SHOW statements. Database, + /// SCHEMA parent type for SHOW statements. Schema, + /// TABLE parent type for SHOW statements. Table, + /// VIEW parent type for SHOW statements. 
View, } @@ -9577,9 +10578,13 @@ impl fmt::Display for ShowStatementInParentType { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents a `SHOW ... IN` clause with optional parent qualifier and name. pub struct ShowStatementIn { + /// The clause that specifies what to show (e.g. COLUMNS, TABLES). pub clause: ShowStatementInClause, + /// Optional parent type qualifier (ACCOUNT/DATABASE/...). pub parent_type: Option, + /// Optional parent object name for the SHOW clause. #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub parent_name: Option, } @@ -9605,6 +10610,7 @@ pub struct ShowCharset { /// The statement can be written as `SHOW CHARSET` or `SHOW CHARACTER SET` /// true means CHARSET was used and false means CHARACTER SET was used pub is_shorthand: bool, + /// Optional `LIKE`/`WHERE`-style filter for the statement. pub filter: Option, } @@ -9626,8 +10632,11 @@ impl fmt::Display for ShowCharset { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Options for a `SHOW OBJECTS` statement. pub struct ShowObjects { + /// Whether to show terse output. pub terse: bool, + /// Additional options controlling the SHOW output. pub show_options: ShowStatementOptions, } @@ -9644,7 +10653,9 @@ pub struct ShowObjects { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum JsonNullClause { + /// `NULL ON NULL` behavior for JSON functions. NullOnNull, + /// `ABSENT ON NULL` behavior for JSON functions. 
AbsentOnNull, } @@ -9667,6 +10678,7 @@ impl Display for JsonNullClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct JsonReturningClause { + /// The data type to return from the JSON function (e.g. JSON/JSONB). pub data_type: DataType, } @@ -9681,7 +10693,9 @@ impl Display for JsonReturningClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct RenameTable { + /// The current name of the object to rename. pub old_name: ObjectName, + /// The new name for the object. pub new_name: ObjectName, } @@ -9727,7 +10741,9 @@ impl fmt::Display for TableObject { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct SetSessionAuthorizationParam { + /// The scope for the `SET SESSION AUTHORIZATION` (e.g., GLOBAL/SESSION). pub scope: ContextModifier, + /// The specific authorization parameter kind. pub kind: SetSessionAuthorizationParamKind, } @@ -9761,10 +10777,15 @@ impl fmt::Display for SetSessionAuthorizationParamKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Kind of session parameter being set by `SET SESSION`. pub enum SetSessionParamKind { + /// Generic session parameter (name/value pair). Generic(SetSessionParamGeneric), + /// Identity insert related parameter. IdentityInsert(SetSessionParamIdentityInsert), + /// Offsets-related parameter. Offsets(SetSessionParamOffsets), + /// Statistics-related parameter. 
Statistics(SetSessionParamStatistics), } @@ -9782,8 +10803,11 @@ impl fmt::Display for SetSessionParamKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Generic `SET SESSION` parameter represented as name(s) and value. pub struct SetSessionParamGeneric { + /// Names of the session parameters being set. pub names: Vec, + /// The value to assign to the parameter(s). pub value: String, } @@ -9796,8 +10820,11 @@ impl fmt::Display for SetSessionParamGeneric { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `IDENTITY_INSERT` session parameter for a specific object. pub struct SetSessionParamIdentityInsert { + /// Object name targeted by `IDENTITY_INSERT`. pub obj: ObjectName, + /// Value (ON/OFF) for the identity insert setting. pub value: SessionParamValue, } @@ -9810,8 +10837,11 @@ impl fmt::Display for SetSessionParamIdentityInsert { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Offsets-related session parameter with keywords and a value. pub struct SetSessionParamOffsets { + /// Keywords specifying which offsets to modify. pub keywords: Vec, + /// Value (ON/OFF) for the offsets setting. pub value: SessionParamValue, } @@ -9829,8 +10859,11 @@ impl fmt::Display for SetSessionParamOffsets { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Statistics-related session parameter specifying topic and value. pub struct SetSessionParamStatistics { + /// Statistics topic to set (IO/PROFILE/TIME/XML). 
pub topic: SessionParamStatsTopic, + /// Value (ON/OFF) for the statistics topic. pub value: SessionParamValue, } @@ -9843,10 +10876,15 @@ impl fmt::Display for SetSessionParamStatistics { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Topics available for session statistics configuration. pub enum SessionParamStatsTopic { + /// Input/output statistics. IO, + /// Profile statistics. Profile, + /// Time statistics. Time, + /// XML-related statistics. Xml, } @@ -9864,8 +10902,11 @@ impl fmt::Display for SessionParamStatsTopic { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Value for a session boolean-like parameter (ON/OFF). pub enum SessionParamValue { + /// Session parameter enabled. On, + /// Session parameter disabled. Off, } @@ -9888,7 +10929,9 @@ impl fmt::Display for SessionParamValue { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum StorageSerializationPolicy { + /// Use compatible serialization mode. Compatible, + /// Use optimized serialization mode. Optimized, } @@ -9911,7 +10954,9 @@ impl Display for StorageSerializationPolicy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CatalogSyncNamespaceMode { + /// Nest namespaces when syncing catalog. Nest, + /// Flatten namespaces when syncing catalog. Flatten, } @@ -9940,7 +10985,9 @@ pub enum CopyIntoSnowflakeKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `PRINT` statement for producing debug/output messages. 
pub struct PrintStatement { + /// The expression producing the message to print. pub message: Box, } @@ -9958,6 +11005,7 @@ impl fmt::Display for PrintStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ReturnStatement { + /// Optional return value expression. pub value: Option, } @@ -9975,6 +11023,7 @@ impl fmt::Display for ReturnStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ReturnStatementValue { + /// Return an expression from a function or trigger. Expr(Expr), } @@ -10000,7 +11049,9 @@ impl fmt::Display for OpenStatement { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum NullInclusion { + /// Include NULL values in the UNPIVOT output. IncludeNulls, + /// Exclude NULL values from the UNPIVOT output. ExcludeNulls, } @@ -10024,7 +11075,9 @@ impl fmt::Display for NullInclusion { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct MemberOf { + /// The value to check for membership. pub value: Box, + /// The JSON array expression to check against. pub array: Box, } @@ -10037,9 +11090,13 @@ impl fmt::Display for MemberOf { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents an `EXPORT DATA` statement. pub struct ExportData { + /// Options for the export operation. pub options: Vec, + /// The query producing the data to export. pub query: Box, + /// Optional named connection to use for export. 
pub connection: Option, } @@ -10074,11 +11131,17 @@ impl fmt::Display for ExportData { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct CreateUser { + /// Replace existing user if present. pub or_replace: bool, + /// Only create the user if it does not already exist. pub if_not_exists: bool, + /// The name of the user to create. pub name: Ident, + /// Key/value options for user creation. pub options: KeyValueOptions, + /// Whether tags are specified using `WITH TAG`. pub with_tags: bool, + /// Tags for the user. pub tags: KeyValueOptions, } @@ -10121,24 +11184,42 @@ impl fmt::Display for CreateUser { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUser { + /// Whether to only alter the user if it exists. pub if_exists: bool, + /// The name of the user to alter. pub name: Ident, - /// The following fields are Snowflake-specific: + /// Optional new name for the user (Snowflake-specific). + /// See: pub rename_to: Option, + /// Reset the user's password. pub reset_password: bool, + /// Abort all running queries for the user. pub abort_all_queries: bool, + /// Optionally add a delegated role authorization. pub add_role_delegation: Option, + /// Optionally remove a delegated role authorization. pub remove_role_delegation: Option, + /// Enroll the user in MFA. pub enroll_mfa: bool, + /// Set the default MFA method for the user. pub set_default_mfa_method: Option, + /// Remove the user's default MFA method. pub remove_mfa_method: Option, + /// Modify an MFA method for the user. pub modify_mfa_method: Option, + /// Add an MFA OTP method with optional count. pub add_mfa_method_otp: Option, + /// Set a user policy. pub set_policy: Option, + /// Unset a user policy. pub unset_policy: Option, + /// Key/value tag options to set on the user. pub set_tag: KeyValueOptions, + /// Tags to unset on the user. 
pub unset_tag: Vec, + /// Key/value properties to set on the user. pub set_props: KeyValueOptions, + /// Properties to unset on the user. pub unset_props: Vec, /// The following options are PostgreSQL-specific: pub password: Option, @@ -10151,7 +11232,9 @@ pub struct AlterUser { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserAddRoleDelegation { + /// Role name to delegate. pub role: Ident, + /// Security integration receiving the delegation. pub integration: Ident, } @@ -10162,7 +11245,9 @@ pub struct AlterUserAddRoleDelegation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserRemoveRoleDelegation { + /// Optional role name to remove delegation for. pub role: Option, + /// Security integration from which to remove delegation. pub integration: Ident, } @@ -10173,6 +11258,7 @@ pub struct AlterUserRemoveRoleDelegation { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserAddMfaMethodOtp { + /// Optional OTP count parameter. pub count: Option, } @@ -10183,7 +11269,9 @@ pub struct AlterUserAddMfaMethodOtp { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserModifyMfaMethod { + /// The MFA method being modified. pub method: MfaMethodKind, + /// The new comment for the MFA method. pub comment: String, } @@ -10192,8 +11280,11 @@ pub struct AlterUserModifyMfaMethod { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum MfaMethodKind { + /// PassKey (hardware or platform passkey) MFA method. PassKey, + /// Time-based One-Time Password (TOTP) MFA method. Totp, + /// Duo Security MFA method. 
Duo, } @@ -10214,7 +11305,9 @@ impl fmt::Display for MfaMethodKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserSetPolicy { + /// The kind of user policy being set (authentication/password/session). pub policy_kind: UserPolicyKind, + /// The identifier of the policy to apply. pub policy: Ident, } @@ -10223,8 +11316,11 @@ pub struct AlterUserSetPolicy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum UserPolicyKind { + /// Authentication policy. Authentication, + /// Password policy. Password, + /// Session policy. Session, } @@ -10332,7 +11428,9 @@ impl fmt::Display for AlterUser { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserPassword { + /// Whether the password is encrypted. pub encrypted: bool, + /// The password string, or `None` for `NULL`. pub password: Option, } @@ -10374,8 +11472,11 @@ pub enum CreateTableLikeKind { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Controls whether defaults are included when creating a table FROM/LIKE another. pub enum CreateTableLikeDefaults { + /// Include default values from the source table. Including, + /// Exclude default values from the source table. Excluding, } @@ -10391,8 +11492,11 @@ impl fmt::Display for CreateTableLikeDefaults { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents the `LIKE` clause of a `CREATE TABLE` statement. pub struct CreateTableLike { + /// The source table name to copy the schema from.
pub name: ObjectName, + /// Optional behavior controlling whether defaults are copied. pub defaults: Option, } @@ -10413,8 +11517,11 @@ impl fmt::Display for CreateTableLike { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum RefreshModeKind { + /// Automatic refresh mode (`AUTO`). Auto, + /// Full refresh mode (`FULL`). Full, + /// Incremental refresh mode (`INCREMENTAL`). Incremental, } @@ -10435,7 +11542,9 @@ impl fmt::Display for RefreshModeKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum InitializeKind { + /// Initialize on creation (`ON CREATE`). OnCreate, + /// Initialize on schedule (`ON SCHEDULE`). OnSchedule, } @@ -10458,13 +11567,21 @@ impl fmt::Display for InitializeKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct VacuumStatement { + /// Whether `FULL` was specified. pub full: bool, + /// Whether `SORT ONLY` was specified. pub sort_only: bool, + /// Whether `DELETE ONLY` was specified. pub delete_only: bool, + /// Whether `REINDEX` was specified. pub reindex: bool, + /// Whether `RECLUSTER` was specified. pub recluster: bool, + /// Optional table to run `VACUUM` on. pub table_name: Option, + /// Optional threshold value (percent) for `TO threshold PERCENT`. pub threshold: Option, + /// Whether `BOOST` was specified. pub boost: bool, } @@ -10512,6 +11629,7 @@ pub enum Reset { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ResetStatement { + /// The reset action to perform (either `ALL` or a specific configuration parameter). 
pub reset: Reset, } diff --git a/src/ast/query.rs b/src/ast/query.rs index efec56ffd4..86f3d13b70 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -120,8 +120,11 @@ impl fmt::Display for Query { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ProjectionSelect { + /// The list of projected select items. pub projection: Vec, + /// Optional `ORDER BY` clause for the projection-select. pub order_by: Option, + /// Optional `GROUP BY` clause for the projection-select. pub group_by: Option, } @@ -151,17 +154,28 @@ pub enum SetExpr { /// in its body and an optional ORDER BY / LIMIT. Query(Box), /// UNION/EXCEPT/INTERSECT of two queries + /// A set operation combining two query expressions. SetOperation { + /// The set operator used (e.g. `UNION`, `EXCEPT`). op: SetOperator, + /// Optional quantifier (`ALL`, `DISTINCT`, etc.). set_quantifier: SetQuantifier, + /// Left operand of the set operation. left: Box, + /// Right operand of the set operation. right: Box, }, + /// `VALUES (...)` Values(Values), + /// `INSERT` statement Insert(Statement), + /// `UPDATE` statement Update(Statement), + /// `DELETE` statement Delete(Statement), + /// `MERGE` statement Merge(Statement), + /// `TABLE` command Table(Box), } @@ -222,10 +236,15 @@ impl fmt::Display for SetExpr { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A set operator for combining two `SetExpr`s. 
pub enum SetOperator { + /// `UNION` set operator Union, + /// `EXCEPT` set operator Except, + /// `INTERSECT` set operator Intersect, + /// `MINUS` set operator (non-standard) Minus, } @@ -247,11 +266,17 @@ impl fmt::Display for SetOperator { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SetQuantifier { + /// `ALL` quantifier All, + /// `DISTINCT` quantifier Distinct, + /// `BY NAME` quantifier ByName, + /// `ALL BY NAME` quantifier AllByName, + /// `DISTINCT BY NAME` quantifier DistinctByName, + /// No quantifier specified None, } @@ -272,8 +297,11 @@ impl fmt::Display for SetQuantifier { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] /// A [`TABLE` command]( https://www.postgresql.org/docs/current/sql-select.html#SQL-TABLE) #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A (possibly schema-qualified) table reference used in `FROM` clauses. pub struct Table { + /// Optional table name (absent for e.g. `TABLE` command without argument). pub table_name: Option, + /// Optional schema/catalog name qualifying the table. pub schema_name: Option, } @@ -588,6 +616,7 @@ impl fmt::Display for NamedWindowExpr { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A named window definition: ` AS ` pub struct NamedWindowDefinition(pub Ident, pub NamedWindowExpr); impl fmt::Display for NamedWindowDefinition { @@ -599,10 +628,13 @@ impl fmt::Display for NamedWindowDefinition { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A `WITH` clause, introducing common table expressions (CTEs). 
pub struct With { - /// Token for the "WITH" keyword + /// Token for the `WITH` keyword pub with_token: AttachedToken, + /// Whether the `WITH` is recursive (`WITH RECURSIVE`). pub recursive: bool, + /// The list of CTEs declared by this `WITH` clause. pub cte_tables: Vec, } @@ -620,6 +652,7 @@ impl fmt::Display for With { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Indicates whether a CTE is materialized or not. pub enum CteAsMaterialized { /// The `WITH` statement specifies `AS MATERIALIZED` behavior Materialized, @@ -649,11 +682,15 @@ impl fmt::Display for CteAsMaterialized { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Cte { + /// The CTE alias (name introduced before the `AS` keyword). pub alias: TableAlias, + /// The query that defines the CTE body. pub query: Box, + /// Optional `FROM` identifier for materialized CTEs. pub from: Option, + /// Optional `AS MATERIALIZED` / `AS NOT MATERIALIZED` hint. pub materialized: Option, - /// Token for the closing parenthesis + /// Token for the closing parenthesis of the CTE definition. pub closing_paren_token: AttachedToken, } @@ -708,7 +745,12 @@ pub enum SelectItem { /// Any expression, not followed by `[ AS ] alias` UnnamedExpr(Expr), /// An expression, followed by `[ AS ] alias` - ExprWithAlias { expr: Expr, alias: Ident }, + ExprWithAlias { + /// The expression being projected. + expr: Expr, + /// The alias for the expression. + alias: Ident, + }, /// An expression, followed by a wildcard expansion. /// e.g. 
`alias.*`, `STRUCT('foo').*` QualifiedWildcard(SelectItemQualifiedWildcardKind, WildcardAdditionalOptions), @@ -737,7 +779,9 @@ impl fmt::Display for SelectItemQualifiedWildcardKind { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IdentWithAlias { + /// The identifier being aliased. pub ident: Ident, + /// The alias to apply to `ident`. pub alias: Ident, } @@ -815,6 +859,7 @@ impl fmt::Display for WildcardAdditionalOptions { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct IlikeSelectItem { + /// The pattern expression used with `ILIKE`. pub pattern: String, } @@ -954,6 +999,7 @@ impl fmt::Display for ExceptSelectItem { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ReplaceSelectItem { + /// List of replacement elements contained in the `REPLACE(...)` clause. pub items: Vec>, } @@ -973,8 +1019,11 @@ impl fmt::Display for ReplaceSelectItem { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ReplaceSelectElement { + /// Expression producing the replacement value. pub expr: Expr, + /// The target column name for the replacement. pub column_name: Ident, + /// Whether the `AS` keyword was present in the original syntax. pub as_keyword: bool, } @@ -1013,8 +1062,11 @@ impl fmt::Display for SelectItem { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A left table followed by zero or more joins. pub struct TableWithJoins { + /// The starting table factor (left side) of the join chain. pub relation: TableFactor, + /// The sequence of joins applied to the relation. 
pub joins: Vec, } @@ -1056,8 +1108,11 @@ impl fmt::Display for ConnectBy { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single setting key-value pair. pub struct Setting { + /// Setting name/key. pub key: Ident, + /// The value expression assigned to the setting. pub value: Expr, } @@ -1077,7 +1132,9 @@ impl fmt::Display for Setting { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ExprWithAlias { + /// The expression. pub expr: Expr, + /// Optional alias for the expression. pub alias: Option, } @@ -1102,7 +1159,9 @@ impl fmt::Display for ExprWithAlias { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ExprWithAliasAndOrderBy { + /// Expression with optional alias. pub expr: ExprWithAlias, + /// Ordering options applied to the expression. pub order_by: OrderByOptions, } @@ -1117,8 +1176,9 @@ impl fmt::Display for ExprWithAliasAndOrderBy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TableFunctionArgs { + /// The list of arguments passed to the table-valued function. pub args: Vec, - /// ClickHouse-specific SETTINGS clause. + /// ClickHouse-specific `SETTINGS` clause. 
/// For example, /// `SELECT * FROM executable('generate_random.py', TabSeparated, 'id UInt32, random String', SETTINGS send_chunk_header = false, pool_size = 16)` /// [`executable` table function](https://clickhouse.com/docs/en/engines/table-functions/executable) @@ -1128,9 +1188,13 @@ pub struct TableFunctionArgs { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Type of index hint (e.g., `USE`, `IGNORE`, `FORCE`). pub enum TableIndexHintType { + /// `USE` hint. Use, + /// `IGNORE` hint. Ignore, + /// `FORCE` hint. Force, } @@ -1147,8 +1211,11 @@ impl fmt::Display for TableIndexHintType { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The kind of index referenced by an index hint (e.g. `USE INDEX`). pub enum TableIndexType { + /// The `INDEX` kind. Index, + /// The `KEY` kind. Key, } @@ -1164,9 +1231,13 @@ impl fmt::Display for TableIndexType { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Which clause the table index hint applies to. pub enum TableIndexHintForClause { + /// Apply the hint to JOIN clauses. Join, + /// Apply the hint to `ORDER BY` clauses. OrderBy, + /// Apply the hint to `GROUP BY` clauses. GroupBy, } @@ -1183,10 +1254,15 @@ impl fmt::Display for TableIndexHintForClause { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MySQL-style index hints attached to a table (e.g., `USE INDEX(...)`). pub struct TableIndexHints { + /// Type of hint (e.g., `USE`, `FORCE`, or `IGNORE`). 
pub hint_type: TableIndexHintType, + /// The index type (e.g., `INDEX`). pub index_type: TableIndexType, + /// Optional `FOR` clause specifying the scope (JOIN / ORDER BY / GROUP BY). pub for_clause: Option, + /// List of index names referred to by the hint. pub index_names: Vec, } @@ -1206,9 +1282,12 @@ impl fmt::Display for TableIndexHints { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "visitor", visit(with = "visit_table_factor"))] pub enum TableFactor { + /// A named table or relation, possibly with arguments, hints, or sampling. Table { #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + /// Table or relation name. name: ObjectName, + /// Optional alias for the table (e.g. `table AS t`). alias: Option, /// Arguments of a table-valued function, as supported by Postgres /// and MSSQL. Note that deprecated MSSQL `FROM foo (NOLOCK)` syntax @@ -1238,21 +1317,31 @@ pub enum TableFactor { /// See: index_hints: Vec, }, + /// A derived table (a parenthesized subquery), optionally `LATERAL`. Derived { + /// Whether the derived table is LATERAL. lateral: bool, + /// The subquery producing the derived table. subquery: Box, + /// Optional alias for the derived table. alias: Option, }, /// `TABLE()[ AS ]` TableFunction { + /// Expression representing the table function call. expr: Expr, + /// Optional alias for the table function result. alias: Option, }, /// `e.g. LATERAL FLATTEN()[ AS ]` Function { + /// Whether the function is LATERAL. lateral: bool, + /// Name of the table function. name: ObjectName, + /// Arguments passed to the function. args: Vec, + /// Optional alias for the result of the function. alias: Option, }, /// ```sql @@ -1266,10 +1355,15 @@ pub enum TableFactor { /// +---------+--------+ /// ``` UNNEST { + /// Optional alias for the UNNEST table (e.g. `UNNEST(...) AS t`). alias: Option, + /// Expressions producing the arrays to be unnested. 
array_exprs: Vec, + /// Whether `WITH OFFSET` was specified to include element offsets. with_offset: bool, + /// Optional alias for the offset column when `WITH OFFSET` is used. with_offset_alias: Option, + /// Whether `WITH ORDINALITY` was specified to include ordinality. with_ordinality: bool, }, /// The `JSON_TABLE` table-valued function. @@ -1327,7 +1421,9 @@ pub enum TableFactor { /// The parser may also accept non-standard nesting of bare tables for some /// dialects, but the information about such nesting is stripped from AST. NestedJoin { + /// The nested join expression contained in parentheses. table_with_joins: Box, + /// Optional alias for the nested join. alias: Option, }, /// Represents PIVOT operation on a table. @@ -1336,11 +1432,17 @@ pub enum TableFactor { /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#pivot_operator) /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constructs/pivot) Pivot { + /// The input table to pivot. table: Box, + /// Aggregate expressions used as pivot values (optionally aliased). aggregate_functions: Vec, // Function expression + /// Columns producing the values to be pivoted. value_column: Vec, + /// Source of pivot values (e.g. list of literals or columns). value_source: PivotValueSource, + /// Optional expression providing a default when a pivot produces NULL. default_on_null: Option, + /// Optional alias for the pivoted table. alias: Option, }, /// An UNPIVOT operation on a table. @@ -1353,17 +1455,24 @@ pub enum TableFactor { /// See . /// See . Unpivot { + /// The input table to unpivot. table: Box, + /// Expression producing the unpivoted value. value: Expr, + /// Identifier used for the generated column name. name: Ident, + /// Columns or expressions to unpivot, optionally aliased. columns: Vec, + /// Whether to include or exclude NULLs during unpivot. null_inclusion: Option, + /// Optional alias for the resulting table. 
alias: Option, }, /// A `MATCH_RECOGNIZE` operation on a table. /// /// See . MatchRecognize { + /// The input table to apply `MATCH_RECOGNIZE` on. table: Box, /// `PARTITION BY [, ... ]` partition_by: Vec, @@ -1379,6 +1488,7 @@ pub enum TableFactor { pattern: MatchRecognizePattern, /// `DEFINE AS [, ... ]` symbols: Vec, + /// The alias for the table. alias: Option, }, /// The `XMLTABLE` table-valued function. @@ -1453,20 +1563,30 @@ pub enum TableSampleKind { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents a `TABLESAMPLE` clause and its options. pub struct TableSample { + /// Modifier (e.g. `SAMPLE` or `TABLESAMPLE`). pub modifier: TableSampleModifier, + /// Optional sampling method name (e.g. `BERNOULLI`, `SYSTEM`). pub name: Option, + /// Optional sampling quantity (value and optional unit). pub quantity: Option, + /// Optional seed clause. pub seed: Option, + /// Optional bucket specification for `BUCKET ... OUT OF ...`-style sampling. pub bucket: Option, + /// Optional offset expression for sampling. pub offset: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Modifier specifying whether `SAMPLE` or `TABLESAMPLE` keyword was used. pub enum TableSampleModifier { + /// `SAMPLE` modifier. Sample, + /// `TABLESAMPLE` modifier. TableSample, } @@ -1483,9 +1603,13 @@ impl fmt::Display for TableSampleModifier { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Quantity for a `TABLESAMPLE` clause (e.g. `10 PERCENT` or `(10)`). pub struct TableSampleQuantity { + /// Whether the quantity was wrapped in parentheses. 
pub parenthesized: bool, + /// The numeric expression specifying the quantity. pub value: Expr, + /// Optional unit (e.g. `PERCENT`, `ROWS`). pub unit: Option, } @@ -1509,10 +1633,15 @@ impl fmt::Display for TableSampleQuantity { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Sampling method used by `TABLESAMPLE`. pub enum TableSampleMethod { + /// `ROW` sampling method. Row, + /// `BERNOULLI` sampling method. Bernoulli, + /// `SYSTEM` sampling method. System, + /// `BLOCK` sampling method. Block, } @@ -1530,8 +1659,11 @@ impl fmt::Display for TableSampleMethod { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `SEED` or `REPEATABLE` clause used with sampling. pub struct TableSampleSeed { + /// Seed modifier (e.g. `REPEATABLE` or `SEED`). pub modifier: TableSampleSeedModifier, + /// The seed value expression. pub value: Value, } @@ -1545,8 +1677,11 @@ impl fmt::Display for TableSampleSeed { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Modifier specifying how the sample seed is applied. pub enum TableSampleSeedModifier { + /// `REPEATABLE` modifier. Repeatable, + /// `SEED` modifier. Seed, } @@ -1562,8 +1697,11 @@ impl fmt::Display for TableSampleSeedModifier { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Unit used with a `TABLESAMPLE` quantity (rows or percent). pub enum TableSampleUnit { + /// `ROWS` unit. Rows, + /// `PERCENT` unit. 
Percent, } @@ -1579,9 +1717,13 @@ impl fmt::Display for TableSampleUnit { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Bucket-based sampling clause: `BUCKET OUT OF [ON ]`. pub struct TableSampleBucket { + /// The bucket index expression. pub bucket: Value, + /// The total number of buckets expression. pub total: Value, + /// Optional `ON ` specification. pub on: Option, } @@ -1657,8 +1799,11 @@ impl fmt::Display for PivotValueSource { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An item in the `MEASURES` clause of `MATCH_RECOGNIZE`. pub struct Measure { + /// Expression producing the measure value. pub expr: Expr, + /// Alias for the measure column. pub alias: Ident, } @@ -1728,6 +1873,7 @@ impl fmt::Display for AfterMatchSkip { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The mode for handling empty matches in a `MATCH_RECOGNIZE` operation. pub enum EmptyMatchesMode { /// `SHOW EMPTY MATCHES` Show, @@ -1753,8 +1899,11 @@ impl fmt::Display for EmptyMatchesMode { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A symbol defined in a `MATCH_RECOGNIZE` operation. pub struct SymbolDefinition { + /// The symbol identifier. pub symbol: Ident, + /// The expression defining the symbol. 
pub definition: Expr, } @@ -2180,12 +2329,15 @@ impl fmt::Display for TableFactor { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An alias for a table reference, optionally including an explicit `AS` and column names. pub struct TableAlias { /// Tells whether the alias was introduced with an explicit, preceding "AS" /// keyword, e.g. `AS name`. Typically, the keyword is preceding the name /// (e.g. `.. FROM table AS t ..`). pub explicit: bool, + /// Alias identifier for the table. pub name: Ident, + /// Optional column aliases declared in parentheses after the table alias. pub columns: Vec, } @@ -2237,6 +2389,7 @@ impl fmt::Display for TableAliasColumnDef { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Specifies a table version selection, e.g. `FOR SYSTEM_TIME AS OF` or `AT(...)`. pub enum TableVersion { /// When the table version is defined using `FOR SYSTEM_TIME AS OF`. /// For example: `SELECT * FROM tbl FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)` @@ -2264,11 +2417,14 @@ impl Display for TableVersion { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A single `JOIN` clause including relation and join operator/options. pub struct Join { + /// The joined table factor (table reference or derived table). pub relation: TableFactor, /// ClickHouse supports the optional `GLOBAL` keyword before the join operator. /// See [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/join) pub global: bool, + /// The join operator and its constraint (INNER/LEFT/RIGHT/CROSS/ASOF/etc.). 
pub join_operator: JoinOperator, } @@ -2405,41 +2561,50 @@ impl fmt::Display for Join { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The operator used for joining two tables, e.g. `INNER`, `LEFT`, `CROSS`, `ASOF`, etc. pub enum JoinOperator { + /// Generic `JOIN` with an optional constraint. Join(JoinConstraint), + /// `INNER JOIN` with an optional constraint. Inner(JoinConstraint), + /// `LEFT JOIN` with an optional constraint. Left(JoinConstraint), + /// `LEFT OUTER JOIN` with an optional constraint. LeftOuter(JoinConstraint), + /// `RIGHT JOIN` with an optional constraint. Right(JoinConstraint), + /// `RIGHT OUTER JOIN` with an optional constraint. RightOuter(JoinConstraint), + /// `FULL OUTER JOIN` with an optional constraint. FullOuter(JoinConstraint), - /// CROSS (constraint is non-standard) + /// `CROSS JOIN` (constraint usage is non-standard). CrossJoin(JoinConstraint), - /// SEMI (non-standard) + /// `SEMI JOIN` (non-standard) Semi(JoinConstraint), - /// LEFT SEMI (non-standard) + /// `LEFT SEMI JOIN` (non-standard) LeftSemi(JoinConstraint), - /// RIGHT SEMI (non-standard) + /// `RIGHT SEMI JOIN` (non-standard) RightSemi(JoinConstraint), - /// ANTI (non-standard) + /// `ANTI JOIN` (non-standard) Anti(JoinConstraint), - /// LEFT ANTI (non-standard) + /// `LEFT ANTI JOIN` (non-standard) LeftAnti(JoinConstraint), - /// RIGHT ANTI (non-standard) + /// `RIGHT ANTI JOIN` (non-standard) RightAnti(JoinConstraint), - /// CROSS APPLY (non-standard) + /// `CROSS APPLY` (non-standard) CrossApply, - /// OUTER APPLY (non-standard) + /// `OUTER APPLY` (non-standard) OuterApply, - /// `ASOF` joins are used for joining tables containing time-series data - /// whose timestamp columns do not match exactly. + /// `ASOF` joins are used for joining time-series tables whose timestamp columns do not match exactly. /// /// See . 
AsOf { + /// Condition used to match records in the `ASOF` join. match_condition: Expr, + /// Additional constraint applied to the `ASOF` join. constraint: JoinConstraint, }, - /// STRAIGHT_JOIN (non-standard) + /// `STRAIGHT_JOIN` (MySQL non-standard behavior) /// /// See . StraightJoin(JoinConstraint), @@ -2448,35 +2613,42 @@ pub enum JoinOperator { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents how two tables are constrained in a join: `ON`, `USING`, `NATURAL`, or none. pub enum JoinConstraint { + /// `ON ` join condition. On(Expr), + /// `USING(...)` list of column names. Using(Vec), + /// `NATURAL` join (columns matched automatically). Natural, + /// No constraint specified (e.g. `CROSS JOIN`). None, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The kind of `ORDER BY` clause: either `ALL` with modifiers or a list of expressions. pub enum OrderByKind { - /// ALL syntax of [DuckDB] and [ClickHouse]. + /// `GROUP BY ALL`/`ORDER BY ALL` syntax with optional modifiers. /// /// [DuckDB]: /// [ClickHouse]: All(OrderByOptions), - /// Expressions + /// A standard list of ordering expressions. Expressions(Vec), } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents an `ORDER BY` clause with its kind and optional `INTERPOLATE`. pub struct OrderBy { + /// The kind of ordering (expressions or `ALL`). pub kind: OrderByKind, - /// Optional: `INTERPOLATE` - /// Supported by [ClickHouse syntax] + /// Optional `INTERPOLATE` clause (ClickHouse extension). 
pub interpolate: Option, } @@ -2508,10 +2680,11 @@ impl fmt::Display for OrderBy { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct OrderByExpr { + /// The expression to order by. pub expr: Expr, + /// Ordering options such as `ASC`/`DESC` and `NULLS` behavior. pub options: OrderByOptions, - /// Optional: `WITH FILL` - /// Supported by [ClickHouse syntax]: + /// Optional `WITH FILL` clause (ClickHouse extension) which specifies how to fill gaps. pub with_fill: Option, } @@ -2542,9 +2715,13 @@ impl fmt::Display for OrderByExpr { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `WITH FILL` options for ClickHouse `ORDER BY` expressions. pub struct WithFill { + /// Optional lower bound expression for the fill range (`FROM `). pub from: Option, + /// Optional upper bound expression for the fill range (`TO `). pub to: Option, + /// Optional step expression specifying interpolation step (`STEP `). pub step: Option, } @@ -2571,15 +2748,20 @@ impl fmt::Display for WithFill { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An expression used by `WITH FILL`/`INTERPOLATE` to specify interpolation for a column. pub struct InterpolateExpr { + /// The column to interpolate. pub column: Ident, + /// Optional `AS ` expression specifying how to compute interpolated values. pub expr: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `INTERPOLATE` clause used with ClickHouse `WITH FILL` to compute missing values. 
pub struct Interpolate { + /// Optional list of interpolation expressions. pub exprs: Option>, } @@ -2596,10 +2778,11 @@ impl fmt::Display for InterpolateExpr { #[derive(Default, Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Options for an `ORDER BY` expression (ASC/DESC and NULLS FIRST/LAST). pub struct OrderByOptions { - /// Optional `ASC` or `DESC` + /// Optional `ASC` (`Some(true)`) or `DESC` (`Some(false)`). pub asc: Option, - /// Optional `NULLS FIRST` or `NULLS LAST` + /// Optional `NULLS FIRST` (`Some(true)`) or `NULLS LAST` (`Some(false)`). pub nulls_first: Option, } @@ -2622,26 +2805,26 @@ impl fmt::Display for OrderByOptions { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents the different syntactic forms of `LIMIT` clauses. pub enum LimitClause { - /// Standard SQL syntax + /// Standard SQL `LIMIT` syntax (optionally `BY` and `OFFSET`). /// /// `LIMIT [BY ,,...] [OFFSET ]` LimitOffset { - /// `LIMIT { | ALL }` + /// `LIMIT { | ALL }` expression. limit: Option, - /// `OFFSET [ { ROW | ROWS } ]` + /// Optional `OFFSET` expression with optional `ROW(S)` keyword. offset: Option, - /// `BY { ,,... } }` - /// - /// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select/limit-by) + /// Optional `BY { ,... }` list used by some dialects (ClickHouse). limit_by: Vec, }, - /// [MySQL]-specific syntax; the order of expressions is reversed. - /// - /// `LIMIT , ` - /// - /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/select.html - OffsetCommaLimit { offset: Expr, limit: Expr }, + /// MySQL-specific syntax: `LIMIT , ` (order reversed). + OffsetCommaLimit { + /// The offset expression. + offset: Expr, + /// The limit expression. 
+ limit: Expr, + }, } impl fmt::Display for LimitClause { @@ -2674,8 +2857,11 @@ impl fmt::Display for LimitClause { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `OFFSET` clause consisting of a value and a rows specifier. pub struct Offset { + /// The numeric expression following `OFFSET`. pub value: Expr, + /// Whether the offset uses `ROW`/`ROWS` or omits it. pub rows: OffsetRows, } @@ -2690,9 +2876,11 @@ impl fmt::Display for Offset { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum OffsetRows { - /// Omitting ROW/ROWS is non-standard MySQL quirk. + /// Omitting `ROW`/`ROWS` entirely (non-standard MySQL quirk). None, + /// `ROW` keyword present. Row, + /// `ROWS` keyword present. Rows, } @@ -2726,45 +2914,71 @@ pub enum PipeOperator { /// Syntax: `|> LIMIT [OFFSET ]` /// /// See more at - Limit { expr: Expr, offset: Option }, + Limit { + /// The expression specifying the number of rows to return. + expr: Expr, + /// Optional offset expression provided inline with `LIMIT`. + offset: Option, + }, /// Filters the results of the input table. /// /// Syntax: `|> WHERE ` /// /// See more at - Where { expr: Expr }, + Where { + /// The filter expression. + expr: Expr, + }, /// `ORDER BY [ASC|DESC], ...` - OrderBy { exprs: Vec }, + OrderBy { + /// The ordering expressions. + exprs: Vec, + }, /// Produces a new table with the listed columns, similar to the outermost SELECT clause in a table subquery in standard syntax. /// /// Syntax `|> SELECT [[AS] alias], ...` /// /// See more at - Select { exprs: Vec }, + Select { + /// The select items to produce. + exprs: Vec, + }, /// Propagates the existing table and adds computed columns, similar to SELECT *, new_column in standard syntax. 
/// /// Syntax: `|> EXTEND [[AS] alias], ...` /// /// See more at - Extend { exprs: Vec }, + Extend { + /// Expressions defining added columns. + exprs: Vec, + }, /// Replaces the value of a column in the current table, similar to SELECT * REPLACE (expression AS column) in standard syntax. /// /// Syntax: `|> SET = , ...` /// /// See more at - Set { assignments: Vec }, + Set { + /// Assignments to apply (`column = expr`). + assignments: Vec, + }, /// Removes listed columns from the current table, similar to SELECT * EXCEPT (column) in standard syntax. /// /// Syntax: `|> DROP , ...` /// /// See more at - Drop { columns: Vec }, + Drop { + /// Columns to drop. + columns: Vec, + }, /// Introduces a table alias for the input table, similar to applying the AS alias clause on a table subquery in standard syntax. /// /// Syntax: `|> AS ` /// /// See more at - As { alias: Ident }, + As { + /// Alias to assign to the input table. + alias: Ident, + }, /// Performs aggregation on data across grouped rows or an entire table. /// /// Syntax: `|> AGGREGATE [[AS] alias], ...` @@ -2777,26 +2991,36 @@ pub enum PipeOperator { /// /// See more at Aggregate { + /// Expressions computed for each row prior to grouping. full_table_exprs: Vec, + /// Grouping expressions for aggregation. group_by_expr: Vec, }, /// Selects a random sample of rows from the input table. /// Syntax: `|> TABLESAMPLE SYSTEM (10 PERCENT) /// See more at - TableSample { sample: Box }, + TableSample { + /// Sampling clause describing the sample. + sample: Box, + }, /// Renames columns in the input table. /// /// Syntax: `|> RENAME old_name AS new_name, ...` /// /// See more at - Rename { mappings: Vec }, + Rename { + /// Mappings of old to new identifiers. + mappings: Vec, + }, /// Combines the input table with one or more tables using UNION. /// /// Syntax: `|> UNION [ALL|DISTINCT] (), (), ...` /// /// See more at Union { + /// Set quantifier (`ALL` or `DISTINCT`). 
set_quantifier: SetQuantifier, + /// The queries to combine with `UNION`. queries: Vec, }, /// Returns only the rows that are present in both the input table and the specified tables. @@ -2805,7 +3029,9 @@ pub enum PipeOperator { /// /// See more at Intersect { + /// Set quantifier for the `INTERSECT` operator. set_quantifier: SetQuantifier, + /// The queries to intersect. queries: Vec, }, /// Returns only the rows that are present in the input table but not in the specified tables. @@ -2814,7 +3040,9 @@ pub enum PipeOperator { /// /// See more at Except { + /// Set quantifier for the `EXCEPT` operator. set_quantifier: SetQuantifier, + /// The queries to exclude from the input set. queries: Vec, }, /// Calls a table function or procedure that returns a table. @@ -2823,7 +3051,9 @@ pub enum PipeOperator { /// /// See more at Call { + /// The function or procedure to call which returns a table. function: Function, + /// Optional alias for the result table. alias: Option, }, /// Pivots data from rows to columns. @@ -2832,9 +3062,13 @@ pub enum PipeOperator { /// /// See more at Pivot { + /// Aggregate functions to compute during pivot. aggregate_functions: Vec, + /// Column(s) that provide the pivot values. value_column: Vec, + /// The source of pivot values (literal list or subquery). value_source: PivotValueSource, + /// Optional alias for the output. alias: Option, }, /// The `UNPIVOT` pipe operator transforms columns into rows. @@ -2846,9 +3080,13 @@ pub enum PipeOperator { /// /// See more at Unpivot { + /// Output column that will receive the unpivoted value. value_column: Ident, + /// Column name holding the unpivoted column name. name_column: Ident, + /// Columns to unpivot. unpivot_columns: Vec, + /// Optional alias for the unpivot result. alias: Option, }, /// Joins the input table with another table. 
@@ -2999,9 +3237,13 @@ impl PipeOperator { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `FETCH` clause options. pub struct Fetch { + /// `WITH TIES` option is present. pub with_ties: bool, + /// `PERCENT` modifier is present. pub percent: bool, + /// Optional quantity expression (e.g. `FETCH FIRST 10 ROWS`). pub quantity: Option, } @@ -3020,9 +3262,13 @@ impl fmt::Display for Fetch { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `FOR ...` locking clause. pub struct LockClause { + /// The kind of lock requested (e.g. `SHARE`, `UPDATE`). pub lock_type: LockType, + /// Optional object name after `OF` (e.g. `FOR UPDATE OF t1`). pub of: Option, + /// Optional non-blocking behavior (`NOWAIT` / `SKIP LOCKED`). pub nonblock: Option, } @@ -3042,8 +3288,11 @@ impl fmt::Display for LockClause { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// The lock type used in `FOR ` clauses (e.g. `FOR SHARE`, `FOR UPDATE`). pub enum LockType { + /// `SHARE` lock (shared lock). Share, + /// `UPDATE` lock (exclusive/update lock). Update, } @@ -3060,8 +3309,11 @@ impl fmt::Display for LockType { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Non-blocking lock options for `FOR ...` clauses. pub enum NonBlock { + /// `NOWAIT` — do not wait for the lock. Nowait, + /// `SKIP LOCKED` — skip rows that are locked. 
SkipLocked, } @@ -3078,11 +3330,12 @@ impl fmt::Display for NonBlock { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `DISTINCT` or `DISTINCT ON (...)` modifiers for `SELECT` lists. pub enum Distinct { - /// DISTINCT + /// `DISTINCT` (remove duplicate rows) Distinct, - /// DISTINCT ON({column names}) + /// `DISTINCT ON (...)` (Postgres extension) On(Vec), } @@ -3101,22 +3354,25 @@ impl fmt::Display for Distinct { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// MSSQL `TOP` clause options. pub struct Top { /// SQL semantic equivalent of LIMIT but with same structure as FETCH. /// MSSQL only. pub with_ties: bool, - /// MSSQL only. + /// Apply `PERCENT` extension. pub percent: bool, + /// The optional quantity (expression or constant) following `TOP`. pub quantity: Option, } #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Quantity used in a `TOP` clause: either an expression or a constant. pub enum TopQuantity { - // A parenthesized expression. MSSQL only. + /// A parenthesized expression (MSSQL syntax: `TOP (expr)`). Expr(Expr), - // An unparenthesized integer constant. + /// An unparenthesized integer constant: `TOP 10`. Constant(u64), } @@ -3140,13 +3396,15 @@ impl fmt::Display for Top { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// An explicit `VALUES` clause and its rows. pub struct Values { - /// Was there an explicit ROWs keyword (MySQL)? + /// Was there an explicit `ROW` keyword (MySQL)? 
/// pub explicit_row: bool, - // MySql supports both VALUES and VALUE keywords. - // + /// `true` if `VALUE` (singular) keyword was used instead of `VALUES`. + /// pub value_keyword: bool, + /// The list of rows, each row is a list of expressions. pub rows: Vec>, } @@ -3171,10 +3429,15 @@ impl fmt::Display for Values { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// `SELECT INTO` clause options. pub struct SelectInto { + /// `TEMPORARY` modifier. pub temporary: bool, + /// `UNLOGGED` modifier. pub unlogged: bool, + /// `TABLE` keyword present. pub table: bool, + /// Name of the target table. pub name: ObjectName, } @@ -3195,12 +3458,15 @@ impl fmt::Display for SelectInto { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Modifiers used with `GROUP BY` such as `WITH ROLLUP` or `WITH CUBE`. pub enum GroupByWithModifier { + /// `WITH ROLLUP` modifier. Rollup, + /// `WITH CUBE` modifier. Cube, + /// `WITH TOTALS` modifier (ClickHouse). Totals, - /// Hive supports GROUP BY GROUPING SETS syntax. - /// e.g. GROUP BY year , month GROUPING SETS((year,month),(year),(month)) + /// Hive supports GROUPING SETS syntax, e.g. `GROUP BY GROUPING SETS(...)`. /// /// [Hive]: GroupingSets(Expr), @@ -3222,6 +3488,8 @@ impl fmt::Display for GroupByWithModifier { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Represents the two syntactic forms that `GROUP BY` can take, including +/// `GROUP BY ALL` with optional modifiers and ordinary `GROUP BY `. pub enum GroupByExpr { /// ALL syntax of [Snowflake], [DuckDB] and [ClickHouse]. 
/// @@ -3233,8 +3501,7 @@ pub enum GroupByExpr { /// /// [ClickHouse]: All(Vec), - - /// Expressions + /// `GROUP BY ` with optional modifiers. Expressions(Vec, Vec), } @@ -3261,14 +3528,16 @@ impl fmt::Display for GroupByExpr { } } -/// FORMAT identifier or FORMAT NULL clause, specific to ClickHouse. +/// `FORMAT` identifier or `FORMAT NULL` clause, specific to ClickHouse. /// /// [ClickHouse]: #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum FormatClause { + /// The format identifier. Identifier(Ident), + /// `FORMAT NULL` clause. Null, } @@ -3288,7 +3557,9 @@ impl fmt::Display for FormatClause { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct InputFormatClause { + /// The format identifier. pub ident: Ident, + /// Optional format parameters. pub values: Vec, } @@ -3304,24 +3575,35 @@ impl fmt::Display for InputFormatClause { } } -/// FOR XML or FOR JSON clause, specific to MSSQL -/// (formats the output of a query as XML or JSON) +/// `FOR XML` or `FOR JSON` clause (MSSQL): formats the output of a query as XML or JSON. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ForClause { + /// `FOR BROWSE` clause. Browse, + /// `FOR JSON ...` clause and its options. Json { + /// JSON mode (`AUTO` or `PATH`). for_json: ForJson, + /// Optional `ROOT('...')` parameter. root: Option, + /// `INCLUDE_NULL_VALUES` flag. include_null_values: bool, + /// `WITHOUT_ARRAY_WRAPPER` flag. without_array_wrapper: bool, }, + /// `FOR XML ...` clause and its options. Xml { + /// XML mode (`RAW`, `AUTO`, `EXPLICIT`, `PATH`). for_xml: ForXml, + /// `ELEMENTS` flag. elements: bool, + /// `BINARY BASE64` flag. 
binary_base64: bool, + /// Optional `ROOT('...')` parameter. root: Option, + /// `TYPE` flag. r#type: bool, }, } @@ -3379,10 +3661,15 @@ impl fmt::Display for ForClause { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Modes for `FOR XML` clause. pub enum ForXml { + /// `RAW` mode with optional root name: `RAW('root')`. Raw(Option), + /// `AUTO` mode. Auto, + /// `EXPLICIT` mode. Explicit, + /// `PATH` mode with optional root: `PATH('root')`. Path(Option), } @@ -3412,8 +3699,11 @@ impl fmt::Display for ForXml { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// Modes for `FOR JSON` clause. pub enum ForJson { + /// `AUTO` mode. Auto, + /// `PATH` mode. Path, } @@ -3478,8 +3768,11 @@ impl fmt::Display for JsonTableColumn { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// A nested column in a `JSON_TABLE` column list. pub struct JsonTableNestedColumn { + /// JSON path expression (must be a literal `Value`). pub path: Value, + /// Columns extracted from the matched nested array. pub columns: Vec, } @@ -3544,9 +3837,13 @@ impl fmt::Display for JsonTableNamedColumn { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// Error/empty-value handling for `JSON_TABLE` columns. pub enum JsonTableColumnErrorHandling { + /// `NULL` — return NULL when the path does not match. Null, + /// `DEFAULT ` — use the provided `Value` as a default. Default(Value), + /// `ERROR` — raise an error. 
Error, } @@ -3605,10 +3902,15 @@ impl fmt::Display for OpenJsonTableColumn { #[derive(Debug, Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Mode of BigQuery value tables, e.g. `AS STRUCT` or `AS VALUE`. pub enum ValueTableMode { + /// `AS STRUCT` AsStruct, + /// `AS VALUE` AsValue, + /// `DISTINCT AS STRUCT` DistinctAsStruct, + /// `DISTINCT AS VALUE` DistinctAsValue, } @@ -3711,10 +4013,14 @@ impl fmt::Display for XmlTableColumn { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// Argument passed in the `XMLTABLE PASSING` clause. pub struct XmlPassingArgument { + /// Expression to pass to the XML table. pub expr: Expr, + /// Optional alias for the argument. pub alias: Option, - pub by_value: bool, // True if BY VALUE is specified + /// `true` if `BY VALUE` is specified for the argument. + pub by_value: bool, } impl fmt::Display for XmlPassingArgument { @@ -3734,7 +4040,9 @@ impl fmt::Display for XmlPassingArgument { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +/// The PASSING clause for `XMLTABLE`. pub struct XmlPassingClause { + /// The list of passed arguments. pub arguments: Vec, } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index f88b302965..488c886249 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -405,6 +405,7 @@ impl Spanned for Statement { Statement::AlterType { .. } => Span::empty(), Statement::AlterOperator { .. } => Span::empty(), Statement::AlterOperatorFamily { .. } => Span::empty(), + Statement::AlterOperatorClass { .. } => Span::empty(), Statement::AlterRole { .. } => Span::empty(), Statement::AlterSession { .. 
} => Span::empty(), Statement::AttachDatabase { .. } => Span::empty(), diff --git a/src/ast/table_constraints.rs b/src/ast/table_constraints.rs index ddf0c12539..cb3c2376d6 100644 --- a/src/ast/table_constraints.rs +++ b/src/ast/table_constraints.rs @@ -155,10 +155,13 @@ impl fmt::Display for TableConstraint { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A `CHECK` constraint (`[ CONSTRAINT ] CHECK () [[NOT] ENFORCED]`). pub struct CheckConstraint { + /// Optional constraint name. pub name: Option, + /// The boolean expression the CHECK constraint enforces. pub expr: Box, - /// MySQL-specific syntax + /// MySQL-specific `ENFORCED` / `NOT ENFORCED` flag. /// pub enforced: Option, } @@ -197,16 +200,24 @@ impl crate::ast::Spanned for CheckConstraint { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ForeignKeyConstraint { + /// Optional constraint name. pub name: Option, - /// MySQL-specific field + /// MySQL-specific index name associated with the foreign key. /// pub index_name: Option, + /// Columns in the local table that participate in the foreign key. pub columns: Vec, + /// Referenced foreign table name. pub foreign_table: ObjectName, + /// Columns in the referenced table. pub referred_columns: Vec, + /// Action to perform `ON DELETE`. pub on_delete: Option, + /// Action to perform `ON UPDATE`. pub on_update: Option, + /// Optional `MATCH` kind (FULL | PARTIAL | SIMPLE). pub match_kind: Option, + /// Optional characteristics (e.g., `DEFERRABLE`). pub characteristics: Option, } @@ -344,6 +355,7 @@ pub struct IndexConstraint { /// Referred column identifier list. pub columns: Vec, /// Optional index options such as `USING`; see [`IndexOption`]. + /// Options applied to the index (e.g., `COMMENT`, `WITH` options). 
pub index_options: Vec, } @@ -413,7 +425,9 @@ pub struct PrimaryKeyConstraint { pub index_type: Option, /// Identifiers of the columns that form the primary key. pub columns: Vec, + /// Optional index options such as `USING`. pub index_options: Vec, + /// Optional characteristics like `DEFERRABLE`. pub characteristics: Option, } @@ -458,6 +472,7 @@ impl crate::ast::Spanned for PrimaryKeyConstraint { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Unique constraint definition. pub struct UniqueConstraint { /// Constraint name. /// @@ -473,7 +488,9 @@ pub struct UniqueConstraint { pub index_type: Option, /// Identifiers of the columns that are unique. pub columns: Vec, + /// Optional index options such as `USING`. pub index_options: Vec, + /// Optional characteristics like `DEFERRABLE`. pub characteristics: Option, /// Optional Postgres nulls handling: `[ NULLS [ NOT ] DISTINCT ]` pub nulls_distinct: NullsDistinctOption, diff --git a/src/ast/trigger.rs b/src/ast/trigger.rs index 2c64e42393..8c189a3378 100644 --- a/src/ast/trigger.rs +++ b/src/ast/trigger.rs @@ -23,7 +23,9 @@ use super::*; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TriggerObject { + /// The trigger fires once for each row affected by the triggering event Row, + /// The trigger fires once for the triggering SQL statement Statement, } @@ -36,12 +38,14 @@ impl fmt::Display for TriggerObject { } } -/// This clause indicates whether the following relation name is for the before-image transition relation or the after-image transition relation #[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// This clause indicates whether the following relation 
name is for the before-image transition relation or the after-image transition relation pub enum TriggerReferencingType { + /// The transition relation containing the old rows affected by the triggering statement OldTable, + /// The transition relation containing the new rows affected by the triggering statement NewTable, } @@ -59,8 +63,11 @@ impl fmt::Display for TriggerReferencingType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TriggerReferencing { + /// The referencing type (`OLD TABLE` or `NEW TABLE`). pub refer_type: TriggerReferencingType, + /// True if the `AS` keyword is present in the referencing clause. pub is_as: bool, + /// The transition relation name provided by the referencing clause. pub transition_relation_name: ObjectName, } @@ -81,9 +88,13 @@ impl fmt::Display for TriggerReferencing { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TriggerEvent { + /// Trigger on INSERT event Insert, + /// Trigger on UPDATE event, with optional list of columns Update(Vec), + /// Trigger on DELETE event Delete, + /// Trigger on TRUNCATE event Truncate, } @@ -110,9 +121,13 @@ impl fmt::Display for TriggerEvent { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TriggerPeriod { + /// The trigger fires once for each row affected by the triggering event For, + /// The trigger fires once for the triggering SQL statement After, + /// The trigger fires before the triggering event Before, + /// The trigger fires instead of the triggering event InsteadOf, } @@ -132,7 +147,9 @@ impl fmt::Display for TriggerPeriod { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TriggerExecBodyType { + /// Execute a function Function, + /// Execute a procedure 
Procedure, } @@ -149,7 +166,9 @@ impl fmt::Display for TriggerExecBodyType { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct TriggerExecBody { + /// Whether the body is a `FUNCTION` or `PROCEDURE` invocation. pub exec_type: TriggerExecBodyType, + /// Description of the function/procedure to execute. pub func_desc: FunctionDesc, } diff --git a/src/ast/value.rs b/src/ast/value.rs index ccbb12a332..dc46a5bbb9 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -64,11 +64,14 @@ use sqlparser_derive::{Visit, VisitMut}; /// // convert back to `Value` /// let value: Value = value_with_span.into(); /// ``` +/// A `Value` paired with its source `Span` location. #[derive(Debug, Clone, Eq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct ValueWithSpan { + /// The wrapped `Value`. pub value: Value, + /// The source `Span` covering the token(s) that produced the value. pub span: Span, } @@ -121,13 +124,14 @@ pub enum Value { #[cfg(not(feature = "bigdecimal"))] Number(String, bool), #[cfg(feature = "bigdecimal")] - // HINT: use `test_utils::number` to make an instance of - // Value::Number This might help if you your tests pass locally - // but fail on CI with the `--all-features` flag enabled + /// HINT: use `test_utils::number` to make an instance of + /// Value::Number This might help if you your tests pass locally + /// but fail on CI with the `--all-features` flag enabled + /// Numeric literal (uses `BigDecimal` when the `bigdecimal` feature is enabled). Number(BigDecimal, bool), /// 'string value' SingleQuotedString(String), - // $$string value$$ (postgres syntax) + /// Dollar-quoted string literal, e.g. `$$...$$` or `$tag$...$tag$` (Postgres syntax). 
DollarQuotedString(DollarQuotedString), /// Triple single quoted strings: Example '''abc''' /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/lexical#quoted_literals) @@ -176,6 +180,7 @@ pub enum Value { /// X'hex value' HexStringLiteral(String), + /// Double quoted string literal, e.g. `"abc"`. DoubleQuotedString(String), /// Boolean value true or false Boolean(bool), @@ -219,10 +224,12 @@ impl Value { } } + /// Attach the provided `span` to this `Value` and return `ValueWithSpan`. pub fn with_span(self, span: Span) -> ValueWithSpan { ValueWithSpan { value: self, span } } + /// Convenience for attaching an empty span to this `Value`. pub fn with_empty_span(self) -> ValueWithSpan { self.with_span(Span::empty()) } @@ -268,11 +275,14 @@ impl fmt::Display for Value { } } +/// A dollar-quoted string literal, e.g. `$$...$$` or `$tag$...$tag$`. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct DollarQuotedString { + /// Inner string contents. pub value: String, + /// Optional tag used in the opening/closing delimiter. pub tag: Option, } @@ -311,59 +321,102 @@ impl fmt::Display for QuoteDelimitedString { } } +/// Represents the date/time fields used by functions like `EXTRACT`. +/// +/// Each variant corresponds to a supported date/time part (for example +/// `YEAR`, `MONTH`, `DAY`, etc.). The `Custom` variant allows arbitrary +/// identifiers (e.g. dialect-specific abbreviations). #[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum DateTimeField { + /// `YEAR` Year, + /// `YEARS` (plural form) Years, + /// `MONTH` Month, + /// `MONTHS` (plural form) Months, - /// Week optionally followed by a WEEKDAY. 
- /// - /// ```sql - /// WEEK(MONDAY) - /// ``` + /// `WEEK`, optionally followed by a weekday, e.g. `WEEK(MONDAY)`. /// /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/date_functions#extract) Week(Option), + /// `WEEKS` (plural form) Weeks, + /// `DAY` Day, + /// `DAYOFWEEK` DayOfWeek, + /// `DAYOFYEAR` DayOfYear, + /// `DAYS` (plural form) Days, + /// `DATE` Date, + /// `DATETIME` Datetime, + /// `HOUR` Hour, + /// `HOURS` (plural form) Hours, + /// `MINUTE` Minute, + /// `MINUTES` (plural form) Minutes, + /// `SECOND` Second, + /// `SECONDS` (plural form) Seconds, + /// `CENTURY` Century, + /// `DECADE` Decade, + /// `DOW` (day of week short form) Dow, + /// `DOY` (day of year short form) Doy, + /// `EPOCH` Epoch, + /// `ISODOW` Isodow, - IsoWeek, + /// `ISOYEAR` Isoyear, + /// `ISOWEEK` + IsoWeek, + /// `JULIAN` Julian, + /// `MICROSECOND` Microsecond, + /// `MICROSECONDS` (plural form) Microseconds, + /// `MILLENIUM` (alternate spelling) Millenium, + /// `MILLENNIUM` (alternate spelling) Millennium, + /// `MILLISECOND` Millisecond, + /// `MILLISECONDS` (plural form) Milliseconds, + /// `NANOSECOND` Nanosecond, + /// `NANOSECONDS` (plural form) Nanoseconds, + /// `QUARTER` Quarter, + /// `TIME` Time, + /// `TIMEZONE` Timezone, + /// `TIMEZONE_ABBR` TimezoneAbbr, + /// `TIMEZONE_HOUR` TimezoneHour, + /// `TIMEZONE_MINUTE` TimezoneMinute, + /// `TIMEZONE_REGION` TimezoneRegion, + /// `NODATETIME` indicates no date/time part NoDateTime, /// Arbitrary abbreviation or custom date-time part. /// @@ -523,14 +576,18 @@ impl fmt::Display for EscapeQuotedString<'_> { } } +/// Return a helper which formats `string` for inclusion inside a quoted +/// literal that uses `quote` as the delimiter. pub fn escape_quoted_string(string: &str, quote: char) -> EscapeQuotedString<'_> { EscapeQuotedString { string, quote } } +/// Convenience wrapper for escaping strings for single-quoted literals (`'`). 
pub fn escape_single_quote_string(s: &str) -> EscapeQuotedString<'_> { escape_quoted_string(s, '\'') } +/// Convenience wrapper for escaping strings for double-quoted literals (`").` pub fn escape_double_quote_string(s: &str) -> EscapeQuotedString<'_> { escape_quoted_string(s, '\"') } @@ -565,6 +622,8 @@ impl fmt::Display for EscapeEscapedStringLiteral<'_> { } } +/// Return a helper which escapes characters for string literals that use +/// PostgreSQL-style escaped string literals (e.g. `E'...')`. pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> { EscapeEscapedStringLiteral(s) } @@ -600,16 +659,24 @@ impl fmt::Display for EscapeUnicodeStringLiteral<'_> { } } +/// Return a helper which escapes non-ASCII characters using `\XXXX` or +/// `\+XXXXXX` Unicode escape formats (used for `U&'...'` style literals). pub fn escape_unicode_string(s: &str) -> EscapeUnicodeStringLiteral<'_> { EscapeUnicodeStringLiteral(s) } +/// The side on which `TRIM` should be applied. +/// +/// Corresponds to `TRIM(BOTH|LEADING|TRAILING)` SQL syntax. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum TrimWhereField { + /// `BOTH` (trim from both ends) Both, + /// `LEADING` (trim from start) Leading, + /// `TRAILING` (trim from end) Trailing, } diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 328f925f7a..5d841655b5 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -32,6 +32,10 @@ use core::ops::ControlFlow; /// #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// ``` pub trait Visit { + /// Visit this node with the provided [`Visitor`]. + /// + /// Implementations should call the appropriate visitor hooks to traverse + /// child nodes and return a `ControlFlow` value to allow early exit. 
fn visit(&self, visitor: &mut V) -> ControlFlow; } @@ -47,6 +51,11 @@ pub trait Visit { /// #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// ``` pub trait VisitMut { + /// Mutably visit this node with the provided [`VisitorMut`]. + /// + /// Implementations should call the appropriate mutable visitor hooks to + /// traverse and allow in-place mutation of child nodes. Returning a + /// `ControlFlow` value permits early termination of the traversal. fn visit(&mut self, visitor: &mut V) -> ControlFlow; } diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index bdac1f57b5..39e8a0b304 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -17,7 +17,7 @@ use crate::dialect::Dialect; -// A [`Dialect`] for [ClickHouse](https://clickhouse.com/). +/// A [`Dialect`] for [ClickHouse](https://clickhouse.com/). #[derive(Debug)] pub struct ClickHouseDialect {} diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 4c7173d544..9e6c1859f1 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -881,10 +881,14 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports `EXPLAIN` statements with utility options + /// e.g. `EXPLAIN (ANALYZE TRUE, BUFFERS TRUE) SELECT * FROM tbl;` fn supports_explain_with_utility_options(&self) -> bool { false } + /// Returns true if the dialect supports `ASC` and `DESC` in column definitions + /// e.g. `CREATE TABLE t (a INT ASC, b INT DESC);` fn supports_asc_desc_in_column_definition(&self) -> bool { false } @@ -1118,6 +1122,13 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports space-separated column options + /// in a `CREATE TABLE` statement. 
For example: + /// ```sql + /// CREATE TABLE tbl ( + /// col INT NOT NULL DEFAULT 0 + /// ); + /// ``` fn supports_space_separated_column_options(&self) -> bool { false } @@ -1228,31 +1239,50 @@ pub trait Dialect: Debug + Any { } } -/// This represents the operators for which precedence must be defined +/// Operators for which precedence must be defined. /// -/// higher number -> higher precedence +/// Higher number -> higher precedence. +/// See expression parsing for how these values are used. #[derive(Debug, Clone, Copy)] pub enum Precedence { + /// Member access operator `.` (highest precedence). Period, + /// Postgres style type cast `::`. DoubleColon, + /// Timezone operator (e.g. `AT TIME ZONE`). AtTz, + /// Multiplication / Division / Modulo operators (`*`, `/`, `%`). MulDivModOp, + /// Addition / Subtraction (`+`, `-`). PlusMinus, + /// Bitwise `XOR` operator (`^`). Xor, + /// Bitwise `AND` operator (`&`). Ampersand, + /// Bitwise `CARET` (^) for some dialects. Caret, + /// Bitwise `OR` / pipe operator (`|`). Pipe, + /// `BETWEEN` operator. Between, + /// Equality operator (`=`). Eq, + /// Pattern matching (`LIKE`). Like, + /// `IS` operator (e.g. `IS NULL`). Is, + /// Other Postgres-specific operators. PgOther, + /// Unary `NOT`. UnaryNot, + /// Logical `AND`. And, + /// Logical `OR` (lowest precedence). Or, } impl dyn Dialect { + /// Returns true if `self` is the concrete dialect `T`. #[inline] pub fn is(&self) -> bool { // borrowed from `Any` implementation diff --git a/src/keywords.rs b/src/keywords.rs index 845d710291..77207283c2 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -37,6 +37,7 @@ use sqlparser_derive::{Visit, VisitMut}; /// expands to `pub const SELECT = "SELECT";` macro_rules! kw_def { ($ident:ident = $string_keyword:expr) => { + #[doc = concat!("The `", $string_keyword, "` SQL keyword.")] pub const $ident: &'static str = $string_keyword; }; ($ident:ident) => { @@ -54,16 +55,23 @@ macro_rules! 
define_keywords { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] #[allow(non_camel_case_types)] + /// An enumeration of SQL keywords recognized by the parser. pub enum Keyword { + /// Represents no keyword. NoKeyword, - $($ident),* + $( + #[doc = concat!("The `", stringify!($ident), "` SQL keyword.")] + $ident + ),* } + /// Array of all `Keyword` enum values in declaration order. pub const ALL_KEYWORDS_INDEX: &[Keyword] = &[ $(Keyword::$ident),* ]; $(kw_def!($ident $(= $string_keyword)?);)* + /// Array of all SQL keywords as string constants. pub const ALL_KEYWORDS: &[&str] = &[ $($ident),* ]; @@ -1250,9 +1258,9 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::END, ]; -// Global list of reserved keywords allowed after FROM. -// Parser should call Dialect::get_reserved_keyword_after_from -// to allow for each dialect to customize the list. +/// Global list of reserved keywords allowed after FROM. +/// Parser should call Dialect::get_reserved_keyword_after_from +/// to allow for each dialect to customize the list. pub const RESERVED_FOR_TABLE_FACTOR: &[Keyword] = &[ Keyword::INTO, Keyword::LIMIT, diff --git a/src/lib.rs b/src/lib.rs index 4050173caa..f5d23a21fc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -154,6 +154,7 @@ // would bloat the API and hide intent. Extra memory is a worthwhile tradeoff. #![allow(clippy::large_enum_variant)] #![forbid(clippy::unreachable)] +#![forbid(missing_docs)] // Allow proc-macros to find this crate extern crate self as sqlparser; @@ -167,6 +168,7 @@ extern crate pretty_assertions; pub mod ast; #[macro_use] +/// Submodules for SQL dialects. 
pub mod dialect; mod display_utils; pub mod keywords; diff --git a/src/parser/alter.rs b/src/parser/alter.rs index 01b5ca30d1..935d22f8d0 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -30,6 +30,7 @@ use crate::{ }; impl Parser<'_> { + /// Parse `ALTER ROLE` statement pub fn parse_alter_role(&mut self) -> Result { if dialect_of!(self is PostgreSqlDialect) { return self.parse_pg_alter_role(); diff --git a/src/parser/merge.rs b/src/parser/merge.rs index 2bc1544f00..81798c4569 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -39,6 +39,7 @@ impl Parser<'_> { Ok(Box::new(SetExpr::Merge(self.parse_merge(merge_token)?))) } + /// Parse a `MERGE` statement pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { let into = self.parse_keyword(Keyword::INTO); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d0a774b591..8001611e00 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -48,10 +48,14 @@ use crate::keywords::{Keyword, ALL_KEYWORDS}; use crate::tokenizer::*; use sqlparser::parser::ParserState::ColumnDefinition; +/// Errors produced by the SQL parser. #[derive(Debug, Clone, PartialEq, Eq)] pub enum ParserError { + /// Error originating from the tokenizer with a message. TokenizerError(String), + /// Generic parser error with a message. ParserError(String), + /// Raised when a recursion depth limit is exceeded. RecursionLimitExceeded, } @@ -154,19 +158,29 @@ mod recursion { } #[derive(PartialEq, Eq)] +/// Indicates whether a parser element is optional or mandatory. pub enum IsOptional { + /// The element is optional. Optional, + /// The element is mandatory. Mandatory, } +/// Indicates if a table expression is lateral. pub enum IsLateral { + /// The expression is lateral. Lateral, + /// The expression is not lateral. NotLateral, } +/// Represents a wildcard expression used in SELECT lists. pub enum WildcardExpr { + /// A specific expression used instead of a wildcard. 
Expr(Expr), + /// A qualified wildcard like `table.*`. QualifiedWildcard(ObjectName), + /// An unqualified `*` wildcard. Wildcard, } @@ -228,6 +242,7 @@ impl From for MatchedTrailingBracket { /// Options that control how the [`Parser`] parses SQL text #[derive(Debug, Clone, PartialEq, Eq)] pub struct ParserOptions { + /// Allow trailing commas in lists (e.g. `a, b,`). pub trailing_commas: bool, /// Controls how literal values are unescaped. See /// [`Tokenizer::with_unescape`] for more details. @@ -872,7 +887,9 @@ impl<'a> Parser<'a> { Ok(Statement::Raise(RaiseStatement { value })) } - + /// Parse a COMMENT statement. + /// + /// See [Statement::Comment] pub fn parse_comment(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); @@ -918,6 +935,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `FLUSH` statement. pub fn parse_flush(&mut self) -> Result { let mut channel = None; let mut tables: Vec = vec![]; @@ -1005,6 +1023,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `MSCK` statement. pub fn parse_msck(&mut self) -> Result { let repair = self.parse_keyword(Keyword::REPAIR); self.expect_keyword_is(Keyword::TABLE)?; @@ -1033,6 +1052,7 @@ impl<'a> Parser<'a> { .into()) } + /// Parse `TRUNCATE` statement. pub fn parse_truncate(&mut self) -> Result { let table = self.parse_keyword(Keyword::TABLE); @@ -1089,6 +1109,7 @@ impl<'a> Parser<'a> { } } + /// Parse options for `ATTACH DUCKDB DATABASE` statement. pub fn parse_attach_duckdb_database_options( &mut self, ) -> Result, ParserError> { @@ -1124,6 +1145,7 @@ impl<'a> Parser<'a> { } } + /// Parse `ATTACH DUCKDB DATABASE` statement. pub fn parse_attach_duckdb_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -1144,6 +1166,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `DETACH DUCKDB DATABASE` statement. 
pub fn parse_detach_duckdb_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); @@ -1155,6 +1178,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `ATTACH DATABASE` statement. pub fn parse_attach_database(&mut self) -> Result { let database = self.parse_keyword(Keyword::DATABASE); let database_file_name = self.parse_expr()?; @@ -1167,6 +1191,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `ANALYZE` statement. pub fn parse_analyze(&mut self) -> Result { let has_table_keyword = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name(false)?; @@ -1289,6 +1314,7 @@ impl<'a> Parser<'a> { self.parse_subexpr(self.dialect.prec_unknown()) } + /// Parse expression with optional alias and order by. pub fn parse_expr_with_alias_and_order_by( &mut self, ) -> Result { @@ -1336,6 +1362,7 @@ impl<'a> Parser<'a> { Ok(expr) } + /// Parse `ASSERT` statement. pub fn parse_assert(&mut self) -> Result { let condition = self.parse_expr()?; let message = if self.parse_keyword(Keyword::AS) { @@ -1347,11 +1374,13 @@ impl<'a> Parser<'a> { Ok(Statement::Assert { condition, message }) } + /// Parse `SAVEPOINT` statement. pub fn parse_savepoint(&mut self) -> Result { let name = self.parse_identifier()?; Ok(Statement::Savepoint { name }) } + /// Parse `RELEASE` statement. pub fn parse_release(&mut self) -> Result { let _ = self.parse_keyword(Keyword::SAVEPOINT); let name = self.parse_identifier()?; @@ -1359,11 +1388,13 @@ impl<'a> Parser<'a> { Ok(Statement::ReleaseSavepoint { name }) } + /// Parse `LISTEN` statement. pub fn parse_listen(&mut self) -> Result { let channel = self.parse_identifier()?; Ok(Statement::LISTEN { channel }) } + /// Parse `UNLISTEN` statement. 
pub fn parse_unlisten(&mut self) -> Result { let channel = if self.consume_token(&Token::Mul) { Ident::new(Expr::Wildcard(AttachedToken::empty()).to_string()) @@ -1379,6 +1410,7 @@ impl<'a> Parser<'a> { Ok(Statement::UNLISTEN { channel }) } + /// Parse `NOTIFY` statement. pub fn parse_notify(&mut self) -> Result { let channel = self.parse_identifier()?; let payload = if self.consume_token(&Token::Comma) { @@ -2065,6 +2097,7 @@ impl<'a> Parser<'a> { && self.consume_tokens(&[Token::LParen, Token::Plus, Token::RParen]) } + /// Parse utility options in the form of `(option1, option2 arg2, option3 arg3, ...)` pub fn parse_utility_options(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let options = self.parse_comma_separated(Self::parse_utility_option)?; @@ -2174,6 +2207,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a function call expression named by `name` and return it as an `Expr`. pub fn parse_function(&mut self, name: ObjectName) -> Result { self.parse_function_call(name).map(Expr::Function) } @@ -2282,6 +2316,7 @@ impl<'a> Parser<'a> { } } + /// Parse time-related function `name` possibly followed by `(...)` arguments. pub fn parse_time_functions(&mut self, name: ObjectName) -> Result { let args = if self.consume_token(&Token::LParen) { FunctionArguments::List(self.parse_function_argument_list()?) @@ -2300,6 +2335,7 @@ impl<'a> Parser<'a> { })) } + /// Parse window frame `UNITS` clause: `ROWS`, `RANGE`, or `GROUPS`. pub fn parse_window_frame_units(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -2313,6 +2349,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `WINDOW` frame definition (units and bounds). 
pub fn parse_window_frame(&mut self) -> Result { let units = self.parse_window_frame_units()?; let (start_bound, end_bound) = if self.parse_keyword(Keyword::BETWEEN) { @@ -2330,7 +2367,7 @@ impl<'a> Parser<'a> { }) } - /// Parse `CURRENT ROW` or `{ | UNBOUNDED } { PRECEDING | FOLLOWING }` + /// Parse a window frame bound: `CURRENT ROW` or ` PRECEDING|FOLLOWING`. pub fn parse_window_frame_bound(&mut self) -> Result { if self.parse_keywords(&[Keyword::CURRENT, Keyword::ROW]) { Ok(WindowFrameBound::CurrentRow) @@ -2419,6 +2456,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CASE` expression and return an [`Expr::Case`]. pub fn parse_case_expr(&mut self) -> Result { let case_token = AttachedToken(self.get_current_token().clone()); let mut operand = None; @@ -2451,6 +2489,7 @@ impl<'a> Parser<'a> { }) } + /// Parse an optional `FORMAT` clause for `CAST` expressions. pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::FORMAT) { let value = self.parse_value()?.value; @@ -2463,6 +2502,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional `AT TIME ZONE` clause. pub fn parse_optional_time_zone(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { self.parse_value().map(|v| Some(v.value)) @@ -2560,6 +2600,7 @@ impl<'a> Parser<'a> { Ok(exists_node) } + /// Parse a SQL `EXTRACT` expression e.g. `EXTRACT(YEAR FROM date)`. pub fn parse_extract_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let field = self.parse_date_time_field()?; @@ -2585,6 +2626,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a `CEIL` or `FLOOR` expression. pub fn parse_ceil_floor_expr(&mut self, is_ceil: bool) -> Result { self.expect_token(&Token::LParen)?; let expr = self.parse_expr()?; @@ -2619,6 +2661,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `POSITION` expression. 
pub fn parse_position_expr(&mut self, ident: Ident) -> Result { let between_prec = self.dialect.prec_value(Precedence::Between); let position_expr = self.maybe_parse(|p| { @@ -2643,7 +2686,7 @@ impl<'a> Parser<'a> { } } - // { SUBSTRING | SUBSTR } ( [FROM 1] [FOR 3]) + /// Parse `SUBSTRING`/`SUBSTR` expressions: `SUBSTRING(expr FROM start FOR length)` or `SUBSTR(expr, start, length)`. pub fn parse_substring(&mut self) -> Result { let shorthand = match self.expect_one_of_keywords(&[Keyword::SUBSTR, Keyword::SUBSTRING])? { Keyword::SUBSTR => true, @@ -2676,6 +2719,9 @@ impl<'a> Parser<'a> { }) } + /// Parse an OVERLAY expression. + /// + /// See [Expr::Overlay] pub fn parse_overlay_expr(&mut self) -> Result { // PARSE OVERLAY (EXPR PLACING EXPR FROM 1 [FOR 3]) self.expect_token(&Token::LParen)?; @@ -2744,6 +2790,9 @@ impl<'a> Parser<'a> { } } + /// Parse the `WHERE` field for a `TRIM` expression. + /// + /// See [TrimWhereField] pub fn parse_trim_where(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -2765,6 +2814,9 @@ impl<'a> Parser<'a> { Ok(Expr::Array(Array { elem: exprs, named })) } + /// Parse the `ON OVERFLOW` clause for `LISTAGG`. + /// + /// See [`ListAggOnOverflow`] pub fn parse_listagg_on_overflow(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::ON, Keyword::OVERFLOW]) { if self.parse_keyword(Keyword::ERROR) { @@ -2801,10 +2853,12 @@ impl<'a> Parser<'a> { } } - // This function parses date/time fields for the EXTRACT function-like - // operator, interval qualifiers, and the ceil/floor operations. - // EXTRACT supports a wider set of date/time fields than interval qualifiers, - // so this function may need to be split in two. + /// Parse a date/time field for `EXTRACT`, interval qualifiers, and ceil/floor operations. + /// + /// `EXTRACT` supports a wider set of date/time fields than interval qualifiers, + /// so this function may need to be split in two. 
+ /// + /// See [`DateTimeField`] pub fn parse_date_time_field(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -2878,6 +2932,9 @@ impl<'a> Parser<'a> { } } + /// Parse a `NOT` expression. + /// + /// Represented in the AST as `Expr::UnaryOp` with `UnaryOperator::Not`. pub fn parse_not(&mut self) -> Result { match self.peek_token().token { Token::Word(w) => match w.keyword { @@ -4314,6 +4371,9 @@ impl<'a> Parser<'a> { } #[must_use] + /// Check if the current token is the expected keyword without consuming it. + /// + /// Returns true if the current token matches the expected keyword. pub fn peek_keyword(&self, expected: Keyword) -> bool { matches!(&self.peek_token_ref().token, Token::Word(w) if expected == w.keyword) } @@ -4520,6 +4580,7 @@ impl<'a> Parser<'a> { ) } + /// Parse a list of actions for `GRANT` statements. pub fn parse_actions_list(&mut self) -> Result, ParserError> { let mut values = vec![]; loop { @@ -4669,6 +4730,7 @@ impl<'a> Parser<'a> { Ok(values) } + /// Parse an expression enclosed in parentheses. pub fn parse_parenthesized(&mut self, mut f: F) -> Result where F: FnMut(&mut Parser<'a>) -> Result, @@ -5088,6 +5150,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a `CREATE SCHEMA` statement. pub fn parse_create_schema(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -5144,6 +5207,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CREATE DATABASE` statement. pub fn parse_create_database(&mut self) -> Result { let ine = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let db_name = self.parse_object_name(false)?; @@ -5188,6 +5252,7 @@ impl<'a> Parser<'a> { }) } + /// Parse an optional `USING` clause for `CREATE FUNCTION`. pub fn parse_optional_create_function_using( &mut self, ) -> Result, ParserError> { @@ -5210,6 +5275,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CREATE FUNCTION` statement. 
pub fn parse_create_function( &mut self, or_alter: bool, @@ -5709,6 +5775,7 @@ impl<'a> Parser<'a> { })) } + /// Parse a `CREATE TRIGGER` statement. pub fn parse_create_trigger( &mut self, temporary: bool, @@ -5801,6 +5868,7 @@ impl<'a> Parser<'a> { .into()) } + /// Parse the period part of a trigger (`BEFORE`, `AFTER`, etc.). pub fn parse_trigger_period(&mut self) -> Result { Ok( match self.expect_one_of_keywords(&[ @@ -5822,6 +5890,7 @@ impl<'a> Parser<'a> { ) } + /// Parse the event part of a trigger (`INSERT`, `UPDATE`, etc.). pub fn parse_trigger_event(&mut self) -> Result { Ok( match self.expect_one_of_keywords(&[ @@ -5848,6 +5917,7 @@ impl<'a> Parser<'a> { ) } + /// Parse the `REFERENCING` clause of a trigger. pub fn parse_trigger_referencing(&mut self) -> Result, ParserError> { let refer_type = match self.parse_one_of_keywords(&[Keyword::OLD, Keyword::NEW]) { Some(Keyword::OLD) if self.parse_keyword(Keyword::TABLE) => { @@ -5870,6 +5940,7 @@ impl<'a> Parser<'a> { })) } + /// Parse the execution body of a trigger (`FUNCTION` or `PROCEDURE`). pub fn parse_trigger_exec_body(&mut self) -> Result { Ok(TriggerExecBody { exec_type: match self @@ -5885,6 +5956,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a `CREATE MACRO` statement. pub fn parse_create_macro( &mut self, or_replace: bool, @@ -5932,6 +6004,7 @@ impl<'a> Parser<'a> { Ok(MacroArg { name, default_expr }) } + /// Parse a `CREATE EXTERNAL TABLE` statement. pub fn parse_create_external_table( &mut self, or_replace: bool, @@ -5977,6 +6050,7 @@ impl<'a> Parser<'a> { .build()) } + /// Parse a file format for external tables. pub fn parse_file_format(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -6002,6 +6076,7 @@ impl<'a> Parser<'a> { } } + /// Parse an `ANALYZE FORMAT`. pub fn parse_analyze_format(&mut self) -> Result { let next_token = self.next_token(); match &next_token.token { @@ -6016,6 +6091,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CREATE VIEW` statement. 
pub fn parse_create_view( &mut self, or_alter: bool, @@ -6167,6 +6243,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CREATE ROLE` statement. pub fn parse_create_role(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; @@ -6392,6 +6469,7 @@ impl<'a> Parser<'a> { .into()) } + /// Parse an `OWNER` clause. pub fn parse_owner(&mut self) -> Result { let owner = match self.parse_one_of_keywords(&[Keyword::CURRENT_USER, Keyword::CURRENT_ROLE, Keyword::SESSION_USER]) { Some(Keyword::CURRENT_USER) => Owner::CurrentUser, @@ -6850,6 +6928,7 @@ impl<'a> Parser<'a> { })) } + /// Parse a `DROP` statement. pub fn parse_drop(&mut self) -> Result { // MySQL dialect supports `TEMPORARY` let temporary = dialect_of!(self is MySqlDialect | GenericDialect | DuckDbDialect) @@ -7434,7 +7513,7 @@ impl<'a> Parser<'a> { }) } - // FETCH [ direction { FROM | IN } ] cursor INTO target; + /// Parse `FETCH [direction] { FROM | IN } cursor INTO target;` statement. pub fn parse_fetch_statement(&mut self) -> Result { let direction = if self.parse_keyword(Keyword::NEXT) { FetchDirection::Next @@ -7504,6 +7583,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a `DISCARD` statement. pub fn parse_discard(&mut self) -> Result { let object_type = if self.parse_keyword(Keyword::ALL) { DiscardObject::ALL @@ -7522,6 +7602,7 @@ impl<'a> Parser<'a> { Ok(Statement::Discard { object_type }) } + /// Parse a `CREATE INDEX` statement. pub fn parse_create_index(&mut self, unique: bool) -> Result { let concurrently = self.parse_keyword(Keyword::CONCURRENTLY); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -7613,6 +7694,7 @@ impl<'a> Parser<'a> { })) } + /// Parse a `CREATE EXTENSION` statement. 
pub fn parse_create_extension(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let name = self.parse_identifier()?; @@ -7740,7 +7822,9 @@ impl<'a> Parser<'a> { })) } - //TODO: Implement parsing for Skewed + /// Parse Hive distribution style. + /// + /// TODO: Support parsing for `SKEWED` distribution style. pub fn parse_hive_distribution(&mut self) -> Result { if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { self.expect_token(&Token::LParen)?; @@ -7752,6 +7836,7 @@ impl<'a> Parser<'a> { } } + /// Parse Hive formats. pub fn parse_hive_formats(&mut self) -> Result, ParserError> { let mut hive_format: Option = None; loop { @@ -7807,6 +7892,7 @@ impl<'a> Parser<'a> { Ok(hive_format) } + /// Parse Hive row format. pub fn parse_row_format(&mut self) -> Result { self.expect_keyword_is(Keyword::FORMAT)?; match self.parse_one_of_keywords(&[Keyword::SERDE, Keyword::DELIMITED]) { @@ -7911,6 +7997,7 @@ impl<'a> Parser<'a> { } } + /// Parse `CREATE TABLE` statement. pub fn parse_create_table( &mut self, or_replace: bool, @@ -8435,6 +8522,7 @@ impl<'a> Parser<'a> { Ok(Some(SqlOption::KeyValue { key, value })) } + /// Parse plain options. pub fn parse_plain_options(&mut self) -> Result, ParserError> { let mut options = Vec::new(); @@ -8448,6 +8536,7 @@ impl<'a> Parser<'a> { Ok(options) } + /// Parse optional inline comment. pub fn parse_optional_inline_comment(&mut self) -> Result, ParserError> { let comment = if self.parse_keyword(Keyword::COMMENT) { let has_eq = self.consume_token(&Token::Eq); @@ -8463,6 +8552,7 @@ impl<'a> Parser<'a> { Ok(comment) } + /// Parse comment value. pub fn parse_comment_value(&mut self) -> Result { let next_token = self.next_token(); let value = match next_token.token { @@ -8473,6 +8563,7 @@ impl<'a> Parser<'a> { Ok(value) } + /// Parse optional procedure parameters. 
pub fn parse_optional_procedure_parameters( &mut self, ) -> Result>, ParserError> { @@ -8495,6 +8586,7 @@ impl<'a> Parser<'a> { Ok(Some(params)) } + /// Parse columns and constraints. pub fn parse_columns(&mut self) -> Result<(Vec, Vec), ParserError> { let mut columns = vec![]; let mut constraints = vec![]; @@ -8531,6 +8623,7 @@ impl<'a> Parser<'a> { Ok((columns, constraints)) } + /// Parse procedure parameter. pub fn parse_procedure_param(&mut self) -> Result { let mode = if self.parse_keyword(Keyword::IN) { Some(ArgMode::In) @@ -8557,6 +8650,7 @@ impl<'a> Parser<'a> { }) } + /// Parse column definition. pub fn parse_column_def(&mut self) -> Result { let col_name = self.parse_identifier()?; let data_type = if self.is_column_type_sqlite_unspecified() { @@ -8612,6 +8706,7 @@ impl<'a> Parser<'a> { } } + /// Parse optional column option. pub fn parse_optional_column_option(&mut self) -> Result, ParserError> { if let Some(option) = self.dialect.parse_column_option(self)? { return option; @@ -8965,6 +9060,7 @@ impl<'a> Parser<'a> { })) } + /// Parse optional `CLUSTERED BY` clause for Hive/Generic dialects. pub fn parse_optional_clustered_by(&mut self) -> Result, ParserError> { let clustered_by = if dialect_of!(self is HiveDialect|GenericDialect) && self.parse_keywords(&[Keyword::CLUSTERED, Keyword::BY]) @@ -8994,6 +9090,9 @@ impl<'a> Parser<'a> { Ok(clustered_by) } + /// Parse a referential action used in foreign key clauses. + /// + /// Recognized forms: `RESTRICT`, `CASCADE`, `SET NULL`, `NO ACTION`, `SET DEFAULT`. pub fn parse_referential_action(&mut self) -> Result { if self.parse_keyword(Keyword::RESTRICT) { Ok(ReferentialAction::Restrict) @@ -9013,6 +9112,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `MATCH` kind for constraint references: `FULL`, `PARTIAL`, or `SIMPLE`. 
pub fn parse_match_kind(&mut self) -> Result { if self.parse_keyword(Keyword::FULL) { Ok(ConstraintReferenceMatchKind::Full) @@ -9025,6 +9125,7 @@ impl<'a> Parser<'a> { } } + /// Parse optional constraint characteristics such as `DEFERRABLE`, `INITIALLY` and `ENFORCED`. pub fn parse_constraint_characteristics( &mut self, ) -> Result, ParserError> { @@ -9062,6 +9163,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional table constraint (e.g. `PRIMARY KEY`, `UNIQUE`, `FOREIGN KEY`, `CHECK`). pub fn parse_optional_table_constraint( &mut self, ) -> Result, ParserError> { @@ -9277,6 +9379,7 @@ impl<'a> Parser<'a> { }) } + /// Optionally parse a parenthesized list of `SqlOption`s introduced by `keyword`. pub fn maybe_parse_options( &mut self, keyword: Keyword, @@ -9289,6 +9392,7 @@ impl<'a> Parser<'a> { Ok(None) } + /// Parse a parenthesized list of `SqlOption`s following `keyword`, or return an empty vec. pub fn parse_options(&mut self, keyword: Keyword) -> Result, ParserError> { if self.parse_keyword(keyword) { self.expect_token(&Token::LParen)?; @@ -9300,6 +9404,7 @@ impl<'a> Parser<'a> { } } + /// Parse options introduced by one of `keywords` followed by a parenthesized list. pub fn parse_options_with_keywords( &mut self, keywords: &[Keyword], @@ -9314,6 +9419,7 @@ impl<'a> Parser<'a> { } } + /// Parse an index type token (e.g. `BTREE`, `HASH`, or a custom identifier). pub fn parse_index_type(&mut self) -> Result { Ok(if self.parse_keyword(Keyword::BTREE) { IndexType::BTree @@ -9339,6 +9445,7 @@ impl<'a> Parser<'a> { /// ```sql //// USING BTREE (name, age DESC) /// ``` + /// Optionally parse `USING ` and return the parsed `IndexType` if present. pub fn parse_optional_using_then_index_type( &mut self, ) -> Result, ParserError> { @@ -9351,11 +9458,13 @@ impl<'a> Parser<'a> { /// Parse `[ident]`, mostly `ident` is name, like: /// `window_name`, `index_name`, ... + /// Parse an optional identifier, returning `Some(Ident)` if present. 
pub fn parse_optional_ident(&mut self) -> Result, ParserError> { self.maybe_parse(|parser| parser.parse_identifier()) } #[must_use] + /// Parse optional `KEY` or `INDEX` display tokens used in index/constraint declarations. pub fn parse_index_type_display(&mut self) -> KeyOrIndexDisplay { if self.parse_keyword(Keyword::KEY) { KeyOrIndexDisplay::Key @@ -9366,6 +9475,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional index option such as `USING ` or `COMMENT `. pub fn parse_optional_index_option(&mut self) -> Result, ParserError> { if let Some(index_type) = self.parse_optional_using_then_index_type()? { Ok(Some(IndexOption::Using(index_type))) @@ -9377,6 +9487,7 @@ impl<'a> Parser<'a> { } } + /// Parse zero or more index options and return them as a vector. pub fn parse_index_options(&mut self) -> Result, ParserError> { let mut options = Vec::new(); @@ -9388,6 +9499,7 @@ impl<'a> Parser<'a> { } } + /// Parse a single `SqlOption` used by various dialect-specific DDL statements. pub fn parse_sql_option(&mut self) -> Result { let is_mssql = dialect_of!(self is MsSqlDialect|GenericDialect); @@ -9411,6 +9523,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `CLUSTERED` table option (MSSQL-specific syntaxes supported). pub fn parse_option_clustered(&mut self) -> Result { if self.parse_keywords(&[ Keyword::CLUSTERED, @@ -9447,6 +9560,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `PARTITION(...) FOR VALUES(...)` table option. pub fn parse_option_partition(&mut self) -> Result { self.expect_keyword_is(Keyword::PARTITION)?; self.expect_token(&Token::LParen)?; @@ -9476,6 +9590,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a parenthesized list of partition expressions and return a `Partition` value. 
pub fn parse_partition(&mut self) -> Result { self.expect_token(&Token::LParen)?; let partitions = self.parse_comma_separated(Parser::parse_expr)?; @@ -9483,6 +9598,7 @@ impl<'a> Parser<'a> { Ok(Partition::Partitions(partitions)) } + /// Parse a parenthesized `SELECT` projection used for projection-based operations. pub fn parse_projection_select(&mut self) -> Result { self.expect_token(&Token::LParen)?; self.expect_keyword_is(Keyword::SELECT)?; @@ -9496,6 +9612,7 @@ impl<'a> Parser<'a> { order_by, }) } + /// Parse `ALTER TABLE ... ADD PROJECTION ...` operation. pub fn parse_alter_table_add_projection(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let name = self.parse_identifier()?; @@ -9507,6 +9624,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a single `ALTER TABLE` operation and return an `AlterTableOperation`. pub fn parse_alter_table_operation(&mut self) -> Result { let operation = if self.parse_keyword(Keyword::ADD) { if let Some(constraint) = self.parse_optional_table_constraint()? { @@ -9992,6 +10110,7 @@ impl<'a> Parser<'a> { } } + /// Parse an `ALTER ` statement and dispatch to the appropriate alter handler. pub fn parse_alter(&mut self) -> Result { let object_type = self.expect_one_of_keywords(&[ Keyword::VIEW, @@ -10040,6 +10159,8 @@ impl<'a> Parser<'a> { Keyword::OPERATOR => { if self.parse_keyword(Keyword::FAMILY) { self.parse_alter_operator_family() + } else if self.parse_keyword(Keyword::CLASS) { + self.parse_alter_operator_class() } else { self.parse_alter_operator() } @@ -10099,6 +10220,7 @@ impl<'a> Parser<'a> { .into()) } + /// Parse an `ALTER VIEW` statement. 
pub fn parse_alter_view(&mut self) -> Result { let name = self.parse_object_name(false)?; let columns = self.parse_parenthesized_column_list(Optional, false)?; @@ -10439,8 +10561,40 @@ impl<'a> Parser<'a> { })) } - // Parse a [Statement::AlterSchema] - // ALTER SCHEMA [ IF EXISTS ] schema_name + /// Parse an `ALTER OPERATOR CLASS` statement. + /// + /// Handles operations like `RENAME TO`, `OWNER TO`, and `SET SCHEMA`. + pub fn parse_alter_operator_class(&mut self) -> Result { + let name = self.parse_object_name(false)?; + self.expect_keyword(Keyword::USING)?; + let using = self.parse_identifier()?; + + let operation = if self.parse_keywords(&[Keyword::RENAME, Keyword::TO]) { + let new_name = self.parse_object_name(false)?; + AlterOperatorClassOperation::RenameTo { new_name } + } else if self.parse_keywords(&[Keyword::OWNER, Keyword::TO]) { + let owner = self.parse_owner()?; + AlterOperatorClassOperation::OwnerTo(owner) + } else if self.parse_keywords(&[Keyword::SET, Keyword::SCHEMA]) { + let schema_name = self.parse_object_name(false)?; + AlterOperatorClassOperation::SetSchema { schema_name } + } else { + return self.expected_ref( + "RENAME TO, OWNER TO, or SET SCHEMA after ALTER OPERATOR CLASS", + self.peek_token_ref(), + ); + }; + + Ok(Statement::AlterOperatorClass(AlterOperatorClass { + name, + using, + operation, + })) + } + + /// Parse an `ALTER SCHEMA` statement. + /// + /// Supports operations such as setting options, renaming, adding/dropping replicas, and changing owner. pub fn parse_alter_schema(&mut self) -> Result { self.expect_keywords(&[Keyword::ALTER, Keyword::SCHEMA])?; let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); @@ -10580,6 +10734,7 @@ impl<'a> Parser<'a> { })) } + /// Parse a `CLOSE` cursor statement. 
pub fn parse_close(&mut self) -> Result { let cursor = if self.parse_keyword(Keyword::ALL) { CloseCursor::All @@ -10910,6 +11065,7 @@ impl<'a> Parser<'a> { self.parse_tab_value() } + /// Parse a single tab-separated value row used by `COPY` payload parsing. pub fn parse_tab_value(&mut self) -> Vec> { let mut values = vec![]; let mut content = String::from(""); @@ -11219,6 +11375,7 @@ impl<'a> Parser<'a> { self.expected("unicode normalization form", self.peek_token()) } + /// Parse parenthesized enum members, used with `ENUM(...)` type definitions. pub fn parse_enum_values(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let values = self.parse_comma_separated(|parser| { @@ -11684,6 +11841,7 @@ impl<'a> Parser<'a> { Ok(columns) } + /// Parse a parenthesized, comma-separated list of single-quoted strings. pub fn parse_string_values(&mut self) -> Result, ParserError> { self.expect_token(&Token::LParen)?; let mut values = Vec::new(); @@ -11898,6 +12056,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional `GROUP BY` clause, returning `Some(GroupByExpr)` when present. pub fn parse_optional_group_by(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { let expressions = if self.parse_keyword(Keyword::ALL) { @@ -11954,6 +12113,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional `ORDER BY` clause, returning `Some(OrderBy)` when present. pub fn parse_optional_order_by(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { let order_by = @@ -12414,6 +12574,7 @@ impl<'a> Parser<'a> { self.parse_parenthesized_column_list_inner(optional, allow_empty, |p| p.parse_identifier()) } + /// Parse a parenthesized list of compound identifiers as expressions. pub fn parse_parenthesized_compound_identifier_list( &mut self, optional: IsOptional, @@ -12488,6 +12649,7 @@ impl<'a> Parser<'a> { } } + /// Parse an unsigned precision value enclosed in parentheses, e.g. `(10)`. 
pub fn parse_precision(&mut self) -> Result { self.expect_token(&Token::LParen)?; let n = self.parse_literal_uint()?; @@ -12495,6 +12657,7 @@ impl<'a> Parser<'a> { Ok(n) } + /// Parse an optional precision `(n)` and return it as `Some(n)` when present. pub fn parse_optional_precision(&mut self) -> Result, ParserError> { if self.consume_token(&Token::LParen) { let n = self.parse_literal_uint()?; @@ -12604,6 +12767,7 @@ impl<'a> Parser<'a> { Ok((precision, time_zone)) } + /// Parse an optional character length specification `(n | MAX [CHARACTERS|OCTETS])`. pub fn parse_optional_character_length( &mut self, ) -> Result, ParserError> { @@ -12616,6 +12780,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional binary length specification like `(n)`. pub fn parse_optional_binary_length(&mut self) -> Result, ParserError> { if self.consume_token(&Token::LParen) { let binary_length = self.parse_binary_length()?; @@ -12626,6 +12791,7 @@ impl<'a> Parser<'a> { } } + /// Parse a character length, handling `MAX` or integer lengths with optional units. pub fn parse_character_length(&mut self) -> Result { if self.parse_keyword(Keyword::MAX) { return Ok(CharacterLength::Max); @@ -12641,6 +12807,7 @@ impl<'a> Parser<'a> { Ok(CharacterLength::IntegerLength { length, unit }) } + /// Parse a binary length specification, returning `BinaryLength`. pub fn parse_binary_length(&mut self) -> Result { if self.parse_keyword(Keyword::MAX) { return Ok(BinaryLength::Max); @@ -12649,6 +12816,7 @@ impl<'a> Parser<'a> { Ok(BinaryLength::IntegerLength { length }) } + /// Parse an optional `(precision[, scale])` and return `(Option, Option)`. pub fn parse_optional_precision_scale( &mut self, ) -> Result<(Option, Option), ParserError> { @@ -12666,6 +12834,7 @@ impl<'a> Parser<'a> { } } + /// Parse exact-number precision/scale info like `(precision[, scale])` for decimal types. 
pub fn parse_exact_number_optional_precision_scale( &mut self, ) -> Result { @@ -12709,6 +12878,7 @@ impl<'a> Parser<'a> { } } + /// Parse optional type modifiers appearing in parentheses e.g. `(UNSIGNED, ZEROFILL)`. pub fn parse_optional_type_modifiers(&mut self) -> Result>, ParserError> { if self.consume_token(&Token::LParen) { let mut modifiers = Vec::new(); @@ -12756,6 +12926,7 @@ impl<'a> Parser<'a> { Ok(Box::new(SetExpr::Delete(self.parse_delete(delete_token)?))) } + /// Parse a `DELETE` statement and return `Statement::Delete`. pub fn parse_delete(&mut self, delete_token: TokenWithSpan) -> Result { let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. @@ -12814,7 +12985,8 @@ impl<'a> Parser<'a> { })) } - // KILL [CONNECTION | QUERY | MUTATION] processlist_id + /// Parse a `KILL` statement, optionally specifying `CONNECTION`, `QUERY`, or `MUTATION`. + /// KILL [CONNECTION | QUERY | MUTATION] processlist_id pub fn parse_kill(&mut self) -> Result { let modifier_keyword = self.parse_one_of_keywords(&[Keyword::CONNECTION, Keyword::QUERY, Keyword::MUTATION]); @@ -12840,6 +13012,7 @@ impl<'a> Parser<'a> { Ok(Statement::Kill { modifier, id }) } + /// Parse an `EXPLAIN` statement, handling dialect-specific options and modifiers. pub fn parse_explain( &mut self, describe_alias: DescribeAlias, @@ -13520,6 +13693,7 @@ impl<'a> Parser<'a> { Ok(expr.into()) } + /// Parse a set operator token into its `SetOperator` variant. pub fn parse_set_operator(&mut self, token: &Token) -> Option { match token { Token::Word(w) if w.keyword == Keyword::UNION => Some(SetOperator::Union), @@ -13530,6 +13704,7 @@ impl<'a> Parser<'a> { } } + /// Parse a set quantifier (e.g., `ALL`, `DISTINCT BY NAME`) for the given set operator. 
pub fn parse_set_quantifier(&mut self, op: &Option) -> SetQuantifier { match op { Some( @@ -13818,6 +13993,7 @@ impl<'a> Parser<'a> { res } + /// Parse a `CONNECT BY` clause (Oracle-style hierarchical query support). pub fn parse_connect_by(&mut self) -> Result { let (condition, relationships) = if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) { let relationships = self.with_state(ParserState::ConnectBy, |parser| { @@ -14127,6 +14303,7 @@ impl<'a> Parser<'a> { self.expected("equals sign or TO", self.peek_token()) } + /// Parse session parameter assignments after `SET` when no `=` or `TO` is present. pub fn parse_set_session_params(&mut self) -> Result { if self.parse_keyword(Keyword::STATISTICS) { let topic = match self.parse_one_of_keywords(&[ @@ -14201,6 +14378,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `SHOW` statement and dispatch to specific SHOW handlers. pub fn parse_show(&mut self) -> Result { let terse = self.parse_keyword(Keyword::TERSE); let extended = self.parse_keyword(Keyword::EXTENDED); @@ -14288,6 +14466,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `SHOW CREATE ` returning the corresponding `ShowCreate` statement. pub fn parse_show_create(&mut self) -> Result { let obj_type = match self.expect_one_of_keywords(&[ Keyword::TABLE, @@ -14313,6 +14492,7 @@ impl<'a> Parser<'a> { Ok(Statement::ShowCreate { obj_type, obj_name }) } + /// Parse `SHOW COLUMNS`/`SHOW FIELDS` and return a `ShowColumns` statement. pub fn parse_show_columns( &mut self, extended: bool, @@ -14358,16 +14538,19 @@ impl<'a> Parser<'a> { }) } + /// Parse `SHOW FUNCTIONS` and optional filter. pub fn parse_show_functions(&mut self) -> Result { let filter = self.parse_show_statement_filter()?; Ok(Statement::ShowFunctions { filter }) } + /// Parse `SHOW COLLATION` and optional filter. 
pub fn parse_show_collation(&mut self) -> Result { let filter = self.parse_show_statement_filter()?; Ok(Statement::ShowCollation { filter }) } + /// Parse an optional filter used by `SHOW` statements (LIKE, ILIKE, WHERE, or literal). pub fn parse_show_statement_filter( &mut self, ) -> Result, ParserError> { @@ -14391,6 +14574,7 @@ impl<'a> Parser<'a> { } } + /// Parse a `USE` statement (database/catalog/schema/warehouse/role selection). pub fn parse_use(&mut self) -> Result { // Determine which keywords are recognized by the current dialect let parsed_keyword = if dialect_of!(self is HiveDialect) { @@ -14442,6 +14626,7 @@ impl<'a> Parser<'a> { } } + /// Parse a table factor followed by any join clauses, returning `TableWithJoins`. pub fn parse_table_and_joins(&mut self) -> Result { let relation = self.parse_table_factor()?; // Note that for keywords to be properly handled here, they need to be @@ -15601,6 +15786,7 @@ impl<'a> Parser<'a> { Ok(Some(res)) } + /// Parse a derived table factor (a parenthesized subquery), handling optional LATERAL. pub fn parse_derived_table_factor( &mut self, lateral: IsLateral, @@ -15665,6 +15851,7 @@ impl<'a> Parser<'a> { Ok(ExprWithAlias { expr, alias }) } + /// Parse a PIVOT table factor (ClickHouse/Oracle style pivot), returning a TableFactor. pub fn parse_pivot_table_factor( &mut self, table: TableFactor, @@ -15718,6 +15905,7 @@ impl<'a> Parser<'a> { }) } + /// Parse an UNPIVOT table factor, returning a TableFactor. pub fn parse_unpivot_table_factor( &mut self, table: TableFactor, @@ -15751,6 +15939,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a JOIN constraint (`NATURAL`, `ON `, `USING (...)`, or no constraint). pub fn parse_join_constraint(&mut self, natural: bool) -> Result { if natural { Ok(JoinConstraint::Natural) @@ -15876,6 +16065,7 @@ impl<'a> Parser<'a> { Ok(values) } + /// Parse privileges and optional target objects for GRANT/DENY/REVOKE statements. 
pub fn parse_grant_deny_revoke_privileges_objects( &mut self, ) -> Result<(Privileges, Option), ParserError> { @@ -16089,6 +16279,7 @@ impl<'a> Parser<'a> { } } + /// Parse a single grantable permission/action (used within GRANT statements). pub fn parse_grant_permission(&mut self) -> Result { fn parse_columns(parser: &mut Parser) -> Result>, ParserError> { let columns = parser.parse_parenthesized_column_list(Optional, false)?; @@ -16340,6 +16531,7 @@ impl<'a> Parser<'a> { } } + /// Parse a grantee name, possibly with a host qualifier (user@host). pub fn parse_grantee_name(&mut self) -> Result { let mut name = self.parse_object_name(false)?; if self.dialect.supports_user_host_grantee() @@ -16636,9 +16828,9 @@ impl<'a> Parser<'a> { } } - // Parses input format clause used for [ClickHouse]. - // - // + /// Parses input format clause used for ClickHouse. + /// + /// pub fn parse_input_format_clause(&mut self) -> Result { let ident = self.parse_identifier()?; let values = self @@ -16674,6 +16866,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional `PARTITION (...)` clause for INSERT statements. pub fn parse_insert_partition(&mut self) -> Result>, ParserError> { if self.parse_keyword(Keyword::PARTITION) { self.expect_token(&Token::LParen)?; @@ -16685,6 +16878,7 @@ impl<'a> Parser<'a> { } } + /// Parse optional Hive `INPUTFORMAT ... SERDE ...` clause used by LOAD DATA. pub fn parse_load_data_table_format( &mut self, ) -> Result, ParserError> { @@ -16711,6 +16905,7 @@ impl<'a> Parser<'a> { Ok(Box::new(SetExpr::Update(self.parse_update(update_token)?))) } + /// Parse an `UPDATE` statement and return `Statement::Update`. pub fn parse_update(&mut self, update_token: TokenWithSpan) -> Result { let or = self.parse_conflict_clause(); let table = self.parse_table_and_joins()?; @@ -16778,6 +16973,7 @@ impl<'a> Parser<'a> { } } + /// Parse a single function argument, handling named and unnamed variants. 
pub fn parse_function_args(&mut self) -> Result { let arg = if self.dialect.supports_named_fn_args_with_expr_name() { self.maybe_parse(|p| { @@ -16837,6 +17033,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional, comma-separated list of function arguments (consumes closing paren). pub fn parse_optional_args(&mut self) -> Result, ParserError> { if self.consume_token(&Token::RParen) { Ok(vec![]) @@ -17222,6 +17419,7 @@ impl<'a> Parser<'a> { Ok(opt_replace) } + /// Parse a single element of a `REPLACE (...)` select-item clause. pub fn parse_replace_elements(&mut self) -> Result { let expr = self.parse_expr()?; let as_keyword = self.parse_keyword(Keyword::AS); @@ -17317,6 +17515,7 @@ impl<'a> Parser<'a> { // Parse a WITH FILL clause (ClickHouse dialect) // that follow the WITH FILL keywords in a ORDER BY clause + /// Parse a `WITH FILL` clause used in ORDER BY (ClickHouse dialect). pub fn parse_with_fill(&mut self) -> Result { let from = if self.parse_keyword(Keyword::FROM) { Some(self.parse_expr()?) 
@@ -17339,8 +17538,8 @@ impl<'a> Parser<'a> { Ok(WithFill { from, to, step }) } - // Parse a set of comma separated INTERPOLATE expressions (ClickHouse dialect) - // that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier + /// Parse a set of comma separated INTERPOLATE expressions (ClickHouse dialect) + /// that follow the INTERPOLATE keyword in an ORDER BY clause with the WITH FILL modifier pub fn parse_interpolations(&mut self) -> Result, ParserError> { if !self.parse_keyword(Keyword::INTERPOLATE) { return Ok(None); @@ -17360,7 +17559,7 @@ impl<'a> Parser<'a> { Ok(Some(Interpolate { exprs: None })) } - // Parse a INTERPOLATE expression (ClickHouse dialect) + /// Parse a INTERPOLATE expression (ClickHouse dialect) pub fn parse_interpolation(&mut self) -> Result { let column = self.parse_identifier()?; let expr = if self.parse_keyword(Keyword::AS) { @@ -17477,6 +17676,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a VALUES clause pub fn parse_values( &mut self, allow_empty: bool, @@ -17506,6 +17706,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a 'START TRANSACTION' statement pub fn parse_start_transaction(&mut self) -> Result { self.expect_keyword_is(Keyword::TRANSACTION)?; Ok(Statement::StartTransaction { @@ -17519,6 +17720,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a 'BEGIN' statement pub fn parse_begin(&mut self) -> Result { let modifier = if !self.dialect.supports_start_transaction_modifier() { None @@ -17551,6 +17753,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a 'BEGIN ... EXCEPTION ... 
END' block pub fn parse_begin_exception_end(&mut self) -> Result { let statements = self.parse_statement_list(&[Keyword::EXCEPTION, Keyword::END])?; @@ -17596,6 +17799,7 @@ impl<'a> Parser<'a> { }) } + /// Parse an 'END' statement pub fn parse_end(&mut self) -> Result { let modifier = if !self.dialect.supports_end_transaction_modifier() { None @@ -17613,6 +17817,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a list of transaction modes pub fn parse_transaction_modes(&mut self) -> Result, ParserError> { let mut modes = vec![]; let mut required = false; @@ -17651,6 +17856,7 @@ impl<'a> Parser<'a> { Ok(modes) } + /// Parse a 'COMMIT' statement pub fn parse_commit(&mut self) -> Result { Ok(Statement::Commit { chain: self.parse_commit_rollback_chain()?, @@ -17659,6 +17865,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a 'ROLLBACK' statement pub fn parse_rollback(&mut self) -> Result { let chain = self.parse_commit_rollback_chain()?; let savepoint = self.parse_rollback_savepoint()?; @@ -17666,6 +17873,7 @@ impl<'a> Parser<'a> { Ok(Statement::Rollback { chain, savepoint }) } + /// Parse an optional `AND [NO] CHAIN` clause for `COMMIT` and `ROLLBACK` statements pub fn parse_commit_rollback_chain(&mut self) -> Result { let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); if self.parse_keyword(Keyword::AND) { @@ -17677,6 +17885,7 @@ impl<'a> Parser<'a> { } } + /// Parse an optional 'TO SAVEPOINT savepoint_name' clause for ROLLBACK statements pub fn parse_rollback_savepoint(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::TO) { let _ = self.parse_keyword(Keyword::SAVEPOINT); @@ -17716,6 +17925,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a single `RAISERROR` option pub fn parse_raiserror_option(&mut self) -> Result { match self.expect_one_of_keywords(&[Keyword::LOG, Keyword::NOWAIT, Keyword::SETERROR])? 
{ Keyword::LOG => Ok(RaisErrorOption::Log), @@ -17728,12 +17938,14 @@ impl<'a> Parser<'a> { } } + /// Parse a SQL `DEALLOCATE` statement pub fn parse_deallocate(&mut self) -> Result { let prepare = self.parse_keyword(Keyword::PREPARE); let name = self.parse_identifier()?; Ok(Statement::Deallocate { name, prepare }) } + /// Parse a SQL `EXECUTE` statement pub fn parse_execute(&mut self) -> Result { let name = if self.dialect.supports_execute_immediate() && self.parse_keyword(Keyword::IMMEDIATE) @@ -17792,6 +18004,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a SQL `PREPARE` statement pub fn parse_prepare(&mut self) -> Result { let name = self.parse_identifier()?; @@ -17810,6 +18023,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a SQL `UNLOAD` statement pub fn parse_unload(&mut self) -> Result { self.expect_keyword(Keyword::UNLOAD)?; self.expect_token(&Token::LParen)?; @@ -17872,7 +18086,7 @@ impl<'a> Parser<'a> { } } - // PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] + /// PRAGMA [schema-name '.'] pragma-name [('=' pragma-value) | '(' pragma-value ')'] pub fn parse_pragma(&mut self) -> Result { let name = self.parse_object_name(false)?; if self.consume_token(&Token::LParen) { @@ -18104,6 +18318,7 @@ impl<'a> Parser<'a> { self.index } + /// Parse a named window definition. pub fn parse_named_window(&mut self) -> Result { let ident = self.parse_identifier()?; self.expect_keyword_is(Keyword::AS)?; @@ -18119,6 +18334,7 @@ impl<'a> Parser<'a> { Ok(NamedWindowDefinition(ident, window_expr)) } + /// Parse `CREATE PROCEDURE` statement. pub fn parse_create_procedure(&mut self, or_alter: bool) -> Result { let name = self.parse_object_name(false)?; let params = self.parse_optional_procedure_parameters()?; @@ -18142,6 +18358,7 @@ impl<'a> Parser<'a> { }) } + /// Parse a window specification. 
pub fn parse_window_spec(&mut self) -> Result { let window_name = match self.peek_token().token { Token::Word(word) if word.keyword == Keyword::NoKeyword => { @@ -18176,6 +18393,7 @@ impl<'a> Parser<'a> { }) } + /// Parse `CREATE TYPE` statement. pub fn parse_create_type(&mut self) -> Result { let name = self.parse_object_name(false)?; @@ -18891,6 +19109,7 @@ fn maybe_prefixed_expr(expr: Expr, prefix: Option) -> Expr { impl Word { #[deprecated(since = "0.54.0", note = "please use `into_ident` instead")] + /// Convert this word into an [`Ident`] identifier pub fn to_ident(&self, span: Span) -> Ident { Ident { value: self.value.clone(), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8666563ace..eb935a4f28 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -401,10 +401,17 @@ impl fmt::Display for Token { } impl Token { + /// Create a `Token::Word` from an unquoted `keyword`. + /// + /// The lookup is case-insensitive; unknown values become `Keyword::NoKeyword`. pub fn make_keyword(keyword: &str) -> Self { Token::make_word(keyword, None) } + /// Create a `Token::Word` from `word` with an optional `quote_style`. + /// + /// When `quote_style` is `None`, the parser attempts a case-insensitive keyword + /// lookup and sets the `Word::keyword` accordingly. pub fn make_word(word: &str, quote_style: Option) -> Self { let word_uppercase = word.to_uppercase(); Token::Word(Word { @@ -460,14 +467,27 @@ impl Word { } } +/// Represents whitespace in the input: spaces, newlines, tabs and comments. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Whitespace { + /// A single space character. Space, + /// A newline character. Newline, + /// A tab character. Tab, - SingleLineComment { comment: String, prefix: String }, + /// A single-line comment (e.g. `-- comment` or `# comment`). 
+ /// The `comment` field contains the text, and `prefix` contains the comment prefix. + SingleLineComment { + /// The content of the comment (without the prefix). + comment: String, + /// The prefix used for the comment (for example `--` or `#`). + prefix: String, + }, + + /// A multi-line comment (without the `/* ... */` delimiters). MultiLineComment(String), } @@ -569,7 +589,9 @@ impl From<(u64, u64)> for Location { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Span { + /// Start `Location` (inclusive). pub start: Location, + /// End `Location` (inclusive). pub end: Location, } @@ -691,8 +713,11 @@ pub type TokenWithLocation = TokenWithSpan; #[derive(Debug, Clone, Hash, Ord, PartialOrd, Eq, PartialEq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// A `Token` together with its `Span` (location in the source). pub struct TokenWithSpan { + /// The token value. pub token: Token, + /// The span covering the token in the input. pub span: Span, } @@ -736,10 +761,12 @@ impl fmt::Display for TokenWithSpan { } } -/// Tokenizer error +/// An error reported by the tokenizer, with a human-readable `message` and a `location`. #[derive(Debug, PartialEq, Eq)] pub struct TokenizerError { + /// A descriptive error message. pub message: String, + /// The `Location` where the error was detected. 
pub location: Location, } @@ -754,8 +781,8 @@ impl std::error::Error for TokenizerError {} struct State<'a> { peekable: Peekable>, - pub line: u64, - pub col: u64, + line: u64, + col: u64, } impl State<'_> { @@ -780,6 +807,7 @@ impl State<'_> { self.peekable.peek() } + /// Return the current `Location` (line and column) pub fn location(&self) -> Location { Location { line: self.line, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 24707604ad..325e3939e7 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -7604,6 +7604,156 @@ fn parse_alter_operator_family() { .is_err()); } +#[test] +fn parse_alter_operator_class() { + // Test ALTER OPERATOR CLASS ... RENAME TO + let sql = "ALTER OPERATOR CLASS int_ops USING btree RENAME TO integer_ops"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::RenameTo { + new_name: ObjectName::from(vec![Ident::new("integer_ops")]), + }, + }) + ); + + // Test ALTER OPERATOR CLASS ... OWNER TO + let sql = "ALTER OPERATOR CLASS int_ops USING btree OWNER TO joe"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::OwnerTo(Owner::Ident(Ident::new("joe"))), + }) + ); + + // Test ALTER OPERATOR CLASS ... OWNER TO CURRENT_USER + let sql = "ALTER OPERATOR CLASS int_ops USING btree OWNER TO CURRENT_USER"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::OwnerTo(Owner::CurrentUser), + }) + ); + + // Test ALTER OPERATOR CLASS ... 
SET SCHEMA + let sql = "ALTER OPERATOR CLASS int_ops USING btree SET SCHEMA new_schema"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::SetSchema { + schema_name: ObjectName::from(vec![Ident::new("new_schema")]), + }, + }) + ); + + // Test with schema-qualified operator class name + let sql = "ALTER OPERATOR CLASS myschema.int_ops USING btree RENAME TO integer_ops"; + assert_eq!( + pg_and_generic().verified_stmt(sql), + Statement::AlterOperatorClass(AlterOperatorClass { + name: ObjectName::from(vec![Ident::new("myschema"), Ident::new("int_ops")]), + using: Ident::new("btree"), + operation: AlterOperatorClassOperation::RenameTo { + new_name: ObjectName::from(vec![Ident::new("integer_ops")]), + }, + }) + ); + + // Test with different index methods + for index_method in &["hash", "gist", "gin", "spgist", "brin"] { + let sql = format!( + "ALTER OPERATOR CLASS int_ops USING {} RENAME TO integer_ops", + index_method + ); + pg_and_generic().verified_stmt(&sql); + } + + // Test error cases + // Missing USING clause + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops RENAME TO integer_ops") + .is_err()); + + // Invalid operation + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree INVALID_OPERATION") + .is_err()); + + // Missing new name for RENAME TO + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree RENAME TO") + .is_err()); + + // Missing owner for OWNER TO + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree OWNER TO") + .is_err()); + + // Missing schema for SET SCHEMA + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree SET SCHEMA") + .is_err()); + + // Invalid new name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree RENAME 
TO 123invalid") + .is_err()); + + // Invalid owner + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree OWNER TO 123invalid") + .is_err()); + + // Invalid schema name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING btree SET SCHEMA 123invalid") + .is_err()); + + // Missing operator class name + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS USING btree RENAME TO integer_ops") + .is_err()); + + // Extra tokens at end + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR CLASS int_ops USING btree RENAME TO integer_ops EXTRA" + ) + .is_err()); + + // Missing index method + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops RENAME TO integer_ops") + .is_err()); + + // Invalid index method + assert!(pg() + .parse_sql_statements("ALTER OPERATOR CLASS int_ops USING 123invalid RENAME TO integer_ops") + .is_err()); + + // Trying to use ADD operation (only valid for OPERATOR FAMILY) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR CLASS int_ops USING btree ADD OPERATOR 1 < (INT4, INT2)" + ) + .is_err()); + + // Trying to use DROP operation (only valid for OPERATOR FAMILY) + assert!(pg() + .parse_sql_statements( + "ALTER OPERATOR CLASS int_ops USING btree DROP OPERATOR 1 (INT4, INT2)" + ) + .is_err()); +} + #[test] fn parse_drop_operator_family() { for if_exists in [true, false] { From 44c402385e15e0d96c68d151dcb8019fe31687f8 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Sat, 10 Jan 2026 11:29:29 +0100 Subject: [PATCH 026/121] Added missing `Copy` derives (#2158) --- src/ast/comments.rs | 6 +++--- src/ast/data_type.rs | 2 +- src/ast/ddl.rs | 14 +++++++------- src/ast/dml.rs | 2 +- src/ast/mod.rs | 2 +- src/ast/query.rs | 18 +++++++++--------- src/ast/value.rs | 2 +- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/ast/comments.rs b/src/ast/comments.rs index b398474b38..7744c89e2c 100644 --- a/src/ast/comments.rs +++ b/src/ast/comments.rs @@ -25,7 +25,7 @@ 
use core::{ use crate::tokenizer::{Location, Span}; /// An opaque container for comments from a parse SQL source code. -#[derive(Default, Debug)] +#[derive(Default, Debug, Clone)] pub struct Comments(Vec); impl Comments { @@ -151,7 +151,7 @@ impl From for Vec { } /// A source code comment with information of its entire span. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct CommentWithSpan { /// The source code comment iself pub comment: Comment, @@ -168,7 +168,7 @@ impl Deref for CommentWithSpan { } /// A unified type of the different source code comment formats. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Comment { /// A single line comment, typically introduced with a prefix and spanning /// until end-of-line or end-of-file in the source code. diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 535a52323b..285eec5054 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -894,7 +894,7 @@ fn format_clickhouse_datetime_precision_and_timezone( } /// Type of brackets used for `STRUCT` literals. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum StructBracketKind { diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 6c83144211..23fcc01018 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -570,7 +570,7 @@ impl fmt::Display for AlterPolicyOperation { /// [MySQL] `ALTER TABLE` algorithm. 
/// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Algorithm option for `ALTER TABLE` operations (MySQL-specific). @@ -599,7 +599,7 @@ impl fmt::Display for AlterTableAlgorithm { /// [MySQL] `ALTER TABLE` lock. /// /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/alter-table.html -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Locking behavior for `ALTER TABLE` (MySQL-specific). @@ -1445,7 +1445,7 @@ impl fmt::Display for IndexOption { /// [PostgreSQL] unique index nulls handling option: `[ NULLS [ NOT ] DISTINCT ]` /// /// [PostgreSQL]: https://www.postgresql.org/docs/17/sql-altertable.html -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum NullsDistinctOption { @@ -1756,7 +1756,7 @@ pub struct IdentityParameters { /// ORDER | NOORDER /// ``` /// [Snowflake]: https://docs.snowflake.com/en/sql-reference/sql/create-table -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum IdentityPropertyOrder { @@ -2128,7 +2128,7 @@ impl fmt::Display for ColumnOption { /// `GeneratedAs`s are modifiers that follow a column option in a `generated`. 
/// 'ExpStored' is used for a column generated from an expression and stored. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GeneratedAs { @@ -2142,7 +2142,7 @@ pub enum GeneratedAs { /// `GeneratedExpressionMode`s are modifiers that follow an expression in a `generated`. /// No modifier is typically the same as Virtual. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum GeneratedExpressionMode { @@ -3799,7 +3799,7 @@ impl Spanned for RenameTableNameKind { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Whether the syntax used for the trigger object (ROW or STATEMENT) is `FOR` or `FOR EACH`. 
diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 150a548920..32c023e057 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -401,7 +401,7 @@ impl Display for MergeClause { /// ``` /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/merge) /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#merge_statement) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum MergeClauseKind { diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 9115bb29d9..7e0f1a1049 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -8526,7 +8526,7 @@ impl fmt::Display for TableOptionsClustered { } /// Specifies which partition the boundary values on table partitioning belongs to. -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum PartitionRangeDirection { diff --git a/src/ast/query.rs b/src/ast/query.rs index 86f3d13b70..d8af243f7d 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -322,7 +322,7 @@ impl fmt::Display for Table { } /// What did this select look like? 
-#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum SelectFlavor { @@ -649,7 +649,7 @@ impl fmt::Display for With { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Indicates whether a CTE is materialized or not. @@ -1185,7 +1185,7 @@ pub struct TableFunctionArgs { pub settings: Option>, } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Type of index hint (e.g., `USE`, `IGNORE`, `FORCE`). @@ -1208,7 +1208,7 @@ impl fmt::Display for TableIndexHintType { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// The kind of index referenced by an index hint (e.g. `USE INDEX`). @@ -1228,7 +1228,7 @@ impl fmt::Display for TableIndexType { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Which clause the table index hint applies to. 
@@ -1579,7 +1579,7 @@ pub struct TableSample { pub offset: Option, } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Modifier specifying whether `SAMPLE` or `TABLESAMPLE` keyword was used. @@ -1630,7 +1630,7 @@ impl fmt::Display for TableSampleQuantity { } /// The table sample method names -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Sampling method used by `TABLESAMPLE`. @@ -1674,7 +1674,7 @@ impl fmt::Display for TableSampleSeed { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Modifier specifying how the sample seed is applied. @@ -1694,7 +1694,7 @@ impl fmt::Display for TableSampleSeedModifier { } } -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// Unit used with a `TABLESAMPLE` quantity (rows or percent). 
diff --git a/src/ast/value.rs b/src/ast/value.rs index dc46a5bbb9..8879a252b6 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -484,7 +484,7 @@ impl fmt::Display for DateTimeField { } } -#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] /// The Unicode Standard defines four normalization forms, which are intended to eliminate From ee3b6223e673b9c0f737d649a7f934de0186bc64 Mon Sep 17 00:00:00 2001 From: Louis Vialar Date: Tue, 13 Jan 2026 11:18:32 +0100 Subject: [PATCH 027/121] Tokenize empty line comments correctly (#2161) --- src/tokenizer.rs | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index eb935a4f28..a9f9fb4436 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1387,7 +1387,11 @@ impl<'a> Tokenizer<'a> { Some('-') => { let mut is_comment = true; if self.dialect.requires_single_line_comment_whitespace() { - is_comment = Some(' ') == chars.peekable.clone().nth(1); + is_comment = chars + .peekable + .clone() + .nth(1) + .is_some_and(char::is_whitespace); } if is_comment { @@ -4069,6 +4073,24 @@ mod tests { Token::Minus, ], ); + + all_dialects_where(|d| d.requires_single_line_comment_whitespace()).tokenizes_to( + "--\n-- Table structure for table...\n--\n", + vec![ + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: "\n".to_string(), + }), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: " Table structure for table...\n".to_string(), + }), + Token::Whitespace(Whitespace::SingleLineComment { + prefix: "--".to_string(), + comment: "\n".to_string(), + }), + ], + ); } #[test] From 3880a933e4c2d1e639c1b8bcb48bea34c37801fd Mon Sep 17 00:00:00 2001 From: Ophir LOJKINE Date: Tue, 13 Jan 2026 21:50:08 
+0100 Subject: [PATCH 028/121] Add support for DuckDB `LAMBDA` keyword syntax (#2149) Co-authored-by: Claude Opus 4.5 Co-authored-by: Ifeanyi Ubah --- src/ast/mod.rs | 35 +++++++++++++++++++++++++++++- src/dialect/generic.rs | 4 ++++ src/keywords.rs | 1 + src/parser/mod.rs | 41 +++++++++++++++++++++++++++++++++++ tests/sqlparser_common.rs | 3 ++- tests/sqlparser_databricks.rs | 6 +++-- tests/sqlparser_duckdb.rs | 19 ++++++++++++++++ 7 files changed, 105 insertions(+), 4 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7e0f1a1049..35a62ab766 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1415,14 +1415,47 @@ pub struct LambdaFunction { pub params: OneOrManyWithParens, /// The body of the lambda function. pub body: Box, + /// The syntax style used to write the lambda function. + pub syntax: LambdaSyntax, } impl fmt::Display for LambdaFunction { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!(f, "{} -> {}", self.params, self.body) + match self.syntax { + LambdaSyntax::Arrow => write!(f, "{} -> {}", self.params, self.body), + LambdaSyntax::LambdaKeyword => { + // For lambda keyword syntax, display params without parentheses + // e.g., `lambda x, y : expr` not `lambda (x, y) : expr` + write!(f, "lambda ")?; + match &self.params { + OneOrManyWithParens::One(p) => write!(f, "{p}")?, + OneOrManyWithParens::Many(ps) => write!(f, "{}", display_comma_separated(ps))?, + }; + write!(f, " : {}", self.body) + } + } } } +/// The syntax style for a lambda function. 
+#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Copy)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum LambdaSyntax { + /// Arrow syntax: `param -> expr` or `(param1, param2) -> expr` + /// + /// + /// + /// Supported, but deprecated in DuckDB: + /// + Arrow, + /// Lambda keyword syntax: `lambda param : expr` or `lambda param1, param2 : expr` + /// + /// Recommended in DuckDB: + /// + LambdaKeyword, +} + /// Encapsulates the common pattern in SQL where either one unparenthesized item /// such as an identifier or expression is permitted, or multiple of the same /// item in a parenthesized list. For accessing items regardless of the form, diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index f3a0903a40..da57253d67 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -203,4 +203,8 @@ impl Dialect for GenericDialect { fn supports_quote_delimited_string(&self) -> bool { true } + + fn supports_lambda_functions(&self) -> bool { + true + } } diff --git a/src/keywords.rs b/src/keywords.rs index 77207283c2..964e4b388d 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -555,6 +555,7 @@ define_keywords!( KEY_BLOCK_SIZE, KILL, LAG, + LAMBDA, LANGUAGE, LARGE, LAST, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8001611e00..64b6539102 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1548,6 +1548,9 @@ impl<'a> Parser<'a> { Keyword::MAP if *self.peek_token_ref() == Token::LBrace && self.dialect.support_map_literal_syntax() => { Ok(Some(self.parse_duckdb_map_literal()?)) } + Keyword::LAMBDA if self.dialect.supports_lambda_functions() => { + Ok(Some(self.parse_lambda_expr()?)) + } _ if self.dialect.supports_geometric_types() => match w.keyword { Keyword::CIRCLE => Ok(Some(self.parse_geometric_type(GeometricTypeKind::Circle)?)), Keyword::BOX => Ok(Some(self.parse_geometric_type(GeometricTypeKind::GeometricBox)?)), @@ -1600,6 +1603,7 @@ 
impl<'a> Parser<'a> { Ok(Expr::Lambda(LambdaFunction { params: OneOrManyWithParens::One(w.clone().into_ident(w_span)), body: Box::new(self.parse_expr()?), + syntax: LambdaSyntax::Arrow, })) } _ => Ok(Expr::Identifier(w.clone().into_ident(w_span))), @@ -2141,10 +2145,47 @@ impl<'a> Parser<'a> { Ok(Expr::Lambda(LambdaFunction { params: OneOrManyWithParens::Many(params), body: Box::new(expr), + syntax: LambdaSyntax::Arrow, })) }) } + /// Parses a lambda expression using the `LAMBDA` keyword syntax. + /// + /// Syntax: `LAMBDA : ` + /// + /// Examples: + /// - `LAMBDA x : x + 1` + /// - `LAMBDA x, i : x > i` + /// + /// See + fn parse_lambda_expr(&mut self) -> Result { + // Parse the parameters: either a single identifier or comma-separated identifiers + let params = if self.consume_token(&Token::LParen) { + // Parenthesized parameters: (x, y) + let params = self.parse_comma_separated(|p| p.parse_identifier())?; + self.expect_token(&Token::RParen)?; + OneOrManyWithParens::Many(params) + } else { + // Unparenthesized parameters: x or x, y + let params = self.parse_comma_separated(|p| p.parse_identifier())?; + if params.len() == 1 { + OneOrManyWithParens::One(params.into_iter().next().unwrap()) + } else { + OneOrManyWithParens::Many(params) + } + }; + // Expect the colon separator + self.expect_token(&Token::Colon)?; + // Parse the body expression + let body = self.parse_expr()?; + Ok(Expr::Lambda(LambdaFunction { + params, + body: Box::new(body), + syntax: LambdaSyntax::LambdaKeyword, + })) + } + /// Tries to parse the body of an [ODBC escaping sequence] /// i.e. 
without the enclosing braces /// Currently implemented: diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 208a56e236..c7a1981e94 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15678,7 +15678,8 @@ fn test_lambdas() { }, ], else_result: Some(Box::new(Expr::value(number("1")))), - }) + }), + syntax: LambdaSyntax::Arrow, }) ] )), diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 9a9a73fe6b..9064c8dc34 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -73,7 +73,8 @@ fn test_databricks_exists() { ), Expr::Lambda(LambdaFunction { params: OneOrManyWithParens::One(Ident::new("x")), - body: Box::new(Expr::IsNull(Box::new(Expr::Identifier(Ident::new("x"))))) + body: Box::new(Expr::IsNull(Box::new(Expr::Identifier(Ident::new("x"))))), + syntax: LambdaSyntax::Arrow, }) ] ), @@ -141,7 +142,8 @@ fn test_databricks_lambdas() { }, ], else_result: Some(Box::new(Expr::value(number("1")))) - }) + }), + syntax: LambdaSyntax::Arrow, }) ] )), diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 4a2f29e151..80a15eb11f 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -872,3 +872,22 @@ fn parse_extract_single_quotes() { let sql = "SELECT EXTRACT('month' FROM my_timestamp) FROM my_table"; duckdb().verified_stmt(sql); } + +#[test] +fn test_duckdb_lambda_function() { + // Test basic lambda with list_filter + let sql = "SELECT [3, 4, 5, 6].list_filter(lambda x : x > 4)"; + duckdb_and_generic().verified_stmt(sql); + + // Test lambda with arrow syntax (also supported by DuckDB) + let sql_arrow = "SELECT list_filter([1, 2, 3], x -> x > 1)"; + duckdb_and_generic().verified_stmt(sql_arrow); + + // Test lambda with multiple parameters (with index) + let sql_multi = "SELECT list_filter([1, 3, 1, 5], lambda x, i : x > i)"; + duckdb_and_generic().verified_stmt(sql_multi); + + // Test lambda in list_transform + let sql_transform = "SELECT 
list_transform([1, 2, 3], lambda x : x * 2)"; + duckdb_and_generic().verified_stmt(sql_transform); +} From 6daa46db7e595b585bed8ae35fbceefa3448e596 Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Wed, 14 Jan 2026 12:41:05 +0100 Subject: [PATCH 029/121] MySQL: Add support for casting using the BINARY keyword (#2146) --- src/dialect/mod.rs | 6 ++++++ src/dialect/mysql.rs | 6 ++++++ src/parser/mod.rs | 9 +++++++++ tests/sqlparser_common.rs | 6 ++++++ 4 files changed, 27 insertions(+) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 9e6c1859f1..873108ee9a 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1237,6 +1237,12 @@ pub trait Dialect: Debug + Any { fn supports_double_ampersand_operator(&self) -> bool { false } + + /// Returns true if the dialect supports casting an expression to a binary type + /// using the `BINARY ` syntax. + fn supports_binary_kw_as_cast(&self) -> bool { + false + } } /// Operators for which precedence must be defined. diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 60385c5bce..81aa9d445a 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -176,6 +176,12 @@ impl Dialect for MySqlDialect { fn supports_double_ampersand_operator(&self) -> bool { true } + + /// Deprecated functionality by MySQL but still supported + /// See: + fn supports_binary_kw_as_cast(&self) -> bool { + true + } } /// `LOCK TABLES` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 64b6539102..4cee5c33e2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1645,6 +1645,15 @@ impl<'a> Parser<'a> { // an unary negation `NOT ('a' LIKE 'b')`. To solve this, we don't accept the // `type 'string'` syntax for the custom data types at all. DataType::Custom(..) => parser_err!("dummy", loc), + // MySQL supports using the `BINARY` keyword as a cast to binary type. + DataType::Binary(..) 
if self.dialect.supports_binary_kw_as_cast() => { + Ok(Expr::Cast { + kind: CastKind::Cast, + expr: Box::new(parser.parse_expr()?), + data_type: DataType::Binary(None), + format: None, + }) + } data_type => Ok(Expr::TypedString(TypedString { data_type, value: parser.parse_value()?, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c7a1981e94..95ad9a209b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18061,3 +18061,9 @@ fn test_parse_key_value_options_trailing_semicolon() { "CREATE USER u1 option1='value1' option2='value2'", ); } + +#[test] +fn test_binary_kw_as_cast() { + all_dialects_where(|d| d.supports_binary_kw_as_cast()) + .one_statement_parses_to("SELECT BINARY 1+1", "SELECT CAST(1 + 1 AS BINARY)"); +} From c4d9e39848b7988bf2cf1a7d9d4b415ec8b76dc2 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Fri, 16 Jan 2026 11:51:02 +0100 Subject: [PATCH 030/121] Added missing `From` impls for `Statement` variants (#2160) --- src/ast/helpers/stmt_create_table.rs | 178 +++++++------------- src/ast/mod.rs | 66 ++++++++ src/dialect/snowflake.rs | 22 ++- src/parser/alter.rs | 6 +- src/parser/merge.rs | 12 +- src/parser/mod.rs | 243 +++++++++++++-------------- 6 files changed, 275 insertions(+), 252 deletions(-) diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 94af03481c..e63c90dbcf 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -504,8 +504,8 @@ impl CreateTableBuilder { self.require_user = require_user; self } - /// Consume the builder and produce a `Statement::CreateTable`. - pub fn build(self) -> Statement { + /// Consume the builder and produce a `CreateTable`. 
+ pub fn build(self) -> CreateTable { CreateTable { or_replace: self.or_replace, temporary: self.temporary, @@ -561,7 +561,6 @@ impl CreateTableBuilder { initialize: self.initialize, require_user: self.require_user, } - .into() } } @@ -572,115 +571,7 @@ impl TryFrom for CreateTableBuilder { // ownership. fn try_from(stmt: Statement) -> Result { match stmt { - Statement::CreateTable(CreateTable { - or_replace, - temporary, - external, - global, - if_not_exists, - transient, - volatile, - iceberg, - dynamic, - name, - columns, - constraints, - hive_distribution, - hive_formats, - file_format, - location, - query, - without_rowid, - like, - clone, - version, - comment, - on_commit, - on_cluster, - primary_key, - order_by, - partition_by, - cluster_by, - clustered_by, - inherits, - partition_of, - for_values, - strict, - copy_grants, - enable_schema_evolution, - change_tracking, - data_retention_time_in_days, - max_data_extension_time_in_days, - default_ddl_collation, - with_aggregation_policy, - with_row_access_policy, - with_tags, - base_location, - external_volume, - catalog, - catalog_sync, - storage_serialization_policy, - table_options, - target_lag, - warehouse, - refresh_mode, - initialize, - require_user, - }) => Ok(Self { - or_replace, - temporary, - external, - global, - if_not_exists, - transient, - dynamic, - name, - columns, - constraints, - hive_distribution, - hive_formats, - file_format, - location, - query, - without_rowid, - like, - clone, - version, - comment, - on_commit, - on_cluster, - primary_key, - order_by, - partition_by, - cluster_by, - clustered_by, - inherits, - partition_of, - for_values, - strict, - iceberg, - copy_grants, - enable_schema_evolution, - change_tracking, - data_retention_time_in_days, - max_data_extension_time_in_days, - default_ddl_collation, - with_aggregation_policy, - with_row_access_policy, - with_tags, - volatile, - base_location, - external_volume, - catalog, - catalog_sync, - storage_serialization_policy, - 
table_options, - target_lag, - warehouse, - refresh_mode, - initialize, - require_user, - }), + Statement::CreateTable(create_table) => Ok(create_table.into()), _ => Err(ParserError::ParserError(format!( "Expected create table statement, but received: {stmt}" ))), @@ -688,6 +579,66 @@ impl TryFrom for CreateTableBuilder { } } +impl From for CreateTableBuilder { + fn from(table: CreateTable) -> Self { + Self { + or_replace: table.or_replace, + temporary: table.temporary, + external: table.external, + global: table.global, + if_not_exists: table.if_not_exists, + transient: table.transient, + volatile: table.volatile, + iceberg: table.iceberg, + dynamic: table.dynamic, + name: table.name, + columns: table.columns, + constraints: table.constraints, + hive_distribution: table.hive_distribution, + hive_formats: table.hive_formats, + file_format: table.file_format, + location: table.location, + query: table.query, + without_rowid: table.without_rowid, + like: table.like, + clone: table.clone, + version: table.version, + comment: table.comment, + on_commit: table.on_commit, + on_cluster: table.on_cluster, + primary_key: table.primary_key, + order_by: table.order_by, + partition_by: table.partition_by, + cluster_by: table.cluster_by, + clustered_by: table.clustered_by, + inherits: table.inherits, + partition_of: table.partition_of, + for_values: table.for_values, + strict: table.strict, + copy_grants: table.copy_grants, + enable_schema_evolution: table.enable_schema_evolution, + change_tracking: table.change_tracking, + data_retention_time_in_days: table.data_retention_time_in_days, + max_data_extension_time_in_days: table.max_data_extension_time_in_days, + default_ddl_collation: table.default_ddl_collation, + with_aggregation_policy: table.with_aggregation_policy, + with_row_access_policy: table.with_row_access_policy, + with_tags: table.with_tags, + base_location: table.base_location, + external_volume: table.external_volume, + catalog: table.catalog, + catalog_sync: 
table.catalog_sync, + storage_serialization_policy: table.storage_serialization_policy, + table_options: table.table_options, + target_lag: table.target_lag, + warehouse: table.warehouse, + refresh_mode: table.refresh_mode, + initialize: table.initialize, + require_user: table.require_user, + } + } +} + /// Helper return type when parsing configuration for a `CREATE TABLE` statement. #[derive(Default)] pub(crate) struct CreateTableConfiguration { @@ -707,7 +658,8 @@ mod tests { pub fn test_from_valid_statement() { let builder = CreateTableBuilder::new(ObjectName::from(vec![Ident::new("table_name")])); - let stmt = builder.clone().build(); + let create_table = builder.clone().build(); + let stmt: Statement = create_table.into(); assert_eq!(builder, CreateTableBuilder::try_from(stmt).unwrap()); } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 35a62ab766..d77186bc77 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -11807,6 +11807,24 @@ impl From for Statement { } } +impl From for Statement { + fn from(c: CreateOperator) -> Self { + Self::CreateOperator(c) + } +} + +impl From for Statement { + fn from(c: CreateOperatorFamily) -> Self { + Self::CreateOperatorFamily(c) + } +} + +impl From for Statement { + fn from(c: CreateOperatorClass) -> Self { + Self::CreateOperatorClass(c) + } +} + impl From for Statement { fn from(a: AlterSchema) -> Self { Self::AlterSchema(a) @@ -11819,6 +11837,36 @@ impl From for Statement { } } +impl From for Statement { + fn from(a: AlterOperator) -> Self { + Self::AlterOperator(a) + } +} + +impl From for Statement { + fn from(a: AlterOperatorFamily) -> Self { + Self::AlterOperatorFamily(a) + } +} + +impl From for Statement { + fn from(a: AlterOperatorClass) -> Self { + Self::AlterOperatorClass(a) + } +} + +impl From for Statement { + fn from(m: Merge) -> Self { + Self::Merge(m) + } +} + +impl From for Statement { + fn from(a: AlterUser) -> Self { + Self::AlterUser(a) + } +} + impl From for Statement { fn from(d: DropDomain) -> Self { 
Self::DropDomain(d) @@ -11861,6 +11909,24 @@ impl From for Statement { } } +impl From for Statement { + fn from(d: DropOperator) -> Self { + Self::DropOperator(d) + } +} + +impl From for Statement { + fn from(d: DropOperatorFamily) -> Self { + Self::DropOperatorFamily(d) + } +} + +impl From for Statement { + fn from(d: DropOperatorClass) -> Self { + Self::DropOperatorClass(d) + } +} + impl From for Statement { fn from(d: DenyStatement) -> Self { Self::Deny(d) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index ed01c128b9..eade01c040 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -28,11 +28,11 @@ use crate::ast::helpers::stmt_data_loading::{ }; use crate::ast::{ AlterTable, AlterTableOperation, AlterTableType, CatalogSyncNamespaceMode, ColumnOption, - ColumnPolicy, ColumnPolicyProperty, ContactEntry, CopyIntoSnowflakeKind, CreateTableLikeKind, - DollarQuotedString, Ident, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, - IdentityPropertyKind, IdentityPropertyOrder, InitializeKind, ObjectName, ObjectNamePart, - RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, Statement, - StorageSerializationPolicy, TagsColumnOption, Value, WrappedCollection, + ColumnPolicy, ColumnPolicyProperty, ContactEntry, CopyIntoSnowflakeKind, CreateTable, + CreateTableLikeKind, DollarQuotedString, Ident, IdentityParameters, IdentityProperty, + IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, InitializeKind, + ObjectName, ObjectNamePart, RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, + Statement, StorageSerializationPolicy, TagsColumnOption, Value, WrappedCollection, }; use crate::dialect::{Dialect, Precedence}; use crate::keywords::Keyword; @@ -272,9 +272,13 @@ impl Dialect for SnowflakeDialect { // OK - this is CREATE STAGE statement return Some(parse_create_stage(or_replace, temporary, parser)); } else if parser.parse_keyword(Keyword::TABLE) { - return Some(parse_create_table( - 
or_replace, global, temporary, volatile, transient, iceberg, dynamic, parser, - )); + return Some( + parse_create_table( + or_replace, global, temporary, volatile, transient, iceberg, dynamic, + parser, + ) + .map(Into::into), + ); } else if parser.parse_keyword(Keyword::DATABASE) { return Some(parse_create_database(or_replace, transient, parser)); } else { @@ -719,7 +723,7 @@ pub fn parse_create_table( iceberg: bool, dynamic: bool, parser: &mut Parser, -) -> Result { +) -> Result { let if_not_exists = parser.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = parser.parse_object_name(false)?; diff --git a/src/parser/alter.rs b/src/parser/alter.rs index 935d22f8d0..8ef712ef77 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -148,7 +148,7 @@ impl Parser<'_> { /// ```sql /// ALTER USER [ IF EXISTS ] [ ] [ OPTIONS ] /// ``` - pub fn parse_alter_user(&mut self) -> Result { + pub fn parse_alter_user(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let name = self.parse_identifier()?; let _ = self.parse_keyword(Keyword::WITH); @@ -309,7 +309,7 @@ impl Parser<'_> { None }; - Ok(Statement::AlterUser(AlterUser { + Ok(AlterUser { if_exists, name, rename_to, @@ -329,7 +329,7 @@ impl Parser<'_> { set_props, unset_props, password, - })) + }) } fn parse_mfa_method(&mut self) -> Result { diff --git a/src/parser/merge.rs b/src/parser/merge.rs index 81798c4569..62da68a201 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -18,7 +18,7 @@ use alloc::{boxed::Box, format, vec, vec::Vec}; use crate::{ ast::{ Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, MergeInsertKind, - MergeUpdateExpr, ObjectName, OutputClause, SetExpr, Statement, + MergeUpdateExpr, ObjectName, OutputClause, SetExpr, }, dialect::{BigQueryDialect, GenericDialect, MySqlDialect}, keywords::Keyword, @@ -36,11 +36,13 @@ impl Parser<'_> { &mut self, merge_token: TokenWithSpan, ) -> Result, ParserError> { - 
Ok(Box::new(SetExpr::Merge(self.parse_merge(merge_token)?))) + Ok(Box::new(SetExpr::Merge( + self.parse_merge(merge_token)?.into(), + ))) } /// Parse a `MERGE` statement - pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { + pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { let into = self.parse_keyword(Keyword::INTO); let table = self.parse_table_factor()?; @@ -55,7 +57,7 @@ impl Parser<'_> { None => None, }; - Ok(Statement::Merge(Merge { + Ok(Merge { merge_token: merge_token.into(), into, table, @@ -63,7 +65,7 @@ impl Parser<'_> { on: Box::new(on), clauses, output, - })) + }) } fn parse_merge_clauses(&mut self) -> Result, ParserError> { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4cee5c33e2..47bb1164a6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -604,28 +604,28 @@ impl<'a> Parser<'a> { Keyword::DESC => self.parse_explain(DescribeAlias::Desc), Keyword::DESCRIBE => self.parse_explain(DescribeAlias::Describe), Keyword::EXPLAIN => self.parse_explain(DescribeAlias::Explain), - Keyword::ANALYZE => self.parse_analyze(), + Keyword::ANALYZE => self.parse_analyze().map(Into::into), Keyword::CASE => { self.prev_token(); - self.parse_case_stmt() + self.parse_case_stmt().map(Into::into) } Keyword::IF => { self.prev_token(); - self.parse_if_stmt() + self.parse_if_stmt().map(Into::into) } Keyword::WHILE => { self.prev_token(); - self.parse_while() + self.parse_while().map(Into::into) } Keyword::RAISE => { self.prev_token(); - self.parse_raise_stmt() + self.parse_raise_stmt().map(Into::into) } Keyword::SELECT | Keyword::WITH | Keyword::VALUES | Keyword::FROM => { self.prev_token(); - self.parse_query().map(Statement::Query) + self.parse_query().map(Into::into) } - Keyword::TRUNCATE => self.parse_truncate(), + Keyword::TRUNCATE => self.parse_truncate().map(Into::into), Keyword::ATTACH => { if dialect_of!(self is DuckDbDialect) { self.parse_attach_duckdb_database() @@ -636,7 +636,7 @@ impl<'a> Parser<'a> { 
Keyword::DETACH if dialect_of!(self is DuckDbDialect | GenericDialect) => { self.parse_detach_duckdb_database() } - Keyword::MSCK => self.parse_msck(), + Keyword::MSCK => self.parse_msck().map(Into::into), Keyword::CREATE => self.parse_create(), Keyword::CACHE => self.parse_cache_table(), Keyword::DROP => self.parse_drop(), @@ -679,7 +679,7 @@ impl<'a> Parser<'a> { Keyword::DEALLOCATE => self.parse_deallocate(), Keyword::EXECUTE | Keyword::EXEC => self.parse_execute(), Keyword::PREPARE => self.parse_prepare(), - Keyword::MERGE => self.parse_merge(next_token), + Keyword::MERGE => self.parse_merge(next_token).map(Into::into), // `LISTEN`, `UNLISTEN` and `NOTIFY` are Postgres-specific // syntaxes. They are used for Postgres statement. Keyword::LISTEN if self.dialect.supports_listen_notify() => self.parse_listen(), @@ -713,12 +713,12 @@ impl<'a> Parser<'a> { self.prev_token(); self.parse_vacuum() } - Keyword::RESET => self.parse_reset(), + Keyword::RESET => self.parse_reset().map(Into::into), _ => self.expected("an SQL statement", next_token), }, Token::LParen => { self.prev_token(); - self.parse_query().map(Statement::Query) + self.parse_query().map(Into::into) } _ => self.expected("an SQL statement", next_token), } @@ -727,7 +727,7 @@ impl<'a> Parser<'a> { /// Parse a `CASE` statement. /// /// See [Statement::Case] - pub fn parse_case_stmt(&mut self) -> Result { + pub fn parse_case_stmt(&mut self) -> Result { let case_token = self.expect_keyword(Keyword::CASE)?; let match_expr = if self.peek_keyword(Keyword::WHEN) { @@ -752,19 +752,19 @@ impl<'a> Parser<'a> { end_case_token = self.expect_keyword(Keyword::CASE)?; } - Ok(Statement::Case(CaseStatement { + Ok(CaseStatement { case_token: AttachedToken(case_token), match_expr, when_blocks, else_block, end_case_token: AttachedToken(end_case_token), - })) + }) } /// Parse an `IF` statement. 
/// /// See [Statement::If] - pub fn parse_if_stmt(&mut self) -> Result { + pub fn parse_if_stmt(&mut self) -> Result { self.expect_keyword_is(Keyword::IF)?; let if_block = self.parse_conditional_statement_block(&[ Keyword::ELSE, @@ -793,22 +793,22 @@ impl<'a> Parser<'a> { self.expect_keyword_is(Keyword::END)?; let end_token = self.expect_keyword(Keyword::IF)?; - Ok(Statement::If(IfStatement { + Ok(IfStatement { if_block, elseif_blocks, else_block, end_token: Some(AttachedToken(end_token)), - })) + }) } /// Parse a `WHILE` statement. /// /// See [Statement::While] - fn parse_while(&mut self) -> Result { + fn parse_while(&mut self) -> Result { self.expect_keyword_is(Keyword::WHILE)?; let while_block = self.parse_conditional_statement_block(&[Keyword::END])?; - Ok(Statement::While(WhileStatement { while_block })) + Ok(WhileStatement { while_block }) } /// Parses an expression and associated list of statements @@ -875,7 +875,7 @@ impl<'a> Parser<'a> { /// Parse a `RAISE` statement. /// /// See [Statement::Raise] - pub fn parse_raise_stmt(&mut self) -> Result { + pub fn parse_raise_stmt(&mut self) -> Result { self.expect_keyword_is(Keyword::RAISE)?; let value = if self.parse_keywords(&[Keyword::USING, Keyword::MESSAGE]) { @@ -885,7 +885,7 @@ impl<'a> Parser<'a> { self.maybe_parse(|parser| parser.parse_expr().map(RaiseStatementValue::Expr))? }; - Ok(Statement::Raise(RaiseStatement { value })) + Ok(RaiseStatement { value }) } /// Parse a COMMENT statement. /// @@ -1024,7 +1024,7 @@ impl<'a> Parser<'a> { } /// Parse `MSCK` statement. - pub fn parse_msck(&mut self) -> Result { + pub fn parse_msck(&mut self) -> Result { let repair = self.parse_keyword(Keyword::REPAIR); self.expect_keyword_is(Keyword::TABLE)?; let table_name = self.parse_object_name(false)?; @@ -1048,12 +1048,11 @@ impl<'a> Parser<'a> { repair, table_name, partition_action, - } - .into()) + }) } /// Parse `TRUNCATE` statement. 
- pub fn parse_truncate(&mut self) -> Result { + pub fn parse_truncate(&mut self) -> Result { let table = self.parse_keyword(Keyword::TABLE); let table_names = self @@ -1095,8 +1094,7 @@ impl<'a> Parser<'a> { identity, cascade, on_cluster, - } - .into()) + }) } fn parse_cascade_option(&mut self) -> Option { @@ -1192,7 +1190,7 @@ impl<'a> Parser<'a> { } /// Parse `ANALYZE` statement. - pub fn parse_analyze(&mut self) -> Result { + pub fn parse_analyze(&mut self) -> Result { let has_table_keyword = self.parse_keyword(Keyword::TABLE); let table_name = self.parse_object_name(false)?; let mut for_columns = false; @@ -1246,8 +1244,7 @@ impl<'a> Parser<'a> { cache_metadata, noscan, compute_statistics, - } - .into()) + }) } /// Parse a new expression including wildcard & qualified wildcard. @@ -1432,7 +1429,7 @@ impl<'a> Parser<'a> { Ok(RenameTable { old_name, new_name }) })?; - Ok(Statement::RenameTable(rename_tables)) + Ok(rename_tables.into()) } else { self.expected("KEYWORD `TABLE` after RENAME", self.peek_token()) } @@ -4925,41 +4922,45 @@ impl<'a> Parser<'a> { let create_view_params = self.parse_create_view_params()?; if self.parse_keyword(Keyword::TABLE) { self.parse_create_table(or_replace, temporary, global, transient) + .map(Into::into) } else if self.peek_keyword(Keyword::MATERIALIZED) || self.peek_keyword(Keyword::VIEW) || self.peek_keywords(&[Keyword::SECURE, Keyword::MATERIALIZED, Keyword::VIEW]) || self.peek_keywords(&[Keyword::SECURE, Keyword::VIEW]) { self.parse_create_view(or_alter, or_replace, temporary, create_view_params) + .map(Into::into) } else if self.parse_keyword(Keyword::POLICY) { self.parse_create_policy() } else if self.parse_keyword(Keyword::EXTERNAL) { - self.parse_create_external_table(or_replace) + self.parse_create_external_table(or_replace).map(Into::into) } else if self.parse_keyword(Keyword::FUNCTION) { self.parse_create_function(or_alter, or_replace, temporary) } else if self.parse_keyword(Keyword::DOMAIN) { - 
self.parse_create_domain() + self.parse_create_domain().map(Into::into) } else if self.parse_keyword(Keyword::TRIGGER) { self.parse_create_trigger(temporary, or_alter, or_replace, false) + .map(Into::into) } else if self.parse_keywords(&[Keyword::CONSTRAINT, Keyword::TRIGGER]) { self.parse_create_trigger(temporary, or_alter, or_replace, true) + .map(Into::into) } else if self.parse_keyword(Keyword::MACRO) { self.parse_create_macro(or_replace, temporary) } else if self.parse_keyword(Keyword::SECRET) { self.parse_create_secret(or_replace, temporary, persistent) } else if self.parse_keyword(Keyword::USER) { - self.parse_create_user(or_replace) + self.parse_create_user(or_replace).map(Into::into) } else if or_replace { self.expected( "[EXTERNAL] TABLE or [MATERIALIZED] VIEW or FUNCTION after CREATE OR REPLACE", self.peek_token(), ) } else if self.parse_keyword(Keyword::EXTENSION) { - self.parse_create_extension() + self.parse_create_extension().map(Into::into) } else if self.parse_keyword(Keyword::INDEX) { - self.parse_create_index(false) + self.parse_create_index(false).map(Into::into) } else if self.parse_keywords(&[Keyword::UNIQUE, Keyword::INDEX]) { - self.parse_create_index(true) + self.parse_create_index(true).map(Into::into) } else if self.parse_keyword(Keyword::VIRTUAL) { self.parse_create_virtual_table() } else if self.parse_keyword(Keyword::SCHEMA) { @@ -4967,7 +4968,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::DATABASE) { self.parse_create_database() } else if self.parse_keyword(Keyword::ROLE) { - self.parse_create_role() + self.parse_create_role().map(Into::into) } else if self.parse_keyword(Keyword::SEQUENCE) { self.parse_create_sequence(temporary) } else if self.parse_keyword(Keyword::TYPE) { @@ -4975,15 +4976,15 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::PROCEDURE) { self.parse_create_procedure(or_alter) } else if self.parse_keyword(Keyword::CONNECTOR) { - self.parse_create_connector() + 
self.parse_create_connector().map(Into::into) } else if self.parse_keyword(Keyword::OPERATOR) { // Check if this is CREATE OPERATOR FAMILY or CREATE OPERATOR CLASS if self.parse_keyword(Keyword::FAMILY) { - self.parse_create_operator_family() + self.parse_create_operator_family().map(Into::into) } else if self.parse_keyword(Keyword::CLASS) { - self.parse_create_operator_class() + self.parse_create_operator_class().map(Into::into) } else { - self.parse_create_operator() + self.parse_create_operator().map(Into::into) } } else if self.parse_keyword(Keyword::SERVER) { self.parse_pg_create_server() @@ -4992,7 +4993,7 @@ impl<'a> Parser<'a> { } } - fn parse_create_user(&mut self, or_replace: bool) -> Result { + fn parse_create_user(&mut self, or_replace: bool) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let name = self.parse_identifier()?; let options = self @@ -5004,7 +5005,7 @@ impl<'a> Parser<'a> { } else { vec![] }; - Ok(Statement::CreateUser(CreateUser { + Ok(CreateUser { or_replace, if_not_exists, name, @@ -5017,7 +5018,7 @@ impl<'a> Parser<'a> { options: tags, delimiter: KeyValueOptionsDelimiter::Comma, }, - })) + }) } /// See [DuckDB Docs](https://duckdb.org/docs/sql/statements/create_secret.html) for more details. 
@@ -5334,14 +5335,18 @@ impl<'a> Parser<'a> { ) -> Result { if dialect_of!(self is HiveDialect) { self.parse_hive_create_function(or_replace, temporary) + .map(Into::into) } else if dialect_of!(self is PostgreSqlDialect | GenericDialect) { self.parse_postgres_create_function(or_replace, temporary) + .map(Into::into) } else if dialect_of!(self is DuckDbDialect) { self.parse_create_macro(or_replace, temporary) } else if dialect_of!(self is BigQueryDialect) { self.parse_bigquery_create_function(or_replace, temporary) + .map(Into::into) } else if dialect_of!(self is MsSqlDialect) { self.parse_mssql_create_function(or_alter, or_replace, temporary) + .map(Into::into) } else { self.prev_token(); self.expected("an object type after CREATE", self.peek_token()) @@ -5355,7 +5360,7 @@ impl<'a> Parser<'a> { &mut self, or_replace: bool, temporary: bool, - ) -> Result { + ) -> Result { let name = self.parse_object_name(false)?; self.expect_token(&Token::LParen)?; @@ -5476,7 +5481,7 @@ impl<'a> Parser<'a> { } } - Ok(Statement::CreateFunction(CreateFunction { + Ok(CreateFunction { or_alter: false, or_replace, temporary, @@ -5495,7 +5500,7 @@ impl<'a> Parser<'a> { determinism_specifier: None, options: None, remote_connection: None, - })) + }) } /// Parse `CREATE FUNCTION` for [Hive] @@ -5505,14 +5510,14 @@ impl<'a> Parser<'a> { &mut self, or_replace: bool, temporary: bool, - ) -> Result { + ) -> Result { let name = self.parse_object_name(false)?; self.expect_keyword_is(Keyword::AS)?; let body = self.parse_create_function_body_string()?; let using = self.parse_optional_create_function_using()?; - Ok(Statement::CreateFunction(CreateFunction { + Ok(CreateFunction { or_alter: false, or_replace, temporary, @@ -5531,7 +5536,7 @@ impl<'a> Parser<'a> { determinism_specifier: None, options: None, remote_connection: None, - })) + }) } /// Parse `CREATE FUNCTION` for [BigQuery] @@ -5541,7 +5546,7 @@ impl<'a> Parser<'a> { &mut self, or_replace: bool, temporary: bool, - ) -> Result { + ) -> 
Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let (name, args) = self.parse_create_function_name_and_params()?; @@ -5592,7 +5597,7 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::CreateFunction(CreateFunction { + Ok(CreateFunction { or_alter: false, or_replace, temporary, @@ -5611,7 +5616,7 @@ impl<'a> Parser<'a> { parallel: None, security: None, set_params: vec![], - })) + }) } /// Parse `CREATE FUNCTION` for [MsSql] @@ -5622,7 +5627,7 @@ impl<'a> Parser<'a> { or_alter: bool, or_replace: bool, temporary: bool, - ) -> Result { + ) -> Result { let (name, args) = self.parse_create_function_name_and_params()?; self.expect_keyword(Keyword::RETURNS)?; @@ -5683,7 +5688,7 @@ impl<'a> Parser<'a> { parser_err!("Unparsable function body", self.peek_token().span.start)? }; - Ok(Statement::CreateFunction(CreateFunction { + Ok(CreateFunction { or_alter, or_replace, temporary, @@ -5702,7 +5707,7 @@ impl<'a> Parser<'a> { parallel: None, security: None, set_params: vec![], - })) + }) } fn parse_create_function_name_and_params( @@ -5796,7 +5801,7 @@ impl<'a> Parser<'a> { /// ```sql /// DROP TRIGGER [ IF EXISTS ] name ON table_name [ CASCADE | RESTRICT ] /// ``` - pub fn parse_drop_trigger(&mut self) -> Result { + pub fn parse_drop_trigger(&mut self) -> Result { if !dialect_of!(self is PostgreSqlDialect | SQLiteDialect | GenericDialect | MySqlDialect | MsSqlDialect) { self.prev_token(); @@ -5817,12 +5822,12 @@ impl<'a> Parser<'a> { )), None => None, }; - Ok(Statement::DropTrigger(DropTrigger { + Ok(DropTrigger { if_exists, trigger_name, table_name, option, - })) + }) } /// Parse a `CREATE TRIGGER` statement. 
@@ -5832,7 +5837,7 @@ impl<'a> Parser<'a> { or_alter: bool, or_replace: bool, is_constraint: bool, - ) -> Result { + ) -> Result { if !dialect_of!(self is PostgreSqlDialect | SQLiteDialect | GenericDialect | MySqlDialect | MsSqlDialect) { self.prev_token(); @@ -5914,8 +5919,7 @@ impl<'a> Parser<'a> { statements_as: false, statements, characteristics, - } - .into()) + }) } /// Parse the period part of a trigger (`BEFORE`, `AFTER`, etc.). @@ -6058,7 +6062,7 @@ impl<'a> Parser<'a> { pub fn parse_create_external_table( &mut self, or_replace: bool, - ) -> Result { + ) -> Result { self.expect_keyword_is(Keyword::TABLE)?; let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name(false)?; @@ -6148,7 +6152,7 @@ impl<'a> Parser<'a> { or_replace: bool, temporary: bool, create_view_params: Option, - ) -> Result { + ) -> Result { let secure = self.parse_keyword(Keyword::SECURE); let materialized = self.parse_keyword(Keyword::MATERIALIZED); self.expect_keyword_is(Keyword::VIEW)?; @@ -6231,8 +6235,7 @@ impl<'a> Parser<'a> { to, params: create_view_params, name_before_not_exists, - } - .into()) + }) } /// Parse optional parameters for the `CREATE VIEW` statement supported by [MySQL]. @@ -6294,7 +6297,7 @@ impl<'a> Parser<'a> { } /// Parse a `CREATE ROLE` statement. - pub fn parse_create_role(&mut self) -> Result { + pub fn parse_create_role(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; @@ -6515,8 +6518,7 @@ impl<'a> Parser<'a> { user, admin, authorization_owner, - } - .into()) + }) } /// Parse an `OWNER` clause. @@ -6541,7 +6543,7 @@ impl<'a> Parser<'a> { } /// Parses a [Statement::CreateDomain] statement. 
- fn parse_create_domain(&mut self) -> Result { + fn parse_create_domain(&mut self) -> Result { let name = self.parse_object_name(false)?; self.expect_keyword_is(Keyword::AS)?; let data_type = self.parse_data_type()?; @@ -6560,13 +6562,13 @@ impl<'a> Parser<'a> { constraints.push(constraint); } - Ok(Statement::CreateDomain(CreateDomain { + Ok(CreateDomain { name, data_type, collation, default, constraints, - })) + }) } /// ```sql @@ -6663,7 +6665,7 @@ impl<'a> Parser<'a> { /// ``` /// /// [Hive Documentation](https://cwiki.apache.org/confluence/pages/viewpage.action?pageId=27362034#LanguageManualDDL-CreateDataConnectorCreateConnector) - pub fn parse_create_connector(&mut self) -> Result { + pub fn parse_create_connector(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let name = self.parse_identifier()?; @@ -6687,14 +6689,14 @@ impl<'a> Parser<'a> { _ => None, }; - Ok(Statement::CreateConnector(CreateConnector { + Ok(CreateConnector { name, if_not_exists, connector_type, url, comment, with_dcproperties, - })) + }) } /// Parse an operator name, which can contain special characters like +, -, <, >, = @@ -6718,7 +6720,7 @@ impl<'a> Parser<'a> { /// Parse a [Statement::CreateOperator] /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createoperator.html) - pub fn parse_create_operator(&mut self) -> Result { + pub fn parse_create_operator(&mut self) -> Result { let name = self.parse_operator_name()?; self.expect_token(&Token::LParen)?; @@ -6827,34 +6829,31 @@ impl<'a> Parser<'a> { ParserError::ParserError("CREATE OPERATOR requires FUNCTION parameter".to_string()) })?; - Ok(Statement::CreateOperator(CreateOperator { + Ok(CreateOperator { name, function, is_procedure, left_arg, right_arg, options, - })) + }) } /// Parse a [Statement::CreateOperatorFamily] /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createopfamily.html) - pub fn 
parse_create_operator_family(&mut self) -> Result { + pub fn parse_create_operator_family(&mut self) -> Result { let name = self.parse_object_name(false)?; self.expect_keyword(Keyword::USING)?; let using = self.parse_identifier()?; - Ok(Statement::CreateOperatorFamily(CreateOperatorFamily { - name, - using, - })) + Ok(CreateOperatorFamily { name, using }) } /// Parse a [Statement::CreateOperatorClass] /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createopclass.html) - pub fn parse_create_operator_class(&mut self) -> Result { + pub fn parse_create_operator_class(&mut self) -> Result { let name = self.parse_object_name(false)?; let default = self.parse_keyword(Keyword::DEFAULT); self.expect_keywords(&[Keyword::FOR, Keyword::TYPE])?; @@ -6968,14 +6967,14 @@ impl<'a> Parser<'a> { } } - Ok(Statement::CreateOperatorClass(CreateOperatorClass { + Ok(CreateOperatorClass { name, default, for_type, using, family, items, - })) + }) } /// Parse a `DROP` statement. @@ -7011,19 +7010,19 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::STREAM) { ObjectType::Stream } else if self.parse_keyword(Keyword::FUNCTION) { - return self.parse_drop_function(); + return self.parse_drop_function().map(Into::into); } else if self.parse_keyword(Keyword::POLICY) { return self.parse_drop_policy(); } else if self.parse_keyword(Keyword::CONNECTOR) { return self.parse_drop_connector(); } else if self.parse_keyword(Keyword::DOMAIN) { - return self.parse_drop_domain(); + return self.parse_drop_domain().map(Into::into); } else if self.parse_keyword(Keyword::PROCEDURE) { return self.parse_drop_procedure(); } else if self.parse_keyword(Keyword::SECRET) { return self.parse_drop_secret(temporary, persistent); } else if self.parse_keyword(Keyword::TRIGGER) { - return self.parse_drop_trigger(); + return self.parse_drop_trigger().map(Into::into); } else if self.parse_keyword(Keyword::EXTENSION) { return self.parse_drop_extension(); } else if 
self.parse_keyword(Keyword::OPERATOR) { @@ -7088,15 +7087,15 @@ impl<'a> Parser<'a> { /// DROP FUNCTION [ IF EXISTS ] name [ ( [ [ argmode ] [ argname ] argtype [, ...] ] ) ] [, ...] /// [ CASCADE | RESTRICT ] /// ``` - fn parse_drop_function(&mut self) -> Result { + fn parse_drop_function(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let func_desc = self.parse_comma_separated(Parser::parse_function_desc)?; let drop_behavior = self.parse_optional_drop_behavior(); - Ok(Statement::DropFunction(DropFunction { + Ok(DropFunction { if_exists, func_desc, drop_behavior, - })) + }) } /// ```sql @@ -7131,15 +7130,15 @@ impl<'a> Parser<'a> { /// ```sql /// DROP DOMAIN [ IF EXISTS ] name [ CASCADE | RESTRICT ] /// ``` - fn parse_drop_domain(&mut self) -> Result { + fn parse_drop_domain(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let name = self.parse_object_name(false)?; let drop_behavior = self.parse_optional_drop_behavior(); - Ok(Statement::DropDomain(DropDomain { + Ok(DropDomain { if_exists, name, drop_behavior, - })) + }) } /// ```sql @@ -7653,7 +7652,7 @@ impl<'a> Parser<'a> { } /// Parse a `CREATE INDEX` statement. - pub fn parse_create_index(&mut self, unique: bool) -> Result { + pub fn parse_create_index(&mut self, unique: bool) -> Result { let concurrently = self.parse_keyword(Keyword::CONCURRENTLY); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); @@ -7727,7 +7726,7 @@ impl<'a> Parser<'a> { alter_options.push(self.parse_alter_table_operation()?) } - Ok(Statement::CreateIndex(CreateIndex { + Ok(CreateIndex { name: index_name, table_name, using, @@ -7741,11 +7740,11 @@ impl<'a> Parser<'a> { predicate, index_options, alter_options, - })) + }) } /// Parse a `CREATE EXTENSION` statement. 
- pub fn parse_create_extension(&mut self) -> Result { + pub fn parse_create_extension(&mut self) -> Result { let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let name = self.parse_identifier()?; @@ -7775,8 +7774,7 @@ impl<'a> Parser<'a> { schema, version, cascade, - } - .into()) + }) } /// Parse a PostgreSQL-specific [Statement::DropExtension] statement. @@ -8054,7 +8052,7 @@ impl<'a> Parser<'a> { temporary: bool, global: Option, transient: bool, - ) -> Result { + ) -> Result { let allow_unquoted_hyphen = dialect_of!(self is BigQueryDialect); let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let table_name = self.parse_object_name(allow_unquoted_hyphen)?; @@ -10208,17 +10206,17 @@ impl<'a> Parser<'a> { } Keyword::OPERATOR => { if self.parse_keyword(Keyword::FAMILY) { - self.parse_alter_operator_family() + self.parse_alter_operator_family().map(Into::into) } else if self.parse_keyword(Keyword::CLASS) { - self.parse_alter_operator_class() + self.parse_alter_operator_class().map(Into::into) } else { - self.parse_alter_operator() + self.parse_alter_operator().map(Into::into) } } Keyword::ROLE => self.parse_alter_role(), Keyword::POLICY => self.parse_alter_policy(), Keyword::CONNECTOR => self.parse_alter_connector(), - Keyword::USER => self.parse_alter_user(), + Keyword::USER => self.parse_alter_user().map(Into::into), // unreachable because expect_one_of_keywords used above unexpected_keyword => Err(ParserError::ParserError( format!("Internal parser error: expected any of {{VIEW, TYPE, TABLE, INDEX, ROLE, POLICY, CONNECTOR, ICEBERG, SCHEMA, USER, OPERATOR}}, got {unexpected_keyword:?}"), @@ -10340,7 +10338,7 @@ impl<'a> Parser<'a> { /// Parse a [Statement::AlterOperator] /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-alteroperator.html) - pub fn parse_alter_operator(&mut self) -> Result { + pub fn parse_alter_operator(&mut self) -> Result { let name = 
self.parse_operator_name()?; // Parse (left_type, right_type) @@ -10439,12 +10437,12 @@ impl<'a> Parser<'a> { ); }; - Ok(Statement::AlterOperator(AlterOperator { + Ok(AlterOperator { name, left_type, right_type, operation, - })) + }) } /// Parse an operator item for ALTER OPERATOR FAMILY ADD operations @@ -10577,7 +10575,7 @@ impl<'a> Parser<'a> { /// Parse a [Statement::AlterOperatorFamily] /// See - pub fn parse_alter_operator_family(&mut self) -> Result { + pub fn parse_alter_operator_family(&mut self) -> Result { let name = self.parse_object_name(false)?; self.expect_keyword(Keyword::USING)?; let using = self.parse_identifier()?; @@ -10604,17 +10602,17 @@ impl<'a> Parser<'a> { ); }; - Ok(Statement::AlterOperatorFamily(AlterOperatorFamily { + Ok(AlterOperatorFamily { name, using, operation, - })) + }) } /// Parse an `ALTER OPERATOR CLASS` statement. /// /// Handles operations like `RENAME TO`, `OWNER TO`, and `SET SCHEMA`. - pub fn parse_alter_operator_class(&mut self) -> Result { + pub fn parse_alter_operator_class(&mut self) -> Result { let name = self.parse_object_name(false)?; self.expect_keyword(Keyword::USING)?; let using = self.parse_identifier()?; @@ -10635,11 +10633,11 @@ impl<'a> Parser<'a> { ); }; - Ok(Statement::AlterOperatorClass(AlterOperatorClass { + Ok(AlterOperatorClass { name, using, operation, - })) + }) } /// Parse an `ALTER SCHEMA` statement. 
@@ -16853,7 +16851,7 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::Insert(Insert { + Ok(Insert { insert_token: insert_token.into(), or, table: table_object, @@ -16874,7 +16872,8 @@ impl<'a> Parser<'a> { insert_alias, settings, format_clause, - })) + } + .into()) } } @@ -19134,15 +19133,15 @@ impl<'a> Parser<'a> { } /// Parses a RESET statement - fn parse_reset(&mut self) -> Result { + fn parse_reset(&mut self) -> Result { if self.parse_keyword(Keyword::ALL) { - return Ok(Statement::Reset(ResetStatement { reset: Reset::ALL })); + return Ok(ResetStatement { reset: Reset::ALL }); } let obj = self.parse_object_name(false)?; - Ok(Statement::Reset(ResetStatement { + Ok(ResetStatement { reset: Reset::ConfigurationParameter(obj), - })) + }) } } From 46f2234c1596d8763db3955ec50d6343ca2f77a5 Mon Sep 17 00:00:00 2001 From: Samyak Sarnayak Date: Fri, 16 Jan 2026 16:21:08 +0530 Subject: [PATCH 031/121] GenericDialect: support colon operator for JsonAccess (#2124) --- src/dialect/mod.rs | 10 +++ src/dialect/mssql.rs | 9 +++ src/dialect/postgresql.rs | 3 + src/parser/mod.rs | 8 +- tests/sqlparser_common.rs | 145 +++++++++++++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 115 +-------------------------- 6 files changed, 174 insertions(+), 116 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 873108ee9a..d1728566e3 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -759,6 +759,13 @@ pub trait Dialect: Debug + Any { Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => { Ok(p!(DoubleColon)) } + Token::Colon => match parser.peek_nth_token(1).token { + // When colon is followed by a string or a number, it's usually in MAP syntax. + Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()), + // In other cases, it's used in semi-structured data traversal like in variant or JSON + // string columns. See `JsonAccess`. 
+ _ => Ok(p!(Colon)), + }, Token::Arrow | Token::LongArrow | Token::HashArrow @@ -812,6 +819,7 @@ pub trait Dialect: Debug + Any { Precedence::Ampersand => 23, Precedence::Caret => 22, Precedence::Pipe => 21, + Precedence::Colon => 21, Precedence::Between => 20, Precedence::Eq => 20, Precedence::Like => 19, @@ -1269,6 +1277,8 @@ pub enum Precedence { Caret, /// Bitwise `OR` / pipe operator (`|`). Pipe, + /// `:` operator for json/variant access. + Colon, /// `BETWEEN` operator. Between, /// Equality operator (`=`). diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index faf3402c24..a285452507 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -160,6 +160,15 @@ impl Dialect for MsSqlDialect { None } } + + fn get_next_precedence(&self, parser: &Parser) -> Option> { + let token = parser.peek_token(); + match token.token { + // lowest prec to prevent it from turning into a binary op + Token::Colon => Some(Ok(self.prec_unknown())), + _ => None, + } + } } impl MsSqlDialect { diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 02bab0e064..7c9e7db86c 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -136,6 +136,8 @@ impl Dialect for PostgreSqlDialect { | Token::ShiftRight | Token::ShiftLeft | Token::CustomBinaryOperator(_) => Some(Ok(PG_OTHER_PREC)), + // lowest prec to prevent it from turning into a binary op + Token::Colon => Some(Ok(self.prec_unknown())), _ => None, } } @@ -159,6 +161,7 @@ impl Dialect for PostgreSqlDialect { Precedence::Ampersand => PG_OTHER_PREC, Precedence::Caret => CARET_PREC, Precedence::Pipe => PG_OTHER_PREC, + Precedence::Colon => PG_OTHER_PREC, Precedence::Between => BETWEEN_LIKE_PREC, Precedence::Eq => EQ_PREC, Precedence::Like => BETWEEN_LIKE_PREC, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 47bb1164a6..6fd7b5ca43 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3918,7 +3918,7 @@ impl<'a> Parser<'a> { expr: Box::new(expr), }) } else if Token::LBracket == 
*tok && self.dialect.supports_partiql() - || (dialect_of!(self is SnowflakeDialect | GenericDialect) && Token::Colon == *tok) + || (Token::Colon == *tok) { self.prev_token(); self.parse_json_access(expr) @@ -3954,7 +3954,8 @@ impl<'a> Parser<'a> { let lower_bound = if self.consume_token(&Token::Colon) { None } else { - Some(self.parse_expr()?) + // parse expr until we hit a colon (or any token with lower precedence) + Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?) }; // check for end @@ -3982,7 +3983,8 @@ impl<'a> Parser<'a> { stride: None, }); } else { - Some(self.parse_expr()?) + // parse expr until we hit a colon (or any token with lower precedence) + Some(self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?) }; // check for end diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 95ad9a209b..bbbf0d8356 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18067,3 +18067,148 @@ fn test_binary_kw_as_cast() { all_dialects_where(|d| d.supports_binary_kw_as_cast()) .one_statement_parses_to("SELECT BINARY 1+1", "SELECT CAST(1 + 1 AS BINARY)"); } + +#[test] +fn parse_semi_structured_data_traversal() { + let dialects = TestedDialects::new(vec![ + Box::new(GenericDialect {}), + Box::new(SnowflakeDialect {}), + Box::new(DatabricksDialect {}), + ]); + + // most basic case + let sql = "SELECT a:b FROM t"; + let select = dialects.verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "b".to_owned(), + quoted: false + }] + }, + }), + select.projection[0] + ); + + // identifier can be quoted + let sql = r#"SELECT a:"my long object key name" FROM t"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot 
{ + key: "my long object key name".to_owned(), + quoted: true + }] + }, + }), + select.projection[0] + ); + + dialects.verified_stmt("SELECT a:b::INT FROM t"); + + // unquoted keywords are permitted in the object key + let sql = "SELECT a:select, a:from FROM t"; + let select = dialects.verified_only_select(sql); + assert_eq!( + vec![ + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "select".to_owned(), + quoted: false + }] + }, + }), + SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![JsonPathElem::Dot { + key: "from".to_owned(), + quoted: false + }] + }, + }) + ], + select.projection + ); + + // multiple levels can be traversed + // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation + let sql = r#"SELECT a:foo."bar".baz"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + vec![SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: "foo".to_owned(), + quoted: false, + }, + JsonPathElem::Dot { + key: "bar".to_owned(), + quoted: true, + }, + JsonPathElem::Dot { + key: "baz".to_owned(), + quoted: false, + } + ] + }, + })], + select.projection + ); + + // dot and bracket notation can be mixed (starting with : case) + // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation + let sql = r#"SELECT a:foo[0].bar"#; + let select = dialects.verified_only_select(sql); + assert_eq!( + vec![SelectItem::UnnamedExpr(Expr::JsonAccess { + value: Box::new(Expr::Identifier(Ident::new("a"))), + path: JsonPath { + path: vec![ + JsonPathElem::Dot { + key: "foo".to_owned(), + quoted: false, + }, + JsonPathElem::Bracket { + key: Expr::value(number("0")), + }, + JsonPathElem::Dot { + key: "bar".to_owned(), + quoted: false, + } + ] + }, + 
})], + select.projection + ); +} + +#[test] +fn parse_array_subscript() { + let dialects = all_dialects_except(|d| { + d.is::() + || d.is::() + || d.is::() + || d.is::() + }); + + dialects.verified_stmt("SELECT arr[1]"); + dialects.verified_stmt("SELECT arr[:]"); + dialects.verified_stmt("SELECT arr[1:2]"); + dialects.verified_stmt("SELECT arr[1:2:4]"); + dialects.verified_stmt("SELECT arr[1:array_length(arr)]"); + dialects.verified_stmt("SELECT arr[array_length(arr) - 1:array_length(arr)]"); + dialects + .verified_stmt("SELECT arr[array_length(arr) - 2:array_length(arr) - 1:array_length(arr)]"); + + dialects.verified_stmt("SELECT arr[1][2]"); + dialects.verified_stmt("SELECT arr[:][:]"); +} diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 37e9f8cb4b..5889b2bd02 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1265,37 +1265,8 @@ fn parse_lateral_flatten() { // https://docs.snowflake.com/en/user-guide/querying-semistructured #[test] fn parse_semi_structured_data_traversal() { - // most basic case - let sql = "SELECT a:b FROM t"; - let select = snowflake().verified_only_select(sql); - assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![JsonPathElem::Dot { - key: "b".to_owned(), - quoted: false - }] - }, - }), - select.projection[0] - ); - - // identifier can be quoted - let sql = r#"SELECT a:"my long object key name" FROM t"#; - let select = snowflake().verified_only_select(sql); - assert_eq!( - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![JsonPathElem::Dot { - key: "my long object key name".to_owned(), - quoted: true - }] - }, - }), - select.projection[0] - ); + // see `tests/sqlparser_common.rs` -> `parse_semi_structured_data_traversal` for more test + // cases. This test only has Snowflake-specific syntax like array access. 
// expressions are allowed in bracket notation let sql = r#"SELECT a[2 + 2] FROM t"#; @@ -1316,88 +1287,6 @@ fn parse_semi_structured_data_traversal() { select.projection[0] ); - snowflake().verified_stmt("SELECT a:b::INT FROM t"); - - // unquoted keywords are permitted in the object key - let sql = "SELECT a:select, a:from FROM t"; - let select = snowflake().verified_only_select(sql); - assert_eq!( - vec![ - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![JsonPathElem::Dot { - key: "select".to_owned(), - quoted: false - }] - }, - }), - SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![JsonPathElem::Dot { - key: "from".to_owned(), - quoted: false - }] - }, - }) - ], - select.projection - ); - - // multiple levels can be traversed - // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation - let sql = r#"SELECT a:foo."bar".baz"#; - let select = snowflake().verified_only_select(sql); - assert_eq!( - vec![SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![ - JsonPathElem::Dot { - key: "foo".to_owned(), - quoted: false, - }, - JsonPathElem::Dot { - key: "bar".to_owned(), - quoted: true, - }, - JsonPathElem::Dot { - key: "baz".to_owned(), - quoted: false, - } - ] - }, - })], - select.projection - ); - - // dot and bracket notation can be mixed (starting with : case) - // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation - let sql = r#"SELECT a:foo[0].bar"#; - let select = snowflake().verified_only_select(sql); - assert_eq!( - vec![SelectItem::UnnamedExpr(Expr::JsonAccess { - value: Box::new(Expr::Identifier(Ident::new("a"))), - path: JsonPath { - path: vec![ - JsonPathElem::Dot { - key: "foo".to_owned(), - quoted: false, - }, - JsonPathElem::Bracket { - key: Expr::value(number("0")), - 
}, - JsonPathElem::Dot { - key: "bar".to_owned(), - quoted: false, - } - ] - }, - })], - select.projection - ); - // dot and bracket notation can be mixed (starting with bracket case) // https://docs.snowflake.com/en/user-guide/querying-semistructured#dot-notation let sql = r#"SELECT a[0].foo.bar"#; From 6060a11d1f728d1639f761d3bb90e136911934f9 Mon Sep 17 00:00:00 2001 From: James Vorderbruggen Date: Tue, 20 Jan 2026 06:30:35 -0600 Subject: [PATCH 032/121] Databricks: Support Timetravel With "VERSION AS OF" (#2155) --- src/ast/query.rs | 5 +++++ src/dialect/bigquery.rs | 2 +- src/dialect/databricks.rs | 2 +- src/dialect/mod.rs | 2 +- src/dialect/mssql.rs | 2 +- src/dialect/snowflake.rs | 2 +- src/parser/mod.rs | 5 ++++- tests/sqlparser_databricks.rs | 11 +++++++++-- 8 files changed, 23 insertions(+), 8 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index d8af243f7d..a1fc33b6aa 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2398,6 +2398,10 @@ pub enum TableVersion { /// Databricks supports this syntax. /// For example: `SELECT * FROM tbl TIMESTAMP AS OF CURRENT_TIMESTAMP() - INTERVAL 1 HOUR` TimestampAsOf(Expr), + /// When the table version is defined using `VERSION AS OF`. + /// Databricks supports this syntax. + /// For example: `SELECT * FROM tbl VERSION AS OF 2` + VersionAsOf(Expr), /// When the table version is defined using a function. 
/// For example: `SELECT * FROM tbl AT(TIMESTAMP => '2020-08-14 09:30:00')` Function(Expr), @@ -2408,6 +2412,7 @@ impl Display for TableVersion { match self { TableVersion::ForSystemTimeAsOf(e) => write!(f, "FOR SYSTEM_TIME AS OF {e}")?, TableVersion::TimestampAsOf(e) => write!(f, "TIMESTAMP AS OF {e}")?, + TableVersion::VersionAsOf(e) => write!(f, "VERSION AS OF {e}")?, TableVersion::Function(func) => write!(f, "{func}")?, } Ok(()) diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 27fd3cca3b..6ad8a50893 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -136,7 +136,7 @@ impl Dialect for BigQueryDialect { } // See - fn supports_timestamp_versioning(&self) -> bool { + fn supports_table_versioning(&self) -> bool { true } diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index ec866295d1..029709fea6 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -48,7 +48,7 @@ impl Dialect for DatabricksDialect { } /// - fn supports_timestamp_versioning(&self) -> bool { + fn supports_table_versioning(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index d1728566e3..cd7fdee12f 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1072,7 +1072,7 @@ pub trait Dialect: Debug + Any { /// Returns true if this dialect supports querying historical table data /// by specifying which version of the data to query. 
- fn supports_timestamp_versioning(&self) -> bool { + fn supports_table_versioning(&self) -> bool { false } diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index a285452507..9f8e726562 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -107,7 +107,7 @@ impl Dialect for MsSqlDialect { } /// See: - fn supports_timestamp_versioning(&self) -> bool { + fn supports_table_versioning(&self) -> bool { true } diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index eade01c040..1e571d0f85 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -540,7 +540,7 @@ impl Dialect for SnowflakeDialect { } /// See: - fn supports_timestamp_versioning(&self) -> bool { + fn supports_table_versioning(&self) -> bool { true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6fd7b5ca43..149365c477 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15731,7 +15731,7 @@ impl<'a> Parser<'a> { /// Parses a the timestamp version specifier (i.e. query historical data) pub fn maybe_parse_table_version(&mut self) -> Result, ParserError> { - if self.dialect.supports_timestamp_versioning() { + if self.dialect.supports_table_versioning() { if self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) { let expr = self.parse_expr()?; @@ -15743,6 +15743,9 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::TIMESTAMP, Keyword::AS, Keyword::OF]) { let expr = self.parse_expr()?; return Ok(Some(TableVersion::TimestampAsOf(expr))); + } else if self.parse_keywords(&[Keyword::VERSION, Keyword::AS, Keyword::OF]) { + let expr = Expr::Value(self.parse_number_value()?); + return Ok(Some(TableVersion::VersionAsOf(expr))); } } Ok(None) diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 9064c8dc34..7f5ec6c3fc 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -371,14 +371,21 @@ fn data_type_timestamp_ntz() { #[test] fn parse_table_time_travel() { - 
all_dialects_where(|d| d.supports_timestamp_versioning()) + all_dialects_where(|d| d.supports_table_versioning()) .verified_only_select("SELECT 1 FROM t1 TIMESTAMP AS OF '2018-10-18T22:15:12.013Z'"); - all_dialects_where(|d| d.supports_timestamp_versioning()).verified_only_select( + all_dialects_where(|d| d.supports_table_versioning()).verified_only_select( "SELECT 1 FROM t1 TIMESTAMP AS OF CURRENT_TIMESTAMP() - INTERVAL 12 HOURS", ); + all_dialects_where(|d| d.supports_table_versioning()) + .verified_only_select("SELECT 1 FROM t1 VERSION AS OF 1"); + assert!(databricks() .parse_sql_statements("SELECT 1 FROM t1 FOR TIMESTAMP AS OF 'some_timestamp'") .is_err()); + + assert!(all_dialects_where(|d| d.supports_table_versioning()) + .parse_sql_statements("SELECT 1 FROM t1 VERSION AS OF 1 - 2",) + .is_err()) } From 2cf33de1ce6f189d28fa52bff88cf3bbde384cca Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Thu, 22 Jan 2026 06:50:06 -0800 Subject: [PATCH 033/121] Fixed truncate table if exists for snowflake (#2166) --- src/ast/ddl.rs | 7 +++++-- src/parser/mod.rs | 2 ++ tests/sqlparser_common.rs | 1 + tests/sqlparser_postgres.rs | 3 +++ tests/sqlparser_snowflake.rs | 7 +++++++ 5 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 23fcc01018..fcd14b6d1f 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -4062,7 +4062,7 @@ impl fmt::Display for DropTrigger { /// A `TRUNCATE` statement. 
/// /// ```sql -/// TRUNCATE TABLE table_names [PARTITION (partitions)] [RESTART IDENTITY | CONTINUE IDENTITY] [CASCADE | RESTRICT] [ON CLUSTER cluster_name] +/// TRUNCATE TABLE [IF EXISTS] table_names [PARTITION (partitions)] [RESTART IDENTITY | CONTINUE IDENTITY] [CASCADE | RESTRICT] [ON CLUSTER cluster_name] /// ``` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -4074,6 +4074,8 @@ pub struct Truncate { pub partitions: Option>, /// TABLE - optional keyword pub table: bool, + /// Snowflake/Redshift-specific option: [ IF EXISTS ] + pub if_exists: bool, /// Postgres-specific option: [ RESTART IDENTITY | CONTINUE IDENTITY ] pub identity: Option, /// Postgres-specific option: [ CASCADE | RESTRICT ] @@ -4086,10 +4088,11 @@ pub struct Truncate { impl fmt::Display for Truncate { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let table = if self.table { "TABLE " } else { "" }; + let if_exists = if self.if_exists { "IF EXISTS " } else { "" }; write!( f, - "TRUNCATE {table}{table_names}", + "TRUNCATE {table}{if_exists}{table_names}", table_names = display_comma_separated(&self.table_names) )?; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 149365c477..733abbbf31 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1054,6 +1054,7 @@ impl<'a> Parser<'a> { /// Parse `TRUNCATE` statement. 
pub fn parse_truncate(&mut self) -> Result { let table = self.parse_keyword(Keyword::TABLE); + let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let table_names = self .parse_comma_separated(|p| { @@ -1091,6 +1092,7 @@ impl<'a> Parser<'a> { table_names, partitions, table, + if_exists, identity, cascade, on_cluster, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index bbbf0d8356..c67bcb18ea 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -16773,6 +16773,7 @@ fn parse_truncate_only() { table_names, partitions: None, table: true, + if_exists: false, identity: None, cascade: None, on_cluster: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 325e3939e7..57bddc6569 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -5089,6 +5089,7 @@ fn parse_truncate() { table_names, partitions: None, table: false, + if_exists: false, identity: None, cascade: None, on_cluster: None, @@ -5113,6 +5114,7 @@ fn parse_truncate_with_options() { table_names, partitions: None, table: true, + if_exists: false, identity: Some(TruncateIdentityOption::Restart), cascade: Some(CascadeOption::Cascade), on_cluster: None, @@ -5146,6 +5148,7 @@ fn parse_truncate_with_table_list() { table_names, partitions: None, table: true, + if_exists: false, identity: Some(TruncateIdentityOption::Restart), cascade: Some(CascadeOption::Cascade), on_cluster: None, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 5889b2bd02..72f60f1a68 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -4533,3 +4533,10 @@ fn test_alter_external_table() { snowflake() .verified_stmt("ALTER EXTERNAL TABLE IF EXISTS some_table REFRESH 'year=2025/month=12/'"); } + +#[test] +fn test_truncate_table_if_exists() { + snowflake().verified_stmt("TRUNCATE TABLE IF EXISTS my_table"); + snowflake().verified_stmt("TRUNCATE TABLE my_table"); + 
snowflake().verified_stmt("TRUNCATE IF EXISTS my_table"); +} From 845e2138e27dcaf677b92dc36be32b0ec403ae3d Mon Sep 17 00:00:00 2001 From: Simon Sawert Date: Thu, 22 Jan 2026 17:52:13 +0100 Subject: [PATCH 034/121] fix: qualified column names with SQL keywords parse as identifiers (#2157) --- src/parser/mod.rs | 65 +++++++++++++++++++++++++++------------ tests/sqlparser_common.rs | 45 +++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 19 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 733abbbf31..882803a5ac 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1900,26 +1900,53 @@ impl<'a> Parser<'a> { chain.push(AccessExpr::Dot(expr)); self.advance_token(); // The consumed string } - // Fallback to parsing an arbitrary expression. - _ => match self.parse_subexpr(self.dialect.prec_value(Precedence::Period))? { - // If we get back a compound field access or identifier, - // we flatten the nested expression. - // For example if the current root is `foo` - // and we get back a compound identifier expression `bar.baz` - // The full expression should be `foo.bar.baz` (i.e. - // a root with an access chain with 2 entries) and not - // `foo.(bar.baz)` (i.e. a root with an access chain with - // 1 entry`). - Expr::CompoundFieldAccess { root, access_chain } => { - chain.push(AccessExpr::Dot(*root)); - chain.extend(access_chain); - } - Expr::CompoundIdentifier(parts) => chain - .extend(parts.into_iter().map(Expr::Identifier).map(AccessExpr::Dot)), - expr => { - chain.push(AccessExpr::Dot(expr)); + // Fallback to parsing an arbitrary expression, but restrict to expression + // types that are valid after the dot operator. This ensures that e.g. + // `T.interval` is parsed as a compound identifier, not as an interval + // expression. + _ => { + let expr = self.maybe_parse(|parser| { + let expr = parser + .parse_subexpr(parser.dialect.prec_value(Precedence::Period))?; + match &expr { + Expr::CompoundFieldAccess { .. 
} + | Expr::CompoundIdentifier(_) + | Expr::Identifier(_) + | Expr::Value(_) + | Expr::Function(_) => Ok(expr), + _ => parser.expected("an identifier or value", parser.peek_token()), + } + })?; + + match expr { + // If we get back a compound field access or identifier, + // we flatten the nested expression. + // For example if the current root is `foo` + // and we get back a compound identifier expression `bar.baz` + // The full expression should be `foo.bar.baz` (i.e. + // a root with an access chain with 2 entries) and not + // `foo.(bar.baz)` (i.e. a root with an access chain with + // 1 entry`). + Some(Expr::CompoundFieldAccess { root, access_chain }) => { + chain.push(AccessExpr::Dot(*root)); + chain.extend(access_chain); + } + Some(Expr::CompoundIdentifier(parts)) => chain.extend( + parts.into_iter().map(Expr::Identifier).map(AccessExpr::Dot), + ), + Some(expr) => { + chain.push(AccessExpr::Dot(expr)); + } + // If the expression is not a valid suffix, fall back to + // parsing as an identifier. This handles cases like `T.interval` + // where `interval` is a keyword but should be treated as an identifier. + None => { + chain.push(AccessExpr::Dot(Expr::Identifier( + self.parse_identifier()?, + ))); + } } - }, + } } } else if !self.dialect.supports_partiql() && self.peek_token_ref().token == Token::LBracket diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c67bcb18ea..f892bf7a9b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15045,6 +15045,51 @@ fn test_reserved_keywords_for_identifiers() { dialects.parse_sql_statements(sql).unwrap(); } +#[test] +fn test_keywords_as_column_names_after_dot() { + // Test various keywords that have special meaning when standalone + // but should be treated as identifiers after a dot. + let keywords = [ + "interval", // INTERVAL '1' DAY + "case", // CASE WHEN ... END + "cast", // CAST(x AS y) + "extract", // EXTRACT(DAY FROM ...) + "trim", // TRIM(...) 
+ "substring", // SUBSTRING(...) + "left", // LEFT(str, n) + "right", // RIGHT(str, n) + ]; + + for kw in keywords { + let sql = format!("SELECT T.{kw} FROM T"); + verified_stmt(&sql); + + let sql = format!("SELECT SUM(x) OVER (PARTITION BY T.{kw} ORDER BY T.id) FROM T"); + verified_stmt(&sql); + + let sql = format!("SELECT T.{kw}, S.{kw} FROM T, S WHERE T.{kw} = S.{kw}"); + verified_stmt(&sql); + } + + let select = verified_only_select("SELECT T.interval, T.case FROM T"); + match &select.projection[0] { + SelectItem::UnnamedExpr(Expr::CompoundIdentifier(idents)) => { + assert_eq!(idents.len(), 2); + assert_eq!(idents[0].value, "T"); + assert_eq!(idents[1].value, "interval"); + } + _ => panic!("Expected CompoundIdentifier for T.interval"), + } + match &select.projection[1] { + SelectItem::UnnamedExpr(Expr::CompoundIdentifier(idents)) => { + assert_eq!(idents.len(), 2); + assert_eq!(idents[0].value, "T"); + assert_eq!(idents[1].value, "case"); + } + _ => panic!("Expected CompoundIdentifier for T.case"), + } +} + #[test] fn parse_create_table_with_bit_types() { let sql = "CREATE TABLE t (a BIT, b BIT VARYING, c BIT(42), d BIT VARYING(43))"; From abec6f9b207287d269e015f738cc0bf36da82d08 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 23 Jan 2026 03:46:01 -0700 Subject: [PATCH 035/121] Refactor: replace some `dialect_of!` checks with `Dialect` trait methods (#2171) Co-authored-by: Claude Opus 4.5 --- src/dialect/databricks.rs | 5 +++ src/dialect/generic.rs | 16 ++++++++ src/dialect/mod.rs | 81 +++++++++++++++++++++++++++++++++++++++ src/dialect/snowflake.rs | 30 +++++++++++++++ src/parser/mod.rs | 13 +++---- 5 files changed, 138 insertions(+), 7 deletions(-) diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 029709fea6..40807a0163 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -79,4 +79,9 @@ impl Dialect for DatabricksDialect { fn supports_group_by_with_modifier(&self) -> bool { true } + + /// See + fn 
supports_values_as_table_factor(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index da57253d67..42510e2f09 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -104,6 +104,22 @@ impl Dialect for GenericDialect { true } + fn supports_extract_comma_syntax(&self) -> bool { + true + } + + fn supports_create_view_comment_syntax(&self) -> bool { + true + } + + fn supports_parens_around_table_factor(&self) -> bool { + true + } + + fn supports_values_as_table_factor(&self) -> bool { + true + } + fn supports_create_index_with_clause(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index cd7fdee12f..284fc41726 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -856,6 +856,87 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if this dialect supports the `EXTRACT` function + /// with a comma separator instead of `FROM`. + /// + /// Example: + /// ```sql + /// SELECT EXTRACT(YEAR, date_column) FROM table; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/functions/extract) + fn supports_extract_comma_syntax(&self) -> bool { + false + } + + /// Returns true if this dialect supports a subquery passed to a function + /// as the only argument without enclosing parentheses. + /// + /// Example: + /// ```sql + /// SELECT FLATTEN(SELECT * FROM tbl); + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/functions/flatten) + fn supports_subquery_as_function_arg(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `COMMENT` clause in + /// `CREATE VIEW` statements using the `COMMENT = 'comment'` syntax. 
+ /// + /// Example: + /// ```sql + /// CREATE VIEW v COMMENT = 'my comment' AS SELECT 1; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/create-view#optional-parameters) + fn supports_create_view_comment_syntax(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `ARRAY` type without + /// specifying an element type. + /// + /// Example: + /// ```sql + /// CREATE TABLE t (a ARRAY); + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/data-types-semistructured#array) + fn supports_array_typedef_without_element_type(&self) -> bool { + false + } + + /// Returns true if this dialect supports extra parentheses around + /// lone table names or derived tables in the `FROM` clause. + /// + /// Example: + /// ```sql + /// SELECT * FROM (mytable); + /// SELECT * FROM ((SELECT 1)); + /// SELECT * FROM (mytable) AS alias; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constructs/from) + fn supports_parens_around_table_factor(&self) -> bool { + false + } + + /// Returns true if this dialect supports `VALUES` as a table factor + /// without requiring parentheses around the entire clause. + /// + /// Example: + /// ```sql + /// SELECT * FROM VALUES (1, 'a'), (2, 'b') AS t (col1, col2); + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constructs/values) + /// [Databricks](https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-values.html) + fn supports_values_as_table_factor(&self) -> bool { + false + } + /// Returns true if this dialect allows dollar placeholders /// e.g. 
`SELECT $var` (SQLite) fn supports_dollar_placeholder(&self) -> bool { diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 1e571d0f85..d768f7a211 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -211,6 +211,36 @@ impl Dialect for SnowflakeDialect { true } + /// See [doc](https://docs.snowflake.com/en/sql-reference/functions/extract) + fn supports_extract_comma_syntax(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/functions/flatten) + fn supports_subquery_as_function_arg(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/sql/create-view#optional-parameters) + fn supports_create_view_comment_syntax(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/data-types-semistructured#array) + fn supports_array_typedef_without_element_type(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/constructs/from) + fn supports_parens_around_table_factor(&self) -> bool { + true + } + + /// See [doc](https://docs.snowflake.com/en/sql-reference/constructs/values) + fn supports_values_as_table_factor(&self) -> bool { + true + } + fn parse_statement(&self, parser: &mut Parser) -> Option> { if parser.parse_keyword(Keyword::BEGIN) { return Some(parser.parse_begin_exception_end()); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 882803a5ac..0971d91548 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2293,7 +2293,7 @@ impl<'a> Parser<'a> { // Snowflake permits a subquery to be passed as an argument without // an enclosing set of parens if it's the only argument. 
- if dialect_of!(self is SnowflakeDialect) && self.peek_sub_query() { + if self.dialect.supports_subquery_as_function_arg() && self.peek_sub_query() { let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; return Ok(Function { @@ -2683,8 +2683,7 @@ impl<'a> Parser<'a> { let syntax = if self.parse_keyword(Keyword::FROM) { ExtractSyntax::From - } else if self.consume_token(&Token::Comma) - && dialect_of!(self is SnowflakeDialect | GenericDialect) + } else if self.dialect.supports_extract_comma_syntax() && self.consume_token(&Token::Comma) { ExtractSyntax::Comma } else { @@ -6228,7 +6227,7 @@ impl<'a> Parser<'a> { None }; - let comment = if dialect_of!(self is SnowflakeDialect | GenericDialect) + let comment = if self.dialect.supports_create_view_comment_syntax() && self.parse_keyword(Keyword::COMMENT) { self.expect_token(&Token::Eq)?; @@ -11790,7 +11789,7 @@ impl<'a> Parser<'a> { Keyword::ENUM16 => Ok(DataType::Enum(self.parse_enum_values()?, Some(16))), Keyword::SET => Ok(DataType::Set(self.parse_string_values()?)), Keyword::ARRAY => { - if dialect_of!(self is SnowflakeDialect) { + if self.dialect.supports_array_typedef_without_element_type() { Ok(DataType::Array(ArrayElemTypeDef::None)) } else if dialect_of!(self is ClickHouseDialect) { Ok(self.parse_sub_type(|internal_type| { @@ -14989,7 +14988,7 @@ impl<'a> Parser<'a> { table_with_joins: Box::new(table_and_joins), alias, }) - } else if dialect_of!(self is SnowflakeDialect | GenericDialect) { + } else if self.dialect.supports_parens_around_table_factor() { // Dialect-specific behavior: Snowflake diverges from the // standard and from most of the other implementations by // allowing extra parentheses not only around a join (B), but @@ -15035,7 +15034,7 @@ impl<'a> Parser<'a> { // appearing alone in parentheses (e.g. 
`FROM (mytable)`) self.expected("joined table", self.peek_token()) } - } else if dialect_of!(self is SnowflakeDialect | DatabricksDialect | GenericDialect) + } else if self.dialect.supports_values_as_table_factor() && matches!( self.peek_tokens(), [ From 6550ec8009b9584bd38ec9d687d5e819cc43a5c2 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 23 Jan 2026 03:46:29 -0700 Subject: [PATCH 036/121] perf: remove unnecessary string clone in maybe_concat_string_literal (#2173) Co-authored-by: Claude Opus 4.5 --- src/parser/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0971d91548..cfc173d76f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11298,7 +11298,7 @@ impl<'a> Parser<'a> { while let Token::SingleQuotedString(ref s) | Token::DoubleQuotedString(ref s) = self.peek_token_ref().token { - str.push_str(s.clone().as_str()); + str.push_str(s); self.advance_token(); } } From e7a30191f3b643da867306d2187be6efb50b1809 Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Fri, 23 Jan 2026 11:19:06 -0800 Subject: [PATCH 037/121] MySQL: Support `CAST(... AS ... ARRAY)` syntax (#2151) --- src/ast/mod.rs | 16 +++++++++++++--- src/ast/spans.rs | 3 ++- src/parser/mod.rs | 5 +++++ tests/sqlparser_common.rs | 18 ++++++++++++++++++ tests/sqlparser_databricks.rs | 1 + tests/sqlparser_duckdb.rs | 1 + tests/sqlparser_mysql.rs | 27 +++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 5 +++++ tests/sqlparser_snowflake.rs | 12 +++++++----- 9 files changed, 79 insertions(+), 9 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d77186bc77..0470d6a8b2 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1033,6 +1033,12 @@ pub enum Expr { expr: Box, /// Target data type. data_type: DataType, + /// [MySQL] allows CAST(... AS type ARRAY) in functional index definitions for InnoDB + /// multi-valued indices. 
It's not really a datatype, and is only allowed in `CAST` in key + /// specifications, so it's a flag here. + /// + /// [MySQL]: https://dev.mysql.com/doc/refman/8.4/en/cast-functions.html#function_cast + array: bool, /// Optional CAST(string_expression AS type FORMAT format_string_expression) as used by [BigQuery] /// /// [BigQuery]: https://cloud.google.com/bigquery/docs/reference/standard-sql/format-elements#formatting_syntax @@ -1879,14 +1885,18 @@ impl fmt::Display for Expr { kind, expr, data_type, + array, format, } => match kind { CastKind::Cast => { + write!(f, "CAST({expr} AS {data_type}")?; + if *array { + write!(f, " ARRAY")?; + } if let Some(format) = format { - write!(f, "CAST({expr} AS {data_type} FORMAT {format})") - } else { - write!(f, "CAST({expr} AS {data_type})") + write!(f, " FORMAT {format}")?; } + write!(f, ")") } CastKind::TryCast => { if let Some(format) = format { diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 488c886249..1c5cc4738d 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1540,6 +1540,7 @@ impl Spanned for Expr { kind: _, expr, data_type: _, + array: _, format: _, } => expr.span(), Expr::AtTimeZone { @@ -2801,7 +2802,7 @@ WHERE id = 1 UPDATE SET target_table.description = source_table.description WHEN MATCHED AND target_table.x != 'X' THEN DELETE - WHEN NOT MATCHED AND 1 THEN INSERT (product, quantity) ROW + WHEN NOT MATCHED AND 1 THEN INSERT (product, quantity) ROW "#; let r = Parser::parse_sql(&crate::dialect::GenericDialect, sql).unwrap(); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index cfc173d76f..586c2f6bad 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1650,6 +1650,7 @@ impl<'a> Parser<'a> { kind: CastKind::Cast, expr: Box::new(parser.parse_expr()?), data_type: DataType::Binary(None), + array: false, format: None, }) } @@ -2655,12 +2656,14 @@ impl<'a> Parser<'a> { let expr = self.parse_expr()?; self.expect_keyword_is(Keyword::AS)?; let data_type = self.parse_data_type()?; + let array = 
self.parse_keyword(Keyword::ARRAY); let format = self.parse_optional_cast_format()?; self.expect_token(&Token::RParen)?; Ok(Expr::Cast { kind, expr: Box::new(expr), data_type, + array, format, }) } @@ -3938,6 +3941,7 @@ impl<'a> Parser<'a> { kind: CastKind::DoubleColon, expr: Box::new(expr), data_type: self.parse_data_type()?, + array: false, format: None, }) } else if Token::ExclamationMark == *tok && self.dialect.supports_factorial_operator() { @@ -4178,6 +4182,7 @@ impl<'a> Parser<'a> { kind: CastKind::DoubleColon, expr: Box::new(expr), data_type: self.parse_data_type()?, + array: false, format: None, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index f892bf7a9b..dcc92207b4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -3027,6 +3027,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3039,6 +3040,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::TinyInt(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3070,6 +3072,7 @@ fn parse_cast() { length: 50, unit: None, })), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3082,6 +3085,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3094,6 +3098,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Clob(Some(50)), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3106,6 +3111,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: 
DataType::Binary(Some(50)), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3118,6 +3124,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Varbinary(Some(BinaryLength::IntegerLength { length: 50 })), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3130,6 +3137,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3142,6 +3150,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::Blob(Some(50)), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3154,6 +3163,7 @@ fn parse_cast() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("details"))), data_type: DataType::JSONB, + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -3169,6 +3179,7 @@ fn parse_try_cast() { kind: CastKind::TryCast, expr: Box::new(Expr::Identifier(Ident::new("id"))), data_type: DataType::BigInt(None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -6505,6 +6516,7 @@ fn interval_disallow_interval_expr_double_colon() { fractional_seconds_precision: None, })), data_type: DataType::Text, + array: false, format: None, } ) @@ -9220,6 +9232,7 @@ fn parse_double_colon_cast_at_timezone() { .with_empty_span() )), data_type: DataType::Timestamp(None, TimezoneInfo::None), + array: false, format: None }), time_zone: Box::new(Expr::Value( @@ -13352,6 +13365,7 @@ fn test_dictionary_syntax() { (Value::SingleQuotedString("2023-04-01".to_owned())).with_empty_span(), )), data_type: DataType::Timestamp(None, TimezoneInfo::None), + array: false, format: None, }), }, @@ -13363,6 +13377,7 @@ fn test_dictionary_syntax() { 
(Value::SingleQuotedString("2023-04-05".to_owned())).with_empty_span(), )), data_type: DataType::Timestamp(None, TimezoneInfo::None), + array: false, format: None, }), }, @@ -13606,6 +13621,7 @@ fn test_extract_seconds_ok() { fields: None, precision: None }, + array: false, format: None, }), } @@ -13634,6 +13650,7 @@ fn test_extract_seconds_ok() { fields: None, precision: None, }, + array: false, format: None, }), })], @@ -13691,6 +13708,7 @@ fn test_extract_seconds_single_quote_ok() { fields: None, precision: None }, + array: false, format: None, }), } diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 7f5ec6c3fc..b088afd78b 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -349,6 +349,7 @@ fn data_type_timestamp_ntz() { "created_at".into() )))), data_type: DataType::TimestampNtz(None), + array: false, format: None } ); diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 80a15eb11f..bdfe4f50a2 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -380,6 +380,7 @@ fn test_duckdb_specific_int_types() { Value::Number("123".parse().unwrap(), false).with_empty_span() )), data_type: data_type.clone(), + array: false, format: None, }, expr_from_projection(&select.projection[0]) diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index e847d3edb7..4a62053867 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -874,6 +874,25 @@ fn test_functional_key_part() { )), }), data_type: DataType::Unsigned, + array: false, + format: None, + })), + ); + assert_eq!( + index_column(mysql_and_generic().verified_stmt( + r#"CREATE TABLE t (jsoncol JSON, PRIMARY KEY ((CAST(col ->> '$.fields' AS UNSIGNED ARRAY)) ASC))"# + )), + Expr::Nested(Box::new(Expr::Cast { + kind: CastKind::Cast, + expr: Box::new(Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("col"))), + op: BinaryOperator::LongArrow, + right: Box::new(Expr::Value( + 
Value::SingleQuotedString("$.fields".to_string()).with_empty_span() + )), + }), + data_type: DataType::Unsigned, + array: true, format: None, })), ); @@ -4096,6 +4115,14 @@ fn parse_cast_integers() { .expect_err("CAST doesn't allow display width"); } +#[test] +fn parse_cast_array() { + mysql().verified_expr("CAST(foo AS SIGNED ARRAY)"); + mysql() + .run_parser_method("CAST(foo AS ARRAY)", |p| p.parse_expr()) + .expect_err("ARRAY alone is not a type"); +} + #[test] fn parse_match_against_with_alias() { let sql = "SELECT tbl.ProjectID FROM surveys.tbl1 AS tbl WHERE MATCH (tbl.ReferenceID) AGAINST ('AAA' IN BOOLEAN MODE)"; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 57bddc6569..7c194c1c9f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1706,6 +1706,7 @@ fn parse_execute() { (Value::Number("1337".parse().unwrap(), false)).with_empty_span() )), data_type: DataType::SmallInt(None), + array: false, format: None }, alias: None @@ -1717,6 +1718,7 @@ fn parse_execute() { (Value::Number("7331".parse().unwrap(), false)).with_empty_span() )), data_type: DataType::SmallInt(None), + array: false, format: None }, alias: None @@ -2343,6 +2345,7 @@ fn parse_array_index_expr() { ))), None )), + array: false, format: None, }))), access_chain: vec![ @@ -5573,6 +5576,7 @@ fn parse_at_time_zone() { Value::SingleQuotedString("America/Los_Angeles".to_owned()).with_empty_span(), )), data_type: DataType::Text, + array: false, format: None, }), }), @@ -6389,6 +6393,7 @@ fn arrow_cast_precedence() { (Value::SingleQuotedString("bar".to_string())).with_empty_span() )), data_type: DataType::Text, + array: false, format: None, }), } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 72f60f1a68..ede912ebe4 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1101,8 +1101,8 @@ fn parse_create_dynamic_table() { " EXTERNAL_VOLUME='my_external_volume'", " CATALOG='SNOWFLAKE'", " 
BASE_LOCATION='my_iceberg_table'", - " TARGET_LAG='20 minutes'", - " WAREHOUSE=mywh", + " TARGET_LAG='20 minutes'", + " WAREHOUSE=mywh", " AS SELECT product_id, product_name FROM staging_table" )); @@ -1250,6 +1250,7 @@ fn parse_array() { kind: CastKind::Cast, expr: Box::new(Expr::Identifier(Ident::new("a"))), data_type: DataType::Array(ArrayElemTypeDef::None), + array: false, format: None, }, expr_from_projection(only(&select.projection)) @@ -1349,8 +1350,6 @@ fn parse_semi_structured_data_traversal() { Expr::JsonAccess { value: Box::new(Expr::Cast { kind: CastKind::DoubleColon, - data_type: DataType::Array(ArrayElemTypeDef::None), - format: None, expr: Box::new(Expr::JsonAccess { value: Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { @@ -1359,7 +1358,10 @@ fn parse_semi_structured_data_traversal() { quoted: false }] } - }) + }), + data_type: DataType::Array(ArrayElemTypeDef::None), + array: false, + format: None, }), path: JsonPath { path: vec![JsonPathElem::Bracket { From 614ea06e31b415e012ac59afd8c90a723d8868b5 Mon Sep 17 00:00:00 2001 From: finchxxia <13153363548@163.com> Date: Sat, 24 Jan 2026 03:25:58 +0800 Subject: [PATCH 038/121] Snowflake: Support SAMPLE clause on subqueries (#2164) --- src/ast/query.rs | 6 ++++++ src/ast/spans.rs | 1 + src/parser/mod.rs | 8 ++++++++ tests/sqlparser_common.rs | 6 +++++- tests/sqlparser_snowflake.rs | 17 +++++++++++++++++ 5 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index a1fc33b6aa..7ea4de19e4 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1325,6 +1325,8 @@ pub enum TableFactor { subquery: Box, /// Optional alias for the derived table. 
alias: Option, + /// Optional table sample modifier + sample: Option, }, /// `TABLE()[ AS ]` TableFunction { @@ -2071,6 +2073,7 @@ impl fmt::Display for TableFactor { lateral, subquery, alias, + sample, } => { if *lateral { write!(f, "LATERAL ")?; @@ -2083,6 +2086,9 @@ impl fmt::Display for TableFactor { if let Some(alias) = alias { write!(f, " {alias}")?; } + if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { + write!(f, " {sample}")?; + } Ok(()) } TableFactor::Function { diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 1c5cc4738d..58d70a87ed 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1915,6 +1915,7 @@ impl Spanned for TableFactor { lateral: _, subquery, alias, + sample: _, } => subquery .span() .union_opt(&alias.as_ref().map(|alias| alias.span())), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 586c2f6bad..6fb06c6415 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -15073,6 +15073,7 @@ impl<'a> Parser<'a> { pipe_operators: vec![], }), alias, + sample: None, }) } else if dialect_of!(self is BigQueryDialect | PostgreSqlDialect | GenericDialect) && self.parse_keyword(Keyword::UNNEST) @@ -15880,6 +15881,12 @@ impl<'a> Parser<'a> { let subquery = self.parse_query()?; self.expect_token(&Token::RParen)?; let alias = self.maybe_parse_table_alias()?; + + // Parse optional SAMPLE clause after alias + let sample = self + .maybe_parse_table_sample()? 
+ .map(TableSampleKind::AfterTableAlias); + Ok(TableFactor::Derived { lateral: match lateral { Lateral => true, @@ -15887,6 +15894,7 @@ impl<'a> Parser<'a> { }, subquery, alias, + sample, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index dcc92207b4..b2c41d9748 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -512,7 +512,8 @@ fn parse_update_set_from() { format_clause: None, pipe_operators: vec![], }), - alias: table_alias(true, "t2") + alias: table_alias(true, "t2"), + sample: None, }, joins: vec![] }])), @@ -7863,6 +7864,7 @@ fn parse_derived_tables() { lateral: false, subquery: Box::new(verified_query("(SELECT 1) UNION (SELECT 2)")), alias: table_alias(true, "t1"), + sample: None, }, joins: vec![Join { relation: table_from_name(ObjectName::from(vec!["t2".into()])), @@ -8871,6 +8873,7 @@ fn lateral_derived() { lateral, ref subquery, alias: Some(ref alias), + sample: _, } = join.relation { assert_eq!(lateral_in, lateral); @@ -9950,6 +9953,7 @@ fn parse_merge() { pipe_operators: vec![], }), alias: table_alias(true, "stg"), + sample: None, } ); assert_eq!(source, source_no_into); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index ede912ebe4..cb5af62167 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -3406,6 +3406,23 @@ fn test_table_sample() { snowflake_and_generic().verified_stmt("SELECT id FROM mytable TABLESAMPLE (10) SEED (1)"); } +#[test] +fn test_subquery_sample() { + // Test SAMPLE clause on subqueries (derived tables) + snowflake_and_generic().verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10000 ROWS)"); + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) AS t SAMPLE (50 PERCENT)"); + // Nested subquery with SAMPLE + snowflake_and_generic().verified_stmt( + "SELECT * FROM (SELECT * FROM (SELECT report_from 
FROM mytable) SAMPLE (10000 ROWS)) AS anon_1", + ); + // SAMPLE with SEED on subquery + snowflake_and_generic() + .verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10) SEED (42)"); +} + #[test] fn parse_ls_and_rm() { snowflake().one_statement_parses_to("LS @~", "LIST @~"); From 4305dd43f315968d98ec1df5d1f22a4fecf534a7 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 23 Jan 2026 12:27:59 -0700 Subject: [PATCH 039/121] refactor: use `to_ident()` instead of `clone().into_ident()` for borrowed Words (#2177) Co-authored-by: Claude Opus 4.5 --- sqlparser_bench/benches/sqlparser_bench.rs | 72 +++++++++++++++++++++- src/parser/mod.rs | 35 ++++++----- 2 files changed, 92 insertions(+), 15 deletions(-) diff --git a/sqlparser_bench/benches/sqlparser_bench.rs b/sqlparser_bench/benches/sqlparser_bench.rs index 9637a98f7a..b52683aa55 100644 --- a/sqlparser_bench/benches/sqlparser_bench.rs +++ b/sqlparser_bench/benches/sqlparser_bench.rs @@ -17,7 +17,9 @@ use criterion::{criterion_group, criterion_main, Criterion}; use sqlparser::dialect::GenericDialect; +use sqlparser::keywords::Keyword; use sqlparser::parser::Parser; +use sqlparser::tokenizer::{Span, Word}; fn basic_queries(c: &mut Criterion) { let mut group = c.benchmark_group("sqlparser-rs parsing benchmark"); @@ -82,5 +84,73 @@ fn basic_queries(c: &mut Criterion) { }); } -criterion_group!(benches, basic_queries); +/// Benchmark comparing `to_ident(&self)` vs `clone().into_ident(self)`. +/// +/// Both approaches have equivalent performance since the String clone dominates. +/// `to_ident()` is preferred for clearer code (one method call vs two). 
+fn word_to_ident(c: &mut Criterion) { + let mut group = c.benchmark_group("word_to_ident"); + + // Create Word instances with varying identifier lengths + let words: Vec = (0..100) + .map(|i| Word { + value: format!("identifier_name_with_number_{i}"), + quote_style: None, + keyword: Keyword::NoKeyword, + }) + .collect(); + let span = Span::empty(); + + // clone().into_ident(): clones entire Word struct, then moves the String value + group.bench_function("clone_into_ident_100x", |b| { + b.iter(|| { + for w in &words { + std::hint::black_box(w.clone().into_ident(span)); + } + }); + }); + + // to_ident(): clones only the String value directly into the Ident + group.bench_function("to_ident_100x", |b| { + b.iter(|| { + for w in &words { + std::hint::black_box(w.to_ident(span)); + } + }); + }); + + group.finish(); +} + +/// Benchmark parsing queries with many identifiers to show real-world impact +fn parse_many_identifiers(c: &mut Criterion) { + let mut group = c.benchmark_group("parse_identifiers"); + let dialect = GenericDialect {}; + + // Query with many column references (identifiers) + let many_columns = (0..100) + .map(|n| format!("column_{n}")) + .collect::>() + .join(", "); + let query = format!("SELECT {many_columns} FROM my_table"); + + group.bench_function("select_100_columns", |b| { + b.iter(|| Parser::parse_sql(&dialect, std::hint::black_box(&query))); + }); + + // Query with many table.column references + let qualified_columns = (0..100) + .map(|n| format!("t{}.column_{n}", n % 5)) + .collect::>() + .join(", "); + let query_qualified = format!("SELECT {qualified_columns} FROM t0, t1, t2, t3, t4"); + + group.bench_function("select_100_qualified_columns", |b| { + b.iter(|| Parser::parse_sql(&dialect, std::hint::black_box(&query_qualified))); + }); + + group.finish(); +} + +criterion_group!(benches, basic_queries, word_to_ident, parse_many_identifiers); criterion_main!(benches); diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6fb06c6415..d021d163e2 
100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1460,7 +1460,7 @@ impl<'a> Parser<'a> { if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { Ok(Some(Expr::Function(Function { - name: ObjectName::from(vec![w.clone().into_ident(w_span)]), + name: ObjectName::from(vec![w.to_ident(w_span)]), uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::None, @@ -1475,7 +1475,7 @@ impl<'a> Parser<'a> { | Keyword::CURRENT_DATE | Keyword::LOCALTIME | Keyword::LOCALTIMESTAMP => { - Ok(Some(self.parse_time_functions(ObjectName::from(vec![w.clone().into_ident(w_span)]))?)) + Ok(Some(self.parse_time_functions(ObjectName::from(vec![w.to_ident(w_span)]))?)) } Keyword::CASE => Ok(Some(self.parse_case_expr()?)), Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)), @@ -1500,7 +1500,7 @@ impl<'a> Parser<'a> { Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)), Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)), Keyword::POSITION if self.peek_token_ref().token == Token::LParen => { - Ok(Some(self.parse_position_expr(w.clone().into_ident(w_span))?)) + Ok(Some(self.parse_position_expr(w.to_ident(w_span))?)) } Keyword::SUBSTR | Keyword::SUBSTRING => { self.prev_token(); @@ -1522,7 +1522,7 @@ impl<'a> Parser<'a> { let query = self.parse_query()?; self.expect_token(&Token::RParen)?; Ok(Some(Expr::Function(Function { - name: ObjectName::from(vec![w.clone().into_ident(w_span)]), + name: ObjectName::from(vec![w.to_ident(w_span)]), uses_odbc_syntax: false, parameters: FunctionArguments::None, args: FunctionArguments::Subquery(query), @@ -1572,7 +1572,7 @@ impl<'a> Parser<'a> { ) -> Result { match self.peek_token().token { Token::LParen if !self.peek_outer_join_operator() => { - let id_parts = vec![w.clone().into_ident(w_span)]; + let id_parts = vec![w.to_ident(w_span)]; self.parse_function(ObjectName::from(id_parts)) } // string introducer https://dev.mysql.com/doc/refman/8.0/en/charset-introducer.html @@ -1582,7 
+1582,7 @@ impl<'a> Parser<'a> { if w.value.starts_with('_') => { Ok(Expr::Prefixed { - prefix: w.clone().into_ident(w_span), + prefix: w.to_ident(w_span), value: self.parse_introduced_string_expr()?.into(), }) } @@ -1593,19 +1593,19 @@ impl<'a> Parser<'a> { if w.value.starts_with('_') => { Ok(Expr::Prefixed { - prefix: w.clone().into_ident(w_span), + prefix: w.to_ident(w_span), value: self.parse_introduced_string_expr()?.into(), }) } Token::Arrow if self.dialect.supports_lambda_functions() => { self.expect_token(&Token::Arrow)?; Ok(Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::One(w.clone().into_ident(w_span)), + params: OneOrManyWithParens::One(w.to_ident(w_span)), body: Box::new(self.parse_expr()?), syntax: LambdaSyntax::Arrow, })) } - _ => Ok(Expr::Identifier(w.clone().into_ident(w_span))), + _ => Ok(Expr::Identifier(w.to_ident(w_span))), } } @@ -12401,9 +12401,10 @@ impl<'a> Parser<'a> { pub fn parse_identifiers(&mut self) -> Result, ParserError> { let mut idents = vec![]; loop { - match &self.peek_token_ref().token { + let token = self.peek_token_ref(); + match &token.token { Token::Word(w) => { - idents.push(w.clone().into_ident(self.peek_token_ref().span)); + idents.push(w.to_ident(token.span)); } Token::EOF | Token::Eq | Token::SemiColon => break, _ => {} @@ -19203,8 +19204,11 @@ fn maybe_prefixed_expr(expr: Expr, prefix: Option) -> Expr { } impl Word { - #[deprecated(since = "0.54.0", note = "please use `into_ident` instead")] - /// Convert this word into an [`Ident`] identifier + /// Convert a reference to this word into an [`Ident`] by cloning the value. + /// + /// Use this method when you need to keep the original `Word` around. + /// If you can consume the `Word`, prefer [`into_ident`](Self::into_ident) instead + /// to avoid cloning. 
pub fn to_ident(&self, span: Span) -> Ident { Ident { value: self.value.clone(), @@ -19213,7 +19217,10 @@ impl Word { } } - /// Convert this word into an [`Ident`] identifier + /// Convert this word into an [`Ident`] identifier, consuming the `Word`. + /// + /// This avoids cloning the string value. If you need to keep the original + /// `Word`, use [`to_ident`](Self::to_ident) instead. pub fn into_ident(self, span: Span) -> Ident { Ident { value: self.value, From a7d77634df6da1a5440a0a8cdc22aaa85439ee19 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 23 Jan 2026 12:29:08 -0700 Subject: [PATCH 040/121] Refactor: replace more `dialect_of!` checks with `Dialect` trait methods (#2175) Co-authored-by: Claude Opus 4.5 --- src/dialect/bigquery.rs | 5 ++ src/dialect/clickhouse.rs | 40 ++++++++++ src/dialect/duckdb.rs | 15 ++++ src/dialect/generic.rs | 48 ++++++++++++ src/dialect/mod.rs | 153 ++++++++++++++++++++++++++++++++++++++ src/dialect/snowflake.rs | 15 ++++ src/parser/mod.rs | 50 ++++++------- 7 files changed, 297 insertions(+), 29 deletions(-) diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 6ad8a50893..5563d1335c 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -156,4 +156,9 @@ impl Dialect for BigQueryDialect { fn supports_create_table_multi_schema_info_sources(&self) -> bool { true } + + /// See + fn supports_select_wildcard_replace(&self) -> bool { + true + } } diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 39e8a0b304..041b94ecd4 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -100,4 +100,44 @@ impl Dialect for ClickHouseDialect { fn supports_nested_comments(&self) -> bool { true } + + /// See + fn supports_optimize_table(&self) -> bool { + true + } + + /// See + fn supports_prewhere(&self) -> bool { + true + } + + /// See + fn supports_with_fill(&self) -> bool { + true + } + + /// See + fn supports_limit_by(&self) -> bool { + true + } + + /// See + fn 
supports_interpolate(&self) -> bool { + true + } + + /// See + fn supports_settings(&self) -> bool { + true + } + + /// See + fn supports_select_format(&self) -> bool { + true + } + + /// See + fn supports_select_wildcard_replace(&self) -> bool { + true + } } diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index ea09901318..b3803aee3e 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -113,4 +113,19 @@ impl Dialect for DuckDbDialect { fn supports_notnull_operator(&self) -> bool { true } + + /// See + fn supports_install(&self) -> bool { + true + } + + /// See + fn supports_detach(&self) -> bool { + true + } + + /// See + fn supports_select_wildcard_replace(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 42510e2f09..d460c5237c 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -223,4 +223,52 @@ impl Dialect for GenericDialect { fn supports_lambda_functions(&self) -> bool { true } + + fn supports_select_wildcard_replace(&self) -> bool { + true + } + + fn supports_select_wildcard_ilike(&self) -> bool { + true + } + + fn supports_select_wildcard_rename(&self) -> bool { + true + } + + fn supports_optimize_table(&self) -> bool { + true + } + + fn supports_install(&self) -> bool { + true + } + + fn supports_detach(&self) -> bool { + true + } + + fn supports_prewhere(&self) -> bool { + true + } + + fn supports_with_fill(&self) -> bool { + true + } + + fn supports_limit_by(&self) -> bool { + true + } + + fn supports_interpolate(&self) -> bool { + true + } + + fn supports_settings(&self) -> bool { + true + } + + fn supports_select_format(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 284fc41726..98ec93da41 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1332,6 +1332,159 @@ pub trait Dialect: Debug + Any { fn supports_binary_kw_as_cast(&self) -> bool { false } + + /// Returns true if this dialect supports the `REPLACE` option in a 
+ /// `SELECT *` wildcard expression. + /// + /// Example: + /// ```sql + /// SELECT * REPLACE (col1 AS col1_alias) FROM table; + /// ``` + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#select_replace) + /// [ClickHouse](https://clickhouse.com/docs/sql-reference/statements/select#replace) + /// [DuckDB](https://duckdb.org/docs/sql/query_syntax/select#replace-clause) + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/select#parameters) + fn supports_select_wildcard_replace(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `ILIKE` option in a + /// `SELECT *` wildcard expression. + /// + /// Example: + /// ```sql + /// SELECT * ILIKE '%pattern%' FROM table; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/select#parameters) + fn supports_select_wildcard_ilike(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `RENAME` option in a + /// `SELECT *` wildcard expression. + /// + /// Example: + /// ```sql + /// SELECT * RENAME col1 AS col1_alias FROM table; + /// ``` + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/select#parameters) + fn supports_select_wildcard_rename(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `OPTIMIZE TABLE` statement. + /// + /// Example: + /// ```sql + /// OPTIMIZE TABLE table_name; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) + fn supports_optimize_table(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `INSTALL` statement. + /// + /// Example: + /// ```sql + /// INSTALL extension_name; + /// ``` + /// + /// [DuckDB](https://duckdb.org/docs/extensions/overview) + fn supports_install(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `DETACH` statement. 
+ /// + /// Example: + /// ```sql + /// DETACH DATABASE db_name; + /// ``` + /// + /// [DuckDB](https://duckdb.org/docs/sql/statements/attach#detach-syntax) + fn supports_detach(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `PREWHERE` clause + /// in `SELECT` statements. + /// + /// Example: + /// ```sql + /// SELECT * FROM table PREWHERE col > 0 WHERE col < 100; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/prewhere) + fn supports_prewhere(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `WITH FILL` clause + /// in `ORDER BY` expressions. + /// + /// Example: + /// ```sql + /// SELECT * FROM table ORDER BY col WITH FILL FROM 1 TO 10 STEP 1; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill-modifier) + fn supports_with_fill(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `LIMIT BY` clause. + /// + /// Example: + /// ```sql + /// SELECT * FROM table LIMIT 10 BY col; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/limit-by) + fn supports_limit_by(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `INTERPOLATE` clause + /// in `ORDER BY` expressions. + /// + /// Example: + /// ```sql + /// SELECT * FROM table ORDER BY col WITH FILL INTERPOLATE (col2 AS col2 + 1); + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/order-by#order-by-expr-with-fill-modifier) + fn supports_interpolate(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `SETTINGS` clause. 
+ /// + /// Example: + /// ```sql + /// SELECT * FROM table SETTINGS max_threads = 4; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select#settings-in-select-query) + fn supports_settings(&self) -> bool { + false + } + + /// Returns true if this dialect supports the `FORMAT` clause in `SELECT` statements. + /// + /// Example: + /// ```sql + /// SELECT * FROM table FORMAT JSON; + /// ``` + /// + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/select/format) + fn supports_select_format(&self) -> bool { + false + } } /// Operators for which precedence must be defined. diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index d768f7a211..e2d8cb2e1d 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -616,6 +616,21 @@ impl Dialect for SnowflakeDialect { fn supports_semantic_view_table_factor(&self) -> bool { true } + + /// See + fn supports_select_wildcard_replace(&self) -> bool { + true + } + + /// See + fn supports_select_wildcard_ilike(&self) -> bool { + true + } + + /// See + fn supports_select_wildcard_rename(&self) -> bool { + true + } } // Peeks ahead to identify tokens that are expected after diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d021d163e2..0276d058c7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -633,7 +633,7 @@ impl<'a> Parser<'a> { self.parse_attach_database() } } - Keyword::DETACH if dialect_of!(self is DuckDbDialect | GenericDialect) => { + Keyword::DETACH if self.dialect.supports_detach() => { self.parse_detach_duckdb_database() } Keyword::MSCK => self.parse_msck().map(Into::into), @@ -693,12 +693,10 @@ impl<'a> Parser<'a> { } Keyword::RENAME => self.parse_rename(), // `INSTALL` is duckdb specific https://duckdb.org/docs/extensions/overview - Keyword::INSTALL if dialect_of!(self is DuckDbDialect | GenericDialect) => { - self.parse_install() - } + Keyword::INSTALL if self.dialect.supports_install() => self.parse_install(), 
Keyword::LOAD => self.parse_load(), // `OPTIMIZE` is clickhouse specific https://clickhouse.tech/docs/en/sql-reference/statements/optimize/ - Keyword::OPTIMIZE if dialect_of!(self is ClickHouseDialect | GenericDialect) => { + Keyword::OPTIMIZE if self.dialect.supports_optimize_table() => { self.parse_optimize_table() } // `COMMENT` is snowflake specific https://docs.snowflake.com/en/sql-reference/sql/comment @@ -12208,7 +12206,7 @@ impl<'a> Parser<'a> { } } else { let exprs = self.parse_comma_separated(Parser::parse_order_by_expr)?; - let interpolate = if dialect_of!(self is ClickHouseDialect | GenericDialect) { + let interpolate = if self.dialect.supports_interpolate() { self.parse_interpolations()? } else { None @@ -12250,9 +12248,7 @@ impl<'a> Parser<'a> { })); } - let limit_by = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keyword(Keyword::BY) - { + let limit_by = if self.dialect.supports_limit_by() && self.parse_keyword(Keyword::BY) { Some(self.parse_comma_separated(Parser::parse_expr)?) } else { None @@ -13264,18 +13260,17 @@ impl<'a> Parser<'a> { locks.push(self.parse_lock()?); } } - let format_clause = if dialect_of!(self is ClickHouseDialect | GenericDialect) - && self.parse_keyword(Keyword::FORMAT) - { - if self.parse_keyword(Keyword::NULL) { - Some(FormatClause::Null) + let format_clause = + if self.dialect.supports_select_format() && self.parse_keyword(Keyword::FORMAT) { + if self.parse_keyword(Keyword::NULL) { + Some(FormatClause::Null) + } else { + let ident = self.parse_identifier()?; + Some(FormatClause::Identifier(ident)) + } } else { - let ident = self.parse_identifier()?; - Some(FormatClause::Identifier(ident)) - } - } else { - None - }; + None + }; let pipe_operators = if self.dialect.supports_pipe_operator() { self.parse_pipe_operators()? 
@@ -13519,8 +13514,7 @@ impl<'a> Parser<'a> { } fn parse_settings(&mut self) -> Result>, ParserError> { - let settings = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::SETTINGS) + let settings = if self.dialect.supports_settings() && self.parse_keyword(Keyword::SETTINGS) { let key_values = self.parse_comma_separated(|p| { let key = p.parse_identifier()?; @@ -13930,8 +13924,7 @@ impl<'a> Parser<'a> { } } - let prewhere = if dialect_of!(self is ClickHouseDialect|GenericDialect) - && self.parse_keyword(Keyword::PREWHERE) + let prewhere = if self.dialect.supports_prewhere() && self.parse_keyword(Keyword::PREWHERE) { Some(self.parse_expr()?) } else { @@ -17358,7 +17351,7 @@ impl<'a> Parser<'a> { &mut self, wildcard_token: TokenWithSpan, ) -> Result { - let opt_ilike = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + let opt_ilike = if self.dialect.supports_select_wildcard_ilike() { self.parse_optional_select_item_ilike()? } else { None @@ -17374,13 +17367,12 @@ impl<'a> Parser<'a> { } else { None }; - let opt_replace = if dialect_of!(self is GenericDialect | BigQueryDialect | ClickHouseDialect | DuckDbDialect | SnowflakeDialect) - { + let opt_replace = if self.dialect.supports_select_wildcard_replace() { self.parse_optional_select_item_replace()? } else { None }; - let opt_rename = if dialect_of!(self is GenericDialect | SnowflakeDialect) { + let opt_rename = if self.dialect.supports_select_wildcard_rename() { self.parse_optional_select_item_rename()? } else { None @@ -17577,7 +17569,7 @@ impl<'a> Parser<'a> { let options = self.parse_order_by_options()?; - let with_fill = if dialect_of!(self is ClickHouseDialect | GenericDialect) + let with_fill = if self.dialect.supports_with_fill() && self.parse_keywords(&[Keyword::WITH, Keyword::FILL]) { Some(self.parse_with_fill()?) 
From 3c7ecf3dc9fef865788a3f2004cb6987b2b7f60c Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 23 Jan 2026 12:29:19 -0700 Subject: [PATCH 041/121] minor: reduce unnecessary string allocations (#2178) Co-authored-by: Claude Opus 4.5 --- src/ast/mod.rs | 4 ++-- src/parser/mod.rs | 10 ++++------ src/tokenizer.rs | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 0470d6a8b2..fcfdf364b5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4931,9 +4931,9 @@ impl fmt::Display for Statement { f, "{tables}{read}{export}", tables = if !tables.is_empty() { - " ".to_string() + &display_comma_separated(tables).to_string() + format!(" {}", display_comma_separated(tables)) } else { - "".to_string() + String::new() }, export = if *export { " FOR EXPORT" } else { "" }, read = if *read_lock { " WITH READ LOCK" } else { "" } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0276d058c7..8d021af881 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11149,16 +11149,14 @@ impl<'a> Parser<'a> { /// Parse a single tab-separated value row used by `COPY` payload parsing. 
pub fn parse_tab_value(&mut self) -> Vec> { let mut values = vec![]; - let mut content = String::from(""); + let mut content = String::new(); while let Some(t) = self.next_token_no_skip().map(|t| &t.token) { match t { Token::Whitespace(Whitespace::Tab) => { - values.push(Some(content.to_string())); - content.clear(); + values.push(Some(core::mem::take(&mut content))); } Token::Whitespace(Whitespace::Newline) => { - values.push(Some(content.to_string())); - content.clear(); + values.push(Some(core::mem::take(&mut content))); } Token::Backslash => { if self.consume_token(&Token::Period) { @@ -11283,7 +11281,7 @@ impl<'a> Parser<'a> { Token::Number(w, false) => Ok(Ident::with_span(next_token.span, w)), _ => self.expected("placeholder", next_token), }?; - Ok(Value::Placeholder(tok.to_string() + &ident.value) + Ok(Value::Placeholder(format!("{tok}{}", ident.value)) .with_span(Span::new(span.start, ident.span.end))) } unexpected => self.expected( diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a9f9fb4436..42fa5b6187 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1755,7 +1755,7 @@ impl<'a> Tokenizer<'a> { '?' 
=> { chars.next(); let s = peeking_take_while(chars, |ch| ch.is_numeric()); - Ok(Some(Token::Placeholder(String::from("?") + &s))) + Ok(Some(Token::Placeholder(format!("?{s}")))) } // identifier or keyword @@ -1904,7 +1904,7 @@ impl<'a> Tokenizer<'a> { } } } else { - return Ok(Token::Placeholder(String::from("$") + &value)); + return Ok(Token::Placeholder(format!("${value}"))); } } From a175cdb067ca37098ab0761999f4c2be7387aaa7 Mon Sep 17 00:00:00 2001 From: isaacparker0 <128327439+isaacparker0@users.noreply.github.com> Date: Sat, 24 Jan 2026 06:15:28 -0500 Subject: [PATCH 042/121] PostgreSQL: Support force row level security (#2169) --- src/ast/ddl.rs | 18 ++++++++++++++++++ src/ast/spans.rs | 2 ++ src/parser/mod.rs | 15 +++++++++++++++ tests/sqlparser_postgres.rs | 2 ++ 4 files changed, 37 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index fcd14b6d1f..3a5cd32bc3 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -192,6 +192,7 @@ pub enum AlterTableOperation { /// `DISABLE ROW LEVEL SECURITY` /// /// Note: this is a PostgreSQL-specific operation. + /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) DisableRowLevelSecurity, /// `DISABLE RULE rewrite_rule_name` /// @@ -318,7 +319,18 @@ pub enum AlterTableOperation { /// `ENABLE ROW LEVEL SECURITY` /// /// Note: this is a PostgreSQL-specific operation. + /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) EnableRowLevelSecurity, + /// `FORCE ROW LEVEL SECURITY` + /// + /// Note: this is a PostgreSQL-specific operation. + /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) + ForceRowLevelSecurity, + /// `NO FORCE ROW LEVEL SECURITY` + /// + /// Note: this is a PostgreSQL-specific operation. 
+ /// Please refer to [PostgreSQL documentation](https://www.postgresql.org/docs/current/sql-altertable.html) + NoForceRowLevelSecurity, /// `ENABLE RULE rewrite_rule_name` /// /// Note: this is a PostgreSQL-specific operation. @@ -876,6 +888,12 @@ impl fmt::Display for AlterTableOperation { AlterTableOperation::EnableRowLevelSecurity => { write!(f, "ENABLE ROW LEVEL SECURITY") } + AlterTableOperation::ForceRowLevelSecurity => { + write!(f, "FORCE ROW LEVEL SECURITY") + } + AlterTableOperation::NoForceRowLevelSecurity => { + write!(f, "NO FORCE ROW LEVEL SECURITY") + } AlterTableOperation::EnableRule { name } => { write!(f, "ENABLE RULE {name}") } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 58d70a87ed..126e587a88 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1121,6 +1121,8 @@ impl Spanned for AlterTableOperation { AlterTableOperation::EnableReplicaRule { name } => name.span, AlterTableOperation::EnableReplicaTrigger { name } => name.span, AlterTableOperation::EnableRowLevelSecurity => Span::empty(), + AlterTableOperation::ForceRowLevelSecurity => Span::empty(), + AlterTableOperation::NoForceRowLevelSecurity => Span::empty(), AlterTableOperation::EnableRule { name } => name.span, AlterTableOperation::EnableTrigger { name } => name.span, AlterTableOperation::RenamePartitions { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8d021af881..55fec67841 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9825,6 +9825,21 @@ impl<'a> Parser<'a> { self.peek_token(), ); } + } else if self.parse_keywords(&[ + Keyword::FORCE, + Keyword::ROW, + Keyword::LEVEL, + Keyword::SECURITY, + ]) { + AlterTableOperation::ForceRowLevelSecurity + } else if self.parse_keywords(&[ + Keyword::NO, + Keyword::FORCE, + Keyword::ROW, + Keyword::LEVEL, + Keyword::SECURITY, + ]) { + AlterTableOperation::NoForceRowLevelSecurity } else if self.parse_keywords(&[Keyword::CLEAR, Keyword::PROJECTION]) && dialect_of!(self is ClickHouseDialect|GenericDialect) { diff --git 
a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7c194c1c9f..6a4b78b5f3 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -640,6 +640,8 @@ fn parse_alter_table_enable() { pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE REPLICA TRIGGER trigger_name"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE REPLICA RULE rule_name"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE ROW LEVEL SECURITY"); + pg_and_generic().verified_stmt("ALTER TABLE tab FORCE ROW LEVEL SECURITY"); + pg_and_generic().verified_stmt("ALTER TABLE tab NO FORCE ROW LEVEL SECURITY"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE RULE rule_name"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE TRIGGER ALL"); pg_and_generic().verified_stmt("ALTER TABLE tab ENABLE TRIGGER USER"); From 153e7c57465476aa7350672c5cee4f1ef02b7365 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Sat, 24 Jan 2026 04:30:51 -0700 Subject: [PATCH 043/121] perf: optimize `make_word()` to avoid unnecessary allocations (#2176) Co-authored-by: Claude Opus 4.5 --- src/tokenizer.rs | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 42fa5b6187..8c33ad3dc8 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -413,16 +413,22 @@ impl Token { /// When `quote_style` is `None`, the parser attempts a case-insensitive keyword /// lookup and sets the `Word::keyword` accordingly. pub fn make_word(word: &str, quote_style: Option) -> Self { - let word_uppercase = word.to_uppercase(); + // Only perform keyword lookup for unquoted identifiers. + // Use to_ascii_uppercase() since SQL keywords are ASCII, + // avoiding Unicode case conversion overhead. 
+ let keyword = if quote_style.is_none() { + let word_uppercase = word.to_ascii_uppercase(); + ALL_KEYWORDS + .binary_search(&word_uppercase.as_str()) + .map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x]) + } else { + Keyword::NoKeyword + }; + Token::Word(Word { value: word.to_string(), quote_style, - keyword: if quote_style.is_none() { - let keyword = ALL_KEYWORDS.binary_search(&word_uppercase.as_str()); - keyword.map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x]) - } else { - Keyword::NoKeyword - }, + keyword, }) } } From 802c7d3e03df900392a009ce60b9f30fd954ac4e Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Sun, 25 Jan 2026 01:38:09 -0800 Subject: [PATCH 044/121] PostgreSQL: Add support for `*` (descendant) option in TRUNCATE (#2181) --- src/ast/mod.rs | 18 +++++++++-- src/parser/mod.rs | 17 +++++----- tests/sqlparser_common.rs | 2 ++ tests/sqlparser_postgres.rs | 62 +++++++++++++++++++++++++++++++++++++ 4 files changed, 89 insertions(+), 10 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index fcfdf364b5..33f99bc26e 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -6422,10 +6422,18 @@ pub struct TruncateTableTarget { /// name of the table being truncated #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub name: ObjectName, - /// Postgres-specific option - /// [ TRUNCATE TABLE ONLY ] + /// Postgres-specific option: explicitly exclude descendants (also default without ONLY) + /// ```sql + /// TRUNCATE TABLE ONLY name + /// ``` /// pub only: bool, + /// Postgres-specific option: asterisk after table name to explicitly indicate descendants + /// ```sql + /// TRUNCATE TABLE name [ * ] + /// ``` + /// + pub has_asterisk: bool, } impl fmt::Display for TruncateTableTarget { @@ -6433,7 +6441,11 @@ impl fmt::Display for TruncateTableTarget { if self.only { write!(f, "ONLY ")?; }; - write!(f, "{}", self.name) + write!(f, "{}", self.name)?; + if self.has_asterisk { + write!(f, " *")?; + }; + Ok(()) } } diff --git 
a/src/parser/mod.rs b/src/parser/mod.rs index 55fec67841..e071201740 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1054,13 +1054,16 @@ impl<'a> Parser<'a> { let table = self.parse_keyword(Keyword::TABLE); let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); - let table_names = self - .parse_comma_separated(|p| { - Ok((p.parse_keyword(Keyword::ONLY), p.parse_object_name(false)?)) - })? - .into_iter() - .map(|(only, name)| TruncateTableTarget { name, only }) - .collect(); + let table_names = self.parse_comma_separated(|p| { + let only = p.parse_keyword(Keyword::ONLY); + let name = p.parse_object_name(false)?; + let has_asterisk = p.consume_token(&Token::Mul); + Ok(TruncateTableTarget { + name, + only, + has_asterisk, + }) + })?; let mut partitions = None; if self.parse_keyword(Keyword::PARTITION) { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b2c41d9748..6da4ea534e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -16828,10 +16828,12 @@ fn parse_truncate_only() { TruncateTableTarget { name: ObjectName::from(vec![Ident::new("employee")]), only: false, + has_asterisk: false, }, TruncateTableTarget { name: ObjectName::from(vec![Ident::new("dept")]), only: true, + has_asterisk: false, }, ]; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6a4b78b5f3..7bd7f43c6f 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -5088,6 +5088,7 @@ fn parse_truncate() { let table_names = vec![TruncateTableTarget { name: table_name.clone(), only: false, + has_asterisk: false, }]; assert_eq!( Statement::Truncate(Truncate { @@ -5112,6 +5113,7 @@ fn parse_truncate_with_options() { let table_names = vec![TruncateTableTarget { name: table_name.clone(), only: true, + has_asterisk: false, }]; assert_eq!( @@ -5141,10 +5143,12 @@ fn parse_truncate_with_table_list() { TruncateTableTarget { name: table_name_a.clone(), only: false, + has_asterisk: false, }, 
TruncateTableTarget { name: table_name_b.clone(), only: false, + has_asterisk: false, }, ]; @@ -5162,6 +5166,64 @@ fn parse_truncate_with_table_list() { ); } +#[test] +fn parse_truncate_with_descendant() { + let truncate = pg_and_generic().verified_stmt("TRUNCATE TABLE t *"); + + let table_names = vec![TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("t")]), + only: false, + has_asterisk: true, + }]; + + assert_eq!( + Statement::Truncate(Truncate { + table_names, + partitions: None, + table: true, + if_exists: false, + identity: None, + cascade: None, + on_cluster: None, + }), + truncate + ); + + let truncate = pg_and_generic() + .verified_stmt("TRUNCATE TABLE ONLY parent, child *, grandchild RESTART IDENTITY"); + + let table_names = vec![ + TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("parent")]), + only: true, + has_asterisk: false, + }, + TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("child")]), + only: false, + has_asterisk: true, + }, + TruncateTableTarget { + name: ObjectName::from(vec![Ident::new("grandchild")]), + only: false, + has_asterisk: false, + }, + ]; + + assert_eq!( + Statement::Truncate(Truncate { + table_names, + partitions: None, + table: true, + if_exists: false, + identity: Some(TruncateIdentityOption::Restart), + cascade: None, + on_cluster: None, + }), + truncate + ); +} + #[test] fn parse_select_regexp_as_column_name() { pg_and_generic().verified_only_select( From 2d47fec0ab0632d2c7453387a226a4c4d293f93a Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Thu, 29 Jan 2026 17:50:47 +0400 Subject: [PATCH 045/121] Fix identifier parsing not breaking on the `|>` pipe operator (#2156) --- src/parser/mod.rs | 4 +- tests/sqlparser_common.rs | 425 ++++++++++++++++++-------------------- 2 files changed, 207 insertions(+), 222 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e071201740..5847f77928 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12418,7 +12418,9 @@ 
impl<'a> Parser<'a> { Token::Word(w) => { idents.push(w.to_ident(token.span)); } - Token::EOF | Token::Eq | Token::SemiColon => break, + Token::EOF | Token::Eq | Token::SemiColon | Token::VerticalBarRightAngleBracket => { + break + } _ => {} } self.advance_token(); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6da4ea534e..87c15e2d34 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -72,9 +72,7 @@ fn parse_numeric_literal_underscore() { assert_eq!( select.projection, - vec![UnnamedExpr(Expr::Value( - (number("10_000")).with_empty_span() - ))] + vec![UnnamedExpr(Expr::Value(number("10_000").with_empty_span()))] ); } @@ -16223,303 +16221,288 @@ fn parse_set_names() { } #[test] -fn parse_pipeline_operator() { +fn parse_pipe_operator_as() { let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> AS new_users"); +} - // select pipe operator - dialects.verified_stmt("SELECT * FROM users |> SELECT id"); - dialects.verified_stmt("SELECT * FROM users |> SELECT id, name"); +#[test] +fn parse_pipe_operator_select() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> SELECT id"); + dialects.verified_stmt("SELECT * FROM tbl |> SELECT id, name"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> SELECT id user_id", - "SELECT * FROM users |> SELECT id AS user_id", + "SELECT * FROM tbl |> SELECT id user_id", + "SELECT * FROM tbl |> SELECT id AS user_id", ); - dialects.verified_stmt("SELECT * FROM users |> SELECT id AS user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> SELECT id AS user_id"); +} - // extend pipe operator - dialects.verified_stmt("SELECT * FROM users |> EXTEND id + 1 AS new_id"); - dialects.verified_stmt("SELECT * FROM users |> EXTEND id AS new_id, name AS new_name"); +#[test] +fn parse_pipe_operator_extend() { + let dialects = all_dialects_where(|d| 
d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> EXTEND id + 1 AS new_id"); + dialects.verified_stmt("SELECT * FROM tbl |> EXTEND id AS new_id, name AS new_name"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> EXTEND id user_id", - "SELECT * FROM users |> EXTEND id AS user_id", + "SELECT * FROM tbl |> EXTEND id user_id", + "SELECT * FROM tbl |> EXTEND id AS user_id", ); +} - // set pipe operator - dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1"); - dialects.verified_stmt("SELECT * FROM users |> SET id = id + 1, name = name + ' Doe'"); - - // drop pipe operator - dialects.verified_stmt("SELECT * FROM users |> DROP id"); - dialects.verified_stmt("SELECT * FROM users |> DROP id, name"); +#[test] +fn parse_pipe_operator_set() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> SET id = id + 1"); + dialects.verified_stmt("SELECT * FROM tbl |> SET id = id + 1, name = name + ' Doe'"); +} - // as pipe operator - dialects.verified_stmt("SELECT * FROM users |> AS new_users"); +#[test] +fn parse_pipe_operator_drop() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> DROP id"); + dialects.verified_stmt("SELECT * FROM tbl |> DROP id, name"); + dialects.verified_stmt("SELECT * FROM tbl |> DROP c |> RENAME a AS x"); + dialects.verified_stmt("SELECT * FROM tbl |> DROP a, b |> SELECT c"); +} - // limit pipe operator - dialects.verified_stmt("SELECT * FROM users |> LIMIT 10"); - dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 OFFSET 5"); - dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> LIMIT 5"); - dialects.verified_stmt("SELECT * FROM users |> LIMIT 10 |> WHERE true"); +#[test] +fn parse_pipe_operator_limit() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> LIMIT 10"); + dialects.verified_stmt("SELECT 
* FROM tbl |> LIMIT 10 OFFSET 5"); + dialects.verified_stmt("SELECT * FROM tbl |> LIMIT 10 |> LIMIT 5"); + dialects.verified_stmt("SELECT * FROM tbl |> LIMIT 10 |> WHERE true"); +} - // where pipe operator - dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1"); - dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 AND name = 'John'"); - dialects.verified_stmt("SELECT * FROM users |> WHERE id = 1 OR name = 'John'"); +#[test] +fn parse_pipe_operator_where() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE id = 1"); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE id = 1 AND name = 'John'"); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE id = 1 OR name = 'John'"); +} - // aggregate pipe operator full table - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*)"); +#[test] +fn parse_pipe_operator_aggregate() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE COUNT(*)"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> AGGREGATE COUNT(*) total_users", - "SELECT * FROM users |> AGGREGATE COUNT(*) AS total_users", - ); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*) AS total_users"); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE COUNT(*), MIN(id)"); - - // aggregate pipe opeprator with grouping - dialects.verified_stmt( - "SELECT * FROM users |> AGGREGATE SUM(o_totalprice) AS price, COUNT(*) AS cnt GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", + "SELECT * FROM tbl |> AGGREGATE COUNT(*) total_users", + "SELECT * FROM tbl |> AGGREGATE COUNT(*) AS total_users", ); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE COUNT(*) AS total_users"); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE COUNT(*), MIN(id)"); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE SUM(o_totalprice) AS price, COUNT(*) AS cnt GROUP BY 
EXTRACT(YEAR FROM o_orderdate) AS year"); dialects.verified_stmt( - "SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", + "SELECT * FROM tbl |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate) AS year", ); dialects - .verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate)"); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE GROUP BY a, b"); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE SUM(c) GROUP BY a, b"); - dialects.verified_stmt("SELECT * FROM users |> AGGREGATE SUM(c) ASC"); + .verified_stmt("SELECT * FROM tbl |> AGGREGATE GROUP BY EXTRACT(YEAR FROM o_orderdate)"); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE GROUP BY a, b"); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE SUM(c) GROUP BY a, b"); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE SUM(c) ASC"); +} - // order by pipe operator - dialects.verified_stmt("SELECT * FROM users |> ORDER BY id ASC"); - dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC"); - dialects.verified_stmt("SELECT * FROM users |> ORDER BY id DESC, name ASC"); +#[test] +fn parse_pipe_operator_order_by() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> ORDER BY id ASC"); + dialects.verified_stmt("SELECT * FROM tbl |> ORDER BY id DESC"); + dialects.verified_stmt("SELECT * FROM tbl |> ORDER BY id DESC, name ASC"); +} - // tablesample pipe operator +#[test] +fn parse_pipe_operator_tablesample() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE BERNOULLI (50)"); dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50 PERCENT)"); dialects.verified_stmt("SELECT * FROM tbl |> TABLESAMPLE SYSTEM (50) REPEATABLE (10)"); +} - // rename pipe operator - dialects.verified_stmt("SELECT * FROM users |> RENAME old_name AS new_name"); - 
dialects.verified_stmt("SELECT * FROM users |> RENAME id AS user_id, name AS user_name"); +#[test] +fn parse_pipe_operator_rename() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> RENAME old_name AS new_name"); + dialects.verified_stmt("SELECT * FROM tbl |> RENAME id AS user_id, name AS user_name"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> RENAME id user_id", - "SELECT * FROM users |> RENAME id AS user_id", + "SELECT * FROM tbl |> RENAME id user_id", + "SELECT * FROM tbl |> RENAME id AS user_id", ); +} - // union pipe operator - dialects.verified_stmt("SELECT * FROM users |> UNION ALL (SELECT * FROM admins)"); - dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins)"); - dialects.verified_stmt("SELECT * FROM users |> UNION (SELECT * FROM admins)"); - - // union pipe operator with multiple queries - dialects.verified_stmt( - "SELECT * FROM users |> UNION ALL (SELECT * FROM admins), (SELECT * FROM guests)", - ); - dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT (SELECT * FROM admins), (SELECT * FROM guests), (SELECT * FROM employees)"); +#[test] +fn parse_pipe_operator_union() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> UNION ALL (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION DISTINCT (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION (SELECT * FROM admins)"); dialects.verified_stmt( - "SELECT * FROM users |> UNION (SELECT * FROM admins), (SELECT * FROM guests)", + "SELECT * FROM tbl |> UNION ALL (SELECT * FROM admins), (SELECT * FROM guests)", ); - - // union pipe operator with BY NAME modifier - dialects.verified_stmt("SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins)"); - dialects.verified_stmt("SELECT * FROM users |> UNION ALL BY NAME (SELECT * FROM admins)"); - 
dialects.verified_stmt("SELECT * FROM users |> UNION DISTINCT BY NAME (SELECT * FROM admins)"); - - // union pipe operator with BY NAME and multiple queries + dialects.verified_stmt("SELECT * FROM tbl |> UNION DISTINCT (SELECT * FROM admins), (SELECT * FROM guests), (SELECT * FROM employees)"); + dialects + .verified_stmt("SELECT * FROM tbl |> UNION (SELECT * FROM admins), (SELECT * FROM guests)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION ALL BY NAME (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> UNION DISTINCT BY NAME (SELECT * FROM admins)"); dialects.verified_stmt( - "SELECT * FROM users |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)", + "SELECT * FROM tbl |> UNION BY NAME (SELECT * FROM admins), (SELECT * FROM guests)", ); +} - // intersect pipe operator (BigQuery requires DISTINCT modifier for INTERSECT) - dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins)"); - - // intersect pipe operator with BY NAME modifier +#[test] +fn parse_pipe_operator_intersect() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> INTERSECT DISTINCT (SELECT * FROM admins)"); dialects - .verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); - - // intersect pipe operator with multiple queries + .verified_stmt("SELECT * FROM tbl |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins)"); dialects.verified_stmt( - "SELECT * FROM users |> INTERSECT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + "SELECT * FROM tbl |> INTERSECT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", ); + dialects.verified_stmt("SELECT * FROM tbl |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); +} - // intersect pipe operator with BY NAME and multiple queries - 
dialects.verified_stmt("SELECT * FROM users |> INTERSECT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); - - // except pipe operator (BigQuery requires DISTINCT modifier for EXCEPT) - dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins)"); - - // except pipe operator with BY NAME modifier - dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins)"); - - // except pipe operator with multiple queries +#[test] +fn parse_pipe_operator_except() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> EXCEPT DISTINCT (SELECT * FROM admins)"); + dialects.verified_stmt("SELECT * FROM tbl |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins)"); dialects.verified_stmt( - "SELECT * FROM users |> EXCEPT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", + "SELECT * FROM tbl |> EXCEPT DISTINCT (SELECT * FROM admins), (SELECT * FROM guests)", ); + dialects.verified_stmt("SELECT * FROM tbl |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); +} - // except pipe operator with BY NAME and multiple queries - dialects.verified_stmt("SELECT * FROM users |> EXCEPT DISTINCT BY NAME (SELECT * FROM admins), (SELECT * FROM guests)"); - - // call pipe operator - dialects.verified_stmt("SELECT * FROM users |> CALL my_function()"); - dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5, 'test')"); +#[test] +fn parse_pipe_operator_call() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> CALL my_function()"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL process_data(5, 'test')"); + dialects + .verified_stmt("SELECT * FROM tbl |> CALL namespace.function_name(col1, col2, 'literal')"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL transform_data(col1 + col2)"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL 
analyze_data('param1', 100, true)"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL tvf1(arg1) AS al"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL process_data(5) AS result_table"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL namespace.func() AS my_alias"); + dialects.verified_stmt("SELECT * FROM tbl |> CALL tvf1(arg1) |> CALL tvf2(arg2, arg3)"); dialects.verified_stmt( - "SELECT * FROM users |> CALL namespace.function_name(col1, col2, 'literal')", + "SELECT * FROM tbl |> CALL transform(col1) |> CALL validate() |> CALL process(param)", ); - - // call pipe operator with complex arguments - dialects.verified_stmt("SELECT * FROM users |> CALL transform_data(col1 + col2)"); - dialects.verified_stmt("SELECT * FROM users |> CALL analyze_data('param1', 100, true)"); - - // call pipe operator with aliases - dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) AS al"); - dialects.verified_stmt("SELECT * FROM users |> CALL process_data(5) AS result_table"); - dialects.verified_stmt("SELECT * FROM users |> CALL namespace.func() AS my_alias"); - - // multiple call pipe operators in sequence - dialects.verified_stmt("SELECT * FROM input_table |> CALL tvf1(arg1) |> CALL tvf2(arg2, arg3)"); + dialects + .verified_stmt("SELECT * FROM tbl |> CALL tvf1(arg1) AS step1 |> CALL tvf2(arg2) AS step2"); dialects.verified_stmt( - "SELECT * FROM data |> CALL transform(col1) |> CALL validate() |> CALL process(param)", + "SELECT * FROM tbl |> CALL preprocess() AS clean_data |> CALL analyze(mode) AS results", ); - - // multiple call pipe operators with aliases dialects.verified_stmt( - "SELECT * FROM input_table |> CALL tvf1(arg1) AS step1 |> CALL tvf2(arg2) AS step2", + "SELECT * FROM tbl |> CALL transform() |> WHERE status = 'active' |> CALL process(param)", ); dialects.verified_stmt( - "SELECT * FROM data |> CALL preprocess() AS clean_data |> CALL analyze(mode) AS results", + "SELECT * FROM tbl |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL 
validate()", ); +} - // call pipe operators mixed with other pipe operators +#[test] +fn parse_pipe_operator_pivot() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); dialects.verified_stmt( - "SELECT * FROM users |> CALL transform() |> WHERE status = 'active' |> CALL process(param)", + "SELECT * FROM tbl |> PIVOT(SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))", ); dialects.verified_stmt( - "SELECT * FROM data |> CALL preprocess() AS clean |> SELECT col1, col2 |> CALL validate()", + "SELECT * FROM tbl |> PIVOT(AVG(revenue) FOR region IN ('North', 'South', 'East', 'West'))", ); - - // pivot pipe operator + dialects.verified_stmt("SELECT * FROM tbl |> PIVOT(SUM(sales) AS total_sales, COUNT(*) AS num_transactions FOR month IN ('Jan', 'Feb', 'Mar'))"); + dialects.verified_stmt("SELECT * FROM tbl |> PIVOT(SUM(amount) FOR product.category IN ('Electronics', 'Clothing'))"); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE year = 2023 |> PIVOT(SUM(revenue) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); dialects.verified_stmt( - "SELECT * FROM monthly_sales |> PIVOT(SUM(amount) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))", + "SELECT * FROM tbl |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales", ); - dialects.verified_stmt("SELECT * FROM sales_data |> PIVOT(AVG(revenue) FOR region IN ('North', 'South', 'East', 'West'))"); - - // pivot pipe operator with multiple aggregate functions - dialects.verified_stmt("SELECT * FROM data |> PIVOT(SUM(sales) AS total_sales, COUNT(*) AS num_transactions FOR month IN ('Jan', 'Feb', 'Mar'))"); - - // pivot pipe operator with compound column names - dialects.verified_stmt("SELECT * FROM sales |> PIVOT(SUM(amount) FOR product.category IN ('Electronics', 'Clothing'))"); - - // pivot pipe operator mixed with other pipe operators - dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> PIVOT(SUM(revenue) FOR quarter IN ('Q1', 'Q2', 'Q3', 'Q4'))"); - - // pivot pipe operator with 
aliases - dialects.verified_stmt("SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales"); - dialects.verified_stmt("SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category"); - dialects.verified_stmt("SELECT * FROM sales |> PIVOT(COUNT(*) AS transactions, SUM(amount) AS total FOR region IN ('North', 'South')) AS regional_summary"); - - // pivot pipe operator with implicit aliases (without AS keyword) + dialects.verified_stmt( + "SELECT * FROM tbl |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", + ); + dialects.verified_stmt("SELECT * FROM tbl |> PIVOT(COUNT(*) AS transactions, SUM(amount) AS total FOR region IN ('North', 'South')) AS regional_summary"); dialects.verified_query_with_canonical( - "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) quarterly_sales", - "SELECT * FROM monthly_sales |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales", + "SELECT * FROM tbl |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) quarterly_sales", + "SELECT * FROM tbl |> PIVOT(SUM(sales) FOR quarter IN ('Q1', 'Q2')) AS quarterly_sales", ); dialects.verified_query_with_canonical( - "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) avg_by_category", - "SELECT * FROM data |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", + "SELECT * FROM tbl |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) avg_by_category", + "SELECT * FROM tbl |> PIVOT(AVG(price) FOR category IN ('A', 'B', 'C')) AS avg_by_category", ); +} - // unpivot pipe operator basic usage - dialects - .verified_stmt("SELECT * FROM sales |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); - dialects.verified_stmt("SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C))"); +#[test] +fn parse_pipe_operator_unpivot() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM 
tbl |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); + dialects.verified_stmt("SELECT * FROM tbl |> UNPIVOT(value FOR category IN (A, B, C))"); dialects.verified_stmt( - "SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory, disk))", + "SELECT * FROM tbl |> UNPIVOT(measurement FOR metric_type IN (cpu, memory, disk))", ); - - // unpivot pipe operator with multiple columns - dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (jan, feb, mar, apr, may, jun))"); dialects.verified_stmt( - "SELECT * FROM report |> UNPIVOT(score FOR subject IN (math, science, english, history))", + "SELECT * FROM tbl |> UNPIVOT(amount FOR period IN (jan, feb, mar, apr, may, jun))", ); - - // unpivot pipe operator mixed with other pipe operators - dialects.verified_stmt("SELECT * FROM sales_data |> WHERE year = 2023 |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); - - // unpivot pipe operator with aliases - dialects.verified_stmt("SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales"); dialects.verified_stmt( - "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", - ); - dialects.verified_stmt("SELECT * FROM metrics |> UNPIVOT(measurement FOR metric_type IN (cpu, memory)) AS metric_measurements"); - - // unpivot pipe operator with implicit aliases (without AS keyword) - dialects.verified_query_with_canonical( - "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) unpivoted_sales", - "SELECT * FROM quarterly_sales |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales", - ); - dialects.verified_query_with_canonical( - "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) transformed_data", - "SELECT * FROM data |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", + "SELECT * FROM tbl |> UNPIVOT(score FOR subject IN (math, science, english, history))", ); - - // many pipes + dialects.verified_stmt("SELECT 
* FROM tbl |> WHERE year = 2023 |> UNPIVOT(revenue FOR quarter IN (Q1, Q2, Q3, Q4))"); dialects.verified_stmt( - "SELECT * FROM CustomerOrders |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC", + "SELECT * FROM tbl |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales", ); - - // join pipe operator - INNER JOIN - dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id"); - dialects.verified_stmt("SELECT * FROM users |> INNER JOIN orders ON users.id = orders.user_id"); - - // join pipe operator - LEFT JOIN - dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id"); dialects.verified_stmt( - "SELECT * FROM users |> LEFT OUTER JOIN orders ON users.id = orders.user_id", + "SELECT * FROM tbl |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", ); - - // join pipe operator - RIGHT JOIN - dialects.verified_stmt("SELECT * FROM users |> RIGHT JOIN orders ON users.id = orders.user_id"); - dialects.verified_stmt( - "SELECT * FROM users |> RIGHT OUTER JOIN orders ON users.id = orders.user_id", + dialects.verified_stmt("SELECT * FROM tbl |> UNPIVOT(measurement FOR metric_type IN (cpu, memory)) AS metric_measurements"); + dialects.verified_query_with_canonical( + "SELECT * FROM tbl |> UNPIVOT(amount FOR period IN (Q1, Q2)) unpivoted_sales", + "SELECT * FROM tbl |> UNPIVOT(amount FOR period IN (Q1, Q2)) AS unpivoted_sales", ); - - // join pipe operator - FULL JOIN - dialects.verified_stmt("SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> FULL OUTER JOIN orders ON users.id = orders.user_id", - "SELECT * FROM users |> FULL JOIN orders ON users.id = orders.user_id", + "SELECT * FROM tbl |> UNPIVOT(value FOR category IN 
(A, B, C)) transformed_data", + "SELECT * FROM tbl |> UNPIVOT(value FOR category IN (A, B, C)) AS transformed_data", ); +} - // join pipe operator - CROSS JOIN - dialects.verified_stmt("SELECT * FROM users |> CROSS JOIN orders"); - - // join pipe operator with USING +#[test] +fn parse_pipe_operator_join() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> INNER JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects + .verified_stmt("SELECT * FROM tbl |> LEFT OUTER JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> RIGHT JOIN orders ON users.id = orders.user_id"); + dialects + .verified_stmt("SELECT * FROM tbl |> RIGHT OUTER JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> FULL JOIN orders ON users.id = orders.user_id"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> JOIN orders USING (user_id)", - "SELECT * FROM users |> JOIN orders USING(user_id)", + "SELECT * FROM tbl |> FULL OUTER JOIN orders ON users.id = orders.user_id", + "SELECT * FROM tbl |> FULL JOIN orders ON users.id = orders.user_id", ); + dialects.verified_stmt("SELECT * FROM tbl |> CROSS JOIN orders"); dialects.verified_query_with_canonical( - "SELECT * FROM users |> LEFT JOIN orders USING (user_id, order_date)", - "SELECT * FROM users |> LEFT JOIN orders USING(user_id, order_date)", + "SELECT * FROM tbl |> JOIN orders USING (user_id)", + "SELECT * FROM tbl |> JOIN orders USING(user_id)", ); - - // join pipe operator with alias (with an omitted "AS" keyword) dialects.verified_query_with_canonical( - "SELECT * FROM users |> JOIN orders o ON users.id = o.user_id", - "SELECT * FROM users |> JOIN orders o ON users.id = o.user_id", + "SELECT * FROM tbl |> 
LEFT JOIN orders USING (user_id, order_date)", + "SELECT * FROM tbl |> LEFT JOIN orders USING(user_id, order_date)", ); - dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders AS o ON users.id = o.user_id"); - - // join pipe operator with complex ON condition - dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id AND orders.status = 'active'"); - dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id AND orders.amount > 100"); - - // multiple join pipe operators - dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> JOIN products ON orders.product_id = products.id"); - dialects.verified_stmt("SELECT * FROM users |> LEFT JOIN orders ON users.id = orders.user_id |> RIGHT JOIN products ON orders.product_id = products.id"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders o ON users.id = o.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> LEFT JOIN orders AS o ON users.id = o.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id AND orders.status = 'active'"); + dialects.verified_stmt("SELECT * FROM tbl |> LEFT JOIN orders ON users.id = orders.user_id AND orders.amount > 100"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id |> JOIN products ON orders.product_id = products.id"); + dialects.verified_stmt("SELECT * FROM tbl |> LEFT JOIN orders ON users.id = orders.user_id |> RIGHT JOIN products ON orders.product_id = products.id"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id |> WHERE orders.amount > 100"); + dialects.verified_stmt("SELECT * FROM tbl |> WHERE users.active = true |> LEFT JOIN orders ON users.id = orders.user_id"); + dialects.verified_stmt("SELECT * FROM tbl |> JOIN orders ON users.id = orders.user_id |> SELECT users.name, orders.amount"); +} - // join pipe operator with other pipe operators - 
dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> WHERE orders.amount > 100"); - dialects.verified_stmt("SELECT * FROM users |> WHERE users.active = true |> LEFT JOIN orders ON users.id = orders.user_id"); - dialects.verified_stmt("SELECT * FROM users |> JOIN orders ON users.id = orders.user_id |> SELECT users.name, orders.amount"); +#[test] +fn parse_pipe_operator_chained() { + let dialects = all_dialects_where(|d| d.supports_pipe_operator()); + dialects.verified_stmt("SELECT * FROM tbl |> AGGREGATE SUM(cost) AS total_cost GROUP BY customer_id, state, item_type |> EXTEND COUNT(*) OVER (PARTITION BY customer_id) AS num_orders |> WHERE num_orders > 1 |> AGGREGATE AVG(total_cost) AS average GROUP BY state DESC, item_type ASC"); } #[test] From be460b2b4db3ff01aa18fdbe400bc24270377fa6 Mon Sep 17 00:00:00 2001 From: xitep Date: Thu, 29 Jan 2026 14:52:59 +0100 Subject: [PATCH 046/121] [MySQL, Oracle] Parse optimizer hints (#2162) Co-authored-by: Ifeanyi Ubah --- src/ast/dml.rs | 54 ++++++++++++++++++++++++++---- src/ast/mod.rs | 51 ++++++++++++++++++++++++++++ src/ast/query.rs | 10 ++++++ src/ast/spans.rs | 5 +++ src/dialect/generic.rs | 4 +++ src/dialect/mod.rs | 10 ++++++ src/dialect/mysql.rs | 4 +++ src/dialect/oracle.rs | 4 +++ src/parser/merge.rs | 2 ++ src/parser/mod.rs | 63 +++++++++++++++++++++++++++++++++++ tests/sqlparser_bigquery.rs | 2 ++ tests/sqlparser_clickhouse.rs | 1 + tests/sqlparser_common.rs | 12 +++++++ tests/sqlparser_duckdb.rs | 2 ++ tests/sqlparser_mssql.rs | 3 ++ tests/sqlparser_mysql.rs | 57 +++++++++++++++++++++++++++++-- tests/sqlparser_oracle.rs | 55 ++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 6 ++++ tests/sqlparser_sqlite.rs | 1 + 19 files changed, 336 insertions(+), 10 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 32c023e057..4c36f70599 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -32,8 +32,8 @@ use crate::{ use super::{ display_comma_separated, 
helpers::attached_token::AttachedToken, query::InputFormatClause, Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, - OrderByExpr, Query, SelectInto, SelectItem, Setting, SqliteOnConflict, TableFactor, - TableObject, TableWithJoins, UpdateTableFromKind, Values, + OptimizerHint, OrderByExpr, Query, SelectInto, SelectItem, Setting, SqliteOnConflict, + TableFactor, TableObject, TableWithJoins, UpdateTableFromKind, Values, }; /// INSERT statement. @@ -43,6 +43,11 @@ use super::{ pub struct Insert { /// Token for the `INSERT` keyword (or its substitutes) pub insert_token: AttachedToken, + /// A query optimizer hint + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hint: Option, /// Only for Sqlite pub or: Option, /// Only for mysql @@ -102,7 +107,11 @@ impl Display for Insert { }; if let Some(on_conflict) = self.or { - write!(f, "INSERT {on_conflict} INTO {table_name} ")?; + f.write_str("INSERT")?; + if let Some(hint) = self.optimizer_hint.as_ref() { + write!(f, " {hint}")?; + } + write!(f, " {on_conflict} INTO {table_name} ")?; } else { write!( f, @@ -111,8 +120,11 @@ impl Display for Insert { "REPLACE" } else { "INSERT" - }, + } )?; + if let Some(hint) = self.optimizer_hint.as_ref() { + write!(f, " {hint}")?; + } if let Some(priority) = self.priority { write!(f, " {priority}",)?; } @@ -188,6 +200,11 @@ impl Display for Insert { pub struct Delete { /// Token for the `DELETE` keyword pub delete_token: AttachedToken, + /// A query optimizer hint + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hint: Option, /// Multi tables delete are supported in mysql pub 
tables: Vec, /// FROM @@ -207,6 +224,10 @@ pub struct Delete { impl Display for Delete { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str("DELETE")?; + if let Some(hint) = self.optimizer_hint.as_ref() { + f.write_str(" ")?; + hint.fmt(f)?; + } if !self.tables.is_empty() { indented_list(f, &self.tables)?; } @@ -257,6 +278,11 @@ impl Display for Delete { pub struct Update { /// Token for the `UPDATE` keyword pub update_token: AttachedToken, + /// A query optimizer hint + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hint: Option, /// TABLE pub table: TableWithJoins, /// Column assignments @@ -276,6 +302,10 @@ pub struct Update { impl Display for Update { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { f.write_str("UPDATE ")?; + if let Some(hint) = self.optimizer_hint.as_ref() { + hint.fmt(f)?; + f.write_str(" ")?; + } if let Some(or) = &self.or { or.fmt(f)?; f.write_str(" ")?; @@ -322,6 +352,10 @@ impl Display for Update { pub struct Merge { /// The `MERGE` token that starts the statement. 
pub merge_token: AttachedToken, + /// A query optimizer hint + /// + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hint: Option, /// optional INTO keyword pub into: bool, /// Specifies the table to merge @@ -338,12 +372,18 @@ pub struct Merge { impl Display for Merge { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("MERGE")?; + if let Some(hint) = self.optimizer_hint.as_ref() { + write!(f, " {hint}")?; + } + if self.into { + write!(f, " INTO")?; + } write!( f, - "MERGE{int} {table} USING {source} ", - int = if self.into { " INTO" } else { "" }, + " {table} USING {source} ", table = self.table, - source = self.source, + source = self.source )?; write!(f, "ON {on} ", on = self.on)?; write!(f, "{}", display_separated(&self.clauses, " "))?; diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 33f99bc26e..f255e5f3f7 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -11688,6 +11688,57 @@ pub struct ResetStatement { pub reset: Reset, } +/// Query optimizer hints are optionally supported comments after the +/// `SELECT`, `INSERT`, `UPDATE`, `REPLACE`, `MERGE`, and `DELETE` keywords in +/// the corresponding statements. +/// +/// See [Select::optimizer_hint] +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct OptimizerHint { + /// the raw text of the optimizer hint without its markers + pub text: String, + /// the style of the comment which `text` was extracted from, + /// e.g. `/*+...*/` or `--+...` + /// + /// Not all dialects support all styles, though.
+ pub style: OptimizerHintStyle, +} + +/// The commentary style of an [optimizer hint](OptimizerHint) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum OptimizerHintStyle { + /// A hint corresponding to a single line comment, + /// e.g. `--+ LEADING(v.e v.d t)` + SingleLine { + /// the comment prefix, e.g. `--` + prefix: String, + }, + /// A hint corresponding to a multi line comment, + /// e.g. `/*+ LEADING(v.e v.d t) */` + MultiLine, +} + +impl fmt::Display for OptimizerHint { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.style { + OptimizerHintStyle::SingleLine { prefix } => { + f.write_str(prefix)?; + f.write_str("+")?; + f.write_str(&self.text) + } + OptimizerHintStyle::MultiLine => { + f.write_str("/*+")?; + f.write_str(&self.text)?; + f.write_str("*/") + } + } + } +} + impl fmt::Display for ResetStatement { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match &self.reset { diff --git a/src/ast/query.rs b/src/ast/query.rs index 7ea4de19e4..08448cabef 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -343,6 +343,11 @@ pub enum SelectFlavor { pub struct Select { /// Token for the `SELECT` keyword pub select_token: AttachedToken, + /// A query optimizer hint + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + pub optimizer_hint: Option, /// `SELECT [DISTINCT] ...` pub distinct: Option, /// MSSQL syntax: `TOP () [ PERCENT ] [ WITH TIES ]` @@ -410,6 +415,11 @@ impl fmt::Display for Select { } } + if let Some(hint) = self.optimizer_hint.as_ref() { + f.write_str(" ")?; + hint.fmt(f)?; + } + if let Some(value_table_mode) = self.value_table_mode { f.write_str(" ")?; value_table_mode.fmt(f)?; diff --git 
a/src/ast/spans.rs b/src/ast/spans.rs index 126e587a88..60c983fa1e 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -894,6 +894,7 @@ impl Spanned for Delete { fn span(&self) -> Span { let Delete { delete_token, + optimizer_hint: _, tables, from, using, @@ -927,6 +928,7 @@ impl Spanned for Update { fn span(&self) -> Span { let Update { update_token, + optimizer_hint: _, table, assignments, from, @@ -1290,6 +1292,7 @@ impl Spanned for Insert { fn span(&self) -> Span { let Insert { insert_token, + optimizer_hint: _, or: _, // enum, sqlite specific ignore: _, // bool into: _, // bool @@ -2233,6 +2236,7 @@ impl Spanned for Select { fn span(&self) -> Span { let Select { select_token, + optimizer_hint: _, distinct: _, // todo top: _, // todo, mysql specific projection, @@ -2819,6 +2823,7 @@ WHERE id = 1 // ~ individual tokens within the statement let Statement::Merge(Merge { merge_token, + optimizer_hint: _, into: _, table: _, source: _, diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index d460c5237c..345d63fe40 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -271,4 +271,8 @@ impl Dialect for GenericDialect { fn supports_select_format(&self) -> bool { true } + + fn supports_comment_optimizer_hint(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 98ec93da41..8cff4d23ec 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1322,6 +1322,16 @@ pub trait Dialect: Debug + Any { false } + /// Returns `true` if the dialect supports query optimizer hints in the + /// format of single and multi line comments immediately following a + /// `SELECT`, `INSERT`, `REPLACE`, `DELETE`, or `MERGE` keyword. 
+ /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/19/sqlrf/Comments.html#SQLRF-GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + fn supports_comment_optimizer_hint(&self) -> bool { + false + } + /// Returns true if the dialect considers the `&&` operator as a boolean AND operator. fn supports_double_ampersand_operator(&self) -> bool { false diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 81aa9d445a..b44001fe12 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -182,6 +182,10 @@ impl Dialect for MySqlDialect { fn supports_binary_kw_as_cast(&self) -> bool { true } + + fn supports_comment_optimizer_hint(&self) -> bool { + true + } } /// `LOCK TABLES` diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index 54c2ace5fb..7ff9326282 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -99,4 +99,8 @@ impl Dialect for OracleDialect { fn supports_quote_delimited_string(&self) -> bool { true } + + fn supports_comment_optimizer_hint(&self) -> bool { + true + } } diff --git a/src/parser/merge.rs b/src/parser/merge.rs index 62da68a201..31f435f8f9 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -43,6 +43,7 @@ impl Parser<'_> { /// Parse a `MERGE` statement pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { + let optimizer_hint = self.maybe_parse_optimizer_hint()?; let into = self.parse_keyword(Keyword::INTO); let table = self.parse_table_factor()?; @@ -59,6 +60,7 @@ impl Parser<'_> { Ok(Merge { merge_token: merge_token.into(), + optimizer_hint, into, table, source, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5847f77928..23a961d3c0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4325,6 +4325,11 @@ impl<'a> Parser<'a> { }) } + /// Return nth token, possibly whitespace, that has not yet been processed. 
+ fn peek_nth_token_no_skip_ref(&self, n: usize) -> &TokenWithSpan { + self.tokens.get(self.index + n).unwrap_or(&EOF_TOKEN) + } + /// Return true if the next tokens exactly `expected` /// /// Does not advance the current token. @@ -13026,6 +13031,7 @@ impl<'a> Parser<'a> { /// Parse a `DELETE` statement and return `Statement::Delete`. pub fn parse_delete(&mut self, delete_token: TokenWithSpan) -> Result { + let optimizer_hint = self.maybe_parse_optimizer_hint()?; let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. // https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement @@ -13069,6 +13075,7 @@ impl<'a> Parser<'a> { Ok(Statement::Delete(Delete { delete_token: delete_token.into(), + optimizer_hint, tables, from: if with_from_keyword { FromTable::WithFromKeyword(from) @@ -13839,6 +13846,7 @@ impl<'a> Parser<'a> { if !self.peek_keyword(Keyword::SELECT) { return Ok(Select { select_token: AttachedToken(from_token), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -13866,6 +13874,7 @@ impl<'a> Parser<'a> { } let select_token = self.expect_keyword(Keyword::SELECT)?; + let optimizer_hint = self.maybe_parse_optimizer_hint()?; let value_table_mode = self.parse_value_table_mode()?; let mut top_before_distinct = false; @@ -14020,6 +14029,7 @@ impl<'a> Parser<'a> { Ok(Select { select_token: AttachedToken(select_token), + optimizer_hint, distinct, top, top_before_distinct, @@ -14048,6 +14058,55 @@ impl<'a> Parser<'a> { }) } + /// Parses an optional optimizer hint at the current token position + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html#optimizer-hints-overview) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) + fn maybe_parse_optimizer_hint(&mut self) -> Result, ParserError> { + let supports_hints = 
self.dialect.supports_comment_optimizer_hint(); + if !supports_hints { + return Ok(None); + } + loop { + let t = self.peek_nth_token_no_skip_ref(0); + match &t.token { + Token::Whitespace(ws) => { + match ws { + Whitespace::SingleLineComment { comment, .. } + | Whitespace::MultiLineComment(comment) => { + return Ok(match comment.strip_prefix("+") { + None => None, + Some(text) => { + let hint = OptimizerHint { + text: text.into(), + style: if let Whitespace::SingleLineComment { + prefix, .. + } = ws + { + OptimizerHintStyle::SingleLine { + prefix: prefix.clone(), + } + } else { + OptimizerHintStyle::MultiLine + }, + }; + // Consume the comment token + self.next_token_no_skip(); + Some(hint) + } + }); + } + Whitespace::Space | Whitespace::Tab | Whitespace::Newline => { + // Consume the token and try with the next whitespace or comment + self.next_token_no_skip(); + } + } + } + _ => return Ok(None), + } + } + } + fn parse_value_table_mode(&mut self) -> Result, ParserError> { if !dialect_of!(self is BigQueryDialect) { return Ok(None); @@ -16742,6 +16801,7 @@ impl<'a> Parser<'a> { /// Parse an INSERT statement pub fn parse_insert(&mut self, insert_token: TokenWithSpan) -> Result { + let optimizer_hint = self.maybe_parse_optimizer_hint()?; let or = self.parse_conflict_clause(); let priority = if !dialect_of!(self is MySqlDialect | GenericDialect) { None @@ -16911,6 +16971,7 @@ impl<'a> Parser<'a> { Ok(Insert { insert_token: insert_token.into(), + optimizer_hint, or, table: table_object, table_alias, @@ -17014,6 +17075,7 @@ impl<'a> Parser<'a> { /// Parse an `UPDATE` statement and return `Statement::Update`. 
pub fn parse_update(&mut self, update_token: TokenWithSpan) -> Result { + let optimizer_hint = self.maybe_parse_optimizer_hint()?; let or = self.parse_conflict_clause(); let table = self.parse_table_and_joins()?; let from_before_set = if self.parse_keyword(Keyword::FROM) { @@ -17049,6 +17111,7 @@ impl<'a> Parser<'a> { }; Ok(Update { update_token: update_token.into(), + optimizer_hint, table, assignments, from, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d8c3ada1d1..fb28b4d217 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2681,6 +2681,7 @@ fn test_export_data() { }), Span::empty() )), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -2785,6 +2786,7 @@ fn test_export_data() { }), Span::empty() )), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index 44bfcda426..ac31a27835 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -41,6 +41,7 @@ fn parse_map_access_expr() { assert_eq!( Select { distinct: None, + optimizer_hint: None, select_token: AttachedToken::empty(), top: None, top_before_distinct: false, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 87c15e2d34..2796f25866 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -455,6 +455,7 @@ fn parse_update_set_from() { stmt, Statement::Update(Update { update_token: AttachedToken::empty(), + optimizer_hint: None, table: TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::new("t1")])), joins: vec![], @@ -470,6 +471,7 @@ fn parse_update_set_from() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -548,6 +550,7 @@ fn parse_update_with_table_alias() { returning, or: None, limit: None, + 
optimizer_hint: None, update_token: _, }) => { assert_eq!( @@ -5804,6 +5807,7 @@ fn test_parse_named_window() { let actual_select_only = dialects.verified_only_select(sql); let expected = Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -6534,6 +6538,7 @@ fn parse_interval_and_or_xor() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -8910,6 +8915,7 @@ fn lateral_function() { let actual_select_only = verified_only_select(sql); let expected = Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], @@ -9911,6 +9917,7 @@ fn parse_merge() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -12314,6 +12321,7 @@ fn parse_unload() { query: Some(Box::new(Query { body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -12622,6 +12630,7 @@ fn parse_map_access_expr() { fn parse_connect_by() { let expect_query = Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -12704,6 +12713,7 @@ fn parse_connect_by() { all_dialects_where(|d| d.supports_connect_by()).verified_only_select(connect_by_3), Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -13637,6 +13647,7 @@ fn test_extract_seconds_ok() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: 
false, @@ -15776,6 +15787,7 @@ fn test_select_from_first() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, projection, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index bdfe4f50a2..7cc710de28 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -266,6 +266,7 @@ fn test_select_union_by_name() { set_quantifier: *expected_quantifier, left: Box::::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], @@ -297,6 +298,7 @@ fn test_select_union_by_name() { }))), right: Box::::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 1927b864e0..7ef4ce85c2 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -141,6 +141,7 @@ fn parse_create_procedure() { pipe_operators: vec![], body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -1348,6 +1349,7 @@ fn parse_substring_in_select() { body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: Some(Distinct::Distinct), top: None, top_before_distinct: false, @@ -1505,6 +1507,7 @@ fn parse_mssql_declare() { body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 4a62053867..80aed5bfee 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs 
@@ -1435,6 +1435,7 @@ fn parse_escaped_quote_identifiers_with_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -1490,6 +1491,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -1537,7 +1539,7 @@ fn parse_escaped_backticks_with_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -1589,7 +1591,7 @@ fn parse_escaped_backticks_with_no_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -2409,7 +2411,7 @@ fn parse_select_with_numeric_prefix_column_name() { q.body, Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -2584,6 +2586,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { q.body, Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -2651,6 +2654,7 @@ fn parse_update_with_joins() { returning, or: None, limit: None, + optimizer_hint: None, update_token: _, }) => { assert_eq!( @@ -3216,6 +3220,7 @@ fn parse_substring_in_select() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: Some(Distinct::Distinct), top: None, top_before_distinct: false, @@ -3539,6 +3544,7 @@ fn parse_hex_string_introducer() { with: None, body: 
Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -4381,3 +4387,48 @@ fn test_create_index_options() { "CREATE INDEX idx_name ON t(c1, c2) USING BTREE LOCK = EXCLUSIVE ALGORITHM = DEFAULT", ); } + +#[test] +fn test_optimizer_hints() { + let mysql_dialect = mysql_and_generic(); + + // ~ selects + mysql_dialect.verified_stmt( + "\ + SELECT /*+ SET_VAR(optimizer_switch = 'mrr_cost_based=off') \ + SET_VAR(max_heap_table_size = 1G) */ 1", + ); + + mysql_dialect.verified_stmt( + "\ + SELECT /*+ SET_VAR(target_partitions=1) */ * FROM \ + (SELECT /*+ SET_VAR(target_partitions=8) */ * FROM t1 LIMIT 1) AS dt", + ); + + // ~ inserts / replace + mysql_dialect.verified_stmt( + "\ + INSERT /*+ RESOURCE_GROUP(Batch) */ \ + INTO t2 VALUES (2)", + ); + + mysql_dialect.verified_stmt( + "\ + REPLACE /*+ foobar */ INTO test \ + VALUES (1, 'Old', '2014-08-20 18:47:00')", + ); + + // ~ updates + mysql_dialect.verified_stmt( + "\ + UPDATE /*+ quux */ table_name \ + SET column1 = 1 \ + WHERE 1 = 1", + ); + + // ~ deletes + mysql_dialect.verified_stmt( + "\ + DELETE /*+ foobar */ FROM table_name", + ); +} diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 683660369a..1c12f868f4 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -333,3 +333,58 @@ fn parse_national_quote_delimited_string_but_is_a_word() { expr_from_projection(&select.projection[2]) ); } + +#[test] +fn test_optimizer_hints() { + let oracle_dialect = oracle(); + + // ~ selects + let select = oracle_dialect.verified_only_select_with_canonical( + "SELECT /*+one two three*/ /*+not a hint!*/ 1 FROM dual", + "SELECT /*+one two three*/ 1 FROM dual", + ); + assert_eq!( + select + .optimizer_hint + .as_ref() + .map(|hint| hint.text.as_str()), + Some("one two three") + ); + + let select = oracle_dialect.verified_only_select_with_canonical( + "SELECT /*one two three*/ /*+not a 
hint!*/ 1 FROM dual", + "SELECT 1 FROM dual", + ); + assert_eq!(select.optimizer_hint, None); + + let select = oracle_dialect.verified_only_select_with_canonical( + "SELECT --+ one two three /* asdf */\n 1 FROM dual", + "SELECT --+ one two three /* asdf */\n 1 FROM dual", + ); + assert_eq!( + select + .optimizer_hint + .as_ref() + .map(|hint| hint.text.as_str()), + Some(" one two three /* asdf */\n") + ); + + // ~ inserts + oracle_dialect.verified_stmt("INSERT /*+ append */ INTO t1 SELECT * FROM all_objects"); + + // ~ updates + oracle_dialect.verified_stmt("UPDATE /*+ DISABLE_PARALLEL_DML */ table_name SET column1 = 1"); + + // ~ deletes + oracle_dialect.verified_stmt("DELETE --+ ENABLE_PARALLEL_DML\n FROM table_name"); + + // ~ merges + oracle_dialect.verified_stmt( + "MERGE /*+ CLUSTERING */ INTO people_target pt \ + USING people_source ps \ + ON (pt.person_id = ps.person_id) \ + WHEN NOT MATCHED THEN INSERT \ + (pt.person_id, pt.first_name, pt.last_name, pt.title) \ + VALUES (ps.person_id, ps.first_name, ps.last_name, ps.title)", + ); +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7bd7f43c6f..a449eebc06 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1284,6 +1284,7 @@ fn parse_copy_to() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -3064,6 +3065,7 @@ fn parse_array_subquery_expr() { set_quantifier: SetQuantifier::None, left: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -3090,6 +3092,7 @@ fn parse_array_subquery_expr() { }))), right: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), + optimizer_hint: None, distinct: None, top: None, top_before_distinct: false, @@ -5384,6 +5387,7 @@ fn 
test_simple_postgres_insert_with_alias() { statement, Statement::Insert(Insert { insert_token: AttachedToken::empty(), + optimizer_hint: None, or: None, ignore: false, into: true, @@ -5455,6 +5459,7 @@ fn test_simple_postgres_insert_with_alias() { statement, Statement::Insert(Insert { insert_token: AttachedToken::empty(), + optimizer_hint: None, or: None, ignore: false, into: true, @@ -5528,6 +5533,7 @@ fn test_simple_insert_with_quoted_alias() { statement, Statement::Insert(Insert { insert_token: AttachedToken::empty(), + optimizer_hint: None, or: None, ignore: false, into: true, diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index 321cfef073..da311ac064 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -477,6 +477,7 @@ fn parse_update_tuple_row_values() { assert_eq!( sqlite().verified_stmt("UPDATE x SET (a, b) = (1, 2)"), Statement::Update(Update { + optimizer_hint: None, or: None, assignments: vec![Assignment { target: AssignmentTarget::Tuple(vec![ From 62cf16f3ece6f3d5985e35893407c8db359ffd3f Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Thu, 29 Jan 2026 14:54:53 +0100 Subject: [PATCH 047/121] Redshift: Support implicit string concatenation using newline (#2167) --- src/dialect/mod.rs | 13 +++++++++++++ src/dialect/redshift.rs | 4 ++++ src/parser/mod.rs | 27 +++++++++++++++++++++++++++ tests/sqlparser_common.rs | 19 +++++++++++++++++++ 4 files changed, 63 insertions(+) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8cff4d23ec..ef563fc101 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -489,6 +489,19 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports concatenating string literals with a newline. 
+ /// For example, the following statement would return `true`: + /// ```sql + /// SELECT 'abc' in ( + /// 'a' + /// 'b' + /// 'c' + /// ); + /// ``` + fn supports_string_literal_concatenation_with_newline(&self) -> bool { + false + } + /// Does the dialect support trailing commas in the projection list? fn supports_projection_trailing_commas(&self) -> bool { self.supports_trailing_commas() diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 43c0646ce3..c028061d2a 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -147,4 +147,8 @@ impl Dialect for RedshiftSqlDialect { fn supports_create_table_like_parenthesized(&self) -> bool { true } + + fn supports_string_literal_concatenation_with_newline(&self) -> bool { + true + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 23a961d3c0..9b6b67bd35 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11325,7 +11325,34 @@ impl<'a> Parser<'a> { str.push_str(s); self.advance_token(); } + } else if self + .dialect + .supports_string_literal_concatenation_with_newline() + { + // We are iterating over tokens including whitespaces, to identify + // string literals separated by newlines so we can concatenate them. 
+ let mut after_newline = false; + loop { + match self.peek_token_no_skip().token { + Token::Whitespace(Whitespace::Newline) => { + after_newline = true; + self.next_token_no_skip(); + } + Token::Whitespace(_) => { + self.next_token_no_skip(); + } + Token::SingleQuotedString(ref s) | Token::DoubleQuotedString(ref s) + if after_newline => + { + str.push_str(s.clone().as_str()); + self.next_token_no_skip(); + after_newline = false; + } + _ => break, + } + } } + str } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 2796f25866..8b0bcc12f8 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -17712,6 +17712,25 @@ fn parse_adjacent_string_literal_concatenation() { let sql = "SELECT * FROM t WHERE col = 'Hello' \n ' ' \t 'World!'"; dialects.one_statement_parses_to(sql, r"SELECT * FROM t WHERE col = 'Hello World!'"); + + let dialects = all_dialects_where(|d| d.supports_string_literal_concatenation_with_newline()); + let sql = r#" + SELECT 'abc' in ('a' + 'b' + 'c', + 'd' + )"#; + dialects.one_statement_parses_to(sql, "SELECT 'abc' IN ('abc', 'd')"); + + let sql = r#" + SELECT 'abc' in ('a' + 'b' + -- COMMENT + 'c', + -- COMMENT + 'd' + )"#; + dialects.one_statement_parses_to(sql, "SELECT 'abc' IN ('abc', 'd')"); } #[test] From ed983e09c24e2e4c9d4e6d9800c6a34e615efcb8 Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Fri, 30 Jan 2026 03:11:07 -0800 Subject: [PATCH 048/121] PostgreSQL: Fix REPLICA IDENTITY to use NOTHING (#2179) --- src/ast/ddl.rs | 10 +++++----- src/parser/mod.rs | 6 +++--- tests/sqlparser_postgres.rs | 24 ++++++++++++++++++++++++ 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 3a5cd32bc3..1d0059db82 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -99,8 +99,8 @@ impl fmt::Display for IndexColumn { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum ReplicaIdentity { - /// 
No replica identity (`REPLICA IDENTITY NONE`). - None, + /// No replica identity (`REPLICA IDENTITY NOTHING`). + Nothing, /// Full replica identity (`REPLICA IDENTITY FULL`). Full, /// Default replica identity (`REPLICA IDENTITY DEFAULT`). @@ -112,7 +112,7 @@ pub enum ReplicaIdentity { impl fmt::Display for ReplicaIdentity { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - ReplicaIdentity::None => f.write_str("NONE"), + ReplicaIdentity::Nothing => f.write_str("NOTHING"), ReplicaIdentity::Full => f.write_str("FULL"), ReplicaIdentity::Default => f.write_str("DEFAULT"), ReplicaIdentity::Index(idx) => write!(f, "USING INDEX {idx}"), @@ -1911,7 +1911,7 @@ pub enum ColumnOption { /// [ MATCH { FULL | PARTIAL | SIMPLE } ] /// { [ON DELETE ] [ON UPDATE ] | /// [ON UPDATE ] [ON DELETE ] - /// } + /// } /// [] /// `). ForeignKey(ForeignKeyConstraint), @@ -4363,7 +4363,7 @@ impl Spanned for CreateExtension { } } -/// DROP EXTENSION statement +/// DROP EXTENSION statement /// Note: this is a PostgreSQL-specific statement /// /// # References diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9b6b67bd35..59ca4522a1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -10145,8 +10145,8 @@ impl<'a> Parser<'a> { let value = self.parse_number_value()?; AlterTableOperation::AutoIncrement { equals, value } } else if self.parse_keywords(&[Keyword::REPLICA, Keyword::IDENTITY]) { - let identity = if self.parse_keyword(Keyword::NONE) { - ReplicaIdentity::None + let identity = if self.parse_keyword(Keyword::NOTHING) { + ReplicaIdentity::Nothing } else if self.parse_keyword(Keyword::FULL) { ReplicaIdentity::Full } else if self.parse_keyword(Keyword::DEFAULT) { @@ -10155,7 +10155,7 @@ impl<'a> Parser<'a> { ReplicaIdentity::Index(self.parse_identifier()?) 
} else { return self.expected( - "NONE, FULL, DEFAULT, or USING INDEX index_name after REPLICA IDENTITY", + "NOTHING, FULL, DEFAULT, or USING INDEX index_name after REPLICA IDENTITY", self.peek_token(), ); }; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index a449eebc06..54e9ee0c01 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6643,6 +6643,30 @@ fn parse_alter_table_replica_identity() { } _ => unreachable!(), } + + match pg_and_generic().verified_stmt("ALTER TABLE foo REPLICA IDENTITY NOTHING") { + Statement::AlterTable(AlterTable { operations, .. }) => { + assert_eq!( + operations, + vec![AlterTableOperation::ReplicaIdentity { + identity: ReplicaIdentity::Nothing + }] + ); + } + _ => unreachable!(), + } + + match pg_and_generic().verified_stmt("ALTER TABLE foo REPLICA IDENTITY DEFAULT") { + Statement::AlterTable(AlterTable { operations, .. }) => { + assert_eq!( + operations, + vec![AlterTableOperation::ReplicaIdentity { + identity: ReplicaIdentity::Default + }] + ); + } + _ => unreachable!(), + } } #[test] From c8b7f7cf4281cdfff3e08e9827928662b8be8095 Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Fri, 30 Jan 2026 03:11:49 -0800 Subject: [PATCH 049/121] Add ENFORCED/NOT ENFORCED support for column-level CHECK constraints (#2180) --- src/parser/mod.rs | 11 ++++++++++- tests/sqlparser_common.rs | 9 +++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 59ca4522a1..2763114314 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8922,11 +8922,20 @@ impl<'a> Parser<'a> { // since `CHECK` requires parentheses, we can parse the inner expression in ParserState::Normal let expr: Expr = self.with_state(ParserState::Normal, |p| p.parse_expr())?; self.expect_token(&Token::RParen)?; + + let enforced = if self.parse_keyword(Keyword::ENFORCED) { + Some(true) + } else if self.parse_keywords(&[Keyword::NOT, Keyword::ENFORCED]) { + Some(false) + } 
else { + None + }; + Ok(Some( CheckConstraint { name: None, // Column-level check constraints don't have names expr: Box::new(expr), - enforced: None, // Could be extended later to support MySQL ENFORCED/NOT ENFORCED + enforced, } .into(), )) diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8b0bcc12f8..69524ff993 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -16853,6 +16853,15 @@ fn check_enforced() { ); } +#[test] +fn column_check_enforced() { + all_dialects().verified_stmt("CREATE TABLE t (x INT CHECK (x > 1) NOT ENFORCED)"); + all_dialects().verified_stmt("CREATE TABLE t (x INT CHECK (x > 1) ENFORCED)"); + all_dialects().verified_stmt( + "CREATE TABLE t (a INT CHECK (a > 0) NOT ENFORCED, b INT CHECK (b > 0) ENFORCED, c INT CHECK (c > 0))", + ); +} + #[test] fn join_precedence() { all_dialects_except(|d| !d.supports_left_associative_joins_without_parens()) From 37aa792277089de6dbcf01823d7db5e9029ce5c7 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Mon, 2 Feb 2026 09:38:44 +0100 Subject: [PATCH 050/121] Implement `core::error::Error` for `ParserError` and `TokenizerError` (#2189) --- src/parser/mod.rs | 3 +-- src/tokenizer.rs | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2763114314..dbdce02dc0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -204,8 +204,7 @@ impl fmt::Display for ParserError { } } -#[cfg(feature = "std")] -impl std::error::Error for ParserError {} +impl core::error::Error for ParserError {} // By default, allow expressions up to this deep before erroring const DEFAULT_REMAINING_DEPTH: usize = 50; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8c33ad3dc8..506dee1d7a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -782,8 +782,7 @@ impl fmt::Display for TokenizerError { } } -#[cfg(feature = "std")] -impl std::error::Error for TokenizerError {} +impl core::error::Error for TokenizerError {} struct 
State<'a> { peekable: Peekable>, @@ -2554,9 +2553,8 @@ mod tests { message: "test".into(), location: Location { line: 1, column: 1 }, }; - #[cfg(feature = "std")] { - use std::error::Error; + use core::error::Error; assert!(err.source().is_none()); } assert_eq!(err.to_string(), "test at Line: 1, Column: 1"); From ed1c4e2cee02cecc26cc0a2f3b860ae0a1526cdb Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Mon, 2 Feb 2026 10:12:50 +0100 Subject: [PATCH 051/121] Moved more structs outside of Statement to facilitate reuse (#2188) --- src/ast/dcl.rs | 101 +++++++++++++++- src/ast/ddl.rs | 191 +++++++++++++++++++++++++++++ src/ast/mod.rs | 245 ++++---------------------------------- src/parser/alter.rs | 8 +- src/parser/mod.rs | 25 ++-- tests/sqlparser_common.rs | 46 +++---- tests/sqlparser_mysql.rs | 8 +- 7 files changed, 355 insertions(+), 269 deletions(-) diff --git a/src/ast/dcl.rs b/src/ast/dcl.rs index 7183bc3fb7..3c50a81c06 100644 --- a/src/ast/dcl.rs +++ b/src/ast/dcl.rs @@ -29,7 +29,10 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; use super::{display_comma_separated, Expr, Ident, Password, Spanned}; -use crate::ast::{display_separated, ObjectName}; +use crate::ast::{ + display_separated, CascadeOption, CurrentGrantsKind, GrantObjects, Grantee, ObjectName, + Privileges, +}; use crate::tokenizer::Span; /// An option in `ROLE` statement. @@ -427,3 +430,99 @@ impl Spanned for CreateRole { Span::empty() } } + +/// GRANT privileges ON objects TO grantees +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Grant { + /// Privileges being granted. + pub privileges: Privileges, + /// Optional objects the privileges apply to. + pub objects: Option, + /// List of grantees receiving the privileges. + pub grantees: Vec, + /// Whether `WITH GRANT OPTION` is present. 
+ pub with_grant_option: bool, + /// Optional `AS GRANTOR` identifier. + pub as_grantor: Option, + /// Optional `GRANTED BY` identifier. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dcl-statements) + pub granted_by: Option, + /// Optional `CURRENT GRANTS` modifier. + /// + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/sql/grant-privilege) + pub current_grants: Option, +} + +impl fmt::Display for Grant { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "GRANT {privileges}", privileges = self.privileges)?; + if let Some(ref objects) = self.objects { + write!(f, " ON {objects}")?; + } + write!(f, " TO {}", display_comma_separated(&self.grantees))?; + if let Some(ref current_grants) = self.current_grants { + write!(f, " {current_grants}")?; + } + if self.with_grant_option { + write!(f, " WITH GRANT OPTION")?; + } + if let Some(ref as_grantor) = self.as_grantor { + write!(f, " AS {as_grantor}")?; + } + if let Some(ref granted_by) = self.granted_by { + write!(f, " GRANTED BY {granted_by}")?; + } + Ok(()) + } +} + +impl From for crate::ast::Statement { + fn from(v: Grant) -> Self { + crate::ast::Statement::Grant(v) + } +} + +/// REVOKE privileges ON objects FROM grantees +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct Revoke { + /// Privileges to revoke. + pub privileges: Privileges, + /// Optional objects from which to revoke. + pub objects: Option, + /// Grantees affected by the revoke. + pub grantees: Vec, + /// Optional `GRANTED BY` identifier. + /// + /// [BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/dcl-statements) + pub granted_by: Option, + /// Optional `CASCADE`/`RESTRICT` behavior. 
+ pub cascade: Option, +} + +impl fmt::Display for Revoke { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "REVOKE {privileges}", privileges = self.privileges)?; + if let Some(ref objects) = self.objects { + write!(f, " ON {objects}")?; + } + write!(f, " FROM {}", display_comma_separated(&self.grantees))?; + if let Some(ref granted_by) = self.granted_by { + write!(f, " GRANTED BY {granted_by}")?; + } + if let Some(ref cascade) = self.cascade { + write!(f, " {cascade}")?; + } + Ok(()) + } +} + +impl From for crate::ast::Statement { + fn from(v: Revoke) -> Self { + crate::ast::Statement::Revoke(v) + } +} diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 1d0059db82..0c4f93e647 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -5120,3 +5120,194 @@ impl Spanned for AlterOperatorClass { Span::empty() } } + +/// CREATE POLICY statement. +/// +/// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct CreatePolicy { + /// Name of the policy. + pub name: Ident, + /// Table the policy is defined on. + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + /// Optional policy type (e.g., `PERMISSIVE` / `RESTRICTIVE`). + pub policy_type: Option, + /// Optional command the policy applies to (e.g., `SELECT`). + pub command: Option, + /// Optional list of grantee owners. + pub to: Option>, + /// Optional expression for the `USING` clause. + pub using: Option, + /// Optional expression for the `WITH CHECK` clause. 
+ pub with_check: Option, +} + +impl fmt::Display for CreatePolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "CREATE POLICY {name} ON {table_name}", + name = self.name, + table_name = self.table_name, + )?; + if let Some(ref policy_type) = self.policy_type { + write!(f, " AS {policy_type}")?; + } + if let Some(ref command) = self.command { + write!(f, " FOR {command}")?; + } + if let Some(ref to) = self.to { + write!(f, " TO {}", display_comma_separated(to))?; + } + if let Some(ref using) = self.using { + write!(f, " USING ({using})")?; + } + if let Some(ref with_check) = self.with_check { + write!(f, " WITH CHECK ({with_check})")?; + } + Ok(()) + } +} + +/// Policy type for a `CREATE POLICY` statement. +/// ```sql +/// AS [ PERMISSIVE | RESTRICTIVE ] +/// ``` +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CreatePolicyType { + /// Policy allows operations unless explicitly denied. + Permissive, + /// Policy denies operations unless explicitly allowed. + Restrictive, +} + +impl fmt::Display for CreatePolicyType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CreatePolicyType::Permissive => write!(f, "PERMISSIVE"), + CreatePolicyType::Restrictive => write!(f, "RESTRICTIVE"), + } + } +} + +/// Command that a policy can apply to (FOR clause). +/// ```sql +/// FOR [ALL | SELECT | INSERT | UPDATE | DELETE] +/// ``` +/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) +#[derive(Debug, Clone, Copy, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum CreatePolicyCommand { + /// Applies to all commands. + All, + /// Applies to SELECT. 
+ Select, + /// Applies to INSERT. + Insert, + /// Applies to UPDATE. + Update, + /// Applies to DELETE. + Delete, +} + +impl fmt::Display for CreatePolicyCommand { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + CreatePolicyCommand::All => write!(f, "ALL"), + CreatePolicyCommand::Select => write!(f, "SELECT"), + CreatePolicyCommand::Insert => write!(f, "INSERT"), + CreatePolicyCommand::Update => write!(f, "UPDATE"), + CreatePolicyCommand::Delete => write!(f, "DELETE"), + } + } +} + +/// DROP POLICY statement. +/// +/// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-droppolicy.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct DropPolicy { + /// `true` when `IF EXISTS` was present. + pub if_exists: bool, + /// Name of the policy to drop. + pub name: Ident, + /// Name of the table the policy applies to. + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + /// Optional drop behavior (`CASCADE` or `RESTRICT`). + pub drop_behavior: Option, +} + +impl fmt::Display for DropPolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "DROP POLICY {if_exists}{name} ON {table_name}", + if_exists = if self.if_exists { "IF EXISTS " } else { "" }, + name = self.name, + table_name = self.table_name + )?; + if let Some(ref behavior) = self.drop_behavior { + write!(f, " {behavior}")?; + } + Ok(()) + } +} + +impl From for crate::ast::Statement { + fn from(v: CreatePolicy) -> Self { + crate::ast::Statement::CreatePolicy(v) + } +} + +impl From for crate::ast::Statement { + fn from(v: DropPolicy) -> Self { + crate::ast::Statement::DropPolicy(v) + } +} + +/// ALTER POLICY statement. +/// +/// ```sql +/// ALTER POLICY ON
[] +/// ``` +/// (Postgresql-specific) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct AlterPolicy { + /// Policy name to alter. + pub name: Ident, + /// Target table name the policy is defined on. + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub table_name: ObjectName, + /// Optional operation specific to the policy alteration. + pub operation: AlterPolicyOperation, +} + +impl fmt::Display for AlterPolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ALTER POLICY {name} ON {table_name}{operation}", + name = self.name, + table_name = self.table_name, + operation = self.operation + ) + } +} + +impl From for crate::ast::Statement { + fn from(v: AlterPolicy) -> Self { + crate::ast::Statement::AlterPolicy(v) + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index f255e5f3f7..ce5a67e128 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -56,20 +56,22 @@ pub use self::data_type::{ ExactNumberInfo, IntervalFields, StructBracketKind, TimezoneInfo, }; pub use self::dcl::{ - AlterRoleOperation, CreateRole, ResetConfig, RoleOption, SecondaryRoles, SetConfigValue, Use, + AlterRoleOperation, CreateRole, Grant, ResetConfig, Revoke, RoleOption, SecondaryRoles, + SetConfigValue, Use, }; pub use self::ddl::{ Alignment, AlterColumnOperation, AlterConnectorOwner, AlterIndexOperation, AlterOperator, AlterOperatorClass, AlterOperatorClassOperation, AlterOperatorFamily, - AlterOperatorFamilyOperation, AlterOperatorOperation, AlterPolicyOperation, AlterSchema, - AlterSchemaOperation, AlterTable, AlterTableAlgorithm, AlterTableLock, AlterTableOperation, - AlterTableType, AlterType, AlterTypeAddValue, AlterTypeAddValuePosition, AlterTypeOperation, - AlterTypeRename, AlterTypeRenameValue, ClusteredBy, ColumnDef, ColumnOption, ColumnOptionDef, - ColumnOptions, ColumnPolicy, 
ColumnPolicyProperty, ConstraintCharacteristics, CreateConnector, - CreateDomain, CreateExtension, CreateFunction, CreateIndex, CreateOperator, - CreateOperatorClass, CreateOperatorFamily, CreateTable, CreateTrigger, CreateView, Deduplicate, + AlterOperatorFamilyOperation, AlterOperatorOperation, AlterPolicy, AlterPolicyOperation, + AlterSchema, AlterSchemaOperation, AlterTable, AlterTableAlgorithm, AlterTableLock, + AlterTableOperation, AlterTableType, AlterType, AlterTypeAddValue, AlterTypeAddValuePosition, + AlterTypeOperation, AlterTypeRename, AlterTypeRenameValue, ClusteredBy, ColumnDef, + ColumnOption, ColumnOptionDef, ColumnOptions, ColumnPolicy, ColumnPolicyProperty, + ConstraintCharacteristics, CreateConnector, CreateDomain, CreateExtension, CreateFunction, + CreateIndex, CreateOperator, CreateOperatorClass, CreateOperatorFamily, CreatePolicy, + CreatePolicyCommand, CreatePolicyType, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial, DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, - DropOperatorFamily, DropOperatorSignature, DropTrigger, ForValues, GeneratedAs, + DropOperatorFamily, DropOperatorSignature, DropPolicy, DropTrigger, ForValues, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, OperatorArgTypes, OperatorClassItem, @@ -3098,44 +3100,6 @@ impl Display for FromTable { } } -/// Policy type for a `CREATE POLICY` statement. -/// ```sql -/// AS [ PERMISSIVE | RESTRICTIVE ] -/// ``` -/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -/// Type of `CREATE POLICY` (permissive or restrictive). 
-pub enum CreatePolicyType { - /// Policy allows operations unless explicitly denied. - Permissive, - /// Policy denies operations unless explicitly allowed. - Restrictive, -} - -/// Policy command for a `CREATE POLICY` statement. -/// ```sql -/// FOR [ALL | SELECT | INSERT | UPDATE | DELETE] -/// ``` -/// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) -#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -/// Commands that a policy can apply to (FOR clause). -pub enum CreatePolicyCommand { - /// Applies to all commands. - All, - /// Applies to SELECT. - Select, - /// Applies to INSERT. - Insert, - /// Applies to UPDATE. - Update, - /// Applies to DELETE. - Delete, -} - #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -3634,23 +3598,7 @@ pub enum Statement { /// CREATE POLICY /// ``` /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-createpolicy.html) - CreatePolicy { - /// Name of the policy. - name: Ident, - /// Table the policy is defined on. - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - table_name: ObjectName, - /// Optional policy type (e.g., `PERMISSIVE` / `RESTRICTIVE`). - policy_type: Option, - /// Optional command the policy applies to (e.g., `SELECT`). - command: Option, - /// Optional list of grantee owners. - to: Option>, - /// Optional expression for the `USING` clause. - using: Option, - /// Optional expression for the `WITH CHECK` clause. - with_check: Option, - }, + CreatePolicy(CreatePolicy), /// ```sql /// CREATE CONNECTOR /// ``` @@ -3736,15 +3684,7 @@ pub enum Statement { /// ALTER POLICY ON
[] /// ``` /// (Postgresql-specific) - AlterPolicy { - /// Policy name to alter. - name: Ident, - /// Target table name the policy is defined on. - #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - table_name: ObjectName, - /// Optional operation specific to the policy alteration. - operation: AlterPolicyOperation, - }, + AlterPolicy(AlterPolicy), /// ```sql /// ALTER CONNECTOR connector_name SET DCPROPERTIES(property_name=property_value, ...); /// or @@ -3881,16 +3821,7 @@ pub enum Statement { /// DROP POLICY /// ``` /// See [PostgreSQL](https://www.postgresql.org/docs/current/sql-droppolicy.html) - DropPolicy { - /// `true` when `IF EXISTS` was present. - if_exists: bool, - /// Name of the policy to drop. - name: Ident, - /// Name of the table the policy applies to. - table_name: ObjectName, - /// Optional drop behavior (`CASCADE` or `RESTRICT`). - drop_behavior: Option, - }, + DropPolicy(DropPolicy), /// ```sql /// DROP CONNECTOR /// ``` @@ -4389,22 +4320,7 @@ pub enum Statement { /// ```sql /// GRANT privileges ON objects TO grantees /// ``` - Grant { - /// Privileges being granted. - privileges: Privileges, - /// Optional objects the privileges apply to. - objects: Option, - /// List of grantees receiving the privileges. - grantees: Vec, - /// Whether `WITH GRANT OPTION` is present. - with_grant_option: bool, - /// Optional `AS GRANTOR` identifier. - as_grantor: Option, - /// Optional `GRANTED BY` identifier. - granted_by: Option, - /// Optional `CURRENT GRANTS` modifier. - current_grants: Option, - }, + Grant(Grant), /// ```sql /// DENY privileges ON object TO grantees /// ``` @@ -4412,18 +4328,7 @@ pub enum Statement { /// ```sql /// REVOKE privileges ON objects FROM grantees /// ``` - Revoke { - /// Privileges to revoke. - privileges: Privileges, - /// Optional objects from which to revoke. - objects: Option, - /// Grantees affected by the revoke. - grantees: Vec, - /// Optional `GRANTED BY` identifier. 
- granted_by: Option, - /// Optional `CASCADE`/`RESTRICT` behavior. - cascade: Option, - }, + Revoke(Revoke), /// ```sql /// DEALLOCATE [ PREPARE ] { name | ALL } /// ``` @@ -5406,48 +5311,7 @@ impl fmt::Display for Statement { Statement::CreateServer(stmt) => { write!(f, "{stmt}") } - Statement::CreatePolicy { - name, - table_name, - policy_type, - command, - to, - using, - with_check, - } => { - write!(f, "CREATE POLICY {name} ON {table_name}")?; - - if let Some(policy_type) = policy_type { - match policy_type { - CreatePolicyType::Permissive => write!(f, " AS PERMISSIVE")?, - CreatePolicyType::Restrictive => write!(f, " AS RESTRICTIVE")?, - } - } - - if let Some(command) = command { - match command { - CreatePolicyCommand::All => write!(f, " FOR ALL")?, - CreatePolicyCommand::Select => write!(f, " FOR SELECT")?, - CreatePolicyCommand::Insert => write!(f, " FOR INSERT")?, - CreatePolicyCommand::Update => write!(f, " FOR UPDATE")?, - CreatePolicyCommand::Delete => write!(f, " FOR DELETE")?, - } - } - - if let Some(to) = to { - write!(f, " TO {}", display_comma_separated(to))?; - } - - if let Some(using) = using { - write!(f, " USING ({using})")?; - } - - if let Some(with_check) = with_check { - write!(f, " WITH CHECK ({with_check})")?; - } - - Ok(()) - } + Statement::CreatePolicy(policy) => write!(f, "{policy}"), Statement::CreateConnector(create_connector) => create_connector.fmt(f), Statement::CreateOperator(create_operator) => create_operator.fmt(f), Statement::CreateOperatorFamily(create_operator_family) => { @@ -5486,13 +5350,7 @@ impl fmt::Display for Statement { Statement::AlterRole { name, operation } => { write!(f, "ALTER ROLE {name} {operation}") } - Statement::AlterPolicy { - name, - table_name, - operation, - } => { - write!(f, "ALTER POLICY {name} ON {table_name}{operation}") - } + Statement::AlterPolicy(alter_policy) => write!(f, "{alter_policy}"), Statement::AlterConnector { name, properties, @@ -5616,22 +5474,7 @@ impl fmt::Display for Statement { 
} Ok(()) } - Statement::DropPolicy { - if_exists, - name, - table_name, - drop_behavior, - } => { - write!(f, "DROP POLICY")?; - if *if_exists { - write!(f, " IF EXISTS")?; - } - write!(f, " {name} ON {table_name}")?; - if let Some(drop_behavior) = drop_behavior { - write!(f, " {drop_behavior}")?; - } - Ok(()) - } + Statement::DropPolicy(policy) => write!(f, "{policy}"), Statement::DropConnector { if_exists, name } => { write!( f, @@ -5899,55 +5742,9 @@ impl fmt::Display for Statement { } Ok(()) } - Statement::Grant { - privileges, - objects, - grantees, - with_grant_option, - as_grantor, - granted_by, - current_grants, - } => { - write!(f, "GRANT {privileges} ")?; - if let Some(objects) = objects { - write!(f, "ON {objects} ")?; - } - write!(f, "TO {}", display_comma_separated(grantees))?; - if *with_grant_option { - write!(f, " WITH GRANT OPTION")?; - } - if let Some(current_grants) = current_grants { - write!(f, " {current_grants}")?; - } - if let Some(grantor) = as_grantor { - write!(f, " AS {grantor}")?; - } - if let Some(grantor) = granted_by { - write!(f, " GRANTED BY {grantor}")?; - } - Ok(()) - } + Statement::Grant(grant) => write!(f, "{grant}"), Statement::Deny(s) => write!(f, "{s}"), - Statement::Revoke { - privileges, - objects, - grantees, - granted_by, - cascade, - } => { - write!(f, "REVOKE {privileges} ")?; - if let Some(objects) = objects { - write!(f, "ON {objects} ")?; - } - write!(f, "FROM {}", display_comma_separated(grantees))?; - if let Some(grantor) = granted_by { - write!(f, " GRANTED BY {grantor}")?; - } - if let Some(cascade) = cascade { - write!(f, " {cascade}")?; - } - Ok(()) - } + Statement::Revoke(revoke) => write!(f, "{revoke}"), Statement::Deallocate { name, prepare } => write!( f, "DEALLOCATE {prepare}{name}", diff --git a/src/parser/alter.rs b/src/parser/alter.rs index 8ef712ef77..c64c4a409f 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -19,7 +19,7 @@ use super::{Parser, ParserError}; use crate::{ ast::{ 
helpers::key_value_options::{KeyValueOptions, KeyValueOptionsDelimiter}, - AlterConnectorOwner, AlterPolicyOperation, AlterRoleOperation, AlterUser, + AlterConnectorOwner, AlterPolicy, AlterPolicyOperation, AlterRoleOperation, AlterUser, AlterUserAddMfaMethodOtp, AlterUserAddRoleDelegation, AlterUserModifyMfaMethod, AlterUserPassword, AlterUserRemoveRoleDelegation, AlterUserSetPolicy, Expr, MfaMethodKind, Password, ResetConfig, RoleOption, SetConfigValue, Statement, UserPolicyKind, @@ -54,7 +54,7 @@ impl Parser<'_> { /// ``` /// /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-alterpolicy.html) - pub fn parse_alter_policy(&mut self) -> Result { + pub fn parse_alter_policy(&mut self) -> Result { let name = self.parse_identifier()?; self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; @@ -62,7 +62,7 @@ impl Parser<'_> { if self.parse_keyword(Keyword::RENAME) { self.expect_keyword_is(Keyword::TO)?; let new_name = self.parse_identifier()?; - Ok(Statement::AlterPolicy { + Ok(AlterPolicy { name, table_name, operation: AlterPolicyOperation::Rename { new_name }, @@ -91,7 +91,7 @@ impl Parser<'_> { } else { None }; - Ok(Statement::AlterPolicy { + Ok(AlterPolicy { name, table_name, operation: AlterPolicyOperation::Apply { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index dbdce02dc0..4dc704ac9e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -32,7 +32,6 @@ use recursion::RecursionCounter; use IsLateral::*; use IsOptional::*; -use crate::ast::Statement::CreatePolicy; use crate::ast::*; use crate::ast::{ comments, @@ -658,12 +657,12 @@ impl<'a> Parser<'a> { Keyword::SET => self.parse_set(), Keyword::SHOW => self.parse_show(), Keyword::USE => self.parse_use(), - Keyword::GRANT => self.parse_grant(), + Keyword::GRANT => self.parse_grant().map(Into::into), Keyword::DENY => { self.prev_token(); self.parse_deny() } - Keyword::REVOKE => self.parse_revoke(), + Keyword::REVOKE => self.parse_revoke().map(Into::into), 
Keyword::START => self.parse_start_transaction(), Keyword::BEGIN => self.parse_begin(), Keyword::END => self.parse_end(), @@ -4971,7 +4970,7 @@ impl<'a> Parser<'a> { self.parse_create_view(or_alter, or_replace, temporary, create_view_params) .map(Into::into) } else if self.parse_keyword(Keyword::POLICY) { - self.parse_create_policy() + self.parse_create_policy().map(Into::into) } else if self.parse_keyword(Keyword::EXTERNAL) { self.parse_create_external_table(or_replace).map(Into::into) } else if self.parse_keyword(Keyword::FUNCTION) { @@ -6620,7 +6619,7 @@ impl<'a> Parser<'a> { /// ``` /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-createpolicy.html) - pub fn parse_create_policy(&mut self) -> Result { + pub fn parse_create_policy(&mut self) -> Result { let name = self.parse_identifier()?; self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; @@ -7052,7 +7051,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::FUNCTION) { return self.parse_drop_function().map(Into::into); } else if self.parse_keyword(Keyword::POLICY) { - return self.parse_drop_policy(); + return self.parse_drop_policy().map(Into::into); } else if self.parse_keyword(Keyword::CONNECTOR) { return self.parse_drop_connector(); } else if self.parse_keyword(Keyword::DOMAIN) { @@ -7143,13 +7142,13 @@ impl<'a> Parser<'a> { /// ``` /// /// [PostgreSQL Documentation](https://www.postgresql.org/docs/current/sql-droppolicy.html) - fn parse_drop_policy(&mut self) -> Result { + fn parse_drop_policy(&mut self) -> Result { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let name = self.parse_identifier()?; self.expect_keyword_is(Keyword::ON)?; let table_name = self.parse_object_name(false)?; let drop_behavior = self.parse_optional_drop_behavior(); - Ok(Statement::DropPolicy { + Ok(DropPolicy { if_exists, name, table_name, @@ -10278,7 +10277,7 @@ impl<'a> Parser<'a> { } } Keyword::ROLE => self.parse_alter_role(), - 
Keyword::POLICY => self.parse_alter_policy(), + Keyword::POLICY => self.parse_alter_policy().map(Into::into), Keyword::CONNECTOR => self.parse_alter_connector(), Keyword::USER => self.parse_alter_user().map(Into::into), // unreachable because expect_one_of_keywords used above @@ -16156,7 +16155,7 @@ impl<'a> Parser<'a> { } /// Parse a GRANT statement. - pub fn parse_grant(&mut self) -> Result { + pub fn parse_grant(&mut self) -> Result { let (privileges, objects) = self.parse_grant_deny_revoke_privileges_objects()?; self.expect_keyword_is(Keyword::TO)?; @@ -16186,7 +16185,7 @@ impl<'a> Parser<'a> { None }; - Ok(Statement::Grant { + Ok(Grant { privileges, objects, grantees, @@ -16781,7 +16780,7 @@ impl<'a> Parser<'a> { } /// Parse a REVOKE statement - pub fn parse_revoke(&mut self) -> Result { + pub fn parse_revoke(&mut self) -> Result { let (privileges, objects) = self.parse_grant_deny_revoke_privileges_objects()?; self.expect_keyword_is(Keyword::FROM)?; @@ -16795,7 +16794,7 @@ impl<'a> Parser<'a> { let cascade = self.parse_cascade_option(); - Ok(Statement::Revoke { + Ok(Revoke { privileges, objects, grantees, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 69524ff993..5be16f4aa0 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -9605,14 +9605,14 @@ fn parse_drop_role() { fn parse_grant() { let sql = "GRANT SELECT, INSERT, UPDATE (shape, size), USAGE, DELETE, TRUNCATE, REFERENCES, TRIGGER, CONNECT, CREATE, EXECUTE, TEMPORARY, DROP ON abc, def TO xyz, m WITH GRANT OPTION GRANTED BY jj"; match verified_stmt(sql) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, grantees, with_grant_option, granted_by, .. 
- } => match (privileges, objects) { + }) => match (privileges, objects) { (Privileges::Actions(actions), Some(GrantObjects::Tables(objects))) => { assert_eq!( vec![ @@ -9657,13 +9657,13 @@ fn parse_grant() { let sql2 = "GRANT INSERT ON ALL TABLES IN SCHEMA public TO browser"; match verified_stmt(sql2) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, grantees, with_grant_option, .. - } => match (privileges, objects) { + }) => match (privileges, objects) { (Privileges::Actions(actions), Some(GrantObjects::AllTablesInSchema { schemas })) => { assert_eq!(vec![Action::Insert { columns: None }], actions); assert_eq_vec(&["public"], &schemas); @@ -9677,13 +9677,13 @@ fn parse_grant() { let sql3 = "GRANT USAGE, SELECT ON SEQUENCE p TO u"; match verified_stmt(sql3) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, grantees, granted_by, .. - } => match (privileges, objects, granted_by) { + }) => match (privileges, objects, granted_by) { (Privileges::Actions(actions), Some(GrantObjects::Sequences(objects)), None) => { assert_eq!( vec![Action::Usage, Action::Select { columns: None }], @@ -9699,7 +9699,7 @@ fn parse_grant() { let sql4 = "GRANT ALL PRIVILEGES ON aa, b TO z"; match verified_stmt(sql4) { - Statement::Grant { privileges, .. } => { + Statement::Grant(Grant { privileges, .. }) => { assert_eq!( Privileges::All { with_privileges_keyword: true @@ -9712,11 +9712,11 @@ fn parse_grant() { let sql5 = "GRANT ALL ON SCHEMA aa, b TO z"; match verified_stmt(sql5) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, .. - } => match (privileges, objects) { + }) => match (privileges, objects) { ( Privileges::All { with_privileges_keyword, @@ -9733,11 +9733,11 @@ fn parse_grant() { let sql6 = "GRANT USAGE ON ALL SEQUENCES IN SCHEMA bus TO a, beta WITH GRANT OPTION"; match verified_stmt(sql6) { - Statement::Grant { + Statement::Grant(Grant { privileges, objects, .. 
- } => match (privileges, objects) { + }) => match (privileges, objects) { ( Privileges::Actions(actions), Some(GrantObjects::AllSequencesInSchema { schemas }), @@ -9818,13 +9818,13 @@ fn parse_deny() { fn test_revoke() { let sql = "REVOKE ALL PRIVILEGES ON users, auth FROM analyst"; match verified_stmt(sql) { - Statement::Revoke { + Statement::Revoke(Revoke { privileges, objects: Some(GrantObjects::Tables(tables)), grantees, granted_by, cascade, - } => { + }) => { assert_eq!( Privileges::All { with_privileges_keyword: true @@ -9844,13 +9844,13 @@ fn test_revoke() { fn test_revoke_with_cascade() { let sql = "REVOKE ALL PRIVILEGES ON users, auth FROM analyst CASCADE"; match all_dialects_except(|d| d.is::()).verified_stmt(sql) { - Statement::Revoke { + Statement::Revoke(Revoke { privileges, objects: Some(GrantObjects::Tables(tables)), grantees, granted_by, cascade, - } => { + }) => { assert_eq!( Privileges::All { with_privileges_keyword: true @@ -13906,14 +13906,14 @@ fn test_create_policy() { WITH CHECK (1 = 1)"; match all_dialects().verified_stmt(sql) { - Statement::CreatePolicy { + Statement::CreatePolicy(CreatePolicy { name, table_name, to, using, with_check, .. - } => { + }) => { assert_eq!(name.to_string(), "my_policy"); assert_eq!(table_name.to_string(), "my_table"); assert_eq!( @@ -14014,12 +14014,12 @@ fn test_create_policy() { fn test_drop_policy() { let sql = "DROP POLICY IF EXISTS my_policy ON my_table RESTRICT"; match all_dialects().verified_stmt(sql) { - Statement::DropPolicy { + Statement::DropPolicy(DropPolicy { if_exists, name, table_name, drop_behavior, - } => { + }) => { assert_eq!(if_exists, true); assert_eq!(name.to_string(), "my_policy"); assert_eq!(table_name.to_string(), "my_table"); @@ -14054,12 +14054,12 @@ fn test_drop_policy() { #[test] fn test_alter_policy() { match verified_stmt("ALTER POLICY old_policy ON my_table RENAME TO new_policy") { - Statement::AlterPolicy { + Statement::AlterPolicy(AlterPolicy { name, table_name, operation, .. 
- } => { + }) => { assert_eq!(name.to_string(), "old_policy"); assert_eq!(table_name.to_string(), "my_table"); assert_eq!( @@ -14076,9 +14076,9 @@ fn test_alter_policy() { "ALTER POLICY my_policy ON my_table TO CURRENT_USER ", "USING ((SELECT c0)) WITH CHECK (c0 > 0)" )) { - Statement::AlterPolicy { + Statement::AlterPolicy(AlterPolicy { name, table_name, .. - } => { + }) => { assert_eq!(name.to_string(), "my_policy"); assert_eq!(table_name.to_string(), "my_table"); } diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 80aed5bfee..2c942798c8 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -3805,7 +3805,7 @@ fn parse_bitstring_literal() { fn parse_grant() { let sql = "GRANT ALL ON *.* TO 'jeffrey'@'%'"; let stmt = mysql().verified_stmt(sql); - if let Statement::Grant { + if let Statement::Grant(Grant { privileges, objects, grantees, @@ -3813,7 +3813,7 @@ fn parse_grant() { as_grantor: _, granted_by, current_grants: _, - } = stmt + }) = stmt { assert_eq!( privileges, @@ -3851,13 +3851,13 @@ fn parse_grant() { fn parse_revoke() { let sql = "REVOKE ALL ON db1.* FROM 'jeffrey'@'%'"; let stmt = mysql_and_generic().verified_stmt(sql); - if let Statement::Revoke { + if let Statement::Revoke(Revoke { privileges, objects, grantees, granted_by, cascade, - } = stmt + }) = stmt { assert_eq!( privileges, From 3b9b05e6e6e18487e6e6238ff4cecb8528fc90ad Mon Sep 17 00:00:00 2001 From: isaacparker0 <128327439+isaacparker0@users.noreply.github.com> Date: Mon, 2 Feb 2026 11:18:44 -0500 Subject: [PATCH 052/121] Fix parsing cast operator after parenthesized `DEFAULT` expression (#2168) --- src/parser/mod.rs | 59 ++++++++++++------------------------- tests/sqlparser_common.rs | 6 ++++ tests/sqlparser_postgres.rs | 7 +++++ 3 files changed, 32 insertions(+), 40 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4dc704ac9e..80c7058333 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1820,7 +1820,19 @@ impl<'a> Parser<'a> 
{ } else if let Some(lambda) = self.try_parse_lambda()? { return Ok(lambda); } else { - let exprs = self.parse_comma_separated(Parser::parse_expr)?; + // Parentheses in expressions switch to "normal" parsing state. + // This matters for dialects (SQLite, DuckDB) where `NOT NULL` can + // be an alias for `IS NOT NULL`. In column definitions like: + // + // CREATE TABLE t (c INT DEFAULT (42 NOT NULL) NOT NULL) + // + // The `(42 NOT NULL)` is an expression with parens, so it parses + // as `IsNotNull(42)`. The trailing `NOT NULL` is outside those + // expression parens (the outer parens are CREATE TABLE syntax), + // so it remains a column constraint. + let exprs = self.with_state(ParserState::Normal, |p| { + p.parse_comma_separated(Parser::parse_expr) + })?; match exprs.len() { 0 => return Err(ParserError::ParserError( "Internal parser error: parse_comma_separated returned empty list" @@ -8823,19 +8835,15 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::NULL) { Ok(Some(ColumnOption::Null)) } else if self.parse_keyword(Keyword::DEFAULT) { - Ok(Some(ColumnOption::Default( - self.parse_column_option_expr()?, - ))) + Ok(Some(ColumnOption::Default(self.parse_expr()?))) } else if dialect_of!(self is ClickHouseDialect| GenericDialect) && self.parse_keyword(Keyword::MATERIALIZED) { - Ok(Some(ColumnOption::Materialized( - self.parse_column_option_expr()?, - ))) + Ok(Some(ColumnOption::Materialized(self.parse_expr()?))) } else if dialect_of!(self is ClickHouseDialect| GenericDialect) && self.parse_keyword(Keyword::ALIAS) { - Ok(Some(ColumnOption::Alias(self.parse_column_option_expr()?))) + Ok(Some(ColumnOption::Alias(self.parse_expr()?))) } else if dialect_of!(self is ClickHouseDialect| GenericDialect) && self.parse_keyword(Keyword::EPHEMERAL) { @@ -8844,9 +8852,7 @@ impl<'a> Parser<'a> { if matches!(self.peek_token().token, Token::Comma | Token::RParen) { Ok(Some(ColumnOption::Ephemeral(None))) } else { - Ok(Some(ColumnOption::Ephemeral(Some( - 
self.parse_column_option_expr()?, - )))) + Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) } } else if self.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { let characteristics = self.parse_constraint_characteristics()?; @@ -8968,7 +8974,7 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::ON, Keyword::UPDATE]) && dialect_of!(self is MySqlDialect | GenericDialect) { - let expr = self.parse_column_option_expr()?; + let expr = self.parse_expr()?; Ok(Some(ColumnOption::OnUpdate(expr))) } else if self.parse_keyword(Keyword::GENERATED) { self.parse_optional_column_option_generated() @@ -8986,9 +8992,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::SRID) && dialect_of!(self is MySqlDialect | GenericDialect) { - Ok(Some(ColumnOption::Srid(Box::new( - self.parse_column_option_expr()?, - )))) + Ok(Some(ColumnOption::Srid(Box::new(self.parse_expr()?)))) } else if self.parse_keyword(Keyword::IDENTITY) && dialect_of!(self is MsSqlDialect | GenericDialect) { @@ -9030,31 +9034,6 @@ impl<'a> Parser<'a> { } } - /// When parsing some column option expressions we need to revert to [ParserState::Normal] since - /// `NOT NULL` is allowed as an alias for `IS NOT NULL`. - /// In those cases we use this helper instead of calling [Parser::parse_expr] directly. - /// - /// For example, consider these `CREATE TABLE` statements: - /// ```sql - /// CREATE TABLE foo (abc BOOL DEFAULT (42 NOT NULL) NOT NULL); - /// ``` - /// vs - /// ```sql - /// CREATE TABLE foo (abc BOOL NOT NULL); - /// ``` - /// - /// In the first we should parse the inner portion of `(42 NOT NULL)` as [Expr::IsNotNull], - /// whereas is both statements that trailing `NOT NULL` should only be parsed as a - /// [ColumnOption::NotNull]. - fn parse_column_option_expr(&mut self) -> Result { - if self.peek_token_ref().token == Token::LParen { - let expr: Expr = self.with_state(ParserState::Normal, |p| p.parse_prefix())?; - Ok(expr) - } else { - Ok(self.parse_expr()?) 
- } - } - pub(crate) fn parse_tag(&mut self) -> Result { let name = self.parse_object_name(false)?; self.expect_token(&Token::Eq)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5be16f4aa0..e6a48c7b37 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -17376,6 +17376,12 @@ fn test_parse_not_null_in_column_options() { ); } +#[test] +fn test_parse_default_expr_with_operators() { + all_dialects().verified_stmt("CREATE TABLE t (c INT DEFAULT (1 + 2) + 3)"); + all_dialects().verified_stmt("CREATE TABLE t (c INT DEFAULT (1 + 2) + 3 NOT NULL)"); +} + #[test] fn test_parse_default_with_collate_column_option() { let sql = "CREATE TABLE foo (abc TEXT DEFAULT 'foo' COLLATE 'en_US')"; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 54e9ee0c01..1f91bffd90 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -512,6 +512,13 @@ fn parse_create_table_with_defaults() { } } +#[test] +fn parse_cast_in_default_expr() { + pg().verified_stmt("CREATE TABLE t (c TEXT DEFAULT (foo())::TEXT)"); + pg().verified_stmt("CREATE TABLE t (c TEXT DEFAULT (foo())::INT::TEXT)"); + pg().verified_stmt("CREATE TABLE t (c TEXT DEFAULT (foo())::TEXT NOT NULL)"); +} + #[test] fn parse_create_table_from_pg_dump() { let sql = "CREATE TABLE public.customer ( From 2ac82e946e5f6513b51b747f1783c1cf5f4a733d Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Tue, 3 Feb 2026 20:09:11 +0700 Subject: [PATCH 053/121] Streamlined derivation of new `Dialect` objects (#2174) --- Cargo.toml | 7 +- derive/Cargo.toml | 2 +- derive/src/dialect.rs | 305 ++++++++++++++++++++++++++++++ derive/src/lib.rs | 276 +++------------------------ derive/src/visit.rs | 268 ++++++++++++++++++++++++++ src/dialect/ansi.rs | 2 +- src/dialect/clickhouse.rs | 2 +- src/dialect/hive.rs | 2 +- src/dialect/mod.rs | 108 ++++++++++- src/dialect/mssql.rs | 2 +- src/dialect/mysql.rs | 2 +- src/dialect/oracle.rs | 2 +- src/dialect/postgresql.rs | 2 
+- src/dialect/redshift.rs | 2 +- src/dialect/sqlite.rs | 2 +- src/lib.rs | 3 + tests/sqlparser_derive_dialect.rs | 123 ++++++++++++ 17 files changed, 841 insertions(+), 269 deletions(-) create mode 100644 derive/src/dialect.rs create mode 100644 derive/src/visit.rs create mode 100644 tests/sqlparser_derive_dialect.rs diff --git a/Cargo.toml b/Cargo.toml index 177ab3db31..8945adef7e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ std = [] recursive-protection = ["std", "recursive"] # Enable JSON output in the `cli` example: json_example = ["serde_json", "serde"] +derive-dialect = ["sqlparser_derive"] visitor = ["sqlparser_derive"] [dependencies] @@ -61,6 +62,10 @@ simple_logger = "5.0" matches = "0.1" pretty_assertions = "1" +[[test]] +name = "sqlparser_derive_dialect" +required-features = ["derive-dialect"] + [package.metadata.docs.rs] # Document these features on docs.rs -features = ["serde", "visitor"] +features = ["serde", "visitor", "derive-dialect"] diff --git a/derive/Cargo.toml b/derive/Cargo.toml index 549477041b..f2f54926b5 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -36,6 +36,6 @@ edition = "2021" proc-macro = true [dependencies] -syn = { version = "2.0", default-features = false, features = ["printing", "parsing", "derive", "proc-macro"] } +syn = { version = "2.0", default-features = false, features = ["full", "printing", "parsing", "derive", "proc-macro", "clone-impls"] } proc-macro2 = "1.0" quote = "1.0" diff --git a/derive/src/dialect.rs b/derive/src/dialect.rs new file mode 100644 index 0000000000..9873e4f7b5 --- /dev/null +++ b/derive/src/dialect.rs @@ -0,0 +1,305 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Implementation of the `derive_dialect!` macro for creating custom SQL dialects. + +use proc_macro2::TokenStream; +use quote::{quote, quote_spanned}; +use std::collections::HashSet; +use syn::{ + braced, + parse::{Parse, ParseStream}, + Error, File, FnArg, Ident, Item, LitBool, LitChar, Pat, ReturnType, Signature, Token, + TraitItem, Type, +}; + +/// Override value types supported by the macro +pub(crate) enum Override { + Bool(LitBool), + Char(LitChar), + None, +} + +/// Parsed input for the `derive_dialect!` macro +pub(crate) struct DeriveDialectInput { + pub name: Ident, + pub base: Type, + pub preserve_type_id: bool, + pub overrides: Vec<(Ident, Override)>, +} + +/// `Dialect` trait method attrs +struct DialectMethod { + name: Ident, + signature: Signature, +} + +impl Parse for DeriveDialectInput { + fn parse(input: ParseStream) -> syn::Result { + let name: Ident = input.parse()?; + input.parse::()?; + let base: Type = input.parse()?; + + let mut preserve_type_id = false; + let mut overrides = Vec::new(); + + while input.peek(Token![,]) { + input.parse::()?; + if input.is_empty() { + break; + } + if input.peek(Ident) { + let ident: Ident = input.parse()?; + match ident.to_string().as_str() { + "preserve_type_id" => { + input.parse::()?; + preserve_type_id = input.parse::()?.value(); + } + "overrides" => { + input.parse::()?; + let content; + braced!(content in input); + while 
!content.is_empty() { + let key: Ident = content.parse()?; + content.parse::()?; + let value = if content.peek(LitBool) { + Override::Bool(content.parse()?) + } else if content.peek(LitChar) { + Override::Char(content.parse()?) + } else if content.peek(Ident) { + let ident: Ident = content.parse()?; + if ident == "None" { + Override::None + } else { + return Err(Error::new( + ident.span(), + format!("Expected `true`, `false`, a char, or `None`, found `{ident}`"), + )); + } + } else { + return Err( + content.error("Expected `true`, `false`, a char, or `None`") + ); + }; + overrides.push((key, value)); + if content.peek(Token![,]) { + content.parse::()?; + } + } + } + other => { + return Err(Error::new(ident.span(), format!( + "Unknown argument `{other}`. Expected `preserve_type_id` or `overrides`." + ))); + } + } + } + } + Ok(DeriveDialectInput { + name, + base, + preserve_type_id, + overrides, + }) + } +} + +/// Entry point for the `derive_dialect!` macro +pub(crate) fn derive_dialect(input: DeriveDialectInput) -> proc_macro::TokenStream { + let err = |msg: String| { + Error::new(proc_macro2::Span::call_site(), msg) + .to_compile_error() + .into() + }; + + let source = match read_dialect_mod_file() { + Ok(s) => s, + Err(e) => return err(format!("Failed to read dialect/mod.rs: {e}")), + }; + let file: File = match syn::parse_str(&source) { + Ok(f) => f, + Err(e) => return err(format!("Failed to parse source: {e}")), + }; + let methods = match extract_dialect_methods(&file) { + Ok(m) => m, + Err(e) => return e.to_compile_error().into(), + }; + + // Validate overrides + let bool_names: HashSet<_> = methods + .iter() + .filter(|m| is_bool_method(&m.signature)) + .map(|m| m.name.to_string()) + .collect(); + for (key, value) in &input.overrides { + let key_str = key.to_string(); + let err = |msg| Error::new(key.span(), msg).to_compile_error().into(); + match value { + Override::Bool(_) if !bool_names.contains(&key_str) => { + return err(format!("Unknown boolean method 
`{key_str}`")); + } + Override::Char(_) | Override::None if key_str != "identifier_quote_style" => { + return err(format!( + "Char/None only valid for `identifier_quote_style`, not `{key_str}`" + )); + } + _ => {} + } + } + generate_derived_dialect(&input, &methods).into() +} + +/// Generate the complete derived `Dialect` implementation +fn generate_derived_dialect(input: &DeriveDialectInput, methods: &[DialectMethod]) -> TokenStream { + let name = &input.name; + let base = &input.base; + + // Helper to find an override by method name + let find_override = |method_name: &str| { + input + .overrides + .iter() + .find(|(k, _)| k == method_name) + .map(|(_, v)| v) + }; + + // Helper to generate delegation to base dialect + let delegate = |method: &DialectMethod| { + let sig = &method.signature; + let method_name = &method.name; + let params = extract_param_names(sig); + quote_spanned! { method_name.span() => #sig { self.dialect.#method_name(#(#params),*) } } + }; + + // Generate the struct + let struct_def = quote_spanned! { name.span() => + #[derive(Debug, Default)] + pub struct #name { + dialect: #base, + } + impl #name { + pub fn new() -> Self { Self::default() } + } + }; + + // Generate TypeId method body + let type_id_body = if input.preserve_type_id { + quote! { Dialect::dialect(&self.dialect) } + } else { + quote! { ::core::any::TypeId::of::<#name>() } + }; + + // Generate method implementations + let method_impls = methods.iter().map(|method| { + let method_name = &method.name; + match find_override(&method_name.to_string()) { + Some(Override::Bool(value)) => { + quote_spanned! { method_name.span() => fn #method_name(&self) -> bool { #value } } + } + Some(Override::Char(c)) => { + quote_spanned! { method_name.span() => + fn identifier_quote_style(&self, _: &str) -> Option { Some(#c) } + } + } + Some(Override::None) => { + quote_spanned! 
{ method_name.span() => + fn identifier_quote_style(&self, _: &str) -> Option { None } + } + } + None => delegate(method), + } + }); + + // Wrap impl in a const block with scoped imports so types resolve without qualification + quote! { + #struct_def + const _: () = { + use ::core::iter::Peekable; + use ::core::str::Chars; + use sqlparser::ast::{ColumnOption, Expr, GranteesType, Ident, ObjectNamePart, Statement}; + use sqlparser::dialect::{Dialect, Precedence}; + use sqlparser::keywords::Keyword; + use sqlparser::parser::{Parser, ParserError}; + + impl Dialect for #name { + fn dialect(&self) -> ::core::any::TypeId { #type_id_body } + #(#method_impls)* + } + }; + } +} + +/// Extract parameter names from a method signature (excluding self) +fn extract_param_names(sig: &Signature) -> Vec<&Ident> { + sig.inputs + .iter() + .filter_map(|arg| match arg { + FnArg::Typed(pt) => match pt.pat.as_ref() { + Pat::Ident(pi) => Some(&pi.ident), + _ => None, + }, + _ => None, + }) + .collect() +} + +/// Read the `dialect/mod.rs` file that contains the Dialect trait. 
+fn read_dialect_mod_file() -> Result { + let manifest_dir = + std::env::var("CARGO_MANIFEST_DIR").map_err(|_| "CARGO_MANIFEST_DIR not set")?; + let path = std::path::Path::new(&manifest_dir).join("src/dialect/mod.rs"); + std::fs::read_to_string(&path).map_err(|e| format!("Failed to read {}: {e}", path.display())) +} + +/// Extract all methods from the `Dialect` trait (excluding `dialect` for TypeId) +fn extract_dialect_methods(file: &File) -> Result, Error> { + let dialect_trait = file + .items + .iter() + .find_map(|item| match item { + Item::Trait(t) if t.ident == "Dialect" => Some(t), + _ => None, + }) + .ok_or_else(|| Error::new(proc_macro2::Span::call_site(), "Dialect trait not found"))?; + + let mut methods: Vec<_> = dialect_trait + .items + .iter() + .filter_map(|item| match item { + TraitItem::Fn(m) if m.sig.ident != "dialect" => Some(DialectMethod { + name: m.sig.ident.clone(), + signature: m.sig.clone(), + }), + _ => None, + }) + .collect(); + methods.sort_by_key(|m| m.name.to_string()); + Ok(methods) +} + +/// Check if a method signature is `fn name(&self) -> bool` +fn is_bool_method(sig: &Signature) -> bool { + sig.inputs.len() == 1 + && matches!( + sig.inputs.first(), + Some(FnArg::Receiver(r)) if r.reference.is_some() && r.mutability.is_none() + ) + && matches!( + &sig.output, + ReturnType::Type(_, ty) if matches!(ty.as_ref(), Type::Path(p) if p.path.is_ident("bool")) + ) +} diff --git a/derive/src/lib.rs b/derive/src/lib.rs index 08c5c5db4b..e3eaeea6d5 100644 --- a/derive/src/lib.rs +++ b/derive/src/lib.rs @@ -15,22 +15,25 @@ // specific language governing permissions and limitations // under the License. 
-use proc_macro2::TokenStream; -use quote::{format_ident, quote, quote_spanned, ToTokens}; -use syn::spanned::Spanned; -use syn::{ - parse::{Parse, ParseStream}, - parse_macro_input, parse_quote, Attribute, Data, DeriveInput, Fields, GenericParam, Generics, - Ident, Index, LitStr, Meta, Token, Type, TypePath, -}; -use syn::{Path, PathArguments}; +//! Procedural macros for sqlparser. +//! +//! This crate provides: +//! - [`Visit`] and [`VisitMut`] derive macros for AST traversal. +//! - [`derive_dialect!`] macro for creating custom SQL dialects. -/// Implementation of `[#derive(Visit)]` +use quote::quote; +use syn::parse_macro_input; + +mod dialect; +mod visit; + +/// Implementation of `#[derive(VisitMut)]` #[proc_macro_derive(VisitMut, attributes(visit))] pub fn derive_visit_mut(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - derive_visit( + let input = parse_macro_input!(input as syn::DeriveInput); + visit::derive_visit( input, - &VisitType { + &visit::VisitType { visit_trait: quote!(VisitMut), visitor_trait: quote!(VisitorMut), modifier: Some(quote!(mut)), @@ -38,12 +41,13 @@ pub fn derive_visit_mut(input: proc_macro::TokenStream) -> proc_macro::TokenStre ) } -/// Implementation of `[#derive(Visit)]` +/// Implementation of `#[derive(Visit)]` #[proc_macro_derive(Visit, attributes(visit))] pub fn derive_visit_immutable(input: proc_macro::TokenStream) -> proc_macro::TokenStream { - derive_visit( + let input = parse_macro_input!(input as syn::DeriveInput); + visit::derive_visit( input, - &VisitType { + &visit::VisitType { visit_trait: quote!(Visit), visitor_trait: quote!(Visitor), modifier: None, @@ -51,241 +55,9 @@ pub fn derive_visit_immutable(input: proc_macro::TokenStream) -> proc_macro::Tok ) } -struct VisitType { - visit_trait: TokenStream, - visitor_trait: TokenStream, - modifier: Option, -} - -fn derive_visit(input: proc_macro::TokenStream, visit_type: &VisitType) -> proc_macro::TokenStream { - // Parse the input tokens into a syntax tree. 
- let input = parse_macro_input!(input as DeriveInput); - let name = input.ident; - - let VisitType { - visit_trait, - visitor_trait, - modifier, - } = visit_type; - - let attributes = Attributes::parse(&input.attrs); - // Add a bound `T: Visit` to every type parameter T. - let generics = add_trait_bounds(input.generics, visit_type); - let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); - - let (pre_visit, post_visit) = attributes.visit(quote!(self)); - let children = visit_children(&input.data, visit_type); - - let expanded = quote! { - // The generated impl. - // Note that it uses [`recursive::recursive`] to protect from stack overflow. - // See tests in https://github.com/apache/datafusion-sqlparser-rs/pull/1522/ for more info. - impl #impl_generics sqlparser::ast::#visit_trait for #name #ty_generics #where_clause { - #[cfg_attr(feature = "recursive-protection", recursive::recursive)] - fn visit( - &#modifier self, - visitor: &mut V - ) -> ::std::ops::ControlFlow { - #pre_visit - #children - #post_visit - ::std::ops::ControlFlow::Continue(()) - } - } - }; - - proc_macro::TokenStream::from(expanded) -} - -/// Parses attributes that can be provided to this macro -/// -/// `#[visit(leaf, with = "visit_expr")]` -#[derive(Default)] -struct Attributes { - /// Content for the `with` attribute - with: Option, -} - -struct WithIdent { - with: Option, -} -impl Parse for WithIdent { - fn parse(input: ParseStream) -> Result { - let mut result = WithIdent { with: None }; - let ident = input.parse::()?; - if ident != "with" { - return Err(syn::Error::new( - ident.span(), - "Expected identifier to be `with`", - )); - } - input.parse::()?; - let s = input.parse::()?; - result.with = Some(format_ident!("{}", s.value(), span = s.span())); - Ok(result) - } -} - -impl Attributes { - fn parse(attrs: &[Attribute]) -> Self { - let mut out = Self::default(); - for attr in attrs { - if let Meta::List(ref metalist) = attr.meta { - if 
metalist.path.is_ident("visit") { - match syn::parse2::(metalist.tokens.clone()) { - Ok(with_ident) => { - out.with = with_ident.with; - } - Err(e) => { - panic!("{}", e); - } - } - } - } - } - out - } - - /// Returns the pre and post visit token streams - fn visit(&self, s: TokenStream) -> (Option, Option) { - let pre_visit = self.with.as_ref().map(|m| { - let m = format_ident!("pre_{}", m); - quote!(visitor.#m(#s)?;) - }); - let post_visit = self.with.as_ref().map(|m| { - let m = format_ident!("post_{}", m); - quote!(visitor.#m(#s)?;) - }); - (pre_visit, post_visit) - } -} - -// Add a bound `T: Visit` to every type parameter T. -fn add_trait_bounds(mut generics: Generics, VisitType { visit_trait, .. }: &VisitType) -> Generics { - for param in &mut generics.params { - if let GenericParam::Type(ref mut type_param) = *param { - type_param - .bounds - .push(parse_quote!(sqlparser::ast::#visit_trait)); - } - } - generics -} - -// Generate the body of the visit implementation for the given type -fn visit_children( - data: &Data, - VisitType { - visit_trait, - modifier, - .. - }: &VisitType, -) -> TokenStream { - match data { - Data::Struct(data) => match &data.fields { - Fields::Named(fields) => { - let recurse = fields.named.iter().map(|f| { - let name = &f.ident; - let is_option = is_option(&f.ty); - let attributes = Attributes::parse(&f.attrs); - if is_option && attributes.with.is_some() { - let (pre_visit, post_visit) = attributes.visit(quote!(value)); - quote_spanned!(f.span() => - if let Some(value) = &#modifier self.#name { - #pre_visit sqlparser::ast::#visit_trait::visit(value, visitor)?; #post_visit - } - ) - } else { - let (pre_visit, post_visit) = attributes.visit(quote!(&#modifier self.#name)); - quote_spanned!(f.span() => - #pre_visit sqlparser::ast::#visit_trait::visit(&#modifier self.#name, visitor)?; #post_visit - ) - } - }); - quote! 
{ - #(#recurse)* - } - } - Fields::Unnamed(fields) => { - let recurse = fields.unnamed.iter().enumerate().map(|(i, f)| { - let index = Index::from(i); - let attributes = Attributes::parse(&f.attrs); - let (pre_visit, post_visit) = attributes.visit(quote!(&self.#index)); - quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(&#modifier self.#index, visitor)?; #post_visit) - }); - quote! { - #(#recurse)* - } - } - Fields::Unit => { - quote!() - } - }, - Data::Enum(data) => { - let statements = data.variants.iter().map(|v| { - let name = &v.ident; - match &v.fields { - Fields::Named(fields) => { - let names = fields.named.iter().map(|f| &f.ident); - let visit = fields.named.iter().map(|f| { - let name = &f.ident; - let attributes = Attributes::parse(&f.attrs); - let (pre_visit, post_visit) = attributes.visit(name.to_token_stream()); - quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(#name, visitor)?; #post_visit) - }); - - quote!( - Self::#name { #(#names),* } => { - #(#visit)* - } - ) - } - Fields::Unnamed(fields) => { - let names = fields.unnamed.iter().enumerate().map(|(i, f)| format_ident!("_{}", i, span = f.span())); - let visit = fields.unnamed.iter().enumerate().map(|(i, f)| { - let name = format_ident!("_{}", i); - let attributes = Attributes::parse(&f.attrs); - let (pre_visit, post_visit) = attributes.visit(name.to_token_stream()); - quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(#name, visitor)?; #post_visit) - }); - - quote! { - Self::#name ( #(#names),*) => { - #(#visit)* - } - } - } - Fields::Unit => { - quote! { - Self::#name => {} - } - } - } - }); - - quote! { - match self { - #(#statements),* - } - } - } - Data::Union(_) => unimplemented!(), - } -} - -fn is_option(ty: &Type) -> bool { - if let Type::Path(TypePath { - path: Path { segments, .. }, - .. 
- }) = ty - { - if let Some(segment) = segments.last() { - if segment.ident == "Option" { - if let PathArguments::AngleBracketed(args) = &segment.arguments { - return args.args.len() == 1; - } - } - } - } - false +/// Procedural macro for deriving new SQL dialects. +#[proc_macro] +pub fn derive_dialect(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let input = parse_macro_input!(input as dialect::DeriveDialectInput); + dialect::derive_dialect(input) } diff --git a/derive/src/visit.rs b/derive/src/visit.rs new file mode 100644 index 0000000000..baf3eb583b --- /dev/null +++ b/derive/src/visit.rs @@ -0,0 +1,268 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Implementation of the `Visit` and `VisitMut` derive macros. 
+ +use proc_macro2::TokenStream; +use quote::{format_ident, quote, quote_spanned, ToTokens}; +use syn::spanned::Spanned; +use syn::{ + parse::{Parse, ParseStream}, + parse_quote, Attribute, Data, Fields, GenericParam, Generics, Ident, Index, LitStr, Meta, + Token, Type, TypePath, +}; +use syn::{Path, PathArguments}; + +pub(crate) struct VisitType { + pub visit_trait: TokenStream, + pub visitor_trait: TokenStream, + pub modifier: Option, +} + +pub(crate) fn derive_visit( + input: syn::DeriveInput, + visit_type: &VisitType, +) -> proc_macro::TokenStream { + let name = input.ident; + + let VisitType { + visit_trait, + visitor_trait, + modifier, + } = visit_type; + + let attributes = Attributes::parse(&input.attrs); + // Add a bound `T: Visit` to every type parameter T. + let generics = add_trait_bounds(input.generics, visit_type); + let (impl_generics, ty_generics, where_clause) = generics.split_for_impl(); + + let (pre_visit, post_visit) = attributes.visit(quote!(self)); + let children = visit_children(&input.data, visit_type); + + let expanded = quote! { + // The generated impl. + // Note that it uses [`recursive::recursive`] to protect from stack overflow. + // See tests in https://github.com/apache/datafusion-sqlparser-rs/pull/1522/ for more info. 
+ impl #impl_generics sqlparser::ast::#visit_trait for #name #ty_generics #where_clause { + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] + fn visit( + &#modifier self, + visitor: &mut V + ) -> ::std::ops::ControlFlow { + #pre_visit + #children + #post_visit + ::std::ops::ControlFlow::Continue(()) + } + } + }; + + proc_macro::TokenStream::from(expanded) +} + +/// Parses attributes that can be provided to this macro +/// +/// `#[visit(leaf, with = "visit_expr")]` +#[derive(Default)] +struct Attributes { + /// Content for the `with` attribute + with: Option, +} + +struct WithIdent { + with: Option, +} +impl Parse for WithIdent { + fn parse(input: ParseStream) -> Result { + let mut result = WithIdent { with: None }; + let ident = input.parse::()?; + if ident != "with" { + return Err(syn::Error::new( + ident.span(), + "Expected identifier to be `with`", + )); + } + input.parse::()?; + let s = input.parse::()?; + result.with = Some(format_ident!("{}", s.value(), span = s.span())); + Ok(result) + } +} + +impl Attributes { + fn parse(attrs: &[Attribute]) -> Self { + let mut out = Self::default(); + for attr in attrs { + if let Meta::List(ref metalist) = attr.meta { + if metalist.path.is_ident("visit") { + match syn::parse2::(metalist.tokens.clone()) { + Ok(with_ident) => { + out.with = with_ident.with; + } + Err(e) => { + panic!("{}", e); + } + } + } + } + } + out + } + + /// Returns the pre and post visit token streams + fn visit(&self, s: TokenStream) -> (Option, Option) { + let pre_visit = self.with.as_ref().map(|m| { + let m = format_ident!("pre_{}", m); + quote!(visitor.#m(#s)?;) + }); + let post_visit = self.with.as_ref().map(|m| { + let m = format_ident!("post_{}", m); + quote!(visitor.#m(#s)?;) + }); + (pre_visit, post_visit) + } +} + +// Add a bound `T: Visit` to every type parameter T. +fn add_trait_bounds(mut generics: Generics, VisitType { visit_trait, .. 
}: &VisitType) -> Generics { + for param in &mut generics.params { + if let GenericParam::Type(ref mut type_param) = *param { + type_param + .bounds + .push(parse_quote!(sqlparser::ast::#visit_trait)); + } + } + generics +} + +// Generate the body of the visit implementation for the given type +fn visit_children( + data: &Data, + VisitType { + visit_trait, + modifier, + .. + }: &VisitType, +) -> TokenStream { + match data { + Data::Struct(data) => match &data.fields { + Fields::Named(fields) => { + let recurse = fields.named.iter().map(|f| { + let name = &f.ident; + let is_option = is_option(&f.ty); + let attributes = Attributes::parse(&f.attrs); + if is_option && attributes.with.is_some() { + let (pre_visit, post_visit) = attributes.visit(quote!(value)); + quote_spanned!(f.span() => + if let Some(value) = &#modifier self.#name { + #pre_visit sqlparser::ast::#visit_trait::visit(value, visitor)?; #post_visit + } + ) + } else { + let (pre_visit, post_visit) = attributes.visit(quote!(&#modifier self.#name)); + quote_spanned!(f.span() => + #pre_visit sqlparser::ast::#visit_trait::visit(&#modifier self.#name, visitor)?; #post_visit + ) + } + }); + quote! { + #(#recurse)* + } + } + Fields::Unnamed(fields) => { + let recurse = fields.unnamed.iter().enumerate().map(|(i, f)| { + let index = Index::from(i); + let attributes = Attributes::parse(&f.attrs); + let (pre_visit, post_visit) = attributes.visit(quote!(&self.#index)); + quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(&#modifier self.#index, visitor)?; #post_visit) + }); + quote! 
{ + #(#recurse)* + } + } + Fields::Unit => { + quote!() + } + }, + Data::Enum(data) => { + let statements = data.variants.iter().map(|v| { + let name = &v.ident; + match &v.fields { + Fields::Named(fields) => { + let names = fields.named.iter().map(|f| &f.ident); + let visit = fields.named.iter().map(|f| { + let name = &f.ident; + let attributes = Attributes::parse(&f.attrs); + let (pre_visit, post_visit) = attributes.visit(name.to_token_stream()); + quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(#name, visitor)?; #post_visit) + }); + + quote!( + Self::#name { #(#names),* } => { + #(#visit)* + } + ) + } + Fields::Unnamed(fields) => { + let names = fields.unnamed.iter().enumerate().map(|(i, f)| format_ident!("_{}", i, span = f.span())); + let visit = fields.unnamed.iter().enumerate().map(|(i, f)| { + let name = format_ident!("_{}", i); + let attributes = Attributes::parse(&f.attrs); + let (pre_visit, post_visit) = attributes.visit(name.to_token_stream()); + quote_spanned!(f.span() => #pre_visit sqlparser::ast::#visit_trait::visit(#name, visitor)?; #post_visit) + }); + + quote! { + Self::#name ( #(#names),*) => { + #(#visit)* + } + } + } + Fields::Unit => { + quote! { + Self::#name => {} + } + } + } + }); + + quote! { + match self { + #(#statements),* + } + } + } + Data::Union(_) => unimplemented!(), + } +} + +fn is_option(ty: &Type) -> bool { + if let Type::Path(TypePath { + path: Path { segments, .. }, + .. + }) = ty + { + if let Some(segment) = segments.last() { + if segment.ident == "Option" { + if let PathArguments::AngleBracketed(args) = &segment.arguments { + return args.args.len() == 1; + } + } + } + } + false +} diff --git a/src/dialect/ansi.rs b/src/dialect/ansi.rs index ec3c095be5..5a54390cfd 100644 --- a/src/dialect/ansi.rs +++ b/src/dialect/ansi.rs @@ -18,7 +18,7 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [ANSI SQL](https://en.wikipedia.org/wiki/SQL:2011). 
-#[derive(Debug)] +#[derive(Debug, Default)] pub struct AnsiDialect {} impl Dialect for AnsiDialect { diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index 041b94ecd4..f8b6807f35 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -18,7 +18,7 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [ClickHouse](https://clickhouse.com/). -#[derive(Debug)] +#[derive(Debug, Default)] pub struct ClickHouseDialect {} impl Dialect for ClickHouseDialect { diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 3e15d395b1..32a982e907 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -18,7 +18,7 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [Hive](https://hive.apache.org/). -#[derive(Debug)] +#[derive(Debug, Default)] pub struct HiveDialect {} impl Dialect for HiveDialect { diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index ef563fc101..477d60f832 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -51,6 +51,82 @@ pub use self::postgresql::PostgreSqlDialect; pub use self::redshift::RedshiftSqlDialect; pub use self::snowflake::SnowflakeDialect; pub use self::sqlite::SQLiteDialect; + +/// Macro for streamlining the creation of derived `Dialect` objects. +/// The generated struct includes `new()` and `default()` constructors. +/// Requires the `derive-dialect` feature. +/// +/// # Syntax +/// +/// ```text +/// derive_dialect!(NewDialect, BaseDialect); +/// derive_dialect!(NewDialect, BaseDialect, overrides = { method = value, ... }); +/// derive_dialect!(NewDialect, BaseDialect, preserve_type_id = true); +/// derive_dialect!(NewDialect, BaseDialect, preserve_type_id = true, overrides = { ... }); +/// ``` +/// +/// # Example +/// +/// ``` +/// use sqlparser::derive_dialect; +/// use sqlparser::dialect::{Dialect, GenericDialect}; +/// +/// // Override boolean methods (supports_*, allow_*, etc.) 
+/// derive_dialect!(CustomDialect, GenericDialect, overrides = { +/// supports_order_by_all = true, +/// supports_nested_comments = true, +/// }); +/// +/// let dialect = CustomDialect::new(); +/// assert!(dialect.supports_order_by_all()); +/// assert!(dialect.supports_nested_comments()); +/// ``` +/// +/// # Overriding `identifier_quote_style` +/// +/// Use a char literal or `None`: +/// ``` +/// use sqlparser::derive_dialect; +/// use sqlparser::dialect::{Dialect, PostgreSqlDialect}; +/// +/// derive_dialect!(BacktickPostgreSqlDialect, PostgreSqlDialect, +/// preserve_type_id = true, +/// overrides = { identifier_quote_style = '`' } +/// ); +/// let d: &dyn Dialect = &BacktickPostgreSqlDialect::new(); +/// assert_eq!(d.identifier_quote_style("foo"), Some('`')); +/// +/// derive_dialect!(QuotelessPostgreSqlDialect, PostgreSqlDialect, +/// preserve_type_id = true, +/// overrides = { identifier_quote_style = None } +/// ); +/// let d: &dyn Dialect = &QuotelessPostgreSqlDialect::new(); +/// assert_eq!(d.identifier_quote_style("foo"), None); +/// ``` +/// +/// # Type Identity +/// +/// By default, derived dialects have their own `TypeId`. 
Set `preserve_type_id = true` to +/// retain the base dialect's identity with respect to the parser's `dialect.is::()` checks: +/// ``` +/// use sqlparser::derive_dialect; +/// use sqlparser::dialect::{Dialect, GenericDialect}; +/// +/// derive_dialect!(EnhancedGenericDialect, GenericDialect, +/// preserve_type_id = true, +/// overrides = { +/// supports_order_by_all = true, +/// supports_nested_comments = true, +/// } +/// ); +/// let d: &dyn Dialect = &EnhancedGenericDialect::new(); +/// assert!(d.is::()); // still recognized as a GenericDialect +/// assert!(d.supports_nested_comments()); +/// assert!(d.supports_order_by_all()); +/// ``` +#[cfg(feature = "derive-dialect")] +pub use sqlparser_derive::derive_dialect; + use crate::ast::{ColumnOption, Expr, GranteesType, Ident, ObjectNamePart, Statement}; pub use crate::keywords; use crate::keywords::Keyword; @@ -62,14 +138,14 @@ use alloc::boxed::Box; /// Convenience check if a [`Parser`] uses a certain dialect. /// -/// Note: when possible please the new style, adding a method to the [`Dialect`] -/// trait rather than using this macro. +/// Note: when possible, please use the new style, adding a method to +/// the [`Dialect`] trait rather than using this macro. /// /// The benefits of adding a method on `Dialect` over this macro are: /// 1. user defined [`Dialect`]s can customize the parsing behavior /// 2. The differences between dialects can be clearly documented in the trait /// -/// `dialect_of!(parser is SQLiteDialect | GenericDialect)` evaluates +/// `dialect_of!(parser is SQLiteDialect | GenericDialect)` evaluates /// to `true` if `parser.dialect` is one of the [`Dialect`]s specified. macro_rules! dialect_of { ( $parsed_dialect: ident is $($dialect_type: ty)|+ ) => { @@ -123,9 +199,8 @@ macro_rules! dialect_is { pub trait Dialect: Debug + Any { /// Determine the [`TypeId`] of this dialect. /// - /// By default, return the same [`TypeId`] as [`Any::type_id`]. 
Can be overridden - /// by dialects that behave like other dialects - /// (for example when wrapping a dialect). + /// By default, return the same [`TypeId`] as [`Any::type_id`]. Can be overridden by + /// dialects that behave like other dialects (for example, when wrapping a dialect). fn dialect(&self) -> TypeId { self.type_id() } @@ -1646,6 +1721,27 @@ mod tests { dialect_from_str(v).unwrap() } + #[test] + #[cfg(feature = "derive-dialect")] + fn test_dialect_override() { + derive_dialect!(EnhancedGenericDialect, GenericDialect, + preserve_type_id = true, + overrides = { + supports_order_by_all = true, + supports_nested_comments = true, + supports_triple_quoted_string = true, + }, + ); + let dialect = EnhancedGenericDialect::new(); + + assert!(dialect.supports_order_by_all()); + assert!(dialect.supports_nested_comments()); + assert!(dialect.supports_triple_quoted_string()); + + let d: &dyn Dialect = &dialect; + assert!(d.is::()); + } + #[test] fn identifier_quote_style() { let tests: Vec<(&dyn Dialect, &str, Option)> = vec![ diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 9f8e726562..24f7c7c4f3 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -28,7 +28,7 @@ use crate::tokenizer::Token; use alloc::{vec, vec::Vec}; /// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) -#[derive(Debug)] +#[derive(Debug, Default)] pub struct MsSqlDialect {} impl Dialect for MsSqlDialect { diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index b44001fe12..ad3ba6f3af 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -35,7 +35,7 @@ const RESERVED_FOR_TABLE_ALIAS_MYSQL: &[Keyword] = &[ ]; /// A [`Dialect`] for [MySQL](https://www.mysql.com/) -#[derive(Debug)] +#[derive(Debug, Default)] pub struct MySqlDialect {} impl Dialect for MySqlDialect { diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index 7ff9326282..a72d5d7a81 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -25,7 
+25,7 @@ use crate::{ use super::{Dialect, Precedence}; /// A [`Dialect`] for [Oracle Databases](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/index.html) -#[derive(Debug)] +#[derive(Debug, Default)] pub struct OracleDialect; impl Dialect for OracleDialect { diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 7c9e7db86c..1924a5e313 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -34,7 +34,7 @@ use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; /// A [`Dialect`] for [PostgreSQL](https://www.postgresql.org/) -#[derive(Debug)] +#[derive(Debug, Default)] pub struct PostgreSqlDialect {} const PERIOD_PREC: u8 = 200; diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index c028061d2a..7b35848ba0 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -22,7 +22,7 @@ use core::str::Chars; use super::PostgreSqlDialect; /// A [`Dialect`] for [RedShift](https://aws.amazon.com/redshift/) -#[derive(Debug)] +#[derive(Debug, Default)] pub struct RedshiftSqlDialect {} // In most cases the redshift dialect is identical to [`PostgresSqlDialect`]. diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index ba4cb6173a..7d1c935f16 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -30,7 +30,7 @@ use crate::parser::{Parser, ParserError}; /// [`CREATE TABLE`](https://sqlite.org/lang_createtable.html) statement with no /// type specified, as in `CREATE TABLE t1 (a)`. In the AST, these columns will /// have the data type [`Unspecified`](crate::ast::DataType::Unspecified). -#[derive(Debug)] +#[derive(Debug, Default)] pub struct SQLiteDialect {} impl Dialect for SQLiteDialect { diff --git a/src/lib.rs b/src/lib.rs index f5d23a21fc..e68d7f93eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -170,6 +170,9 @@ pub mod ast; #[macro_use] /// Submodules for SQL dialects. 
pub mod dialect; + +#[cfg(feature = "derive-dialect")] +pub use dialect::derive_dialect; mod display_utils; pub mod keywords; pub mod parser; diff --git a/tests/sqlparser_derive_dialect.rs b/tests/sqlparser_derive_dialect.rs new file mode 100644 index 0000000000..d60fa1e11d --- /dev/null +++ b/tests/sqlparser_derive_dialect.rs @@ -0,0 +1,123 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Tests for the `derive_dialect!` macro. 
+ +use sqlparser::derive_dialect; +use sqlparser::dialect::{Dialect, GenericDialect, MySqlDialect, PostgreSqlDialect}; +use sqlparser::parser::Parser; + +#[test] +fn test_method_overrides() { + derive_dialect!(EnhancedGenericDialect, GenericDialect, overrides = { + supports_order_by_all = true, + supports_triple_quoted_string = true, + }); + let dialect = EnhancedGenericDialect::new(); + + // Overridden methods + assert!(dialect.supports_order_by_all()); + assert!(dialect.supports_triple_quoted_string()); + + // Non-overridden retains base behavior + assert!(!dialect.supports_factorial_operator()); + + // Parsing works with the overrides + let result = Parser::new(&dialect) + .try_with_sql("SELECT '''value''' FROM t ORDER BY ALL") + .unwrap() + .parse_statements(); + + assert!(result.is_ok()); +} + +#[test] +fn test_preserve_type_id() { + // Check the override works and the parser recognizes it as the base type + derive_dialect!( + PreservedTypeDialect, + GenericDialect, + preserve_type_id = true, + overrides = { supports_order_by_all = true } + ); + let dialect = PreservedTypeDialect::new(); + let d: &dyn Dialect = &dialect; + + assert!(dialect.supports_order_by_all()); + assert!(d.is::()); +} + +#[test] +fn test_different_base_dialects() { + derive_dialect!( + EnhancedMySqlDialect, + MySqlDialect, + overrides = { supports_order_by_all = true } + ); + derive_dialect!(UniquePostgreSqlDialect, PostgreSqlDialect); + + let pg = UniquePostgreSqlDialect::new(); + let mysql = EnhancedMySqlDialect::new(); + + // Inherit different base behaviors + assert!(pg.supports_filter_during_aggregation()); // PostgreSQL feature + assert!(mysql.supports_string_literal_backslash_escape()); // MySQL feature + assert!(mysql.supports_order_by_all()); // Override + + // Each has unique TypeId + let pg_ref: &dyn Dialect = &pg; + let mysql_ref: &dyn Dialect = &mysql; + assert!(pg_ref.is::()); + assert!(!pg_ref.is::()); + assert!(mysql_ref.is::()); +} + +#[test] +fn 
test_identifier_quote_style_overrides() { + derive_dialect!( + BacktickGenericDialect, + GenericDialect, + overrides = { identifier_quote_style = '`' } + ); + derive_dialect!( + AnotherBacktickDialect, + GenericDialect, + overrides = { identifier_quote_style = '[' } + ); + derive_dialect!( + QuotelessPostgreSqlDialect, + PostgreSqlDialect, + preserve_type_id = true, + overrides = { identifier_quote_style = None } + ); + + // Char literal (auto-wrapped in Some) + assert_eq!( + BacktickGenericDialect::new().identifier_quote_style("x"), + Some('`') + ); + // Another char literal + assert_eq!( + AnotherBacktickDialect::new().identifier_quote_style("x"), + Some('[') + ); + // None (overrides PostgreSQL's default '"') + assert_eq!( + QuotelessPostgreSqlDialect::new().identifier_quote_style("x"), + None + ); +} From 3ac567076ce505adb033f08a2044cfe5c3a7b229 Mon Sep 17 00:00:00 2001 From: "Guan-Ming (Wesley) Chiu" <105915352+guan404ming@users.noreply.github.com> Date: Wed, 4 Feb 2026 00:11:38 +0800 Subject: [PATCH 054/121] MSSQL: Support standalone BEGIN...END blocks (#2186) Signed-off-by: Guan-Ming (Wesley) Chiu <105915352+guan404ming@users.noreply.github.com> Co-authored-by: Ifeanyi Ubah --- src/dialect/mssql.rs | 24 +++++++++++++- src/parser/mod.rs | 13 +++++--- tests/sqlparser_mssql.rs | 71 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 5 deletions(-) diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 24f7c7c4f3..4056bf8761 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -145,7 +145,29 @@ impl Dialect for MsSqlDialect { } fn parse_statement(&self, parser: &mut Parser) -> Option> { - if parser.peek_keyword(Keyword::IF) { + if parser.parse_keyword(Keyword::BEGIN) { + // Check if this is a BEGIN...END block rather than BEGIN TRANSACTION + let is_block = parser + .maybe_parse(|p| { + if p.parse_transaction_modifier().is_some() + || p.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) + .is_some() + || 
matches!(p.peek_token_ref().token, Token::SemiColon | Token::EOF) + { + p.expected("statement", p.peek_token()) + } else { + Ok(()) + } + }) + .unwrap_or(None) + .is_some(); + if is_block { + Some(parser.parse_begin_exception_end()) + } else { + parser.prev_token(); + None + } + } else if parser.peek_keyword(Keyword::IF) { Some(self.parse_if_stmt(parser)) } else if parser.parse_keywords(&[Keyword::CREATE, Keyword::TRIGGER]) { Some(self.parse_create_trigger(parser, false)) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 80c7058333..5fa224f970 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17902,9 +17902,9 @@ impl<'a> Parser<'a> { }) } - /// Parse a 'BEGIN' statement - pub fn parse_begin(&mut self) -> Result { - let modifier = if !self.dialect.supports_start_transaction_modifier() { + /// Parse a transaction modifier keyword that can follow a `BEGIN` statement. + pub(crate) fn parse_transaction_modifier(&mut self) -> Option { + if !self.dialect.supports_start_transaction_modifier() { None } else if self.parse_keyword(Keyword::DEFERRED) { Some(TransactionModifier::Deferred) @@ -17918,7 +17918,12 @@ impl<'a> Parser<'a> { Some(TransactionModifier::Catch) } else { None - }; + } + } + + /// Parse a 'BEGIN' statement + pub fn parse_begin(&mut self) -> Result { + let modifier = self.parse_transaction_modifier(); let transaction = match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) { Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction), Some(Keyword::WORK) => Some(BeginTransactionKind::Work), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 7ef4ce85c2..d770008847 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2554,3 +2554,74 @@ fn test_sql_keywords_as_column_aliases() { } } } + +#[test] +fn parse_mssql_begin_end_block() { + // Single statement + let sql = "BEGIN SELECT 1; END"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + 
begin, + has_end_keyword, + statements, + transaction, + modifier, + .. + } => { + assert!(begin); + assert!(has_end_keyword); + assert!(transaction.is_none()); + assert!(modifier.is_none()); + assert_eq!(statements.len(), 1); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } + + // Multiple statements + let sql = "BEGIN SELECT 1; SELECT 2; END"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + statements, + has_end_keyword, + .. + } => { + assert!(has_end_keyword); + assert_eq!(statements.len(), 2); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } + + // DML inside BEGIN/END + let sql = "BEGIN INSERT INTO t VALUES (1); UPDATE t SET x = 2; END"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + statements, + has_end_keyword, + .. + } => { + assert!(has_end_keyword); + assert_eq!(statements.len(), 2); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } + + // BEGIN TRANSACTION still works + let sql = "BEGIN TRANSACTION"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + begin, + has_end_keyword, + transaction, + .. 
+ } => { + assert!(begin); + assert!(!has_end_keyword); + assert!(transaction.is_some()); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } +} From 5e5c16c597ed6dd1d6539d4c0c701972b166921d Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Thu, 5 Feb 2026 07:40:58 -0800 Subject: [PATCH 055/121] MySQL: Add support for `SELECT` modifiers (#2172) --- src/ast/mod.rs | 17 +-- src/ast/query.rs | 119 ++++++++++++++++++++- src/ast/spans.rs | 3 +- src/dialect/mod.rs | 13 +++ src/dialect/mysql.rs | 4 + src/keywords.rs | 6 ++ src/parser/mod.rs | 110 ++++++++++++++++++-- tests/sqlparser_bigquery.rs | 2 + tests/sqlparser_clickhouse.rs | 5 +- tests/sqlparser_common.rs | 34 ++++-- tests/sqlparser_duckdb.rs | 2 + tests/sqlparser_mssql.rs | 3 + tests/sqlparser_mysql.rs | 189 ++++++++++++++++++++++++++++++++++ tests/sqlparser_postgres.rs | 3 + 14 files changed, 480 insertions(+), 30 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index ce5a67e128..1e62691630 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -97,14 +97,15 @@ pub use self::query::{ OffsetRows, OpenJsonTableColumn, OrderBy, OrderByExpr, OrderByKind, OrderByOptions, PipeOperator, PivotValueSource, ProjectionSelect, Query, RenameSelectItem, RepetitionQuantifier, ReplaceSelectElement, ReplaceSelectItem, RowsPerMatch, Select, - SelectFlavor, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SetExpr, SetOperator, - SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, TableAliasColumnDef, TableFactor, - TableFunctionArgs, TableIndexHintForClause, TableIndexHintType, TableIndexHints, - TableIndexType, TableSample, TableSampleBucket, TableSampleKind, TableSampleMethod, - TableSampleModifier, TableSampleQuantity, TableSampleSeed, TableSampleSeedModifier, - TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, UpdateTableFromKind, - ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, XmlNamespaceDefinition, - XmlPassingArgument, XmlPassingClause, 
XmlTableColumn, XmlTableColumnOption, + SelectFlavor, SelectInto, SelectItem, SelectItemQualifiedWildcardKind, SelectModifiers, + SetExpr, SetOperator, SetQuantifier, Setting, SymbolDefinition, Table, TableAlias, + TableAliasColumnDef, TableFactor, TableFunctionArgs, TableIndexHintForClause, + TableIndexHintType, TableIndexHints, TableIndexType, TableSample, TableSampleBucket, + TableSampleKind, TableSampleMethod, TableSampleModifier, TableSampleQuantity, TableSampleSeed, + TableSampleSeedModifier, TableSampleUnit, TableVersion, TableWithJoins, Top, TopQuantity, + UpdateTableFromKind, ValueTableMode, Values, WildcardAdditionalOptions, With, WithFill, + XmlNamespaceDefinition, XmlPassingArgument, XmlPassingClause, XmlTableColumn, + XmlTableColumnOption, }; pub use self::trigger::{ diff --git a/src/ast/query.rs b/src/ast/query.rs index 08448cabef..bb2d889f28 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -334,6 +334,108 @@ pub enum SelectFlavor { FromFirstNoSelect, } +/// MySQL-specific SELECT modifiers that appear after the SELECT keyword. +/// +/// These modifiers affect query execution and optimization. They can appear in any order after +/// SELECT and before the column list, can be repeated, and can be interleaved with +/// DISTINCT/DISTINCTROW/ALL: +/// +/// ```sql +/// SELECT +/// [ALL | DISTINCT | DISTINCTROW] +/// [HIGH_PRIORITY] +/// [STRAIGHT_JOIN] +/// [SQL_SMALL_RESULT] [SQL_BIG_RESULT] [SQL_BUFFER_RESULT] +/// [SQL_NO_CACHE] [SQL_CALC_FOUND_ROWS] +/// select_expr [, select_expr] ... +/// ``` +/// +/// See [MySQL SELECT](https://dev.mysql.com/doc/refman/8.4/en/select.html). +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Default)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct SelectModifiers { + /// `HIGH_PRIORITY` gives the SELECT higher priority than statements that update a table. 
+ /// + /// + pub high_priority: bool, + /// `STRAIGHT_JOIN` forces the optimizer to join tables in the order listed in the FROM clause. + /// + /// + pub straight_join: bool, + /// `SQL_SMALL_RESULT` hints that the result set is small, using in-memory temp tables. + /// + /// + pub sql_small_result: bool, + /// `SQL_BIG_RESULT` hints that the result set is large, using disk-based temp tables. + /// + /// + pub sql_big_result: bool, + /// `SQL_BUFFER_RESULT` forces the result to be put into a temporary table to release locks early. + /// + /// + pub sql_buffer_result: bool, + /// `SQL_NO_CACHE` tells MySQL not to cache the query result. (Deprecated in 8.4+.) + /// + /// + pub sql_no_cache: bool, + /// `SQL_CALC_FOUND_ROWS` tells MySQL to calculate the total number of rows. (Deprecated in 8.0.17+.) + /// + /// - [MySQL SELECT modifiers](https://dev.mysql.com/doc/refman/8.4/en/select.html) + /// - [`FOUND_ROWS()`](https://dev.mysql.com/doc/refman/8.4/en/information-functions.html#function_found-rows) + pub sql_calc_found_rows: bool, +} + +impl fmt::Display for SelectModifiers { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.high_priority { + f.write_str(" HIGH_PRIORITY")?; + } + if self.straight_join { + f.write_str(" STRAIGHT_JOIN")?; + } + if self.sql_small_result { + f.write_str(" SQL_SMALL_RESULT")?; + } + if self.sql_big_result { + f.write_str(" SQL_BIG_RESULT")?; + } + if self.sql_buffer_result { + f.write_str(" SQL_BUFFER_RESULT")?; + } + if self.sql_no_cache { + f.write_str(" SQL_NO_CACHE")?; + } + if self.sql_calc_found_rows { + f.write_str(" SQL_CALC_FOUND_ROWS")?; + } + Ok(()) + } +} + +impl SelectModifiers { + /// Returns true if any of the modifiers are set. 
+ pub fn is_any_set(&self) -> bool { + // Using irrefutable destructuring to catch fields added in the future + let Self { + high_priority, + straight_join, + sql_small_result, + sql_big_result, + sql_buffer_result, + sql_no_cache, + sql_calc_found_rows, + } = self; + *high_priority + || *straight_join + || *sql_small_result + || *sql_big_result + || *sql_buffer_result + || *sql_no_cache + || *sql_calc_found_rows + } +} + /// A restricted variant of `SELECT` (without CTEs/`ORDER BY`), which may /// appear either as the only body item of a `Query`, or as an operand /// to a set operation like `UNION`. @@ -350,6 +452,10 @@ pub struct Select { pub optimizer_hint: Option, /// `SELECT [DISTINCT] ...` pub distinct: Option, + /// MySQL-specific SELECT modifiers. + /// + /// See [MySQL SELECT](https://dev.mysql.com/doc/refman/8.4/en/select.html). + pub select_modifiers: Option, /// MSSQL syntax: `TOP () [ PERCENT ] [ WITH TIES ]` pub top: Option, /// Whether the top was located before `ALL`/`DISTINCT` @@ -442,6 +548,10 @@ impl fmt::Display for Select { } } + if let Some(ref select_modifiers) = self.select_modifiers { + select_modifiers.fmt(f)?; + } + if !self.projection.is_empty() { indented_list(f, &self.projection)?; } @@ -3351,8 +3461,14 @@ impl fmt::Display for NonBlock { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -/// `DISTINCT` or `DISTINCT ON (...)` modifiers for `SELECT` lists. +/// `ALL`, `DISTINCT`, or `DISTINCT ON (...)` modifiers for `SELECT` lists. 
pub enum Distinct { + /// `ALL` (keep duplicate rows) + /// + /// Generally this is the default if omitted, but omission should be represented as + /// `None::>` + All, + /// `DISTINCT` (remove duplicate rows) Distinct, @@ -3363,6 +3479,7 @@ pub enum Distinct { impl fmt::Display for Distinct { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + Distinct::All => write!(f, "ALL"), Distinct::Distinct => write!(f, "DISTINCT"), Distinct::On(col_names) => { let col_names = display_comma_separated(col_names); diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 60c983fa1e..ffc96ed3e7 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2238,7 +2238,8 @@ impl Spanned for Select { select_token, optimizer_hint: _, distinct: _, // todo - top: _, // todo, mysql specific + select_modifiers: _, + top: _, // todo, mysql specific projection, exclude: _, into, diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 477d60f832..f23eb387ce 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -698,6 +698,19 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports MySQL-specific SELECT modifiers + /// like `HIGH_PRIORITY`, `STRAIGHT_JOIN`, `SQL_SMALL_RESULT`, etc. + /// + /// For example: + /// ```sql + /// SELECT HIGH_PRIORITY STRAIGHT_JOIN SQL_SMALL_RESULT * FROM t1 JOIN t2 ON ... + /// ``` + /// + /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/select.html) + fn supports_select_modifiers(&self) -> bool { + false + } + /// Dialect-specific infix parser override /// /// This method is called to parse the next infix expression. 
diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index ad3ba6f3af..e1a6841711 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -156,6 +156,10 @@ impl Dialect for MySqlDialect { true } + fn supports_select_modifiers(&self) -> bool { + true + } + fn supports_set_names(&self) -> bool { true } diff --git a/src/keywords.rs b/src/keywords.rs index 964e4b388d..2e26bda8ec 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -333,6 +333,7 @@ define_keywords!( DISCARD, DISCONNECT, DISTINCT, + DISTINCTROW, DISTRIBUTE, DIV, DO, @@ -956,6 +957,11 @@ define_keywords!( SQLEXCEPTION, SQLSTATE, SQLWARNING, + SQL_BIG_RESULT, + SQL_BUFFER_RESULT, + SQL_CALC_FOUND_ROWS, + SQL_NO_CACHE, + SQL_SMALL_RESULT, SQRT, SRID, STABLE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5fa224f970..d9a5c5188b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4927,16 +4927,27 @@ impl<'a> Parser<'a> { /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found. pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { let loc = self.peek_token().span.start; - let all = self.parse_keyword(Keyword::ALL); - let distinct = self.parse_keyword(Keyword::DISTINCT); - if !distinct { - return Ok(None); - } - if all { - return parser_err!("Cannot specify both ALL and DISTINCT".to_string(), loc); - } - let on = self.parse_keyword(Keyword::ON); - if !on { + let distinct = match self.parse_one_of_keywords(&[Keyword::ALL, Keyword::DISTINCT]) { + Some(Keyword::ALL) => { + if self.peek_keyword(Keyword::DISTINCT) { + return parser_err!("Cannot specify ALL then DISTINCT".to_string(), loc); + } + Some(Distinct::All) + } + Some(Keyword::DISTINCT) => { + if self.peek_keyword(Keyword::ALL) { + return parser_err!("Cannot specify DISTINCT then ALL".to_string(), loc); + } + Some(Distinct::Distinct) + } + None => return Ok(None), + _ => return parser_err!("ALL or DISTINCT", loc), + }; + + let Some(Distinct::Distinct) = distinct else { + return Ok(distinct); + }; 
+ if !self.parse_keyword(Keyword::ON) { return Ok(Some(Distinct::Distinct)); } @@ -13861,6 +13872,7 @@ impl<'a> Parser<'a> { select_token: AttachedToken(from_token), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![], @@ -13890,13 +13902,26 @@ impl<'a> Parser<'a> { let optimizer_hint = self.maybe_parse_optimizer_hint()?; let value_table_mode = self.parse_value_table_mode()?; + let (select_modifiers, distinct_select_modifier) = + if self.dialect.supports_select_modifiers() { + self.parse_select_modifiers()? + } else { + (None, None) + }; + let mut top_before_distinct = false; let mut top = None; if self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { top = Some(self.parse_top()?); top_before_distinct = true; } - let distinct = self.parse_all_or_distinct()?; + + let distinct = if distinct_select_modifier.is_some() { + distinct_select_modifier + } else { + self.parse_all_or_distinct()? + }; + if !self.dialect.supports_top_before_distinct() && self.parse_keyword(Keyword::TOP) { top = Some(self.parse_top()?); } @@ -14044,6 +14069,7 @@ impl<'a> Parser<'a> { select_token: AttachedToken(select_token), optimizer_hint, distinct, + select_modifiers, top, top_before_distinct, projection, @@ -14120,6 +14146,68 @@ impl<'a> Parser<'a> { } } + /// Parses MySQL SELECT modifiers and DISTINCT/ALL in any order. + /// + /// Manual testing shows odifiers can appear in any order, and modifiers other than DISTINCT/ALL + /// can be repeated. 
+ /// + /// + fn parse_select_modifiers( + &mut self, + ) -> Result<(Option, Option), ParserError> { + let mut modifiers = SelectModifiers::default(); + let mut distinct = None; + + let keywords = &[ + Keyword::ALL, + Keyword::DISTINCT, + Keyword::DISTINCTROW, + Keyword::HIGH_PRIORITY, + Keyword::STRAIGHT_JOIN, + Keyword::SQL_SMALL_RESULT, + Keyword::SQL_BIG_RESULT, + Keyword::SQL_BUFFER_RESULT, + Keyword::SQL_NO_CACHE, + Keyword::SQL_CALC_FOUND_ROWS, + ]; + + while let Some(keyword) = self.parse_one_of_keywords(keywords) { + match keyword { + Keyword::ALL | Keyword::DISTINCT if distinct.is_none() => { + self.prev_token(); + distinct = self.parse_all_or_distinct()?; + } + // DISTINCTROW is a MySQL-specific legacy (but not deprecated) alias for DISTINCT + Keyword::DISTINCTROW if distinct.is_none() => { + distinct = Some(Distinct::Distinct); + } + Keyword::HIGH_PRIORITY => modifiers.high_priority = true, + Keyword::STRAIGHT_JOIN => modifiers.straight_join = true, + Keyword::SQL_SMALL_RESULT => modifiers.sql_small_result = true, + Keyword::SQL_BIG_RESULT => modifiers.sql_big_result = true, + Keyword::SQL_BUFFER_RESULT => modifiers.sql_buffer_result = true, + Keyword::SQL_NO_CACHE => modifiers.sql_no_cache = true, + Keyword::SQL_CALC_FOUND_ROWS => modifiers.sql_calc_found_rows = true, + _ => { + self.prev_token(); + return self.expected( + "HIGH_PRIORITY, STRAIGHT_JOIN, or other MySQL select modifier", + self.peek_token(), + ); + } + } + } + + // Avoid polluting the AST with `Some(SelectModifiers::default())` empty value unless there + // actually were some modifiers set. 
+ let select_modifiers = if modifiers.is_any_set() { + Some(modifiers) + } else { + None + }; + Ok((select_modifiers, distinct)) + } + fn parse_value_table_mode(&mut self) -> Result, ParserError> { if !dialect_of!(self is BigQueryDialect) { return Ok(None); diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index fb28b4d217..d5ad63735f 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2683,6 +2683,7 @@ fn test_export_data() { )), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -2788,6 +2789,7 @@ fn test_export_data() { )), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index ac31a27835..db83291179 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -40,9 +40,10 @@ fn parse_map_access_expr() { let select = clickhouse().verified_only_select(sql); assert_eq!( Select { - distinct: None, - optimizer_hint: None, select_token: AttachedToken::empty(), + optimizer_hint: None, + distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![UnnamedExpr(Expr::CompoundFieldAccess { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e6a48c7b37..b442ec428a 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -473,6 +473,7 @@ fn parse_update_set_from() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -1042,18 +1043,18 @@ fn parse_outer_join_operator() { #[test] fn parse_select_distinct_on() { let sql = "SELECT DISTINCT ON (album_id) name FROM track ORDER BY album_id, milliseconds"; - let select = verified_only_select(sql); + let select = all_dialects_except(|d| 
d.is::()).verified_only_select(sql); assert_eq!( &Some(Distinct::On(vec![Expr::Identifier(Ident::new("album_id"))])), &select.distinct ); let sql = "SELECT DISTINCT ON () name FROM track ORDER BY milliseconds"; - let select = verified_only_select(sql); + let select = all_dialects_except(|d| d.is::()).verified_only_select(sql); assert_eq!(&Some(Distinct::On(vec![])), &select.distinct); let sql = "SELECT DISTINCT ON (album_id, milliseconds) name FROM track"; - let select = verified_only_select(sql); + let select = all_dialects_except(|d| d.is::()).verified_only_select(sql); assert_eq!( &Some(Distinct::On(vec![ Expr::Identifier(Ident::new("album_id")), @@ -1074,14 +1075,24 @@ fn parse_select_distinct_missing_paren() { #[test] fn parse_select_all() { - one_statement_parses_to("SELECT ALL name FROM customer", "SELECT name FROM customer"); + verified_stmt("SELECT ALL name FROM customer"); } #[test] fn parse_select_all_distinct() { let result = parse_sql_statements("SELECT ALL DISTINCT name FROM customer"); assert_eq!( - ParserError::ParserError("Cannot specify both ALL and DISTINCT".to_string()), + ParserError::ParserError("Cannot specify ALL then DISTINCT".to_string()), + result.unwrap_err(), + ); + let result = parse_sql_statements("SELECT DISTINCT ALL name FROM customer"); + assert_eq!( + ParserError::ParserError("Cannot specify DISTINCT then ALL".to_string()), + result.unwrap_err(), + ); + let result = parse_sql_statements("SELECT ALL DISTINCT ON(name) name FROM customer"); + assert_eq!( + ParserError::ParserError("Cannot specify ALL then DISTINCT".to_string()), result.unwrap_err(), ); } @@ -5809,6 +5820,7 @@ fn test_parse_named_window() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -6540,6 +6552,7 @@ fn parse_interval_and_or_xor() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, 
top_before_distinct: false, projection: vec![UnnamedExpr(Expr::Identifier(Ident { @@ -8917,6 +8930,7 @@ fn lateral_function() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], exclude: None, @@ -9919,6 +9933,7 @@ fn parse_merge() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::Wildcard( @@ -12323,6 +12338,7 @@ fn parse_unload() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![UnnamedExpr(Expr::Identifier(Ident::new("cola"))),], @@ -12632,6 +12648,7 @@ fn parse_connect_by() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -12715,6 +12732,7 @@ fn parse_connect_by() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -13649,6 +13667,7 @@ fn test_extract_seconds_ok() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![UnnamedExpr(Expr::Extract { @@ -14830,9 +14849,9 @@ fn test_load_extension() { #[test] fn test_select_top() { let dialects = all_dialects_where(|d| d.supports_top_before_distinct()); - dialects.one_statement_parses_to("SELECT ALL * FROM tbl", "SELECT * FROM tbl"); + dialects.verified_stmt("SELECT ALL * FROM tbl"); dialects.verified_stmt("SELECT TOP 3 * FROM tbl"); - dialects.one_statement_parses_to("SELECT TOP 3 ALL * FROM tbl", "SELECT TOP 3 * FROM tbl"); + dialects.verified_stmt("SELECT TOP 3 ALL * FROM tbl"); dialects.verified_stmt("SELECT TOP 3 
DISTINCT * FROM tbl"); dialects.verified_stmt("SELECT TOP 3 DISTINCT a, b, c FROM tbl"); } @@ -15789,6 +15808,7 @@ fn test_select_from_first() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, projection, exclude: None, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 7cc710de28..e408d997b3 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -268,6 +268,7 @@ fn test_select_union_by_name() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], exclude: None, @@ -300,6 +301,7 @@ fn test_select_union_by_name() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, projection: vec![SelectItem::Wildcard(WildcardAdditionalOptions::default())], exclude: None, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index d770008847..7f60934271 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -143,6 +143,7 @@ fn parse_create_procedure() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Value( @@ -1351,6 +1352,7 @@ fn parse_substring_in_select() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: Some(Distinct::Distinct), + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Substring { @@ -1509,6 +1511,7 @@ fn parse_mssql_declare() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::BinaryOp { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 2c942798c8..b719f2efbd 
100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1437,6 +1437,7 @@ fn parse_escaped_quote_identifiers_with_escape() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { @@ -1493,6 +1494,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { @@ -1541,6 +1543,7 @@ fn parse_escaped_backticks_with_escape() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { @@ -1593,6 +1596,7 @@ fn parse_escaped_backticks_with_no_escape() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident { @@ -2413,6 +2417,7 @@ fn parse_select_with_numeric_prefix_column_name() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Identifier(Ident::new( @@ -2588,6 +2593,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![ @@ -3222,6 +3228,7 @@ fn parse_substring_in_select() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: Some(Distinct::Distinct), + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Substring { 
@@ -3546,6 +3553,7 @@ fn parse_hex_string_introducer() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Prefixed { @@ -4272,6 +4280,187 @@ fn parse_straight_join() { .verified_stmt("SELECT a.*, b.* FROM table_a STRAIGHT_JOIN table_b AS b ON a.b_id = b.id"); } +#[test] +fn parse_distinctrow_to_distinct() { + mysql().one_statement_parses_to( + "SELECT DISTINCTROW * FROM employees", + "SELECT DISTINCT * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCTROW * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); +} + +#[test] +fn parse_select_straight_join() { + let select = mysql().verified_only_select( + "SELECT STRAIGHT_JOIN * FROM employees e JOIN dept_emp d ON e.emp_no = d.emp_no WHERE d.emp_no = 10001", + ); + assert!(select.select_modifiers.unwrap().straight_join); + + mysql().verified_stmt( + "SELECT STRAIGHT_JOIN e.emp_no, d.dept_no FROM employees e JOIN dept_emp d ON e.emp_no = d.emp_no", + ); + mysql().verified_stmt("SELECT DISTINCT STRAIGHT_JOIN emp_no FROM employees"); + + let select = mysql().verified_only_select("SELECT * FROM employees"); + assert!(select.select_modifiers.is_none()); +} + +#[test] +fn parse_select_modifiers() { + let select = mysql().verified_only_select("SELECT HIGH_PRIORITY * FROM employees"); + assert!(select.select_modifiers.as_ref().unwrap().high_priority); + assert!(!select.select_modifiers.unwrap().straight_join); + + let select = mysql().verified_only_select("SELECT SQL_SMALL_RESULT * FROM employees"); + assert!(select.select_modifiers.unwrap().sql_small_result); + + let select = mysql().verified_only_select("SELECT SQL_BIG_RESULT * FROM employees"); + assert!(select.select_modifiers.unwrap().sql_big_result); + + let select = mysql().verified_only_select("SELECT SQL_BUFFER_RESULT * FROM employees"); + 
assert!(select.select_modifiers.unwrap().sql_buffer_result); + + let select = mysql().verified_only_select("SELECT SQL_NO_CACHE * FROM employees"); + assert!(select.select_modifiers.unwrap().sql_no_cache); + + let select = mysql().verified_only_select("SELECT SQL_CALC_FOUND_ROWS * FROM employees"); + assert!(select.select_modifiers.unwrap().sql_calc_found_rows); + + let select = mysql().verified_only_select( + "SELECT HIGH_PRIORITY STRAIGHT_JOIN SQL_SMALL_RESULT SQL_BIG_RESULT SQL_BUFFER_RESULT SQL_NO_CACHE SQL_CALC_FOUND_ROWS * FROM employees", + ); + assert!(select.select_modifiers.as_ref().unwrap().high_priority); + assert!(select.select_modifiers.as_ref().unwrap().straight_join); + assert!(select.select_modifiers.as_ref().unwrap().sql_small_result); + assert!(select.select_modifiers.as_ref().unwrap().sql_big_result); + assert!(select.select_modifiers.as_ref().unwrap().sql_buffer_result); + assert!(select.select_modifiers.as_ref().unwrap().sql_no_cache); + assert!(select.select_modifiers.unwrap().sql_calc_found_rows); + + mysql().verified_stmt("SELECT DISTINCT HIGH_PRIORITY emp_no FROM employees"); + mysql().verified_stmt("SELECT DISTINCT SQL_CALC_FOUND_ROWS emp_no FROM employees"); + mysql().verified_stmt("SELECT HIGH_PRIORITY STRAIGHT_JOIN e.emp_no, d.dept_no FROM employees e JOIN dept_emp d ON e.emp_no = d.emp_no"); +} + +#[test] +fn parse_select_modifiers_any_order() { + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCT * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_CALC_FOUND_ROWS DISTINCT HIGH_PRIORITY * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY SQL_CALC_FOUND_ROWS * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCT SQL_SMALL_RESULT * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY SQL_SMALL_RESULT * FROM employees", + ); + + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCTROW * FROM employees", + 
"SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); + + mysql().verified_stmt("SELECT ALL * FROM employees"); + mysql().verified_stmt("SELECT ALL HIGH_PRIORITY * FROM employees"); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY ALL * FROM employees", + "SELECT ALL HIGH_PRIORITY * FROM employees", + ); + + let select = mysql().verified_only_select_with_canonical( + "SELECT HIGH_PRIORITY DISTINCT * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); + assert!(select.select_modifiers.unwrap().high_priority); + assert!(matches!(select.distinct, Some(Distinct::Distinct))); + + let select = mysql().verified_only_select_with_canonical( + "SELECT SQL_CALC_FOUND_ROWS ALL HIGH_PRIORITY * FROM employees", + "SELECT ALL HIGH_PRIORITY SQL_CALC_FOUND_ROWS * FROM employees", + ); + assert!(select.select_modifiers.as_ref().unwrap().high_priority); + assert!(select.select_modifiers.unwrap().sql_calc_found_rows); + assert_eq!(select.distinct, Some(Distinct::All)) +} + +#[test] +fn parse_select_modifiers_can_be_repeated() { + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY HIGH_PRIORITY * FROM employees", + "SELECT HIGH_PRIORITY * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_CALC_FOUND_ROWS SQL_CALC_FOUND_ROWS * FROM employees", + "SELECT SQL_CALC_FOUND_ROWS * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT STRAIGHT_JOIN STRAIGHT_JOIN * FROM employees", + "SELECT STRAIGHT_JOIN * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_NO_CACHE SQL_NO_CACHE * FROM employees", + "SELECT SQL_NO_CACHE * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY DISTINCT HIGH_PRIORITY * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_CALC_FOUND_ROWS DISTINCT SQL_CALC_FOUND_ROWS * FROM employees", + "SELECT DISTINCT SQL_CALC_FOUND_ROWS * FROM employees", + ); +} + +#[test] +fn 
parse_select_modifiers_canonical_ordering() { + mysql().one_statement_parses_to( + "SELECT SQL_CALC_FOUND_ROWS SQL_NO_CACHE SQL_BUFFER_RESULT SQL_BIG_RESULT SQL_SMALL_RESULT STRAIGHT_JOIN HIGH_PRIORITY * FROM employees", + "SELECT HIGH_PRIORITY STRAIGHT_JOIN SQL_SMALL_RESULT SQL_BIG_RESULT SQL_BUFFER_RESULT SQL_NO_CACHE SQL_CALC_FOUND_ROWS * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT SQL_NO_CACHE DISTINCT SQL_CALC_FOUND_ROWS * FROM employees", + "SELECT DISTINCT SQL_NO_CACHE SQL_CALC_FOUND_ROWS * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY STRAIGHT_JOIN DISTINCT SQL_SMALL_RESULT * FROM employees", + "SELECT DISTINCT HIGH_PRIORITY STRAIGHT_JOIN SQL_SMALL_RESULT * FROM employees", + ); + mysql().one_statement_parses_to( + "SELECT HIGH_PRIORITY ALL STRAIGHT_JOIN * FROM employees", + "SELECT ALL HIGH_PRIORITY STRAIGHT_JOIN * FROM employees", + ); +} + +#[test] +fn parse_select_modifiers_errors() { + assert!(mysql() + .parse_sql_statements("SELECT DISTINCT DISTINCT * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT DISTINCTROW DISTINCTROW * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT DISTINCT DISTINCTROW * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT ALL DISTINCT * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT DISTINCT ALL * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT ALL DISTINCTROW * FROM t") + .is_err()); + assert!(mysql() + .parse_sql_statements("SELECT ALL ALL * FROM t") + .is_err()); +} + #[test] fn mysql_foreign_key_with_index_name() { mysql().verified_stmt( diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 1f91bffd90..4ce8ed8b45 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1293,6 +1293,7 @@ fn parse_copy_to() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: 
None, top: None, top_before_distinct: false, projection: vec![ @@ -3074,6 +3075,7 @@ fn parse_array_subquery_expr() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Value( @@ -3101,6 +3103,7 @@ fn parse_array_subquery_expr() { select_token: AttachedToken::empty(), optimizer_hint: None, distinct: None, + select_modifiers: None, top: None, top_before_distinct: false, projection: vec![SelectItem::UnnamedExpr(Expr::Value( From 0c19e088bf621397458102fde83cbf1020925be5 Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Thu, 5 Feb 2026 08:02:40 -0800 Subject: [PATCH 056/121] MySQL: Add support for DEFAULT CHARACTER SET in CREATE DATABASE (#2182) --- src/ast/helpers/stmt_create_database.rs | 28 ++++++++ src/ast/mod.rs | 14 ++++ src/parser/mod.rs | 30 ++++++++ tests/sqlparser_mysql.rs | 93 +++++++++++++++++++++++++ 4 files changed, 165 insertions(+) diff --git a/src/ast/helpers/stmt_create_database.rs b/src/ast/helpers/stmt_create_database.rs index c718dbce19..e524228dee 100644 --- a/src/ast/helpers/stmt_create_database.rs +++ b/src/ast/helpers/stmt_create_database.rs @@ -85,6 +85,14 @@ pub struct CreateDatabaseBuilder { pub storage_serialization_policy: Option, /// Optional comment attached to the database. pub comment: Option, + /// Optional default character set (MySQL). + /// + /// + pub default_charset: Option, + /// Optional default collation (MySQL). + /// + /// + pub default_collation: Option, /// Optional catalog sync configuration. pub catalog_sync: Option, /// Optional catalog sync namespace mode. 
@@ -120,6 +128,8 @@ impl CreateDatabaseBuilder { default_ddl_collation: None, storage_serialization_policy: None, comment: None, + default_charset: None, + default_collation: None, catalog_sync: None, catalog_sync_namespace_mode: None, catalog_sync_namespace_flatten_delimiter: None, @@ -218,6 +228,18 @@ impl CreateDatabaseBuilder { self } + /// Set the default character set for the database. + pub fn default_charset(mut self, default_charset: Option) -> Self { + self.default_charset = default_charset; + self + } + + /// Set the default collation for the database. + pub fn default_collation(mut self, default_collation: Option) -> Self { + self.default_collation = default_collation; + self + } + /// Set the catalog sync for the database. pub fn catalog_sync(mut self, catalog_sync: Option) -> Self { self.catalog_sync = catalog_sync; @@ -272,6 +294,8 @@ impl CreateDatabaseBuilder { default_ddl_collation: self.default_ddl_collation, storage_serialization_policy: self.storage_serialization_policy, comment: self.comment, + default_charset: self.default_charset, + default_collation: self.default_collation, catalog_sync: self.catalog_sync, catalog_sync_namespace_mode: self.catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter: self.catalog_sync_namespace_flatten_delimiter, @@ -302,6 +326,8 @@ impl TryFrom for CreateDatabaseBuilder { default_ddl_collation, storage_serialization_policy, comment, + default_charset, + default_collation, catalog_sync, catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter, @@ -323,6 +349,8 @@ impl TryFrom for CreateDatabaseBuilder { default_ddl_collation, storage_serialization_policy, comment, + default_charset, + default_collation, catalog_sync, catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1e62691630..a26c14ef7b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4227,6 +4227,10 @@ pub enum Statement { storage_serialization_policy: 
Option, /// Optional comment. comment: Option, + /// Optional default character set (MySQL). + default_charset: Option, + /// Optional default collation (MySQL). + default_collation: Option, /// Optional catalog sync identifier. catalog_sync: Option, /// Catalog sync namespace mode. @@ -5081,6 +5085,8 @@ impl fmt::Display for Statement { default_ddl_collation, storage_serialization_policy, comment, + default_charset, + default_collation, catalog_sync, catalog_sync_namespace_mode, catalog_sync_namespace_flatten_delimiter, @@ -5140,6 +5146,14 @@ impl fmt::Display for Statement { write!(f, " COMMENT = '{comment}'")?; } + if let Some(charset) = default_charset { + write!(f, " DEFAULT CHARACTER SET {charset}")?; + } + + if let Some(collation) = default_collation { + write!(f, " DEFAULT COLLATE {collation}")?; + } + if let Some(sync) = catalog_sync { write!(f, " CATALOG_SYNC = '{sync}'")?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index d9a5c5188b..bddafd1e3c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5341,6 +5341,34 @@ impl<'a> Parser<'a> { None }; + // Parse MySQL-style [DEFAULT] CHARACTER SET and [DEFAULT] COLLATE options + // + // Note: The docs only mention `CHARACTER SET`, but `CHARSET` is also supported. + // Furthermore, MySQL will only accept one character set, raising an error if there is more + // than one, but will accept multiple collations and use the last one. 
+ // + // + let mut default_charset = None; + let mut default_collation = None; + loop { + let has_default = self.parse_keyword(Keyword::DEFAULT); + if default_charset.is_none() && self.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) + || self.parse_keyword(Keyword::CHARSET) + { + let _ = self.consume_token(&Token::Eq); + default_charset = Some(self.parse_identifier()?.value); + } else if self.parse_keyword(Keyword::COLLATE) { + let _ = self.consume_token(&Token::Eq); + default_collation = Some(self.parse_identifier()?.value); + } else if has_default { + // DEFAULT keyword not followed by CHARACTER SET, CHARSET, or COLLATE + self.prev_token(); + break; + } else { + break; + } + } + Ok(Statement::CreateDatabase { db_name, if_not_exists: ine, @@ -5357,6 +5385,8 @@ impl<'a> Parser<'a> { default_ddl_collation: None, storage_serialization_policy: None, comment: None, + default_charset, + default_collation, catalog_sync: None, catalog_sync_namespace_mode: None, catalog_sync_namespace_flatten_delimiter: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index b719f2efbd..d1e718f4b8 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -4621,3 +4621,96 @@ fn test_optimizer_hints() { DELETE /*+ foobar */ FROM table_name", ); } + +#[test] +fn parse_create_database_with_charset() { + // Test DEFAULT CHARACTER SET with = sign + mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4"); + + // Test DEFAULT CHARACTER SET without = sign (normalized form) + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb DEFAULT CHARACTER SET = utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test CHARACTER SET without DEFAULT + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb CHARACTER SET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test CHARSET shorthand + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE 
mydb CHARSET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test DEFAULT CHARSET shorthand + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb DEFAULT CHARSET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4", + ); + + // Test DEFAULT COLLATE + mysql_and_generic().verified_stmt("CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci"); + + // Test COLLATE without DEFAULT + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci", + "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci", + ); + + // Test both CHARACTER SET and COLLATE together + mysql_and_generic().verified_stmt( + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci", + ); + + // Test IF NOT EXISTS with CHARACTER SET + mysql_and_generic() + .verified_stmt("CREATE DATABASE IF NOT EXISTS mydb DEFAULT CHARACTER SET utf16"); + + // Test the exact syntax from the issue + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET = utf16", + "CREATE DATABASE IF NOT EXISTS noria DEFAULT CHARACTER SET utf16", + ); +} + +#[test] +fn parse_create_database_with_charset_errors() { + // Missing charset name after CHARACTER SET + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb DEFAULT CHARACTER SET") + .is_err()); + + // Missing charset name after CHARSET + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb CHARSET") + .is_err()); + + // Missing collation name after COLLATE + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb DEFAULT COLLATE") + .is_err()); + + // Equals sign but no value + assert!(mysql_and_generic() + .parse_sql_statements("CREATE DATABASE mydb CHARACTER SET =") + .is_err()); +} + +#[test] +fn parse_create_database_with_charset_option_ordering() { + // MySQL allows COLLATE before CHARACTER SET - output is normalized to CHARACTER SET 
first + // (matches MySQL's own SHOW CREATE DATABASE output order) + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb DEFAULT COLLATE utf8mb4_unicode_ci DEFAULT CHARACTER SET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci", + ); + + // COLLATE first without DEFAULT keywords + mysql_and_generic().one_statement_parses_to( + "CREATE DATABASE mydb COLLATE utf8mb4_unicode_ci CHARACTER SET utf8mb4", + "CREATE DATABASE mydb DEFAULT CHARACTER SET utf8mb4 DEFAULT COLLATE utf8mb4_unicode_ci", + ); +} From d7f56e8942f5e5ad2949b1a9be5f5b1c9bf1c3cd Mon Sep 17 00:00:00 2001 From: xitep Date: Thu, 5 Feb 2026 20:09:07 +0100 Subject: [PATCH 057/121] [Oracle] Support hierarchical queries (#2185) Co-authored-by: Ifeanyi Ubah --- src/ast/mod.rs | 2 +- src/ast/query.rs | 70 +++++++-- src/ast/spans.rs | 30 ++-- src/dialect/oracle.rs | 8 +- src/keywords.rs | 1 + src/parser/mod.rs | 79 +++++----- tests/sqlparser_bigquery.rs | 4 +- tests/sqlparser_clickhouse.rs | 2 +- tests/sqlparser_common.rs | 281 ++++++++++++++++++++++++---------- tests/sqlparser_duckdb.rs | 4 +- tests/sqlparser_mssql.rs | 6 +- tests/sqlparser_mysql.rs | 16 +- tests/sqlparser_oracle.rs | 42 ++++- tests/sqlparser_postgres.rs | 6 +- 14 files changed, 376 insertions(+), 175 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a26c14ef7b..a595196958 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -87,7 +87,7 @@ pub use self::dml::{ }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ - AfterMatchSkip, ConnectBy, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, + AfterMatchSkip, ConnectByKind, Cte, CteAsMaterialized, Distinct, EmptyMatchesMode, ExceptSelectItem, ExcludeSelectItem, ExprWithAlias, ExprWithAliasAndOrderBy, Fetch, ForClause, ForJson, ForXml, FormatClause, GroupByExpr, GroupByWithModifier, IdentWithAlias, IlikeSelectItem, InputFormatClause, Interpolate, InterpolateExpr, Join, JoinConstraint, 
diff --git a/src/ast/query.rs b/src/ast/query.rs index bb2d889f28..b8f605be54 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -480,6 +480,8 @@ pub struct Select { pub prewhere: Option, /// WHERE pub selection: Option, + /// [START WITH ..] CONNECT BY .. + pub connect_by: Vec, /// GROUP BY pub group_by: GroupByExpr, /// CLUSTER BY (Hive) @@ -501,8 +503,6 @@ pub struct Select { pub window_before_qualify: bool, /// BigQuery syntax: `SELECT AS VALUE | SELECT AS STRUCT` pub value_table_mode: Option, - /// STARTING WITH .. CONNECT BY - pub connect_by: Option, /// Was this a FROM-first query? pub flavor: SelectFlavor, } @@ -585,6 +585,10 @@ impl fmt::Display for Select { SpaceOrNewline.fmt(f)?; Indent(selection).fmt(f)?; } + for clause in &self.connect_by { + SpaceOrNewline.fmt(f)?; + clause.fmt(f)?; + } match &self.group_by { GroupByExpr::All(_) => { SpaceOrNewline.fmt(f)?; @@ -648,10 +652,6 @@ impl fmt::Display for Select { display_comma_separated(&self.named_window).fmt(f)?; } } - if let Some(ref connect_by) = self.connect_by { - SpaceOrNewline.fmt(f)?; - connect_by.fmt(f)?; - } Ok(()) } } @@ -1204,24 +1204,60 @@ impl fmt::Display for TableWithJoins { /// Joins a table to itself to process hierarchical data in the table. /// /// See . 
+/// See #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] -pub struct ConnectBy { - /// START WITH - pub condition: Expr, +pub enum ConnectByKind { /// CONNECT BY - pub relationships: Vec, + ConnectBy { + /// the `CONNECT` token + connect_token: AttachedToken, + + /// [CONNECT BY] NOCYCLE + /// + /// Optional on [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Hierarchical-Queries.html#GUID-0118DF1D-B9A9-41EB-8556-C6E7D6A5A84E__GUID-5377971A-F518-47E4-8781-F06FEB3EF993) + nocycle: bool, + + /// join conditions denoting the hierarchical relationship + relationships: Vec, + }, + + /// START WITH + /// + /// Optional on [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Hierarchical-Queries.html#GUID-0118DF1D-B9A9-41EB-8556-C6E7D6A5A84E) + /// when coming _after_ the `CONNECT BY`. + StartWith { + /// the `START` token + start_token: AttachedToken, + + /// condition selecting the root rows of the hierarchy + condition: Box, + }, } -impl fmt::Display for ConnectBy { +impl fmt::Display for ConnectByKind { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "START WITH {condition} CONNECT BY {relationships}", - condition = self.condition, - relationships = display_comma_separated(&self.relationships) - ) + match self { + ConnectByKind::ConnectBy { + connect_token: _, + nocycle, + relationships, + } => { + write!( + f, + "CONNECT BY {nocycle}{relationships}", + nocycle = if *nocycle { "NOCYCLE " } else { "" }, + relationships = display_comma_separated(relationships) + ) + } + ConnectByKind::StartWith { + start_token: _, + condition, + } => { + write!(f, "START WITH {condition}") + } + } } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index ffc96ed3e7..16a9a926f7 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -32,7 +32,7 @@ use super::{
AlterIndexOperation, AlterTableOperation, Analyze, Array, Assignment, AssignmentTarget, AttachedToken, BeginEndStatements, CaseStatement, CloseCursor, ClusteredIndex, ColumnDef, ColumnOption, ColumnOptionDef, ConditionalStatementBlock, ConditionalStatements, - ConflictTarget, ConnectBy, ConstraintCharacteristics, CopySource, CreateIndex, CreateTable, + ConflictTarget, ConnectByKind, ConstraintCharacteristics, CopySource, CreateIndex, CreateTable, CreateTableOptions, Cte, Delete, DoUpdate, ExceptSelectItem, ExcludeSelectItem, Expr, ExprWithAlias, Fetch, ForValues, FromTable, Function, FunctionArg, FunctionArgExpr, FunctionArgumentClause, FunctionArgumentList, FunctionArguments, GroupByExpr, HavingBound, @@ -2269,28 +2269,34 @@ impl Spanned for Select { .chain(lateral_views.iter().map(|item| item.span())) .chain(prewhere.iter().map(|item| item.span())) .chain(selection.iter().map(|item| item.span())) + .chain(connect_by.iter().map(|item| item.span())) .chain(core::iter::once(group_by.span())) .chain(cluster_by.iter().map(|item| item.span())) .chain(distribute_by.iter().map(|item| item.span())) .chain(sort_by.iter().map(|item| item.span())) .chain(having.iter().map(|item| item.span())) .chain(named_window.iter().map(|item| item.span())) - .chain(qualify.iter().map(|item| item.span())) - .chain(connect_by.iter().map(|item| item.span())), + .chain(qualify.iter().map(|item| item.span())), ) } } -impl Spanned for ConnectBy { +impl Spanned for ConnectByKind { fn span(&self) -> Span { - let ConnectBy { - condition, - relationships, - } = self; - - union_spans( - core::iter::once(condition.span()).chain(relationships.iter().map(|item| item.span())), - ) + match self { + ConnectByKind::ConnectBy { + connect_token, + nocycle: _, + relationships, + } => union_spans( + core::iter::once(connect_token.0.span()) + .chain(relationships.last().iter().map(|item| item.span())), + ), + ConnectByKind::StartWith { + start_token, + condition, + } => union_spans([start_token.0.span(), 
condition.span()].into_iter()), + } } } diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index a72d5d7a81..71b0fdb5c7 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -22,7 +22,9 @@ use crate::{ tokenizer::Token, }; -use super::{Dialect, Precedence}; +use super::{keywords::Keyword, Dialect, Precedence}; + +const RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR: [Keyword; 1] = [Keyword::CONNECT_BY_ROOT]; /// A [`Dialect`] for [Oracle Databases](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/index.html) #[derive(Debug, Default)] @@ -96,6 +98,10 @@ impl Dialect for OracleDialect { true } + fn get_reserved_keywords_for_select_item_operator(&self) -> &[Keyword] { + &RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR + } + fn supports_quote_delimited_string(&self) -> bool { true } diff --git a/src/keywords.rs b/src/keywords.rs index 2e26bda8ec..f84f4d213a 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -678,6 +678,7 @@ define_keywords!( NOCOMPRESS, NOCREATEDB, NOCREATEROLE, + NOCYCLE, NOINHERIT, NOLOGIN, NONE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bddafd1e3c..5b1f59900b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4521,16 +4521,25 @@ impl<'a> Parser<'a> { /// consumed and returns false #[must_use] pub fn parse_keywords(&mut self, keywords: &[Keyword]) -> bool { - let index = self.index; + self.parse_keywords_indexed(keywords).is_some() + } + + /// Just like [Self::parse_keywords], but - upon success - returns the + /// token index of the first keyword. + #[must_use] + fn parse_keywords_indexed(&mut self, keywords: &[Keyword]) -> Option { + let start_index = self.index; + let mut first_keyword_index = None; for &keyword in keywords { if !self.parse_keyword(keyword) { - // println!("parse_keywords aborting .. 
did not find {:?}", keyword); - // reset index and return immediately - self.index = index; - return false; + self.index = start_index; + return None; + } + if first_keyword_index.is_none() { + first_keyword_index = Some(self.index.saturating_sub(1)); } } - true + first_keyword_index } /// If the current token is one of the given `keywords`, returns the keyword @@ -13921,7 +13930,7 @@ impl<'a> Parser<'a> { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::FromFirstNoSelect, }); } @@ -14032,6 +14041,8 @@ impl<'a> Parser<'a> { None }; + let connect_by = self.maybe_parse_connect_by()?; + let group_by = self .parse_optional_group_by()? .unwrap_or_else(|| GroupByExpr::Expressions(vec![], vec![])); @@ -14084,17 +14095,6 @@ impl<'a> Parser<'a> { Default::default() }; - let connect_by = if self.dialect.supports_connect_by() - && self - .parse_one_of_keywords(&[Keyword::START, Keyword::CONNECT]) - .is_some() - { - self.prev_token(); - Some(self.parse_connect_by()?) - } else { - None - }; - Ok(Select { select_token: AttachedToken(select_token), optimizer_hint, @@ -14279,27 +14279,28 @@ impl<'a> Parser<'a> { } /// Parse a `CONNECT BY` clause (Oracle-style hierarchical query support). 
- pub fn parse_connect_by(&mut self) -> Result { - let (condition, relationships) = if self.parse_keywords(&[Keyword::CONNECT, Keyword::BY]) { - let relationships = self.with_state(ParserState::ConnectBy, |parser| { - parser.parse_comma_separated(Parser::parse_expr) - })?; - self.expect_keywords(&[Keyword::START, Keyword::WITH])?; - let condition = self.parse_expr()?; - (condition, relationships) - } else { - self.expect_keywords(&[Keyword::START, Keyword::WITH])?; - let condition = self.parse_expr()?; - self.expect_keywords(&[Keyword::CONNECT, Keyword::BY])?; - let relationships = self.with_state(ParserState::ConnectBy, |parser| { - parser.parse_comma_separated(Parser::parse_expr) - })?; - (condition, relationships) - }; - Ok(ConnectBy { - condition, - relationships, - }) + pub fn maybe_parse_connect_by(&mut self) -> Result, ParserError> { + let mut clauses = Vec::with_capacity(2); + loop { + if let Some(idx) = self.parse_keywords_indexed(&[Keyword::START, Keyword::WITH]) { + clauses.push(ConnectByKind::StartWith { + start_token: self.token_at(idx).clone().into(), + condition: self.parse_expr()?.into(), + }); + } else if let Some(idx) = self.parse_keywords_indexed(&[Keyword::CONNECT, Keyword::BY]) + { + clauses.push(ConnectByKind::ConnectBy { + connect_token: self.token_at(idx).clone().into(), + nocycle: self.parse_keyword(Keyword::NOCYCLE), + relationships: self.with_state(ParserState::ConnectBy, |parser| { + parser.parse_comma_separated(Parser::parse_expr) + })?, + }); + } else { + break; + } + } + Ok(clauses) } /// Parse `CREATE TABLE x AS TABLE y` diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d5ad63735f..cf843ea2b3 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2711,7 +2711,7 @@ fn test_export_data() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: Some(OrderBy { @@ -2817,7 +2817,7 @@ fn 
test_export_data() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: Some(OrderBy { diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index db83291179..b8b4e33708 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -103,7 +103,7 @@ fn parse_map_access_expr() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }, select diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index b442ec428a..4f63e4a3e3 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -501,7 +501,7 @@ fn parse_update_set_from() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -5959,7 +5959,7 @@ fn test_parse_named_window() { qualify: None, window_before_qualify: true, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }; assert_eq!(actual_select_only, expected); @@ -6634,7 +6634,7 @@ fn parse_interval_and_or_xor() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -8972,7 +8972,7 @@ fn lateral_function() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }; assert_eq!(actual_select_only, expected); @@ -9960,7 +9960,7 @@ fn parse_merge() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -12360,7 +12360,7 @@ fn parse_unload() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + 
connect_by: vec![], flavor: SelectFlavor::Standard, }))), with: None, @@ -12644,54 +12644,7 @@ fn parse_map_access_expr() { #[test] fn parse_connect_by() { - let expect_query = Select { - select_token: AttachedToken::empty(), - optimizer_hint: None, - distinct: None, - select_modifiers: None, - top: None, - top_before_distinct: false, - projection: vec![ - SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("employee_id"))), - SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("manager_id"))), - SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), - ], - exclude: None, - from: vec![TableWithJoins { - relation: table_from_name(ObjectName::from(vec![Ident::new("employees")])), - joins: vec![], - }], - into: None, - lateral_views: vec![], - prewhere: None, - selection: None, - group_by: GroupByExpr::Expressions(vec![], vec![]), - cluster_by: vec![], - distribute_by: vec![], - sort_by: vec![], - having: None, - named_window: vec![], - qualify: None, - window_before_qualify: false, - value_table_mode: None, - connect_by: Some(ConnectBy { - condition: Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("title"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::Value( - Value::SingleQuotedString("president".to_owned()).with_empty_span(), - )), - }, - relationships: vec![Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("manager_id"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( - "employee_id", - ))))), - }], - }), - flavor: SelectFlavor::Standard, - }; + let dialects = all_dialects_where(|d| d.supports_connect_by()); let connect_by_1 = concat!( "SELECT employee_id, manager_id, title FROM employees ", @@ -12701,8 +12654,63 @@ fn parse_connect_by() { ); assert_eq!( - all_dialects_where(|d| d.supports_connect_by()).verified_only_select(connect_by_1), - expect_query + dialects.verified_only_select(connect_by_1), + Select { + select_token: AttachedToken::empty(), + optimizer_hint: None, + 
distinct: None, + select_modifiers: None, + top: None, + top_before_distinct: false, + projection: vec![ + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("employee_id"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("manager_id"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), + ], + exclude: None, + from: vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![Ident::new("employees")])), + joins: vec![], + }], + into: None, + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: vec![ + ConnectByKind::StartWith { + start_token: AttachedToken::empty(), + condition: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("title"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value( + Value::SingleQuotedString("president".to_owned()).with_empty_span(), + )), + } + .into() + }, + ConnectByKind::ConnectBy { + connect_token: AttachedToken::empty(), + nocycle: false, + relationships: vec![Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("manager_id"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( + "employee_id", + ))))), + }], + } + ], + flavor: SelectFlavor::Standard, + } ); // CONNECT BY can come before START WITH @@ -12713,9 +12721,63 @@ fn parse_connect_by() { "ORDER BY employee_id" ); assert_eq!( - all_dialects_where(|d| d.supports_connect_by()) - .verified_only_select_with_canonical(connect_by_2, connect_by_1), - expect_query + dialects.verified_only_select(connect_by_2), + Select { + select_token: AttachedToken::empty(), + optimizer_hint: None, + distinct: None, + select_modifiers: None, + top: None, + top_before_distinct: false, + projection: vec![ + 
SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("employee_id"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("manager_id"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("title"))), + ], + exclude: None, + from: vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![Ident::new("employees")])), + joins: vec![], + }], + into: None, + lateral_views: vec![], + prewhere: None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: vec![ + ConnectByKind::ConnectBy { + connect_token: AttachedToken::empty(), + nocycle: false, + relationships: vec![Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("manager_id"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( + "employee_id", + ))))), + }], + }, + ConnectByKind::StartWith { + start_token: AttachedToken::empty(), + condition: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("title"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value( + Value::SingleQuotedString("president".to_owned()).with_empty_span(), + )), + } + .into() + }, + ], + flavor: SelectFlavor::Standard, + } ); // WHERE must come before CONNECT BY @@ -12727,7 +12789,7 @@ fn parse_connect_by() { "ORDER BY employee_id" ); assert_eq!( - all_dialects_where(|d| d.supports_connect_by()).verified_only_select(connect_by_3), + dialects.verified_only_select(connect_by_3), Select { select_token: AttachedToken::empty(), optimizer_hint: None, @@ -12762,22 +12824,30 @@ fn parse_connect_by() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: Some(ConnectBy { - condition: Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("title"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::Value( - 
(Value::SingleQuotedString("president".to_owned(),)).with_empty_span() - )), + connect_by: vec![ + ConnectByKind::StartWith { + start_token: AttachedToken::empty(), + condition: Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("title"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Value( + (Value::SingleQuotedString("president".to_owned(),)).with_empty_span() + )), + } + .into() }, - relationships: vec![Expr::BinaryOp { - left: Box::new(Expr::Identifier(Ident::new("manager_id"))), - op: BinaryOperator::Eq, - right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( - "employee_id", - ))))), - }], - }), + ConnectByKind::ConnectBy { + connect_token: AttachedToken::empty(), + nocycle: false, + relationships: vec![Expr::BinaryOp { + left: Box::new(Expr::Identifier(Ident::new("manager_id"))), + op: BinaryOperator::Eq, + right: Box::new(Expr::Prior(Box::new(Expr::Identifier(Ident::new( + "employee_id", + ))))), + }], + } + ], flavor: SelectFlavor::Standard, } ); @@ -12789,7 +12859,7 @@ fn parse_connect_by() { "WHERE employee_id <> 42 ", "ORDER BY employee_id" ); - all_dialects_where(|d| d.supports_connect_by()) + dialects .parse_sql_statements(connect_by_4) .expect_err("should have failed"); @@ -12803,6 +12873,61 @@ fn parse_connect_by() { "prior" )))] ); + + // no START WITH and NOCYCLE + let connect_by_5 = "SELECT child, parent FROM t CONNECT BY NOCYCLE parent = PRIOR child"; + assert_eq!( + dialects.verified_only_select(connect_by_5), + Select { + select_token: AttachedToken::empty(), + optimizer_hint: None, + distinct: None, + select_modifiers: None, + top: None, + top_before_distinct: false, + projection: vec![ + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("child"))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new("parent"))), + ], + exclude: None, + from: vec![TableWithJoins { + relation: table_from_name(ObjectName::from(vec![Ident::new("t")])), + joins: vec![], + }], + into: None, + lateral_views: vec![], + prewhere: 
None, + selection: None, + group_by: GroupByExpr::Expressions(vec![], vec![]), + cluster_by: vec![], + distribute_by: vec![], + sort_by: vec![], + having: None, + named_window: vec![], + qualify: None, + window_before_qualify: false, + value_table_mode: None, + connect_by: vec![ConnectByKind::ConnectBy { + connect_token: AttachedToken::empty(), + nocycle: true, + relationships: vec![Expr::BinaryOp { + left: Expr::Identifier(Ident::new("parent")).into(), + op: BinaryOperator::Eq, + right: Expr::Prior(Expr::Identifier(Ident::new("child")).into()).into(), + }], + }], + flavor: SelectFlavor::Standard, + } + ); + + // CONNECT BY after WHERE and before GROUP BY + dialects.verified_only_select("SELECT 0 FROM t WHERE 1 = 1 CONNECT BY 2 = 2 GROUP BY 3"); + dialects.verified_only_select( + "SELECT 0 FROM t WHERE 1 = 1 START WITH 'a' = 'a' CONNECT BY 2 = 2 GROUP BY 3", + ); + dialects.verified_only_select( + "SELECT 0 FROM t WHERE 1 = 1 CONNECT BY 2 = 2 START WITH 'a' = 'a' GROUP BY 3", + ); } #[test] @@ -13701,7 +13826,7 @@ fn test_extract_seconds_ok() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -15834,7 +15959,7 @@ fn test_select_from_first() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor, }))), order_by: None, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index e408d997b3..b9ae26491f 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -294,7 +294,7 @@ fn test_select_union_by_name() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), right: Box::::new(SetExpr::Select(Box::new(Select { @@ -327,7 +327,7 @@ fn test_select_union_by_name() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: 
vec![], flavor: SelectFlavor::Standard, }))), }); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 7f60934271..84b8658b0e 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -164,7 +164,7 @@ fn parse_create_procedure() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))) }))], @@ -1392,7 +1392,7 @@ fn parse_substring_in_select() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -1536,7 +1536,7 @@ fn parse_mssql_declare() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))) })) diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index d1e718f4b8..601b6c4f5c 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1460,7 +1460,7 @@ fn parse_escaped_quote_identifiers_with_escape() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -1517,7 +1517,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -1566,7 +1566,7 @@ fn parse_escaped_backticks_with_escape() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -1619,7 +1619,7 @@ fn parse_escaped_backticks_with_no_escape() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -2443,7 +2443,7 @@ fn 
parse_select_with_numeric_prefix_column_name() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))) ); @@ -2620,7 +2620,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))) ); @@ -3268,7 +3268,7 @@ fn parse_substring_in_select() { window_before_qualify: false, qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -3578,7 +3578,7 @@ fn parse_hex_string_introducer() { qualify: None, value_table_mode: None, into: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 1c12f868f4..0dbccdb5eb 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -209,7 +209,7 @@ fn parse_quote_delimited_string() { #[test] fn parse_invalid_quote_delimited_strings() { let dialect = all_dialects_where(|d| d.supports_quote_delimited_string()); - // ~ invalid quote delimiter + // invalid quote delimiter for q in [' ', '\t', '\r', '\n'] { assert_eq!( dialect.parse_sql_statements(&format!("SELECT Q'{q}abc{q}' FROM dual")), @@ -219,7 +219,7 @@ fn parse_invalid_quote_delimited_strings() { "with quote char {q:?}" ); } - // ~ invalid eof after quote + // invalid eof after quote assert_eq!( dialect.parse_sql_statements("SELECT Q'"), Err(ParserError::TokenizerError( @@ -227,7 +227,7 @@ fn parse_invalid_quote_delimited_strings() { )), "with EOF quote char" ); - // ~ unterminated string + // unterminated string assert_eq!( dialect.parse_sql_statements("SELECT Q'|asdfa...."), Err(ParserError::TokenizerError( @@ -338,7 +338,7 @@ fn parse_national_quote_delimited_string_but_is_a_word() { fn test_optimizer_hints() { 
let oracle_dialect = oracle(); - // ~ selects + // selects let select = oracle_dialect.verified_only_select_with_canonical( "SELECT /*+one two three*/ /*+not a hint!*/ 1 FROM dual", "SELECT /*+one two three*/ 1 FROM dual", @@ -369,16 +369,16 @@ fn test_optimizer_hints() { Some(" one two three /* asdf */\n") ); - // ~ inserts + // inserts oracle_dialect.verified_stmt("INSERT /*+ append */ INTO t1 SELECT * FROM all_objects"); - // ~ updates + // updates oracle_dialect.verified_stmt("UPDATE /*+ DISABLE_PARALLEL_DML */ table_name SET column1 = 1"); - // ~ deletes + // deletes oracle_dialect.verified_stmt("DELETE --+ ENABLE_PARALLEL_DML\n FROM table_name"); - // ~ merges + // merges oracle_dialect.verified_stmt( "MERGE /*+ CLUSTERING */ INTO people_target pt \ USING people_source ps \ @@ -388,3 +388,29 @@ fn test_optimizer_hints() { VALUES (ps.person_id, ps.first_name, ps.last_name, ps.title)", ); } + +#[test] +fn test_connect_by() { + let oracle_dialect = oracle(); + + oracle_dialect.verified_only_select( + "SELECT last_name AS \"Employee\", CONNECT_BY_ISCYCLE AS \"Cycle\", \ + LEVEL, \ + SYS_CONNECT_BY_PATH(last_name, '/') AS \"Path\" \ + FROM employees \ + WHERE level <= 3 AND department_id = 80 \ + START WITH last_name = 'King' \ + CONNECT BY NOCYCLE PRIOR employee_id = manager_id AND LEVEL <= 4 \ + ORDER BY \"Employee\", \"Cycle\", LEVEL, \"Path\"", + ); + + // CONNECT_BY_ROOT + oracle_dialect.verified_only_select( + "SELECT last_name AS \"Employee\", CONNECT_BY_ROOT last_name AS \"Manager\", \ + LEVEL - 1 AS \"Pathlen\", SYS_CONNECT_BY_PATH(last_name, '/') AS \"Path\" \ + FROM employees \ + WHERE LEVEL > 1 AND department_id = 110 \ + CONNECT BY PRIOR employee_id = manager_id \ + ORDER BY \"Employee\", \"Manager\", \"Pathlen\", \"Path\"", + ); +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 4ce8ed8b45..5853be7ebb 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1331,7 +1331,7 @@ fn parse_copy_to() { 
sort_by: vec![], qualify: None, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), order_by: None, @@ -3096,7 +3096,7 @@ fn parse_array_subquery_expr() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), right: Box::new(SetExpr::Select(Box::new(Select { @@ -3124,7 +3124,7 @@ fn parse_array_subquery_expr() { qualify: None, window_before_qualify: false, value_table_mode: None, - connect_by: None, + connect_by: vec![], flavor: SelectFlavor::Standard, }))), }), From cb21477fdc4527d6a09accf3da201c086782293e Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Fri, 6 Feb 2026 03:19:53 -0800 Subject: [PATCH 058/121] MySQL: Allow optional constraint name after CONSTRAINT keyword (#2183) --- src/dialect/generic.rs | 4 ++++ src/dialect/mod.rs | 17 +++++++++++++++++ src/dialect/mysql.rs | 5 +++++ src/parser/mod.rs | 15 ++++++++++++++- tests/sqlparser_mysql.rs | 21 +++++++++++++++++++++ 5 files changed, 61 insertions(+), 1 deletion(-) diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 345d63fe40..6d25fa2b5c 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -275,4 +275,8 @@ impl Dialect for GenericDialect { fn supports_comment_optimizer_hint(&self) -> bool { true } + + fn supports_constraint_keyword_without_name(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index f23eb387ce..15a9c2d185 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1169,6 +1169,23 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports the `CONSTRAINT` keyword without a name + /// in table constraint definitions. + /// + /// Example: + /// ```sql + /// CREATE TABLE t (a INT, CONSTRAINT CHECK (a > 0)) + /// ``` + /// + /// This is a MySQL extension; the SQL standard requires a name after `CONSTRAINT`. 
+ /// When the name is omitted, the output normalizes to just the constraint type + /// without the `CONSTRAINT` keyword (e.g., `CHECK (a > 0)`). + /// + /// + fn supports_constraint_keyword_without_name(&self) -> bool { + false + } + /// Returns true if the specified keyword is reserved and cannot be /// used as an identifier without special handling like quoting. fn is_reserved_for_identifier(&self, kw: Keyword) -> bool { diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index e1a6841711..3adb4bc205 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -190,6 +190,11 @@ impl Dialect for MySqlDialect { fn supports_comment_optimizer_hint(&self) -> bool { true } + + /// See: + fn supports_constraint_keyword_without_name(&self) -> bool { + true + } } /// `LOCK TABLES` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 5b1f59900b..fb150b7608 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9293,7 +9293,20 @@ impl<'a> Parser<'a> { &mut self, ) -> Result, ParserError> { let name = if self.parse_keyword(Keyword::CONSTRAINT) { - Some(self.parse_identifier()?) + if self.dialect.supports_constraint_keyword_without_name() + && self + .peek_one_of_keywords(&[ + Keyword::CHECK, + Keyword::PRIMARY, + Keyword::UNIQUE, + Keyword::FOREIGN, + ]) + .is_some() + { + None + } else { + Some(self.parse_identifier()?) 
+ } } else { None }; diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 601b6c4f5c..1b9d12f8cf 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -3469,6 +3469,27 @@ fn parse_create_table_unallow_constraint_then_index() { assert!(mysql_and_generic().parse_sql_statements(sql).is_ok()); } +#[test] +fn parse_create_table_constraint_check_without_name() { + let dialects = all_dialects_where(|d| d.supports_constraint_keyword_without_name()); + dialects.one_statement_parses_to( + "CREATE TABLE t (x INT, CONSTRAINT PRIMARY KEY (x))", + "CREATE TABLE t (x INT, PRIMARY KEY (x))", + ); + dialects.one_statement_parses_to( + "CREATE TABLE t (x INT, CONSTRAINT UNIQUE (x))", + "CREATE TABLE t (x INT, UNIQUE (x))", + ); + dialects.one_statement_parses_to( + "CREATE TABLE t (x INT, CONSTRAINT FOREIGN KEY (x) REFERENCES t2(id))", + "CREATE TABLE t (x INT, FOREIGN KEY (x) REFERENCES t2(id))", + ); + dialects.one_statement_parses_to( + "CREATE TABLE t (x INT, CONSTRAINT CHECK (x > 1))", + "CREATE TABLE t (x INT, CHECK (x > 1))", + ); +} + #[test] fn parse_create_table_with_fulltext_definition() { mysql_and_generic().verified_stmt("CREATE TABLE tb (id INT, FULLTEXT (id))"); From bc55b379c4faf8c3e175a390ff71c16ca891a0e0 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Fri, 6 Feb 2026 12:50:30 +0100 Subject: [PATCH 059/121] Added missing derives to dialect marker structs (#2191) --- src/dialect/ansi.rs | 3 ++- src/dialect/bigquery.rs | 3 ++- src/dialect/clickhouse.rs | 3 ++- src/dialect/databricks.rs | 3 ++- src/dialect/duckdb.rs | 3 ++- src/dialect/generic.rs | 3 ++- src/dialect/hive.rs | 3 ++- src/dialect/mssql.rs | 3 ++- src/dialect/mysql.rs | 3 ++- src/dialect/oracle.rs | 3 ++- src/dialect/postgresql.rs | 3 ++- src/dialect/redshift.rs | 3 ++- src/dialect/snowflake.rs | 3 ++- src/dialect/sqlite.rs | 3 ++- 14 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/dialect/ansi.rs b/src/dialect/ansi.rs index 5a54390cfd..89c8a9ea24 
100644 --- a/src/dialect/ansi.rs +++ b/src/dialect/ansi.rs @@ -18,7 +18,8 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [ANSI SQL](https://en.wikipedia.org/wiki/SQL:2011). -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct AnsiDialect {} impl Dialect for AnsiDialect { diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 5563d1335c..6cef460676 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -42,7 +42,8 @@ const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ ]; /// A [`Dialect`] for [Google Bigquery](https://cloud.google.com/bigquery/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct BigQueryDialect; impl Dialect for BigQueryDialect { diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index f8b6807f35..ea4d7a971b 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -18,7 +18,8 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [ClickHouse](https://clickhouse.com/). -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct ClickHouseDialect {} impl Dialect for ClickHouseDialect { diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 40807a0163..e903b0735f 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -20,7 +20,8 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [Databricks SQL](https://www.databricks.com/) /// /// See . 
-#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct DatabricksDialect; impl Dialect for DatabricksDialect { diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index b3803aee3e..32967c4c58 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -18,7 +18,8 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [DuckDB](https://duckdb.org/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct DuckDbDialect; // In most cases the redshift dialect is identical to [`PostgresSqlDialect`]. diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 6d25fa2b5c..38f12cc812 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -19,7 +19,8 @@ use crate::dialect::Dialect; /// A permissive, general purpose [`Dialect`], which parses a wide variety of SQL /// statements, from many different dialects. -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct GenericDialect; impl Dialect for GenericDialect { diff --git a/src/dialect/hive.rs b/src/dialect/hive.rs index 32a982e907..b39232ad52 100644 --- a/src/dialect/hive.rs +++ b/src/dialect/hive.rs @@ -18,7 +18,8 @@ use crate::dialect::Dialect; /// A [`Dialect`] for [Hive](https://hive.apache.org/). 
-#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct HiveDialect {} impl Dialect for HiveDialect { diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 4056bf8761..e763165d51 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -28,7 +28,8 @@ use crate::tokenizer::Token; use alloc::{vec, vec::Vec}; /// A [`Dialect`] for [Microsoft SQL Server](https://www.microsoft.com/en-us/sql-server/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct MsSqlDialect {} impl Dialect for MsSqlDialect { diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 3adb4bc205..e08c1c78bf 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -35,7 +35,8 @@ const RESERVED_FOR_TABLE_ALIAS_MYSQL: &[Keyword] = &[ ]; /// A [`Dialect`] for [MySQL](https://www.mysql.com/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct MySqlDialect {} impl Dialect for MySqlDialect { diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index 71b0fdb5c7..deb7beacb2 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -27,7 +27,8 @@ use super::{keywords::Keyword, Dialect, Precedence}; const RESERVED_KEYWORDS_FOR_SELECT_ITEM_OPERATOR: [Keyword; 1] = [Keyword::CONNECT_BY_ROOT]; /// A [`Dialect`] for [Oracle Databases](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/index.html) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct OracleDialect; impl Dialect for OracleDialect { diff 
--git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 1924a5e313..8e4d78a448 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -34,7 +34,8 @@ use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; /// A [`Dialect`] for [PostgreSQL](https://www.postgresql.org/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct PostgreSqlDialect {} const PERIOD_PREC: u8 = 200; diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 7b35848ba0..21958e3829 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -22,7 +22,8 @@ use core::str::Chars; use super::PostgreSqlDialect; /// A [`Dialect`] for [RedShift](https://aws.amazon.com/redshift/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct RedshiftSqlDialect {} // In most cases the redshift dialect is identical to [`PostgresSqlDialect`]. 
diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index e2d8cb2e1d..3b6fa1c296 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -127,7 +127,8 @@ const RESERVED_KEYWORDS_FOR_TABLE_FACTOR: &[Keyword] = &[ ]; /// A [`Dialect`] for [Snowflake](https://www.snowflake.com/) -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct SnowflakeDialect; impl Dialect for SnowflakeDialect { diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 7d1c935f16..765b387489 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -30,7 +30,8 @@ use crate::parser::{Parser, ParserError}; /// [`CREATE TABLE`](https://sqlite.org/lang_createtable.html) statement with no /// type specified, as in `CREATE TABLE t1 (a)`. In the AST, these columns will /// have the data type [`Unspecified`](crate::ast::DataType::Unspecified). -#[derive(Debug, Default)] +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] pub struct SQLiteDialect {} impl Dialect for SQLiteDialect { From e4c550002f6b3d68ed6698e0a8ac6bce633607d6 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Fri, 6 Feb 2026 12:56:29 +0100 Subject: [PATCH 060/121] Fixed overflow error, recursion counter was not included for parenthesis (#2199) --- src/parser/mod.rs | 3 +++ tests/sqlparser_common.rs | 7 +++++++ tests/sqlparser_snowflake.rs | 10 ++++------ 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fb150b7608..14ddd2b504 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -13268,6 +13268,7 @@ impl<'a> Parser<'a> { /// preceded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... 
methods, this one doesn't /// expect the initial keyword to be already consumed + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] pub fn parse_query(&mut self) -> Result, ParserError> { let _guard = self.recursion_counter.try_decrease()?; let with = if self.parse_keyword(Keyword::WITH) { @@ -15118,7 +15119,9 @@ impl<'a> Parser<'a> { } /// A table name or a parenthesized subquery, followed by optional `[AS] alias` + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] pub fn parse_table_factor(&mut self) -> Result { + let _guard = self.recursion_counter.try_decrease()?; if self.parse_keyword(Keyword::LATERAL) { // LATERAL must always be followed by a subquery or table function. if self.consume_token(&Token::LParen) { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 4f63e4a3e3..b6b867049e 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11020,6 +11020,13 @@ fn parse_deeply_nested_parens_hits_recursion_limits() { assert_eq!(ParserError::RecursionLimitExceeded, res.unwrap_err()); } +#[test] +fn parse_update_deeply_nested_parens_hits_recursion_limits() { + let sql = format!("\nUPDATE\n\n\n\n\n\n\n\n\n\n{}", "(".repeat(1000)); + let res = parse_sql_statements(&sql); + assert_eq!(ParserError::RecursionLimitExceeded, res.unwrap_err()); +} + #[test] fn parse_deeply_nested_unary_op_hits_recursion_limits() { let sql = format!("SELECT {}", "+".repeat(1000)); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index cb5af62167..014a241fac 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -3202,12 +3202,10 @@ fn parse_view_column_descriptions() { #[test] fn test_parentheses_overflow() { - // TODO: increase / improve after we fix the recursion limit - // for real (see https://github.com/apache/datafusion-sqlparser-rs/issues/984) let max_nesting_level: usize = 25; - // Verify the recursion check is not too wasteful... 
(num of parentheses - 2 is acceptable) - let slack = 2; + // Verify the recursion check is not too wasteful (num of parentheses within budget) + let slack = 3; let l_parens = "(".repeat(max_nesting_level - slack); let r_parens = ")".repeat(max_nesting_level - slack); let sql = format!("SELECT * FROM {l_parens}a.b.c{r_parens}"); @@ -3215,8 +3213,8 @@ fn test_parentheses_overflow() { snowflake_with_recursion_limit(max_nesting_level).parse_sql_statements(sql.as_str()); assert_eq!(parsed.err(), None); - // Verify the recursion check triggers... (num of parentheses - 1 is acceptable) - let slack = 1; + // Verify the recursion check triggers (one more paren exceeds the budget) + let slack = 2; let l_parens = "(".repeat(max_nesting_level - slack); let r_parens = ")".repeat(max_nesting_level - slack); let sql = format!("SELECT * FROM {l_parens}a.b.c{r_parens}"); From 23acd2376698badf0d7f4e5ed818ff606b5357a4 Mon Sep 17 00:00:00 2001 From: Marcelo Altmann Date: Fri, 6 Feb 2026 21:10:42 +0530 Subject: [PATCH 061/121] Add support for C-style comments (#2034) --- src/dialect/generic.rs | 4 ++ src/dialect/mod.rs | 6 ++ src/dialect/mysql.rs | 5 ++ src/tokenizer.rs | 142 ++++++++++++++++++++++++++++++++++++++++- 4 files changed, 154 insertions(+), 3 deletions(-) diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 38f12cc812..5e929d738f 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -177,6 +177,10 @@ impl Dialect for GenericDialect { true } + fn supports_multiline_comment_hints(&self) -> bool { + true + } + fn supports_user_host_grantee(&self) -> bool { true } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 15a9c2d185..d0b87d962a 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1099,6 +1099,12 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports optimizer hints in multiline comments + /// e.g. 
`/*!50110 KEY_BLOCK_SIZE = 1024*/` + fn supports_multiline_comment_hints(&self) -> bool { + false + } + /// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem` /// as an alias assignment operator, rather than a boolean expression. /// For example: the following statements are equivalent for such a dialect: diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index e08c1c78bf..51a43f8935 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -89,6 +89,11 @@ impl Dialect for MySqlDialect { true } + /// see + fn supports_multiline_comment_hints(&self) -> bool { + true + } + fn parse_infix( &self, parser: &mut crate::parser::Parser, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 506dee1d7a..cc5a2aa17b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -945,10 +945,65 @@ impl<'a> Tokenizer<'a> { while let Some(token) = self.next_token(&mut state, buf.last().map(|t| &t.token))? { let span = location.span_to(state.location()); - buf.push(TokenWithSpan { token, span }); + // Check if this is a multiline comment hint that should be expanded + match &token { + Token::Whitespace(Whitespace::MultiLineComment(comment)) + if self.dialect.supports_multiline_comment_hints() + && comment.starts_with('!') => + { + // Re-tokenize the hints and add them to the buffer + self.tokenize_comment_hints(comment, span, buf)?; + } + _ => { + buf.push(TokenWithSpan { token, span }); + } + } + + location = state.location(); + } + Ok(()) + } + + /// Re-tokenize optimizer hints from a multiline comment and add them to the buffer. + /// For example, `/*!50110 KEY_BLOCK_SIZE = 1024*/` becomes tokens for `KEY_BLOCK_SIZE = 1024` + fn tokenize_comment_hints( + &self, + comment: &str, + span: Span, + buf: &mut Vec, + ) -> Result<(), TokenizerError> { + // Strip the leading '!' 
and any version digits (e.g., "50110") + let hint_content = comment + .strip_prefix('!') + .unwrap_or(comment) + .trim_start_matches(|c: char| c.is_ascii_digit()); + + // If there's no content after stripping, nothing to tokenize + if hint_content.is_empty() { + return Ok(()); + } + + // Create a new tokenizer for the hint content + let inner = Tokenizer::new(self.dialect, hint_content).with_unescape(self.unescape); + + // Create a state for tracking position within the hint + let mut state = State { + peekable: hint_content.chars().peekable(), + line: span.start.line, + col: span.start.column, + }; + // Tokenize the hint content and add tokens to the buffer + let mut location = state.location(); + while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? { + let token_span = location.span_to(state.location()); + buf.push(TokenWithSpan { + token, + span: token_span, + }); location = state.location(); } + Ok(()) } @@ -2233,7 +2288,6 @@ impl<'a> Tokenizer<'a> { let mut s = String::new(); let mut nested = 1; let supports_nested_comments = self.dialect.supports_nested_comments(); - loop { match chars.next() { Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => { @@ -4218,6 +4272,88 @@ mod tests { Token::Whitespace(Whitespace::Space), Token::make_word("y", None), ], - ) + ); + } + + #[test] + fn tokenize_multiline_comment_with_comment_hint() { + let sql = String::from("0/*! 
word */1"); + + let dialect = MySqlDialect {}; + let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap(); + let expected = vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Word(Word { + value: "word".to_string(), + quote_style: None, + keyword: Keyword::NoKeyword, + }), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ]; + compare(expected, tokens); + } + + #[test] + fn tokenize_multiline_comment_with_comment_hint_and_version() { + let sql_multi = String::from("0 /*!50110 KEY_BLOCK_SIZE = 1024*/ 1"); + let dialect = MySqlDialect {}; + let tokens = Tokenizer::new(&dialect, &sql_multi).tokenize().unwrap(); + let expected = vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Word(Word { + value: "KEY_BLOCK_SIZE".to_string(), + quote_style: None, + keyword: Keyword::KEY_BLOCK_SIZE, + }), + Token::Whitespace(Whitespace::Space), + Token::Eq, + Token::Whitespace(Whitespace::Space), + Token::Number("1024".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ]; + compare(expected, tokens); + + let tokens = Tokenizer::new(&dialect, "0 /*!50110 */ 1") + .tokenize() + .unwrap(); + compare( + vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ], + tokens, + ); + + let tokens = Tokenizer::new(&dialect, "0 /*!*/ 1").tokenize().unwrap(); + compare( + vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ], + tokens, + ); + let tokens = Tokenizer::new(&dialect, "0 /*! 
*/ 1").tokenize().unwrap(); + compare( + vec![ + Token::Number("0".to_string(), false), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Whitespace(Whitespace::Space), + Token::Number("1".to_string(), false), + ], + tokens, + ); } } From 0924f3a9b28cf79e7ef49819dabc719523f9aa8b Mon Sep 17 00:00:00 2001 From: "Guan-Ming (Wesley) Chiu" <105915352+guan404ming@users.noreply.github.com> Date: Fri, 6 Feb 2026 23:41:38 +0800 Subject: [PATCH 062/121] PostgreSQL: Support PostgreSQL ANALYZE with optional table and column (#2187) Signed-off-by: Guan-Ming (Wesley) Chiu <105915352+guan404ming@users.noreply.github.com> --- src/ast/mod.rs | 34 +++++++++++++++++++--------------- src/ast/spans.rs | 4 +++- src/parser/mod.rs | 9 ++++++++- tests/sqlparser_postgres.rs | 25 +++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 17 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a595196958..010a8189b0 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -3326,19 +3326,24 @@ impl Display for ExceptionWhen { } } -/// ANALYZE TABLE statement (Hive-specific) +/// ANALYZE statement +/// +/// Supported syntax varies by dialect: +/// - Hive: `ANALYZE TABLE t [PARTITION (...)] COMPUTE STATISTICS [NOSCAN] [FOR COLUMNS [col1, ...]] [CACHE METADATA]` +/// - PostgreSQL: `ANALYZE [VERBOSE] [t [(col1, ...)]]` See +/// - General: `ANALYZE [TABLE] t` #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct Analyze { #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] - /// Name of the table to analyze. - pub table_name: ObjectName, + /// Name of the table to analyze. `None` for bare `ANALYZE`. + pub table_name: Option, /// Optional partition expressions to restrict the analysis. 
pub partitions: Option>, - /// `true` when analyzing specific columns. + /// `true` when analyzing specific columns (Hive `FOR COLUMNS` syntax). pub for_columns: bool, - /// Columns to analyze when `for_columns` is `true`. + /// Columns to analyze. pub columns: Vec, /// Whether to cache metadata before analyzing. pub cache_metadata: bool, @@ -3352,22 +3357,21 @@ pub struct Analyze { impl fmt::Display for Analyze { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "ANALYZE{}{table_name}", + write!(f, "ANALYZE")?; + if let Some(ref table_name) = self.table_name { if self.has_table_keyword { - " TABLE " - } else { - " " - }, - table_name = self.table_name - )?; + write!(f, " TABLE")?; + } + write!(f, " {table_name}")?; + } + if !self.for_columns && !self.columns.is_empty() { + write!(f, " ({})", display_comma_separated(&self.columns))?; + } if let Some(ref parts) = self.partitions { if !parts.is_empty() { write!(f, " PARTITION ({})", display_comma_separated(parts))?; } } - if self.compute_statistics { write!(f, " COMPUTE STATISTICS")?; } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 16a9a926f7..bdd430e7a6 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -841,7 +841,9 @@ impl Spanned for ConstraintCharacteristics { impl Spanned for Analyze { fn span(&self) -> Span { union_spans( - core::iter::once(self.table_name.span()) + self.table_name + .iter() + .map(|t| t.span()) .chain( self.partitions .iter() diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 14ddd2b504..585242a8ab 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1193,13 +1193,20 @@ impl<'a> Parser<'a> { /// Parse `ANALYZE` statement. 
pub fn parse_analyze(&mut self) -> Result { let has_table_keyword = self.parse_keyword(Keyword::TABLE); - let table_name = self.parse_object_name(false)?; + let table_name = self.maybe_parse(|parser| parser.parse_object_name(false))?; let mut for_columns = false; let mut cache_metadata = false; let mut noscan = false; let mut partitions = None; let mut compute_statistics = false; let mut columns = vec![]; + + // PostgreSQL syntax: ANALYZE t (col1, col2) + if table_name.is_some() && self.consume_token(&Token::LParen) { + columns = self.parse_comma_separated(|p| p.parse_identifier())?; + self.expect_token(&Token::RParen)?; + } + loop { match self.parse_one_of_keywords(&[ Keyword::PARTITION, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 5853be7ebb..cfb03737c9 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -8511,3 +8511,28 @@ fn parse_create_table_partition_of_errors() { "Expected error about empty TO list, got: {err}" ); } + +#[test] +fn parse_pg_analyze() { + // Bare ANALYZE + pg_and_generic().verified_stmt("ANALYZE"); + + // ANALYZE with table name + pg_and_generic().verified_stmt("ANALYZE t"); + + // ANALYZE with column specification + pg_and_generic().verified_stmt("ANALYZE t (col1, col2)"); + + // Verify AST for column specification + let stmt = pg().verified_stmt("ANALYZE t (col1, col2)"); + match &stmt { + Statement::Analyze(analyze) => { + assert_eq!(analyze.table_name.as_ref().unwrap().to_string(), "t"); + assert_eq!(analyze.columns.len(), 2); + assert_eq!(analyze.columns[0].to_string(), "col1"); + assert_eq!(analyze.columns[1].to_string(), "col2"); + assert!(!analyze.for_columns); + } + _ => panic!("Expected Analyze, got: {stmt:?}"), + } +} From 60abfecf66341f970894c03e696e862ef967e5a7 Mon Sep 17 00:00:00 2001 From: Albert Skalt <133099191+askalt@users.noreply.github.com> Date: Fri, 6 Feb 2026 21:18:12 +0300 Subject: [PATCH 063/121] Add Tokenizer custom token mapper support (#2184) --- 
src/tokenizer.rs | 51 ++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 4 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index cc5a2aa17b..852b73164f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -934,6 +934,16 @@ impl<'a> Tokenizer<'a> { pub fn tokenize_with_location_into_buf( &mut self, buf: &mut Vec, + ) -> Result<(), TokenizerError> { + self.tokenize_with_location_into_buf_with_mapper(buf, |token| token) + } + + /// Tokenize the statement and produce a vector of tokens, mapping each token + /// with provided `mapper` + pub fn tokenize_with_location_into_buf_with_mapper( + &mut self, + buf: &mut Vec, + mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan, ) -> Result<(), TokenizerError> { let mut state = State { peekable: self.query.chars().peekable(), @@ -952,10 +962,10 @@ impl<'a> Tokenizer<'a> { && comment.starts_with('!') => { // Re-tokenize the hints and add them to the buffer - self.tokenize_comment_hints(comment, span, buf)?; + self.tokenize_comment_hints(comment, span, buf, &mut mapper)?; } _ => { - buf.push(TokenWithSpan { token, span }); + buf.push(mapper(TokenWithSpan { token, span })); } } @@ -971,6 +981,7 @@ impl<'a> Tokenizer<'a> { comment: &str, span: Span, buf: &mut Vec, + mut mapper: impl FnMut(TokenWithSpan) -> TokenWithSpan, ) -> Result<(), TokenizerError> { // Strip the leading '!' and any version digits (e.g., "50110") let hint_content = comment @@ -997,10 +1008,10 @@ impl<'a> Tokenizer<'a> { let mut location = state.location(); while let Some(token) = inner.next_token(&mut state, buf.last().map(|t| &t.token))? 
{ let token_span = location.span_to(state.location()); - buf.push(TokenWithSpan { + buf.push(mapper(TokenWithSpan { token, span: token_span, - }); + })); location = state.location(); } @@ -2644,6 +2655,38 @@ mod tests { compare(expected, tokens); } + #[test] + fn tokenize_with_mapper() { + let sql = String::from("SELECT ?"); + let dialect = GenericDialect {}; + let mut param_num = 1; + + let mut tokens = vec![]; + Tokenizer::new(&dialect, &sql) + .tokenize_with_location_into_buf_with_mapper(&mut tokens, |mut token_span| { + token_span.token = match token_span.token { + Token::Placeholder(n) => Token::Placeholder(if n == "?" { + let ret = format!("${}", param_num); + param_num += 1; + ret + } else { + n + }), + token => token, + }; + token_span + }) + .unwrap(); + let actual = tokens.into_iter().map(|t| t.token).collect(); + let expected = vec![ + Token::make_keyword("SELECT"), + Token::Whitespace(Whitespace::Space), + Token::Placeholder("$1".to_string()), + ]; + + compare(expected, actual); + } + #[test] fn tokenize_clickhouse_double_equal() { let sql = String::from("SELECT foo=='1'"); From fcc7bce1fa704125eaa9c033d3a022db7d661070 Mon Sep 17 00:00:00 2001 From: Samyak Sarnayak Date: Sat, 7 Feb 2026 02:28:29 +0530 Subject: [PATCH 064/121] Fix MAP literals parsing (#2205) --- src/parser/mod.rs | 3 ++- tests/sqlparser_common.rs | 53 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 585242a8ab..0b152f2bec 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -3510,7 +3510,8 @@ impl<'a> Parser<'a> { /// /// [map]: https://duckdb.org/docs/sql/data_types/map.html#creating-maps fn parse_duckdb_map_field(&mut self) -> Result { - let key = self.parse_expr()?; + // Stop before `:` so it can act as a key/value separator + let key = self.parse_subexpr(self.dialect.prec_value(Precedence::Colon))?; self.expect_token(&Token::Colon)?; diff --git a/tests/sqlparser_common.rs 
b/tests/sqlparser_common.rs index b6b867049e..3c32e627c4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -13584,6 +13584,10 @@ fn test_map_syntax() { Expr::value(number(s)) } + fn null_expr() -> Expr { + Expr::Value((Value::Null).with_empty_span()) + } + check( "MAP {1: 10.0, 2: 20.0}", Expr::Map(Map { @@ -13648,6 +13652,55 @@ fn test_map_syntax() { ); check("MAP {}", Expr::Map(Map { entries: vec![] })); + + check( + "MAP {'a': 1, 'b': NULL}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(Expr::Value( + (Value::SingleQuotedString("a".to_owned())).with_empty_span(), + )), + value: Box::new(number_expr("1")), + }, + MapEntry { + key: Box::new(Expr::Value( + (Value::SingleQuotedString("b".to_owned())).with_empty_span(), + )), + value: Box::new(null_expr()), + }, + ], + }), + ); + + check( + "MAP {1: [1, NULL, 3], 2: [4, NULL, 6], 3: [7, 8, 9]}", + Expr::Map(Map { + entries: vec![ + MapEntry { + key: Box::new(number_expr("1")), + value: Box::new(Expr::Array(Array { + elem: vec![number_expr("1"), null_expr(), number_expr("3")], + named: false, + })), + }, + MapEntry { + key: Box::new(number_expr("2")), + value: Box::new(Expr::Array(Array { + elem: vec![number_expr("4"), null_expr(), number_expr("6")], + named: false, + })), + }, + MapEntry { + key: Box::new(number_expr("3")), + value: Box::new(Expr::Array(Array { + elem: vec![number_expr("7"), number_expr("8"), number_expr("9")], + named: false, + })), + }, + ], + }), + ); } #[test] From 09deb9456a354c57f0b9f5052865a054ee3271b3 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 6 Feb 2026 16:29:58 -0500 Subject: [PATCH 065/121] Prepare to release sqlparser `0.61.0` and sqlparser_derive `0.5.0` (#2195) --- Cargo.toml | 4 +- changelog/0.61.0.md | 125 ++++++++++++++++++++++++++++++++++++++++++++ derive/Cargo.toml | 2 +- 3 files changed, 128 insertions(+), 3 deletions(-) create mode 100644 changelog/0.61.0.md diff --git a/Cargo.toml b/Cargo.toml index 8945adef7e..80d8b69038 
100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "sqlparser" description = "Extensible SQL Lexer and Parser with support for ANSI SQL:2011" -version = "0.60.0" +version = "0.61.0" authors = ["Apache DataFusion "] homepage = "https://github.com/apache/datafusion-sqlparser-rs" documentation = "https://docs.rs/sqlparser/" @@ -55,7 +55,7 @@ serde = { version = "1.0", default-features = false, features = ["derive", "allo # of dev-dependencies because of # https://github.com/rust-lang/cargo/issues/1596 serde_json = { version = "1.0", optional = true } -sqlparser_derive = { version = "0.4.0", path = "derive", optional = true } +sqlparser_derive = { version = "0.5.0", path = "derive", optional = true } [dev-dependencies] simple_logger = "5.0" diff --git a/changelog/0.61.0.md b/changelog/0.61.0.md new file mode 100644 index 0000000000..299f687727 --- /dev/null +++ b/changelog/0.61.0.md @@ -0,0 +1,125 @@ + + +# sqlparser-rs 0.61.0 Changelog + +This release consists of 58 commits from 20 contributors. See credits at the end of this changelog for more information. 
+ +**Performance related:** + +- perf: remove unnecessary string clone in maybe_concat_string_literal [#2173](https://github.com/apache/datafusion-sqlparser-rs/pull/2173) (andygrove) +- perf: optimize `make_word()` to avoid unnecessary allocations [#2176](https://github.com/apache/datafusion-sqlparser-rs/pull/2176) (andygrove) + +**Fixed bugs:** + +- fix: Set the current folder as a "primary" for the `find` command [#2120](https://github.com/apache/datafusion-sqlparser-rs/pull/2120) (martin-g) +- fix: qualified column names with SQL keywords parse as identifiers [#2157](https://github.com/apache/datafusion-sqlparser-rs/pull/2157) (bombsimon) + +**Other:** + +- Fixing location for extrenal tables [#2108](https://github.com/apache/datafusion-sqlparser-rs/pull/2108) (romanoff) +- Added support for `ALTER OPERATOR` syntax [#2114](https://github.com/apache/datafusion-sqlparser-rs/pull/2114) (LucaCappelletti94) +- Oracle: Support for MERGE predicates [#2101](https://github.com/apache/datafusion-sqlparser-rs/pull/2101) (xitep) +- [Oracle] Lower StringConcat precedence [#2115](https://github.com/apache/datafusion-sqlparser-rs/pull/2115) (xitep) +- Added alter external table support for snowflake [#2122](https://github.com/apache/datafusion-sqlparser-rs/pull/2122) (romanoff) +- MSSQL: Add support for parenthesized stored procedure name in EXEC [#2126](https://github.com/apache/datafusion-sqlparser-rs/pull/2126) (yoavcloud) +- MSSQL: Parse IF/ELSE without semicolon delimiters [#2128](https://github.com/apache/datafusion-sqlparser-rs/pull/2128) (yoavcloud) +- Extract source comments [#2107](https://github.com/apache/datafusion-sqlparser-rs/pull/2107) (xitep) +- PostgreSQL: Support schema-qualified operator classes in CREATE INDEX [#2131](https://github.com/apache/datafusion-sqlparser-rs/pull/2131) (dddenis) +- Oracle: Support for quote delimited strings [#2130](https://github.com/apache/datafusion-sqlparser-rs/pull/2130) (xitep) +- Added support for `ALTER OPERATOR FAMILY` 
syntax [#2125](https://github.com/apache/datafusion-sqlparser-rs/pull/2125) (LucaCappelletti94) +- PostgreSQL Tokenization: Fix unexpected characters after question mark being silently ignored [#2129](https://github.com/apache/datafusion-sqlparser-rs/pull/2129) (jnlt3) +- Support parsing parenthesized wildcard `(*)` [#2123](https://github.com/apache/datafusion-sqlparser-rs/pull/2123) (romanoff) +- Make benchmark statement valid [#2139](https://github.com/apache/datafusion-sqlparser-rs/pull/2139) (xitep) +- Fix parse_identifiers not taking semicolons into account [#2137](https://github.com/apache/datafusion-sqlparser-rs/pull/2137) (jnlt3) +- Add PostgreSQL PARTITION OF syntax support [#2127](https://github.com/apache/datafusion-sqlparser-rs/pull/2127) (fmguerreiro) +- Databricks: Support Timetravel With "TIMESTAMP AS OF" [#2134](https://github.com/apache/datafusion-sqlparser-rs/pull/2134) (JamesVorder) +- MySQL: Parse bitwise shift left/right operators [#2152](https://github.com/apache/datafusion-sqlparser-rs/pull/2152) (mvzink) +- Redshift: Add support for optional JSON format in copy option [#2141](https://github.com/apache/datafusion-sqlparser-rs/pull/2141) (yoavcloud) +- MySQL: Add missing support for TREE explain format [#2145](https://github.com/apache/datafusion-sqlparser-rs/pull/2145) (yoavcloud) +- MySQL: Add support for && as boolean AND [#2144](https://github.com/apache/datafusion-sqlparser-rs/pull/2144) (yoavcloud) +- PostgreSQL: ALTER USER password option [#2142](https://github.com/apache/datafusion-sqlparser-rs/pull/2142) (yoavcloud) +- Key Value Options: add support for trailing semicolon [#2140](https://github.com/apache/datafusion-sqlparser-rs/pull/2140) (yoavcloud) +- Added support for `ALTER OPERATOR CLASS` syntax [#2135](https://github.com/apache/datafusion-sqlparser-rs/pull/2135) (LucaCappelletti94) +- Added missing `Copy` derives [#2158](https://github.com/apache/datafusion-sqlparser-rs/pull/2158) (LucaCappelletti94) +- Tokenize empty line 
comments correctly [#2161](https://github.com/apache/datafusion-sqlparser-rs/pull/2161) (zyuiop) +- Add support for DuckDB `LAMBDA` keyword syntax [#2149](https://github.com/apache/datafusion-sqlparser-rs/pull/2149) (lovasoa) +- MySQL: Add support for casting using the BINARY keyword [#2146](https://github.com/apache/datafusion-sqlparser-rs/pull/2146) (yoavcloud) +- Added missing `From` impls for `Statement` variants [#2160](https://github.com/apache/datafusion-sqlparser-rs/pull/2160) (LucaCappelletti94) +- GenericDialect: support colon operator for JsonAccess [#2124](https://github.com/apache/datafusion-sqlparser-rs/pull/2124) (Samyak2) +- Databricks: Support Timetravel With "VERSION AS OF" [#2155](https://github.com/apache/datafusion-sqlparser-rs/pull/2155) (JamesVorder) +- Fixed truncate table if exists for snowflake [#2166](https://github.com/apache/datafusion-sqlparser-rs/pull/2166) (romanoff) +- Refactor: replace some `dialect_of!` checks with `Dialect` trait methods [#2171](https://github.com/apache/datafusion-sqlparser-rs/pull/2171) (andygrove) +- MySQL: Support `CAST(... AS ... 
ARRAY)` syntax [#2151](https://github.com/apache/datafusion-sqlparser-rs/pull/2151) (mvzink) +- Snowflake: Support SAMPLE clause on subqueries [#2164](https://github.com/apache/datafusion-sqlparser-rs/pull/2164) (finchxxia) +- refactor: use `to_ident()` instead of `clone().into_ident()` for borrowed Words [#2177](https://github.com/apache/datafusion-sqlparser-rs/pull/2177) (andygrove) +- Refactor: replace more `dialect_of!` checks with `Dialect` trait methods [#2175](https://github.com/apache/datafusion-sqlparser-rs/pull/2175) (andygrove) +- minor: reduce unnecessary string allocations [#2178](https://github.com/apache/datafusion-sqlparser-rs/pull/2178) (andygrove) +- PostgreSQL: Support force row level security [#2169](https://github.com/apache/datafusion-sqlparser-rs/pull/2169) (isaacparker0) +- PostgreSQL: Add support for `*` (descendant) option in TRUNCATE [#2181](https://github.com/apache/datafusion-sqlparser-rs/pull/2181) (mvzink) +- Fix identifier parsing not breaking on the `|>` pipe operator [#2156](https://github.com/apache/datafusion-sqlparser-rs/pull/2156) (alexander-beedie) +- [MySQL, Oracle] Parse optimizer hints [#2162](https://github.com/apache/datafusion-sqlparser-rs/pull/2162) (xitep) +- Redshift: Support implicit string concatenation using newline [#2167](https://github.com/apache/datafusion-sqlparser-rs/pull/2167) (yoavcloud) +- PostgreSQL: Fix REPLICA IDENTITY to use NOTHING [#2179](https://github.com/apache/datafusion-sqlparser-rs/pull/2179) (mvzink) +- Add ENFORCED/NOT ENFORCED support for column-level CHECK constraints [#2180](https://github.com/apache/datafusion-sqlparser-rs/pull/2180) (mvzink) +- Implement `core::error::Error` for `ParserError` and `TokenizerError` [#2189](https://github.com/apache/datafusion-sqlparser-rs/pull/2189) (LucaCappelletti94) +- Moved more structs outside of Statement to facilitate reuse [#2188](https://github.com/apache/datafusion-sqlparser-rs/pull/2188) (LucaCappelletti94) +- Fix parsing cast operator after 
parenthesized `DEFAULT` expression [#2168](https://github.com/apache/datafusion-sqlparser-rs/pull/2168) (isaacparker0) +- Streamlined derivation of new `Dialect` objects [#2174](https://github.com/apache/datafusion-sqlparser-rs/pull/2174) (alexander-beedie) +- MSSQL: Support standalone BEGIN...END blocks [#2186](https://github.com/apache/datafusion-sqlparser-rs/pull/2186) (guan404ming) +- MySQL: Add support for `SELECT` modifiers [#2172](https://github.com/apache/datafusion-sqlparser-rs/pull/2172) (mvzink) +- MySQL: Add support for DEFAULT CHARACTER SET in CREATE DATABASE [#2182](https://github.com/apache/datafusion-sqlparser-rs/pull/2182) (mvzink) +- [Oracle] Support hierarchical queries [#2185](https://github.com/apache/datafusion-sqlparser-rs/pull/2185) (xitep) +- MySQL: Allow optional constraint name after CONSTRAINT keyword [#2183](https://github.com/apache/datafusion-sqlparser-rs/pull/2183) (mvzink) +- Added missing derives to dialect marker structs [#2191](https://github.com/apache/datafusion-sqlparser-rs/pull/2191) (LucaCappelletti94) +- Fixed overflow error, recursion counter was not included for parenthesis [#2199](https://github.com/apache/datafusion-sqlparser-rs/pull/2199) (LucaCappelletti94) +- Add support for C-style comments [#2034](https://github.com/apache/datafusion-sqlparser-rs/pull/2034) (altmannmarcelo) +- PostgreSQL: Support PostgreSQL ANALYZE with optional table and column [#2187](https://github.com/apache/datafusion-sqlparser-rs/pull/2187) (guan404ming) +- Add Tokenizer custom token mapper support [#2184](https://github.com/apache/datafusion-sqlparser-rs/pull/2184) (askalt) +- Fix MAP literals parsing [#2205](https://github.com/apache/datafusion-sqlparser-rs/pull/2205) (Samyak2) + +## Credits + +Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
+ +``` + 9 Yoav Cohen + 7 Luca Cappelletti + 7 Michael Victor Zink + 7 xitep + 6 Andy Grove + 4 Andriy Romanov + 2 Alexander Beedie + 2 James Vorderbruggen + 2 isaacparker0 + 2 jnlt3 + 1 Andrew Lamb + 1 Denis Goncharenko + 1 Filipe Guerreiro + 1 Guan-Ming (Wesley) Chiu + 1 Louis Vialar + 1 Martin Grigorov + 1 Ophir LOJKINE + 1 Samyak Sarnayak + 1 Simon Sawert + 1 finchxxia +``` + +Thank you also to everyone who contributed in other ways such as filing issues, reviewing PRs, and providing feedback on this release. + diff --git a/derive/Cargo.toml b/derive/Cargo.toml index f2f54926b5..9dfa5daf82 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -18,7 +18,7 @@ [package] name = "sqlparser_derive" description = "Procedural (proc) macros for sqlparser" -version = "0.4.0" +version = "0.5.0" authors = ["sqlparser-rs authors"] homepage = "https://github.com/sqlparser-rs/sqlparser-rs" documentation = "https://docs.rs/sqlparser_derive/" From 272c25ed83b97cce5a217f601e8b00e9117ce51f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Fri, 6 Feb 2026 16:34:30 -0500 Subject: [PATCH 066/121] Fix changelog (#2206) --- changelog/0.61.0.md | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/changelog/0.61.0.md b/changelog/0.61.0.md index 299f687727..86a9701d53 100644 --- a/changelog/0.61.0.md +++ b/changelog/0.61.0.md @@ -19,7 +19,7 @@ under the License. # sqlparser-rs 0.61.0 Changelog -This release consists of 58 commits from 20 contributors. See credits at the end of this changelog for more information. +This release consists of 66 commits from 22 contributors. See credits at the end of this changelog for more information. **Performance related:** @@ -99,24 +99,26 @@ This release consists of 58 commits from 20 contributors. See credits at the end Thank you to everyone who contributed to this release. Here is a breakdown of commits (PRs merged) per contributor. 
``` + 9 Luca Cappelletti 9 Yoav Cohen - 7 Luca Cappelletti - 7 Michael Victor Zink + 8 Michael Victor Zink 7 xitep 6 Andy Grove 4 Andriy Romanov 2 Alexander Beedie + 2 Andrew Lamb + 2 Guan-Ming (Wesley) Chiu 2 James Vorderbruggen + 2 Samyak Sarnayak 2 isaacparker0 2 jnlt3 - 1 Andrew Lamb + 1 Albert Skalt 1 Denis Goncharenko 1 Filipe Guerreiro - 1 Guan-Ming (Wesley) Chiu 1 Louis Vialar + 1 Marcelo Altmann 1 Martin Grigorov 1 Ophir LOJKINE - 1 Samyak Sarnayak 1 Simon Sawert 1 finchxxia ``` From 329a94cc0119face45ac6d44f115e33c5abc33c7 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Sat, 7 Feb 2026 07:34:10 +0100 Subject: [PATCH 067/121] Fix panic on incomplete REGEXP/MATCH expressions in SQLite dialect (#2197) --- src/dialect/sqlite.rs | 5 ++++- tests/sqlparser_sqlite.rs | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index 765b387489..b44a1c5b8a 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -89,7 +89,10 @@ impl Dialect for SQLiteDialect { ] { if parser.parse_keyword(keyword) { let left = Box::new(expr.clone()); - let right = Box::new(parser.parse_expr().unwrap()); + let right = Box::new(match parser.parse_expr() { + Ok(expr) => expr, + Err(e) => return Some(Err(e)), + }); return Some(Ok(Expr::BinaryOp { left, op, right })); } } diff --git a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index da311ac064..ffe94ab8ad 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -602,6 +602,10 @@ fn test_regexp_operator() { } ); sqlite().verified_only_select(r#"SELECT count(*) FROM messages WHERE msg_text REGEXP '\d+'"#); + + // Should return an error, not panic + assert!(sqlite().parse_sql_statements("SELECT 1 REGEXP").is_err()); + assert!(sqlite().parse_sql_statements("SELECT 1 MATCH").is_err()); } #[test] From d0a0b3e11b2b18804f0a9ef2c2055de929ccfcf3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20Milenkovi=C4=87?= Date: Mon, 9 Feb 2026 09:10:17 +0000 
Subject: [PATCH 068/121] chore: add confirmation before tarball is released (#2208) --- dev/release/release-tarball.sh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dev/release/release-tarball.sh b/dev/release/release-tarball.sh index e59b2776cc..8e0e40214b 100755 --- a/dev/release/release-tarball.sh +++ b/dev/release/release-tarball.sh @@ -43,6 +43,13 @@ fi version=$1 rc=$2 +read -r -p "Proceed to release tarball for ${version}-rc${rc}? [y/N]: " answer +answer=${answer:-no} +if [ "${answer}" != "y" ]; then + echo "Cancelled tarball release!" + exit 1 +fi + tmp_dir=tmp-apache-datafusion-dist echo "Recreate temporary directory: ${tmp_dir}" From d52681969e43cf0d66ea9c4e09fc32beb8d17ad2 Mon Sep 17 00:00:00 2001 From: Minjun Kim <48622976+funcpp@users.noreply.github.com> Date: Mon, 9 Feb 2026 18:23:04 +0900 Subject: [PATCH 069/121] Databricks: Add support for `OPTIMIZE`, `PARTITIONED BY`, and `STRUCT` (#2170) --- src/ast/mod.rs | 35 +++++- src/dialect/databricks.rs | 5 + src/parser/mod.rs | 63 +++++++++-- tests/sqlparser_databricks.rs | 198 ++++++++++++++++++++++++++++++++++ 4 files changed, 290 insertions(+), 11 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 010a8189b0..cc34ec7a23 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4578,22 +4578,40 @@ pub enum Statement { /// Legacy copy-style options. options: Vec, }, + /// ClickHouse: /// ```sql /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] /// ``` - /// /// See ClickHouse + /// + /// Databricks: + /// ```sql + /// OPTIMIZE table_name [WHERE predicate] [ZORDER BY (col_name1 [, ...])] + /// ``` + /// See Databricks OptimizeTable { /// Table name to optimize. name: ObjectName, + /// Whether the `TABLE` keyword was present (ClickHouse uses `OPTIMIZE TABLE`, Databricks uses `OPTIMIZE`). + has_table_keyword: bool, /// Optional cluster identifier. 
+ /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) on_cluster: Option, /// Optional partition spec. + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) partition: Option, /// Whether `FINAL` was specified. + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) include_final: bool, /// Optional deduplication settings. + /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) deduplicate: Option, + /// Optional WHERE predicate. + /// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html) + predicate: Option, + /// Optional ZORDER BY columns. + /// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html) + zorder: Option>, }, /// ```sql /// LISTEN @@ -6069,12 +6087,19 @@ impl fmt::Display for Statement { } Statement::OptimizeTable { name, + has_table_keyword, on_cluster, partition, include_final, deduplicate, + predicate, + zorder, } => { - write!(f, "OPTIMIZE TABLE {name}")?; + write!(f, "OPTIMIZE")?; + if *has_table_keyword { + write!(f, " TABLE")?; + } + write!(f, " {name}")?; if let Some(on_cluster) = on_cluster { write!(f, " ON CLUSTER {on_cluster}")?; } @@ -6087,6 +6112,12 @@ impl fmt::Display for Statement { if let Some(deduplicate) = deduplicate { write!(f, " {deduplicate}")?; } + if let Some(predicate) = predicate { + write!(f, " WHERE {predicate}")?; + } + if let Some(zorder) = zorder { + write!(f, " ZORDER BY ({})", display_comma_separated(zorder))?; + } Ok(()) } Statement::LISTEN { channel } => { diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index e903b0735f..55e4f56cc5 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -85,4 +85,9 @@ impl Dialect for DatabricksDialect { fn supports_values_as_table_factor(&self) -> bool { true } + + /// See + fn supports_optimize_table(&self) -> bool { + true + } } diff --git a/src/parser/mod.rs 
b/src/parser/mod.rs index 0b152f2bec..c40ed427ea 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -693,7 +693,6 @@ impl<'a> Parser<'a> { // `INSTALL` is duckdb specific https://duckdb.org/docs/extensions/overview Keyword::INSTALL if self.dialect.supports_install() => self.parse_install(), Keyword::LOAD => self.parse_load(), - // `OPTIMIZE` is clickhouse specific https://clickhouse.tech/docs/en/sql-reference/statements/optimize/ Keyword::OPTIMIZE if self.dialect.supports_optimize_table() => { self.parse_optimize_table() } @@ -3382,24 +3381,28 @@ impl<'a> Parser<'a> { /// /// ```sql /// [field_name] field_type + /// field_name: field_type /// ``` /// /// [struct]: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#declaring_a_struct_type /// [tuple]: https://clickhouse.com/docs/en/sql-reference/data-types/tuple + /// [databricks]: https://docs.databricks.com/en/sql/language-manual/data-types/struct-type.html fn parse_struct_field_def( &mut self, ) -> Result<(StructField, MatchedTrailingBracket), ParserError> { // Look beyond the next item to infer whether both field name // and type are specified. - let is_anonymous_field = !matches!( + let is_named_field = matches!( (self.peek_nth_token(0).token, self.peek_nth_token(1).token), - (Token::Word(_), Token::Word(_)) + (Token::Word(_), Token::Word(_)) | (Token::Word(_), Token::Colon) ); - let field_name = if is_anonymous_field { - None + let field_name = if is_named_field { + let name = self.parse_identifier()?; + let _ = self.consume_token(&Token::Colon); + Some(name) } else { - Some(self.parse_identifier()?) 
+ None }; let (field_type, trailing_bracket) = self.parse_data_type_helper()?; @@ -7985,7 +7988,8 @@ impl<'a> Parser<'a> { pub fn parse_hive_distribution(&mut self) -> Result { if self.parse_keywords(&[Keyword::PARTITIONED, Keyword::BY]) { self.expect_token(&Token::LParen)?; - let columns = self.parse_comma_separated(Parser::parse_column_def)?; + let columns = + self.parse_comma_separated(|parser| parser.parse_column_def_inner(true))?; self.expect_token(&Token::RParen)?; Ok(HiveDistributionStyle::PARTITIONED { columns }) } else { @@ -8809,9 +8813,19 @@ impl<'a> Parser<'a> { /// Parse column definition. pub fn parse_column_def(&mut self) -> Result { + self.parse_column_def_inner(false) + } + + fn parse_column_def_inner( + &mut self, + optional_data_type: bool, + ) -> Result { let col_name = self.parse_identifier()?; let data_type = if self.is_column_type_sqlite_unspecified() { DataType::Unspecified + } else if optional_data_type { + self.maybe_parse(|parser| parser.parse_data_type())? + .unwrap_or(DataType::Unspecified) } else { self.parse_data_type()? 
}; @@ -11917,7 +11931,8 @@ impl<'a> Parser<'a> { let field_defs = self.parse_duckdb_struct_type_def()?; Ok(DataType::Struct(field_defs, StructBracketKind::Parentheses)) } - Keyword::STRUCT if dialect_is!(dialect is BigQueryDialect | GenericDialect) => { + Keyword::STRUCT if dialect_is!(dialect is BigQueryDialect | DatabricksDialect | GenericDialect) => + { self.prev_token(); let (field_defs, _trailing_bracket) = self.parse_struct_type_def(Self::parse_struct_field_def)?; @@ -18480,13 +18495,23 @@ impl<'a> Parser<'a> { } } + /// ClickHouse: /// ```sql /// OPTIMIZE TABLE [db.]name [ON CLUSTER cluster] [PARTITION partition | PARTITION ID 'partition_id'] [FINAL] [DEDUPLICATE [BY expression]] /// ``` /// [ClickHouse](https://clickhouse.com/docs/en/sql-reference/statements/optimize) + /// + /// Databricks: + /// ```sql + /// OPTIMIZE table_name [WHERE predicate] [ZORDER BY (col_name1 [, ...])] + /// ``` + /// [Databricks](https://docs.databricks.com/en/sql/language-manual/delta-optimize.html) pub fn parse_optimize_table(&mut self) -> Result { - self.expect_keyword_is(Keyword::TABLE)?; + let has_table_keyword = self.parse_keyword(Keyword::TABLE); + let name = self.parse_object_name(false)?; + + // ClickHouse-specific options let on_cluster = self.parse_optional_on_cluster()?; let partition = if self.parse_keyword(Keyword::PARTITION) { @@ -18500,6 +18525,7 @@ impl<'a> Parser<'a> { }; let include_final = self.parse_keyword(Keyword::FINAL); + let deduplicate = if self.parse_keyword(Keyword::DEDUPLICATE) { if self.parse_keyword(Keyword::BY) { Some(Deduplicate::ByExpression(self.parse_expr()?)) @@ -18510,12 +18536,31 @@ impl<'a> Parser<'a> { None }; + // Databricks-specific options + let predicate = if self.parse_keyword(Keyword::WHERE) { + Some(self.parse_expr()?) 
+ } else { + None + }; + + let zorder = if self.parse_keywords(&[Keyword::ZORDER, Keyword::BY]) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + Some(columns) + } else { + None + }; + Ok(Statement::OptimizeTable { name, + has_table_keyword, on_cluster, partition, include_final, deduplicate, + predicate, + zorder, }) } diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index b088afd78b..6a7534ad48 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -390,3 +390,201 @@ fn parse_table_time_travel() { .parse_sql_statements("SELECT 1 FROM t1 VERSION AS OF 1 - 2",) .is_err()) } + +#[test] +fn parse_optimize_table() { + // Basic OPTIMIZE (Databricks style - no TABLE keyword) + databricks().verified_stmt("OPTIMIZE my_table"); + databricks().verified_stmt("OPTIMIZE db.my_table"); + databricks().verified_stmt("OPTIMIZE catalog.db.my_table"); + + // With WHERE clause + databricks().verified_stmt("OPTIMIZE my_table WHERE date = '2023-01-01'"); + databricks() + .verified_stmt("OPTIMIZE my_table WHERE date >= '2023-01-01' AND date < '2023-02-01'"); + + // With ZORDER BY clause + databricks().verified_stmt("OPTIMIZE my_table ZORDER BY (col1)"); + databricks().verified_stmt("OPTIMIZE my_table ZORDER BY (col1, col2)"); + databricks().verified_stmt("OPTIMIZE my_table ZORDER BY (col1, col2, col3)"); + + // Combined WHERE and ZORDER BY + databricks().verified_stmt("OPTIMIZE my_table WHERE date = '2023-01-01' ZORDER BY (col1)"); + databricks() + .verified_stmt("OPTIMIZE my_table WHERE date >= '2023-01-01' ZORDER BY (col1, col2)"); + + // Verify AST structure + match databricks() + .verified_stmt("OPTIMIZE my_table WHERE date = '2023-01-01' ZORDER BY (col1, col2)") + { + Statement::OptimizeTable { + name, + has_table_keyword, + on_cluster, + partition, + include_final, + deduplicate, + predicate, + zorder, + } => { + 
assert_eq!(name.to_string(), "my_table"); + assert!(!has_table_keyword); + assert!(on_cluster.is_none()); + assert!(partition.is_none()); + assert!(!include_final); + assert!(deduplicate.is_none()); + assert!(predicate.is_some()); + assert_eq!( + zorder, + Some(vec![ + Expr::Identifier(Ident::new("col1")), + Expr::Identifier(Ident::new("col2")), + ]) + ); + } + _ => unreachable!(), + } + + // Negative cases + assert_eq!( + databricks() + .parse_sql_statements("OPTIMIZE my_table ZORDER BY") + .unwrap_err(), + ParserError::ParserError("Expected: (, found: EOF".to_string()) + ); + assert_eq!( + databricks() + .parse_sql_statements("OPTIMIZE my_table ZORDER BY ()") + .unwrap_err(), + ParserError::ParserError("Expected: an expression, found: )".to_string()) + ); +} + +#[test] +fn parse_create_table_partitioned_by() { + // Databricks allows PARTITIONED BY with just column names (referencing existing columns) + // https://docs.databricks.com/en/sql/language-manual/sql-ref-partition.html + + // Single partition column without type + databricks().verified_stmt("CREATE TABLE t (col1 STRING, col2 INT) PARTITIONED BY (col1)"); + + // Multiple partition columns without types + databricks().verified_stmt( + "CREATE TABLE t (col1 STRING, col2 INT, col3 DATE) PARTITIONED BY (col1, col2)", + ); + + // Partition columns with types (new columns not in table spec) + databricks().verified_stmt("CREATE TABLE t (name STRING) PARTITIONED BY (year INT, month INT)"); + + // Mixed: some with types, some without + databricks() + .verified_stmt("CREATE TABLE t (id INT, name STRING) PARTITIONED BY (region, year INT)"); + + // Verify AST structure for column without type + match databricks().verified_stmt("CREATE TABLE t (col1 STRING) PARTITIONED BY (col1)") { + Statement::CreateTable(CreateTable { + name, + columns, + hive_distribution, + .. 
+ }) => { + assert_eq!(name.to_string(), "t"); + assert_eq!(columns.len(), 1); + assert_eq!(columns[0].name.to_string(), "col1"); + match hive_distribution { + HiveDistributionStyle::PARTITIONED { + columns: partition_cols, + } => { + assert_eq!(partition_cols.len(), 1); + assert_eq!(partition_cols[0].name.to_string(), "col1"); + assert_eq!(partition_cols[0].data_type, DataType::Unspecified); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } + + // Verify AST structure for column with type + match databricks().verified_stmt("CREATE TABLE t (name STRING) PARTITIONED BY (year INT)") { + Statement::CreateTable(CreateTable { + hive_distribution: + HiveDistributionStyle::PARTITIONED { + columns: partition_cols, + }, + .. + }) => { + assert_eq!(partition_cols.len(), 1); + assert_eq!(partition_cols[0].name.to_string(), "year"); + assert_eq!(partition_cols[0].data_type, DataType::Int(None)); + } + _ => unreachable!(), + } +} + +#[test] +fn parse_databricks_struct_type() { + // Databricks uses colon-separated struct field syntax (colon is optional) + // https://docs.databricks.com/en/sql/language-manual/data-types/struct-type.html + + // Basic struct with colon syntax - parses to canonical form without colons + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT)", + "CREATE TABLE t (col1 STRUCT)", + ); + + // Nested array of struct (the original issue case) + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 ARRAY>)", + "CREATE TABLE t (col1 ARRAY>)", + ); + + // Multiple struct columns + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT, col2 STRUCT)", + "CREATE TABLE t (col1 STRUCT, col2 STRUCT)", + ); + + // Deeply nested structs + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT>)", + "CREATE TABLE t (col1 STRUCT>)", + ); + + // Struct with array field + databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT, name: STRING>)", + "CREATE TABLE t (col1 STRUCT, name STRING>)", + 
); + + // Syntax without colons should also work (BigQuery compatible) + databricks().verified_stmt("CREATE TABLE t (col1 STRUCT)"); + + // Verify AST structure + match databricks().one_statement_parses_to( + "CREATE TABLE t (col1 STRUCT)", + "CREATE TABLE t (col1 STRUCT)", + ) { + Statement::CreateTable(CreateTable { columns, .. }) => { + assert_eq!(columns.len(), 1); + assert_eq!(columns[0].name.to_string(), "col1"); + match &columns[0].data_type { + DataType::Struct(fields, StructBracketKind::AngleBrackets) => { + assert_eq!(fields.len(), 2); + assert_eq!( + fields[0].field_name.as_ref().map(|i| i.to_string()), + Some("field1".to_string()) + ); + assert_eq!(fields[0].field_type, DataType::String(None)); + assert_eq!( + fields[1].field_name.as_ref().map(|i| i.to_string()), + Some("field2".to_string()) + ); + assert_eq!(fields[1].field_type, DataType::Int(None)); + } + _ => unreachable!(), + } + } + _ => unreachable!(), + } +} From 03f00cdb2724d482ac7f909c9e79080942b8bce3 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Tue, 10 Feb 2026 12:09:18 +0100 Subject: [PATCH 070/121] Fix panic in `SET AUTHORIZATION` parsing when scope modifier is missing (#2201) --- src/parser/mod.rs | 11 ++++++++++- tests/sqlparser_common.rs | 10 ++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c40ed427ea..8f3ae38fc0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -14546,6 +14546,15 @@ impl<'a> Parser<'a> { } .into()); } else if self.parse_keyword(Keyword::AUTHORIZATION) { + let scope = match scope { + Some(s) => s, + None => { + return self.expected_at( + "SESSION, LOCAL, or other scope modifier before AUTHORIZATION", + self.get_current_index(), + ) + } + }; let auth_value = if self.parse_keyword(Keyword::DEFAULT) { SetSessionAuthorizationParamKind::Default } else { @@ -14553,7 +14562,7 @@ impl<'a> Parser<'a> { SetSessionAuthorizationParamKind::User(value) }; return 
Ok(Set::SetSessionAuthorization(SetSessionAuthorizationParam { - scope: scope.expect("SET ... AUTHORIZATION must have a scope"), + scope, kind: auth_value, }) .into()); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3c32e627c4..899dba8dde 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18333,6 +18333,16 @@ fn test_parse_set_session_authorization() { ); } +#[test] +fn test_set_authorization_without_scope_errors() { + // This should return a parser error, not panic. + let res = parse_sql_statements("SET AUTHORIZATION TIME TIME"); + assert!( + res.is_err(), + "SET AUTHORIZATION without a scope modifier (e.g. SESSION) should error" + ); +} + #[test] fn parse_select_parenthesized_wildcard() { // Test SELECT DISTINCT(*) which uses a parenthesized wildcard From 75f6f4b17ae316694e877ce15ee81f692ba297a3 Mon Sep 17 00:00:00 2001 From: finchxxia <13153363548@163.com> Date: Tue, 10 Feb 2026 19:18:57 +0800 Subject: [PATCH 071/121] Snowflake: Add multi table insert support (#2148) --- src/ast/dml.rs | 198 +++++++++++++++++++-- src/ast/mod.rs | 4 +- src/ast/spans.rs | 8 +- src/dialect/snowflake.rs | 201 +++++++++++++++++++++- src/parser/mod.rs | 4 + tests/sqlparser_postgres.rs | 12 ++ tests/sqlparser_snowflake.rs | 321 +++++++++++++++++++++++++++++++++++ 7 files changed, 733 insertions(+), 15 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 4c36f70599..f9c8823a22 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -96,10 +96,35 @@ pub struct Insert { /// /// [ClickHouse formats JSON insert](https://clickhouse.com/docs/en/interfaces/formats#json-inserting-data) pub format_clause: Option, + /// For Snowflake multi-table insert: specifies the type (`ALL` or `FIRST`) + /// + /// - `None` means this is a regular single-table INSERT + /// - `Some(All)` means `INSERT ALL` (all matching WHEN clauses are executed) + /// - `Some(First)` means `INSERT FIRST` (only the first matching WHEN clause is executed) + /// + /// See: 
+ pub multi_table_insert_type: Option, + /// For multi-table insert: additional INTO clauses (unconditional) + /// + /// Used for `INSERT ALL INTO t1 INTO t2 ... SELECT ...` + /// + /// See: + pub multi_table_into_clauses: Vec, + /// For conditional multi-table insert: WHEN clauses + /// + /// Used for `INSERT ALL/FIRST WHEN cond THEN INTO t1 ... SELECT ...` + /// + /// See: + pub multi_table_when_clauses: Vec, + /// For conditional multi-table insert: ELSE clause + /// + /// See: + pub multi_table_else_clause: Option>, } impl Display for Insert { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // SQLite OR conflict has a special format: INSERT OR ... INTO table_name let table_name = if let Some(alias) = &self.table_alias { format!("{0} AS {alias}", self.table) } else { @@ -126,29 +151,46 @@ impl Display for Insert { write!(f, " {hint}")?; } if let Some(priority) = self.priority { - write!(f, " {priority}",)?; + write!(f, " {priority}")?; } - write!( - f, - "{ignore}{over}{int}{tbl} {table_name} ", - table_name = table_name, - ignore = if self.ignore { " IGNORE" } else { "" }, - over = if self.overwrite { " OVERWRITE" } else { "" }, - int = if self.into { " INTO" } else { "" }, - tbl = if self.has_table_keyword { " TABLE" } else { "" }, - )?; + if self.ignore { + write!(f, " IGNORE")?; + } + + if self.overwrite { + write!(f, " OVERWRITE")?; + } + + if let Some(insert_type) = &self.multi_table_insert_type { + write!(f, " {}", insert_type)?; + } + + if self.into { + write!(f, " INTO")?; + } + + if self.has_table_keyword { + write!(f, " TABLE")?; + } + + if !table_name.is_empty() { + write!(f, " {table_name} ")?; + } } + if !self.columns.is_empty() { write!(f, "({})", display_comma_separated(&self.columns))?; SpaceOrNewline.fmt(f)?; } + if let Some(ref parts) = self.partitioned { if !parts.is_empty() { write!(f, "PARTITION ({})", display_comma_separated(parts))?; SpaceOrNewline.fmt(f)?; } } + if !self.after_columns.is_empty() { write!(f, "({})", 
display_comma_separated(&self.after_columns))?; SpaceOrNewline.fmt(f)?; @@ -159,7 +201,31 @@ impl Display for Insert { SpaceOrNewline.fmt(f)?; } + for into_clause in &self.multi_table_into_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", into_clause)?; + } + + for when_clause in &self.multi_table_when_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", when_clause)?; + } + + if let Some(else_clauses) = &self.multi_table_else_clause { + SpaceOrNewline.fmt(f)?; + write!(f, "ELSE")?; + for into_clause in else_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", into_clause)?; + } + } + if let Some(source) = &self.source { + if !self.multi_table_into_clauses.is_empty() + || !self.multi_table_when_clauses.is_empty() + { + SpaceOrNewline.fmt(f)?; + } source.fmt(f)?; } else if !self.assignments.is_empty() { write!(f, "SET")?; @@ -189,6 +255,7 @@ impl Display for Insert { f.write_str("RETURNING")?; indented_list(f, returning)?; } + Ok(()) } } @@ -695,3 +762,114 @@ impl fmt::Display for OutputClause { } } } + +/// A WHEN clause in a conditional multi-table INSERT. +/// +/// Syntax: +/// ```sql +/// WHEN n1 > 100 THEN +/// INTO t1 +/// INTO t2 (c1, c2) VALUES (n1, n2) +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MultiTableInsertWhenClause { + /// The condition for this WHEN clause + pub condition: Expr, + /// The INTO clauses to execute when the condition is true + pub into_clauses: Vec, +} + +impl Display for MultiTableInsertWhenClause { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "WHEN {} THEN", self.condition)?; + for into_clause in &self.into_clauses { + SpaceOrNewline.fmt(f)?; + write!(f, "{}", into_clause)?; + } + Ok(()) + } +} + +/// An INTO clause in a multi-table INSERT. +/// +/// Syntax: +/// ```sql +/// INTO [ ( [ , ... ] ) ] [ VALUES ( { | DEFAULT | NULL } [ , ... 
] ) ] +/// ``` +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MultiTableInsertIntoClause { + /// The target table + pub table_name: ObjectName, + /// The target columns (optional) + pub columns: Vec, + /// The VALUES clause (optional) + pub values: Option, +} + +impl Display for MultiTableInsertIntoClause { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "INTO {}", self.table_name)?; + if !self.columns.is_empty() { + write!(f, " ({})", display_comma_separated(&self.columns))?; + } + if let Some(values) = &self.values { + write!(f, " VALUES ({})", display_comma_separated(&values.values))?; + } + Ok(()) + } +} + +/// The VALUES clause in a multi-table INSERT INTO clause. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct MultiTableInsertValues { + /// The values to insert (can be column references, DEFAULT, or NULL) + pub values: Vec, +} + +/// A value in a multi-table INSERT VALUES clause. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MultiTableInsertValue { + /// A column reference or expression from the source + Expr(Expr), + /// The DEFAULT keyword + Default, +} + +impl Display for MultiTableInsertValue { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MultiTableInsertValue::Expr(expr) => write!(f, "{}", expr), + MultiTableInsertValue::Default => write!(f, "DEFAULT"), + } + } +} + +/// The type of multi-table INSERT statement(Snowflake). 
+/// +/// See: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum MultiTableInsertType { + /// `INSERT ALL` - all matching WHEN clauses are executed + All, + /// `INSERT FIRST` - only the first matching WHEN clause is executed + First, +} + +impl Display for MultiTableInsertType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + MultiTableInsertType::All => write!(f, "ALL"), + MultiTableInsertType::First => write!(f, "FIRST"), + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cc34ec7a23..65568b77c6 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -83,7 +83,9 @@ pub use self::ddl::{ }; pub use self::dml::{ Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, - MergeInsertKind, MergeUpdateExpr, OutputClause, Update, + MergeInsertKind, MergeUpdateExpr, MultiTableInsertIntoClause, MultiTableInsertType, + MultiTableInsertValue, MultiTableInsertValues, MultiTableInsertWhenClause, OutputClause, + Update, }; pub use self::operator::{BinaryOperator, UnaryOperator}; pub use self::query::{ diff --git a/src/ast/spans.rs b/src/ast/spans.rs index bdd430e7a6..8354cea6d9 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1312,8 +1312,12 @@ impl Spanned for Insert { priority: _, // todo, mysql specific insert_alias: _, // todo, mysql specific assignments, - settings: _, // todo, clickhouse specific - format_clause: _, // todo, clickhouse specific + settings: _, // todo, clickhouse specific + format_clause: _, // todo, clickhouse specific + multi_table_insert_type: _, // snowflake multi-table insert + multi_table_into_clauses: _, // snowflake multi-table insert + multi_table_when_clauses: _, // snowflake multi-table insert + multi_table_else_clause: _, // snowflake multi-table insert } = self; union_spans( diff --git a/src/dialect/snowflake.rs 
b/src/dialect/snowflake.rs index 3b6fa1c296..14e4ad45a1 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -31,13 +31,16 @@ use crate::ast::{ ColumnPolicy, ColumnPolicyProperty, ContactEntry, CopyIntoSnowflakeKind, CreateTable, CreateTableLikeKind, DollarQuotedString, Ident, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, InitializeKind, - ObjectName, ObjectNamePart, RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, - Statement, StorageSerializationPolicy, TagsColumnOption, Value, WrappedCollection, + Insert, MultiTableInsertIntoClause, MultiTableInsertType, MultiTableInsertValue, + MultiTableInsertValues, MultiTableInsertWhenClause, ObjectName, ObjectNamePart, + RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, Statement, + StorageSerializationPolicy, TableObject, TagsColumnOption, Value, WrappedCollection, }; use crate::dialect::{Dialect, Precedence}; use crate::keywords::Keyword; use crate::parser::{IsOptional, Parser, ParserError}; use crate::tokenizer::Token; +use crate::tokenizer::TokenWithSpan; #[cfg(not(feature = "std"))] use alloc::boxed::Box; #[cfg(not(feature = "std"))] @@ -353,6 +356,33 @@ impl Dialect for SnowflakeDialect { parser.prev_token(); } + // Check for multi-table INSERT + // `INSERT [OVERWRITE] ALL ... 
or INSERT [OVERWRITE] FIRST ...` + if parser.parse_keyword(Keyword::INSERT) { + let insert_token = parser.get_current_token().clone(); + let overwrite = parser.parse_keyword(Keyword::OVERWRITE); + + // Check for ALL or FIRST keyword + if let Some(kw) = parser.parse_one_of_keywords(&[Keyword::ALL, Keyword::FIRST]) { + let multi_table_insert_type = match kw { + Keyword::FIRST => MultiTableInsertType::First, + _ => MultiTableInsertType::All, + }; + return Some(parse_multi_table_insert( + parser, + insert_token, + overwrite, + multi_table_insert_type, + )); + } + + // Not a multi-table insert, rewind + if overwrite { + parser.prev_token(); // rewind OVERWRITE + } + parser.prev_token(); // rewind INSERT + } + None } @@ -1678,3 +1708,170 @@ fn parse_show_objects(terse: bool, parser: &mut Parser) -> Result +/// +/// -- Conditional multi-table insert +/// INSERT [ OVERWRITE ] { FIRST | ALL } +/// { WHEN THEN intoClause [ ... ] } +/// [ ... ] +/// [ ELSE intoClause ] +/// +/// ``` +/// +/// See: +fn parse_multi_table_insert( + parser: &mut Parser, + insert_token: TokenWithSpan, + overwrite: bool, + multi_table_insert_type: MultiTableInsertType, +) -> Result { + // Check if this is conditional (has WHEN clauses) or unconditional (direct INTO clauses) + let is_conditional = parser.peek_keyword(Keyword::WHEN); + + let (multi_table_into_clauses, multi_table_when_clauses, multi_table_else_clause) = + if is_conditional { + // Conditional multi-table insert: WHEN clauses + let (when_clauses, else_clause) = parse_multi_table_insert_when_clauses(parser)?; + (vec![], when_clauses, else_clause) + } else { + // Unconditional multi-table insert: direct INTO clauses + let into_clauses = parse_multi_table_insert_into_clauses(parser)?; + (into_clauses, vec![], None) + }; + + // Parse the source query + let source = parser.parse_query()?; + + Ok(Statement::Insert(Insert { + insert_token: insert_token.into(), + optimizer_hint: None, + or: None, + ignore: false, + into: false, + table: 
TableObject::TableName(ObjectName(vec![])), // Not used for multi-table insert + table_alias: None, + columns: vec![], + overwrite, + source: Some(source), + assignments: vec![], + partitioned: None, + after_columns: vec![], + has_table_keyword: false, + on: None, + returning: None, + replace_into: false, + priority: None, + insert_alias: None, + settings: None, + format_clause: None, + multi_table_insert_type: Some(multi_table_insert_type), + multi_table_into_clauses, + multi_table_when_clauses, + multi_table_else_clause, + })) +} + +/// Parse one or more INTO clauses for multi-table INSERT. +fn parse_multi_table_insert_into_clauses( + parser: &mut Parser, +) -> Result, ParserError> { + let mut into_clauses = vec![]; + while parser.parse_keyword(Keyword::INTO) { + into_clauses.push(parse_multi_table_insert_into_clause(parser)?); + } + if into_clauses.is_empty() { + return parser.expected("INTO clause in multi-table INSERT", parser.peek_token()); + } + Ok(into_clauses) +} + +/// Parse a single INTO clause for multi-table INSERT. +/// +/// Syntax: `INTO
[ ( ) ] [ VALUES ( ) ]` +fn parse_multi_table_insert_into_clause( + parser: &mut Parser, +) -> Result { + let table_name = parser.parse_object_name(false)?; + + // Parse optional column list: ( [, ...] ) + let columns = parser + .maybe_parse(|p| p.parse_parenthesized_column_list(IsOptional::Mandatory, false))? + .unwrap_or_default(); + + // Parse optional VALUES clause + let values = if parser.parse_keyword(Keyword::VALUES) { + parser.expect_token(&Token::LParen)?; + let values = parser.parse_comma_separated(parse_multi_table_insert_value)?; + parser.expect_token(&Token::RParen)?; + Some(MultiTableInsertValues { values }) + } else { + None + }; + + Ok(MultiTableInsertIntoClause { + table_name, + columns, + values, + }) +} + +/// Parse a single value in a multi-table INSERT VALUES clause. +fn parse_multi_table_insert_value( + parser: &mut Parser, +) -> Result { + if parser.parse_keyword(Keyword::DEFAULT) { + Ok(MultiTableInsertValue::Default) + } else { + Ok(MultiTableInsertValue::Expr(parser.parse_expr()?)) + } +} + +/// Parse WHEN clauses for conditional multi-table INSERT. 
+fn parse_multi_table_insert_when_clauses( + parser: &mut Parser, +) -> Result< + ( + Vec, + Option>, + ), + ParserError, +> { + let mut when_clauses = vec![]; + let mut else_clause = None; + + // Parse WHEN clauses + while parser.parse_keyword(Keyword::WHEN) { + let condition = parser.parse_expr()?; + parser.expect_keyword(Keyword::THEN)?; + + // Parse INTO clauses for this WHEN + let into_clauses = parse_multi_table_insert_into_clauses(parser)?; + + when_clauses.push(MultiTableInsertWhenClause { + condition, + into_clauses, + }); + } + + // Parse optional ELSE clause + if parser.parse_keyword(Keyword::ELSE) { + else_clause = Some(parse_multi_table_insert_into_clauses(parser)?); + } + + if when_clauses.is_empty() { + return parser.expected( + "at least one WHEN clause in conditional multi-table INSERT", + parser.peek_token(), + ); + } + + Ok((when_clauses, else_clause)) +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8f3ae38fc0..2d702978cf 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17171,6 +17171,10 @@ impl<'a> Parser<'a> { insert_alias, settings, format_clause, + multi_table_insert_type: None, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, } .into()) } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index cfb03737c9..f8c7381366 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -5454,6 +5454,10 @@ fn test_simple_postgres_insert_with_alias() { insert_alias: None, settings: None, format_clause: None, + multi_table_insert_type: None, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, }) ) } @@ -5529,6 +5533,10 @@ fn test_simple_postgres_insert_with_alias() { insert_alias: None, settings: None, format_clause: None, + multi_table_insert_type: None, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, }) ) } @@ -5602,6 +5610,10 @@ 
fn test_simple_insert_with_quoted_alias() { insert_alias: None, settings: None, format_clause: None, + multi_table_insert_type: None, + multi_table_into_clauses: vec![], + multi_table_when_clauses: vec![], + multi_table_else_clause: None, }) ) } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 014a241fac..222a9e533b 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -3421,6 +3421,327 @@ fn test_subquery_sample() { .verified_stmt("SELECT * FROM (SELECT * FROM mytable) SAMPLE (10) SEED (42)"); } +#[test] +fn test_multi_table_insert_unconditional() { + // Basic unconditional multi-table insert + // See: https://docs.snowflake.com/en/sql-reference/sql/insert-multi-table + snowflake().verified_stmt("INSERT ALL INTO t1 SELECT n1, n2, n3 FROM src"); + + // Multiple INTO clauses + snowflake().verified_stmt("INSERT ALL INTO t1 INTO t2 SELECT n1, n2, n3 FROM src"); + + // With column list + snowflake().verified_stmt("INSERT ALL INTO t1 (c1, c2, c3) SELECT n1, n2, n3 FROM src"); + + // With VALUES clause + snowflake().verified_stmt( + "INSERT ALL INTO t1 (c1, c2, c3) VALUES (n2, n1, DEFAULT) SELECT n1, n2, n3 FROM src", + ); + + // Complex example from Snowflake docs + snowflake().verified_stmt( + "INSERT ALL INTO t1 INTO t1 (c1, c2, c3) VALUES (n2, n1, DEFAULT) INTO t2 (c1, c2, c3) INTO t2 VALUES (n3, n2, n1) SELECT n1, n2, n3 FROM src" + ); + + // With OVERWRITE + snowflake().verified_stmt("INSERT OVERWRITE ALL INTO t1 INTO t2 SELECT n1, n2, n3 FROM src"); +} + +#[test] +fn test_multi_table_insert_conditional() { + // Basic conditional multi-table insert with WHEN clause + // See: https://docs.snowflake.com/en/sql-reference/sql/insert-multi-table + snowflake().verified_stmt("INSERT ALL WHEN n1 > 100 THEN INTO t1 SELECT n1 FROM src"); + + // Multiple WHEN clauses + snowflake().verified_stmt( + "INSERT ALL WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t2 SELECT n1 FROM src", + ); + + // WHEN with multiple INTO 
clauses + snowflake().verified_stmt("INSERT ALL WHEN n1 > 10 THEN INTO t1 INTO t2 SELECT n1 FROM src"); + + // With ELSE clause + snowflake() + .verified_stmt("INSERT ALL WHEN n1 > 100 THEN INTO t1 ELSE INTO t2 SELECT n1 FROM src"); + + // Complex conditional insert from Snowflake docs + snowflake().verified_stmt( + "INSERT ALL WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t1 INTO t2 ELSE INTO t2 SELECT n1 FROM src" + ); + + // INSERT FIRST - only first matching WHEN clause executes + snowflake().verified_stmt( + "INSERT FIRST WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t1 INTO t2 ELSE INTO t2 SELECT n1 FROM src" + ); + + // With OVERWRITE + snowflake().verified_stmt( + "INSERT OVERWRITE ALL WHEN n1 > 100 THEN INTO t1 ELSE INTO t2 SELECT n1 FROM src", + ); + + // WHEN with always-true condition + snowflake().verified_stmt("INSERT ALL WHEN 1 = 1 THEN INTO t1 SELECT n1 FROM src"); +} + +#[test] +fn test_multi_table_insert_with_values() { + // INTO clause with VALUES using column references + snowflake().verified_stmt("INSERT ALL INTO t1 VALUES (n1, n2) SELECT n1, n2 FROM src"); + + // INTO clause with VALUES using DEFAULT + snowflake().verified_stmt( + "INSERT ALL INTO t1 (c1, c2, c3) VALUES (n1, n2, DEFAULT) SELECT n1, n2 FROM src", + ); + + // INTO clause with VALUES using NULL + snowflake().verified_stmt( + "INSERT ALL INTO t1 (c1, c2, c3) VALUES (n1, NULL, n2) SELECT n1, n2 FROM src", + ); + + // Positional alias in VALUES + snowflake().verified_stmt("INSERT ALL INTO t1 VALUES ($1, $2) SELECT 1, 50 AS an_alias"); +} + +/// Unit tests for multi-table INSERT AST structure validation +#[test] +fn test_multi_table_insert_ast_unconditional() { + // Test basic unconditional multi-table insert AST + let sql = "INSERT ALL INTO t1 INTO t2 (c1, c2) SELECT n1, n2 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_insert_type, + overwrite, + multi_table_into_clauses, + multi_table_when_clauses, + 
multi_table_else_clause, + source, + .. + }) => { + // Should be INSERT ALL (not FIRST) + assert_eq!(multi_table_insert_type, Some(MultiTableInsertType::All)); + assert!(!overwrite); + + // Should have 2 INTO clauses + assert_eq!(multi_table_into_clauses.len(), 2); + + // First INTO clause: INTO t1 + assert_eq!(multi_table_into_clauses[0].table_name.to_string(), "t1"); + assert!(multi_table_into_clauses[0].columns.is_empty()); + assert!(multi_table_into_clauses[0].values.is_none()); + + // Second INTO clause: INTO t2 (c1, c2) + assert_eq!(multi_table_into_clauses[1].table_name.to_string(), "t2"); + assert_eq!(multi_table_into_clauses[1].columns.len(), 2); + assert_eq!(multi_table_into_clauses[1].columns[0].to_string(), "c1"); + assert_eq!(multi_table_into_clauses[1].columns[1].to_string(), "c2"); + assert!(multi_table_into_clauses[1].values.is_none()); + + // No WHEN clauses for unconditional insert + assert!(multi_table_when_clauses.is_empty()); + assert!(multi_table_else_clause.is_none()); + + // Should have source query + assert!(source.is_some()); + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_with_values() { + // Test INTO clause with VALUES + let sql = "INSERT ALL INTO t1 (c1, c2, c3) VALUES (n1, n2, DEFAULT) SELECT n1, n2 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_into_clauses, + .. 
+ }) => { + assert_eq!(multi_table_into_clauses.len(), 1); + + let into_clause = &multi_table_into_clauses[0]; + assert_eq!(into_clause.table_name.to_string(), "t1"); + assert_eq!(into_clause.columns.len(), 3); + + // Check VALUES clause + let values = into_clause.values.as_ref().expect("Expected VALUES clause"); + assert_eq!(values.values.len(), 3); + + // First value: n1 (expression) + match &values.values[0] { + MultiTableInsertValue::Expr(expr) => { + assert_eq!(expr.to_string(), "n1"); + } + _ => panic!("Expected Expr"), + } + + // Second value: n2 (expression) + match &values.values[1] { + MultiTableInsertValue::Expr(expr) => { + assert_eq!(expr.to_string(), "n2"); + } + _ => panic!("Expected Expr"), + } + + // Third value: DEFAULT + match &values.values[2] { + MultiTableInsertValue::Default => {} + _ => panic!("Expected DEFAULT"), + } + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_conditional() { + // Test conditional multi-table insert with WHEN clauses + let sql = "INSERT ALL WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t2 INTO t3 ELSE INTO t4 SELECT n1 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_insert_type, + multi_table_into_clauses, + multi_table_when_clauses, + multi_table_else_clause, + .. 
+ }) => { + // Should be INSERT ALL (not FIRST) + assert_eq!(multi_table_insert_type, Some(MultiTableInsertType::All)); + + // Unconditional INTO clauses should be empty for conditional insert + assert!(multi_table_into_clauses.is_empty()); + + // Should have 2 WHEN clauses + assert_eq!(multi_table_when_clauses.len(), 2); + + // First WHEN clause: WHEN n1 > 100 THEN INTO t1 + assert_eq!( + multi_table_when_clauses[0].condition.to_string(), + "n1 > 100" + ); + assert_eq!(multi_table_when_clauses[0].into_clauses.len(), 1); + assert_eq!( + multi_table_when_clauses[0].into_clauses[0] + .table_name + .to_string(), + "t1" + ); + + // Second WHEN clause: WHEN n1 > 10 THEN INTO t2 INTO t3 + assert_eq!(multi_table_when_clauses[1].condition.to_string(), "n1 > 10"); + assert_eq!(multi_table_when_clauses[1].into_clauses.len(), 2); + assert_eq!( + multi_table_when_clauses[1].into_clauses[0] + .table_name + .to_string(), + "t2" + ); + assert_eq!( + multi_table_when_clauses[1].into_clauses[1] + .table_name + .to_string(), + "t3" + ); + + // ELSE clause: ELSE INTO t4 + let else_clause = multi_table_else_clause.expect("Expected ELSE clause"); + assert_eq!(else_clause.len(), 1); + assert_eq!(else_clause[0].table_name.to_string(), "t4"); + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_first() { + // Test INSERT FIRST vs INSERT ALL + let sql = + "INSERT FIRST WHEN n1 > 100 THEN INTO t1 WHEN n1 > 10 THEN INTO t2 SELECT n1 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_insert_type, + multi_table_when_clauses, + .. 
+ }) => { + // Should be INSERT FIRST + assert_eq!(multi_table_insert_type, Some(MultiTableInsertType::First)); + assert_eq!(multi_table_when_clauses.len(), 2); + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_overwrite() { + // Test INSERT OVERWRITE ALL + let sql = "INSERT OVERWRITE ALL INTO t1 INTO t2 SELECT n1 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + overwrite, + multi_table_insert_type, + multi_table_into_clauses, + .. + }) => { + assert!(overwrite); + assert_eq!(multi_table_insert_type, Some(MultiTableInsertType::All)); + assert_eq!(multi_table_into_clauses.len(), 2); + } + _ => panic!("Expected INSERT statement"), + } +} + +#[test] +fn test_multi_table_insert_ast_complex_values() { + // Test complex VALUES with expressions + let sql = "INSERT ALL INTO t1 VALUES (n1 + n2, n3 * 2, DEFAULT) SELECT n1, n2, n3 FROM src"; + let stmt = snowflake().verified_stmt(sql); + + match stmt { + Statement::Insert(Insert { + multi_table_into_clauses, + .. + }) => { + assert_eq!(multi_table_into_clauses.len(), 1); + + let values = multi_table_into_clauses[0] + .values + .as_ref() + .expect("Expected VALUES"); + assert_eq!(values.values.len(), 3); + + // First value: n1 + n2 (binary expression) + match &values.values[0] { + MultiTableInsertValue::Expr(Expr::BinaryOp { op, .. }) => { + assert_eq!(*op, BinaryOperator::Plus); + } + _ => panic!("Expected BinaryOp expression"), + } + + // Second value: n3 * 2 (binary expression) + match &values.values[1] { + MultiTableInsertValue::Expr(Expr::BinaryOp { op, .. 
}) => { + assert_eq!(*op, BinaryOperator::Multiply); + } + _ => panic!("Expected BinaryOp expression"), + } + + // Third value: DEFAULT + assert!(matches!(&values.values[2], MultiTableInsertValue::Default)); + } + _ => panic!("Expected INSERT statement"), + } +} + #[test] fn parse_ls_and_rm() { snowflake().one_statement_parses_to("LS @~", "LIST @~"); From 3a42e6527d9072d953dd01e2ba3e3fc91cc0063d Mon Sep 17 00:00:00 2001 From: "Guan-Ming (Wesley) Chiu" <105915352+guan404ming@users.noreply.github.com> Date: Tue, 10 Feb 2026 22:48:27 +0800 Subject: [PATCH 072/121] MSSQL: Support `THROW` statement (#2202) Signed-off-by: Guan-Ming (Wesley) Chiu <105915352+guan404ming@users.noreply.github.com> Co-authored-by: Ifeanyi Ubah <7816405+iffyio@users.noreply.github.com> --- src/ast/mod.rs | 44 ++++++++++++++++++++++++++++++++++++++++ src/ast/spans.rs | 1 + src/keywords.rs | 1 + src/parser/mod.rs | 28 +++++++++++++++++++++++++ tests/sqlparser_mssql.rs | 37 +++++++++++++++++++++++++++++++++ 5 files changed, 111 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 65568b77c6..601af1bd51 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2818,6 +2818,41 @@ impl fmt::Display for RaiseStatementValue { } } +/// A MSSQL `THROW` statement. +/// +/// ```sql +/// THROW [ error_number, message, state ] +/// ``` +/// +/// [MSSQL](https://learn.microsoft.com/en-us/sql/t-sql/language-elements/throw-transact-sql) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ThrowStatement { + /// Error number expression. + pub error_number: Option>, + /// Error message expression. + pub message: Option>, + /// State expression. 
+ pub state: Option>, +} + +impl fmt::Display for ThrowStatement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let ThrowStatement { + error_number, + message, + state, + } = self; + + write!(f, "THROW")?; + if let (Some(error_number), Some(message), Some(state)) = (error_number, message, state) { + write!(f, " {error_number}, {message}, {state}")?; + } + Ok(()) + } +} + /// Represents an expression assignment within a variable `DECLARE` statement. /// /// Examples: @@ -4700,6 +4735,8 @@ pub enum Statement { /// Additional `WITH` options for RAISERROR. options: Vec, }, + /// A MSSQL `THROW` statement. + Throw(ThrowStatement), /// ```sql /// PRINT msg_str | @local_variable | string_expr /// ``` @@ -6157,6 +6194,7 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::Throw(s) => write!(f, "{s}"), Statement::Print(s) => write!(f, "{s}"), Statement::Return(r) => write!(f, "{r}"), Statement::List(command) => write!(f, "LIST {command}"), @@ -11687,6 +11725,12 @@ impl From for Statement { } } +impl From for Statement { + fn from(t: ThrowStatement) -> Self { + Self::Throw(t) + } +} + impl From for Statement { fn from(f: Function) -> Self { Self::Call(f) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 8354cea6d9..f4bdf85a39 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -481,6 +481,7 @@ impl Spanned for Statement { Statement::UNLISTEN { .. } => Span::empty(), Statement::RenameTable { .. } => Span::empty(), Statement::RaisError { .. } => Span::empty(), + Statement::Throw(_) => Span::empty(), Statement::Print { .. } => Span::empty(), Statement::Return { .. } => Span::empty(), Statement::List(..) | Statement::Remove(..) 
=> Span::empty(), diff --git a/src/keywords.rs b/src/keywords.rs index f84f4d213a..7950b1918c 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -1029,6 +1029,7 @@ define_keywords!( TEXT, TEXTFILE, THEN, + THROW, TIES, TIME, TIMEFORMAT, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2d702978cf..7a2bda8aca 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -670,6 +670,10 @@ impl<'a> Parser<'a> { Keyword::RELEASE => self.parse_release(), Keyword::COMMIT => self.parse_commit(), Keyword::RAISERROR => Ok(self.parse_raiserror()?), + Keyword::THROW => { + self.prev_token(); + self.parse_throw().map(Into::into) + } Keyword::ROLLBACK => self.parse_rollback(), Keyword::ASSERT => self.parse_assert(), // `PREPARE`, `EXECUTE` and `DEALLOCATE` are Postgres-specific @@ -18296,6 +18300,30 @@ impl<'a> Parser<'a> { } } + /// Parse a MSSQL `THROW` statement. + /// + /// See [Statement::Throw] + pub fn parse_throw(&mut self) -> Result { + self.expect_keyword_is(Keyword::THROW)?; + + let error_number = self.maybe_parse(|p| p.parse_expr().map(Box::new))?; + let (message, state) = if error_number.is_some() { + self.expect_token(&Token::Comma)?; + let message = Box::new(self.parse_expr()?); + self.expect_token(&Token::Comma)?; + let state = Box::new(self.parse_expr()?); + (Some(message), Some(state)) + } else { + (None, None) + }; + + Ok(ThrowStatement { + error_number, + message, + state, + }) + } + /// Parse a SQL `DEALLOCATE` statement pub fn parse_deallocate(&mut self) -> Result { let prepare = self.parse_keyword(Keyword::PREPARE); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 84b8658b0e..82e6f46213 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -1665,6 +1665,43 @@ fn test_parse_raiserror() { let _ = ms().verified_stmt(sql); } +#[test] +fn test_parse_throw() { + // THROW with arguments + let sql = r#"THROW 51000, 'Record does not exist.', 1"#; + let s = ms().verified_stmt(sql); + assert_eq!( + s, + 
Statement::Throw(ThrowStatement { + error_number: Some(Box::new(Expr::Value( + (Value::Number("51000".parse().unwrap(), false)).with_empty_span() + ))), + message: Some(Box::new(Expr::Value( + (Value::SingleQuotedString("Record does not exist.".to_string())).with_empty_span() + ))), + state: Some(Box::new(Expr::Value( + (Value::Number("1".parse().unwrap(), false)).with_empty_span() + ))), + }) + ); + + // THROW with variable references + let sql = r#"THROW @ErrorNumber, @ErrorMessage, @ErrorState"#; + let _ = ms().verified_stmt(sql); + + // Re-throw (no arguments) + let sql = r#"THROW"#; + let s = ms().verified_stmt(sql); + assert_eq!( + s, + Statement::Throw(ThrowStatement { + error_number: None, + message: None, + state: None, + }) + ); +} + #[test] fn parse_use() { let valid_object_names = [ From 8e36e8ee85e8d24c3bd6948298a4675e341a1db3 Mon Sep 17 00:00:00 2001 From: Alex <45256796+RPG-Alex@users.noreply.github.com> Date: Tue, 10 Feb 2026 23:08:33 +0800 Subject: [PATCH 073/121] Updated Fuzzer harness to address all dialects (#2203) --- fuzz/fuzz_targets/fuzz_parse_sql.rs | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/fuzz/fuzz_targets/fuzz_parse_sql.rs b/fuzz/fuzz_targets/fuzz_parse_sql.rs index 446b036cd7..15c198cb23 100644 --- a/fuzz/fuzz_targets/fuzz_parse_sql.rs +++ b/fuzz/fuzz_targets/fuzz_parse_sql.rs @@ -16,14 +16,35 @@ // under the License. 
use honggfuzz::fuzz; -use sqlparser::dialect::GenericDialect; +use sqlparser::dialect::{ + AnsiDialect, BigQueryDialect, ClickHouseDialect, DatabricksDialect, DuckDbDialect, + GenericDialect, HiveDialect, MsSqlDialect, MySqlDialect, OracleDialect, PostgreSqlDialect, + RedshiftSqlDialect, SQLiteDialect, SnowflakeDialect, +}; use sqlparser::parser::Parser; fn main() { + let dialects: Vec> = vec![ + Box::new(AnsiDialect::default()), + Box::new(BigQueryDialect::default()), + Box::new(ClickHouseDialect::default()), + Box::new(DatabricksDialect::default()), + Box::new(DuckDbDialect::default()), + Box::new(GenericDialect::default()), + Box::new(HiveDialect::default()), + Box::new(MsSqlDialect::default()), + Box::new(MySqlDialect::default()), + Box::new(OracleDialect::default()), + Box::new(PostgreSqlDialect::default()), + Box::new(RedshiftSqlDialect::default()), + Box::new(SQLiteDialect::default()), + Box::new(SnowflakeDialect::default()), + ]; loop { fuzz!(|data: String| { - let dialect = GenericDialect {}; - let _ = Parser::parse_sql(&dialect, &data); + for dialect in &dialects { + let _ = Parser::parse_sql(dialect.as_ref(), &data); + } }); } } From 4b4a9d7411f7a855999069e6a0d10202cdcab7b9 Mon Sep 17 00:00:00 2001 From: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> Date: Fri, 13 Feb 2026 09:39:31 +0100 Subject: [PATCH 074/121] Snowflake: Lambda functions (#2192) --- src/ast/mod.rs | 23 +++++++++++- src/dialect/snowflake.rs | 5 +++ src/parser/mod.rs | 71 ++++++++++++++++++++++++++++------- tests/sqlparser_common.rs | 17 ++++++++- tests/sqlparser_databricks.rs | 16 +++++++- 5 files changed, 114 insertions(+), 18 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 601af1bd51..4d8c536a9b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1423,7 +1423,7 @@ impl fmt::Display for AccessExpr { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct LambdaFunction { /// The parameters to the lambda function. 
- pub params: OneOrManyWithParens, + pub params: OneOrManyWithParens, /// The body of the lambda function. pub body: Box, /// The syntax style used to write the lambda function. @@ -1448,6 +1448,27 @@ impl fmt::Display for LambdaFunction { } } +/// A parameter to a lambda function, optionally with a data type. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct LambdaFunctionParameter { + /// The name of the parameter + pub name: Ident, + /// The optional data type of the parameter + /// [Snowflake Syntax](https://docs.snowflake.com/en/sql-reference/functions/filter#arguments) + pub data_type: Option, +} + +impl fmt::Display for LambdaFunctionParameter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.data_type { + Some(dt) => write!(f, "{} {}", self.name, dt), + None => write!(f, "{}", self.name), + } + } +} + /// The syntax style for a lambda function. #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash, Copy)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 14e4ad45a1..d647091600 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -662,6 +662,11 @@ impl Dialect for SnowflakeDialect { fn supports_select_wildcard_rename(&self) -> bool { true } + + /// See + fn supports_lambda_functions(&self) -> bool { + true + } } // Peeks ahead to identify tokens that are expected after diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7a2bda8aca..7dc758153e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1606,10 +1606,34 @@ impl<'a> Parser<'a> { value: self.parse_introduced_string_expr()?.into(), }) } + // An unreserved word (likely an identifier) is followed by an arrow, + // which indicates a lambda function with a single, untyped parameter. + // For example: `a -> a * 2`. 
Token::Arrow if self.dialect.supports_lambda_functions() => { self.expect_token(&Token::Arrow)?; Ok(Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::One(w.to_ident(w_span)), + params: OneOrManyWithParens::One(LambdaFunctionParameter { + name: w.to_ident(w_span), + data_type: None, + }), + body: Box::new(self.parse_expr()?), + syntax: LambdaSyntax::Arrow, + })) + } + // An unreserved word (likely an identifier) that is followed by another word (likely a data type) + // which is then followed by an arrow, which indicates a lambda function with a single, typed parameter. + // For example: `a INT -> a * 2`. + Token::Word(_) + if self.dialect.supports_lambda_functions() + && self.peek_nth_token_ref(1).token == Token::Arrow => + { + let data_type = self.parse_data_type()?; + self.expect_token(&Token::Arrow)?; + Ok(Expr::Lambda(LambdaFunction { + params: OneOrManyWithParens::One(LambdaFunctionParameter { + name: w.to_ident(w_span), + data_type: Some(data_type), + }), body: Box::new(self.parse_expr()?), syntax: LambdaSyntax::Arrow, })) @@ -2195,7 +2219,7 @@ impl<'a> Parser<'a> { return Ok(None); } self.maybe_parse(|p| { - let params = p.parse_comma_separated(|p| p.parse_identifier())?; + let params = p.parse_comma_separated(|p| p.parse_lambda_function_parameter())?; p.expect_token(&Token::RParen)?; p.expect_token(&Token::Arrow)?; let expr = p.parse_expr()?; @@ -2207,7 +2231,7 @@ impl<'a> Parser<'a> { }) } - /// Parses a lambda expression using the `LAMBDA` keyword syntax. + /// Parses a lambda expression following the `LAMBDA` keyword syntax.
/// /// Syntax: `LAMBDA : ` /// @@ -2217,30 +2241,49 @@ impl<'a> Parser<'a> { /// /// See fn parse_lambda_expr(&mut self) -> Result { + // Parse the parameters: either a single identifier or comma-separated identifiers + let params = self.parse_lambda_function_parameters()?; + // Expect the colon separator + self.expect_token(&Token::Colon)?; + // Parse the body expression + let body = self.parse_expr()?; + Ok(Expr::Lambda(LambdaFunction { + params, + body: Box::new(body), + syntax: LambdaSyntax::LambdaKeyword, + })) + } + + /// Parses the parameters of a lambda function with optional typing. + fn parse_lambda_function_parameters( + &mut self, + ) -> Result, ParserError> { // Parse the parameters: either a single identifier or comma-separated identifiers let params = if self.consume_token(&Token::LParen) { // Parenthesized parameters: (x, y) - let params = self.parse_comma_separated(|p| p.parse_identifier())?; + let params = self.parse_comma_separated(|p| p.parse_lambda_function_parameter())?; self.expect_token(&Token::RParen)?; OneOrManyWithParens::Many(params) } else { // Unparenthesized parameters: x or x, y - let params = self.parse_comma_separated(|p| p.parse_identifier())?; + let params = self.parse_comma_separated(|p| p.parse_lambda_function_parameter())?; if params.len() == 1 { OneOrManyWithParens::One(params.into_iter().next().unwrap()) } else { OneOrManyWithParens::Many(params) } }; - // Expect the colon separator - self.expect_token(&Token::Colon)?; - // Parse the body expression - let body = self.parse_expr()?; - Ok(Expr::Lambda(LambdaFunction { - params, - body: Box::new(body), - syntax: LambdaSyntax::LambdaKeyword, - })) + Ok(params) + } + + /// Parses a single parameter of a lambda function, with optional typing. 
+ fn parse_lambda_function_parameter(&mut self) -> Result { + let name = self.parse_identifier()?; + let data_type = match self.peek_token().token { + Token::Word(_) => self.maybe_parse(|p| p.parse_data_type())?, + _ => None, + }; + Ok(LambdaFunctionParameter { name, data_type }) } /// Tries to parse the body of an [ODBC escaping sequence] diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 899dba8dde..5822153ac9 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15925,7 +15925,16 @@ fn test_lambdas() { ] ), Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::Many(vec![Ident::new("p1"), Ident::new("p2")]), + params: OneOrManyWithParens::Many(vec![ + LambdaFunctionParameter { + name: Ident::new("p1"), + data_type: None + }, + LambdaFunctionParameter { + name: Ident::new("p2"), + data_type: None + } + ]), body: Box::new(Expr::Case { case_token: AttachedToken::empty(), end_token: AttachedToken::empty(), @@ -15970,6 +15979,12 @@ fn test_lambdas() { "map_zip_with(map(1, 'a', 2, 'b'), map(1, 'x', 2, 'y'), (k, v1, v2) -> concat(v1, v2))", ); dialects.verified_expr("transform(array(1, 2, 3), x -> x + 1)"); + + // Ensure all lambda variants are parsed correctly + dialects.verified_expr("a -> a * 2"); // Single parameter without type + dialects.verified_expr("a INT -> a * 2"); // Single parameter with type + dialects.verified_expr("(a, b) -> a * b"); // Multiple parameters without types + dialects.verified_expr("(a INT, b FLOAT) -> a * b"); // Multiple parameters with types } #[test] diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 6a7534ad48..24d06ef2fd 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -72,7 +72,10 @@ fn test_databricks_exists() { ] ), Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::One(Ident::new("x")), + params: OneOrManyWithParens::One(LambdaFunctionParameter { + name: Ident::new("x"), + data_type: None + }), body: 
Box::new(Expr::IsNull(Box::new(Expr::Identifier(Ident::new("x"))))), syntax: LambdaSyntax::Arrow, }) @@ -109,7 +112,16 @@ fn test_databricks_lambdas() { ] ), Expr::Lambda(LambdaFunction { - params: OneOrManyWithParens::Many(vec![Ident::new("p1"), Ident::new("p2")]), + params: OneOrManyWithParens::Many(vec![ + LambdaFunctionParameter { + name: Ident::new("p1"), + data_type: None + }, + LambdaFunctionParameter { + name: Ident::new("p2"), + data_type: None + } + ]), body: Box::new(Expr::Case { case_token: AttachedToken::empty(), end_token: AttachedToken::empty(), From 6a48f44f99d015a4f7a86275e2521ea2cc45fdae Mon Sep 17 00:00:00 2001 From: Alex <45256796+RPG-Alex@users.noreply.github.com> Date: Sat, 14 Feb 2026 14:55:57 +0800 Subject: [PATCH 075/121] Updated `parse_infix(..)` in `mysql.rs` and `sqlite.rs` to handle error rather than `unwrap()` (#2207) --- src/dialect/mysql.rs | 9 +++++++-- tests/sqlparser_mysql.rs | 8 ++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index 51a43f8935..b31a2310db 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -102,10 +102,15 @@ impl Dialect for MySqlDialect { ) -> Option> { // Parse DIV as an operator if parser.parse_keyword(Keyword::DIV) { + let left = Box::new(expr.clone()); + let right = Box::new(match parser.parse_expr() { + Ok(expr) => expr, + Err(e) => return Some(Err(e)), + }); Some(Ok(Expr::BinaryOp { - left: Box::new(expr.clone()), + left, op: BinaryOperator::MyIntegerDivide, - right: Box::new(parser.parse_expr().unwrap()), + right, })) } else { None diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 1b9d12f8cf..4ad0404b08 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -3627,6 +3627,14 @@ fn parse_div_infix() { mysql().verified_stmt(r#"SELECT 5 DIV 2"#); } +#[test] +fn parse_div_infix_propagates_parse_error() { + let err = mysql() + .parse_sql_statements("SELECT 5 DIV") + .expect_err("expected an 
error"); + assert_matches!(err, ParserError::ParserError(_)); +} + #[test] fn parse_drop_temporary_table() { let sql = "DROP TEMPORARY TABLE foo"; From 798fbe4b934bd6409839f75e88efb5ca98e5abb9 Mon Sep 17 00:00:00 2001 From: "Guan-Ming (Wesley) Chiu" <105915352+guan404ming@users.noreply.github.com> Date: Sat, 14 Feb 2026 15:00:24 +0800 Subject: [PATCH 076/121] MSSQL: Add support for TRAN shorthand (#2212) Signed-off-by: Guan-Ming (Wesley) Chiu <105915352+guan404ming@users.noreply.github.com> --- src/ast/mod.rs | 6 +++++- src/dialect/mssql.rs | 8 ++++++-- src/keywords.rs | 1 + src/parser/mod.rs | 15 +++++++++------ tests/sqlparser_mssql.rs | 28 ++++++++++++++++++++++++++++ 5 files changed, 49 insertions(+), 9 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 4d8c536a9b..eda2822600 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -6390,7 +6390,7 @@ impl Display for CascadeOption { } } -/// Transaction started with [ TRANSACTION | WORK ] +/// Transaction started with [ TRANSACTION | WORK | TRAN ] #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -6399,6 +6399,9 @@ pub enum BeginTransactionKind { Transaction, /// Alternate `WORK` keyword. Work, + /// MSSQL shorthand `TRAN` keyword. 
+ /// See + Tran, } impl Display for BeginTransactionKind { @@ -6406,6 +6409,7 @@ impl Display for BeginTransactionKind { match self { BeginTransactionKind::Transaction => write!(f, "TRANSACTION"), BeginTransactionKind::Work => write!(f, "WORK"), + BeginTransactionKind::Tran => write!(f, "TRAN"), } } } diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index e763165d51..a2127f0da5 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -151,8 +151,12 @@ impl Dialect for MsSqlDialect { let is_block = parser .maybe_parse(|p| { if p.parse_transaction_modifier().is_some() - || p.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) - .is_some() + || p.parse_one_of_keywords(&[ + Keyword::TRANSACTION, + Keyword::WORK, + Keyword::TRAN, + ]) + .is_some() || matches!(p.peek_token_ref().token, Token::SemiColon | Token::EOF) { p.expected("statement", p.peek_token()) diff --git a/src/keywords.rs b/src/keywords.rs index 7950b1918c..f1dbcd9378 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -1051,6 +1051,7 @@ define_keywords!( TOTP, TRACE, TRAILING, + TRAN, TRANSACTION, TRANSIENT, TRANSLATE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7dc758153e..1c20014d5b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -18142,11 +18142,14 @@ impl<'a> Parser<'a> { /// Parse a 'BEGIN' statement pub fn parse_begin(&mut self) -> Result { let modifier = self.parse_transaction_modifier(); - let transaction = match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]) { - Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction), - Some(Keyword::WORK) => Some(BeginTransactionKind::Work), - _ => None, - }; + let transaction = + match self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK, Keyword::TRAN]) + { + Some(Keyword::TRANSACTION) => Some(BeginTransactionKind::Transaction), + Some(Keyword::WORK) => Some(BeginTransactionKind::Work), + Some(Keyword::TRAN) => Some(BeginTransactionKind::Tran), + _ => None, + }; 
Ok(Statement::StartTransaction { modes: self.parse_transaction_modes()?, begin: true, @@ -18280,7 +18283,7 @@ impl<'a> Parser<'a> { /// Parse an optional `AND [NO] CHAIN` clause for `COMMIT` and `ROLLBACK` statements pub fn parse_commit_rollback_chain(&mut self) -> Result { - let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK]); + let _ = self.parse_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK, Keyword::TRAN]); if self.parse_keyword(Keyword::AND) { let chain = !self.parse_keyword(Keyword::NO); self.expect_keyword_is(Keyword::CHAIN)?; diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 82e6f46213..cf9ae89809 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2665,3 +2665,31 @@ fn parse_mssql_begin_end_block() { _ => panic!("Expected StartTransaction, got: {stmt:?}"), } } + +/// MSSQL supports `TRAN` as shorthand for `TRANSACTION`. +/// See +#[test] +fn parse_mssql_tran_shorthand() { + // BEGIN TRAN + let sql = "BEGIN TRAN"; + let stmt = ms().verified_stmt(sql); + match &stmt { + Statement::StartTransaction { + begin, + transaction, + has_end_keyword, + .. 
+ } => { + assert!(begin); + assert_eq!(*transaction, Some(BeginTransactionKind::Tran)); + assert!(!has_end_keyword); + } + _ => panic!("Expected StartTransaction, got: {stmt:?}"), + } + + // COMMIT TRAN normalizes to COMMIT (same as COMMIT TRANSACTION) + ms().one_statement_parses_to("COMMIT TRAN", "COMMIT"); + + // ROLLBACK TRAN normalizes to ROLLBACK (same as ROLLBACK TRANSACTION) + ms().one_statement_parses_to("ROLLBACK TRAN", "ROLLBACK"); +} From 2ea773a1ad0f18d2577846bb8201c0769734c098 Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Fri, 13 Feb 2026 23:00:47 -0800 Subject: [PATCH 077/121] Fixed select dollar column from stage for snowflake (#2165) --- src/dialect/mod.rs | 1 + src/dialect/snowflake.rs | 4 +++- src/parser/mod.rs | 46 ++++++++++++++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 12 ++++++++++ 4 files changed, 62 insertions(+), 1 deletion(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index d0b87d962a..6e374d3d82 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -49,6 +49,7 @@ pub use self::mysql::MySqlDialect; pub use self::oracle::OracleDialect; pub use self::postgresql::PostgreSqlDialect; pub use self::redshift::RedshiftSqlDialect; +pub use self::snowflake::parse_snowflake_stage_name; pub use self::snowflake::SnowflakeDialect; pub use self::sqlite::SQLiteDialect; diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index d647091600..31a17225f3 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -1230,7 +1230,7 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result { + Token::LParen | Token::RParen => { parser.prev_token(); break; } @@ -1248,6 +1248,8 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result Result { match parser.next_token().token { Token::AtSign => { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 1c20014d5b..e708217da2 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1284,6 +1284,11 @@ impl<'a> Parser<'a> { // SQLite has 
single-quoted identifiers id_parts.push(Ident::with_quote('\'', s)) } + Token::Placeholder(s) => { + // Snowflake uses $1, $2, etc. for positional column references + // in staged data queries like: SELECT t.$1 FROM @stage t + id_parts.push(Ident::new(s)) + } Token::Mul => { return Ok(Expr::QualifiedWildcard( ObjectName::from(id_parts), @@ -1946,6 +1951,13 @@ impl<'a> Parser<'a> { chain.push(AccessExpr::Dot(expr)); self.advance_token(); // The consumed string } + Token::Placeholder(s) => { + // Snowflake uses $1, $2, etc. for positional column references + // in staged data queries like: SELECT t.$1 FROM @stage t + let expr = Expr::Identifier(Ident::with_span(next_token.span, s)); + chain.push(AccessExpr::Dot(expr)); + self.advance_token(); // The consumed placeholder + } // Fallback to parsing an arbitrary expression, but restrict to expression // types that are valid after the dot operator. This ensures that e.g. // `T.interval` is parsed as a compound identifier, not as an interval @@ -15435,6 +15447,9 @@ impl<'a> Parser<'a> { && self.peek_keyword_with_tokens(Keyword::SEMANTIC_VIEW, &[Token::LParen]) { self.parse_semantic_view_table_factor() + } else if self.peek_token_ref().token == Token::AtSign { + // Stage reference: @mystage or @namespace.stage (e.g. Snowflake) + self.parse_snowflake_stage_table_factor() } else { let name = self.parse_object_name(true)?; @@ -15531,6 +15546,37 @@ impl<'a> Parser<'a> { } } + /// Parse a Snowflake stage reference as a table factor. + /// Handles syntax like: `@mystage1 (file_format => 'myformat', pattern => '...')` + /// + /// See: + fn parse_snowflake_stage_table_factor(&mut self) -> Result { + // Parse the stage name starting with @ + let name = crate::dialect::parse_snowflake_stage_name(self)?; + + // Parse optional stage options like (file_format => 'myformat', pattern => '...') + let args = if self.consume_token(&Token::LParen) { + Some(self.parse_table_function_args()?) 
+ } else { + None + }; + + let alias = self.maybe_parse_table_alias()?; + + Ok(TableFactor::Table { + name, + alias, + args, + with_hints: vec![], + version: None, + partitions: vec![], + with_ordinality: false, + json_path: None, + sample: None, + index_hints: vec![], + }) + } + fn maybe_parse_table_sample(&mut self) -> Result>, ParserError> { let modifier = if self.parse_keyword(Keyword::TABLESAMPLE) { TableSampleModifier::TableSample diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 222a9e533b..43444016f2 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -4878,3 +4878,15 @@ fn test_truncate_table_if_exists() { snowflake().verified_stmt("TRUNCATE TABLE my_table"); snowflake().verified_stmt("TRUNCATE IF EXISTS my_table"); } + +#[test] +fn test_select_dollar_column_from_stage() { + // With table function args and alias + snowflake().verified_stmt("SELECT t.$1, t.$2 FROM @mystage1(file_format => 'myformat', pattern => '.*data.*[.]csv.gz') t"); + // Without table function args, with alias + snowflake().verified_stmt("SELECT t.$1, t.$2 FROM @mystage1 t"); + // Without table function args, without alias + snowflake().verified_stmt("SELECT $1, $2 FROM @mystage1"); + // With table function args, without alias + snowflake().verified_stmt("SELECT $1, $2 FROM @mystage1(file_format => 'myformat')"); +} From 63eeaa0d7e4bdec852b34b6f125dfb90c1d7f3e9 Mon Sep 17 00:00:00 2001 From: xitep Date: Wed, 18 Feb 2026 10:24:01 +0100 Subject: [PATCH 078/121] [PIVOT] Optional AS keyword for aliases (#2209) --- src/ast/query.rs | 7 ++++-- src/parser/mod.rs | 49 +++++++++++++++++++++++++-------------- tests/sqlparser_common.rs | 12 ++++++++++ 3 files changed, 49 insertions(+), 19 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index b8f605be54..6d95216df1 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1589,6 +1589,7 @@ pub enum TableFactor { /// /// 
[BigQuery](https://cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#pivot_operator) /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constructs/pivot) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/SELECT.html#GUID-CFA006CA-6FF1-4972-821E-6996142A51C6__GUID-68257B27-1C4C-4C47-8140-5C60E0E65D35) Pivot { /// The input table to pivot. table: Box, @@ -1610,8 +1611,10 @@ pub enum TableFactor { /// table UNPIVOT [ { INCLUDE | EXCLUDE } NULLS ] (value FOR name IN (column1, [ column2, ... ])) [ alias ] /// ``` /// - /// See . - /// See . + /// [Snowflake](https://docs.snowflake.com/en/sql-reference/constructs/unpivot) + /// [Databricks](https://docs.databricks.com/aws/en/sql/language-manual/sql-ref-syntax-qry-select-unpivot) + /// [BigQuery](https://docs.cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax#unpivot_operator) + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/SELECT.html#GUID-CFA006CA-6FF1-4972-821E-6996142A51C6__GUID-9B4E0389-413C-4014-94A1-0A0571BDF7E1) Unpivot { /// The input table to unpivot. 
table: Box, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index e708217da2..eba9b32dcd 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -13617,7 +13617,7 @@ impl<'a> Parser<'a> { Keyword::PIVOT => { self.expect_token(&Token::LParen)?; let aggregate_functions = - self.parse_comma_separated(Self::parse_aliased_function_call)?; + self.parse_comma_separated(Self::parse_pivot_aggregate_function)?; self.expect_keyword_is(Keyword::FOR)?; let value_column = self.parse_period_separated(|p| p.parse_identifier())?; self.expect_keyword_is(Keyword::IN)?; @@ -16242,20 +16242,6 @@ impl<'a> Parser<'a> { }) } - fn parse_aliased_function_call(&mut self) -> Result { - let function_name = match self.next_token().token { - Token::Word(w) => Ok(w.value), - _ => self.expected("a function identifier", self.peek_token()), - }?; - let expr = self.parse_function(ObjectName::from(vec![Ident::new(function_name)]))?; - let alias = if self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier()?) - } else { - None - }; - - Ok(ExprWithAlias { expr, alias }) - } /// Parses an expression with an optional alias /// /// Examples: @@ -16289,13 +16275,40 @@ impl<'a> Parser<'a> { Ok(ExprWithAlias { expr, alias }) } + /// Parse an expression followed by an optional alias; Unlike + /// [Self::parse_expr_with_alias] the "AS" keyword between the expression + /// and the alias is optional. 
+ fn parse_expr_with_alias_optional_as_keyword(&mut self) -> Result { + let expr = self.parse_expr()?; + let alias = self.parse_identifier_optional_alias()?; + Ok(ExprWithAlias { expr, alias }) + } + + /// Parses a plain function call with an optional alias for the `PIVOT` clause + fn parse_pivot_aggregate_function(&mut self) -> Result { + let function_name = match self.next_token().token { + Token::Word(w) => Ok(w.value), + _ => self.expected("a function identifier", self.peek_token()), + }?; + let expr = self.parse_function(ObjectName::from(vec![Ident::new(function_name)]))?; + let alias = { + fn validator(explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { + // ~ for a PIVOT aggregate function the alias must not be a "FOR"; in any dialect + kw != &Keyword::FOR && parser.dialect.is_select_item_alias(explicit, kw, parser) + } + self.parse_optional_alias_inner(None, validator)? + }; + Ok(ExprWithAlias { expr, alias }) + } + /// Parse a PIVOT table factor (ClickHouse/Oracle style pivot), returning a TableFactor. pub fn parse_pivot_table_factor( &mut self, table: TableFactor, ) -> Result { self.expect_token(&Token::LParen)?; - let aggregate_functions = self.parse_comma_separated(Self::parse_aliased_function_call)?; + let aggregate_functions = + self.parse_comma_separated(Self::parse_pivot_aggregate_function)?; self.expect_keyword_is(Keyword::FOR)?; let value_column = if self.peek_token_ref().token == Token::LParen { self.parse_parenthesized_column_list_inner(Mandatory, false, |p| { @@ -16317,7 +16330,9 @@ impl<'a> Parser<'a> { } else if self.peek_sub_query() { PivotValueSource::Subquery(self.parse_query()?) } else { - PivotValueSource::List(self.parse_comma_separated(Self::parse_expr_with_alias)?) 
+ PivotValueSource::List( + self.parse_comma_separated(Self::parse_expr_with_alias_optional_as_keyword)?, + ) }; self.expect_token(&Token::RParen)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 5822153ac9..182854d13b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -11357,6 +11357,18 @@ fn parse_pivot_table() { verified_stmt(multiple_value_columns_sql).to_string(), multiple_value_columns_sql ); + + // assert optional "AS" keyword for aliases for pivot values + one_statement_parses_to( + "SELECT * FROM t PIVOT(SUM(1) FOR a.abc IN (1 x, 'two' y, three z))", + "SELECT * FROM t PIVOT(SUM(1) FOR a.abc IN (1 AS x, 'two' AS y, three AS z))", + ); + + // assert optional "AS" keyword for aliases for pivot aggregate function + one_statement_parses_to( + "SELECT * FROM t PIVOT(SUM(1) x, COUNT(42) y FOR a.abc IN (1))", + "SELECT * FROM t PIVOT(SUM(1) AS x, COUNT(42) AS y FOR a.abc IN (1))", + ); } #[test] From 40350e321f918005e07f818fa31d816f4f8b4c8c Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Wed, 18 Feb 2026 13:36:51 +0400 Subject: [PATCH 079/121] Fix `derive_dialect!` proc macro for use from external crates (#2219) Co-authored-by: Alexander Beedie --- derive/src/dialect.rs | 99 +++++++++++++++++++++++++++++++------------ 1 file changed, 73 insertions(+), 26 deletions(-) diff --git a/derive/src/dialect.rs b/derive/src/dialect.rs index 9873e4f7b5..9066bf9645 100644 --- a/derive/src/dialect.rs +++ b/derive/src/dialect.rs @@ -120,24 +120,20 @@ impl Parse for DeriveDialectInput { /// Entry point for the `derive_dialect!` macro pub(crate) fn derive_dialect(input: DeriveDialectInput) -> proc_macro::TokenStream { - let err = |msg: String| { - Error::new(proc_macro2::Span::call_site(), msg) - .to_compile_error() - .into() - }; + match derive_dialect_inner(input) { + Ok(tokens) => tokens.into(), + Err(e) => e.to_compile_error().into(), + } +} - let source = match read_dialect_mod_file() { - Ok(s) => s, - Err(e) => return 
err(format!("Failed to read dialect/mod.rs: {e}")), - }; - let file: File = match syn::parse_str(&source) { - Ok(f) => f, - Err(e) => return err(format!("Failed to parse source: {e}")), - }; - let methods = match extract_dialect_methods(&file) { - Ok(m) => m, - Err(e) => return e.to_compile_error().into(), - }; +fn derive_dialect_inner(input: DeriveDialectInput) -> syn::Result { + let call_site = proc_macro2::Span::call_site(); + + let source = read_dialect_mod_file() + .map_err(|e| Error::new(call_site, format!("Failed to read dialect/mod.rs: {e}")))?; + let file: File = syn::parse_str::(&source) + .map_err(|e| Error::new(call_site, format!("Failed to parse source: {e}")))?; + let methods = extract_dialect_methods(&file)?; // Validate overrides let bool_names: HashSet<_> = methods @@ -147,20 +143,23 @@ pub(crate) fn derive_dialect(input: DeriveDialectInput) -> proc_macro::TokenStre .collect(); for (key, value) in &input.overrides { let key_str = key.to_string(); - let err = |msg| Error::new(key.span(), msg).to_compile_error().into(); match value { Override::Bool(_) if !bool_names.contains(&key_str) => { - return err(format!("Unknown boolean method `{key_str}`")); + return Err(Error::new( + key.span(), + format!("Unknown boolean method `{key_str}`"), + )); } Override::Char(_) | Override::None if key_str != "identifier_quote_style" => { - return err(format!( - "Char/None only valid for `identifier_quote_style`, not `{key_str}`" + return Err(Error::new( + key.span(), + format!("Char/None only valid for `identifier_quote_style`, not `{key_str}`"), )); } _ => {} } } - generate_derived_dialect(&input, &methods).into() + Ok(generate_derived_dialect(&input, &methods)) } /// Generate the complete derived `Dialect` implementation @@ -258,11 +257,59 @@ fn extract_param_names(sig: &Signature) -> Vec<&Ident> { } /// Read the `dialect/mod.rs` file that contains the Dialect trait. +/// +/// Searches for the file in the following order: +/// 1. 
`$CARGO_MANIFEST_DIR/src/dialect/mod.rs` - works when the macro is +/// invoked from within the `sqlparser` crate itself (e.g. in tests). +/// 2. `/../src/dialect/mod.rs` - works when +/// `sqlparser_derive` lives in a workspace alongside the main crate +/// (the standard `derive/` layout). +/// 3. Sibling directories of the compiled `sqlparser_derive` crate in the +/// Cargo registry - works when an external crate uses `derive_dialect!` +/// via a registry dependency. fn read_dialect_mod_file() -> Result { - let manifest_dir = - std::env::var("CARGO_MANIFEST_DIR").map_err(|_| "CARGO_MANIFEST_DIR not set")?; - let path = std::path::Path::new(&manifest_dir).join("src/dialect/mod.rs"); - std::fs::read_to_string(&path).map_err(|e| format!("Failed to read {}: {e}", path.display())) + use std::path::{Path, PathBuf}; + + const DERIVE_CRATE_DIR: &str = env!("CARGO_MANIFEST_DIR"); + let derive_dir = Path::new(DERIVE_CRATE_DIR); + let mut candidates: Vec = Vec::new(); + + // The crate being compiled (eg: within sqlparser). + if let Ok(manifest_dir) = std::env::var("CARGO_MANIFEST_DIR") { + candidates.push(Path::new(&manifest_dir).join("src/dialect/mod.rs")); + } + // Workspace layout: the main crate is the parent of `derive/`. + candidates.push(derive_dir.join("../src/dialect/mod.rs")); + + // Cargo registry: look for sibling `sqlparser-*` directories (prefer newest). 
+ if let Some(parent) = derive_dir.parent() { + if let Ok(entries) = std::fs::read_dir(parent) { + let mut siblings: Vec<_> = entries + .filter_map(|e| e.ok()) + .filter(|e| { + let name = e.file_name(); + let name = name.to_string_lossy(); + name.starts_with("sqlparser-") && !name.starts_with("sqlparser-derive") + }) + .collect(); + siblings.sort_by(|a, b| b.file_name().cmp(&a.file_name())); + candidates.extend( + siblings + .into_iter() + .map(|e| e.path().join("src/dialect/mod.rs")), + ); + } + } + for path in &candidates { + if let Ok(content) = std::fs::read_to_string(path) { + return Ok(content); + } + } + Err(format!( + "Could not find `sqlparser` dialect/mod.rs file. \ + Searched in $CARGO_MANIFEST_DIR/src/dialect/mod.rs and \ + the `sqlparser_derive` crate at {DERIVE_CRATE_DIR}" + )) } /// Extract all methods from the `Dialect` trait (excluding `dialect` for TypeId) From 0f37c278fb43e42e42941c763b9329b199e494a9 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Thu, 19 Feb 2026 15:27:18 +0400 Subject: [PATCH 080/121] Resolve breaking change that set "supports_lambda_functions" on `GenericDialect` (#2224) --- src/dialect/generic.rs | 4 ---- tests/sqlparser_common.rs | 1 + tests/sqlparser_duckdb.rs | 8 ++++---- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index 5e929d738f..1cf195e637 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -225,10 +225,6 @@ impl Dialect for GenericDialect { true } - fn supports_lambda_functions(&self) -> bool { - true - } - fn supports_select_wildcard_replace(&self) -> bool { true } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 182854d13b..4f01ea0cee 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1663,6 +1663,7 @@ fn parse_json_ops_without_colon() { Arrow, all_dialects_except(|d| d.supports_lambda_functions()), ), + ("->", Arrow, pg_and_generic()), ("->>", LongArrow, all_dialects()), ("#>", 
HashArrow, pg_and_generic()), ("#>>", HashLongArrow, pg_and_generic()), diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index b9ae26491f..91eb2799c6 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -882,17 +882,17 @@ fn parse_extract_single_quotes() { fn test_duckdb_lambda_function() { // Test basic lambda with list_filter let sql = "SELECT [3, 4, 5, 6].list_filter(lambda x : x > 4)"; - duckdb_and_generic().verified_stmt(sql); + duckdb().verified_stmt(sql); // Test lambda with arrow syntax (also supported by DuckDB) let sql_arrow = "SELECT list_filter([1, 2, 3], x -> x > 1)"; - duckdb_and_generic().verified_stmt(sql_arrow); + duckdb().verified_stmt(sql_arrow); // Test lambda with multiple parameters (with index) let sql_multi = "SELECT list_filter([1, 3, 1, 5], lambda x, i : x > i)"; - duckdb_and_generic().verified_stmt(sql_multi); + duckdb().verified_stmt(sql_multi); // Test lambda in list_transform let sql_transform = "SELECT list_transform([1, 2, 3], lambda x : x * 2)"; - duckdb_and_generic().verified_stmt(sql_transform); + duckdb().verified_stmt(sql_transform); } From fb10a76f1772fc7f5805931dc308792f3c3ee716 Mon Sep 17 00:00:00 2001 From: "Guan-Ming (Wesley) Chiu" <105915352+guan404ming@users.noreply.github.com> Date: Thu, 19 Feb 2026 19:28:19 +0800 Subject: [PATCH 081/121] MSSQL: Add support for WAITFOR statement (#2210) Signed-off-by: Guan-Ming (Wesley) Chiu <105915352+guan404ming@users.noreply.github.com> Signed-off-by: Guan-Ming Chiu Co-authored-by: Ifeanyi Ubah <7816405+iffyio@users.noreply.github.com> --- src/ast/mod.rs | 46 ++++++++++++++++++++++++++++++++++++++++ src/ast/spans.rs | 1 + src/keywords.rs | 2 ++ src/parser/mod.rs | 17 +++++++++++++++ tests/sqlparser_mssql.rs | 37 ++++++++++++++++++++++++++++++++ 5 files changed, 103 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index eda2822600..dbf5003ccd 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4764,6 +4764,10 @@ pub enum Statement { /// 
/// See: Print(PrintStatement), + /// MSSQL `WAITFOR` statement. + /// + /// See: + WaitFor(WaitForStatement), /// ```sql /// RETURN [ expression ] /// ``` @@ -6217,6 +6221,7 @@ impl fmt::Display for Statement { } Statement::Throw(s) => write!(f, "{s}"), Statement::Print(s) => write!(f, "{s}"), + Statement::WaitFor(s) => write!(f, "{s}"), Statement::Return(r) => write!(f, "{r}"), Statement::List(command) => write!(f, "LIST {command}"), Statement::Remove(command) => write!(f, "REMOVE {command}"), @@ -10964,6 +10969,47 @@ impl fmt::Display for PrintStatement { } } +/// The type of `WAITFOR` statement (MSSQL). +/// +/// See: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum WaitForType { + /// `WAITFOR DELAY 'time_to_pass'` + Delay, + /// `WAITFOR TIME 'time_to_execute'` + Time, +} + +impl fmt::Display for WaitForType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + WaitForType::Delay => write!(f, "DELAY"), + WaitForType::Time => write!(f, "TIME"), + } + } +} + +/// MSSQL `WAITFOR` statement. +/// +/// See: +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct WaitForStatement { + /// `DELAY` or `TIME`. + pub wait_type: WaitForType, + /// The time expression. + pub expr: Expr, +} + +impl fmt::Display for WaitForStatement { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "WAITFOR {} {}", self.wait_type, self.expr) + } +} + /// Represents a `Return` statement. 
/// /// [MsSql triggers](https://learn.microsoft.com/en-us/sql/t-sql/statements/create-trigger-transact-sql) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index f4bdf85a39..d792c13caa 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -483,6 +483,7 @@ impl Spanned for Statement { Statement::RaisError { .. } => Span::empty(), Statement::Throw(_) => Span::empty(), Statement::Print { .. } => Span::empty(), + Statement::WaitFor(_) => Span::empty(), Statement::Return { .. } => Span::empty(), Statement::List(..) | Statement::Remove(..) => Span::empty(), Statement::ExportData(ExportData { diff --git a/src/keywords.rs b/src/keywords.rs index f1dbcd9378..cc2b9e9dd0 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -312,6 +312,7 @@ define_keywords!( DEFINE, DEFINED, DEFINER, + DELAY, DELAYED, DELAY_KEY_WRITE, DELEGATED, @@ -1138,6 +1139,7 @@ define_keywords!( VIRTUAL, VOLATILE, VOLUME, + WAITFOR, WAREHOUSE, WAREHOUSES, WEEK, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index eba9b32dcd..4f32422f8a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -703,6 +703,8 @@ impl<'a> Parser<'a> { // `COMMENT` is snowflake specific https://docs.snowflake.com/en/sql-reference/sql/comment Keyword::COMMENT if self.dialect.supports_comment_on() => self.parse_comment(), Keyword::PRINT => self.parse_print(), + // `WAITFOR` is MSSQL specific https://learn.microsoft.com/en-us/sql/t-sql/language-elements/waitfor-transact-sql + Keyword::WAITFOR => self.parse_waitfor(), Keyword::RETURN => self.parse_return(), Keyword::EXPORT => { self.prev_token(); @@ -19289,6 +19291,21 @@ impl<'a> Parser<'a> { })) } + /// Parse [Statement::WaitFor] + /// + /// See: + fn parse_waitfor(&mut self) -> Result { + let wait_type = if self.parse_keyword(Keyword::DELAY) { + WaitForType::Delay + } else if self.parse_keyword(Keyword::TIME) { + WaitForType::Time + } else { + return self.expected("DELAY or TIME", self.peek_token()); + }; + let expr = self.parse_expr()?; + 
Ok(Statement::WaitFor(WaitForStatement { wait_type, expr })) + } + /// Parse [Statement::Return] fn parse_return(&mut self) -> Result { match self.maybe_parse(|p| p.parse_expr())? { diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index cf9ae89809..d7d11ba669 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -1702,6 +1702,43 @@ fn test_parse_throw() { ); } +#[test] +fn test_parse_waitfor() { + // WAITFOR DELAY + let sql = "WAITFOR DELAY '00:00:05'"; + let stmt = ms_and_generic().verified_stmt(sql); + assert_eq!( + stmt, + Statement::WaitFor(WaitForStatement { + wait_type: WaitForType::Delay, + expr: Expr::Value( + (Value::SingleQuotedString("00:00:05".to_string())).with_empty_span() + ), + }) + ); + + // WAITFOR TIME + let sql = "WAITFOR TIME '14:30:00'"; + let stmt = ms_and_generic().verified_stmt(sql); + assert_eq!( + stmt, + Statement::WaitFor(WaitForStatement { + wait_type: WaitForType::Time, + expr: Expr::Value( + (Value::SingleQuotedString("14:30:00".to_string())).with_empty_span() + ), + }) + ); + + // WAITFOR DELAY with variable + let sql = "WAITFOR DELAY @WaitTime"; + let _ = ms_and_generic().verified_stmt(sql); + + // Error: WAITFOR without DELAY or TIME + let res = ms_and_generic().parse_sql_statements("WAITFOR '00:00:05'"); + assert!(res.is_err()); +} + #[test] fn parse_use() { let valid_object_names = [ From 15dc6a22a99131072f05b94f59f1532852209cc6 Mon Sep 17 00:00:00 2001 From: "Guan-Ming (Wesley) Chiu" <105915352+guan404ming@users.noreply.github.com> Date: Thu, 19 Feb 2026 19:29:28 +0800 Subject: [PATCH 082/121] PostgreSQL: Support PRIMARY KEY/UNIQUE USING INDEX (#2213) Signed-off-by: Guan-Ming Chiu --- src/ast/mod.rs | 4 +-- src/ast/spans.rs | 2 ++ src/ast/table_constraints.rs | 68 ++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 31 ++++++++++++++++ tests/sqlparser_postgres.rs | 39 +++++++++++++++++++++ 5 files changed, 142 insertions(+), 2 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 
dbf5003ccd..d534b300b4 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -136,8 +136,8 @@ mod dml; pub mod helpers; pub mod table_constraints; pub use table_constraints::{ - CheckConstraint, ForeignKeyConstraint, FullTextOrSpatialConstraint, IndexConstraint, - PrimaryKeyConstraint, TableConstraint, UniqueConstraint, + CheckConstraint, ConstraintUsingIndex, ForeignKeyConstraint, FullTextOrSpatialConstraint, + IndexConstraint, PrimaryKeyConstraint, TableConstraint, UniqueConstraint, }; mod operator; mod query; diff --git a/src/ast/spans.rs b/src/ast/spans.rs index d792c13caa..74f19e831e 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -626,6 +626,8 @@ impl Spanned for TableConstraint { TableConstraint::Check(constraint) => constraint.span(), TableConstraint::Index(constraint) => constraint.span(), TableConstraint::FulltextOrSpatial(constraint) => constraint.span(), + TableConstraint::PrimaryKeyUsingIndex(constraint) + | TableConstraint::UniqueUsingIndex(constraint) => constraint.span(), } } } diff --git a/src/ast/table_constraints.rs b/src/ast/table_constraints.rs index cb3c2376d6..9ba196a81e 100644 --- a/src/ast/table_constraints.rs +++ b/src/ast/table_constraints.rs @@ -101,6 +101,22 @@ pub enum TableConstraint { /// [1]: https://dev.mysql.com/doc/refman/8.0/en/fulltext-natural-language.html /// [2]: https://dev.mysql.com/doc/refman/8.0/en/spatial-types.html FulltextOrSpatial(FullTextOrSpatialConstraint), + /// PostgreSQL [definition][1] for promoting an existing unique index to a + /// `PRIMARY KEY` constraint: + /// + /// `[ CONSTRAINT constraint_name ] PRIMARY KEY USING INDEX index_name + /// [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]` + /// + /// [1]: https://www.postgresql.org/docs/current/sql-altertable.html + PrimaryKeyUsingIndex(ConstraintUsingIndex), + /// PostgreSQL [definition][1] for promoting an existing unique index to a + /// `UNIQUE` constraint: + /// + /// `[ CONSTRAINT constraint_name ] UNIQUE USING INDEX 
index_name + /// [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]` + /// + /// [1]: https://www.postgresql.org/docs/current/sql-altertable.html + UniqueUsingIndex(ConstraintUsingIndex), } impl From for TableConstraint { @@ -148,6 +164,8 @@ impl fmt::Display for TableConstraint { TableConstraint::Check(constraint) => constraint.fmt(f), TableConstraint::Index(constraint) => constraint.fmt(f), TableConstraint::FulltextOrSpatial(constraint) => constraint.fmt(f), + TableConstraint::PrimaryKeyUsingIndex(c) => c.fmt_with_keyword(f, "PRIMARY KEY"), + TableConstraint::UniqueUsingIndex(c) => c.fmt_with_keyword(f, "UNIQUE"), } } } @@ -535,3 +553,53 @@ impl crate::ast::Spanned for UniqueConstraint { ) } } + +/// PostgreSQL constraint that promotes an existing unique index to a table constraint. +/// +/// `[ CONSTRAINT constraint_name ] { UNIQUE | PRIMARY KEY } USING INDEX index_name +/// [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]` +/// +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct ConstraintUsingIndex { + /// Optional constraint name. + pub name: Option, + /// The name of the existing unique index to promote. + pub index_name: Ident, + /// Optional characteristics like `DEFERRABLE`. + pub characteristics: Option, +} + +impl ConstraintUsingIndex { + /// Format as `[CONSTRAINT name] USING INDEX index_name [characteristics]`. 
+ pub fn fmt_with_keyword(&self, f: &mut fmt::Formatter, keyword: &str) -> fmt::Result { + use crate::ast::ddl::{display_constraint_name, display_option_spaced}; + write!( + f, + "{}{} USING INDEX {}", + display_constraint_name(&self.name), + keyword, + self.index_name, + )?; + write!(f, "{}", display_option_spaced(&self.characteristics))?; + Ok(()) + } +} + +impl crate::ast::Spanned for ConstraintUsingIndex { + fn span(&self) -> Span { + let start = self + .name + .as_ref() + .map(|i| i.span) + .unwrap_or(self.index_name.span); + let end = self + .characteristics + .as_ref() + .map(|c| c.span()) + .unwrap_or(self.index_name.span); + start.union(&end) + } +} diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4f32422f8a..6c9314d951 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9333,6 +9333,21 @@ impl<'a> Parser<'a> { } } + /// Parse `index_name [ DEFERRABLE | NOT DEFERRABLE ] [ INITIALLY DEFERRED | INITIALLY IMMEDIATE ]` + /// after `{ PRIMARY KEY | UNIQUE } USING INDEX`. + fn parse_constraint_using_index( + &mut self, + name: Option, + ) -> Result { + let index_name = self.parse_identifier()?; + let characteristics = self.parse_constraint_characteristics()?; + Ok(ConstraintUsingIndex { + name, + index_name, + characteristics, + }) + } + /// Parse optional constraint characteristics such as `DEFERRABLE`, `INITIALLY` and `ENFORCED`. 
pub fn parse_constraint_characteristics( &mut self, @@ -9397,6 +9412,14 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { Token::Word(w) if w.keyword == Keyword::UNIQUE => { + // PostgreSQL: UNIQUE USING INDEX index_name + // https://www.postgresql.org/docs/current/sql-altertable.html + if self.parse_keywords(&[Keyword::USING, Keyword::INDEX]) { + return Ok(Some(TableConstraint::UniqueUsingIndex( + self.parse_constraint_using_index(name)?, + ))); + } + let index_type_display = self.parse_index_type_display(); if !dialect_of!(self is GenericDialect | MySqlDialect) && !index_type_display.is_none() @@ -9432,6 +9455,14 @@ impl<'a> Parser<'a> { // after `PRIMARY` always stay `KEY` self.expect_keyword_is(Keyword::KEY)?; + // PostgreSQL: PRIMARY KEY USING INDEX index_name + // https://www.postgresql.org/docs/current/sql-altertable.html + if self.parse_keywords(&[Keyword::USING, Keyword::INDEX]) { + return Ok(Some(TableConstraint::PrimaryKeyUsingIndex( + self.parse_constraint_using_index(name)?, + ))); + } + // optional index name let index_name = self.parse_optional_ident()?; let index_type = self.parse_optional_using_then_index_type()?; diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index f8c7381366..d79e2b833e 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -627,6 +627,45 @@ fn parse_alter_table_constraints_unique_nulls_distinct() { pg_and_generic().verified_stmt("ALTER TABLE t ADD CONSTRAINT b UNIQUE (c)"); } +#[test] +fn parse_alter_table_constraint_using_index() { + // PRIMARY KEY USING INDEX + // https://www.postgresql.org/docs/current/sql-altertable.html + let sql = "ALTER TABLE tab ADD CONSTRAINT c PRIMARY KEY USING INDEX my_index"; + match pg_and_generic().verified_stmt(sql) { + Statement::AlterTable(alter_table) => match &alter_table.operations[0] { + AlterTableOperation::AddConstraint { + constraint: TableConstraint::PrimaryKeyUsingIndex(c), + .. 
+ } => { + assert_eq!(c.name.as_ref().unwrap().to_string(), "c"); + assert_eq!(c.index_name.to_string(), "my_index"); + assert!(c.characteristics.is_none()); + } + _ => unreachable!(), + }, + _ => unreachable!(), + } + + // UNIQUE USING INDEX + pg_and_generic().verified_stmt("ALTER TABLE tab ADD CONSTRAINT c UNIQUE USING INDEX my_index"); + + // Without constraint name + pg_and_generic().verified_stmt("ALTER TABLE tab ADD PRIMARY KEY USING INDEX my_index"); + pg_and_generic().verified_stmt("ALTER TABLE tab ADD UNIQUE USING INDEX my_index"); + + // With DEFERRABLE + pg_and_generic().verified_stmt( + "ALTER TABLE tab ADD CONSTRAINT c PRIMARY KEY USING INDEX my_index DEFERRABLE", + ); + pg_and_generic().verified_stmt( + "ALTER TABLE tab ADD CONSTRAINT c UNIQUE USING INDEX my_index NOT DEFERRABLE INITIALLY IMMEDIATE", + ); + pg_and_generic().verified_stmt( + "ALTER TABLE tab ADD CONSTRAINT c PRIMARY KEY USING INDEX my_index DEFERRABLE INITIALLY DEFERRED", + ); +} + #[test] fn parse_alter_table_disable() { pg_and_generic().verified_stmt("ALTER TABLE tab DISABLE ROW LEVEL SECURITY"); From bfd944c6ceb34e0a201e64e0d6ba6a8483f8d5f2 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Fri, 20 Feb 2026 18:26:20 +0400 Subject: [PATCH 083/121] Prefer use of `peek_token_ref` over `peek_token` where valid (#2225) Co-authored-by: Alexander Beedie --- src/dialect/mod.rs | 54 ++-- src/dialect/mssql.rs | 6 +- src/dialect/mysql.rs | 2 +- src/dialect/oracle.rs | 4 +- src/dialect/postgresql.rs | 4 +- src/dialect/snowflake.rs | 40 +-- src/parser/alter.rs | 20 +- src/parser/mod.rs | 513 ++++++++++++++++++++------------------ 8 files changed, 337 insertions(+), 306 deletions(-) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 6e374d3d82..b1be1590de 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -756,17 +756,17 @@ pub trait Dialect: Debug + Any { }; } - let token = parser.peek_token(); + let token = parser.peek_token_ref(); debug!("get_next_precedence_full() 
{token:?}"); - match token.token { + match &token.token { Token::Word(w) if w.keyword == Keyword::OR => Ok(p!(Or)), Token::Word(w) if w.keyword == Keyword::AND => Ok(p!(And)), Token::Word(w) if w.keyword == Keyword::XOR => Ok(p!(Xor)), Token::Word(w) if w.keyword == Keyword::AT => { match ( - parser.peek_nth_token(1).token, - parser.peek_nth_token(2).token, + &parser.peek_nth_token_ref(1).token, + &parser.peek_nth_token_ref(2).token, ) { (Token::Word(w), Token::Word(w2)) if w.keyword == Keyword::TIME && w2.keyword == Keyword::ZONE => @@ -777,28 +777,30 @@ pub trait Dialect: Debug + Any { } } - Token::Word(w) if w.keyword == Keyword::NOT => match parser.peek_nth_token(1).token { - // The precedence of NOT varies depending on keyword that - // follows it. If it is followed by IN, BETWEEN, or LIKE, - // it takes on the precedence of those tokens. Otherwise, it - // is not an infix operator, and therefore has zero - // precedence. - Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)), - Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)), - Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)), - Token::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)), - Token::Word(w) - if w.keyword == Keyword::NULL && !parser.in_column_definition_state() => - { - Ok(p!(Is)) + Token::Word(w) if w.keyword == Keyword::NOT => { + match &parser.peek_nth_token_ref(1).token { + // The precedence of NOT varies depending on keyword that + // follows it. If it is followed by IN, BETWEEN, or LIKE, + // it takes on the precedence of those tokens. Otherwise, it + // is not an infix operator, and therefore has zero + // precedence. 
+ Token::Word(w) if w.keyword == Keyword::IN => Ok(p!(Between)), + Token::Word(w) if w.keyword == Keyword::BETWEEN => Ok(p!(Between)), + Token::Word(w) if w.keyword == Keyword::LIKE => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::ILIKE => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::RLIKE => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::REGEXP => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MATCH => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::SIMILAR => Ok(p!(Like)), + Token::Word(w) if w.keyword == Keyword::MEMBER => Ok(p!(Like)), + Token::Word(w) + if w.keyword == Keyword::NULL && !parser.in_column_definition_state() => + { + Ok(p!(Is)) + } + _ => Ok(self.prec_unknown()), } - _ => Ok(self.prec_unknown()), - }, + } Token::Word(w) if w.keyword == Keyword::NOTNULL && self.supports_notnull_operator() => { Ok(p!(Is)) } @@ -861,7 +863,7 @@ pub trait Dialect: Debug + Any { Token::DoubleColon | Token::ExclamationMark | Token::LBracket | Token::CaretAt => { Ok(p!(DoubleColon)) } - Token::Colon => match parser.peek_nth_token(1).token { + Token::Colon => match &parser.peek_nth_token_ref(1).token { // When colon is followed by a string or a number, it's usually in MAP syntax. 
Token::SingleQuotedString(_) | Token::Number(_, _) => Ok(self.prec_unknown()), // In other cases, it's used in semi-structured data traversal like in variant or JSON diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index a2127f0da5..42e05858ff 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -159,7 +159,7 @@ impl Dialect for MsSqlDialect { .is_some() || matches!(p.peek_token_ref().token, Token::SemiColon | Token::EOF) { - p.expected("statement", p.peek_token()) + p.expected_ref("statement", p.peek_token_ref()) } else { Ok(()) } @@ -189,8 +189,8 @@ impl Dialect for MsSqlDialect { } fn get_next_precedence(&self, parser: &Parser) -> Option> { - let token = parser.peek_token(); - match token.token { + let token = parser.peek_token_ref(); + match &token.token { // lowest prec to prevent it from turning into a binary op Token::Colon => Some(Ok(self.prec_unknown())), _ => None, diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index b31a2310db..bdced4826b 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -244,7 +244,7 @@ fn parse_lock_tables_type(parser: &mut Parser) -> Result Option> { - let t = parser.peek_token(); + let t = parser.peek_token_ref(); debug!("get_next_precedence() {t:?}"); - match t.token { + match &t.token { Token::StringConcat => Some(Ok(self.prec_value(Precedence::PlusMinus))), _ => None, } diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 8e4d78a448..b0511f6d2e 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -105,12 +105,12 @@ impl Dialect for PostgreSqlDialect { } fn get_next_precedence(&self, parser: &Parser) -> Option> { - let token = parser.peek_token(); + let token = parser.peek_token_ref(); debug!("get_next_precedence() {token:?}"); // we only return some custom value here when the behaviour (not merely the numeric value) differs // from the default implementation - match token.token { + match &token.token { Token::Word(w) if w.keyword == 
Keyword::COLLATE && !parser.in_column_definition_state() => { diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 31a17225f3..8af1367f0e 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -265,7 +265,7 @@ impl Dialect for SnowflakeDialect { let set = match parser.parse_one_of_keywords(&[Keyword::SET, Keyword::UNSET]) { Some(Keyword::SET) => true, Some(Keyword::UNSET) => false, - _ => return Some(parser.expected("SET or UNSET", parser.peek_token())), + _ => return Some(parser.expected_ref("SET or UNSET", parser.peek_token_ref())), }; return Some(parse_alter_session(parser, set)); } @@ -417,9 +417,9 @@ impl Dialect for SnowflakeDialect { } fn get_next_precedence(&self, parser: &Parser) -> Option> { - let token = parser.peek_token(); + let token = parser.peek_token_ref(); // Snowflake supports the `:` cast operator unlike other dialects - match token.token { + match &token.token { Token::Colon => Some(Ok(self.prec_value(Precedence::DoubleColon))), _ => None, } @@ -715,9 +715,9 @@ fn parse_alter_dynamic_table(parser: &mut Parser) -> Result Result Result ident.push('-'), Token::Number(n, _) => ident.push_str(n), Token::Word(w) => ident.push_str(&w.to_string()), - _ => return parser.expected("stage name identifier", parser.peek_token()), + _ => return parser.expected_ref("stage name identifier", parser.peek_token_ref()), } } Ok(Ident::new(ident)) @@ -1273,7 +1276,7 @@ pub fn parse_snowflake_stage_name(parser: &mut Parser) -> Result` /// and `COPY INTO ` which have different syntax. pub fn parse_copy_into(parser: &mut Parser) -> Result { - let kind = match parser.peek_token().token { + let kind = match &parser.peek_token_ref().token { // Indicates an internal stage Token::AtSign => CopyIntoSnowflakeKind::Location, // Indicates an external stage, i.e. 
s3://, gcs:// or azure:// @@ -1346,7 +1349,7 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { from_stage_alias = if parser.parse_keyword(Keyword::AS) { Some(match parser.next_token().token { Token::Word(w) => Ok(Ident::new(w.value)), - _ => parser.expected("stage alias", parser.peek_token()), + _ => parser.expected_ref("stage alias", parser.peek_token_ref()), }?) } else { None @@ -1404,7 +1407,10 @@ pub fn parse_copy_into(parser: &mut Parser) -> Result { // In `COPY INTO ` the copy options do not have a shared key // like in `COPY INTO
` Token::Word(key) => copy_options.push(parser.parse_key_value_option(&key)?), - _ => return parser.expected("another copy option, ; or EOF'", parser.peek_token()), + _ => { + return parser + .expected_ref("another copy option, ; or EOF'", parser.peek_token_ref()) + } } } } @@ -1499,7 +1505,7 @@ fn parse_select_item_for_data_load( // parse element element = Some(Ident::new(match parser.next_token().token { Token::Word(w) => Ok(w.value), - _ => parser.expected("file_col_num", parser.peek_token()), + _ => parser.expected_ref("file_col_num", parser.peek_token_ref()), }?)); } _ => { @@ -1512,7 +1518,7 @@ fn parse_select_item_for_data_load( if parser.parse_keyword(Keyword::AS) { item_as = Some(match parser.next_token().token { Token::Word(w) => Ok(Ident::new(w.value)), - _ => parser.expected("column item alias", parser.peek_token()), + _ => parser.expected_ref("column item alias", parser.peek_token_ref()), }?); } @@ -1540,7 +1546,7 @@ fn parse_stage_params(parser: &mut Parser) -> Result Ok(word), - _ => parser.expected("a URL statement", parser.peek_token()), + _ => parser.expected_ref("a URL statement", parser.peek_token_ref()), }?) } @@ -1555,7 +1561,7 @@ fn parse_stage_params(parser: &mut Parser) -> Result Ok(word), - _ => parser.expected("an endpoint statement", parser.peek_token()), + _ => parser.expected_ref("an endpoint statement", parser.peek_token_ref()), }?) 
} @@ -1795,7 +1801,7 @@ fn parse_multi_table_insert_into_clauses( into_clauses.push(parse_multi_table_insert_into_clause(parser)?); } if into_clauses.is_empty() { - return parser.expected("INTO clause in multi-table INSERT", parser.peek_token()); + return parser.expected_ref("INTO clause in multi-table INSERT", parser.peek_token_ref()); } Ok(into_clauses) } @@ -1874,9 +1880,9 @@ fn parse_multi_table_insert_when_clauses( } if when_clauses.is_empty() { - return parser.expected( + return parser.expected_ref( "at least one WHEN clause in conditional multi-table INSERT", - parser.peek_token(), + parser.peek_token_ref(), ); } diff --git a/src/parser/alter.rs b/src/parser/alter.rs index c64c4a409f..ce1220e166 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -182,9 +182,9 @@ impl Parser<'_> { } else if self.parse_keyword(Keyword::AUTHORIZATIONS) { None } else { - return self.expected( + return self.expected_ref( "REMOVE DELEGATED AUTHORIZATION OF ROLE | REMOVE DELEGATED AUTHORIZATIONS", - self.peek_token(), + self.peek_token_ref(), ); }; self.expect_keywords(&[Keyword::FROM, Keyword::SECURITY, Keyword::INTEGRATION])?; @@ -340,7 +340,7 @@ impl Parser<'_> { } else if self.parse_keyword(Keyword::DUO) { Ok(MfaMethodKind::Duo) } else { - self.expected("PASSKEY, TOTP or DUO", self.peek_token()) + self.expected_ref("PASSKEY, TOTP or DUO", self.peek_token_ref()) } } @@ -358,10 +358,10 @@ impl Parser<'_> { let role_name = self.parse_identifier()?; AlterRoleOperation::RenameRole { role_name } } else { - return self.expected("= after WITH NAME ", self.peek_token()); + return self.expected_ref("= after WITH NAME ", self.peek_token_ref()); } } else { - return self.expected("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token()); + return self.expected_ref("'ADD' or 'DROP' or 'WITH NAME'", self.peek_token_ref()); }; Ok(Statement::AlterRole { @@ -385,7 +385,7 @@ impl Parser<'_> { let role_name = self.parse_identifier()?; AlterRoleOperation::RenameRole { role_name } } else { - 
return self.expected("TO after RENAME", self.peek_token()); + return self.expected_ref("TO after RENAME", self.peek_token_ref()); } // SET } else if self.parse_keyword(Keyword::SET) { @@ -412,10 +412,10 @@ impl Parser<'_> { in_database, } } else { - self.expected("config value", self.peek_token())? + self.expected_ref("config value", self.peek_token_ref())? } } else { - self.expected("'TO' or '=' or 'FROM CURRENT'", self.peek_token())? + self.expected_ref("'TO' or '=' or 'FROM CURRENT'", self.peek_token_ref())? } // RESET } else if self.parse_keyword(Keyword::RESET) { @@ -442,7 +442,7 @@ impl Parser<'_> { } // check option if options.is_empty() { - return self.expected("option", self.peek_token())?; + return self.expected_ref("option", self.peek_token_ref())?; } AlterRoleOperation::WithOptions { options } @@ -504,7 +504,7 @@ impl Parser<'_> { self.expect_keyword_is(Keyword::UNTIL)?; RoleOption::ValidUntil(Expr::Value(self.parse_value()?)) } - _ => self.expected("option", self.peek_token())?, + _ => self.expected_ref("option", self.peek_token_ref())?, }; Ok(option) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6c9314d951..de12078a50 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -504,7 +504,7 @@ impl<'a> Parser<'a> { expecting_statement_delimiter = false; } - match self.peek_token().token { + match &self.peek_token_ref().token { Token::EOF => break, // end of statement @@ -517,7 +517,7 @@ impl<'a> Parser<'a> { } if expecting_statement_delimiter { - return self.expected("end of statement", self.peek_token()); + return self.expected_ref("end of statement", self.peek_token_ref()); } let statement = self.parse_statement()?; @@ -944,7 +944,10 @@ impl<'a> Parser<'a> { let mut export = false; if !dialect_of!(self is MySqlDialect | GenericDialect) { - return parser_err!("Unsupported statement FLUSH", self.peek_token().span.start); + return parser_err!( + "Unsupported statement FLUSH", + self.peek_token_ref().span.start + ); } let location = if 
self.parse_keyword(Keyword::NO_WRITE_TO_BINLOG) { @@ -1007,10 +1010,10 @@ impl<'a> Parser<'a> { FlushType::Tables } else { - return self.expected( + return self.expected_ref( "BINARY LOGS, ENGINE LOGS, ERROR LOGS, GENERAL LOGS, HOSTS, LOGS, PRIVILEGES, OPTIMIZER_COSTS,\ RELAY LOGS [FOR CHANNEL channel], SLOW LOGS, STATUS, USER_RESOURCES", - self.peek_token(), + self.peek_token_ref(), ); }; @@ -1136,7 +1139,8 @@ impl<'a> Parser<'a> { let ident = self.parse_identifier()?; options.push(AttachDuckDBDatabaseOption::Type(ident)); } else { - return self.expected("expected one of: ), READ_ONLY, TYPE", self.peek_token()); + return self + .expected_ref("expected one of: ), READ_ONLY, TYPE", self.peek_token_ref()); }; if self.consume_token(&Token::RParen) { @@ -1144,7 +1148,7 @@ impl<'a> Parser<'a> { } else if self.consume_token(&Token::Comma) { continue; } else { - return self.expected("expected one of: ')', ','", self.peek_token()); + return self.expected_ref("expected one of: ')', ','", self.peek_token_ref()); } } } @@ -1267,7 +1271,7 @@ impl<'a> Parser<'a> { let next_token = self.next_token(); match next_token.token { t @ (Token::Word(_) | Token::SingleQuotedString(_)) => { - if self.peek_token().token == Token::Period { + if self.peek_token_ref().token == Token::Period { let mut id_parts: Vec = vec![match t { Token::Word(w) => w.into_ident(next_token.span), Token::SingleQuotedString(s) => Ident::with_quote('\'', s), @@ -1418,7 +1422,7 @@ impl<'a> Parser<'a> { Ok(expr) => expr, _ => { self.prev_token(); - return self.expected("wildcard or identifier", self.peek_token()); + return self.expected_ref("wildcard or identifier", self.peek_token_ref()); } } }; @@ -1449,7 +1453,7 @@ impl<'a> Parser<'a> { })?; Ok(rename_tables.into()) } else { - self.expected("KEYWORD `TABLE` after RENAME", self.peek_token()) + self.expected_ref("KEYWORD `TABLE` after RENAME", self.peek_token_ref()) } } @@ -1531,7 +1535,7 @@ impl<'a> Parser<'a> { Ok(Some(self.parse_array_expr(true)?)) } 
Keyword::ARRAY - if self.peek_token() == Token::LParen + if self.peek_token_ref().token == Token::LParen && !dialect_of!(self is ClickHouseDialect | DatabricksDialect) => { self.expect_token(&Token::LParen)?; @@ -1974,7 +1978,10 @@ impl<'a> Parser<'a> { | Expr::Identifier(_) | Expr::Value(_) | Expr::Function(_) => Ok(expr), - _ => parser.expected("an identifier or value", parser.peek_token()), + _ => parser.expected_ref( + "an identifier or value", + parser.peek_token_ref(), + ), } })?; @@ -2020,7 +2027,8 @@ impl<'a> Parser<'a> { let tok_index = self.get_current_index(); if let Some(wildcard_token) = ending_wildcard { if !Self::is_all_ident(&root, &chain) { - return self.expected("an identifier or a '*' after '.'", self.peek_token()); + return self + .expected_ref("an identifier or a '*' after '.'", self.peek_token_ref()); }; Ok(Expr::QualifiedWildcard( ObjectName::from(Self::exprs_to_idents(root, chain)?), @@ -2540,7 +2548,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::FOLLOWING) { Ok(WindowFrameBound::Following(rows)) } else { - self.expected("PRECEDING or FOLLOWING", self.peek_token()) + self.expected_ref("PRECEDING or FOLLOWING", self.peek_token_ref()) } } } @@ -2849,7 +2857,7 @@ impl<'a> Parser<'a> { Keyword::SUBSTRING => false, _ => { self.prev_token(); - return self.expected("SUBSTR or SUBSTRING", self.peek_token()); + return self.expected_ref("SUBSTR or SUBSTRING", self.peek_token_ref()); } }; self.expect_token(&Token::LParen)?; @@ -2908,7 +2916,7 @@ impl<'a> Parser<'a> { pub fn parse_trim_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; let mut trim_where = None; - if let Token::Word(word) = self.peek_token().token { + if let Token::Word(word) = &self.peek_token_ref().token { if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING].contains(&word.keyword) { trim_where = Some(self.parse_trim_where()?); } @@ -2992,14 +3000,14 @@ impl<'a> Parser<'a> { | Token::QuoteDelimitedStringLiteral(_) | 
Token::NationalQuoteDelimitedStringLiteral(_) | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)), - _ => self.expected( + _ => self.expected_ref( "either filler, WITH, or WITHOUT in LISTAGG", - self.peek_token(), + self.peek_token_ref(), )?, }; let with_count = self.parse_keyword(Keyword::WITH); if !with_count && !self.parse_keyword(Keyword::WITHOUT) { - self.expected("either WITH or WITHOUT in LISTAGG", self.peek_token())?; + self.expected_ref("either WITH or WITHOUT in LISTAGG", self.peek_token_ref())?; } self.expect_keyword_is(Keyword::COUNT)?; Ok(Some(ListAggOnOverflow::Truncate { filler, with_count })) @@ -3230,7 +3238,7 @@ impl<'a> Parser<'a> { } else if self.dialect.require_interval_qualifier() { return parser_err!( "INTERVAL requires a unit after the literal value", - self.peek_token().span.start + self.peek_token_ref().span.start ); } else { None @@ -3273,7 +3281,7 @@ impl<'a> Parser<'a> { /// Peek at the next token and determine if it is a temporal unit /// like `second`. pub fn next_token_is_temporal_unit(&mut self) -> bool { - if let Token::Word(word) = self.peek_token().token { + if let Token::Word(word) = &self.peek_token_ref().token { matches!( word.keyword, Keyword::YEAR @@ -3331,7 +3339,7 @@ impl<'a> Parser<'a> { if trailing_bracket.0 { return parser_err!( "unmatched > in STRUCT literal", - self.peek_token().span.start + self.peek_token_ref().span.start ); } @@ -3363,7 +3371,7 @@ impl<'a> Parser<'a> { if typed_syntax { return parser_err!("Typed syntax does not allow AS", { self.prev_token(); - self.peek_token().span.start + self.peek_token_ref().span.start }); } let field_name = self.parse_identifier()?; @@ -3398,7 +3406,7 @@ impl<'a> Parser<'a> { self.expect_keyword_is(Keyword::STRUCT)?; // Nothing to do if we have no type information. 
- if Token::Lt != self.peek_token() { + if self.peek_token_ref().token != Token::Lt { return Ok((Default::default(), false.into())); } self.next_token(); @@ -3650,7 +3658,7 @@ impl<'a> Parser<'a> { self.next_token(); true.into() } - _ => return self.expected(">", self.peek_token()), + _ => return self.expected_ref(">", self.peek_token_ref()), } } else { false.into() @@ -3919,9 +3927,9 @@ impl<'a> Parser<'a> { } else if let Ok(is_normalized) = self.parse_unicode_is_normalized(expr) { Ok(is_normalized) } else { - self.expected( + self.expected_ref( "[NOT] NULL | TRUE | FALSE | DISTINCT | [form] NORMALIZED FROM after IS", - self.peek_token(), + self.peek_token_ref(), ) } } @@ -3994,7 +4002,7 @@ impl<'a> Parser<'a> { escape_char: self.parse_escape_char()?, }) } else { - self.expected("IN or BETWEEN after NOT", self.peek_token()) + self.expected_ref("IN or BETWEEN after NOT", self.peek_token_ref()) } } Keyword::NOTNULL if dialect.supports_notnull_operator() => { @@ -4010,7 +4018,7 @@ impl<'a> Parser<'a> { array: Box::new(array), })) } else { - self.expected("OF after MEMBER", self.peek_token()) + self.expected_ref("OF after MEMBER", self.peek_token_ref()) } } // Can only happen if `get_next_precedence` got out of sync with this function @@ -4767,7 +4775,7 @@ impl<'a> Parser<'a> { if !self.consume_token(&Token::Comma) { break; } else if self.options.trailing_commas { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(kw) if kw.keyword == Keyword::ON => { break; } @@ -4930,7 +4938,7 @@ impl<'a> Parser<'a> { where F: FnMut(&mut Parser<'a>) -> Result, { - if self.peek_token().token == end_token { + if self.peek_token_ref().token == end_token { return Ok(vec![]); } @@ -5007,7 +5015,7 @@ impl<'a> Parser<'a> { /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found. 
pub fn parse_all_or_distinct(&mut self) -> Result, ParserError> { - let loc = self.peek_token().span.start; + let loc = self.peek_token_ref().span.start; let distinct = match self.parse_one_of_keywords(&[Keyword::ALL, Keyword::DISTINCT]) { Some(Keyword::ALL) => { if self.peek_keyword(Keyword::DISTINCT) { @@ -5094,9 +5102,9 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::USER) { self.parse_create_user(or_replace).map(Into::into) } else if or_replace { - self.expected( + self.expected_ref( "[EXTERNAL] TABLE or [MATERIALIZED] VIEW or FUNCTION after CREATE OR REPLACE", - self.peek_token(), + self.peek_token_ref(), ) } else if self.parse_keyword(Keyword::EXTENSION) { self.parse_create_extension().map(Into::into) @@ -5132,7 +5140,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::SERVER) { self.parse_pg_create_server() } else { - self.expected("an object type after CREATE", self.peek_token()) + self.expected_ref("an object type after CREATE", self.peek_token_ref()) } } @@ -5175,7 +5183,7 @@ impl<'a> Parser<'a> { let mut storage_specifier = None; let mut name = None; - if self.peek_token() != Token::LParen { + if self.peek_token_ref().token != Token::LParen { if self.parse_keyword(Keyword::IN) { storage_specifier = self.parse_identifier().ok() } else { @@ -5184,7 +5192,7 @@ impl<'a> Parser<'a> { // Storage specifier may follow the name if storage_specifier.is_none() - && self.peek_token() != Token::LParen + && self.peek_token_ref().token != Token::LParen && self.parse_keyword(Keyword::IN) { storage_specifier = self.parse_identifier().ok(); @@ -5209,7 +5217,7 @@ impl<'a> Parser<'a> { (true, false) => Some(true), (false, true) => Some(false), (false, false) => None, - _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, + _ => self.expected_ref("TEMPORARY or PERSISTENT", self.peek_token_ref())?, }; Ok(Statement::CreateSecret { @@ -5228,14 +5236,14 @@ impl<'a> Parser<'a> { let (mut table_flag, mut options, mut has_as, mut query) = 
(None, vec![], false, None); if self.parse_keyword(Keyword::TABLE) { let table_name = self.parse_object_name(false)?; - if self.peek_token().token != Token::EOF { - if let Token::Word(word) = self.peek_token().token { + if self.peek_token_ref().token != Token::EOF { + if let Token::Word(word) = &self.peek_token_ref().token { if word.keyword == Keyword::OPTIONS { options = self.parse_options(Keyword::OPTIONS)? } }; - if self.peek_token().token != Token::EOF { + if self.peek_token_ref().token != Token::EOF { let (a, q) = self.parse_as_query()?; has_as = a; query = Some(q); @@ -5261,14 +5269,14 @@ impl<'a> Parser<'a> { table_flag = Some(self.parse_object_name(false)?); if self.parse_keyword(Keyword::TABLE) { let table_name = self.parse_object_name(false)?; - if self.peek_token() != Token::EOF { - if let Token::Word(word) = self.peek_token().token { + if self.peek_token_ref().token != Token::EOF { + if let Token::Word(word) = &self.peek_token_ref().token { if word.keyword == Keyword::OPTIONS { options = self.parse_options(Keyword::OPTIONS)? 
} }; - if self.peek_token() != Token::EOF { + if self.peek_token_ref().token != Token::EOF { let (a, q) = self.parse_as_query()?; has_as = a; query = Some(q); @@ -5291,10 +5299,10 @@ impl<'a> Parser<'a> { }) } } else { - if self.peek_token() == Token::EOF { + if self.peek_token_ref().token == Token::EOF { self.prev_token(); } - self.expected("a `TABLE` keyword", self.peek_token()) + self.expected_ref("a `TABLE` keyword", self.peek_token_ref()) } } } @@ -5309,7 +5317,7 @@ impl<'a> Parser<'a> { } _ => Ok((false, self.parse_query()?)), }, - _ => self.expected("a QUERY statement", self.peek_token()), + _ => self.expected_ref("a QUERY statement", self.peek_token_ref()), } } @@ -5522,7 +5530,7 @@ impl<'a> Parser<'a> { .map(Into::into) } else { self.prev_token(); - self.expected("an object type after CREATE", self.peek_token()) + self.expected_ref("an object type after CREATE", self.peek_token_ref()) } } @@ -5623,7 +5631,8 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::SAFE) { body.parallel = Some(FunctionParallel::Safe); } else { - return self.expected("one of UNSAFE | RESTRICTED | SAFE", self.peek_token()); + return self + .expected_ref("one of UNSAFE | RESTRICTED | SAFE", self.peek_token_ref()); } } else if self.parse_keyword(Keyword::SECURITY) { ensure_not_set(&body.security, "SECURITY { DEFINER | INVOKER }")?; @@ -5632,7 +5641,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::INVOKER) { body.security = Some(FunctionSecurity::Invoker); } else { - return self.expected("DEFINER or INVOKER", self.peek_token()); + return self.expected_ref("DEFINER or INVOKER", self.peek_token_ref()); } } else if self.parse_keyword(Keyword::SET) { let name = self.parse_identifier()?; @@ -5640,7 +5649,7 @@ impl<'a> Parser<'a> { FunctionSetValue::FromCurrent } else { if !self.consume_token(&Token::Eq) && !self.parse_keyword(Keyword::TO) { - return self.expected("= or TO", self.peek_token()); + return self.expected_ref("= or TO", self.peek_token_ref()); } let 
values = self.parse_comma_separated(Parser::parse_expr)?; FunctionSetValue::Values(values) @@ -5846,7 +5855,7 @@ impl<'a> Parser<'a> { end_token: AttachedToken(end_token), })) } else if self.parse_keyword(Keyword::RETURN) { - if self.peek_token() == Token::LParen { + if self.peek_token_ref().token == Token::LParen { Some(CreateFunctionBody::AsReturnExpr(self.parse_expr()?)) } else if self.peek_keyword(Keyword::SELECT) { let select = self.parse_select()?; @@ -5854,11 +5863,11 @@ impl<'a> Parser<'a> { } else { parser_err!( "Expected a subquery (or bare SELECT statement) after RETURN", - self.peek_token().span.start + self.peek_token_ref().span.start )? } } else { - parser_err!("Unparsable function body", self.peek_token().span.start)? + parser_err!("Unparsable function body", self.peek_token_ref().span.start)? }; Ok(CreateFunction { @@ -5936,7 +5945,7 @@ impl<'a> Parser<'a> { // This dummy error is ignored in `maybe_parse` parser_err!( "The DEFAULT keyword is not a type", - parser.peek_token().span.start + parser.peek_token_ref().span.start ) } else { parser.parse_data_type() @@ -5978,7 +5987,7 @@ impl<'a> Parser<'a> { if !dialect_of!(self is PostgreSqlDialect | SQLiteDialect | GenericDialect | MySqlDialect | MsSqlDialect) { self.prev_token(); - return self.expected("an object type after DROP", self.peek_token()); + return self.expected_ref("an object type after DROP", self.peek_token_ref()); } let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let trigger_name = self.parse_object_name(false)?; @@ -6014,7 +6023,7 @@ impl<'a> Parser<'a> { if !dialect_of!(self is PostgreSqlDialect | SQLiteDialect | GenericDialect | MySqlDialect | MsSqlDialect) { self.prev_token(); - return self.expected("an object type after CREATE", self.peek_token()); + return self.expected_ref("an object type after CREATE", self.peek_token_ref()); } let name = self.parse_object_name(false)?; @@ -6215,7 +6224,7 @@ impl<'a> Parser<'a> { }) } else { self.prev_token(); - 
self.expected("an object type after CREATE", self.peek_token()) + self.expected_ref("an object type after CREATE", self.peek_token_ref()) } } @@ -6641,7 +6650,7 @@ impl<'a> Parser<'a> { Ok(()) } } else { - self.expected("ROLE or GROUP after IN", self.peek_token()) + self.expected_ref("ROLE or GROUP after IN", self.peek_token_ref()) } } Keyword::ROLE => { @@ -7067,7 +7076,8 @@ impl<'a> Parser<'a> { let sort_family = self.parse_object_name(false)?; Some(OperatorPurpose::ForOrderBy { sort_family }) } else { - return self.expected("SEARCH or ORDER BY after FOR", self.peek_token()); + return self + .expected_ref("SEARCH or ORDER BY after FOR", self.peek_token_ref()); } } else { None @@ -7083,23 +7093,24 @@ impl<'a> Parser<'a> { let support_number = self.parse_literal_uint()?; // Optional operator types - let op_types = - if self.consume_token(&Token::LParen) && self.peek_token() != Token::RParen { - let mut types = vec![]; - loop { - types.push(self.parse_data_type()?); - if !self.consume_token(&Token::Comma) { - break; - } + let op_types = if self.consume_token(&Token::LParen) + && self.peek_token_ref().token != Token::RParen + { + let mut types = vec![]; + loop { + types.push(self.parse_data_type()?); + if !self.consume_token(&Token::Comma) { + break; } - self.expect_token(&Token::RParen)?; - Some(types) - } else if self.consume_token(&Token::LParen) { - self.expect_token(&Token::RParen)?; - Some(vec![]) - } else { - None - }; + } + self.expect_token(&Token::RParen)?; + Some(types) + } else if self.consume_token(&Token::LParen) { + self.expect_token(&Token::RParen)?; + Some(vec![]) + } else { + None + }; let function_name = self.parse_object_name(false)?; @@ -7107,7 +7118,7 @@ impl<'a> Parser<'a> { let argument_types = if self.consume_token(&Token::LParen) { let mut types = vec![]; loop { - if self.peek_token() == Token::RParen { + if self.peek_token_ref().token == Token::RParen { break; } types.push(self.parse_data_type()?); @@ -7208,9 +7219,9 @@ impl<'a> Parser<'a> 
{ self.parse_drop_operator() }; } else { - return self.expected( + return self.expected_ref( "CONNECTOR, DATABASE, EXTENSION, FUNCTION, INDEX, OPERATOR, POLICY, PROCEDURE, ROLE, SCHEMA, SECRET, SEQUENCE, STAGE, TABLE, TRIGGER, TYPE, VIEW, MATERIALIZED VIEW or USER after DROP", - self.peek_token(), + self.peek_token_ref(), ); }; // Many dialects support the non-standard `IF EXISTS` clause and allow @@ -7218,7 +7229,7 @@ impl<'a> Parser<'a> { let if_exists = self.parse_keywords(&[Keyword::IF, Keyword::EXISTS]); let names = self.parse_comma_separated(|p| p.parse_object_name(false))?; - let loc = self.peek_token().span.start; + let loc = self.peek_token_ref().span.start; let cascade = self.parse_keyword(Keyword::CASCADE); let restrict = self.parse_keyword(Keyword::RESTRICT); let purge = self.parse_keyword(Keyword::PURGE); @@ -7364,7 +7375,7 @@ impl<'a> Parser<'a> { (true, false) => Some(true), (false, true) => Some(false), (false, false) => None, - _ => self.expected("TEMPORARY or PERSISTENT", self.peek_token())?, + _ => self.expected_ref("TEMPORARY or PERSISTENT", self.peek_token_ref())?, }; Ok(Statement::DropSecret { @@ -7539,7 +7550,7 @@ impl<'a> Parser<'a> { ), } } else if self.parse_keyword(Keyword::RESULTSET) { - let assigned_expr = if self.peek_token().token != Token::SemiColon { + let assigned_expr = if self.peek_token_ref().token != Token::SemiColon { self.parse_snowflake_variable_declaration_expression()? } else { // Nothing more to do. The statement has no further parameters. @@ -7548,7 +7559,7 @@ impl<'a> Parser<'a> { (Some(DeclareType::ResultSet), None, assigned_expr, None) } else if self.parse_keyword(Keyword::EXCEPTION) { - let assigned_expr = if self.peek_token().token == Token::LParen { + let assigned_expr = if self.peek_token_ref().token == Token::LParen { Some(DeclareAssignment::Expr(Box::new(self.parse_expr()?))) } else { // Nothing more to do. The statement has no further parameters. 
@@ -7562,7 +7573,7 @@ impl<'a> Parser<'a> { self.parse_snowflake_variable_declaration_expression()? { (Some(assigned_expr), None) - } else if let Token::Word(_) = self.peek_token().token { + } else if let Token::Word(_) = &self.peek_token_ref().token { let data_type = self.parse_data_type()?; ( self.parse_snowflake_variable_declaration_expression()?, @@ -7786,7 +7797,7 @@ impl<'a> Parser<'a> { self.expect_keyword(Keyword::IN)?; FetchPosition::In } else { - return parser_err!("Expected FROM or IN", self.peek_token().span.start); + return parser_err!("Expected FROM or IN", self.peek_token_ref().span.start); }; let name = self.parse_identifier()?; @@ -7816,9 +7827,9 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::TEMP) || self.parse_keyword(Keyword::TEMPORARY) { DiscardObject::TEMP } else { - return self.expected( + return self.expected_ref( "ALL, PLANS, SEQUENCES, TEMP or TEMPORARY after DISCARD", - self.peek_token(), + self.peek_token_ref(), ); }; Ok(Statement::Discard { object_type }) @@ -7963,7 +7974,7 @@ impl<'a> Parser<'a> { .map(|k| match k { Keyword::CASCADE => Ok(ReferentialAction::Cascade), Keyword::RESTRICT => Ok(ReferentialAction::Restrict), - _ => self.expected("CASCADE or RESTRICT", self.peek_token()), + _ => self.expected_ref("CASCADE or RESTRICT", self.peek_token_ref()), }) .transpose()?, })) @@ -8276,9 +8287,9 @@ impl<'a> Parser<'a> { if self.peek_keyword(Keyword::FOR) || self.peek_keyword(Keyword::DEFAULT) { Some(self.parse_partition_for_values()?) } else { - return self.expected( + return self.expected_ref( "FOR VALUES or DEFAULT after PARTITION OF", - self.peek_token(), + self.peek_token_ref(), ); } } else { @@ -8306,7 +8317,7 @@ impl<'a> Parser<'a> { let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) { if self.consume_token(&Token::LParen) { - let columns = if self.peek_token() != Token::RParen { + let columns = if self.peek_token_ref().token != Token::RParen { self.parse_comma_separated(|p| p.parse_expr())? 
} else { vec![] @@ -8438,8 +8449,8 @@ impl<'a> Parser<'a> { if self.parse_keyword(Keyword::IN) { // FOR VALUES IN (expr, ...) self.expect_token(&Token::LParen)?; - if self.peek_token() == Token::RParen { - return self.expected("at least one value", self.peek_token()); + if self.peek_token_ref().token == Token::RParen { + return self.expected_ref("at least one value", self.peek_token_ref()); } let values = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; @@ -8447,15 +8458,15 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::FROM) { // FOR VALUES FROM (...) TO (...) self.expect_token(&Token::LParen)?; - if self.peek_token() == Token::RParen { - return self.expected("at least one value", self.peek_token()); + if self.peek_token_ref().token == Token::RParen { + return self.expected_ref("at least one value", self.peek_token_ref()); } let from = self.parse_comma_separated(Parser::parse_partition_bound_value)?; self.expect_token(&Token::RParen)?; self.expect_keyword(Keyword::TO)?; self.expect_token(&Token::LParen)?; - if self.peek_token() == Token::RParen { - return self.expected("at least one value", self.peek_token()); + if self.peek_token_ref().token == Token::RParen { + return self.expected_ref("at least one value", self.peek_token_ref()); } let to = self.parse_comma_separated(Parser::parse_partition_bound_value)?; self.expect_token(&Token::RParen)?; @@ -8471,7 +8482,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; Ok(ForValues::With { modulus, remainder }) } else { - self.expected("IN, FROM, or WITH after FOR VALUES", self.peek_token()) + self.expected_ref("IN, FROM, or WITH after FOR VALUES", self.peek_token_ref()) } } @@ -8528,7 +8539,7 @@ impl<'a> Parser<'a> { )); }; - if let Token::Word(word) = self.peek_token().token { + if let Token::Word(word) = &self.peek_token_ref().token { if word.keyword == Keyword::OPTIONS { table_options = CreateTableOptions::Options(self.parse_options(Keyword::OPTIONS)?) 
@@ -8586,7 +8597,7 @@ impl<'a> Parser<'a> { let engine = match value.token { Token::Word(w) => { - let parameters = if self.peek_token() == Token::LParen { + let parameters = if self.peek_token_ref().token == Token::LParen { self.parse_parenthesized_identifiers()? } else { vec![] @@ -8794,7 +8805,7 @@ impl<'a> Parser<'a> { return Ok(Some(params)); } loop { - if let Token::Word(_) = self.peek_token().token { + if let Token::Word(_) = &self.peek_token_ref().token { params.push(self.parse_procedure_param()?) } let comma = self.consume_token(&Token::Comma); @@ -8802,7 +8813,10 @@ impl<'a> Parser<'a> { // allow a trailing comma, even though it's not in standard break; } else if !comma { - return self.expected("',' or ')' after parameter definition", self.peek_token()); + return self.expected_ref( + "',' or ')' after parameter definition", + self.peek_token_ref(), + ); } } Ok(Some(params)) @@ -8819,17 +8833,21 @@ impl<'a> Parser<'a> { loop { if let Some(constraint) = self.parse_optional_table_constraint()? { constraints.push(constraint); - } else if let Token::Word(_) = self.peek_token().token { + } else if let Token::Word(_) = &self.peek_token_ref().token { columns.push(self.parse_column_def()?); } else { - return self.expected("column name or constraint definition", self.peek_token()); + return self.expected_ref( + "column name or constraint definition", + self.peek_token_ref(), + ); } let comma = self.consume_token(&Token::Comma); - let rparen = self.peek_token().token == Token::RParen; + let rparen = self.peek_token_ref().token == Token::RParen; if !comma && !rparen { - return self.expected("',' or ')' after column definition", self.peek_token()); + return self + .expected_ref("',' or ')' after column definition", self.peek_token_ref()); }; if rparen @@ -8897,9 +8915,9 @@ impl<'a> Parser<'a> { if let Some(option) = self.parse_optional_column_option()? 
{ options.push(ColumnOptionDef { name, option }); } else { - return self.expected( + return self.expected_ref( "constraint details after CONSTRAINT ", - self.peek_token(), + self.peek_token_ref(), ); } } else if let Some(option) = self.parse_optional_column_option()? { @@ -8917,7 +8935,7 @@ impl<'a> Parser<'a> { fn is_column_type_sqlite_unspecified(&mut self) -> bool { if dialect_of!(self is SQLiteDialect) { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(word) => matches!( word.keyword, Keyword::CONSTRAINT @@ -8982,7 +9000,7 @@ impl<'a> Parser<'a> { { // The expression is optional for the EPHEMERAL syntax, so we need to check // if the column definition has remaining tokens before parsing the expression. - if matches!(self.peek_token().token, Token::Comma | Token::RParen) { + if matches!(self.peek_token_ref().token, Token::Comma | Token::RParen) { Ok(Some(ColumnOption::Ephemeral(None))) } else { Ok(Some(ColumnOption::Ephemeral(Some(self.parse_expr()?)))) @@ -9220,7 +9238,7 @@ impl<'a> Parser<'a> { )) } else if dialect_of!(self is PostgreSqlDialect) { // Postgres' AS IDENTITY branches are above, this one needs STORED - self.expected("STORED", self.peek_token()) + self.expected_ref("STORED", self.peek_token_ref()) } else if self.parse_keywords(&[Keyword::VIRTUAL]) { Ok((GeneratedAs::Always, Some(GeneratedExpressionMode::Virtual))) } else { @@ -9313,9 +9331,9 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::SET, Keyword::DEFAULT]) { Ok(ReferentialAction::SetDefault) } else { - self.expected( + self.expected_ref( "one of RESTRICT, CASCADE, SET NULL, NO ACTION or SET DEFAULT", - self.peek_token(), + self.peek_token_ref(), ) } } @@ -9329,7 +9347,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::SIMPLE) { Ok(ConstraintReferenceMatchKind::Simple) } else { - self.expected("one of FULL, PARTIAL or SIMPLE", self.peek_token()) + self.expected_ref("one of FULL, PARTIAL or SIMPLE", self.peek_token_ref()) } } 
@@ -9366,7 +9384,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::IMMEDIATE) { cc.initially = Some(DeferrableInitial::Immediate); } else { - self.expected("one of DEFERRED or IMMEDIATE", self.peek_token())?; + self.expected_ref("one of DEFERRED or IMMEDIATE", self.peek_token_ref())?; } } else if cc.enforced.is_none() && self.parse_keyword(Keyword::ENFORCED) { cc.enforced = Some(true); @@ -9424,8 +9442,10 @@ impl<'a> Parser<'a> { if !dialect_of!(self is GenericDialect | MySqlDialect) && !index_type_display.is_none() { - return self - .expected("`index_name` or `(column_name [, ...])`", self.peek_token()); + return self.expected_ref( + "`index_name` or `(column_name [, ...])`", + self.peek_token_ref(), + ); } let nulls_distinct = self.parse_optional_nulls_distinct()?; @@ -9636,7 +9656,7 @@ impl<'a> Parser<'a> { &mut self, keyword: Keyword, ) -> Result>, ParserError> { - if let Token::Word(word) = self.peek_token().token { + if let Token::Word(word) = &self.peek_token_ref().token { if word.keyword == keyword { return Ok(Some(self.parse_options(keyword)?)); } @@ -9964,9 +9984,9 @@ impl<'a> Parser<'a> { let name = self.parse_identifier()?; AlterTableOperation::DisableTrigger { name } } else { - return self.expected( + return self.expected_ref( "ROW LEVEL SECURITY, RULE, or TRIGGER after DISABLE", - self.peek_token(), + self.peek_token_ref(), ); } } else if self.parse_keyword(Keyword::ENABLE) { @@ -9991,9 +10011,9 @@ impl<'a> Parser<'a> { let name = self.parse_identifier()?; AlterTableOperation::EnableTrigger { name } } else { - return self.expected( + return self.expected_ref( "ALWAYS, REPLICA, ROW LEVEL SECURITY, RULE, or TRIGGER after ENABLE", - self.peek_token(), + self.peek_token_ref(), ); } } else if self.parse_keywords(&[ @@ -10189,7 +10209,7 @@ impl<'a> Parser<'a> { let mut sequence_options: Option> = None; - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { self.expect_token(&Token::LParen)?; 
sequence_options = Some(self.parse_create_sequence_options()?); self.expect_token(&Token::RParen)?; @@ -10206,7 +10226,7 @@ impl<'a> Parser<'a> { "SET/DROP NOT NULL, SET DEFAULT, or SET DATA TYPE after ALTER COLUMN" }; - return self.expected(message, self.peek_token()); + return self.expected_ref(message, self.peek_token_ref()); }; AlterTableOperation::AlterColumn { column_name, op } } else if self.parse_keyword(Keyword::SWAP) { @@ -10279,9 +10299,9 @@ impl<'a> Parser<'a> { Some(Keyword::EXCLUSIVE) => AlterTableLock::Exclusive, Some(Keyword::NONE) => AlterTableLock::None, Some(Keyword::SHARED) => AlterTableLock::Shared, - _ => self.expected( + _ => self.expected_ref( "DEFAULT, EXCLUSIVE, NONE or SHARED after LOCK [=]", - self.peek_token(), + self.peek_token_ref(), )?, }; AlterTableOperation::Lock { equals, lock } @@ -10297,9 +10317,9 @@ impl<'a> Parser<'a> { Some(Keyword::INSTANT) => AlterTableAlgorithm::Instant, Some(Keyword::INPLACE) => AlterTableAlgorithm::Inplace, Some(Keyword::COPY) => AlterTableAlgorithm::Copy, - _ => self.expected( + _ => self.expected_ref( "DEFAULT, INSTANT, INPLACE, or COPY after ALGORITHM [=]", - self.peek_token(), + self.peek_token_ref(), )?, }; AlterTableOperation::Algorithm { equals, algorithm } @@ -10317,9 +10337,9 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::USING, Keyword::INDEX]) { ReplicaIdentity::Index(self.parse_identifier()?) 
} else { - return self.expected( + return self.expected_ref( "NOTHING, FULL, DEFAULT, or USING INDEX index_name after REPLICA IDENTITY", - self.peek_token(), + self.peek_token_ref(), ); }; @@ -10339,9 +10359,9 @@ impl<'a> Parser<'a> { if !options.is_empty() { AlterTableOperation::SetOptionsParens { options } } else { - return self.expected( + return self.expected_ref( "ADD, RENAME, PARTITION, SWAP, DROP, REPLICA IDENTITY, SET, or SET TBLPROPERTIES after ALTER TABLE", - self.peek_token(), + self.peek_token_ref(), ); } } @@ -10412,10 +10432,10 @@ impl<'a> Parser<'a> { let index_name = self.parse_object_name(false)?; AlterIndexOperation::RenameIndex { index_name } } else { - return self.expected("TO after RENAME", self.peek_token()); + return self.expected_ref("TO after RENAME", self.peek_token_ref()); } } else { - return self.expected("RENAME after ALTER INDEX", self.peek_token()); + return self.expected_ref("RENAME after ALTER INDEX", self.peek_token_ref()); }; Ok(Statement::AlterIndex { @@ -10682,7 +10702,7 @@ impl<'a> Parser<'a> { let sort_family = self.parse_object_name(false)?; Some(OperatorPurpose::ForOrderBy { sort_family }) } else { - return self.expected("SEARCH or ORDER BY after FOR", self.peek_token()); + return self.expected_ref("SEARCH or ORDER BY after FOR", self.peek_token_ref()); } } else { None @@ -10701,22 +10721,23 @@ impl<'a> Parser<'a> { let support_number = self.parse_literal_uint()?; // Optional operator types - let op_types = if self.consume_token(&Token::LParen) && self.peek_token() != Token::RParen { - let types = self.parse_comma_separated(Parser::parse_data_type)?; - self.expect_token(&Token::RParen)?; - Some(types) - } else if self.consume_token(&Token::LParen) { - self.expect_token(&Token::RParen)?; - Some(vec![]) - } else { - None - }; + let op_types = + if self.consume_token(&Token::LParen) && self.peek_token_ref().token != Token::RParen { + let types = self.parse_comma_separated(Parser::parse_data_type)?; + 
self.expect_token(&Token::RParen)?; + Some(types) + } else if self.consume_token(&Token::LParen) { + self.expect_token(&Token::RParen)?; + Some(vec![]) + } else { + None + }; let function_name = self.parse_object_name(false)?; // Function argument types let argument_types = if self.consume_token(&Token::LParen) { - if self.peek_token() == Token::RParen { + if self.peek_token_ref().token == Token::RParen { self.expect_token(&Token::RParen)?; vec![] } else { @@ -10777,7 +10798,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::FUNCTION) { self.parse_operator_family_add_function() } else { - self.expected("OPERATOR or FUNCTION", self.peek_token()) + self.expected_ref("OPERATOR or FUNCTION", self.peek_token_ref()) } } @@ -10788,7 +10809,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::FUNCTION) { self.parse_operator_family_drop_function() } else { - self.expected("OPERATOR or FUNCTION", self.peek_token()) + self.expected_ref("OPERATOR or FUNCTION", self.peek_token_ref()) } } @@ -10904,12 +10925,12 @@ impl<'a> Parser<'a> { /// or `CALL procedure_name` statement pub fn parse_call(&mut self) -> Result { let object_name = self.parse_object_name(false)?; - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { match self.parse_function(object_name)? 
{ Expr::Function(f) => Ok(Statement::Call(f)), other => parser_err!( format!("Expected a simple procedure call but found: {other}"), - self.peek_token().span.start + self.peek_token_ref().span.start ), } } else { @@ -10943,7 +10964,7 @@ impl<'a> Parser<'a> { let to = match self.parse_one_of_keywords(&[Keyword::FROM, Keyword::TO]) { Some(Keyword::FROM) => false, Some(Keyword::TO) => true, - _ => self.expected("FROM or TO", self.peek_token())?, + _ => self.expected_ref("FROM or TO", self.peek_token_ref())?, }; if !to { // Use a separate if statement to prevent Rust compiler from complaining about @@ -11051,7 +11072,7 @@ impl<'a> Parser<'a> { CopyOption::ForceNull(self.parse_parenthesized_column_list(Mandatory, false)?) } Some(Keyword::ENCODING) => CopyOption::Encoding(self.parse_literal_string()?), - _ => self.expected("option", self.peek_token())?, + _ => self.expected_ref("option", self.peek_token_ref())?, }; Ok(ret) } @@ -11102,7 +11123,7 @@ impl<'a> Parser<'a> { Some(Keyword::ACCEPTANYDATE) => CopyLegacyOption::AcceptAnyDate, Some(Keyword::ACCEPTINVCHARS) => { let _ = self.parse_keyword(Keyword::AS); // [ AS ] - let ch = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + let ch = if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { Some(self.parse_literal_string()?) } else { None @@ -11140,7 +11161,7 @@ impl<'a> Parser<'a> { }), Some(Keyword::DATEFORMAT) => { let _ = self.parse_keyword(Keyword::AS); - let fmt = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + let fmt = if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { Some(self.parse_literal_string()?) } else { None @@ -11175,7 +11196,7 @@ impl<'a> Parser<'a> { } Some(Keyword::JSON) => { let _ = self.parse_keyword(Keyword::AS); - let fmt = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + let fmt = if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { Some(self.parse_literal_string()?) 
} else { None @@ -11246,7 +11267,7 @@ impl<'a> Parser<'a> { } Some(Keyword::TIMEFORMAT) => { let _ = self.parse_keyword(Keyword::AS); - let fmt = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) { + let fmt = if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { Some(self.parse_literal_string()?) } else { None @@ -11255,7 +11276,7 @@ impl<'a> Parser<'a> { } Some(Keyword::TRUNCATECOLUMNS) => CopyLegacyOption::TruncateColumns, Some(Keyword::ZSTD) => CopyLegacyOption::Zstd, - _ => self.expected("option", self.peek_token())?, + _ => self.expected_ref("option", self.peek_token_ref())?, }; Ok(ret) } @@ -11309,7 +11330,7 @@ impl<'a> Parser<'a> { self.parse_comma_separated(|p| p.parse_identifier())?, ) } - _ => self.expected("csv option", self.peek_token())?, + _ => self.expected_ref("csv option", self.peek_token_ref())?, }; Ok(ret) } @@ -11527,7 +11548,7 @@ impl<'a> Parser<'a> { Value::Placeholder(_) => Ok(value_wrapper), _ => { self.prev_token(); - self.expected("literal number", self.peek_token()) + self.expected_ref("literal number", self.peek_token_ref()) } } } @@ -11636,7 +11657,7 @@ impl<'a> Parser<'a> { match self.parse_one_of_keywords(&[Keyword::TRUE, Keyword::FALSE]) { Some(Keyword::TRUE) => Ok(true), Some(Keyword::FALSE) => Ok(false), - _ => self.expected("TRUE or FALSE", self.peek_token()), + _ => self.expected_ref("TRUE or FALSE", self.peek_token_ref()), } } @@ -11654,7 +11675,7 @@ impl<'a> Parser<'a> { Some(Keyword::NFD) => Ok(NormalizationForm::NFD), Some(Keyword::NFKC) => Ok(NormalizationForm::NFKC), Some(Keyword::NFKD) => Ok(NormalizationForm::NFKD), - _ => parser.expected("unicode normalization form", parser.peek_token()), + _ => parser.expected_ref("unicode normalization form", parser.peek_token_ref()), } })?; if self.parse_keyword(Keyword::NORMALIZED) { @@ -11664,7 +11685,7 @@ impl<'a> Parser<'a> { negated: neg, }); } - self.expected("unicode normalization form", self.peek_token()) + self.expected_ref("unicode 
normalization form", self.peek_token_ref()) } /// Parse parenthesized enum members, used with `ENUM(...)` type definitions. @@ -12072,7 +12093,7 @@ impl<'a> Parser<'a> { Keyword::TABLE => { // an LParen after the TABLE keyword indicates that table columns are being defined // whereas no LParen indicates an anonymous table expression will be returned - if self.peek_token() == Token::LParen { + if self.peek_token_ref().token == Token::LParen { let columns = self.parse_returns_table_columns()?; Ok(DataType::Table(Some(columns))) } else { @@ -12243,9 +12264,9 @@ impl<'a> Parser<'a> { Keyword::IGNORE => TableIndexHintType::Ignore, Keyword::FORCE => TableIndexHintType::Force, _ => { - return self.expected( + return self.expected_ref( "expected to match USE/IGNORE/FORCE keyword", - self.peek_token(), + self.peek_token_ref(), ) } }; @@ -12253,7 +12274,8 @@ impl<'a> Parser<'a> { Some(Keyword::INDEX) => TableIndexType::Index, Some(Keyword::KEY) => TableIndexType::Key, _ => { - return self.expected("expected to match INDEX/KEY keyword", self.peek_token()) + return self + .expected_ref("expected to match INDEX/KEY keyword", self.peek_token_ref()) } }; let for_clause = if self.parse_keyword(Keyword::FOR) { @@ -12264,9 +12286,9 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::GROUP, Keyword::BY]) { TableIndexHintForClause::GroupBy } else { - return self.expected( + return self.expected_ref( "expected to match FOR/ORDER BY/GROUP BY table hint in for clause", - self.peek_token(), + self.peek_token_ref(), ); }; Some(clause) @@ -12275,7 +12297,7 @@ impl<'a> Parser<'a> { }; self.expect_token(&Token::LParen)?; - let index_names = if self.peek_token().token != Token::RParen { + let index_names = if self.peek_token_ref().token != Token::RParen { self.parse_comma_separated(Parser::parse_identifier)? 
} else { vec![] @@ -12376,7 +12398,7 @@ impl<'a> Parser<'a> { _ => { return parser_err!( "BUG: expected to match GroupBy modifier keyword", - self.peek_token().span.start + self.peek_token_ref().span.start ) } }); @@ -12533,7 +12555,7 @@ impl<'a> Parser<'a> { } } else { loop { - if allow_wildcards && self.peek_token().token == Token::Mul { + if allow_wildcards && self.peek_token_ref().token == Token::Mul { let span = self.next_token().span; parts.push(ObjectNamePart::Identifier(Ident { value: Token::Mul.to_string(), @@ -12548,7 +12570,7 @@ impl<'a> Parser<'a> { } } else if self.dialect.supports_object_name_double_dot_notation() && parts.len() == 1 - && matches!(self.peek_token().token, Token::Period) + && matches!(self.peek_token_ref().token, Token::Period) { // Empty string here means default schema parts.push(ObjectNamePart::Identifier(Ident::new(""))); @@ -12778,7 +12800,7 @@ impl<'a> Parser<'a> { } // If next token is period, then it is part of an ObjectName and we don't expect whitespace // after the number. - !matches!(self.peek_token().token, Token::Period) + !matches!(self.peek_token_ref().token, Token::Period) } _ => { return self @@ -12806,7 +12828,7 @@ impl<'a> Parser<'a> { /// Parses a parenthesized, comma-separated list of column definitions within a view. 
fn parse_view_columns(&mut self) -> Result, ParserError> { if self.consume_token(&Token::LParen) { - if self.peek_token().token == Token::RParen { + if self.peek_token_ref().token == Token::RParen { self.next_token(); Ok(vec![]) } else { @@ -12913,7 +12935,7 @@ impl<'a> Parser<'a> { F: FnMut(&mut Parser) -> Result, { if self.consume_token(&Token::LParen) { - if allow_empty && self.peek_token().token == Token::RParen { + if allow_empty && self.peek_token_ref().token == Token::RParen { self.next_token(); Ok(vec![]) } else { @@ -12924,7 +12946,7 @@ impl<'a> Parser<'a> { } else if optional == Optional { Ok(vec![]) } else { - self.expected("a list of columns in parentheses", self.peek_token()) + self.expected_ref("a list of columns in parentheses", self.peek_token_ref()) } } @@ -12997,7 +13019,7 @@ impl<'a> Parser<'a> { Keyword::SECOND => Ok(Some(IntervalFields::DayToSecond)), _ => { self.prev_token(); - self.expected("HOUR, MINUTE, or SECOND", self.peek_token()) + self.expected_ref("HOUR, MINUTE, or SECOND", self.peek_token_ref()) } } } else { @@ -13012,7 +13034,7 @@ impl<'a> Parser<'a> { Keyword::SECOND => Ok(Some(IntervalFields::HourToSecond)), _ => { self.prev_token(); - self.expected("MINUTE or SECOND", self.peek_token()) + self.expected_ref("MINUTE or SECOND", self.peek_token_ref()) } } } else { @@ -13032,9 +13054,9 @@ impl<'a> Parser<'a> { Some(Keyword::SECOND) => Ok(Some(IntervalFields::Second)), Some(_) => { self.prev_token(); - self.expected( + self.expected_ref( "YEAR, MONTH, DAY, HOUR, MINUTE, or SECOND", - self.peek_token(), + self.peek_token_ref(), ) } None => Ok(None), @@ -13296,9 +13318,9 @@ impl<'a> Parser<'a> { if dialect_of!(self is ClickHouseDialect | GenericDialect) { Some(KillType::Mutation) } else { - self.expected( + self.expected_ref( "Unsupported type for KILL, allowed: CONNECTION | QUERY", - self.peek_token(), + self.peek_token_ref(), )? } } @@ -13324,7 +13346,7 @@ impl<'a> Parser<'a> { // although not all features may be implemented. 
if describe_alias == DescribeAlias::Explain && self.dialect.supports_explain_with_utility_options() - && self.peek_token().token == Token::LParen + && self.peek_token_ref().token == Token::LParen { options = Some(self.parse_utility_options()?) } else if self.parse_keywords(&[Keyword::QUERY, Keyword::PLAN]) { @@ -13763,7 +13785,7 @@ impl<'a> Parser<'a> { pub fn parse_for_xml(&mut self) -> Result { let for_xml = if self.parse_keyword(Keyword::RAW) { let mut element_name = None; - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { self.expect_token(&Token::LParen)?; element_name = Some(self.parse_literal_string()?); self.expect_token(&Token::RParen)?; @@ -13775,7 +13797,7 @@ impl<'a> Parser<'a> { ForXml::Explicit } else if self.parse_keyword(Keyword::PATH) { let mut element_name = None; - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { self.expect_token(&Token::LParen)?; element_name = Some(self.parse_literal_string()?); self.expect_token(&Token::RParen)?; @@ -13790,7 +13812,7 @@ impl<'a> Parser<'a> { let mut binary_base64 = false; let mut root = None; let mut r#type = false; - while self.peek_token().token == Token::Comma { + while self.peek_token_ref().token == Token::Comma { self.next_token(); if self.parse_keyword(Keyword::ELEMENTS) { elements = true; @@ -13828,7 +13850,7 @@ impl<'a> Parser<'a> { let mut root = None; let mut include_null_values = false; let mut without_array_wrapper = false; - while self.peek_token().token == Token::Comma { + while self.peek_token_ref().token == Token::Comma { self.next_token(); if self.parse_keyword(Keyword::ROOT) { self.expect_token(&Token::LParen)?; @@ -13942,9 +13964,9 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::TABLE) { SetExpr::Table(Box::new(self.parse_as_table()?)) } else { - return self.expected( + return self.expected_ref( "SELECT, VALUES, or a subquery in the query body", - self.peek_token(), + 
self.peek_token_ref(), ); }; @@ -14349,9 +14371,9 @@ impl<'a> Parser<'a> { Keyword::SQL_CALC_FOUND_ROWS => modifiers.sql_calc_found_rows = true, _ => { self.prev_token(); - return self.expected( + return self.expected_ref( "HIGH_PRIORITY, STRAIGHT_JOIN, or other MySQL select modifier", - self.peek_token(), + self.peek_token_ref(), ); } } @@ -14385,7 +14407,7 @@ impl<'a> Parser<'a> { { Some(ValueTableMode::AsStruct) } else if self.parse_keyword(Keyword::AS) { - self.expected("VALUE or STRUCT", self.peek_token())? + self.expected_ref("VALUE or STRUCT", self.peek_token_ref())? } else { None }; @@ -14511,7 +14533,7 @@ impl<'a> Parser<'a> { } else if let Ok(expr) = self.parse_expr() { expr } else { - self.expected("variable value", self.peek_token())? + self.expected_ref("variable value", self.peek_token_ref())? }; values.push(value); @@ -14543,13 +14565,13 @@ impl<'a> Parser<'a> { // Parenthesized assignments are handled in the `parse_set` function after // trying to parse list of assignments using this function. // If a dialect supports both, and we find a LParen, we early exit from this function. - self.expected("Unparenthesized assignment", self.peek_token())? + self.expected_ref("Unparenthesized assignment", self.peek_token_ref())? } else { self.parse_object_name(false)? }; if !(self.consume_token(&Token::Eq) || self.parse_keyword(Keyword::TO)) { - return self.expected("assignment operator", self.peek_token()); + return self.expected_ref("assignment operator", self.peek_token_ref()); } let value = self.parse_expr()?; @@ -14724,7 +14746,7 @@ impl<'a> Parser<'a> { return self.parse_set_session_params(); }; - self.expected("equals sign or TO", self.peek_token()) + self.expected_ref("equals sign or TO", self.peek_token_ref()) } /// Parse session parameter assignments after `SET` when no `=` or `TO` is present. 
@@ -14740,7 +14762,7 @@ impl<'a> Parser<'a> { Some(Keyword::PROFILE) => SessionParamStatsTopic::Profile, Some(Keyword::TIME) => SessionParamStatsTopic::Time, Some(Keyword::XML) => SessionParamStatsTopic::Xml, - _ => return self.expected("IO, PROFILE, TIME or XML", self.peek_token()), + _ => return self.expected_ref("IO, PROFILE, TIME or XML", self.peek_token_ref()), }; let value = self.parse_session_param_value()?; Ok( @@ -14798,7 +14820,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::OFF) { Ok(SessionParamValue::Off) } else { - self.expected("ON or OFF", self.peek_token()) + self.expected_ref("ON or OFF", self.peek_token_ref()) } } @@ -15071,7 +15093,7 @@ impl<'a> Parser<'a> { // MSSQL extension, similar to CROSS JOIN LATERAL JoinOperator::CrossApply } else { - return self.expected("JOIN or APPLY after CROSS", self.peek_token()); + return self.expected_ref("JOIN or APPLY after CROSS", self.peek_token_ref()); }; let relation = self.parse_table_factor()?; let join_operator = if matches!(join_operator, JoinOperator::CrossJoin(_)) @@ -15191,14 +15213,15 @@ impl<'a> Parser<'a> { JoinOperator::FullOuter } Keyword::OUTER => { - return self.expected("LEFT, RIGHT, or FULL", self.peek_token()); + return self.expected_ref("LEFT, RIGHT, or FULL", self.peek_token_ref()); } Keyword::STRAIGHT_JOIN => { let _ = self.next_token(); // consume STRAIGHT_JOIN JoinOperator::StraightJoin } _ if natural => { - return self.expected("a join type after NATURAL", self.peek_token()); + return self + .expected_ref("a join type after NATURAL", self.peek_token_ref()); } _ => break, }; @@ -15380,7 +15403,7 @@ impl<'a> Parser<'a> { } else { // The SQL spec prohibits derived tables and bare tables from // appearing alone in parentheses (e.g. 
`FROM (mytable)`) - self.expected("joined table", self.peek_token()) + self.expected_ref("joined table", self.peek_token_ref()) } } else if self.dialect.supports_values_as_table_factor() && matches!( @@ -15667,7 +15690,7 @@ impl<'a> Parser<'a> { } else { return parser_err!( "Expecting number or byte length e.g. 100M", - self.peek_token().span.start + self.peek_token_ref().span.start ); } } @@ -15860,7 +15883,7 @@ impl<'a> Parser<'a> { let mut facts = Vec::new(); let mut where_clause = None; - while self.peek_token().token != Token::RParen { + while self.peek_token_ref().token != Token::RParen { if self.parse_keyword(Keyword::DIMENSIONS) { if !dimensions.is_empty() { return Err(ParserError::ParserError( @@ -15895,7 +15918,7 @@ impl<'a> Parser<'a> { "Expected one of DIMENSIONS, METRICS, FACTS or WHERE, got {}", self.peek_token().token ), - self.peek_token().span.start + self.peek_token_ref().span.start )?; } } @@ -16115,7 +16138,7 @@ impl<'a> Parser<'a> { fn parse_concat_pattern(&mut self) -> Result { let mut patterns = vec![self.parse_repetition_pattern()?]; - while !matches!(self.peek_token().token, Token::RParen | Token::Pipe) { + while !matches!(self.peek_token_ref().token, Token::RParen | Token::Pipe) { patterns.push(self.parse_repetition_pattern()?); } match <[MatchRecognizePattern; 1]>::try_from(patterns) { @@ -16321,7 +16344,7 @@ impl<'a> Parser<'a> { fn parse_pivot_aggregate_function(&mut self) -> Result { let function_name = match self.next_token().token { Token::Word(w) => Ok(w.value), - _ => self.expected("a function identifier", self.peek_token()), + _ => self.expected_ref("a function identifier", self.peek_token_ref()), }?; let expr = self.parse_function(ObjectName::from(vec![Ident::new(function_name)]))?; let alias = { @@ -16437,7 +16460,7 @@ impl<'a> Parser<'a> { Ok(JoinConstraint::Using(columns)) } else { Ok(JoinConstraint::None) - //self.expected("ON, or USING after JOIN", self.peek_token()) + //self.expected_ref("ON, or USING after JOIN", 
self.peek_token_ref()) } } @@ -16724,7 +16747,7 @@ impl<'a> Parser<'a> { if let Some(name) = objects?.first() { self.parse_grant_procedure_or_function(name, &kw)? } else { - self.expected("procedure or function name", self.peek_token())? + self.expected_ref("procedure or function name", self.peek_token_ref())? } } Some(Keyword::TABLE) | None => Some(GrantObjects::Tables(objects?)), @@ -16761,7 +16784,7 @@ impl<'a> Parser<'a> { name: name.clone(), arg_types, })), - _ => self.expected("procedure or function keywords", self.peek_token())?, + _ => self.expected_ref("procedure or function keywords", self.peek_token_ref())?, } } @@ -16882,7 +16905,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::DROP) { Ok(Action::Drop) } else { - self.expected("a privilege keyword", self.peek_token())? + self.expected_ref("a privilege keyword", self.peek_token_ref())? } } @@ -16951,7 +16974,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::TAG) { Ok(ActionApplyType::Tag) } else { - self.expected("GRANT APPLY type", self.peek_token()) + self.expected_ref("GRANT APPLY type", self.peek_token_ref()) } } @@ -16987,7 +17010,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::WAREHOUSES) { Ok(ActionManageType::Warehouses) } else { - self.expected("GRANT MANAGE type", self.peek_token()) + self.expected_ref("GRANT MANAGE type", self.peek_token_ref()) } } @@ -17043,7 +17066,7 @@ impl<'a> Parser<'a> { None => { return parser_err!( "DENY statements must specify an object", - self.peek_token().span.start + self.peek_token_ref().span.start ) } }; @@ -17098,7 +17121,7 @@ impl<'a> Parser<'a> { if !dialect_of!(self is MySqlDialect | GenericDialect) { return parser_err!( "Unsupported statement REPLACE", - self.peek_token().span.start + self.peek_token_ref().span.start ); } @@ -17242,7 +17265,7 @@ impl<'a> Parser<'a> { let conflict_target = if self.parse_keywords(&[Keyword::ON, Keyword::CONSTRAINT]) { 
Some(ConflictTarget::OnConstraint(self.parse_object_name(false)?)) - } else if self.peek_token() == Token::LParen { + } else if self.peek_token_ref().token == Token::LParen { Some(ConflictTarget::Columns( self.parse_parenthesized_column_list(IsOptional::Mandatory, false)?, )) @@ -17676,7 +17699,7 @@ impl<'a> Parser<'a> { } fn parse_duplicate_treatment(&mut self) -> Result, ParserError> { - let loc = self.peek_token().span.start; + let loc = self.peek_token_ref().span.start; match ( self.parse_keyword(Keyword::ALL), self.parse_keyword(Keyword::DISTINCT), @@ -17708,7 +17731,7 @@ impl<'a> Parser<'a> { Expr::Identifier(v) if v.value.to_lowercase() == "from" && v.quote_style.is_none() => { parser_err!( format!("Expected an expression, found: {}", v), - self.peek_token().span.start + self.peek_token_ref().span.start ) } Expr::BinaryOp { @@ -17721,7 +17744,7 @@ impl<'a> Parser<'a> { let Expr::Identifier(alias) = *left else { return parser_err!( "BUG: expected identifier expression as alias", - self.peek_token().span.start + self.peek_token_ref().span.start ); }; Ok(SelectItem::ExprWithAlias { @@ -17842,13 +17865,13 @@ impl<'a> Parser<'a> { &mut self, ) -> Result, ParserError> { let opt_except = if self.parse_keyword(Keyword::EXCEPT) { - if self.peek_token().token == Token::LParen { + if self.peek_token_ref().token == Token::LParen { let idents = self.parse_parenthesized_column_list(Mandatory, false)?; match &idents[..] { [] => { - return self.expected( + return self.expected_ref( "at least one column should be parsed by the expect clause", - self.peek_token(), + self.peek_token_ref(), )?; } [first, idents @ ..] => Some(ExceptSelectItem { @@ -18336,7 +18359,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::SNAPSHOT) { TransactionIsolationLevel::Snapshot } else { - self.expected("isolation level", self.peek_token())? + self.expected_ref("isolation level", self.peek_token_ref())? 
}; TransactionMode::IsolationLevel(iso_level) } else if self.parse_keywords(&[Keyword::READ, Keyword::ONLY]) { @@ -18344,7 +18367,7 @@ impl<'a> Parser<'a> { } else if self.parse_keywords(&[Keyword::READ, Keyword::WRITE]) { TransactionMode::AccessMode(TransactionAccessMode::ReadWrite) } else if required { - self.expected("transaction mode", self.peek_token())? + self.expected_ref("transaction mode", self.peek_token_ref())? } else { break; }; @@ -18433,9 +18456,9 @@ impl<'a> Parser<'a> { Keyword::LOG => Ok(RaisErrorOption::Log), Keyword::NOWAIT => Ok(RaisErrorOption::NoWait), Keyword::SETERROR => Ok(RaisErrorOption::SetError), - _ => self.expected( + _ => self.expected_ref( "LOG, NOWAIT OR SETERROR raiserror option", - self.peek_token(), + self.peek_token_ref(), ), } } @@ -18553,12 +18576,12 @@ impl<'a> Parser<'a> { pub fn parse_unload(&mut self) -> Result { self.expect_keyword(Keyword::UNLOAD)?; self.expect_token(&Token::LParen)?; - let (query, query_text) = if matches!(self.peek_token().token, Token::SingleQuotedString(_)) - { - (None, Some(self.parse_literal_string()?)) - } else { - (Some(self.parse_query()?), None) - }; + let (query, query_text) = + if matches!(self.peek_token_ref().token, Token::SingleQuotedString(_)) { + (None, Some(self.parse_literal_string()?)) + } else { + (Some(self.parse_query()?), None) + }; self.expect_token(&Token::RParen)?; self.expect_keyword_is(Keyword::TO)?; @@ -18607,7 +18630,7 @@ impl<'a> Parser<'a> { v @ Value::Placeholder(_) => Ok(v), _ => { self.prev_token(); - self.expected("number or string or ? placeholder", self.peek_token()) + self.expected_ref("number or string or ? 
placeholder", self.peek_token_ref()) } } } @@ -18669,9 +18692,9 @@ impl<'a> Parser<'a> { table_format, }) } else { - self.expected( + self.expected_ref( "`DATA` or an extension name after `LOAD`", - self.peek_token(), + self.peek_token_ref(), ) } } @@ -18884,7 +18907,7 @@ impl<'a> Parser<'a> { } else if self.dialect.supports_window_clause_named_window_reference() { NamedWindowExpr::NamedWindow(self.parse_identifier()?) } else { - return self.expected("(", self.peek_token()); + return self.expected_ref("(", self.peek_token_ref()); }; Ok(NamedWindowDefinition(ident, window_expr)) @@ -18986,7 +19009,7 @@ impl<'a> Parser<'a> { // CREATE TYPE name AS (attributes) - Composite self.parse_create_type_composite(name) } else { - self.expected("ENUM, RANGE, or '(' after AS", self.peek_token()) + self.expected_ref("ENUM, RANGE, or '(' after AS", self.peek_token_ref()) } } @@ -19101,7 +19124,7 @@ impl<'a> Parser<'a> { let name = self.parse_object_name(false)?; Ok(UserDefinedTypeRangeOption::MultirangeTypeName(name)) } - _ => self.expected("range option keyword", self.peek_token()), + _ => self.expected_ref("range option keyword", self.peek_token_ref()), } } @@ -19289,7 +19312,7 @@ impl<'a> Parser<'a> { self.parse_keyword(Keyword::TRUE) || !self.parse_keyword(Keyword::FALSE); Ok(UserDefinedTypeSqlDefinitionOption::Collatable(value)) } - _ => self.expected("SQL definition option keyword", self.peek_token()), + _ => self.expected_ref("SQL definition option keyword", self.peek_token_ref()), } } @@ -19331,7 +19354,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::TIME) { WaitForType::Time } else { - return self.expected("DELAY or TIME", self.peek_token()); + return self.expected_ref("DELAY or TIME", self.peek_token_ref()); }; let expr = self.parse_expr()?; Ok(Statement::WaitFor(WaitForStatement { wait_type, expr })) @@ -19453,7 +19476,7 @@ impl<'a> Parser<'a> { Some(Keyword::FROM) => ShowStatementInClause::FROM, Some(Keyword::IN) => ShowStatementInClause::IN, None => 
return Ok(None), - _ => return self.expected("FROM or IN", self.peek_token()), + _ => return self.expected_ref("FROM or IN", self.peek_token_ref()), }; let (parent_type, parent_name) = match self.parse_one_of_keywords(&[ @@ -19488,9 +19511,9 @@ impl<'a> Parser<'a> { Keyword::TABLE => (Some(ShowStatementInParentType::Table), parent_name), Keyword::VIEW => (Some(ShowStatementInParentType::View), parent_name), _ => { - return self.expected( + return self.expected_ref( "one of ACCOUNT, DATABASE, SCHEMA, TABLE or VIEW", - self.peek_token(), + self.peek_token_ref(), ) } } @@ -19566,7 +19589,7 @@ impl<'a> Parser<'a> { if parenthesized { break; } else { - return self.expected(" another option or EOF", self.peek_token()); + return self.expected_ref(" another option or EOF", self.peek_token_ref()); } } Token::EOF | Token::SemiColon => break, @@ -19650,7 +19673,7 @@ impl<'a> Parser<'a> { }), } } - _ => self.expected("expected option value", self.peek_token()), + _ => self.expected_ref("expected option value", self.peek_token_ref()), } } From a99fe54c73399757002eb8e2bdc2dbf4fa4e9647 Mon Sep 17 00:00:00 2001 From: "Guan-Ming (Wesley) Chiu" <105915352+guan404ming@users.noreply.github.com> Date: Fri, 20 Feb 2026 23:03:23 +0800 Subject: [PATCH 084/121] PostgreSQL: Support more COMMENT ON object types (#2220) Signed-off-by: Guan-Ming Chiu --- src/ast/mod.rs | 42 ++++++++++++++++++++++++++++++--------- src/parser/mod.rs | 40 +++++++++++++++++++++++++++++++------ tests/sqlparser_common.rs | 15 +++++++++++--- 3 files changed, 79 insertions(+), 18 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index d534b300b4..a06526ec9b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -2439,30 +2439,54 @@ impl fmt::Display for ShowCreateObject { pub enum CommentObject { /// A table column. Column, - /// A table. - Table, + /// A database. + Database, + /// A domain. + Domain, /// An extension. Extension, + /// A function. + Function, + /// An index. + Index, + /// A materialized view. 
+ MaterializedView, + /// A procedure. + Procedure, + /// A role. + Role, /// A schema. Schema, - /// A database. - Database, + /// A sequence. + Sequence, + /// A table. + Table, + /// A type. + Type, /// A user. User, - /// A role. - Role, + /// A view. + View, } impl fmt::Display for CommentObject { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { CommentObject::Column => f.write_str("COLUMN"), - CommentObject::Table => f.write_str("TABLE"), + CommentObject::Database => f.write_str("DATABASE"), + CommentObject::Domain => f.write_str("DOMAIN"), CommentObject::Extension => f.write_str("EXTENSION"), + CommentObject::Function => f.write_str("FUNCTION"), + CommentObject::Index => f.write_str("INDEX"), + CommentObject::MaterializedView => f.write_str("MATERIALIZED VIEW"), + CommentObject::Procedure => f.write_str("PROCEDURE"), + CommentObject::Role => f.write_str("ROLE"), CommentObject::Schema => f.write_str("SCHEMA"), - CommentObject::Database => f.write_str("DATABASE"), + CommentObject::Sequence => f.write_str("SEQUENCE"), + CommentObject::Table => f.write_str("TABLE"), + CommentObject::Type => f.write_str("TYPE"), CommentObject::User => f.write_str("USER"), - CommentObject::Role => f.write_str("ROLE"), + CommentObject::View => f.write_str("VIEW"), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index de12078a50..a7ee54155c 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -901,23 +901,51 @@ impl<'a> Parser<'a> { Token::Word(w) if w.keyword == Keyword::COLUMN => { (CommentObject::Column, self.parse_object_name(false)?) } - Token::Word(w) if w.keyword == Keyword::TABLE => { - (CommentObject::Table, self.parse_object_name(false)?) + Token::Word(w) if w.keyword == Keyword::DATABASE => { + (CommentObject::Database, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::DOMAIN => { + (CommentObject::Domain, self.parse_object_name(false)?) 
} Token::Word(w) if w.keyword == Keyword::EXTENSION => { (CommentObject::Extension, self.parse_object_name(false)?) } + Token::Word(w) if w.keyword == Keyword::FUNCTION => { + (CommentObject::Function, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::INDEX => { + (CommentObject::Index, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::MATERIALIZED => { + self.expect_keyword_is(Keyword::VIEW)?; + ( + CommentObject::MaterializedView, + self.parse_object_name(false)?, + ) + } + Token::Word(w) if w.keyword == Keyword::PROCEDURE => { + (CommentObject::Procedure, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::ROLE => { + (CommentObject::Role, self.parse_object_name(false)?) + } Token::Word(w) if w.keyword == Keyword::SCHEMA => { (CommentObject::Schema, self.parse_object_name(false)?) } - Token::Word(w) if w.keyword == Keyword::DATABASE => { - (CommentObject::Database, self.parse_object_name(false)?) + Token::Word(w) if w.keyword == Keyword::SEQUENCE => { + (CommentObject::Sequence, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::TABLE => { + (CommentObject::Table, self.parse_object_name(false)?) + } + Token::Word(w) if w.keyword == Keyword::TYPE => { + (CommentObject::Type, self.parse_object_name(false)?) } Token::Word(w) if w.keyword == Keyword::USER => { (CommentObject::User, self.parse_object_name(false)?) } - Token::Word(w) if w.keyword == Keyword::ROLE => { - (CommentObject::Role, self.parse_object_name(false)?) + Token::Word(w) if w.keyword == Keyword::VIEW => { + (CommentObject::View, self.parse_object_name(false)?) 
} _ => self.expected("comment object_type", token)?, }; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 4f01ea0cee..c37cfa4493 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15185,14 +15185,23 @@ fn parse_comments() { _ => unreachable!(), } + // https://www.postgresql.org/docs/current/sql-comment.html let object_types = [ ("COLUMN", CommentObject::Column), + ("DATABASE", CommentObject::Database), + ("DOMAIN", CommentObject::Domain), ("EXTENSION", CommentObject::Extension), - ("TABLE", CommentObject::Table), + ("FUNCTION", CommentObject::Function), + ("INDEX", CommentObject::Index), + ("MATERIALIZED VIEW", CommentObject::MaterializedView), + ("PROCEDURE", CommentObject::Procedure), + ("ROLE", CommentObject::Role), ("SCHEMA", CommentObject::Schema), - ("DATABASE", CommentObject::Database), + ("SEQUENCE", CommentObject::Sequence), + ("TABLE", CommentObject::Table), + ("TYPE", CommentObject::Type), ("USER", CommentObject::User), - ("ROLE", CommentObject::Role), + ("VIEW", CommentObject::View), ]; for (keyword, expected_object_type) in object_types.iter() { match all_dialects_where(|d| d.supports_comment_on()) From e81eb14a147f32e68942aea668ab56c1548394b2 Mon Sep 17 00:00:00 2001 From: Marcelo Altmann Date: Fri, 20 Feb 2026 13:01:06 -0300 Subject: [PATCH 085/121] Allow custom OptimizerHints (#2216) --- src/ast/dml.rs | 31 ++++++----- src/ast/mod.rs | 16 +++++- src/ast/query.rs | 6 +- src/ast/spans.rs | 10 ++-- src/dialect/snowflake.rs | 2 +- src/parser/merge.rs | 4 +- src/parser/mod.rs | 100 +++++++++++++++++++--------------- tests/sqlparser_bigquery.rs | 4 +- tests/sqlparser_clickhouse.rs | 2 +- tests/sqlparser_common.rs | 30 +++++----- tests/sqlparser_duckdb.rs | 4 +- tests/sqlparser_mssql.rs | 6 +- tests/sqlparser_mysql.rs | 50 +++++++++++++---- tests/sqlparser_oracle.rs | 43 +++++++++------ tests/sqlparser_postgres.rs | 12 ++-- tests/sqlparser_sqlite.rs | 2 +- 16 files changed, 191 insertions(+), 131 deletions(-) 
diff --git a/src/ast/dml.rs b/src/ast/dml.rs index f9c8823a22..a0be916dee 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -43,11 +43,11 @@ use super::{ pub struct Insert { /// Token for the `INSERT` keyword (or its substitutes) pub insert_token: AttachedToken, - /// A query optimizer hint + /// Query optimizer hints /// /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) - pub optimizer_hint: Option, + pub optimizer_hints: Vec, /// Only for Sqlite pub or: Option, /// Only for mysql @@ -133,7 +133,7 @@ impl Display for Insert { if let Some(on_conflict) = self.or { f.write_str("INSERT")?; - if let Some(hint) = self.optimizer_hint.as_ref() { + for hint in &self.optimizer_hints { write!(f, " {hint}")?; } write!(f, " {on_conflict} INTO {table_name} ")?; @@ -147,7 +147,7 @@ impl Display for Insert { "INSERT" } )?; - if let Some(hint) = self.optimizer_hint.as_ref() { + for hint in &self.optimizer_hints { write!(f, " {hint}")?; } if let Some(priority) = self.priority { @@ -267,11 +267,11 @@ impl Display for Insert { pub struct Delete { /// Token for the `DELETE` keyword pub delete_token: AttachedToken, - /// A query optimizer hint + /// Query optimizer hints /// /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) - pub optimizer_hint: Option, + pub optimizer_hints: Vec, /// Multi tables delete are supported in mysql pub tables: Vec, /// FROM @@ -291,7 +291,7 @@ pub struct Delete { impl Display for Delete { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str("DELETE")?; - if let Some(hint) = self.optimizer_hint.as_ref() { + for hint in &self.optimizer_hints { f.write_str(" ")?; hint.fmt(f)?; } @@ -345,11 +345,11 @@ impl Display for 
Delete { pub struct Update { /// Token for the `UPDATE` keyword pub update_token: AttachedToken, - /// A query optimizer hint + /// Query optimizer hints /// /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) - pub optimizer_hint: Option, + pub optimizer_hints: Vec, /// TABLE pub table: TableWithJoins, /// Column assignments @@ -368,11 +368,12 @@ pub struct Update { impl Display for Update { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.write_str("UPDATE ")?; - if let Some(hint) = self.optimizer_hint.as_ref() { - hint.fmt(f)?; + f.write_str("UPDATE")?; + for hint in &self.optimizer_hints { f.write_str(" ")?; + hint.fmt(f)?; } + f.write_str(" ")?; if let Some(or) = &self.or { or.fmt(f)?; f.write_str(" ")?; @@ -419,10 +420,10 @@ impl Display for Update { pub struct Merge { /// The `MERGE` token that starts the statement. pub merge_token: AttachedToken, - /// A query optimizer hint + /// Query optimizer hints /// /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) - pub optimizer_hint: Option, + pub optimizer_hints: Vec, /// optional INTO keyword pub into: bool, /// Specifies the table to merge @@ -440,7 +441,7 @@ pub struct Merge { impl Display for Merge { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.write_str("MERGE")?; - if let Some(hint) = self.optimizer_hint.as_ref() { + for hint in &self.optimizer_hints { write!(f, " {hint}")?; } if self.into { diff --git a/src/ast/mod.rs b/src/ast/mod.rs index a06526ec9b..61b0f65b2b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -11674,12 +11674,19 @@ pub struct ResetStatement { /// `SELECT`, `INSERT`, `UPDATE`, `REPLACE`, `MERGE`, and `DELETE` keywords in /// the corresponding statements. 
/// -/// See [Select::optimizer_hint] +/// See [Select::optimizer_hints] #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct OptimizerHint { - /// the raw test of the optimizer hint without its markers + /// An optional prefix between the comment marker and `+`. + /// + /// Standard optimizer hints like `/*+ ... */` have an empty prefix, + /// while system-specific hints like `/*abc+ ... */` have `prefix = "abc"`. + /// The prefix is any sequence of ASCII alphanumeric characters + /// immediately before the `+` marker. + pub prefix: String, + /// the raw text of the optimizer hint without its markers pub text: String, /// the style of the comment which `text` was extracted from, /// e.g. `/*+...*/` or `--+...` @@ -11709,11 +11716,14 @@ impl fmt::Display for OptimizerHint { match &self.style { OptimizerHintStyle::SingleLine { prefix } => { f.write_str(prefix)?; + f.write_str(&self.prefix)?; f.write_str("+")?; f.write_str(&self.text) } OptimizerHintStyle::MultiLine => { - f.write_str("/*+")?; + f.write_str("/*")?; + f.write_str(&self.prefix)?; + f.write_str("+")?; f.write_str(&self.text)?; f.write_str("*/") } diff --git a/src/ast/query.rs b/src/ast/query.rs index 6d95216df1..ff617a38e4 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -445,11 +445,11 @@ impl SelectModifiers { pub struct Select { /// Token for the `SELECT` keyword pub select_token: AttachedToken, - /// A query optimizer hint + /// Query optimizer hints /// /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html) /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) - pub optimizer_hint: Option, + pub optimizer_hints: Vec, /// `SELECT [DISTINCT] ...` pub distinct: Option, /// MySQL-specific SELECT modifiers. 
@@ -521,7 +521,7 @@ impl fmt::Display for Select { } } - if let Some(hint) = self.optimizer_hint.as_ref() { + for hint in &self.optimizer_hints { f.write_str(" ")?; hint.fmt(f)?; } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 74f19e831e..128fe01bee 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -900,7 +900,7 @@ impl Spanned for Delete { fn span(&self) -> Span { let Delete { delete_token, - optimizer_hint: _, + optimizer_hints: _, tables, from, using, @@ -934,7 +934,7 @@ impl Spanned for Update { fn span(&self) -> Span { let Update { update_token, - optimizer_hint: _, + optimizer_hints: _, table, assignments, from, @@ -1298,7 +1298,7 @@ impl Spanned for Insert { fn span(&self) -> Span { let Insert { insert_token, - optimizer_hint: _, + optimizer_hints: _, or: _, // enum, sqlite specific ignore: _, // bool into: _, // bool @@ -2246,7 +2246,7 @@ impl Spanned for Select { fn span(&self) -> Span { let Select { select_token, - optimizer_hint: _, + optimizer_hints: _, distinct: _, // todo select_modifiers: _, top: _, // todo, mysql specific @@ -2840,7 +2840,7 @@ WHERE id = 1 // ~ individual tokens within the statement let Statement::Merge(Merge { merge_token, - optimizer_hint: _, + optimizer_hints: _, into: _, table: _, source: _, diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 8af1367f0e..984e384fd8 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -1765,7 +1765,7 @@ fn parse_multi_table_insert( Ok(Statement::Insert(Insert { insert_token: insert_token.into(), - optimizer_hint: None, + optimizer_hints: vec![], or: None, ignore: false, into: false, diff --git a/src/parser/merge.rs b/src/parser/merge.rs index 31f435f8f9..a927bc4b1c 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -43,7 +43,7 @@ impl Parser<'_> { /// Parse a `MERGE` statement pub fn parse_merge(&mut self, merge_token: TokenWithSpan) -> Result { - let optimizer_hint = self.maybe_parse_optimizer_hint()?; + let optimizer_hints = 
self.maybe_parse_optimizer_hints()?; let into = self.parse_keyword(Keyword::INTO); let table = self.parse_table_factor()?; @@ -60,7 +60,7 @@ impl Parser<'_> { Ok(Merge { merge_token: merge_token.into(), - optimizer_hint, + optimizer_hints, into, table, source, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a7ee54155c..16eb7a8b19 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -13272,7 +13272,7 @@ impl<'a> Parser<'a> { /// Parse a `DELETE` statement and return `Statement::Delete`. pub fn parse_delete(&mut self, delete_token: TokenWithSpan) -> Result { - let optimizer_hint = self.maybe_parse_optimizer_hint()?; + let optimizer_hints = self.maybe_parse_optimizer_hints()?; let (tables, with_from_keyword) = if !self.parse_keyword(Keyword::FROM) { // `FROM` keyword is optional in BigQuery SQL. // https://cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax#delete_statement @@ -13316,7 +13316,7 @@ impl<'a> Parser<'a> { Ok(Statement::Delete(Delete { delete_token: delete_token.into(), - optimizer_hint, + optimizer_hints, tables, from: if with_from_keyword { FromTable::WithFromKeyword(from) @@ -14088,7 +14088,7 @@ impl<'a> Parser<'a> { if !self.peek_keyword(Keyword::SELECT) { return Ok(Select { select_token: AttachedToken(from_token), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -14117,7 +14117,7 @@ impl<'a> Parser<'a> { } let select_token = self.expect_keyword(Keyword::SELECT)?; - let optimizer_hint = self.maybe_parse_optimizer_hint()?; + let optimizer_hints = self.maybe_parse_optimizer_hints()?; let value_table_mode = self.parse_value_table_mode()?; let (select_modifiers, distinct_select_modifier) = @@ -14276,7 +14276,7 @@ impl<'a> Parser<'a> { Ok(Select { select_token: AttachedToken(select_token), - optimizer_hint, + optimizer_hints, distinct, select_modifiers, top, @@ -14306,53 +14306,65 @@ impl<'a> Parser<'a> { }) } - /// Parses an optional optimizer hint at the current token 
position + /// Parses optimizer hints at the current token position. + /// + /// Collects all `/*prefix+...*/` and `--prefix+...` patterns. + /// The `prefix` is any run of ASCII alphanumeric characters between the + /// comment marker and `+` (e.g. `""` for `/*+...*/`, `"abc"` for `/*abc+...*/`). /// /// [MySQL](https://dev.mysql.com/doc/refman/8.4/en/optimizer-hints.html#optimizer-hints-overview) /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/Comments.html#GUID-D316D545-89E2-4D54-977F-FC97815CD62E) - fn maybe_parse_optimizer_hint(&mut self) -> Result, ParserError> { + fn maybe_parse_optimizer_hints(&mut self) -> Result, ParserError> { let supports_hints = self.dialect.supports_comment_optimizer_hint(); if !supports_hints { - return Ok(None); + return Ok(vec![]); } + let mut hints = vec![]; loop { let t = self.peek_nth_token_no_skip_ref(0); - match &t.token { - Token::Whitespace(ws) => { - match ws { - Whitespace::SingleLineComment { comment, .. } - | Whitespace::MultiLineComment(comment) => { - return Ok(match comment.strip_prefix("+") { - None => None, - Some(text) => { - let hint = OptimizerHint { - text: text.into(), - style: if let Whitespace::SingleLineComment { - prefix, .. 
- } = ws - { - OptimizerHintStyle::SingleLine { - prefix: prefix.clone(), - } - } else { - OptimizerHintStyle::MultiLine - }, - }; - // Consume the comment token - self.next_token_no_skip(); - Some(hint) - } - }); - } - Whitespace::Space | Whitespace::Tab | Whitespace::Newline => { - // Consume the token and try with the next whitespace or comment - self.next_token_no_skip(); - } + let Token::Whitespace(ws) = &t.token else { + break; + }; + match ws { + Whitespace::SingleLineComment { comment, prefix } => { + if let Some((hint_prefix, text)) = Self::extract_hint_prefix_and_text(comment) { + hints.push(OptimizerHint { + prefix: hint_prefix, + text, + style: OptimizerHintStyle::SingleLine { + prefix: prefix.clone(), + }, + }); + } + self.next_token_no_skip(); + } + Whitespace::MultiLineComment(comment) => { + if let Some((hint_prefix, text)) = Self::extract_hint_prefix_and_text(comment) { + hints.push(OptimizerHint { + prefix: hint_prefix, + text, + style: OptimizerHintStyle::MultiLine, + }); } + self.next_token_no_skip(); + } + Whitespace::Space | Whitespace::Tab | Whitespace::Newline => { + self.next_token_no_skip(); } - _ => return Ok(None), } } + Ok(hints) + } + + /// Checks if a comment's content starts with `[ASCII-alphanumeric]*+` + /// and returns `(prefix, text_after_plus)` if so. + fn extract_hint_prefix_and_text(comment: &str) -> Option<(String, String)> { + let (before_plus, text) = comment.split_once('+')?; + if before_plus.chars().all(|c| c.is_ascii_alphanumeric()) { + Some((before_plus.to_string(), text.to_string())) + } else { + None + } } /// Parses MySQL SELECT modifiers and DISTINCT/ALL in any order. 
@@ -17173,7 +17185,7 @@ impl<'a> Parser<'a> { /// Parse an INSERT statement pub fn parse_insert(&mut self, insert_token: TokenWithSpan) -> Result { - let optimizer_hint = self.maybe_parse_optimizer_hint()?; + let optimizer_hints = self.maybe_parse_optimizer_hints()?; let or = self.parse_conflict_clause(); let priority = if !dialect_of!(self is MySqlDialect | GenericDialect) { None @@ -17343,7 +17355,7 @@ impl<'a> Parser<'a> { Ok(Insert { insert_token: insert_token.into(), - optimizer_hint, + optimizer_hints, or, table: table_object, table_alias, @@ -17451,7 +17463,7 @@ impl<'a> Parser<'a> { /// Parse an `UPDATE` statement and return `Statement::Update`. pub fn parse_update(&mut self, update_token: TokenWithSpan) -> Result { - let optimizer_hint = self.maybe_parse_optimizer_hint()?; + let optimizer_hints = self.maybe_parse_optimizer_hints()?; let or = self.parse_conflict_clause(); let table = self.parse_table_and_joins()?; let from_before_set = if self.parse_keyword(Keyword::FROM) { @@ -17487,7 +17499,7 @@ impl<'a> Parser<'a> { }; Ok(Update { update_token: update_token.into(), - optimizer_hint, + optimizer_hints, table, assignments, from, diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index cf843ea2b3..ce962cb807 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2681,7 +2681,7 @@ fn test_export_data() { }), Span::empty() )), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -2787,7 +2787,7 @@ fn test_export_data() { }), Span::empty() )), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, diff --git a/tests/sqlparser_clickhouse.rs b/tests/sqlparser_clickhouse.rs index b8b4e33708..82f79577b9 100644 --- a/tests/sqlparser_clickhouse.rs +++ b/tests/sqlparser_clickhouse.rs @@ -41,7 +41,7 @@ fn parse_map_access_expr() { assert_eq!( Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + 
optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index c37cfa4493..a3b5404d30 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -455,7 +455,7 @@ fn parse_update_set_from() { stmt, Statement::Update(Update { update_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], table: TableWithJoins { relation: table_from_name(ObjectName::from(vec![Ident::new("t1")])), joins: vec![], @@ -471,7 +471,7 @@ fn parse_update_set_from() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -551,9 +551,9 @@ fn parse_update_with_table_alias() { returning, or: None, limit: None, - optimizer_hint: None, + optimizer_hints, update_token: _, - }) => { + }) if optimizer_hints.is_empty() => { assert_eq!( TableWithJoins { relation: TableFactor::Table { @@ -5819,7 +5819,7 @@ fn test_parse_named_window() { let actual_select_only = dialects.verified_only_select(sql); let expected = Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -6551,7 +6551,7 @@ fn parse_interval_and_or_xor() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -8929,7 +8929,7 @@ fn lateral_function() { let actual_select_only = verified_only_select(sql); let expected = Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -9932,7 +9932,7 @@ fn parse_merge() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + 
optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -12356,7 +12356,7 @@ fn parse_unload() { query: Some(Box::new(Query { body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -12677,7 +12677,7 @@ fn parse_connect_by() { dialects.verified_only_select(connect_by_1), Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -12744,7 +12744,7 @@ fn parse_connect_by() { dialects.verified_only_select(connect_by_2), Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -12812,7 +12812,7 @@ fn parse_connect_by() { dialects.verified_only_select(connect_by_3), Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -12900,7 +12900,7 @@ fn parse_connect_by() { dialects.verified_only_select(connect_by_5), Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -13863,7 +13863,7 @@ fn test_extract_seconds_ok() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -16028,7 +16028,7 @@ fn test_select_from_first() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 91eb2799c6..e0e3f143b6 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -266,7 
+266,7 @@ fn test_select_union_by_name() { set_quantifier: *expected_quantifier, left: Box::::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -299,7 +299,7 @@ fn test_select_union_by_name() { }))), right: Box::::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index d7d11ba669..b5fd1e77e9 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -141,7 +141,7 @@ fn parse_create_procedure() { pipe_operators: vec![], body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -1350,7 +1350,7 @@ fn parse_substring_in_select() { body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: Some(Distinct::Distinct), select_modifiers: None, top: None, @@ -1509,7 +1509,7 @@ fn parse_mssql_declare() { body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 4ad0404b08..30405623dc 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1435,7 +1435,7 @@ fn parse_escaped_quote_identifiers_with_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -1492,7 +1492,7 @@ fn parse_escaped_quote_identifiers_with_no_escape() { with: None, body: 
Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -1541,7 +1541,7 @@ fn parse_escaped_backticks_with_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -1594,7 +1594,7 @@ fn parse_escaped_backticks_with_no_escape() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -2415,7 +2415,7 @@ fn parse_select_with_numeric_prefix_column_name() { q.body, Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -2591,7 +2591,7 @@ fn parse_select_with_concatenation_of_exp_number_and_numeric_prefix_column() { q.body, Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -2660,9 +2660,9 @@ fn parse_update_with_joins() { returning, or: None, limit: None, - optimizer_hint: None, + optimizer_hints, update_token: _, - }) => { + }) if optimizer_hints.is_empty() => { assert_eq!( TableWithJoins { relation: TableFactor::Table { @@ -3226,7 +3226,7 @@ fn parse_substring_in_select() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: Some(Distinct::Distinct), select_modifiers: None, top: None, @@ -3572,7 +3572,7 @@ fn parse_hex_string_introducer() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + 
optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -4649,6 +4649,36 @@ fn test_optimizer_hints() { "\ DELETE /*+ foobar */ FROM table_name", ); + + // prefixed hints: any alphanumeric prefix before `+` is captured + let select = mysql_dialect.verified_only_select("SELECT /*abc+ text */ 1"); + assert_eq!(select.optimizer_hints.len(), 1); + assert_eq!(select.optimizer_hints[0].prefix, "abc"); + assert_eq!(select.optimizer_hints[0].text, " text "); + + // multiple hints with different prefixes + let select = mysql_dialect.verified_only_select("SELECT /*+ A */ /*x2+ B */ 1"); + assert_eq!(select.optimizer_hints.len(), 2); + assert_eq!(select.optimizer_hints[0].prefix, ""); + assert_eq!(select.optimizer_hints[0].text, " A "); + assert_eq!(select.optimizer_hints[1].prefix, "x2"); + assert_eq!(select.optimizer_hints[1].text, " B "); + + // hints mixed with regular comments: regular comments are skipped + let select = mysql_dialect.verified_only_select_with_canonical( + "SELECT /*+ A */ /* Regular comment */ /*x2+ B */ 1", + "SELECT /*+ A */ /*x2+ B */ 1", + ); + assert_eq!(select.optimizer_hints.len(), 2); + assert_eq!(select.optimizer_hints[0].prefix, ""); + assert_eq!(select.optimizer_hints[0].text, " A "); + assert_eq!(select.optimizer_hints[1].prefix, "x2"); + assert_eq!(select.optimizer_hints[1].text, " B "); + + // prefixed hints in INSERT/UPDATE/DELETE + mysql_dialect.verified_stmt("INSERT /*abc+ append */ INTO t2 VALUES (2)"); + mysql_dialect.verified_stmt("UPDATE /*abc+ PARALLEL */ table_name SET column1 = 1"); + mysql_dialect.verified_stmt("DELETE /*abc+ ENABLE_DML */ FROM table_name"); } #[test] diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 0dbccdb5eb..8f7bb86702 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -338,36 +338,34 @@ fn parse_national_quote_delimited_string_but_is_a_word() { fn test_optimizer_hints() { let oracle_dialect = oracle(); - // selects + // selects: all 
`/*+...*/` comments are collected as hints let select = oracle_dialect.verified_only_select_with_canonical( - "SELECT /*+one two three*/ /*+not a hint!*/ 1 FROM dual", - "SELECT /*+one two three*/ 1 FROM dual", - ); - assert_eq!( - select - .optimizer_hint - .as_ref() - .map(|hint| hint.text.as_str()), - Some("one two three") + "SELECT /*+one two three*/ /*+four five six*/ 1 FROM dual", + "SELECT /*+one two three*/ /*+four five six*/ 1 FROM dual", ); + assert_eq!(select.optimizer_hints.len(), 2); + assert_eq!(select.optimizer_hints[0].text, "one two three"); + assert_eq!(select.optimizer_hints[0].prefix, ""); + assert_eq!(select.optimizer_hints[1].text, "four five six"); + // regular comments are skipped, hints after them are still collected let select = oracle_dialect.verified_only_select_with_canonical( - "SELECT /*one two three*/ /*+not a hint!*/ 1 FROM dual", - "SELECT 1 FROM dual", + "SELECT /*one two three*/ /*+four five six*/ 1 FROM dual", + "SELECT /*+four five six*/ 1 FROM dual", ); - assert_eq!(select.optimizer_hint, None); + assert_eq!(select.optimizer_hints.len(), 1); + assert_eq!(select.optimizer_hints[0].text, "four five six"); let select = oracle_dialect.verified_only_select_with_canonical( "SELECT --+ one two three /* asdf */\n 1 FROM dual", "SELECT --+ one two three /* asdf */\n 1 FROM dual", ); + assert_eq!(select.optimizer_hints.len(), 1); assert_eq!( - select - .optimizer_hint - .as_ref() - .map(|hint| hint.text.as_str()), - Some(" one two three /* asdf */\n") + select.optimizer_hints[0].text, + " one two three /* asdf */\n" ); + assert_eq!(select.optimizer_hints[0].prefix, ""); // inserts oracle_dialect.verified_stmt("INSERT /*+ append */ INTO t1 SELECT * FROM all_objects"); @@ -387,6 +385,15 @@ fn test_optimizer_hints() { (pt.person_id, pt.first_name, pt.last_name, pt.title) \ VALUES (ps.person_id, ps.first_name, ps.last_name, ps.title)", ); + + // single-line prefixed hint (Oracle supports `--` without trailing whitespace) + let select = 
oracle_dialect.verified_only_select_with_canonical( + "SELECT --abc+ text\n 1 FROM dual", + "SELECT --abc+ text\n 1 FROM dual", + ); + assert_eq!(select.optimizer_hints.len(), 1); + assert_eq!(select.optimizer_hints[0].prefix, "abc"); + assert_eq!(select.optimizer_hints[0].text, " text\n"); } #[test] diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index d79e2b833e..03517876d0 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1330,7 +1330,7 @@ fn parse_copy_to() { with: None, body: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -3112,7 +3112,7 @@ fn parse_array_subquery_expr() { set_quantifier: SetQuantifier::None, left: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -3140,7 +3140,7 @@ fn parse_array_subquery_expr() { }))), right: Box::new(SetExpr::Select(Box::new(Select { select_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], distinct: None, select_modifiers: None, top: None, @@ -5436,7 +5436,7 @@ fn test_simple_postgres_insert_with_alias() { statement, Statement::Insert(Insert { insert_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], or: None, ignore: false, into: true, @@ -5512,7 +5512,7 @@ fn test_simple_postgres_insert_with_alias() { statement, Statement::Insert(Insert { insert_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], or: None, ignore: false, into: true, @@ -5590,7 +5590,7 @@ fn test_simple_insert_with_quoted_alias() { statement, Statement::Insert(Insert { insert_token: AttachedToken::empty(), - optimizer_hint: None, + optimizer_hints: vec![], or: None, ignore: false, into: true, diff --git a/tests/sqlparser_sqlite.rs 
b/tests/sqlparser_sqlite.rs index ffe94ab8ad..a8fa8db223 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -477,7 +477,7 @@ fn parse_update_tuple_row_values() { assert_eq!( sqlite().verified_stmt("UPDATE x SET (a, b) = (1, 2)"), Statement::Update(Update { - optimizer_hint: None, + optimizer_hints: vec![], or: None, assignments: vec![Assignment { target: AssignmentTarget::Tuple(vec![ From 36e8ce602d75ebe607ec45f065cfb529f9e1ec5b Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Mon, 23 Feb 2026 17:30:09 +0400 Subject: [PATCH 086/121] Optimise out string allocations and copies in keyword lookup (#2226) Co-authored-by: Alexander Beedie --- src/parser/mod.rs | 68 +++++++++++++++++++++--------------------- src/tokenizer.rs | 75 ++++++++++++++++++++++++++++++----------------- 2 files changed, 83 insertions(+), 60 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 16eb7a8b19..0767e4321a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1618,8 +1618,9 @@ impl<'a> Parser<'a> { w: &Word, w_span: Span, ) -> Result { - match self.peek_token().token { - Token::LParen if !self.peek_outer_join_operator() => { + let is_outer_join = self.peek_outer_join_operator(); + match &self.peek_token_ref().token { + Token::LParen if !is_outer_join => { let id_parts = vec![w.to_ident(w_span)]; self.parse_function(ObjectName::from(id_parts)) } @@ -2244,8 +2245,8 @@ impl<'a> Parser<'a> { fn parse_utility_option(&mut self) -> Result { let name = self.parse_identifier()?; - let next_token = self.peek_token(); - if next_token == Token::Comma || next_token == Token::RParen { + let next_token = self.peek_token_ref(); + if next_token == &Token::Comma || next_token == &Token::RParen { return Ok(UtilityOption { name, arg: None }); } let arg = self.parse_expr()?; @@ -2329,7 +2330,7 @@ impl<'a> Parser<'a> { /// Parses a single parameter of a lambda function, with optional typing. 
fn parse_lambda_function_parameter(&mut self) -> Result { let name = self.parse_identifier()?; - let data_type = match self.peek_token().token { + let data_type = match &self.peek_token_ref().token { Token::Word(_) => self.maybe_parse(|p| p.parse_data_type())?, _ => None, }; @@ -2566,7 +2567,7 @@ impl<'a> Parser<'a> { let rows = if self.parse_keyword(Keyword::UNBOUNDED) { None } else { - Some(Box::new(match self.peek_token().token { + Some(Box::new(match &self.peek_token_ref().token { Token::SingleQuotedString(_) => self.parse_interval()?, _ => self.parse_expr()?, })) @@ -3015,7 +3016,7 @@ impl<'a> Parser<'a> { Ok(Some(ListAggOnOverflow::Error)) } else { self.expect_keyword_is(Keyword::TRUNCATE)?; - let filler = match self.peek_token().token { + let filler = match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::WITH || w.keyword == Keyword::WITHOUT => { @@ -3128,7 +3129,7 @@ impl<'a> Parser<'a> { /// /// Represented in the AST as `Expr::UnaryOp` with `UnaryOperator::Not`. 
pub fn parse_not(&mut self) -> Result { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) => match w.keyword { Keyword::EXISTS => { let negated = true; @@ -3677,7 +3678,7 @@ impl<'a> Parser<'a> { trailing_bracket: MatchedTrailingBracket, ) -> Result { let trailing_bracket = if !trailing_bracket.0 { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Gt => { self.next_token(); false.into() @@ -5337,7 +5338,7 @@ impl<'a> Parser<'a> { /// Parse 'AS' before as query,such as `WITH XXX AS SELECT XXX` oer `CACHE TABLE AS SELECT XXX` pub fn parse_as_query(&mut self) -> Result<(bool, Box), ParserError> { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(word) => match word.keyword { Keyword::AS => { self.next_token(); @@ -5854,7 +5855,7 @@ impl<'a> Parser<'a> { } _ => parser_err!( "Expected table column definitions after TABLE keyword", - p.peek_token().span.start + p.peek_token_ref().span.start )?, }; @@ -7499,7 +7500,7 @@ impl<'a> Parser<'a> { pub fn parse_big_query_declare(&mut self) -> Result { let names = self.parse_comma_separated(Parser::parse_identifier)?; - let data_type = match self.peek_token().token { + let data_type = match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::DEFAULT => None, _ => Some(self.parse_data_type()?), }; @@ -7563,7 +7564,7 @@ impl<'a> Parser<'a> { let (declare_type, for_query, assigned_expr, data_type) = if self.parse_keyword(Keyword::CURSOR) { self.expect_keyword_is(Keyword::FOR)?; - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::SELECT => ( Some(DeclareType::Cursor), Some(self.parse_query()?), @@ -7626,7 +7627,7 @@ impl<'a> Parser<'a> { stmts.push(stmt); if self.consume_token(&Token::SemiColon) { - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) if ALL_KEYWORDS .binary_search(&w.value.to_uppercase().as_str()) @@ 
-7680,7 +7681,7 @@ impl<'a> Parser<'a> { let ident = self.parse_identifier()?; if !ident.value.starts_with('@') && !matches!( - self.peek_token().token, + &self.peek_token_ref().token, Token::Word(w) if w.keyword == Keyword::CURSOR ) { @@ -7692,7 +7693,7 @@ impl<'a> Parser<'a> { } }?; - let (declare_type, data_type) = match self.peek_token().token { + let (declare_type, data_type) = match &self.peek_token_ref().token { Token::Word(w) => match w.keyword { Keyword::CURSOR => { self.next_token(); @@ -7739,7 +7740,7 @@ impl<'a> Parser<'a> { pub fn parse_snowflake_variable_declaration_expression( &mut self, ) -> Result, ParserError> { - Ok(match self.peek_token().token { + Ok(match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::DEFAULT => { self.next_token(); // Skip `DEFAULT` Some(DeclareAssignment::Default(Box::new(self.parse_expr()?))) @@ -7763,7 +7764,7 @@ impl<'a> Parser<'a> { pub fn parse_mssql_variable_declaration_expression( &mut self, ) -> Result, ParserError> { - Ok(match self.peek_token().token { + Ok(match &self.peek_token_ref().token { Token::Eq => { self.next_token(); // Skip `=` Some(DeclareAssignment::MsSqlAssignment(Box::new( @@ -8457,7 +8458,7 @@ impl<'a> Parser<'a> { } else { parser_err!( "Expecting DELETE ROWS, PRESERVE ROWS or DROP", - self.peek_token() + self.peek_token_ref() ) } } @@ -9602,7 +9603,7 @@ impl<'a> Parser<'a> { { let display_as_key = w.keyword == Keyword::KEY; - let name = match self.peek_token().token { + let name = match &self.peek_token_ref().token { Token::Word(word) if word.keyword == Keyword::USING => None, _ => self.parse_optional_ident()?, }; @@ -9803,7 +9804,7 @@ impl<'a> Parser<'a> { pub fn parse_sql_option(&mut self) -> Result { let is_mssql = dialect_of!(self is MsSqlDialect|GenericDialect); - match self.peek_token().token { + match &self.peek_token_ref().token { Token::Word(w) if w.keyword == Keyword::HEAP && is_mssql => { Ok(SqlOption::Ident(self.parse_identifier()?)) } @@ -11740,7 +11741,7 @@ 
impl<'a> Parser<'a> { if trailing_bracket.0 { return parser_err!( format!("unmatched > after parsing data type {ty}"), - self.peek_token() + self.peek_token_ref() ); } @@ -15172,7 +15173,7 @@ impl<'a> Parser<'a> { } } else { let natural = self.parse_keyword(Keyword::NATURAL); - let peek_keyword = if let Token::Word(w) = self.peek_token().token { + let peek_keyword = if let Token::Word(w) = &self.peek_token_ref().token { w.keyword } else { Keyword::NoKeyword @@ -15549,7 +15550,7 @@ impl<'a> Parser<'a> { } else { let name = self.parse_object_name(true)?; - let json_path = match self.peek_token().token { + let json_path = match &self.peek_token_ref().token { Token::LBracket if self.dialect.supports_partiql() => Some(self.parse_json_path()?), _ => None, }; @@ -15953,12 +15954,13 @@ impl<'a> Parser<'a> { } where_clause = Some(self.parse_expr()?); } else { + let tok = self.peek_token_ref(); return parser_err!( format!( "Expected one of DIMENSIONS, METRICS, FACTS or WHERE, got {}", - self.peek_token().token + tok.token ), - self.peek_token_ref().span.start + tok.span.start )?; } } @@ -18979,7 +18981,7 @@ impl<'a> Parser<'a> { /// Parse a window specification. pub fn parse_window_spec(&mut self) -> Result { - let window_name = match self.peek_token().token { + let window_name = match &self.peek_token_ref().token { Token::Word(word) if word.keyword == Keyword::NoKeyword => { self.parse_optional_ident()? 
} @@ -19277,9 +19279,9 @@ impl<'a> Parser<'a> { Some(Keyword::DOUBLE) => Ok(UserDefinedTypeSqlDefinitionOption::Alignment( Alignment::Double, )), - _ => self.expected( + _ => self.expected_ref( "alignment value (char, int2, int4, or double)", - self.peek_token(), + self.peek_token_ref(), ), } } @@ -19304,9 +19306,9 @@ impl<'a> Parser<'a> { Some(Keyword::MAIN) => Ok(UserDefinedTypeSqlDefinitionOption::Storage( UserDefinedTypeStorage::Main, )), - _ => self.expected( + _ => self.expected_ref( "storage value (plain, external, extended, or main)", - self.peek_token(), + self.peek_token_ref(), ), } } @@ -19645,9 +19647,9 @@ impl<'a> Parser<'a> { break; } _ => { - return self.expected( + return self.expected_ref( "another option, EOF, SemiColon, Comma or ')'", - self.peek_token(), + self.peek_token_ref(), ) } }; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 852b73164f..5ca686d46c 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -413,24 +413,42 @@ impl Token { /// When `quote_style` is `None`, the parser attempts a case-insensitive keyword /// lookup and sets the `Word::keyword` accordingly. pub fn make_word(word: &str, quote_style: Option) -> Self { - // Only perform keyword lookup for unquoted identifiers. - // Use to_ascii_uppercase() since SQL keywords are ASCII, - // avoiding Unicode case conversion overhead. - let keyword = if quote_style.is_none() { - let word_uppercase = word.to_ascii_uppercase(); - ALL_KEYWORDS - .binary_search(&word_uppercase.as_str()) - .map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x]) - } else { - Keyword::NoKeyword - }; - Token::Word(Word { + keyword: keyword_lookup(word, quote_style), value: word.to_string(), quote_style, - keyword, }) } + + /// Like [`Self::make_word`] but takes ownership of the word `String`, + /// avoiding an extra allocation when the caller already has an owned value. 
+ fn make_word_owned(word: String, quote_style: Option) -> Self { + Token::Word(Word { + keyword: keyword_lookup(&word, quote_style), + value: word, + quote_style, + }) + } +} + +/// Case-insensitive keyword lookup using binary search over [`ALL_KEYWORDS`]. +fn keyword_lookup(word: &str, quote_style: Option) -> Keyword { + if quote_style.is_some() { + return Keyword::NoKeyword; + } + ALL_KEYWORDS + .binary_search_by(|probe| { + let probe = probe.as_bytes(); + let word = word.as_bytes(); + for (p, w) in probe.iter().zip(word.iter()) { + let cmp = p.cmp(&w.to_ascii_uppercase()); + if cmp != core::cmp::Ordering::Equal { + return cmp; + } + } + probe.len().cmp(&word.len()) + }) + .map_or(Keyword::NoKeyword, |x| ALL_KEYWORDS_INDEX[x]) } /// A keyword (like SELECT) or an optionally quoted SQL identifier @@ -1041,7 +1059,7 @@ impl<'a> Tokenizer<'a> { return Ok(Some(Token::Number(s, false))); } - Ok(Some(Token::make_word(&word, None))) + Ok(Some(Token::make_word_owned(word, None))) } /// Get the next token or return None @@ -1099,7 +1117,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "b" or "B" let s = self.tokenize_word(b, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } @@ -1126,7 +1144,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "r" or "R" let s = self.tokenize_word(b, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } @@ -1151,13 +1169,13 @@ impl<'a> Tokenizer<'a> { .map(|s| Some(Token::NationalQuoteDelimitedStringLiteral(s))) } else { let s = self.tokenize_word(String::from_iter([n, q]), chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } _ => { // regular identifier starting with an "N" let s = self.tokenize_word(n, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } @@ -1168,7 +1186,7 @@ impl<'a> Tokenizer<'a> { .map(|s| 
Some(Token::QuoteDelimitedStringLiteral(s))) } else { let s = self.tokenize_word(q, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard. @@ -1184,7 +1202,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "E" or "e" let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } @@ -1203,7 +1221,7 @@ impl<'a> Tokenizer<'a> { } // regular identifier starting with an "U" or "u" let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } // The spec only allows an uppercase 'X' to introduce a hex // string, but PostgreSQL, at least, allows a lowercase 'x' too. @@ -1218,7 +1236,7 @@ impl<'a> Tokenizer<'a> { _ => { // regular identifier starting with an "X" let s = self.tokenize_word(x, chars); - Ok(Some(Token::make_word(&s, None))) + Ok(Some(Token::make_word_owned(s, None))) } } } @@ -1267,7 +1285,7 @@ impl<'a> Tokenizer<'a> { // delimited (quoted) identifier quote_start if self.dialect.is_delimited_identifier_start(ch) => { let word = self.tokenize_quoted_identifier(quote_start, chars)?; - Ok(Some(Token::make_word(&word, Some(quote_start)))) + Ok(Some(Token::make_word_owned(word, Some(quote_start)))) } // Potentially nested delimited (quoted) identifier quote_start @@ -1291,7 +1309,7 @@ impl<'a> Tokenizer<'a> { let Some(nested_quote_start) = nested_quote_start else { let word = self.tokenize_quoted_identifier(quote_start, chars)?; - return Ok(Some(Token::make_word(&word, Some(quote_start)))); + return Ok(Some(Token::make_word_owned(word, Some(quote_start)))); }; let mut word = vec![]; @@ -1319,7 +1337,10 @@ impl<'a> Tokenizer<'a> { } chars.next(); // skip close delimiter - Ok(Some(Token::make_word(&word.concat(), Some(quote_start)))) + Ok(Some(Token::make_word_owned( + word.concat(), + 
Some(quote_start), + ))) } // numbers and period '0'..='9' | '.' => { @@ -1429,12 +1450,12 @@ impl<'a> Tokenizer<'a> { if !word.is_empty() { s += word.as_str(); - return Ok(Some(Token::make_word(s.as_str(), None))); + return Ok(Some(Token::make_word_owned(s, None))); } } else if prev_token == Some(&Token::Period) { // If the previous token was a period, thus not belonging to a number, // the value we have is part of an identifier. - return Ok(Some(Token::make_word(s.as_str(), None))); + return Ok(Some(Token::make_word_owned(s, None))); } } From 0b589b2555be7a29879220744124ec7b350e6bbf Mon Sep 17 00:00:00 2001 From: xitep Date: Mon, 23 Feb 2026 15:06:31 +0100 Subject: [PATCH 087/121] [Oracle] Table alias for INSERTed table (#2214) Co-authored-by: Ifeanyi Ubah --- src/ast/dml.rs | 17 ++++-- src/ast/mod.rs | 11 ++++ src/ast/spans.rs | 2 +- src/dialect/mod.rs | 5 ++ src/dialect/oracle.rs | 4 ++ src/dialect/postgresql.rs | 4 ++ src/parser/mod.rs | 33 +++++++---- tests/sqlparser_oracle.rs | 108 +++++++++++++++++++++++++++++++++++- tests/sqlparser_postgres.rs | 33 +++++++---- 9 files changed, 186 insertions(+), 31 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index a0be916dee..06f731c5ca 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -33,7 +33,8 @@ use super::{ display_comma_separated, helpers::attached_token::AttachedToken, query::InputFormatClause, Assignment, Expr, FromTable, Ident, InsertAliases, MysqlInsertPriority, ObjectName, OnInsert, OptimizerHint, OrderByExpr, Query, SelectInto, SelectItem, Setting, SqliteOnConflict, - TableFactor, TableObject, TableWithJoins, UpdateTableFromKind, Values, + TableAliasWithoutColumns, TableFactor, TableObject, TableWithJoins, UpdateTableFromKind, + Values, }; /// INSERT statement. 
@@ -56,8 +57,9 @@ pub struct Insert { pub into: bool, /// TABLE pub table: TableObject, - /// table_name as foo (for PostgreSQL) - pub table_alias: Option, + /// `table_name as foo` (for PostgreSQL) + /// `table_name foo` (for Oracle) + pub table_alias: Option, /// COLUMNS pub columns: Vec, /// Overwrite (Hive) @@ -125,8 +127,13 @@ pub struct Insert { impl Display for Insert { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { // SQLite OR conflict has a special format: INSERT OR ... INTO table_name - let table_name = if let Some(alias) = &self.table_alias { - format!("{0} AS {alias}", self.table) + let table_name = if let Some(table_alias) = &self.table_alias { + format!( + "{table} {as_keyword}{alias}", + table = self.table, + as_keyword = if table_alias.explicit { "AS " } else { "" }, + alias = table_alias.alias + ) } else { self.table.to_string() }; diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 61b0f65b2b..7b1e9447d5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -6480,6 +6480,17 @@ pub struct InsertAliases { pub col_aliases: Option>, } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +/// Optional alias for an `INSERT` table; i.e. 
the table to be inserted into +pub struct TableAliasWithoutColumns { + /// `true` if the alias was explicitly introduced with the "AS" keyword + pub explicit: bool, + /// the alias name itself + pub alias: Ident, +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 128fe01bee..b29a134b46 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1327,7 +1327,7 @@ impl Spanned for Insert { union_spans( core::iter::once(insert_token.0.span) .chain(core::iter::once(table.span())) - .chain(table_alias.as_ref().map(|i| i.span)) + .chain(table_alias.iter().map(|k| k.alias.span)) .chain(columns.iter().map(|i| i.span)) .chain(source.as_ref().map(|q| q.span())) .chain(assignments.iter().map(|i| i.span())) diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index b1be1590de..bcca455eca 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1240,6 +1240,11 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if this dialect supports `INSERT INTO t [[AS] alias] ...`. + fn supports_insert_table_alias(&self) -> bool { + false + } + /// Returns true if this dialect supports `SET` statements without an explicit /// assignment operator such as `=`. For example: `SET SHOWPLAN_XML ON`. 
fn supports_set_stmt_without_operator(&self) -> bool { diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index c2147eae70..dce0493d3b 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -110,4 +110,8 @@ impl Dialect for OracleDialect { fn supports_comment_optimizer_hint(&self) -> bool { true } + + fn supports_insert_table_alias(&self) -> bool { + true + } } diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index b0511f6d2e..13bd82bfdb 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -288,4 +288,8 @@ impl Dialect for PostgreSqlDialect { fn supports_interval_options(&self) -> bool { true } + + fn supports_insert_table_alias(&self) -> bool { + true + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 0767e4321a..bb11d79c2d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17231,12 +17231,27 @@ impl<'a> Parser<'a> { let table = self.parse_keyword(Keyword::TABLE); let table_object = self.parse_table_object()?; - let table_alias = - if dialect_of!(self is PostgreSqlDialect) && self.parse_keyword(Keyword::AS) { - Some(self.parse_identifier()?) + let table_alias = if self.dialect.supports_insert_table_alias() + && !self.peek_sub_query() + && self + .peek_one_of_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) + .is_none() + { + if self.parse_keyword(Keyword::AS) { + Some(TableAliasWithoutColumns { + explicit: true, + alias: self.parse_identifier()?, + }) } else { - None - }; + self.maybe_parse(|parser| parser.parse_identifier())? + .map(|alias| TableAliasWithoutColumns { + explicit: false, + alias, + }) + } + } else { + None + }; let is_mysql = dialect_of!(self is MySqlDialect); @@ -19477,14 +19492,8 @@ impl<'a> Parser<'a> { /// Returns true if the next keyword indicates a sub query, i.e. 
SELECT or WITH fn peek_sub_query(&mut self) -> bool { - if self - .parse_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) + self.peek_one_of_keywords(&[Keyword::SELECT, Keyword::WITH]) .is_some() - { - self.prev_token(); - return true; - } - false } pub(crate) fn parse_show_stmt_options(&mut self) -> Result { diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index 8f7bb86702..b34a9308d9 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -21,7 +21,10 @@ use pretty_assertions::assert_eq; use sqlparser::{ - ast::{BinaryOperator, Expr, Ident, QuoteDelimitedString, Value, ValueWithSpan}, + ast::{ + BinaryOperator, Expr, Ident, Insert, ObjectName, Query, QuoteDelimitedString, SetExpr, + Statement, TableAliasWithoutColumns, TableObject, Value, ValueWithSpan, + }, dialect::OracleDialect, parser::ParserError, tokenizer::Span, @@ -421,3 +424,106 @@ fn test_connect_by() { ORDER BY \"Employee\", \"Manager\", \"Pathlen\", \"Path\"", ); } + +#[test] +fn test_insert_with_table_alias() { + let oracle_dialect = oracle(); + + fn verify_table_name_with_alias(stmt: &Statement, exp_table_name: &str, exp_table_alias: &str) { + assert!(matches!(stmt, + Statement::Insert(Insert { + table: TableObject::TableName(table_name), + table_alias: Some(TableAliasWithoutColumns { + explicit: false, + alias: Ident { + value: table_alias, + quote_style: None, + span: _ + } + }), + .. 
+ }) + if table_alias == exp_table_alias + && table_name == &ObjectName::from(vec![Ident { + value: exp_table_name.into(), + quote_style: None, + span: Span::empty(), + }]) + )); + } + + let stmt = oracle_dialect.verified_stmt( + "INSERT INTO foo_t t \ + SELECT 1, 2, 3 FROM dual", + ); + verify_table_name_with_alias(&stmt, "foo_t", "t"); + + let stmt = oracle_dialect.verified_stmt( + "INSERT INTO foo_t asdf (a, b, c) \ + SELECT 1, 2, 3 FROM dual", + ); + verify_table_name_with_alias(&stmt, "foo_t", "asdf"); + + let stmt = oracle_dialect.verified_stmt( + "INSERT INTO foo_t t (a, b, c) \ + VALUES (1, 2, 3)", + ); + verify_table_name_with_alias(&stmt, "foo_t", "t"); + + let stmt = oracle_dialect.verified_stmt( + "INSERT INTO foo_t t \ + VALUES (1, 2, 3)", + ); + verify_table_name_with_alias(&stmt, "foo_t", "t"); +} + +#[test] +fn test_insert_without_alias() { + let oracle_dialect = oracle(); + + // check DEFAULT + let sql = "INSERT INTO t default SELECT 'a' FROM dual"; + assert_eq!( + oracle_dialect.parse_sql_statements(sql), + Err(ParserError::ParserError( + "Expected: SELECT, VALUES, or a subquery in the query body, found: default".into() + )) + ); + + // check SELECT + let sql = "INSERT INTO t SELECT 'a' FROM dual"; + let stmt = oracle_dialect.verified_stmt(sql); + assert!(matches!( + &stmt, + Statement::Insert(Insert { + table_alias: None, + source: Some(source), + .. + }) + if matches!(&**source, Query { body, .. } if matches!(&**body, SetExpr::Select(_))))); + + // check WITH + let sql = "INSERT INTO dual WITH w AS (SELECT 1 AS y FROM dual) SELECT y FROM w"; + let stmt = oracle_dialect.verified_stmt(sql); + assert!(matches!( + &stmt, + Statement::Insert(Insert { + table_alias: None, + source: Some(source), + .. + }) + if matches!(&**source, Query { body, .. 
} if matches!(&**body, SetExpr::Select(_))))); + + // check VALUES + let sql = "INSERT INTO t VALUES (1)"; + let stmt = oracle_dialect.verified_stmt(sql); + assert!(matches!( + stmt, + Statement::Insert(Insert { + table_alias: None, + source: Some(source), + .. + }) + if matches!(&*source, Query { body, .. } if matches!(&**body, SetExpr::Values(_))) + )); +} diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 03517876d0..510f6ccc53 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -5445,10 +5445,13 @@ fn test_simple_postgres_insert_with_alias() { quote_style: None, span: Span::empty(), }])), - table_alias: Some(Ident { - value: "test_table".to_string(), - quote_style: None, - span: Span::empty(), + table_alias: Some(TableAliasWithoutColumns { + explicit: true, + alias: Ident { + value: "test_table".to_string(), + quote_style: None, + span: Span::empty(), + } }), columns: vec![ Ident { @@ -5521,10 +5524,13 @@ fn test_simple_postgres_insert_with_alias() { quote_style: None, span: Span::empty(), }])), - table_alias: Some(Ident { - value: "test_table".to_string(), - quote_style: None, - span: Span::empty(), + table_alias: Some(TableAliasWithoutColumns { + explicit: true, + alias: Ident { + value: "test_table".to_string(), + quote_style: None, + span: Span::empty(), + } }), columns: vec![ Ident { @@ -5599,10 +5605,13 @@ fn test_simple_insert_with_quoted_alias() { quote_style: None, span: Span::empty(), }])), - table_alias: Some(Ident { - value: "Test_Table".to_string(), - quote_style: Some('"'), - span: Span::empty(), + table_alias: Some(TableAliasWithoutColumns { + explicit: true, + alias: Ident { + value: "Test_Table".to_string(), + quote_style: Some('"'), + span: Span::empty(), + } }), columns: vec![ Ident { From d9b53a0cdb369124d9b6ce6237959e66bad859af Mon Sep 17 00:00:00 2001 From: xitep Date: Tue, 24 Feb 2026 14:14:39 +0100 Subject: [PATCH 088/121] Introduce `Visit::visit_select` (#2235) --- src/ast/query.rs | 5 
+++-- src/ast/visitor.rs | 52 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index ff617a38e4..b4d3fdb2bc 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -156,12 +156,12 @@ pub enum SetExpr { /// UNION/EXCEPT/INTERSECT of two queries /// A set operation combining two query expressions. SetOperation { + /// Left operand of the set operation. + left: Box, /// The set operator used (e.g. `UNION`, `EXCEPT`). op: SetOperator, /// Optional quantifier (`ALL`, `DISTINCT`, etc.). set_quantifier: SetQuantifier, - /// Left operand of the set operation. - left: Box, /// Right operand of the set operation. right: Box, }, @@ -442,6 +442,7 @@ impl SelectModifiers { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "visitor", visit(with = "visit_select"))] pub struct Select { /// Token for the `SELECT` keyword pub select_token: AttachedToken, diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 5d841655b5..5f9b374896 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -17,7 +17,7 @@ //! Recursive visitors for ast Nodes. See [`Visitor`] for more details. -use crate::ast::{Expr, ObjectName, Query, Statement, TableFactor, Value}; +use crate::ast::{Expr, ObjectName, Query, Select, Statement, TableFactor, Value}; use core::ops::ControlFlow; /// A type that can be visited by a [`Visitor`]. 
See [`Visitor`] for @@ -207,6 +207,16 @@ pub trait Visitor { ControlFlow::Continue(()) } + /// Invoked for any [Select] that appear in the AST before visiting children + fn pre_visit_select(&mut self, _select: &Select) -> ControlFlow { + ControlFlow::Continue(()) + } + + /// Invoked for any [Select] that appear in the AST after visiting children + fn post_visit_select(&mut self, _select: &Select) -> ControlFlow { + ControlFlow::Continue(()) + } + /// Invoked for any relations (e.g. tables) that appear in the AST before visiting children fn pre_visit_relation(&mut self, _relation: &ObjectName) -> ControlFlow { ControlFlow::Continue(()) @@ -319,6 +329,16 @@ pub trait VisitorMut { ControlFlow::Continue(()) } + /// Invoked for any [Select] that appear in the AST before visiting children + fn pre_visit_select(&mut self, _select: &mut Select) -> ControlFlow { + ControlFlow::Continue(()) + } + + /// Invoked for any [Select] that appear in the AST after visiting children + fn post_visit_select(&mut self, _select: &mut Select) -> ControlFlow { + ControlFlow::Continue(()) + } + /// Invoked for any relations (e.g. 
tables) that appear in the AST before visiting children fn pre_visit_relation(&mut self, _relation: &mut ObjectName) -> ControlFlow { ControlFlow::Continue(()) @@ -709,6 +729,16 @@ mod tests { ControlFlow::Continue(()) } + fn pre_visit_select(&mut self, select: &Select) -> ControlFlow { + self.visited.push(format!("PRE: SELECT: {select}")); + ControlFlow::Continue(()) + } + + fn post_visit_select(&mut self, select: &Select) -> ControlFlow { + self.visited.push(format!("POST: SELECT: {select}")); + ControlFlow::Continue(()) + } + fn pre_visit_relation(&mut self, relation: &ObjectName) -> ControlFlow { self.visited.push(format!("PRE: RELATION: {relation}")); ControlFlow::Continue(()) @@ -779,10 +809,12 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM table_name AS my_table", "PRE: QUERY: SELECT * FROM table_name AS my_table", + "PRE: SELECT: SELECT * FROM table_name AS my_table", "PRE: TABLE FACTOR: table_name AS my_table", "PRE: RELATION: table_name", "POST: RELATION: table_name", "POST: TABLE FACTOR: table_name AS my_table", + "POST: SELECT: SELECT * FROM table_name AS my_table", "POST: QUERY: SELECT * FROM table_name AS my_table", "POST: STATEMENT: SELECT * FROM table_name AS my_table", ], @@ -792,6 +824,7 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", "PRE: QUERY: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", + "PRE: SELECT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", @@ -806,6 +839,7 @@ mod tests { "PRE: EXPR: t2.t1_id", "POST: EXPR: t2.t1_id", "POST: EXPR: t1.id = t2.t1_id", + "POST: SELECT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", "POST: QUERY: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", "POST: STATEMENT: SELECT * FROM t1 JOIN t2 ON t1.id = t2.t1_id", ], @@ -815,20 +849,24 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: 
SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT column FROM t2", + "PRE: SELECT: SELECT column FROM t2", "PRE: EXPR: column", "POST: EXPR: column", "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", "POST: TABLE FACTOR: t2", + "POST: SELECT: SELECT column FROM t2", "POST: QUERY: SELECT column FROM t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", + "POST: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "POST: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", ], @@ -838,20 +876,24 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT column FROM t2", + "PRE: SELECT: SELECT column FROM t2", "PRE: EXPR: column", "POST: EXPR: column", "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", "POST: TABLE FACTOR: t2", + "POST: SELECT: SELECT column FROM t2", "POST: QUERY: SELECT column FROM t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", + "POST: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "POST: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", ], @@ -861,24 +903,30 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", "PRE: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", + "PRE: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT 
column FROM t2)", "PRE: TABLE FACTOR: t1", "PRE: RELATION: t1", "POST: RELATION: t1", "POST: TABLE FACTOR: t1", "PRE: EXPR: EXISTS (SELECT column FROM t2)", "PRE: QUERY: SELECT column FROM t2", + "PRE: SELECT: SELECT column FROM t2", "PRE: EXPR: column", "POST: EXPR: column", "PRE: TABLE FACTOR: t2", "PRE: RELATION: t2", "POST: RELATION: t2", "POST: TABLE FACTOR: t2", + "POST: SELECT: SELECT column FROM t2", "POST: QUERY: SELECT column FROM t2", "POST: EXPR: EXISTS (SELECT column FROM t2)", + "POST: SELECT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2)", + "PRE: SELECT: SELECT * FROM t3", "PRE: TABLE FACTOR: t3", "PRE: RELATION: t3", "POST: RELATION: t3", "POST: TABLE FACTOR: t3", + "POST: SELECT: SELECT * FROM t3", "POST: QUERY: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", "POST: STATEMENT: SELECT * FROM t1 WHERE EXISTS (SELECT column FROM t2) UNION SELECT * FROM t3", ], @@ -892,6 +940,7 @@ mod tests { vec![ "PRE: STATEMENT: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d) ORDER BY EMPID", "PRE: QUERY: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d) ORDER BY EMPID", + "PRE: SELECT: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d)", "PRE: TABLE FACTOR: monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d)", "PRE: TABLE FACTOR: monthly_sales", "PRE: RELATION: monthly_sales", @@ -912,6 +961,7 @@ mod tests { "PRE: EXPR: 'APR'", "POST: EXPR: 'APR'", "POST: TABLE FACTOR: monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d)", + "POST: SELECT: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 'APR')) AS p (c, d)", "PRE: EXPR: EMPID", "POST: EXPR: EMPID", "POST: QUERY: SELECT * FROM monthly_sales PIVOT(SUM(a.amount) FOR a.MONTH IN ('JAN', 'FEB', 'MAR', 
'APR')) AS p (c, d) ORDER BY EMPID", From 982068ec51cbd60cb8489fb50286c344ba339e0c Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Wed, 25 Feb 2026 15:01:39 +0100 Subject: [PATCH 089/121] Add support for INTERVAL keyword as unquoted identifier in PostgreSQL (#2238) --- src/dialect/postgresql.rs | 10 ++++++++++ tests/sqlparser_postgres.rs | 6 ++++++ 2 files changed, 16 insertions(+) diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 13bd82bfdb..0b7ed2a729 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -33,6 +33,8 @@ use crate::keywords::Keyword; use crate::parser::{Parser, ParserError}; use crate::tokenizer::Token; +use super::keywords::RESERVED_FOR_IDENTIFIER; + /// A [`Dialect`] for [PostgreSQL](https://www.postgresql.org/) #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))] @@ -81,6 +83,14 @@ impl Dialect for PostgreSqlDialect { true } + fn is_reserved_for_identifier(&self, kw: Keyword) -> bool { + if matches!(kw, Keyword::INTERVAL) { + false + } else { + RESERVED_FOR_IDENTIFIER.contains(&kw) + } + } + /// See fn is_custom_operator_part(&self, ch: char) -> bool { matches!( diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 510f6ccc53..7c19f51e5e 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -5777,6 +5777,12 @@ fn parse_interval_data_type() { } } +#[test] +fn parse_interval_keyword_as_unquoted_identifier() { + pg().verified_stmt("SELECT MAX(interval) FROM tbl"); + pg().verified_expr("INTERVAL '1 day'"); +} + #[test] fn parse_create_table_with_options() { let sql = "CREATE TABLE t (c INT) WITH (foo = 'bar', a = 123)"; From 8afcad8fcb915d0366d4cdda9aa9958365f968ae Mon Sep 17 00:00:00 2001 From: Yoabot Date: Thu, 26 Feb 2026 11:37:30 +0100 Subject: [PATCH 090/121] Redshift: support wildcard select items with alias (#2230) --- src/ast/query.rs | 7 +++++++ 
src/ast/spans.rs | 4 +++- src/dialect/mod.rs | 12 ++++++++++++ src/dialect/redshift.rs | 4 ++++ src/parser/mod.rs | 11 +++++++++++ tests/sqlparser_common.rs | 20 ++++++++++++++++++++ 6 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index b4d3fdb2bc..159f02a6c1 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -934,6 +934,9 @@ pub struct WildcardAdditionalOptions { pub opt_replace: Option, /// `[RENAME ...]`. pub opt_rename: Option, + /// `[AS ]`. + /// Redshift syntax: + pub opt_alias: Option, } impl Default for WildcardAdditionalOptions { @@ -945,6 +948,7 @@ impl Default for WildcardAdditionalOptions { opt_except: None, opt_replace: None, opt_rename: None, + opt_alias: None, } } } @@ -966,6 +970,9 @@ impl fmt::Display for WildcardAdditionalOptions { if let Some(rename) = &self.opt_rename { write!(f, " {rename}")?; } + if let Some(alias) = &self.opt_alias { + write!(f, " AS {alias}")?; + } Ok(()) } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index b29a134b46..1e21620803 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1824,6 +1824,7 @@ impl Spanned for WildcardAdditionalOptions { opt_except, opt_replace, opt_rename, + opt_alias, } = self; union_spans( @@ -1832,7 +1833,8 @@ impl Spanned for WildcardAdditionalOptions { .chain(opt_exclude.as_ref().map(|i| i.span())) .chain(opt_rename.as_ref().map(|i| i.span())) .chain(opt_replace.as_ref().map(|i| i.span())) - .chain(opt_except.as_ref().map(|i| i.span())), + .chain(opt_except.as_ref().map(|i| i.span())) + .chain(opt_alias.as_ref().map(|i| i.span)), ) } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index bcca455eca..698c12ec9f 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1517,6 +1517,18 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if this dialect supports aliasing a wildcard select item. 
+ /// + /// Example: + /// ```sql + /// SELECT t.* AS alias FROM t + /// ``` + /// + /// [Redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_SELECT_list.html) + fn supports_select_wildcard_with_alias(&self) -> bool { + false + } + /// Returns true if this dialect supports the `OPTIMIZE TABLE` statement. /// /// Example: diff --git a/src/dialect/redshift.rs b/src/dialect/redshift.rs index 21958e3829..5969ee55e6 100644 --- a/src/dialect/redshift.rs +++ b/src/dialect/redshift.rs @@ -141,6 +141,10 @@ impl Dialect for RedshiftSqlDialect { true } + fn supports_select_wildcard_with_alias(&self) -> bool { + true + } + fn supports_select_exclude(&self) -> bool { true } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bb11d79c2d..667a153523 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17864,6 +17864,16 @@ impl<'a> Parser<'a> { None }; + let opt_alias = if self.dialect.supports_select_wildcard_with_alias() { + if self.parse_keyword(Keyword::AS) { + Some(self.parse_identifier()?) 
+ } else { + None + } + } else { + None + }; + Ok(WildcardAdditionalOptions { wildcard_token: wildcard_token.into(), opt_ilike, @@ -17871,6 +17881,7 @@ impl<'a> Parser<'a> { opt_except, opt_rename, opt_replace, + opt_alias, }) } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a3b5404d30..3f0ca96a03 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1280,6 +1280,26 @@ fn parse_select_expr_star() { dialects.verified_only_select("SELECT myfunc().* EXCEPT (foo) FROM T"); } +#[test] +fn parse_select_wildcard_with_alias() { + let dialects = all_dialects_where(|d| d.supports_select_wildcard_with_alias()); + + // qualified wildcard with alias + dialects + .parse_sql_statements("SELECT t.* AS all_cols FROM t") + .unwrap(); + + // unqualified wildcard with alias + dialects + .parse_sql_statements("SELECT * AS all_cols FROM t") + .unwrap(); + + // mixed: regular column + qualified wildcard with alias + dialects + .parse_sql_statements("SELECT a.id, b.* AS b_cols FROM a JOIN b ON (a.id = b.a_id)") + .unwrap(); +} + #[test] fn test_eof_after_as() { let res = parse_sql_statements("SELECT foo AS"); From e87241a153609f0d4716954797b24e205424b945 Mon Sep 17 00:00:00 2001 From: Yoabot Date: Thu, 26 Feb 2026 12:00:53 +0100 Subject: [PATCH 091/121] Snowflake: support wildcard with EXCLUDE in function arguments (#2231) --- src/ast/mod.rs | 5 +++++ src/ast/spans.rs | 2 ++ src/parser/mod.rs | 21 ++++++++++++++++++++- tests/sqlparser_common.rs | 16 ++++++++++++++++ 4 files changed, 43 insertions(+), 1 deletion(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7b1e9447d5..1e430171ee 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -7625,6 +7625,10 @@ pub enum FunctionArgExpr { QualifiedWildcard(ObjectName), /// An unqualified `*` wildcard. Wildcard, + /// An unqualified `*` wildcard with additional options, e.g. `* EXCLUDE(col)`. + /// + /// Used in Snowflake to support expressions like `HASH(* EXCLUDE(col))`. 
+ WildcardWithOptions(WildcardAdditionalOptions), } impl From for FunctionArgExpr { @@ -7643,6 +7647,7 @@ impl fmt::Display for FunctionArgExpr { FunctionArgExpr::Expr(expr) => write!(f, "{expr}"), FunctionArgExpr::QualifiedWildcard(prefix) => write!(f, "{prefix}.*"), FunctionArgExpr::Wildcard => f.write_str("*"), + FunctionArgExpr::WildcardWithOptions(opts) => write!(f, "*{opts}"), } } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 1e21620803..0b95c3ed70 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2130,6 +2130,7 @@ impl Spanned for FunctionArg { /// /// Missing spans: /// - [FunctionArgExpr::Wildcard] +/// - [FunctionArgExpr::WildcardWithOptions] impl Spanned for FunctionArgExpr { fn span(&self) -> Span { match self { @@ -2138,6 +2139,7 @@ impl Spanned for FunctionArgExpr { union_spans(object_name.0.iter().map(|i| i.span())) } FunctionArgExpr::Wildcard => Span::empty(), + FunctionArgExpr::WildcardWithOptions(_) => Span::empty(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 667a153523..bea566bbe8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17576,7 +17576,26 @@ impl<'a> Parser<'a> { if let Some(arg) = arg { return Ok(arg); } - Ok(FunctionArg::Unnamed(self.parse_wildcard_expr()?.into())) + let wildcard_expr = self.parse_wildcard_expr()?; + let arg_expr: FunctionArgExpr = match wildcard_expr { + Expr::Wildcard(ref token) if self.dialect.supports_select_wildcard_exclude() => { + // Support `* EXCLUDE(col1, col2, ...)` inside function calls (e.g. Snowflake's + // `HASH(* EXCLUDE(col))`). Parse the options the same way SELECT items do. 
+ let opts = self.parse_wildcard_additional_options(token.0.clone())?; + if opts.opt_exclude.is_some() + || opts.opt_except.is_some() + || opts.opt_replace.is_some() + || opts.opt_rename.is_some() + || opts.opt_ilike.is_some() + { + FunctionArgExpr::WildcardWithOptions(opts) + } else { + wildcard_expr.into() + } + } + other => other.into(), + }; + Ok(FunctionArg::Unnamed(arg_expr)) } fn parse_function_named_arg_operator(&mut self) -> Result { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 3f0ca96a03..982bf10885 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18583,3 +18583,19 @@ fn parse_array_subscript() { dialects.verified_stmt("SELECT arr[1][2]"); dialects.verified_stmt("SELECT arr[:][:]"); } + +#[test] +fn test_wildcard_func_arg() { + // Wildcard (*) and wildcard with EXCLUDE as a function argument. + // Documented for Snowflake's HASH function but parsed for any dialect that + // supports the wildcard-EXCLUDE select syntax. + let dialects = all_dialects_where(|d| d.supports_select_wildcard_exclude()); + + // Wildcard with EXCLUDE — canonical form has a space before the parenthesised column list. 
+ dialects.one_statement_parses_to( + "SELECT HASH(* EXCLUDE(col1)) FROM t", + "SELECT HASH(* EXCLUDE (col1)) FROM t", + ); + dialects.verified_expr("HASH(* EXCLUDE (col1))"); + dialects.verified_expr("HASH(* EXCLUDE (col1, col2))"); +} From bd7f70e82048cab324dba6224fb3dd17757d8477 Mon Sep 17 00:00:00 2001 From: Yoabot Date: Thu, 26 Feb 2026 12:11:53 +0100 Subject: [PATCH 092/121] MSSQL: prevent statement-starting keywords from being consumed as implicit aliases (#2233) --- src/dialect/mssql.rs | 56 ++++++++++++++++++++++++++++++++++++---- tests/sqlparser_mssql.rs | 53 +++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+), 5 deletions(-) diff --git a/src/dialect/mssql.rs b/src/dialect/mssql.rs index 42e05858ff..8ad765dd33 100644 --- a/src/dialect/mssql.rs +++ b/src/dialect/mssql.rs @@ -129,9 +129,30 @@ impl Dialect for MsSqlDialect { fn is_select_item_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { match kw { - // List of keywords that cannot be used as select item aliases in MSSQL - // regardless of whether the alias is explicit or implicit - Keyword::IF | Keyword::ELSE => false, + // List of keywords that cannot be used as select item (column) aliases in MSSQL + // regardless of whether the alias is explicit or implicit. + // + // These are T-SQL statement-starting keywords; allowing them as implicit aliases + // causes the parser to consume the keyword as an alias for the previous expression, + // then fail on the token that follows (e.g. `TABLE`, `@var`, `sp_name`, …). 
+ Keyword::IF + | Keyword::ELSE + | Keyword::DECLARE + | Keyword::EXEC + | Keyword::EXECUTE + | Keyword::INSERT + | Keyword::UPDATE + | Keyword::DELETE + | Keyword::DROP + | Keyword::CREATE + | Keyword::ALTER + | Keyword::TRUNCATE + | Keyword::PRINT + | Keyword::WHILE + | Keyword::RETURN + | Keyword::THROW + | Keyword::RAISERROR + | Keyword::MERGE => false, _ => explicit || self.is_column_alias(kw, parser), } } @@ -139,8 +160,33 @@ impl Dialect for MsSqlDialect { fn is_table_factor_alias(&self, explicit: bool, kw: &Keyword, parser: &mut Parser) -> bool { match kw { // List of keywords that cannot be used as table aliases in MSSQL - // regardless of whether the alias is explicit or implicit - Keyword::IF | Keyword::ELSE => false, + // regardless of whether the alias is explicit or implicit. + // + // These are T-SQL statement-starting keywords. Without blocking them here, + // a bare `SELECT * FROM t` followed by a newline and one of these keywords + // would cause the parser to consume the keyword as a table alias for `t`, + // then fail on the token that follows (e.g. `@var`, `sp_name`, `TABLE`, …). + // + // `SET` is already covered by the global `RESERVED_FOR_TABLE_ALIAS` list; + // the keywords below are MSSQL-specific additions. 
+ Keyword::IF + | Keyword::ELSE + | Keyword::DECLARE + | Keyword::EXEC + | Keyword::EXECUTE + | Keyword::INSERT + | Keyword::UPDATE + | Keyword::DELETE + | Keyword::DROP + | Keyword::CREATE + | Keyword::ALTER + | Keyword::TRUNCATE + | Keyword::PRINT + | Keyword::WHILE + | Keyword::RETURN + | Keyword::THROW + | Keyword::RAISERROR + | Keyword::MERGE => false, _ => explicit || self.is_table_alias(kw, parser), } } diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index b5fd1e77e9..6c8412a4ae 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2730,3 +2730,56 @@ fn parse_mssql_tran_shorthand() { // ROLLBACK TRAN normalizes to ROLLBACK (same as ROLLBACK TRANSACTION) ms().one_statement_parses_to("ROLLBACK TRAN", "ROLLBACK"); } + +#[test] +fn test_tsql_statement_keywords_not_implicit_aliases() { + // T-SQL statement-starting keywords must never be consumed as implicit + // aliases for a preceding SELECT item or table reference when using + // newline-delimited multi-statement scripts. + + // Without the fix, the parser would consume a statement-starting keyword + // as an implicit alias for the preceding SELECT item or table reference, + // then fail on the next token. Verify parsing succeeds and each input + // produces the expected number of statements. 
+ + // Keywords that should not become implicit column aliases + let col_alias_cases: &[(&str, usize)] = &[ + ("select 1\ndeclare @x as int", 2), + ("select 1\nexec sp_who", 2), + ("select 1\ninsert into t values (1)", 2), + ("select 1\nupdate t set col=1", 2), + ("select 1\ndelete from t", 2), + ("select 1\ndrop table t", 2), + ("select 1\ncreate table t (id int)", 2), + ("select 1\nalter table t add col int", 2), + ("select 1\nreturn", 2), + ]; + for (sql, expected) in col_alias_cases { + let stmts = tsql() + .parse_sql_statements(sql) + .unwrap_or_else(|e| panic!("failed to parse {sql:?}: {e}")); + assert_eq!( + stmts.len(), + *expected, + "expected {expected} stmts for: {sql:?}" + ); + } + + // Keywords that should not become implicit table aliases + let tbl_alias_cases: &[(&str, usize)] = &[ + ("select * from t\ndeclare @x as int", 2), + ("select * from t\ndrop table t", 2), + ("select * from t\ncreate table u (id int)", 2), + ("select * from t\nexec sp_who", 2), + ]; + for (sql, expected) in tbl_alias_cases { + let stmts = tsql() + .parse_sql_statements(sql) + .unwrap_or_else(|e| panic!("failed to parse {sql:?}: {e}")); + assert_eq!( + stmts.len(), + *expected, + "expected {expected} stmts for: {sql:?}" + ); + } +} From 523d78ea2aceee54c4f999728edcb5603ff25a24 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Fri, 27 Feb 2026 11:33:53 +0100 Subject: [PATCH 093/121] Support parenthesized `CREATE TABLE ... (LIKE ... 
INCLUDING/EXCLUDING DEFAULTS)` in `PostgreSQL` (#2242) --- src/dialect/postgresql.rs | 4 ++++ tests/sqlparser_postgres.rs | 39 +++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 0b7ed2a729..89b677c476 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -302,4 +302,8 @@ impl Dialect for PostgreSqlDialect { fn supports_insert_table_alias(&self) -> bool { true } + + fn supports_create_table_like_parenthesized(&self) -> bool { + true + } } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7c19f51e5e..f4b3a28268 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -593,6 +593,45 @@ fn parse_create_table_constraints_only() { }; } +#[test] +fn parse_create_table_like_with_defaults() { + let sql = "CREATE TABLE new (LIKE old INCLUDING DEFAULTS)"; + match pg().verified_stmt(sql) { + Statement::CreateTable(stmt) => { + assert_eq!( + stmt.name, + ObjectName::from(vec![Ident::new("new".to_string())]) + ); + assert_eq!( + stmt.like, + Some(CreateTableLikeKind::Parenthesized(CreateTableLike { + name: ObjectName::from(vec![Ident::new("old".to_string())]), + defaults: Some(CreateTableLikeDefaults::Including), + })) + ) + } + _ => unreachable!(), + } + + let sql = "CREATE TABLE new (LIKE old EXCLUDING DEFAULTS)"; + match pg().verified_stmt(sql) { + Statement::CreateTable(stmt) => { + assert_eq!( + stmt.name, + ObjectName::from(vec![Ident::new("new".to_string())]) + ); + assert_eq!( + stmt.like, + Some(CreateTableLikeKind::Parenthesized(CreateTableLike { + name: ObjectName::from(vec![Ident::new("old".to_string())]), + defaults: Some(CreateTableLikeDefaults::Excluding), + })) + ) + } + _ => unreachable!(), + } +} + #[test] fn parse_alter_table_constraints_rename() { match alter_table_op( From 6f0e803aa77414e83aefd326e23231c51b60ae32 Mon Sep 17 00:00:00 2001 From: Yoabot Date: Fri, 27 Feb 2026 11:44:59 +0100 Subject: 
[PATCH 094/121] MSSQL: support EXEC (@sql) dynamic SQL execution (#2234) --- src/parser/mod.rs | 23 ++++++++++++++++++++--- tests/sqlparser_mssql.rs | 23 +++++++++++++++++++++++ 2 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bea566bbe8..75db4d2404 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -18583,6 +18583,9 @@ impl<'a> Parser<'a> { /// Parse a SQL `EXECUTE` statement pub fn parse_execute(&mut self) -> Result { + // Track whether the procedure/expression name itself was wrapped in parens, + // i.e. `EXEC (@sql)` (dynamic string execution) vs `EXEC sp_name`. + // When the name has parens there are no additional parameters. let name = if self.dialect.supports_execute_immediate() && self.parse_keyword(Keyword::IMMEDIATE) { @@ -18593,10 +18596,18 @@ impl<'a> Parser<'a> { if has_parentheses { self.expect_token(&Token::RParen)?; } - Some(name) + Some((name, has_parentheses)) }; - let has_parentheses = self.consume_token(&Token::LParen); + let name_had_parentheses = name.as_ref().map(|(_, p)| *p).unwrap_or(false); + + // Only look for a parameter list when the name was NOT wrapped in parens. + // `EXEC (@sql)` is dynamic SQL execution and takes no parameters here. + let has_parentheses = if name_had_parentheses { + false + } else { + self.consume_token(&Token::LParen) + }; let end_kws = &[Keyword::USING, Keyword::OUTPUT, Keyword::DEFAULT]; let end_token = match (has_parentheses, self.peek_token().token) { @@ -18606,12 +18617,18 @@ impl<'a> Parser<'a> { (false, _) => Token::SemiColon, }; - let parameters = self.parse_comma_separated0(Parser::parse_expr, end_token)?; + let parameters = if name_had_parentheses { + vec![] + } else { + self.parse_comma_separated0(Parser::parse_expr, end_token)? + }; if has_parentheses { self.expect_token(&Token::RParen)?; } + let name = name.map(|(n, _)| n); + let into = if self.parse_keyword(Keyword::INTO) { self.parse_comma_separated(Self::parse_identifier)? 
} else { diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 6c8412a4ae..8bdb1c2053 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2783,3 +2783,26 @@ fn test_tsql_statement_keywords_not_implicit_aliases() { ); } } + +#[test] +fn test_exec_dynamic_sql() { + // EXEC (@sql) executes a dynamic SQL string held in a variable. + // It must parse as a single Execute statement and not attempt to parse + // parameters after the closing paren. + let stmts = tsql() + .parse_sql_statements("EXEC (@sql)") + .expect("EXEC (@sql) should parse"); + assert_eq!(stmts.len(), 1); + assert!( + matches!(&stmts[0], Statement::Execute { .. }), + "expected Execute, got: {:?}", + stmts[0] + ); + + // Verify that a statement following EXEC (@sql) on the next line is parsed + // as a separate statement and not consumed as a parameter. + let stmts = tsql() + .parse_sql_statements("EXEC (@sql)\nDROP TABLE #tmp") + .expect("EXEC (@sql) followed by DROP TABLE should parse"); + assert_eq!(stmts.len(), 2); +} From 83baf5e89179dd00321330312fe460100d755c25 Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Fri, 27 Feb 2026 02:59:05 -0800 Subject: [PATCH 095/121] Support MySQL KEY keyword in column definitions (#2243) Co-authored-by: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> --- src/ast/ddl.rs | 2 +- src/dialect/generic.rs | 4 ++++ src/dialect/mod.rs | 12 ++++++++++++ src/dialect/mysql.rs | 5 +++++ src/parser/mod.rs | 23 ++++++++++++++++++++++- tests/sqlparser_mysql.rs | 9 +++++++++ 6 files changed, 53 insertions(+), 2 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 0c4f93e647..3a951f66b5 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -2040,7 +2040,7 @@ impl fmt::Display for ColumnOption { Ok(()) } Unique(constraint) => { - write!(f, "UNIQUE")?; + write!(f, "UNIQUE{:>}", constraint.index_type_display)?; if let Some(characteristics) = &constraint.characteristics { write!(f, " {characteristics}")?; } diff --git 
a/src/dialect/generic.rs b/src/dialect/generic.rs index 1cf195e637..a7a3c2715a 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -280,4 +280,8 @@ impl Dialect for GenericDialect { fn supports_constraint_keyword_without_name(&self) -> bool { true } + + fn supports_key_column_option(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 698c12ec9f..796b25f05f 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1195,6 +1195,18 @@ pub trait Dialect: Debug + Any { false } + /// Returns true if the dialect supports the `KEY` keyword as part of + /// column-level constraints in a `CREATE TABLE` statement. + /// + /// When enabled, the parser accepts these MySQL-specific column options: + /// - `UNIQUE [KEY]` — optional `KEY` after `UNIQUE` + /// - `[PRIMARY] KEY` — standalone `KEY` as shorthand for `PRIMARY KEY` + /// + /// + fn supports_key_column_option(&self) -> bool { + false + } + /// Returns true if the specified keyword is reserved and cannot be /// used as an identifier without special handling like quoting. 
fn is_reserved_for_identifier(&self, kw: Keyword) -> bool { diff --git a/src/dialect/mysql.rs b/src/dialect/mysql.rs index bdced4826b..6b057539e5 100644 --- a/src/dialect/mysql.rs +++ b/src/dialect/mysql.rs @@ -206,6 +206,11 @@ impl Dialect for MySqlDialect { fn supports_constraint_keyword_without_name(&self) -> bool { true } + + /// See: + fn supports_key_column_option(&self) -> bool { + true + } } /// `LOCK TABLES` diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 75db4d2404..a00eab3487 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -9048,12 +9048,18 @@ impl<'a> Parser<'a> { .into(), )) } else if self.parse_keyword(Keyword::UNIQUE) { + let index_type_display = + if self.dialect.supports_key_column_option() && self.parse_keyword(Keyword::KEY) { + KeyOrIndexDisplay::Key + } else { + KeyOrIndexDisplay::None + }; let characteristics = self.parse_constraint_characteristics()?; Ok(Some( UniqueConstraint { name: None, index_name: None, - index_type_display: KeyOrIndexDisplay::None, + index_type_display, index_type: None, columns: vec![], index_options: vec![], @@ -9062,6 +9068,21 @@ impl<'a> Parser<'a> { } .into(), )) + } else if self.dialect.supports_key_column_option() && self.parse_keyword(Keyword::KEY) { + // In MySQL, `KEY` in a column definition is shorthand for `PRIMARY KEY`. 
+ // See: https://dev.mysql.com/doc/refman/8.4/en/create-table.html + let characteristics = self.parse_constraint_characteristics()?; + Ok(Some( + PrimaryKeyConstraint { + name: None, + index_name: None, + index_type: None, + columns: vec![], + index_options: vec![], + characteristics, + } + .into(), + )) } else if self.parse_keyword(Keyword::REFERENCES) { let foreign_table = self.parse_object_name(false)?; // PostgreSQL allows omitting the column list and diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 30405623dc..b4ae764c2e 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -944,6 +944,15 @@ fn parse_create_table_primary_and_unique_key_characteristic_test() { } } +#[test] +fn parse_create_table_column_key_options() { + mysql_and_generic().verified_stmt("CREATE TABLE foo (x INT UNIQUE KEY)"); + mysql_and_generic().one_statement_parses_to( + "CREATE TABLE foo (x INT KEY)", + "CREATE TABLE foo (x INT PRIMARY KEY)", + ); +} + #[test] fn parse_create_table_comment() { let without_equal = "CREATE TABLE foo (bar INT) COMMENT 'baz'"; From 5b7bc1a52723cc91d4d0b90a44455d716db549d5 Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Fri, 27 Feb 2026 13:38:54 +0100 Subject: [PATCH 096/121] Support two-argument `TRIM(string, characters)` in PostgreSQL (#2240) --- src/ast/mod.rs | 2 +- src/dialect/bigquery.rs | 4 +++ src/dialect/clickhouse.rs | 4 +++ src/dialect/duckdb.rs | 4 +++ src/dialect/generic.rs | 4 +++ src/dialect/mod.rs | 6 +++++ src/dialect/postgresql.rs | 4 +++ src/dialect/snowflake.rs | 4 +++ src/dialect/sqlite.rs | 4 +++ src/parser/mod.rs | 5 ++-- tests/sqlparser_common.rs | 53 ++++++++++++++++++++++++++++----------- 11 files changed, 75 insertions(+), 19 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 1e430171ee..97cc61935d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -1131,7 +1131,7 @@ pub enum Expr { /// ```sql /// TRIM([BOTH | LEADING | TRAILING] [ FROM] ) /// TRIM() - /// TRIM(, [, 
characters]) -- only Snowflake or Bigquery + /// TRIM(, [, characters]) -- PostgreSQL, DuckDB, Snowflake, BigQuery, Generic /// ``` Trim { /// The expression to trim from. diff --git a/src/dialect/bigquery.rs b/src/dialect/bigquery.rs index 6cef460676..8fca515182 100644 --- a/src/dialect/bigquery.rs +++ b/src/dialect/bigquery.rs @@ -162,4 +162,8 @@ impl Dialect for BigQueryDialect { fn supports_select_wildcard_replace(&self) -> bool { true } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/clickhouse.rs b/src/dialect/clickhouse.rs index ea4d7a971b..87c762f0bf 100644 --- a/src/dialect/clickhouse.rs +++ b/src/dialect/clickhouse.rs @@ -141,4 +141,8 @@ impl Dialect for ClickHouseDialect { fn supports_select_wildcard_replace(&self) -> bool { true } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/duckdb.rs b/src/dialect/duckdb.rs index 32967c4c58..e70efd6954 100644 --- a/src/dialect/duckdb.rs +++ b/src/dialect/duckdb.rs @@ -129,4 +129,8 @@ impl Dialect for DuckDbDialect { fn supports_select_wildcard_replace(&self) -> bool { true } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/generic.rs b/src/dialect/generic.rs index a7a3c2715a..1d5461fec1 100644 --- a/src/dialect/generic.rs +++ b/src/dialect/generic.rs @@ -284,4 +284,8 @@ impl Dialect for GenericDialect { fn supports_key_column_option(&self) -> bool { true } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 796b25f05f..8703e402cf 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1651,6 +1651,12 @@ pub trait Dialect: Debug + Any { fn supports_select_format(&self) -> bool { false } + + /// Returns true if the dialect supports the two-argument comma-separated + /// form of the `TRIM` function: `TRIM(expr, characters)`. 
+ fn supports_comma_separated_trim(&self) -> bool { + false + } } /// Operators for which precedence must be defined. diff --git a/src/dialect/postgresql.rs b/src/dialect/postgresql.rs index 89b677c476..b99a8b5c3d 100644 --- a/src/dialect/postgresql.rs +++ b/src/dialect/postgresql.rs @@ -306,4 +306,8 @@ impl Dialect for PostgreSqlDialect { fn supports_create_table_like_parenthesized(&self) -> bool { true } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 984e384fd8..a9d71fc4b3 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -667,6 +667,10 @@ impl Dialect for SnowflakeDialect { fn supports_lambda_functions(&self) -> bool { true } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } // Peeks ahead to identify tokens that are expected after diff --git a/src/dialect/sqlite.rs b/src/dialect/sqlite.rs index b44a1c5b8a..39ee622d88 100644 --- a/src/dialect/sqlite.rs +++ b/src/dialect/sqlite.rs @@ -120,4 +120,8 @@ impl Dialect for SQLiteDialect { fn supports_notnull_operator(&self) -> bool { true } + + fn supports_comma_separated_trim(&self) -> bool { + true + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index a00eab3487..8d8b55a34d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2940,7 +2940,7 @@ impl<'a> Parser<'a> { /// ```sql /// TRIM ([WHERE] ['text' FROM] 'text') /// TRIM ('text') - /// TRIM(, [, characters]) -- only Snowflake or BigQuery + /// TRIM(, [, characters]) -- PostgreSQL, DuckDB, Snowflake, BigQuery, Generic /// ``` pub fn parse_trim_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; @@ -2961,8 +2961,7 @@ impl<'a> Parser<'a> { trim_what: Some(trim_what), trim_characters: None, }) - } else if self.consume_token(&Token::Comma) - && dialect_of!(self is DuckDbDialect | SnowflakeDialect | BigQueryDialect | GenericDialect) + } else if self.dialect.supports_comma_separated_trim() && 
self.consume_token(&Token::Comma) { let characters = self.parse_comma_separated(Parser::parse_expr)?; self.expect_token(&Token::RParen)?; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 982bf10885..8de460d78b 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8099,23 +8099,46 @@ fn parse_trim() { parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')").unwrap_err() ); - //keep Snowflake/BigQuery TRIM syntax failing - let all_expected_snowflake = TestedDialects::new(vec![ - //Box::new(GenericDialect {}), - Box::new(PostgreSqlDialect {}), - Box::new(MsSqlDialect {}), - Box::new(AnsiDialect {}), - //Box::new(SnowflakeDialect {}), - Box::new(HiveDialect {}), - Box::new(RedshiftSqlDialect {}), - Box::new(MySqlDialect {}), - //Box::new(BigQueryDialect {}), - Box::new(SQLiteDialect {}), - ]); + // dialects that support comma-separated TRIM syntax + let dialects = all_dialects_where(|d| d.supports_comma_separated_trim()); + let sql = "SELECT TRIM(' xyz ', ' ')"; + let select = dialects.verified_only_select(sql); assert_eq!( - ParserError::ParserError("Expected: ), found: 'a'".to_owned()), - all_expected_snowflake + &Expr::Trim { + expr: Box::new(Expr::Value( + Value::SingleQuotedString(" xyz ".to_owned()).with_empty_span() + )), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value( + Value::SingleQuotedString(" ".to_owned()).with_empty_span() + )]), + }, + expr_from_projection(only(&select.projection)) + ); + + let sql = "SELECT TRIM('xyz', 'a')"; + let select = dialects.verified_only_select(sql); + assert_eq!( + &Expr::Trim { + expr: Box::new(Expr::Value( + Value::SingleQuotedString("xyz".to_owned()).with_empty_span() + )), + trim_where: None, + trim_what: None, + trim_characters: Some(vec![Expr::Value( + Value::SingleQuotedString("a".to_owned()).with_empty_span() + )]), + }, + expr_from_projection(only(&select.projection)) + ); + + // dialects without comma-style TRIM syntax should fail + 
let unsupported_dialects = all_dialects_where(|d| !d.supports_comma_separated_trim()); + assert_eq!( + ParserError::ParserError("Expected: ), found: ,".to_owned()), + unsupported_dialects .parse_sql_statements("SELECT TRIM('xyz', 'a')") .unwrap_err() ); From a3cfcac3a1c2f336e39daa08a3113d03628b6c8a Mon Sep 17 00:00:00 2001 From: Michael Victor Zink Date: Fri, 27 Feb 2026 05:47:28 -0800 Subject: [PATCH 097/121] Add Readyset to users in README.md (#2247) --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 9dfe508103..775d074915 100644 --- a/README.md +++ b/README.md @@ -159,9 +159,9 @@ $ cargo run --features json_example --example cli FILENAME.sql [--dialectname] ## Users -This parser is currently being used by the [DataFusion] query engine, [LocustDB], -[Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], [JumpWire], [ParadeDB], [CipherStash Proxy], -and [GreptimeDB]. +This parser is currently being used by the [DataFusion] query engine, +[LocustDB], [Ballista], [GlueSQL], [Opteryx], [Polars], [PRQL], [Qrlew], +[JumpWire], [ParadeDB], [CipherStash Proxy], [Readyset] and [GreptimeDB]. If your project is using sqlparser-rs feel free to make a PR to add it to this list. @@ -282,3 +282,4 @@ licensed as above, without any additional terms or conditions. 
[`GenericDialect`]: https://docs.rs/sqlparser/latest/sqlparser/dialect/struct.GenericDialect.html [CipherStash Proxy]: https://github.com/cipherstash/proxy [GreptimeDB]: https://github.com/GreptimeTeam/greptimedb +[Readyset]: https://github.com/readysettech/readyset From 49bdb5ca2be7eb9b74b8471f63344d8fa296b337 Mon Sep 17 00:00:00 2001 From: Yoabot Date: Fri, 27 Feb 2026 14:58:52 +0100 Subject: [PATCH 098/121] Snowflake: parse EXCLUDE column list as ObjectName to support qualified names (#2244) Co-authored-by: Yoav Cohen <59807311+yoavcloud@users.noreply.github.com> --- src/ast/query.rs | 4 ++-- src/ast/spans.rs | 4 ++-- src/parser/mod.rs | 5 ++-- tests/sqlparser_common.rs | 46 +++++++++++++++++++++++++++++++----- tests/sqlparser_duckdb.rs | 12 ++++++---- tests/sqlparser_snowflake.rs | 16 +++++++++---- 6 files changed, 66 insertions(+), 21 deletions(-) diff --git a/src/ast/query.rs b/src/ast/query.rs index 159f02a6c1..440928ed71 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1018,13 +1018,13 @@ pub enum ExcludeSelectItem { /// ```plaintext /// /// ``` - Single(Ident), + Single(ObjectName), /// Multiple column names inside parenthesis. /// # Syntax /// ```plaintext /// (, , ...) 
/// ``` - Multiple(Vec), + Multiple(Vec), } impl fmt::Display for ExcludeSelectItem { diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 0b95c3ed70..43005cfbbc 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1849,8 +1849,8 @@ impl Spanned for IlikeSelectItem { impl Spanned for ExcludeSelectItem { fn span(&self) -> Span { match self { - ExcludeSelectItem::Single(ident) => ident.span, - ExcludeSelectItem::Multiple(vec) => union_spans(vec.iter().map(|i| i.span)), + ExcludeSelectItem::Single(name) => name.span(), + ExcludeSelectItem::Multiple(vec) => union_spans(vec.iter().map(|i| i.span())), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 8d8b55a34d..bc91213fa5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17951,11 +17951,12 @@ impl<'a> Parser<'a> { ) -> Result, ParserError> { let opt_exclude = if self.parse_keyword(Keyword::EXCLUDE) { if self.consume_token(&Token::LParen) { - let columns = self.parse_comma_separated(|parser| parser.parse_identifier())?; + let columns = + self.parse_comma_separated(|parser| parser.parse_object_name(false))?; self.expect_token(&Token::RParen)?; Some(ExcludeSelectItem::Multiple(columns)) } else { - let column = self.parse_identifier()?; + let column = self.parse_object_name(false)?; Some(ExcludeSelectItem::Single(column)) } } else { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 8de460d78b..ad7697a7f0 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -17343,7 +17343,9 @@ fn test_select_exclude() { SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. 
}) => { assert_eq!( *opt_exclude, - Some(ExcludeSelectItem::Single(Ident::new("c1"))) + Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "c1" + )))) ); } _ => unreachable!(), @@ -17356,8 +17358,8 @@ fn test_select_exclude() { assert_eq!( *opt_exclude, Some(ExcludeSelectItem::Multiple(vec![ - Ident::new("c1"), - Ident::new("c2") + ObjectName::from(Ident::new("c1")), + ObjectName::from(Ident::new("c2")), ])) ); } @@ -17368,7 +17370,9 @@ fn test_select_exclude() { SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. }) => { assert_eq!( *opt_exclude, - Some(ExcludeSelectItem::Single(Ident::new("c1"))) + Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "c1" + )))) ); } _ => unreachable!(), @@ -17390,7 +17394,9 @@ fn test_select_exclude() { } assert_eq!( select.exclude, - Some(ExcludeSelectItem::Single(Ident::new("c1"))) + Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "c1" + )))) ); let dialects = all_dialects_where(|d| { @@ -17401,7 +17407,9 @@ fn test_select_exclude() { SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude, .. }) => { assert_eq!( *opt_exclude, - Some(ExcludeSelectItem::Single(Ident::new("c1"))) + Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "c1" + )))) ); } _ => unreachable!(), @@ -17438,6 +17446,32 @@ fn test_select_exclude() { ); } +#[test] +fn test_select_exclude_qualified_names() { + // EXCLUDE should accept qualified names like `f.col` parsed as ObjectName. + let dialects = all_dialects_where(|d| d.supports_select_wildcard_exclude()); + + // Qualified name in multi-column EXCLUDE list: f.* EXCLUDE (f.col1, f.col2) + let select = dialects + .verified_only_select("SELECT f.* EXCLUDE (f.account_canonical_id, f.amount) FROM t AS f"); + match &select.projection[0] { + SelectItem::QualifiedWildcard(_, WildcardAdditionalOptions { opt_exclude, .. 
}) => { + assert_eq!( + *opt_exclude, + Some(ExcludeSelectItem::Multiple(vec![ + ObjectName::from(vec![Ident::new("f"), Ident::new("account_canonical_id")]), + ObjectName::from(vec![Ident::new("f"), Ident::new("amount")]), + ])) + ); + } + _ => unreachable!(), + } + + // Plain identifiers must still parse successfully. + dialects.verified_only_select("SELECT f.* EXCLUDE (account_canonical_id) FROM t AS f"); + dialects.verified_only_select("SELECT f.* EXCLUDE (col1, col2) FROM t AS f"); +} + #[test] fn test_no_semicolon_required_between_statements() { let sql = r#" diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index e0e3f143b6..a061876dfe 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -156,7 +156,9 @@ fn column_defs(statement: Statement) -> Vec { fn test_select_wildcard_with_exclude() { let select = duckdb().verified_only_select("SELECT * EXCLUDE (col_a) FROM data"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: Some(ExcludeSelectItem::Multiple(vec![Ident::new("col_a")])), + opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ObjectName::from( + Ident::new("col_a"), + )])), ..Default::default() }); assert_eq!(expected, select.projection[0]); @@ -166,7 +168,9 @@ fn test_select_wildcard_with_exclude() { let expected = SelectItem::QualifiedWildcard( SelectItemQualifiedWildcardKind::ObjectName(ObjectName::from(vec![Ident::new("name")])), WildcardAdditionalOptions { - opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))), + opt_exclude: Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "department_id", + )))), ..Default::default() }, ); @@ -176,8 +180,8 @@ fn test_select_wildcard_with_exclude() { .verified_only_select("SELECT * EXCLUDE (department_id, employee_id) FROM employee_table"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ - Ident::new("department_id"), - Ident::new("employee_id"), + 
ObjectName::from(Ident::new("department_id")), + ObjectName::from(Ident::new("employee_id")), ])), ..Default::default() }); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 43444016f2..c51cf3bdf1 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -1474,7 +1474,9 @@ fn snowflake_and_generic() -> TestedDialects { fn test_select_wildcard_with_exclude() { let select = snowflake_and_generic().verified_only_select("SELECT * EXCLUDE (col_a) FROM data"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: Some(ExcludeSelectItem::Multiple(vec![Ident::new("col_a")])), + opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ObjectName::from( + Ident::new("col_a"), + )])), ..Default::default() }); assert_eq!(expected, select.projection[0]); @@ -1484,7 +1486,9 @@ fn test_select_wildcard_with_exclude() { let expected = SelectItem::QualifiedWildcard( SelectItemQualifiedWildcardKind::ObjectName(ObjectName::from(vec![Ident::new("name")])), WildcardAdditionalOptions { - opt_exclude: Some(ExcludeSelectItem::Single(Ident::new("department_id"))), + opt_exclude: Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "department_id", + )))), ..Default::default() }, ); @@ -1494,8 +1498,8 @@ fn test_select_wildcard_with_exclude() { .verified_only_select("SELECT * EXCLUDE (department_id, employee_id) FROM employee_table"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { opt_exclude: Some(ExcludeSelectItem::Multiple(vec![ - Ident::new("department_id"), - Ident::new("employee_id"), + ObjectName::from(Ident::new("department_id")), + ObjectName::from(Ident::new("employee_id")), ])), ..Default::default() }); @@ -1580,7 +1584,9 @@ fn test_select_wildcard_with_exclude_and_rename() { let select = snowflake_and_generic() .verified_only_select("SELECT * EXCLUDE col_z RENAME col_a AS col_b FROM data"); let expected = SelectItem::Wildcard(WildcardAdditionalOptions { - opt_exclude: 
Some(ExcludeSelectItem::Single(Ident::new("col_z"))), + opt_exclude: Some(ExcludeSelectItem::Single(ObjectName::from(Ident::new( + "col_z", + )))), opt_rename: Some(RenameSelectItem::Single(IdentWithAlias { ident: Ident::new("col_a"), alias: Ident::new("col_b"), From 31e19429a779793c1ee0338e658d76de053cd203 Mon Sep 17 00:00:00 2001 From: "Guan-Ming (Wesley) Chiu" <105915352+guan404ming@users.noreply.github.com> Date: Fri, 27 Feb 2026 22:57:02 +0800 Subject: [PATCH 099/121] MSSQL: Add support for OUTPUT clause on INSERT/UPDATE/DELETE (#2228) Signed-off-by: Guan-Ming Chiu Signed-off-by: Guan-Ming (Wesley) Chiu <105915352+guan404ming@users.noreply.github.com> Co-authored-by: Ifeanyi Ubah --- src/ast/dml.rs | 26 +++++++++++++++++++++-- src/ast/spans.rs | 8 ++++++- src/dialect/snowflake.rs | 1 + src/keywords.rs | 2 ++ src/parser/merge.rs | 16 +++++++++++++- src/parser/mod.rs | 24 ++++++++++++++++++--- tests/sqlparser_common.rs | 2 ++ tests/sqlparser_mssql.rs | 42 +++++++++++++++++++++++++++++++++++++ tests/sqlparser_mysql.rs | 1 + tests/sqlparser_postgres.rs | 3 +++ tests/sqlparser_sqlite.rs | 1 + 11 files changed, 119 insertions(+), 7 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index 06f731c5ca..e2c48885f6 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -79,6 +79,9 @@ pub struct Insert { pub on: Option, /// RETURNING pub returning: Option>, + /// OUTPUT (MSSQL) + /// See + pub output: Option, /// Only for mysql pub replace_into: bool, /// Only for mysql @@ -203,6 +206,11 @@ impl Display for Insert { SpaceOrNewline.fmt(f)?; } + if let Some(output) = &self.output { + write!(f, "{output}")?; + SpaceOrNewline.fmt(f)?; + } + if let Some(settings) = &self.settings { write!(f, "SETTINGS {}", display_comma_separated(settings))?; SpaceOrNewline.fmt(f)?; @@ -289,6 +297,9 @@ pub struct Delete { pub selection: Option, /// RETURNING pub returning: Option>, + /// OUTPUT (MSSQL) + /// See + pub output: Option, /// ORDER BY (MySQL) pub order_by: Vec, /// LIMIT 
(MySQL) @@ -314,6 +325,10 @@ impl Display for Delete { indented_list(f, from)?; } } + if let Some(output) = &self.output { + SpaceOrNewline.fmt(f)?; + write!(f, "{output}")?; + } if let Some(using) = &self.using { SpaceOrNewline.fmt(f)?; f.write_str("USING")?; @@ -367,6 +382,9 @@ pub struct Update { pub selection: Option, /// RETURNING pub returning: Option>, + /// OUTPUT (MSSQL) + /// See + pub output: Option, /// SQLite-specific conflict resolution clause pub or: Option, /// LIMIT @@ -396,6 +414,10 @@ impl Display for Update { f.write_str("SET")?; indented_list(f, &self.assignments)?; } + if let Some(output) = &self.output { + SpaceOrNewline.fmt(f)?; + write!(f, "{output}")?; + } if let Some(UpdateTableFromKind::AfterSet(from)) = &self.from { SpaceOrNewline.fmt(f)?; f.write_str("FROM")?; @@ -717,11 +739,11 @@ impl Display for MergeUpdateExpr { } } -/// A `OUTPUT` Clause in the end of a `MERGE` Statement +/// An `OUTPUT` clause on `MERGE`, `INSERT`, `UPDATE`, or `DELETE` (MSSQL). /// /// Example: /// OUTPUT $action, deleted.* INTO dbo.temp_products; -/// [mssql](https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql) +/// #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 43005cfbbc..dd62c5ba1e 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -906,6 +906,7 @@ impl Spanned for Delete { using, selection, returning, + output, order_by, limit, } = self; @@ -923,6 +924,7 @@ impl Spanned for Delete { ) .chain(selection.iter().map(|i| i.span())) .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span()))) + .chain(output.iter().map(|i| i.span())) .chain(order_by.iter().map(|i| i.span())) .chain(limit.iter().map(|i| i.span())), ), @@ -940,6 +942,7 @@ impl Spanned for Update { from, selection, returning, + output, or: _, limit, } = self; @@ -951,6 +954,7 
@@ impl Spanned for Update { .chain(from.iter().map(|i| i.span())) .chain(selection.iter().map(|i| i.span())) .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span()))) + .chain(output.iter().map(|i| i.span())) .chain(limit.iter().map(|i| i.span())), ) } @@ -1312,6 +1316,7 @@ impl Spanned for Insert { has_table_keyword: _, // bool on, returning, + output, replace_into: _, // bool priority: _, // todo, mysql specific insert_alias: _, // todo, mysql specific @@ -1334,7 +1339,8 @@ impl Spanned for Insert { .chain(partitioned.iter().flat_map(|i| i.iter().map(|k| k.span()))) .chain(after_columns.iter().map(|i| i.span)) .chain(on.as_ref().map(|i| i.span())) - .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span()))), + .chain(returning.iter().flat_map(|i| i.iter().map(|k| k.span()))) + .chain(output.iter().map(|i| i.span())), ) } } diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index a9d71fc4b3..f756c4159e 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -1784,6 +1784,7 @@ fn parse_multi_table_insert( has_table_keyword: false, on: None, returning: None, + output: None, replace_into: false, priority: None, insert_alias: None, diff --git a/src/keywords.rs b/src/keywords.rs index cc2b9e9dd0..80f679c07c 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -1210,6 +1210,7 @@ pub const RESERVED_FOR_TABLE_ALIAS: &[Keyword] = &[ Keyword::ANTI, Keyword::SEMI, Keyword::RETURNING, + Keyword::OUTPUT, Keyword::ASOF, Keyword::MATCH_CONDITION, // for MSSQL-specific OUTER APPLY (seems reserved in most dialects) @@ -1264,6 +1265,7 @@ pub const RESERVED_FOR_COLUMN_ALIAS: &[Keyword] = &[ Keyword::CLUSTER, Keyword::DISTRIBUTE, Keyword::RETURNING, + Keyword::VALUES, // Reserved only as a column alias in the `SELECT` clause Keyword::FROM, Keyword::INTO, diff --git a/src/parser/merge.rs b/src/parser/merge.rs index a927bc4b1c..619be612bf 100644 --- a/src/parser/merge.rs +++ b/src/parser/merge.rs @@ -218,7 +218,21 @@ impl Parser<'_> { 
self.parse_parenthesized_qualified_column_list(IsOptional::Optional, allow_empty) } - fn parse_output( + /// Parses an `OUTPUT` clause if present (MSSQL). + pub(super) fn maybe_parse_output_clause( + &mut self, + ) -> Result, ParserError> { + if self.parse_keyword(Keyword::OUTPUT) { + Ok(Some(self.parse_output( + Keyword::OUTPUT, + self.get_current_token().clone(), + )?)) + } else { + Ok(None) + } + } + + pub(super) fn parse_output( &mut self, start_keyword: Keyword, start_token: TokenWithSpan, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index bc91213fa5..75450f75d0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -13309,6 +13309,9 @@ impl<'a> Parser<'a> { }; let from = self.parse_comma_separated(Parser::parse_table_and_joins)?; + + let output = self.maybe_parse_output_clause()?; + let using = if self.parse_keyword(Keyword::USING) { Some(self.parse_comma_separated(Parser::parse_table_and_joins)?) } else { @@ -13347,6 +13350,7 @@ impl<'a> Parser<'a> { using, selection, returning, + output, order_by, limit, })) @@ -17275,10 +17279,10 @@ impl<'a> Parser<'a> { let is_mysql = dialect_of!(self is MySqlDialect); - let (columns, partitioned, after_columns, source, assignments) = if self + let (columns, partitioned, after_columns, output, source, assignments) = if self .parse_keywords(&[Keyword::DEFAULT, Keyword::VALUES]) { - (vec![], None, vec![], None, vec![]) + (vec![], None, vec![], None, None, vec![]) } else { let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; @@ -17295,6 +17299,8 @@ impl<'a> Parser<'a> { Default::default() }; + let output = self.maybe_parse_output_clause()?; + let (source, assignments) = if self.peek_keyword(Keyword::FORMAT) || self.peek_keyword(Keyword::SETTINGS) { @@ -17305,7 +17311,14 @@ impl<'a> Parser<'a> { (Some(self.parse_query()?), vec![]) }; - (columns, partitioned, after_columns, source, assignments) + ( + columns, + 
partitioned, + after_columns, + output, + source, + assignments, + ) }; let (format_clause, settings) = if self.dialect.supports_insert_format() { @@ -17407,6 +17420,7 @@ impl<'a> Parser<'a> { has_table_keyword: table, on, returning, + output, replace_into, priority, insert_alias, @@ -17512,6 +17526,9 @@ impl<'a> Parser<'a> { }; self.expect_keyword(Keyword::SET)?; let assignments = self.parse_comma_separated(Parser::parse_assignment)?; + + let output = self.maybe_parse_output_clause()?; + let from = if from_before_set.is_none() && self.parse_keyword(Keyword::FROM) { Some(UpdateTableFromKind::AfterSet( self.parse_table_with_joins()?, @@ -17542,6 +17559,7 @@ impl<'a> Parser<'a> { from, selection, returning, + output, or, limit, } diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index ad7697a7f0..7bf2764078 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -530,6 +530,7 @@ fn parse_update_set_from() { ])), }), returning: None, + output: None, or: None, limit: None }) @@ -553,6 +554,7 @@ fn parse_update_with_table_alias() { limit: None, optimizer_hints, update_token: _, + output: _, }) if optimizer_hints.is_empty() => { assert_eq!( TableWithJoins { diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 8bdb1c2053..9033efe008 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2806,3 +2806,45 @@ fn test_exec_dynamic_sql() { .expect("EXEC (@sql) followed by DROP TABLE should parse"); assert_eq!(stmts.len(), 2); } + +// MSSQL OUTPUT clause on INSERT/UPDATE/DELETE +// https://learn.microsoft.com/en-us/sql/t-sql/queries/output-clause-transact-sql +#[test] +fn parse_mssql_insert_with_output() { + ms_and_generic().verified_stmt( + "INSERT INTO customers (name, email) OUTPUT INSERTED.id, INSERTED.name VALUES ('John', 'john@example.com')", + ); +} + +#[test] +fn parse_mssql_insert_with_output_into() { + ms_and_generic().verified_stmt( + "INSERT INTO customers (name, email) OUTPUT INSERTED.id, 
INSERTED.name INTO @new_ids VALUES ('John', 'john@example.com')", + ); +} + +#[test] +fn parse_mssql_delete_with_output() { + ms_and_generic().verified_stmt("DELETE FROM customers OUTPUT DELETED.* WHERE id = 1"); +} + +#[test] +fn parse_mssql_delete_with_output_into() { + ms_and_generic().verified_stmt( + "DELETE FROM customers OUTPUT DELETED.id, DELETED.name INTO @deleted_rows WHERE active = 0", + ); +} + +#[test] +fn parse_mssql_update_with_output() { + ms_and_generic().verified_stmt( + "UPDATE employees SET salary = salary * 1.1 OUTPUT INSERTED.id, DELETED.salary, INSERTED.salary WHERE department = 'Engineering'", + ); +} + +#[test] +fn parse_mssql_update_with_output_into() { + ms_and_generic().verified_stmt( + "UPDATE employees SET salary = salary * 1.1 OUTPUT INSERTED.id, DELETED.salary, INSERTED.salary INTO @changes WHERE department = 'Engineering'", + ); +} diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index b4ae764c2e..541f7df6ea 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -2671,6 +2671,7 @@ fn parse_update_with_joins() { limit: None, optimizer_hints, update_token: _, + output: _, }) if optimizer_hints.is_empty() => { assert_eq!( TableWithJoins { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index f4b3a28268..434c5fd7b4 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -5530,6 +5530,7 @@ fn test_simple_postgres_insert_with_alias() { has_table_keyword: false, on: None, returning: None, + output: None, replace_into: false, priority: None, insert_alias: None, @@ -5612,6 +5613,7 @@ fn test_simple_postgres_insert_with_alias() { has_table_keyword: false, on: None, returning: None, + output: None, replace_into: false, priority: None, insert_alias: None, @@ -5692,6 +5694,7 @@ fn test_simple_insert_with_quoted_alias() { has_table_keyword: false, on: None, returning: None, + output: None, replace_into: false, priority: None, insert_alias: None, diff --git 
a/tests/sqlparser_sqlite.rs b/tests/sqlparser_sqlite.rs index a8fa8db223..33c38fb0a6 100644 --- a/tests/sqlparser_sqlite.rs +++ b/tests/sqlparser_sqlite.rs @@ -496,6 +496,7 @@ fn parse_update_tuple_row_values() { }, from: None, returning: None, + output: None, limit: None, update_token: AttachedToken::empty() }) From 1da2ff779c0e71932fc882fe7ae4e6d674b8cc76 Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Tue, 3 Mar 2026 04:13:17 -0800 Subject: [PATCH 100/121] Redshift: Added DISTSTYLE and DISTKEY keywords parsing (#2222) --- src/ast/ddl.rs | 40 ++++++++++++++++++++++++++++ src/ast/helpers/stmt_create_table.rs | 29 +++++++++++++++++--- src/ast/mod.rs | 21 ++++++++------- src/ast/spans.rs | 2 ++ src/keywords.rs | 3 +++ src/parser/mod.rs | 34 +++++++++++++++++++++++ tests/sqlparser_duckdb.rs | 2 ++ tests/sqlparser_mssql.rs | 4 +++ tests/sqlparser_postgres.rs | 2 ++ tests/sqlparser_redshift.rs | 15 +++++++++++ 10 files changed, 138 insertions(+), 14 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 3a951f66b5..895959a3db 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -3032,6 +3032,12 @@ pub struct CreateTable { /// Snowflake "REQUIRE USER" clause for dybamic tables /// pub require_user: bool, + /// Redshift `DISTSTYLE` option + /// + pub diststyle: Option, + /// Redshift `DISTKEY` option + /// + pub distkey: Option, } impl fmt::Display for CreateTable { @@ -3330,6 +3336,12 @@ impl fmt::Display for CreateTable { if self.strict { write!(f, " STRICT")?; } + if let Some(diststyle) = &self.diststyle { + write!(f, " DISTSTYLE {diststyle}")?; + } + if let Some(distkey) = &self.distkey { + write!(f, " DISTKEY({distkey})")?; + } if let Some(query) = &self.query { write!(f, " AS {query}")?; } @@ -3417,6 +3429,34 @@ impl fmt::Display for PartitionBoundValue { } } +/// Redshift distribution style for `CREATE TABLE`. 
+/// +/// See [Redshift](https://docs.aws.amazon.com/redshift/latest/dg/r_CREATE_TABLE_NEW.html) +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum DistStyle { + /// `DISTSTYLE AUTO` + Auto, + /// `DISTSTYLE EVEN` + Even, + /// `DISTSTYLE KEY` + Key, + /// `DISTSTYLE ALL` + All, +} + +impl fmt::Display for DistStyle { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + DistStyle::Auto => write!(f, "AUTO"), + DistStyle::Even => write!(f, "EVEN"), + DistStyle::Key => write!(f, "KEY"), + DistStyle::All => write!(f, "ALL"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index e63c90dbcf..258f9c8353 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -25,10 +25,11 @@ use serde::{Deserialize, Serialize}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ - ClusteredBy, ColumnDef, CommentDef, CreateTable, CreateTableLikeKind, CreateTableOptions, Expr, - FileFormat, ForValues, HiveDistributionStyle, HiveFormat, Ident, InitializeKind, ObjectName, - OnCommit, OneOrManyWithParens, Query, RefreshModeKind, RowAccessPolicy, Statement, - StorageSerializationPolicy, TableConstraint, TableVersion, Tag, WrappedCollection, + ClusteredBy, ColumnDef, CommentDef, CreateTable, CreateTableLikeKind, CreateTableOptions, + DistStyle, Expr, FileFormat, ForValues, HiveDistributionStyle, HiveFormat, Ident, + InitializeKind, ObjectName, OnCommit, OneOrManyWithParens, Query, RefreshModeKind, + RowAccessPolicy, Statement, StorageSerializationPolicy, TableConstraint, TableVersion, Tag, + WrappedCollection, }; use crate::parser::ParserError; @@ -170,6 
+171,10 @@ pub struct CreateTableBuilder { pub initialize: Option, /// Whether operations require a user identity. pub require_user: bool, + /// Redshift `DISTSTYLE` option. + pub diststyle: Option, + /// Redshift `DISTKEY` option. + pub distkey: Option, } impl CreateTableBuilder { @@ -229,6 +234,8 @@ impl CreateTableBuilder { refresh_mode: None, initialize: None, require_user: false, + diststyle: None, + distkey: None, } } /// Set `OR REPLACE` for the CREATE TABLE statement. @@ -504,6 +511,16 @@ impl CreateTableBuilder { self.require_user = require_user; self } + /// Set Redshift `DISTSTYLE` option. + pub fn diststyle(mut self, diststyle: Option) -> Self { + self.diststyle = diststyle; + self + } + /// Set Redshift `DISTKEY` option. + pub fn distkey(mut self, distkey: Option) -> Self { + self.distkey = distkey; + self + } /// Consume the builder and produce a `CreateTable`. pub fn build(self) -> CreateTable { CreateTable { @@ -560,6 +577,8 @@ impl CreateTableBuilder { refresh_mode: self.refresh_mode, initialize: self.initialize, require_user: self.require_user, + diststyle: self.diststyle, + distkey: self.distkey, } } } @@ -635,6 +654,8 @@ impl From for CreateTableBuilder { refresh_mode: table.refresh_mode, initialize: table.initialize, require_user: table.require_user, + diststyle: table.diststyle, + distkey: table.distkey, } } } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 97cc61935d..6a2f9bd96d 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -70,16 +70,17 @@ pub use self::ddl::{ ConstraintCharacteristics, CreateConnector, CreateDomain, CreateExtension, CreateFunction, CreateIndex, CreateOperator, CreateOperatorClass, CreateOperatorFamily, CreatePolicy, CreatePolicyCommand, CreatePolicyType, CreateTable, CreateTrigger, CreateView, Deduplicate, - DeferrableInitial, DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, - DropOperatorFamily, DropOperatorSignature, DropPolicy, DropTrigger, ForValues, GeneratedAs, - 
GeneratedExpressionMode, IdentityParameters, IdentityProperty, IdentityPropertyFormatKind, - IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, IndexOption, IndexType, - KeyOrIndexDisplay, Msck, NullsDistinctOption, OperatorArgTypes, OperatorClassItem, - OperatorFamilyDropItem, OperatorFamilyItem, OperatorOption, OperatorPurpose, Owner, Partition, - PartitionBoundValue, ProcedureParam, ReferentialAction, RenameTableNameKind, ReplicaIdentity, - TagsColumnOption, TriggerObjectKind, Truncate, UserDefinedTypeCompositeAttributeDef, - UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, - UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, + DeferrableInitial, DistStyle, DropBehavior, DropExtension, DropFunction, DropOperator, + DropOperatorClass, DropOperatorFamily, DropOperatorSignature, DropPolicy, DropTrigger, + ForValues, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, + IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, + IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, OperatorArgTypes, + OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem, OperatorOption, OperatorPurpose, + Owner, Partition, PartitionBoundValue, ProcedureParam, ReferentialAction, RenameTableNameKind, + ReplicaIdentity, TagsColumnOption, TriggerObjectKind, Truncate, + UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, + UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, + UserDefinedTypeStorage, ViewColumnDef, }; pub use self::dml::{ Delete, Insert, Merge, MergeAction, MergeClause, MergeClauseKind, MergeInsertExpr, diff --git a/src/ast/spans.rs b/src/ast/spans.rs index dd62c5ba1e..7c07519765 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -582,6 +582,8 @@ impl Spanned for CreateTable { refresh_mode: _, initialize: _, require_user: _, + diststyle: _, // enum, no span + 
distkey: _, // Ident, todo } = self; union_spans( diff --git a/src/keywords.rs b/src/keywords.rs index 80f679c07c..37a8222702 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -335,7 +335,9 @@ define_keywords!( DISCONNECT, DISTINCT, DISTINCTROW, + DISTKEY, DISTRIBUTE, + DISTSTYLE, DIV, DO, DOMAIN, @@ -379,6 +381,7 @@ define_keywords!( ESCAPE, ESCAPED, ESTIMATE, + EVEN, EVENT, EVERY, EVOLVE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 75450f75d0..2749969c0e 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8097,6 +8097,23 @@ impl<'a> Parser<'a> { } } + /// Parse Redshift `DISTSTYLE { AUTO | EVEN | KEY | ALL }`. + /// + /// See + fn parse_dist_style(&mut self) -> Result { + let token = self.next_token(); + match &token.token { + Token::Word(w) => match w.keyword { + Keyword::AUTO => Ok(DistStyle::Auto), + Keyword::EVEN => Ok(DistStyle::Even), + Keyword::KEY => Ok(DistStyle::Key), + Keyword::ALL => Ok(DistStyle::All), + _ => self.expected("AUTO, EVEN, KEY, or ALL", token), + }, + _ => self.expected("AUTO, EVEN, KEY, or ALL", token), + } + } + /// Parse Hive formats. pub fn parse_hive_formats(&mut self) -> Result, ParserError> { let mut hive_format: Option = None; @@ -8367,6 +8384,21 @@ impl<'a> Parser<'a> { let strict = self.parse_keyword(Keyword::STRICT); + // Redshift: DISTSTYLE, DISTKEY + let diststyle = if self.parse_keyword(Keyword::DISTSTYLE) { + Some(self.parse_dist_style()?) + } else { + None + }; + let distkey = if self.parse_keyword(Keyword::DISTKEY) { + self.expect_token(&Token::LParen)?; + let column = self.parse_identifier()?; + self.expect_token(&Token::RParen)?; + Some(column) + } else { + None + }; + // Parse optional `AS ( query )` let query = if self.parse_keyword(Keyword::AS) { Some(self.parse_query()?) 
@@ -8406,6 +8438,8 @@ impl<'a> Parser<'a> { .table_options(create_table_config.table_options) .primary_key(primary_key) .strict(strict) + .diststyle(diststyle) + .distkey(distkey) .build()) } diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index a061876dfe..0512053a46 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -788,6 +788,8 @@ fn test_duckdb_union_datatype() { refresh_mode: None, initialize: None, require_user: Default::default(), + diststyle: Default::default(), + distkey: Default::default(), }), stmt ); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 9033efe008..aa31b6327d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2006,6 +2006,8 @@ fn parse_create_table_with_valid_options() { refresh_mode: None, initialize: None, require_user: false, + diststyle: None, + distkey: None, }) ); } @@ -2174,6 +2176,8 @@ fn parse_create_table_with_identity_column() { refresh_mode: None, initialize: None, require_user: false, + diststyle: None, + distkey: None, }), ); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 434c5fd7b4..7dd624a275 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6418,6 +6418,8 @@ fn parse_trigger_related_functions() { refresh_mode: None, initialize: None, require_user: false, + diststyle: None, + distkey: None, } ); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 90652ff48f..03dfda2c04 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -452,3 +452,18 @@ fn parse_vacuum() { _ => unreachable!(), } } + +#[test] +fn test_create_table_diststyle_distkey() { + redshift().verified_stmt( + "CREATE TEMPORARY TABLE tmp_sbk_summary_pp DISTSTYLE KEY DISTKEY(bet_id) AS SELECT 1 AS bet_id", + ); +} + +#[test] +fn test_create_table_diststyle() { + redshift().verified_stmt("CREATE TABLE t1 (c1 INT) DISTSTYLE AUTO"); + redshift().verified_stmt("CREATE TABLE t1 (c1 
INT) DISTSTYLE EVEN"); + redshift().verified_stmt("CREATE TABLE t1 (c1 INT) DISTSTYLE KEY DISTKEY(c1)"); + redshift().verified_stmt("CREATE TABLE t1 (c1 INT) DISTSTYLE ALL"); +} From 255e50cfaf56d5eb20e3dc05cfbc618ecfd4b1eb Mon Sep 17 00:00:00 2001 From: xitep Date: Wed, 4 Mar 2026 11:02:34 +0100 Subject: [PATCH 101/121] Allow INSERT columns to be qualified (#2260) --- src/ast/dml.rs | 2 +- src/ast/spans.rs | 2 +- src/parser/mod.rs | 3 +- tests/sqlparser_common.rs | 5 ++- tests/sqlparser_mysql.rs | 67 +++++++++++++++++++++++++++++-------- tests/sqlparser_oracle.rs | 15 +++++++++ tests/sqlparser_postgres.rs | 24 ++++++------- 7 files changed, 88 insertions(+), 30 deletions(-) diff --git a/src/ast/dml.rs b/src/ast/dml.rs index e2c48885f6..446d44b205 100644 --- a/src/ast/dml.rs +++ b/src/ast/dml.rs @@ -61,7 +61,7 @@ pub struct Insert { /// `table_name foo` (for Oracle) pub table_alias: Option, /// COLUMNS - pub columns: Vec, + pub columns: Vec, /// Overwrite (Hive) pub overwrite: bool, /// A SQL query that specifies what to insert diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 7c07519765..3f73af4087 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1335,7 +1335,7 @@ impl Spanned for Insert { core::iter::once(insert_token.0.span) .chain(core::iter::once(table.span())) .chain(table_alias.iter().map(|k| k.alias.span)) - .chain(columns.iter().map(|i| i.span)) + .chain(columns.iter().map(|i| i.span())) .chain(source.as_ref().map(|q| q.span())) .chain(assignments.iter().map(|i| i.span())) .chain(partitioned.iter().flat_map(|i| i.iter().map(|k| k.span()))) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 2749969c0e..7764fab216 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -17319,7 +17319,8 @@ impl<'a> Parser<'a> { (vec![], None, vec![], None, None, vec![]) } else { let (columns, partitioned, after_columns) = if !self.peek_subquery_start() { - let columns = self.parse_parenthesized_column_list(Optional, is_mysql)?; + let columns = + 
self.parse_parenthesized_qualified_column_list(Optional, is_mysql)?; let partitioned = self.parse_insert_partition()?; // Hive allows you to specify columns after partitions as well if you want. diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7bf2764078..a59e3b96e4 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -153,7 +153,10 @@ fn parse_insert_values() { assert_eq!(table_name.to_string(), expected_table_name); assert_eq!(columns.len(), expected_columns.len()); for (index, column) in columns.iter().enumerate() { - assert_eq!(column, &Ident::new(expected_columns[index].clone())); + assert_eq!( + column, + &ObjectName::from(Ident::new(expected_columns[index].clone())) + ); } match *source.body { SetExpr::Values(Values { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 541f7df6ea..6c59997c32 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -1913,7 +1913,13 @@ fn parse_simple_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert_eq!( Some(Box::new(Query { @@ -1978,7 +1984,13 @@ fn parse_ignore_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert!(ignore); assert_eq!( @@ -2028,7 +2040,13 @@ fn parse_priority_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + 
ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert_eq!(priority, Some(HighPriority)); assert_eq!( @@ -2075,7 +2093,13 @@ fn parse_priority_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert_eq!(priority, Some(LowPriority)); assert_eq!( @@ -2123,7 +2147,10 @@ fn parse_insert_as() { TableObject::TableName(ObjectName::from(vec![Ident::with_quote('`', "table")])), table_name ); - assert_eq!(vec![Ident::with_quote('`', "date")], columns); + assert_eq!( + vec![ObjectName::from(Ident::with_quote('`', "date"))], + columns + ); let insert_alias = insert_alias.unwrap(); assert_eq!( @@ -2176,7 +2203,10 @@ fn parse_insert_as() { table_name ); assert_eq!( - vec![Ident::with_quote('`', "id"), Ident::with_quote('`', "date")], + vec![ + ObjectName::from(Ident::with_quote('`', "id")), + ObjectName::from(Ident::with_quote('`', "date")) + ], columns ); let insert_alias = insert_alias.unwrap(); @@ -2238,7 +2268,13 @@ fn parse_replace_insert() { TableObject::TableName(ObjectName::from(vec![Ident::new("tasks")])), table_name ); - assert_eq!(vec![Ident::new("title"), Ident::new("priority")], columns); + assert_eq!( + vec![ + ObjectName::from(Ident::new("title")), + ObjectName::from(Ident::new("priority")) + ], + columns + ); assert!(on.is_none()); assert!(replace_into); assert_eq!(priority, Some(Delayed)); @@ -2332,12 +2368,12 @@ fn parse_insert_with_on_duplicate_update() { ); assert_eq!( vec![ - Ident::new("name"), - Ident::new("description"), - Ident::new("perm_create"), - Ident::new("perm_read"), - Ident::new("perm_update"), - Ident::new("perm_delete") + ObjectName::from(Ident::new("name")), + ObjectName::from(Ident::new("description")), + 
ObjectName::from(Ident::new("perm_create")), + ObjectName::from(Ident::new("perm_read")), + ObjectName::from(Ident::new("perm_update")), + ObjectName::from(Ident::new("perm_delete")) ], columns ); @@ -2651,7 +2687,10 @@ fn parse_insert_with_numeric_prefix_column_name() { TableObject::TableName(ObjectName::from(vec![Ident::new("s1"), Ident::new("t1")])), table_name ); - assert_eq!(vec![Ident::new("123col_$@length123")], columns); + assert_eq!( + vec![ObjectName::from(Ident::new("123col_$@length123"))], + columns + ); } _ => unreachable!(), } diff --git a/tests/sqlparser_oracle.rs b/tests/sqlparser_oracle.rs index b34a9308d9..35f083111e 100644 --- a/tests/sqlparser_oracle.rs +++ b/tests/sqlparser_oracle.rs @@ -475,6 +475,21 @@ fn test_insert_with_table_alias() { VALUES (1, 2, 3)", ); verify_table_name_with_alias(&stmt, "foo_t", "t"); + + let stmt = + oracle_dialect.verified_stmt("INSERT INTO foo_t t (t.id, t.val) SELECT 1, 2 FROM dual"); + verify_table_name_with_alias(&stmt, "foo_t", "t"); + if let Statement::Insert(Insert { columns, .. 
}) = stmt { + assert_eq!( + vec![ + ObjectName::from(vec![Ident::new("t"), Ident::new("id")]), + ObjectName::from(vec![Ident::new("t"), Ident::new("val")]) + ], + columns + ); + } else { + panic!("not an insert statement"); + }; } #[test] diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 7dd624a275..ecf7b6bfc0 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -5493,16 +5493,16 @@ fn test_simple_postgres_insert_with_alias() { } }), columns: vec![ - Ident { + ObjectName::from(Ident { value: "id".to_string(), quote_style: None, span: Span::empty(), - }, - Ident { + }), + ObjectName::from(Ident { value: "a".to_string(), quote_style: None, span: Span::empty(), - } + }) ], overwrite: false, source: Some(Box::new(Query { @@ -5573,16 +5573,16 @@ fn test_simple_postgres_insert_with_alias() { } }), columns: vec![ - Ident { + ObjectName::from(Ident { value: "id".to_string(), quote_style: None, span: Span::empty(), - }, - Ident { + }), + ObjectName::from(Ident { value: "a".to_string(), quote_style: None, span: Span::empty(), - } + }) ], overwrite: false, source: Some(Box::new(Query { @@ -5655,16 +5655,16 @@ fn test_simple_insert_with_quoted_alias() { } }), columns: vec![ - Ident { + ObjectName::from(Ident { value: "id".to_string(), quote_style: None, span: Span::empty(), - }, - Ident { + }), + ObjectName::from(Ident { value: "a".to_string(), quote_style: None, span: Span::empty(), - } + }) ], overwrite: false, source: Some(Box::new(Query { From 915448cf335863da73dee8e3fe0e3275189d703c Mon Sep 17 00:00:00 2001 From: Luca Cappelletti Date: Wed, 4 Mar 2026 14:24:57 +0100 Subject: [PATCH 102/121] Add support for parsing COPY statements from STDIN without a semicolon (#2245) --- src/parser/mod.rs | 13 +++++---- tests/sqlparser_postgres.rs | 56 +++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 6 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7764fab216..688d1d0273 100644 --- 
a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11081,12 +11081,13 @@ impl<'a> Parser<'a> { while let Some(opt) = self.maybe_parse(|parser| parser.parse_copy_legacy_option())? { legacy_options.push(opt); } - let values = if let CopyTarget::Stdin = target { - self.expect_token(&Token::SemiColon)?; - self.parse_tsv() - } else { - vec![] - }; + let values = + if matches!(target, CopyTarget::Stdin) && self.peek_token_ref().token != Token::EOF { + self.expect_token(&Token::SemiColon)?; + self.parse_tsv() + } else { + vec![] + }; Ok(Statement::Copy { source, to, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index ecf7b6bfc0..c83860ff5b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -1123,6 +1123,62 @@ PHP ₱ USD $ pg_and_generic().one_statement_parses_to(sql, ""); } +#[test] +fn parse_copy_from_stdin_without_semicolon() { + let stmt = pg().verified_stmt("COPY bitwise_test FROM STDIN NULL 'null'"); + assert_eq!( + stmt, + Statement::Copy { + source: CopySource::Table { + table_name: ObjectName::from(vec!["bitwise_test".into()]), + columns: vec![], + }, + to: false, + target: CopyTarget::Stdin, + options: vec![], + legacy_options: vec![CopyLegacyOption::Null("null".into())], + values: vec![], + } + ); +} + +#[test] +fn parse_copy_from_stdin_without_semicolon_variants() { + // This covers additional COPY ... FROM STDIN shapes without inline payload. + // `parse_copy_from_stdin_without_semicolon` asserts the legacy NULL option details. 
+ let cases = [ + "COPY varbit_table FROM STDIN", + "COPY bit_table FROM STDIN", + "COPY copytest2 (test) FROM STDIN", + "COPY copytest3 FROM STDIN CSV HEADER", + "COPY copytest4 FROM STDIN (HEADER)", + "COPY parted_copytest FROM STDIN", + "COPY tab_progress_reporting FROM STDIN", + "COPY oversized_column_default FROM STDIN", + "COPY x (a, b, c, d, e) FROM STDIN", + "COPY header_copytest (c, a) FROM STDIN", + "COPY atest5 (two) FROM STDIN", + "COPY main_table (a, b) FROM STDIN", + ]; + + for sql in cases { + match pg().verified_stmt(sql) { + Statement::Copy { + to: false, + target: CopyTarget::Stdin, + values, + .. + } => { + assert!( + values.is_empty(), + "expected no inline COPY payload for `{sql}`" + ); + } + _ => panic!("expected COPY ... FROM STDIN statement for `{sql}`"), + } + } +} + #[test] fn test_copy_from() { let stmt = pg().verified_stmt("COPY users FROM 'data.csv'"); From 6691f31b753708bd5952cc453b65440d7177a9bb Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Wed, 4 Mar 2026 05:48:33 -0800 Subject: [PATCH 103/121] Fix credentials parsing for redshift (#2262) --- src/ast/mod.rs | 4 ++++ src/parser/mod.rs | 4 ++++ tests/sqlparser_redshift.rs | 7 +++++++ 3 files changed, 15 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6a2f9bd96d..e201f78420 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -9217,6 +9217,9 @@ pub enum CopyLegacyOption { TruncateColumns, /// ZSTD Zstd, + /// Redshift `CREDENTIALS 'auth-args'` + /// + Credentials(String), } impl fmt::Display for CopyLegacyOption { @@ -9327,6 +9330,7 @@ impl fmt::Display for CopyLegacyOption { } TruncateColumns => write!(f, "TRUNCATECOLUMNS"), Zstd => write!(f, "ZSTD"), + Credentials(s) => write!(f, "CREDENTIALS '{}'", value::escape_single_quote_string(s)), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 688d1d0273..75b5bfa762 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -11177,6 +11177,7 @@ impl<'a> Parser<'a> { Keyword::BZIP2, Keyword::CLEANPATH, 
Keyword::COMPUPDATE, + Keyword::CREDENTIALS, Keyword::CSV, Keyword::DATEFORMAT, Keyword::DELIMITER, @@ -11234,6 +11235,9 @@ impl<'a> Parser<'a> { }; CopyLegacyOption::CompUpdate { preset, enabled } } + Some(Keyword::CREDENTIALS) => { + CopyLegacyOption::Credentials(self.parse_literal_string()?) + } Some(Keyword::CSV) => CopyLegacyOption::Csv({ let mut opts = vec![]; while let Some(opt) = diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 03dfda2c04..243b0646c6 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -467,3 +467,10 @@ fn test_create_table_diststyle() { redshift().verified_stmt("CREATE TABLE t1 (c1 INT) DISTSTYLE KEY DISTKEY(c1)"); redshift().verified_stmt("CREATE TABLE t1 (c1 INT) DISTSTYLE ALL"); } + +#[test] +fn test_copy_credentials() { + redshift().verified_stmt( + "COPY t1 FROM 's3://bucket/file.csv' CREDENTIALS 'aws_access_key_id=AK;aws_secret_access_key=SK' CSV", + ); +} From 64f4b1fa2b47f957b1c8d39c3c54b0380daa32b8 Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Thu, 5 Mar 2026 05:45:41 -0800 Subject: [PATCH 104/121] Added SORTKEY keyword parsing for redshift queries (#2261) --- src/ast/ddl.rs | 18 +++++++++++++++- src/ast/helpers/stmt_create_table.rs | 14 ++++++++++-- src/ast/spans.rs | 6 ++++-- src/keywords.rs | 1 + src/parser/mod.rs | 32 +++++++++++++++++++++++++--- tests/sqlparser_duckdb.rs | 1 + tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_postgres.rs | 1 + tests/sqlparser_redshift.rs | 26 ++++++++++++++++++++++ 9 files changed, 93 insertions(+), 8 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 895959a3db..6bea28cb74 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -457,6 +457,12 @@ pub enum AlterTableOperation { }, /// Remove the clustering key from the table. DropClusteringKey, + /// Redshift `ALTER SORTKEY (column_list)` + /// + AlterSortKey { + /// Column references in the sort key. + columns: Vec, + }, /// Suspend background reclustering operations. 
SuspendRecluster, /// Resume background reclustering operations. @@ -993,6 +999,10 @@ impl fmt::Display for AlterTableOperation { write!(f, "DROP CLUSTERING KEY")?; Ok(()) } + AlterTableOperation::AlterSortKey { columns } => { + write!(f, "ALTER SORTKEY({})", display_comma_separated(columns))?; + Ok(()) + } AlterTableOperation::SuspendRecluster => { write!(f, "SUSPEND RECLUSTER")?; Ok(()) @@ -3037,7 +3047,10 @@ pub struct CreateTable { pub diststyle: Option, /// Redshift `DISTKEY` option /// - pub distkey: Option, + pub distkey: Option, + /// Redshift `SORTKEY` option + /// + pub sortkey: Option>, } impl fmt::Display for CreateTable { @@ -3342,6 +3355,9 @@ impl fmt::Display for CreateTable { if let Some(distkey) = &self.distkey { write!(f, " DISTKEY({distkey})")?; } + if let Some(sortkey) = &self.sortkey { + write!(f, " SORTKEY({})", display_comma_separated(sortkey))?; + } if let Some(query) = &self.query { write!(f, " AS {query}")?; } diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 258f9c8353..5963e94047 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -174,7 +174,9 @@ pub struct CreateTableBuilder { /// Redshift `DISTSTYLE` option. pub diststyle: Option, /// Redshift `DISTKEY` option. - pub distkey: Option, + pub distkey: Option, + /// Redshift `SORTKEY` option. + pub sortkey: Option>, } impl CreateTableBuilder { @@ -236,6 +238,7 @@ impl CreateTableBuilder { require_user: false, diststyle: None, distkey: None, + sortkey: None, } } /// Set `OR REPLACE` for the CREATE TABLE statement. @@ -517,10 +520,15 @@ impl CreateTableBuilder { self } /// Set Redshift `DISTKEY` option. - pub fn distkey(mut self, distkey: Option) -> Self { + pub fn distkey(mut self, distkey: Option) -> Self { self.distkey = distkey; self } + /// Set Redshift `SORTKEY` option. 
+ pub fn sortkey(mut self, sortkey: Option>) -> Self { + self.sortkey = sortkey; + self + } /// Consume the builder and produce a `CreateTable`. pub fn build(self) -> CreateTable { CreateTable { @@ -579,6 +587,7 @@ impl CreateTableBuilder { require_user: self.require_user, diststyle: self.diststyle, distkey: self.distkey, + sortkey: self.sortkey, } } } @@ -656,6 +665,7 @@ impl From for CreateTableBuilder { require_user: table.require_user, diststyle: table.diststyle, distkey: table.distkey, + sortkey: table.sortkey, } } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 3f73af4087..57d57b249a 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -582,8 +582,9 @@ impl Spanned for CreateTable { refresh_mode: _, initialize: _, require_user: _, - diststyle: _, // enum, no span - distkey: _, // Ident, todo + diststyle: _, + distkey: _, + sortkey: _, } = self; union_spans( @@ -1193,6 +1194,7 @@ impl Spanned for AlterTableOperation { AlterTableOperation::OwnerTo { .. } => Span::empty(), AlterTableOperation::ClusterBy { exprs } => union_spans(exprs.iter().map(|e| e.span())), AlterTableOperation::DropClusteringKey => Span::empty(), + AlterTableOperation::AlterSortKey { .. } => Span::empty(), AlterTableOperation::SuspendRecluster => Span::empty(), AlterTableOperation::ResumeRecluster => Span::empty(), AlterTableOperation::Refresh { .. } => Span::empty(), diff --git a/src/keywords.rs b/src/keywords.rs index 37a8222702..9ea85fd3a7 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -953,6 +953,7 @@ define_keywords!( SOME, SORT, SORTED, + SORTKEY, SOURCE, SPATIAL, SPECIFIC, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 75b5bfa762..eaaa95ec8b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8384,7 +8384,7 @@ impl<'a> Parser<'a> { let strict = self.parse_keyword(Keyword::STRICT); - // Redshift: DISTSTYLE, DISTKEY + // Redshift: DISTSTYLE, DISTKEY, SORTKEY let diststyle = if self.parse_keyword(Keyword::DISTSTYLE) { Some(self.parse_dist_style()?) 
} else { @@ -8392,9 +8392,17 @@ impl<'a> Parser<'a> { }; let distkey = if self.parse_keyword(Keyword::DISTKEY) { self.expect_token(&Token::LParen)?; - let column = self.parse_identifier()?; + let expr = self.parse_expr()?; self.expect_token(&Token::RParen)?; - Some(column) + Some(expr) + } else { + None + }; + let sortkey = if self.parse_keyword(Keyword::SORTKEY) { + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + Some(columns) } else { None }; @@ -8440,6 +8448,7 @@ impl<'a> Parser<'a> { .strict(strict) .diststyle(diststyle) .distkey(distkey) + .sortkey(sortkey) .build()) } @@ -9979,6 +9988,18 @@ impl<'a> Parser<'a> { }) } + /// Parse Redshift `ALTER SORTKEY (column_list)`. + /// + /// See + fn parse_alter_sort_key(&mut self) -> Result { + self.expect_keyword_is(Keyword::ALTER)?; + self.expect_keyword_is(Keyword::SORTKEY)?; + self.expect_token(&Token::LParen)?; + let columns = self.parse_comma_separated(|p| p.parse_expr())?; + self.expect_token(&Token::RParen)?; + Ok(AlterTableOperation::AlterSortKey { columns }) + } + /// Parse a single `ALTER TABLE` operation and return an `AlterTableOperation`. 
pub fn parse_alter_table_operation(&mut self) -> Result { let operation = if self.parse_keyword(Keyword::ADD) { @@ -10257,6 +10278,11 @@ impl<'a> Parser<'a> { column_position, } } else if self.parse_keyword(Keyword::ALTER) { + if self.peek_keyword(Keyword::SORTKEY) { + self.prev_token(); + return self.parse_alter_sort_key(); + } + let _ = self.parse_keyword(Keyword::COLUMN); // [ COLUMN ] let column_name = self.parse_identifier()?; let is_postgresql = dialect_of!(self is PostgreSqlDialect); diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 0512053a46..33a6cb4565 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -790,6 +790,7 @@ fn test_duckdb_union_datatype() { require_user: Default::default(), diststyle: Default::default(), distkey: Default::default(), + sortkey: Default::default(), }), stmt ); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index aa31b6327d..da6ecace66 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2008,6 +2008,7 @@ fn parse_create_table_with_valid_options() { require_user: false, diststyle: None, distkey: None, + sortkey: None, }) ); } @@ -2178,6 +2179,7 @@ fn parse_create_table_with_identity_column() { require_user: false, diststyle: None, distkey: None, + sortkey: None, }), ); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index c83860ff5b..60aca14b3c 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6476,6 +6476,7 @@ fn parse_trigger_related_functions() { require_user: false, diststyle: None, distkey: None, + sortkey: None, } ); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 243b0646c6..184aa5b69a 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -474,3 +474,29 @@ fn test_copy_credentials() { "COPY t1 FROM 's3://bucket/file.csv' CREDENTIALS 'aws_access_key_id=AK;aws_secret_access_key=SK' CSV", ); } + +#[test] +fn test_create_table_sortkey() { 
+ redshift().verified_stmt("CREATE TABLE t1 (c1 INT, c2 INT, c3 TIMESTAMP) SORTKEY(c3)"); + redshift().verified_stmt("CREATE TABLE t1 (c1 INT, c2 INT) SORTKEY(c1, c2)"); +} + +#[test] +fn test_create_table_distkey_sortkey_with_ctas() { + redshift().verified_stmt( + "CREATE TABLE t1 DISTKEY(1) SORTKEY(1, 3) AS SELECT eventid, venueid, dateid, eventname FROM event", + ); +} + +#[test] +fn test_create_table_diststyle_distkey_sortkey() { + redshift().verified_stmt( + "CREATE TABLE t1 (c1 INT, c2 INT) DISTSTYLE KEY DISTKEY(c1) SORTKEY(c1, c2)", + ); +} + +#[test] +fn test_alter_table_alter_sortkey() { + redshift().verified_stmt("ALTER TABLE users ALTER SORTKEY(created_at)"); + redshift().verified_stmt("ALTER TABLE users ALTER SORTKEY(c1, c2)"); +} From 13b88a3fe86f4a64790b3ece3dcea675bd415cff Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Mon, 9 Mar 2026 02:15:25 -0700 Subject: [PATCH 105/121] Fixed transaction handling for snowflake (#2263) --- src/dialect/snowflake.rs | 11 +++++++++++ tests/sqlparser_snowflake.rs | 21 +++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index f756c4159e..6c160a9dd8 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -247,6 +247,17 @@ impl Dialect for SnowflakeDialect { fn parse_statement(&self, parser: &mut Parser) -> Option> { if parser.parse_keyword(Keyword::BEGIN) { + // Snowflake supports both `BEGIN TRANSACTION` and `BEGIN ... END` blocks. + // If the next keyword indicates a transaction statement, let the + // standard parse_begin() handle it. 
+ if parser + .peek_one_of_keywords(&[Keyword::TRANSACTION, Keyword::WORK, Keyword::NAME]) + .is_some() + || matches!(parser.peek_token_ref().token, Token::SemiColon | Token::EOF) + { + parser.prev_token(); + return None; + } return Some(parser.parse_begin_exception_end()); } diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index c51cf3bdf1..022c644a40 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -4610,6 +4610,27 @@ END assert_eq!(2, exception[1].statements.len()); } +#[test] +fn test_begin_transaction() { + snowflake().verified_stmt("BEGIN TRANSACTION"); + snowflake().verified_stmt("BEGIN WORK"); + + // BEGIN TRANSACTION with statements + let stmts = snowflake() + .parse_sql_statements("BEGIN TRANSACTION; DROP TABLE IF EXISTS bla; COMMIT") + .unwrap(); + assert_eq!(3, stmts.len()); + + // Bare BEGIN (no TRANSACTION keyword) with statements + let stmts = snowflake() + .parse_sql_statements("BEGIN; DROP TABLE IF EXISTS bla; COMMIT") + .unwrap(); + assert_eq!(3, stmts.len()); + + // Bare BEGIN at EOF (no semicolon, no TRANSACTION keyword) + snowflake().verified_stmt("BEGIN"); +} + #[test] fn test_snowflake_fetch_clause_syntax() { let canonical = "SELECT c1 FROM fetch_test FETCH FIRST 2 ROWS ONLY"; From b6003eb98e721943c711672571d7cf252c42ae2b Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Mon, 9 Mar 2026 02:27:03 -0700 Subject: [PATCH 106/121] Fixed COPY GRANTS clause parsing for snowflake (#2267) --- src/ast/ddl.rs | 6 ++++++ src/parser/mod.rs | 2 ++ tests/sqlparser_common.rs | 6 ++++++ tests/sqlparser_snowflake.rs | 11 +++++++++++ 4 files changed, 25 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 6bea28cb74..d6da8368eb 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -4293,6 +4293,9 @@ pub struct CreateView { pub if_not_exists: bool, /// if true, has SQLite `TEMP` or `TEMPORARY` clause pub temporary: bool, + /// Snowflake: `COPY GRANTS` clause + /// + pub copy_grants: bool, /// if 
not None, has Clickhouse `TO` clause, specify the table into which to insert results /// pub to: Option, @@ -4336,6 +4339,9 @@ impl fmt::Display for CreateView { .map(|to| format!(" TO {to}")) .unwrap_or_default() )?; + if self.copy_grants { + write!(f, " COPY GRANTS")?; + } if !self.columns.is_empty() { write!(f, " ({})", display_comma_separated(&self.columns))?; } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index eaaa95ec8b..f9432f867f 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6375,6 +6375,7 @@ impl<'a> Parser<'a> { let name_before_not_exists = !if_not_exists_first && self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); let if_not_exists = if_not_exists_first || name_before_not_exists; + let copy_grants = self.parse_keywords(&[Keyword::COPY, Keyword::GRANTS]); // Many dialects support `OR ALTER` right after `CREATE`, but we don't (yet). // ANSI SQL and Postgres support RECURSIVE here, but we don't support it either. let columns = self.parse_view_columns()?; @@ -6442,6 +6443,7 @@ impl<'a> Parser<'a> { with_no_schema_binding, if_not_exists, temporary, + copy_grants, to, params: create_view_params, name_before_not_exists, diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index a59e3b96e4..08fb6107fc 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -8317,6 +8317,7 @@ fn parse_create_view() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); @@ -8435,6 +8436,7 @@ fn parse_create_view_temporary() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); @@ -8476,6 +8478,7 @@ fn parse_create_or_replace_view() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("v", name.to_string()); @@ -8521,6 +8524,7 @@ fn 
parse_create_or_replace_materialized_view() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("v", name.to_string()); @@ -8562,6 +8566,7 @@ fn parse_create_materialized_view() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); @@ -8603,6 +8608,7 @@ fn parse_create_materialized_view_with_cluster_by() { params, name_before_not_exists: _, secure: _, + copy_grants: _, }) => { assert_eq!(or_alter, false); assert_eq!("myschema.myview", name.to_string()); diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 022c644a40..265f8a9ae5 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -4671,6 +4671,17 @@ fn test_snowflake_create_view_with_composite_policy_name() { snowflake().verified_stmt(create_view_with_tag); } +#[test] +fn test_snowflake_create_view_copy_grants() { + snowflake().verified_stmt("CREATE OR REPLACE VIEW bla COPY GRANTS AS (SELECT * FROM source)"); + snowflake() + .verified_stmt("CREATE OR REPLACE SECURE VIEW bla COPY GRANTS AS (SELECT * FROM source)"); + // COPY GRANTS with column list + snowflake().verified_stmt( + "CREATE OR REPLACE VIEW bla COPY GRANTS (a, b) AS (SELECT a, b FROM source)", + ); +} + #[test] fn test_snowflake_identifier_function() { // Using IDENTIFIER to reference a column From 3fa71143b5a3bd5c97dc94999e0a1652fefd389f Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Mon, 9 Mar 2026 02:28:09 -0700 Subject: [PATCH 107/121] Fixed parsing `OPTIONS(format = 'CSV')` when creating external bigquery table (#2268) --- src/parser/mod.rs | 2 ++ tests/sqlparser_bigquery.rs | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f9432f867f..274449ff76 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -6297,6 +6297,8 @@ impl<'a> Parser<'a> { let table_properties = 
self.parse_options(Keyword::TBLPROPERTIES)?; let table_options = if !table_properties.is_empty() { CreateTableOptions::TableProperties(table_properties) + } else if let Some(options) = self.maybe_parse_options(Keyword::OPTIONS)? { + CreateTableOptions::Options(options) } else { CreateTableOptions::None }; diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index ce962cb807..a6b0906ffa 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -591,6 +591,16 @@ fn parse_create_table_with_options() { bigquery().verified_stmt(sql); } +#[test] +fn parse_create_external_table_with_options() { + bigquery().verified_stmt( + "CREATE EXTERNAL TABLE dataset_id.table1 (hvr_tx_seq STRING) OPTIONS(format = 'CSV')", + ); + bigquery().verified_stmt( + "CREATE EXTERNAL TABLE dataset_id.table1 (hvr_tx_seq STRING) OPTIONS(format = 'CSV', allow_quoted_newlines = true, encoding = 'UTF8')", + ); +} + #[test] fn parse_nested_data_types() { let sql = "CREATE TABLE table (x STRUCT, b BYTES(42)>, y ARRAY>)"; From d38dd78122236a2186ebfa0e252b6b4dcfc3537c Mon Sep 17 00:00:00 2001 From: Michael Bradshaw Date: Thu, 12 Mar 2026 23:12:44 -0600 Subject: [PATCH 108/121] Add support for PostgreSQL LOCK TABLE (#2273) Co-authored-by: Ifeanyi Ubah --- src/ast/mod.rs | 111 ++++++++++++++++++++++++++++++++++++ src/ast/spans.rs | 2 + src/parser/mod.rs | 64 +++++++++++++++++++++ tests/sqlparser_postgres.rs | 60 +++++++++++++++++++ 4 files changed, 237 insertions(+) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index e201f78420..789bf28200 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -4624,6 +4624,12 @@ pub enum Statement { is_eq: bool, }, /// ```sql + /// LOCK [ TABLE ] [ ONLY ] name [ * ] [, ...] [ IN lockmode MODE ] [ NOWAIT ] + /// ``` + /// + /// See + Lock(Lock), + /// ```sql /// LOCK TABLES [READ [LOCAL] | [LOW_PRIORITY] WRITE] /// ``` /// Note: this is a MySQL-specific statement. 
See @@ -4847,6 +4853,12 @@ impl From for Statement { } } +impl From for Statement { + fn from(lock: Lock) -> Self { + Statement::Lock(lock) + } +} + impl From for Statement { fn from(msck: ddl::Msck) -> Self { Statement::Msck(msck) @@ -6141,6 +6153,7 @@ impl fmt::Display for Statement { } Ok(()) } + Statement::Lock(lock) => lock.fmt(f), Statement::LockTables { tables } => { write!(f, "LOCK TABLES {}", display_comma_separated(tables)) } @@ -6387,6 +6400,104 @@ impl fmt::Display for TruncateTableTarget { } } +/// A `LOCK` statement. +/// +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Lock { + /// List of tables to lock. + pub tables: Vec, + /// Lock mode. + pub lock_mode: Option, + /// Whether `NOWAIT` was specified. + pub nowait: bool, +} + +impl fmt::Display for Lock { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "LOCK TABLE {}", display_comma_separated(&self.tables))?; + if let Some(lock_mode) = &self.lock_mode { + write!(f, " IN {lock_mode} MODE")?; + } + if self.nowait { + write!(f, " NOWAIT")?; + } + Ok(()) + } +} + +/// Target of a `LOCK TABLE` command +/// +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct LockTableTarget { + /// Name of the table being locked. + #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] + pub name: ObjectName, + /// Whether `ONLY` was specified to exclude descendant tables. + pub only: bool, + /// Whether `*` was specified to explicitly include descendant tables. 
+ pub has_asterisk: bool, +} + +impl fmt::Display for LockTableTarget { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.only { + write!(f, "ONLY ")?; + } + write!(f, "{}", self.name)?; + if self.has_asterisk { + write!(f, " *")?; + } + Ok(()) + } +} + +/// PostgreSQL lock modes for `LOCK TABLE`. +/// +/// See +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum LockTableMode { + /// `ACCESS SHARE` + AccessShare, + /// `ROW SHARE` + RowShare, + /// `ROW EXCLUSIVE` + RowExclusive, + /// `SHARE UPDATE EXCLUSIVE` + ShareUpdateExclusive, + /// `SHARE` + Share, + /// `SHARE ROW EXCLUSIVE` + ShareRowExclusive, + /// `EXCLUSIVE` + Exclusive, + /// `ACCESS EXCLUSIVE` + AccessExclusive, +} + +impl fmt::Display for LockTableMode { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let text = match self { + Self::AccessShare => "ACCESS SHARE", + Self::RowShare => "ROW SHARE", + Self::RowExclusive => "ROW EXCLUSIVE", + Self::ShareUpdateExclusive => "SHARE UPDATE EXCLUSIVE", + Self::Share => "SHARE", + Self::ShareRowExclusive => "SHARE ROW EXCLUSIVE", + Self::Exclusive => "EXCLUSIVE", + Self::AccessExclusive => "ACCESS EXCLUSIVE", + }; + write!(f, "{text}") + } +} + /// PostgreSQL identity option for TRUNCATE table /// [ RESTART IDENTITY | CONTINUE IDENTITY ] #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 57d57b249a..24fee30dc8 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -304,6 +304,7 @@ impl Spanned for Values { /// - [Statement::CreateSequence] /// - [Statement::CreateType] /// - [Statement::Pragma] +/// - [Statement::Lock] /// - [Statement::LockTables] /// - [Statement::UnlockTables] /// - [Statement::Unload] @@ -462,6 +463,7 @@ impl Spanned for Statement { Statement::CreateSequence { .. 
} => Span::empty(), Statement::CreateType { .. } => Span::empty(), Statement::Pragma { .. } => Span::empty(), + Statement::Lock(_) => Span::empty(), Statement::LockTables { .. } => Span::empty(), Statement::UnlockTables => Span::empty(), Statement::Unload { .. } => Span::empty(), diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 274449ff76..9530a4aa28 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -697,6 +697,10 @@ impl<'a> Parser<'a> { // `INSTALL` is duckdb specific https://duckdb.org/docs/extensions/overview Keyword::INSTALL if self.dialect.supports_install() => self.parse_install(), Keyword::LOAD => self.parse_load(), + Keyword::LOCK => { + self.prev_token(); + self.parse_lock_statement().map(Into::into) + } Keyword::OPTIMIZE if self.dialect.supports_optimize_table() => { self.parse_optimize_table() } @@ -18389,6 +18393,66 @@ impl<'a> Parser<'a> { }) } + /// Parse a PostgreSQL `LOCK` statement. + pub fn parse_lock_statement(&mut self) -> Result { + self.expect_keyword(Keyword::LOCK)?; + + if self.peek_keyword(Keyword::TABLES) { + return self.expected_ref("TABLE or a table name", self.peek_token_ref()); + } + + let _ = self.parse_keyword(Keyword::TABLE); + let tables = self.parse_comma_separated(Parser::parse_lock_table_target)?; + let lock_mode = if self.parse_keyword(Keyword::IN) { + let lock_mode = self.parse_lock_table_mode()?; + self.expect_keyword(Keyword::MODE)?; + Some(lock_mode) + } else { + None + }; + let nowait = self.parse_keyword(Keyword::NOWAIT); + + Ok(Lock { + tables, + lock_mode, + nowait, + }) + } + + fn parse_lock_table_target(&mut self) -> Result { + let only = self.parse_keyword(Keyword::ONLY); + let name = self.parse_object_name(false)?; + let has_asterisk = self.consume_token(&Token::Mul); + + Ok(LockTableTarget { + name, + only, + has_asterisk, + }) + } + + fn parse_lock_table_mode(&mut self) -> Result { + if self.parse_keywords(&[Keyword::ACCESS, Keyword::SHARE]) { + Ok(LockTableMode::AccessShare) + } else if 
self.parse_keywords(&[Keyword::ACCESS, Keyword::EXCLUSIVE]) { + Ok(LockTableMode::AccessExclusive) + } else if self.parse_keywords(&[Keyword::ROW, Keyword::SHARE]) { + Ok(LockTableMode::RowShare) + } else if self.parse_keywords(&[Keyword::ROW, Keyword::EXCLUSIVE]) { + Ok(LockTableMode::RowExclusive) + } else if self.parse_keywords(&[Keyword::SHARE, Keyword::UPDATE, Keyword::EXCLUSIVE]) { + Ok(LockTableMode::ShareUpdateExclusive) + } else if self.parse_keywords(&[Keyword::SHARE, Keyword::ROW, Keyword::EXCLUSIVE]) { + Ok(LockTableMode::ShareRowExclusive) + } else if self.parse_keyword(Keyword::SHARE) { + Ok(LockTableMode::Share) + } else if self.parse_keyword(Keyword::EXCLUSIVE) { + Ok(LockTableMode::Exclusive) + } else { + self.expected_ref("a PostgreSQL LOCK TABLE mode", self.peek_token_ref()) + } + } + /// Parse a VALUES clause pub fn parse_values( &mut self, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 60aca14b3c..35c4d47865 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -8703,3 +8703,63 @@ fn parse_pg_analyze() { _ => panic!("Expected Analyze, got: {stmt:?}"), } } + +#[test] +fn parse_lock_table() { + pg_and_generic().one_statement_parses_to( + "LOCK public.widgets IN EXCLUSIVE MODE", + "LOCK TABLE public.widgets IN EXCLUSIVE MODE", + ); + pg_and_generic().one_statement_parses_to( + "LOCK TABLE public.widgets NOWAIT", + "LOCK TABLE public.widgets NOWAIT", + ); + + let stmt = pg_and_generic().verified_stmt( + "LOCK TABLE ONLY public.widgets, analytics.events * IN SHARE ROW EXCLUSIVE MODE NOWAIT", + ); + match stmt { + Statement::Lock(lock) => { + assert_eq!(lock.tables.len(), 2); + assert_eq!(lock.tables[0].name.to_string(), "public.widgets"); + assert!(lock.tables[0].only); + assert!(!lock.tables[0].has_asterisk); + assert_eq!(lock.tables[1].name.to_string(), "analytics.events"); + assert!(!lock.tables[1].only); + assert!(lock.tables[1].has_asterisk); + assert_eq!(lock.lock_mode, 
Some(LockTableMode::ShareRowExclusive)); + assert!(lock.nowait); + } + _ => panic!("Expected Lock, got: {stmt:?}"), + } + + let lock_modes = [ + ("ACCESS SHARE", LockTableMode::AccessShare), + ("ROW SHARE", LockTableMode::RowShare), + ("ROW EXCLUSIVE", LockTableMode::RowExclusive), + ( + "SHARE UPDATE EXCLUSIVE", + LockTableMode::ShareUpdateExclusive, + ), + ("SHARE", LockTableMode::Share), + ("SHARE ROW EXCLUSIVE", LockTableMode::ShareRowExclusive), + ("EXCLUSIVE", LockTableMode::Exclusive), + ("ACCESS EXCLUSIVE", LockTableMode::AccessExclusive), + ]; + + for (mode_sql, expected_mode) in lock_modes { + let stmt = pg_and_generic() + .verified_stmt(&format!("LOCK TABLE public.widgets IN {mode_sql} MODE")); + match stmt { + Statement::Lock(lock) => { + assert_eq!(lock.tables.len(), 1); + assert_eq!(lock.tables[0].name.to_string(), "public.widgets"); + assert!(!lock.tables[0].only); + assert!(!lock.tables[0].has_asterisk); + assert_eq!(lock.lock_mode, Some(expected_mode)); + assert!(!lock.nowait); + } + _ => panic!("Expected Lock, got: {stmt:?}"), + } + } +} From 47b6aac72208209615bbeec121e0931f80fde55f Mon Sep 17 00:00:00 2001 From: whirlun <74815055+whirlun@users.noreply.github.com> Date: Thu, 12 Mar 2026 22:14:12 -0700 Subject: [PATCH 109/121] add support for databricks JSON accessors (#2272) --- src/ast/mod.rs | 11 +++++++++ src/ast/spans.rs | 1 + src/parser/mod.rs | 13 ++++++++--- tests/sqlparser_databricks.rs | 44 +++++++++++++++++++++++++++++++++++ 4 files changed, 66 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 789bf28200..6659878ba5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -651,6 +651,14 @@ pub enum JsonPathElem { /// The expression used as the bracket key (string or numeric expression). key: Expr, }, + /// Access an object field using colon bracket notation + /// e.g. `obj:['foo']` + /// + /// See + ColonBracket { + /// The expression used as the bracket key (string or numeric expression). 
+ key: Expr, + }, } /// A JSON path. @@ -685,6 +693,9 @@ impl fmt::Display for JsonPath { JsonPathElem::Bracket { key } => { write!(f, "[{key}]")?; } + JsonPathElem::ColonBracket { key } => { + write!(f, ":[{key}]")?; + } } } Ok(()) diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 24fee30dc8..8dd8d8c51a 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -1799,6 +1799,7 @@ impl Spanned for JsonPathElem { match self { JsonPathElem::Dot { .. } => Span::empty(), JsonPathElem::Bracket { key } => key.span(), + JsonPathElem::ColonBracket { key } => key.span(), } } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 9530a4aa28..dc47c27b7d 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -4195,8 +4195,9 @@ impl<'a> Parser<'a> { match token.token { Token::Word(Word { value, - // path segments in SF dot notation can be unquoted or double-quoted - quote_style: quote_style @ (Some('"') | None), + // path segments in SF dot notation can be unquoted or double-quoted; + // Databricks also supports backtick-quoted identifiers + quote_style: quote_style @ (Some('"') | Some('`') | None), // some experimentation suggests that snowflake permits // any keyword here unquoted. 
keyword: _, @@ -4226,6 +4227,12 @@ impl<'a> Parser<'a> { let mut path = Vec::new(); loop { match self.next_token().token { + Token::Colon if path.is_empty() && self.peek_token_ref() == &Token::LBracket => { + self.next_token(); + let key = self.parse_wildcard_expr()?; + self.expect_token(&Token::RBracket)?; + path.push(JsonPathElem::ColonBracket { key }); + } Token::Colon if path.is_empty() => { path.push(self.parse_json_path_object_key()?); } @@ -4233,7 +4240,7 @@ impl<'a> Parser<'a> { path.push(self.parse_json_path_object_key()?); } Token::LBracket => { - let key = self.parse_expr()?; + let key = self.parse_wildcard_expr()?; self.expect_token(&Token::RBracket)?; path.push(JsonPathElem::Bracket { key }); diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 24d06ef2fd..79b3d0654d 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -600,3 +600,47 @@ fn parse_databricks_struct_type() { _ => unreachable!(), } } + +#[test] +fn parse_databricks_json_accessor() { + // Basic colon accessor — unquoted field names are case-insensitive + databricks().verified_only_select("SELECT raw:owner, RAW:owner FROM store_data"); + + // Unquoted field access is case-insensitive; bracket notation is case-sensitive. + databricks().verified_only_select( + "SELECT raw:OWNER AS case_insensitive, raw:['OWNER'] AS case_sensitive FROM store_data", + ); + + // Backtick-quoted keys (Databricks delimited identifiers) normalise to double-quoted output. 
+ databricks().one_statement_parses_to( + "SELECT raw:`zip code`, raw:`Zip Code`, raw:['fb:testid'] FROM store_data", + r#"SELECT raw:"zip code", raw:"Zip Code", raw:['fb:testid'] FROM store_data"#, + ); + + // Dot notation + databricks().verified_only_select("SELECT raw:store.bicycle FROM store_data"); + + // String-key bracket notation after a dot segment + databricks() + .verified_only_select("SELECT raw:store['bicycle'], raw:store['BICYCLE'] FROM store_data"); + + // Integer-index bracket notation + databricks() + .verified_only_select("SELECT raw:store.fruit[0], raw:store.fruit[1] FROM store_data"); + + // Wildcard [*] — including chained and mixed positions + databricks().verified_only_select( + "SELECT raw:store.basket[*], raw:store.basket[*][0] AS first_of_baskets, \ + raw:store.basket[0][*] AS first_basket, raw:store.basket[*][*] AS all_elements_flattened, \ + raw:store.basket[0][2].b AS subfield FROM store_data", + ); + + // Dot access following a wildcard bracket + databricks().verified_only_select("SELECT raw:store.book[*].isbn FROM store_data"); + + // Double-colon cast — type keyword normalises to upper case + databricks().one_statement_parses_to( + "SELECT raw:store.bicycle.price::double FROM store_data", + "SELECT raw:store.bicycle.price::DOUBLE FROM store_data", + ); +} From 738f12dd4d68836cd1746fb24df1db40de58bfd2 Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Fri, 13 Mar 2026 02:18:03 -0700 Subject: [PATCH 110/121] Fixed create snapshot table for bigquery (#2269) --- src/ast/ddl.rs | 6 ++++- src/ast/helpers/stmt_create_table.rs | 10 ++++++++ src/ast/spans.rs | 1 + src/parser/mod.rs | 38 +++++++++++++++++++++++++++- tests/sqlparser_bigquery.rs | 22 ++++++++++++++++ tests/sqlparser_duckdb.rs | 1 + tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_postgres.rs | 1 + 8 files changed, 79 insertions(+), 2 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index d6da8368eb..49dc52029d 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -2913,6 
+2913,9 @@ pub struct CreateTable { pub volatile: bool, /// `ICEBERG` clause pub iceberg: bool, + /// `SNAPSHOT` clause + /// + pub snapshot: bool, /// Table name #[cfg_attr(feature = "visitor", visit(with = "visit_relation"))] pub name: ObjectName, @@ -3064,9 +3067,10 @@ impl fmt::Display for CreateTable { // `CREATE TABLE t (a INT) AS SELECT a from t2` write!( f, - "CREATE {or_replace}{external}{global}{temporary}{transient}{volatile}{dynamic}{iceberg}TABLE {if_not_exists}{name}", + "CREATE {or_replace}{external}{global}{temporary}{transient}{volatile}{dynamic}{iceberg}{snapshot}TABLE {if_not_exists}{name}", or_replace = if self.or_replace { "OR REPLACE " } else { "" }, external = if self.external { "EXTERNAL " } else { "" }, + snapshot = if self.snapshot { "SNAPSHOT " } else { "" }, global = self.global .map(|global| { if global { diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 5963e94047..6af820e752 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -81,6 +81,8 @@ pub struct CreateTableBuilder { pub volatile: bool, /// Iceberg-specific table flag. pub iceberg: bool, + /// `SNAPSHOT` table flag. + pub snapshot: bool, /// Whether `DYNAMIC` table option is set. pub dynamic: bool, /// The table name. @@ -191,6 +193,7 @@ impl CreateTableBuilder { transient: false, volatile: false, iceberg: false, + snapshot: false, dynamic: false, name, columns: vec![], @@ -281,6 +284,11 @@ impl CreateTableBuilder { self.iceberg = iceberg; self } + /// Set `SNAPSHOT` table flag (BigQuery). + pub fn snapshot(mut self, snapshot: bool) -> Self { + self.snapshot = snapshot; + self + } /// Set `DYNAMIC` table option. 
pub fn dynamic(mut self, dynamic: bool) -> Self { self.dynamic = dynamic; @@ -540,6 +548,7 @@ impl CreateTableBuilder { transient: self.transient, volatile: self.volatile, iceberg: self.iceberg, + snapshot: self.snapshot, dynamic: self.dynamic, name: self.name, columns: self.columns, @@ -618,6 +627,7 @@ impl From for CreateTableBuilder { transient: table.transient, volatile: table.volatile, iceberg: table.iceberg, + snapshot: table.snapshot, dynamic: table.dynamic, name: table.name, columns: table.columns, diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 8dd8d8c51a..5777d289ff 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -540,6 +540,7 @@ impl Spanned for CreateTable { transient: _, // bool volatile: _, // bool iceberg: _, // bool, Snowflake specific + snapshot: _, // bool, BigQuery specific name, columns, constraints, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index dc47c27b7d..801f89e685 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5110,7 +5110,9 @@ impl<'a> Parser<'a> { let persistent = dialect_of!(self is DuckDbDialect) && self.parse_one_of_keywords(&[Keyword::PERSISTENT]).is_some(); let create_view_params = self.parse_create_view_params()?; - if self.parse_keyword(Keyword::TABLE) { + if self.peek_keywords(&[Keyword::SNAPSHOT, Keyword::TABLE]) { + self.parse_create_snapshot_table().map(Into::into) + } else if self.parse_keyword(Keyword::TABLE) { self.parse_create_table(or_replace, temporary, global, transient) .map(Into::into) } else if self.peek_keyword(Keyword::MATERIALIZED) @@ -6327,6 +6329,40 @@ impl<'a> Parser<'a> { .build()) } + /// Parse `CREATE SNAPSHOT TABLE` statement. 
+ /// + /// + pub fn parse_create_snapshot_table(&mut self) -> Result { + self.expect_keywords(&[Keyword::SNAPSHOT, Keyword::TABLE])?; + let if_not_exists = self.parse_keywords(&[Keyword::IF, Keyword::NOT, Keyword::EXISTS]); + let table_name = self.parse_object_name(true)?; + + self.expect_keyword_is(Keyword::CLONE)?; + let clone = Some(self.parse_object_name(true)?); + + let version = + if self.parse_keywords(&[Keyword::FOR, Keyword::SYSTEM_TIME, Keyword::AS, Keyword::OF]) + { + Some(TableVersion::ForSystemTimeAsOf(self.parse_expr()?)) + } else { + None + }; + + let table_options = if let Some(options) = self.maybe_parse_options(Keyword::OPTIONS)? { + CreateTableOptions::Options(options) + } else { + CreateTableOptions::None + }; + + Ok(CreateTableBuilder::new(table_name) + .snapshot(true) + .if_not_exists(if_not_exists) + .clone_clause(clone) + .version(version) + .table_options(table_options) + .build()) + } + /// Parse a file format for external tables. pub fn parse_file_format(&mut self) -> Result { let next_token = self.next_token(); diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index a6b0906ffa..d3e47f991f 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2900,3 +2900,25 @@ fn test_alter_schema() { bigquery_and_generic() .verified_stmt("ALTER SCHEMA IF EXISTS mydataset SET OPTIONS (location = 'us')"); } + +#[test] +fn test_create_snapshot_table() { + bigquery_and_generic() + .verified_stmt("CREATE SNAPSHOT TABLE dataset_id.table1 CLONE dataset_id.table2"); + + bigquery().verified_stmt( + "CREATE SNAPSHOT TABLE IF NOT EXISTS dataset_id.table1 CLONE dataset_id.table2", + ); + + bigquery().verified_stmt( + "CREATE SNAPSHOT TABLE dataset_id.table1 CLONE dataset_id.table2 FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR)", + ); + + bigquery().verified_stmt( + "CREATE SNAPSHOT TABLE dataset_id.table1 CLONE dataset_id.table2 OPTIONS(expiration_timestamp = TIMESTAMP '2025-01-01 00:00:00 
UTC', friendly_name = 'my_table')", + ); + + bigquery().verified_stmt( + "CREATE SNAPSHOT TABLE IF NOT EXISTS dataset_id.table1 CLONE dataset_id.table2 FOR SYSTEM_TIME AS OF TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 HOUR) OPTIONS(expiration_timestamp = TIMESTAMP '2025-01-01 00:00:00 UTC')", + ); +} diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 33a6cb4565..b3c40761c4 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -709,6 +709,7 @@ fn test_duckdb_union_datatype() { transient: Default::default(), volatile: Default::default(), iceberg: Default::default(), + snapshot: false, dynamic: Default::default(), name: ObjectName::from(vec!["tbl1".into()]), columns: vec![ diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index da6ecace66..7fc030ee43 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -1985,6 +1985,7 @@ fn parse_create_table_with_valid_options() { for_values: None, strict: false, iceberg: false, + snapshot: false, copy_grants: false, enable_schema_evolution: None, change_tracking: None, @@ -2120,6 +2121,7 @@ fn parse_create_table_with_identity_column() { transient: false, volatile: false, iceberg: false, + snapshot: false, name: ObjectName::from(vec![Ident { value: "mytable".to_string(), quote_style: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 35c4d47865..2f74d70604 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6409,6 +6409,7 @@ fn parse_trigger_related_functions() { transient: false, volatile: false, iceberg: false, + snapshot: false, name: ObjectName::from(vec![Ident::new("emp")]), columns: vec![ ColumnDef { From 9d5a171a85b06ca7df083287027d8620007f909b Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Fri, 13 Mar 2026 02:18:48 -0700 Subject: [PATCH 111/121] Fixed stage name parsing for snowflake (#2265) --- src/dialect/snowflake.rs | 2 ++ tests/sqlparser_snowflake.rs | 15 +++++++++++++++ 2 files 
changed, 17 insertions(+) diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 6c160a9dd8..f0f33f8ed0 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -1258,6 +1258,8 @@ pub fn parse_stage_name_identifier(parser: &mut Parser) -> Result ident.push('/'), Token::Plus => ident.push('+'), Token::Minus => ident.push('-'), + Token::Eq => ident.push('='), + Token::Colon => ident.push(':'), Token::Number(n, _) => ident.push_str(n), Token::Word(w) => ident.push_str(&w.to_string()), _ => return parser.expected_ref("stage name identifier", parser.peek_token_ref()), diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 265f8a9ae5..0da44aa73f 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2640,6 +2640,21 @@ fn test_snowflake_copy_into_stage_name_ends_with_parens() { } } +#[test] +fn test_snowflake_stage_name_with_special_chars() { + // Stage path with '=' (Hive-style partitioning) + snowflake().verified_stmt("SELECT * FROM @stage/day=18/23.parquet"); + + // Stage path with ':' (time-based partitioning) + snowflake().verified_stmt("SELECT * FROM @stage/0:18:23/23.parquet"); + + // COPY INTO with '=' in stage path + snowflake().verified_stmt("COPY INTO my_table FROM @stage/day=18/file.parquet"); + + // COPY INTO with ':' in stage path + snowflake().verified_stmt("COPY INTO my_table FROM @stage/0:18:23/file.parquet"); +} + #[test] fn test_snowflake_trim() { let real_sql = r#"SELECT customer_id, TRIM(sub_items.value:item_price_id, '"', "a") AS item_price_id FROM models_staging.subscriptions"#; From 924a116a2edf58db23530d6fc09e19924709e60d Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Fri, 13 Mar 2026 05:43:46 -0700 Subject: [PATCH 112/121] Fix STORAGE LIFECYCLE POLICY for snowflake queries (#2264) --- src/ast/ddl.rs | 13 ++++++++++--- src/ast/helpers/stmt_create_table.rs | 17 +++++++++++++++-- src/ast/mod.rs | 24 ++++++++++++++++++++++++ src/ast/spans.rs | 1 + 
src/dialect/snowflake.rs | 16 +++++++++++++++- src/keywords.rs | 1 + tests/sqlparser_duckdb.rs | 1 + tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_postgres.rs | 1 + tests/sqlparser_snowflake.rs | 26 ++++++++++++++++++++++++++ 10 files changed, 96 insertions(+), 6 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 49dc52029d..157b209d11 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -48,9 +48,9 @@ use crate::ast::{ HiveFormat, HiveIOFormat, HiveRowFormat, HiveSetLocation, Ident, InitializeKind, MySQLColumnPosition, ObjectName, OnCommit, OneOrManyWithParens, OperateFunctionArg, OrderByExpr, ProjectionSelect, Query, RefreshModeKind, RowAccessPolicy, SequenceOptions, - Spanned, SqlOption, StorageSerializationPolicy, TableVersion, Tag, TriggerEvent, - TriggerExecBody, TriggerObject, TriggerPeriod, TriggerReferencing, Value, ValueWithSpan, - WrappedCollection, + Spanned, SqlOption, StorageLifecyclePolicy, StorageSerializationPolicy, TableVersion, Tag, + TriggerEvent, TriggerExecBody, TriggerObject, TriggerPeriod, TriggerReferencing, Value, + ValueWithSpan, WrappedCollection, }; use crate::display_utils::{DisplayCommaSeparated, Indent, NewLine, SpaceOrNewline}; use crate::keywords::Keyword; @@ -3012,6 +3012,9 @@ pub struct CreateTable { /// Snowflake "WITH ROW ACCESS POLICY" clause /// pub with_row_access_policy: Option, + /// Snowflake `WITH STORAGE LIFECYCLE POLICY` clause + /// + pub with_storage_lifecycle_policy: Option, /// Snowflake "WITH TAG" clause /// pub with_tags: Option>, @@ -3317,6 +3320,10 @@ impl fmt::Display for CreateTable { write!(f, " {row_access_policy}",)?; } + if let Some(storage_lifecycle_policy) = &self.with_storage_lifecycle_policy { + write!(f, " {storage_lifecycle_policy}",)?; + } + if let Some(tag) = &self.with_tags { write!(f, " WITH TAG ({})", display_comma_separated(tag.as_slice()))?; } diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 6af820e752..29589e2198 100644 --- 
a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -28,8 +28,8 @@ use crate::ast::{ ClusteredBy, ColumnDef, CommentDef, CreateTable, CreateTableLikeKind, CreateTableOptions, DistStyle, Expr, FileFormat, ForValues, HiveDistributionStyle, HiveFormat, Ident, InitializeKind, ObjectName, OnCommit, OneOrManyWithParens, Query, RefreshModeKind, - RowAccessPolicy, Statement, StorageSerializationPolicy, TableConstraint, TableVersion, Tag, - WrappedCollection, + RowAccessPolicy, Statement, StorageLifecyclePolicy, StorageSerializationPolicy, + TableConstraint, TableVersion, Tag, WrappedCollection, }; use crate::parser::ParserError; @@ -149,6 +149,8 @@ pub struct CreateTableBuilder { pub with_aggregation_policy: Option, /// Optional row access policy applied to the table. pub with_row_access_policy: Option, + /// Optional storage lifecycle policy applied to the table. + pub with_storage_lifecycle_policy: Option, /// Optional tags/labels attached to the table metadata. pub with_tags: Option>, /// Optional base location for staged data. @@ -227,6 +229,7 @@ impl CreateTableBuilder { default_ddl_collation: None, with_aggregation_policy: None, with_row_access_policy: None, + with_storage_lifecycle_policy: None, with_tags: None, base_location: None, external_volume: None, @@ -459,6 +462,14 @@ impl CreateTableBuilder { self.with_row_access_policy = with_row_access_policy; self } + /// Attach a storage lifecycle policy to the table. + pub fn with_storage_lifecycle_policy( + mut self, + with_storage_lifecycle_policy: Option, + ) -> Self { + self.with_storage_lifecycle_policy = with_storage_lifecycle_policy; + self + } /// Attach tags/labels to the table metadata. 
pub fn with_tags(mut self, with_tags: Option>) -> Self { self.with_tags = with_tags; @@ -582,6 +593,7 @@ impl CreateTableBuilder { default_ddl_collation: self.default_ddl_collation, with_aggregation_policy: self.with_aggregation_policy, with_row_access_policy: self.with_row_access_policy, + with_storage_lifecycle_policy: self.with_storage_lifecycle_policy, with_tags: self.with_tags, base_location: self.base_location, external_volume: self.external_volume, @@ -661,6 +673,7 @@ impl From for CreateTableBuilder { default_ddl_collation: table.default_ddl_collation, with_aggregation_policy: table.with_aggregation_policy, with_row_access_policy: table.with_row_access_policy, + with_storage_lifecycle_policy: table.with_storage_lifecycle_policy, with_tags: table.with_tags, base_location: table.base_location, external_volume: table.external_volume, diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 6659878ba5..cff089bc31 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -10472,6 +10472,30 @@ impl Display for RowAccessPolicy { } } +/// Snowflake `[ WITH ] STORAGE LIFECYCLE POLICY ON ( [ , ... ] )` +/// +/// +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub struct StorageLifecyclePolicy { + /// The fully-qualified policy object name. + pub policy: ObjectName, + /// Column names the policy applies to. 
+ pub on: Vec, +} + +impl Display for StorageLifecyclePolicy { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "WITH STORAGE LIFECYCLE POLICY {} ON ({})", + self.policy, + display_comma_separated(self.on.as_slice()) + ) + } +} + /// Snowflake `WITH TAG ( tag_name = '', ...)` /// /// diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 5777d289ff..74f731a78a 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -572,6 +572,7 @@ impl Spanned for CreateTable { default_ddl_collation: _, // string, no span with_aggregation_policy: _, // todo, Snowflake specific with_row_access_policy: _, // todo, Snowflake specific + with_storage_lifecycle_policy: _, // todo, Snowflake specific with_tags: _, // todo, Snowflake specific external_volume: _, // todo, Snowflake specific base_location: _, // todo, Snowflake specific diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index f0f33f8ed0..416e5051de 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -33,7 +33,7 @@ use crate::ast::{ IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, InitializeKind, Insert, MultiTableInsertIntoClause, MultiTableInsertType, MultiTableInsertValue, MultiTableInsertValues, MultiTableInsertWhenClause, ObjectName, ObjectNamePart, - RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, Statement, + RefreshModeKind, RowAccessPolicy, ShowObjects, SqlOption, Statement, StorageLifecyclePolicy, StorageSerializationPolicy, TableObject, TagsColumnOption, Value, WrappedCollection, }; use crate::dialect::{Dialect, Precedence}; @@ -917,6 +917,7 @@ pub fn parse_create_table( Keyword::WITH => { parser.expect_one_of_keywords(&[ Keyword::AGGREGATION, + Keyword::STORAGE, Keyword::TAG, Keyword::ROW, ])?; @@ -938,6 +939,19 @@ pub fn parse_create_table( builder = builder.with_row_access_policy(Some(RowAccessPolicy::new(policy, columns))) } + Keyword::STORAGE => { + parser.expect_keywords(&[Keyword::LIFECYCLE, Keyword::POLICY])?; + 
let policy = parser.parse_object_name(false)?; + parser.expect_keyword_is(Keyword::ON)?; + parser.expect_token(&Token::LParen)?; + let columns = parser.parse_comma_separated(|p| p.parse_identifier())?; + parser.expect_token(&Token::RParen)?; + + builder = builder.with_storage_lifecycle_policy(Some(StorageLifecyclePolicy { + policy, + on: columns, + })) + } Keyword::TAG => { parser.expect_token(&Token::LParen)?; let tags = parser.parse_comma_separated(Parser::parse_tag)?; diff --git a/src/keywords.rs b/src/keywords.rs index 9ea85fd3a7..de552bf2b0 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -573,6 +573,7 @@ define_keywords!( LEFT, LEFTARG, LEVEL, + LIFECYCLE, LIKE, LIKE_REGEX, LIMIT, diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index b3c40761c4..671e92b979 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -776,6 +776,7 @@ fn test_duckdb_union_datatype() { default_ddl_collation: Default::default(), with_aggregation_policy: Default::default(), with_row_access_policy: Default::default(), + with_storage_lifecycle_policy: Default::default(), with_tags: Default::default(), base_location: Default::default(), external_volume: Default::default(), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 7fc030ee43..f2156e6477 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -1994,6 +1994,7 @@ fn parse_create_table_with_valid_options() { default_ddl_collation: None, with_aggregation_policy: None, with_row_access_policy: None, + with_storage_lifecycle_policy: None, with_tags: None, base_location: None, external_volume: None, @@ -2166,6 +2167,7 @@ fn parse_create_table_with_identity_column() { default_ddl_collation: None, with_aggregation_policy: None, with_row_access_policy: None, + with_storage_lifecycle_policy: None, with_tags: None, base_location: None, external_volume: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 2f74d70604..9486af0402 100644 --- 
a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6462,6 +6462,7 @@ fn parse_trigger_related_functions() { default_ddl_collation: None, with_aggregation_policy: None, with_row_access_policy: None, + with_storage_lifecycle_policy: None, with_tags: None, base_location: None, external_volume: None, diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 0da44aa73f..5bb4a269e8 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -286,6 +286,32 @@ fn test_snowflake_create_table_with_row_access_policy() { } } +#[test] +fn test_snowflake_create_table_with_storage_lifecycle_policy() { + // WITH keyword + match snowflake().verified_stmt( + "CREATE TABLE IF NOT EXISTS my_table (a NUMBER(38, 0), b VARIANT) WITH STORAGE LIFECYCLE POLICY dba.global_settings.my_policy ON (a)", + ) { + Statement::CreateTable(CreateTable { + name, + with_storage_lifecycle_policy, + .. + }) => { + assert_eq!("my_table", name.to_string()); + let policy = with_storage_lifecycle_policy.unwrap(); + assert_eq!("dba.global_settings.my_policy", policy.policy.to_string()); + assert_eq!(vec![Ident::new("a")], policy.on); + } + _ => unreachable!(), + } + + // Without WITH keyword — canonicalizes to WITH form + snowflake().one_statement_parses_to( + "CREATE TABLE my_table (a NUMBER(38, 0)) STORAGE LIFECYCLE POLICY my_policy ON (a, b)", + "CREATE TABLE my_table (a NUMBER(38, 0)) WITH STORAGE LIFECYCLE POLICY my_policy ON (a, b)", + ); +} + #[test] fn test_snowflake_create_table_with_tag() { match snowflake() From 203ced42be28f4e418ca93a73fc75911be19f4b4 Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Fri, 13 Mar 2026 05:53:57 -0700 Subject: [PATCH 113/121] Fixed CHANGES keyword parsing for snowflake (#2266) --- src/ast/query.rs | 23 +++++++++++++++++++++++ src/keywords.rs | 1 + src/parser/mod.rs | 26 ++++++++++++++++++++++++++ tests/sqlparser_snowflake.rs | 31 ++++++++++++++++++++++++++++++- 4 files changed, 80 insertions(+), 1 deletion(-) 
diff --git a/src/ast/query.rs b/src/ast/query.rs index 440928ed71..ca74db4405 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -2578,6 +2578,23 @@ pub enum TableVersion { /// When the table version is defined using a function. /// For example: `SELECT * FROM tbl AT(TIMESTAMP => '2020-08-14 09:30:00')` Function(Expr), + /// Snowflake `CHANGES` clause for change tracking queries. + /// For example: + /// ```sql + /// SELECT * FROM t + /// CHANGES(INFORMATION => DEFAULT) + /// AT(TIMESTAMP => TO_TIMESTAMP_TZ('...')) + /// END(TIMESTAMP => TO_TIMESTAMP_TZ('...')) + /// ``` + /// + Changes { + /// The `CHANGES(INFORMATION => ...)` function-call expression. + changes: Expr, + /// The `AT(TIMESTAMP => ...)` function-call expression. + at: Expr, + /// The optional `END(TIMESTAMP => ...)` function-call expression. + end: Option, + }, } impl Display for TableVersion { @@ -2587,6 +2604,12 @@ impl Display for TableVersion { TableVersion::TimestampAsOf(e) => write!(f, "TIMESTAMP AS OF {e}")?, TableVersion::VersionAsOf(e) => write!(f, "VERSION AS OF {e}")?, TableVersion::Function(func) => write!(f, "{func}")?, + TableVersion::Changes { changes, at, end } => { + write!(f, "{changes} {at}")?; + if let Some(end) = end { + write!(f, " {end}")?; + } + } } Ok(()) } diff --git a/src/keywords.rs b/src/keywords.rs index de552bf2b0..d56d04847e 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -202,6 +202,7 @@ define_keywords!( CENTURY, CHAIN, CHANGE, + CHANGES, CHANGE_TRACKING, CHANNEL, CHAR, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 801f89e685..3bc34bda0b 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16352,6 +16352,8 @@ impl<'a> Parser<'a> { { let expr = self.parse_expr()?; return Ok(Some(TableVersion::ForSystemTimeAsOf(expr))); + } else if self.peek_keyword(Keyword::CHANGES) { + return self.parse_table_version_changes().map(Some); } else if self.peek_keyword(Keyword::AT) || self.peek_keyword(Keyword::BEFORE) { let func_name = 
self.parse_object_name(true)?; let func = self.parse_function(func_name)?; @@ -16367,6 +16369,30 @@ impl<'a> Parser<'a> { Ok(None) } + /// Parses the Snowflake `CHANGES` clause for change tracking queries. + /// + /// Syntax: + /// ```sql + /// CHANGES (INFORMATION => DEFAULT) + /// AT (TIMESTAMP => ) + /// [END (TIMESTAMP => )] + /// ``` + /// + /// + fn parse_table_version_changes(&mut self) -> Result { + let changes_name = self.parse_object_name(true)?; + let changes = self.parse_function(changes_name)?; + let at_name = self.parse_object_name(true)?; + let at = self.parse_function(at_name)?; + let end = if self.peek_keyword(Keyword::END) { + let end_name = self.parse_object_name(true)?; + Some(self.parse_function(end_name)?) + } else { + None + }; + Ok(TableVersion::Changes { changes, at, end }) + } + /// Parses MySQL's JSON_TABLE column definition. /// For example: `id INT EXISTS PATH '$' DEFAULT '0' ON EMPTY ERROR ON ERROR` pub fn parse_json_table_column_def(&mut self) -> Result { diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 5bb4a269e8..666876fa72 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -3249,7 +3249,10 @@ fn parse_view_column_descriptions() { #[test] fn test_parentheses_overflow() { - let max_nesting_level: usize = 25; + // Use a modest nesting level to avoid actual stack overflow on + // CI runners with small thread stacks (debug builds use large frames + // and each nesting level adds extra depth via maybe_parse). 
+ let max_nesting_level: usize = 20; // Verify the recursion check is not too wasteful (num of parentheses within budget) let slack = 3; @@ -4004,6 +4007,32 @@ fn test_timetravel_at_before() { .verified_only_select("SELECT * FROM tbl BEFORE(TIMESTAMP => '2024-12-15 00:00:00')"); } +#[test] +fn test_changes_clause() { + // CHANGES with AT and END + snowflake().verified_stmt( + r#"SELECT a FROM "PCH_ODS_FIDELIO"."SRC_VW_SYS_ACC_MASTER" CHANGES(INFORMATION => DEFAULT) AT(TIMESTAMP => TO_TIMESTAMP_TZ('2026-02-18 11:23:19.660000000')) END(TIMESTAMP => TO_TIMESTAMP_TZ('2026-02-18 11:38:30.211000000'))"#, + ); + + // CHANGES with AT only (no END) + snowflake().verified_stmt( + "SELECT a FROM t CHANGES(INFORMATION => DEFAULT) AT(TIMESTAMP => TO_TIMESTAMP_TZ('2026-02-18 11:23:19.660000000'))", + ); + + // CHANGES with APPEND_ONLY + snowflake().verified_stmt( + "SELECT a FROM t CHANGES(INFORMATION => APPEND_ONLY) AT(TIMESTAMP => TO_TIMESTAMP_TZ('2026-01-01 00:00:00'))", + ); + + // CHANGES with OFFSET + snowflake().verified_stmt("SELECT a FROM t CHANGES(INFORMATION => DEFAULT) AT(OFFSET => -60)"); + + // CHANGES with STATEMENT + snowflake().verified_stmt( + "SELECT a FROM t CHANGES(INFORMATION => DEFAULT) AT(STATEMENT => '8e5d0ca9-005e-44e6-b858-a8f5b37c5726')", + ); +} + #[test] fn test_grant_account_global_privileges() { let privileges = vec![ From b3e176daf302985feef46a3933428b196efb78e8 Mon Sep 17 00:00:00 2001 From: Filipe Guerreiro Date: Fri, 13 Mar 2026 21:57:52 +0900 Subject: [PATCH 114/121] Add SETOF support for PostgreSQL function return types (#2217) --- src/ast/ddl.rs | 24 ++++++++++++++++++++- src/ast/mod.rs | 14 ++++++------- src/keywords.rs | 1 + src/parser/mod.rs | 22 ++++++++++++------- tests/sqlparser_bigquery.rs | 2 +- tests/sqlparser_mssql.rs | 4 ++-- tests/sqlparser_postgres.rs | 42 +++++++++++++++++++++++++++++-------- 7 files changed, 82 insertions(+), 27 deletions(-) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index 157b209d11..f0e79e739c 100644 --- 
a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -3533,6 +3533,28 @@ impl fmt::Display for CreateDomain { } } +/// The return type of a `CREATE FUNCTION` statement. +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +pub enum FunctionReturnType { + /// `RETURNS ` + DataType(DataType), + /// `RETURNS SETOF ` + /// + /// [PostgreSQL](https://www.postgresql.org/docs/current/sql-createfunction.html) + SetOf(DataType), +} + +impl fmt::Display for FunctionReturnType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + FunctionReturnType::DataType(data_type) => write!(f, "{data_type}"), + FunctionReturnType::SetOf(data_type) => write!(f, "SETOF {data_type}"), + } + } +} + #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] @@ -3553,7 +3575,7 @@ pub struct CreateFunction { /// List of arguments for the function. pub args: Option>, /// The return type of the function. - pub return_type: Option, + pub return_type: Option, /// The expression that defines the function. 
/// /// Examples: diff --git a/src/ast/mod.rs b/src/ast/mod.rs index cff089bc31..c4d1b50cd5 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -72,13 +72,13 @@ pub use self::ddl::{ CreatePolicyCommand, CreatePolicyType, CreateTable, CreateTrigger, CreateView, Deduplicate, DeferrableInitial, DistStyle, DropBehavior, DropExtension, DropFunction, DropOperator, DropOperatorClass, DropOperatorFamily, DropOperatorSignature, DropPolicy, DropTrigger, - ForValues, GeneratedAs, GeneratedExpressionMode, IdentityParameters, IdentityProperty, - IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, IndexColumn, - IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, OperatorArgTypes, - OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem, OperatorOption, OperatorPurpose, - Owner, Partition, PartitionBoundValue, ProcedureParam, ReferentialAction, RenameTableNameKind, - ReplicaIdentity, TagsColumnOption, TriggerObjectKind, Truncate, - UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, + ForValues, FunctionReturnType, GeneratedAs, GeneratedExpressionMode, IdentityParameters, + IdentityProperty, IdentityPropertyFormatKind, IdentityPropertyKind, IdentityPropertyOrder, + IndexColumn, IndexOption, IndexType, KeyOrIndexDisplay, Msck, NullsDistinctOption, + OperatorArgTypes, OperatorClassItem, OperatorFamilyDropItem, OperatorFamilyItem, + OperatorOption, OperatorPurpose, Owner, Partition, PartitionBoundValue, ProcedureParam, + ReferentialAction, RenameTableNameKind, ReplicaIdentity, TagsColumnOption, TriggerObjectKind, + Truncate, UserDefinedTypeCompositeAttributeDef, UserDefinedTypeInternalLength, UserDefinedTypeRangeOption, UserDefinedTypeRepresentation, UserDefinedTypeSqlDefinitionOption, UserDefinedTypeStorage, ViewColumnDef, }; diff --git a/src/keywords.rs b/src/keywords.rs index d56d04847e..94458ccb4f 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -938,6 +938,7 @@ define_keywords!( SESSION_USER, SET, SETERROR, + 
SETOF, SETS, SETTINGS, SHARE, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3bc34bda0b..6adecb0c66 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -5594,7 +5594,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::RParen)?; let return_type = if self.parse_keyword(Keyword::RETURNS) { - Some(self.parse_data_type()?) + Some(self.parse_function_return_type()?) } else { None }; @@ -5774,7 +5774,7 @@ impl<'a> Parser<'a> { let (name, args) = self.parse_create_function_name_and_params()?; let return_type = if self.parse_keyword(Keyword::RETURNS) { - Some(self.parse_data_type()?) + Some(self.parse_function_return_type()?) } else { None }; @@ -5877,11 +5877,11 @@ impl<'a> Parser<'a> { }) })?; - let return_type = if return_table.is_some() { - return_table - } else { - Some(self.parse_data_type()?) + let data_type = match return_table { + Some(table_type) => table_type, + None => self.parse_data_type()?, }; + let return_type = Some(FunctionReturnType::DataType(data_type)); let _ = self.parse_keyword(Keyword::AS); @@ -5933,6 +5933,14 @@ impl<'a> Parser<'a> { }) } + fn parse_function_return_type(&mut self) -> Result { + if self.parse_keyword(Keyword::SETOF) { + Ok(FunctionReturnType::SetOf(self.parse_data_type()?)) + } else { + Ok(FunctionReturnType::DataType(self.parse_data_type()?)) + } + } + fn parse_create_function_name_and_params( &mut self, ) -> Result<(ObjectName, Vec), ParserError> { @@ -8608,7 +8616,7 @@ impl<'a> Parser<'a> { } } - /// Parse a single [PartitionBoundValue]. + /// Parse a single partition bound value (MINVALUE, MAXVALUE, or expression). 
fn parse_partition_bound_value(&mut self) -> Result { if self.parse_keyword(Keyword::MINVALUE) { Ok(PartitionBoundValue::MinValue) diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index d3e47f991f..79db34b06e 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -2289,7 +2289,7 @@ fn test_bigquery_create_function() { Ident::new("myfunction"), ]), args: Some(vec![OperateFunctionArg::with_name("x", DataType::Float64),]), - return_type: Some(DataType::Float64), + return_type: Some(FunctionReturnType::DataType(DataType::Float64)), function_body: Some(CreateFunctionBody::AsAfterOptions(Expr::Value( number("42").with_empty_span() ))), diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index f2156e6477..07dd0fcb61 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -255,7 +255,7 @@ fn parse_create_function() { default_expr: None, }, ]), - return_type: Some(DataType::Int(None)), + return_type: Some(FunctionReturnType::DataType(DataType::Int(None))), function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { begin_token: AttachedToken::empty(), statements: vec![Statement::Return(ReturnStatement { @@ -430,7 +430,7 @@ fn parse_create_function_parameter_default_values() { data_type: DataType::Int(None), default_expr: Some(Expr::Value((number("42")).with_empty_span())), },]), - return_type: Some(DataType::Int(None)), + return_type: Some(FunctionReturnType::DataType(DataType::Int(None))), function_body: Some(CreateFunctionBody::AsBeginEnd(BeginEndStatements { begin_token: AttachedToken::empty(), statements: vec![Statement::Return(ReturnStatement { diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9486af0402..6b4f35d78b 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -4441,7 +4441,7 @@ $$"#; DataType::Varchar(None), ), ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), 
language: Some("plpgsql".into()), behavior: None, called_on_null: None, @@ -4484,7 +4484,7 @@ $$"#; DataType::Int(None) ) ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), language: Some("plpgsql".into()), behavior: None, called_on_null: None, @@ -4531,7 +4531,7 @@ $$"#; DataType::Int(None) ), ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), language: Some("plpgsql".into()), behavior: None, called_on_null: None, @@ -4578,7 +4578,7 @@ $$"#; DataType::Int(None) ), ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), language: Some("plpgsql".into()), behavior: None, called_on_null: None, @@ -4618,7 +4618,7 @@ $$"#; ), OperateFunctionArg::with_name("b", DataType::Varchar(None)), ]), - return_type: Some(DataType::Boolean), + return_type: Some(FunctionReturnType::DataType(DataType::Boolean)), language: Some("plpgsql".into()), behavior: None, called_on_null: None, @@ -4661,7 +4661,7 @@ fn parse_create_function() { OperateFunctionArg::unnamed(DataType::Integer(None)), OperateFunctionArg::unnamed(DataType::Integer(None)), ]), - return_type: Some(DataType::Integer(None)), + return_type: Some(FunctionReturnType::DataType(DataType::Integer(None))), language: Some("SQL".into()), behavior: Some(FunctionBehavior::Immutable), called_on_null: Some(FunctionCalledOnNull::Strict), @@ -4698,6 +4698,30 @@ fn parse_create_function_detailed() { ); } +#[test] +fn parse_create_function_returns_setof() { + pg_and_generic().verified_stmt( + "CREATE FUNCTION get_users() RETURNS SETOF TEXT LANGUAGE sql AS 'SELECT name FROM users'", + ); + pg_and_generic().verified_stmt( + "CREATE FUNCTION get_ids() RETURNS SETOF INTEGER LANGUAGE sql AS 'SELECT id FROM users'", + ); + pg_and_generic().verified_stmt( + r#"CREATE FUNCTION get_all() RETURNS SETOF my_schema."MyType" LANGUAGE sql AS 'SELECT * FROM t'"#, + ); + 
pg_and_generic().verified_stmt( + "CREATE FUNCTION get_rows() RETURNS SETOF RECORD LANGUAGE sql AS 'SELECT * FROM t'", + ); + + let sql = "CREATE FUNCTION get_names() RETURNS SETOF TEXT LANGUAGE sql AS 'SELECT name FROM t'"; + match pg_and_generic().verified_stmt(sql) { + Statement::CreateFunction(CreateFunction { return_type, .. }) => { + assert_eq!(return_type, Some(FunctionReturnType::SetOf(DataType::Text))); + } + _ => panic!("Expected CreateFunction"), + } +} + #[test] fn parse_create_function_with_security() { let sql = @@ -4773,10 +4797,10 @@ fn parse_create_function_c_with_module_pathname() { "input", DataType::Custom(ObjectName::from(vec![Ident::new("cstring")]), vec![]), ),]), - return_type: Some(DataType::Custom( + return_type: Some(FunctionReturnType::DataType(DataType::Custom( ObjectName::from(vec![Ident::new("cas")]), vec![] - )), + ))), language: Some("c".into()), behavior: Some(FunctionBehavior::Immutable), called_on_null: None, @@ -6493,7 +6517,7 @@ fn parse_trigger_related_functions() { if_not_exists: false, name: ObjectName::from(vec![Ident::new("emp_stamp")]), args: Some(vec![]), - return_type: Some(DataType::Trigger), + return_type: Some(FunctionReturnType::DataType(DataType::Trigger)), function_body: Some( CreateFunctionBody::AsBeforeOptions { body: Expr::Value(( From 50921b173a3ca7a3183398cc83de1dfd1cd165f5 Mon Sep 17 00:00:00 2001 From: xitep Date: Fri, 20 Mar 2026 10:48:53 +0100 Subject: [PATCH 115/121] [Oracle] Support for `INSERT INTO () ...` (#2276) --- src/ast/mod.rs | 11 ++++++++++ src/ast/spans.rs | 1 + src/dialect/mod.rs | 7 +++++++ src/dialect/oracle.rs | 5 +++++ src/parser/mod.rs | 44 ++++++++++++++++++++++++++++++++++++--- tests/sqlparser_common.rs | 34 ++++++++++++++++++++++++++++++ 6 files changed, 99 insertions(+), 3 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index c4d1b50cd5..36c41d6ca2 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -10888,6 +10888,16 @@ pub enum TableObject { /// ``` /// 
[Clickhouse](https://clickhouse.com/docs/en/sql-reference/table-functions) TableFunction(Function), + + /// Table specified through a sub-query + /// Example: + /// ```sql + /// INSERT INTO + /// (SELECT employee_id, last_name, email, hire_date, job_id, salary, commission_pct FROM employees) + /// VALUES (207, 'Gregory', 'pgregory@example.com', sysdate, 'PU_CLERK', 1.2E3, NULL); + /// ``` + /// [Oracle](https://docs.oracle.com/en/database/oracle/oracle-database/21/sqlrf/INSERT.html#GUID-903F8043-0254-4EE9-ACC1-CB8AC0AF3423__I2126242) + TableQuery(Box), } impl fmt::Display for TableObject { @@ -10895,6 +10905,7 @@ impl fmt::Display for TableObject { match self { Self::TableName(table_name) => write!(f, "{table_name}"), Self::TableFunction(func) => write!(f, "FUNCTION {func}"), + Self::TableQuery(table_query) => write!(f, "({table_query})"), } } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 74f731a78a..78698bbedb 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -2387,6 +2387,7 @@ impl Spanned for TableObject { union_spans(segments.iter().map(|i| i.span())) } TableObject::TableFunction(func) => func.span(), + TableObject::TableQuery(query) => query.span(), } } } diff --git a/src/dialect/mod.rs b/src/dialect/mod.rs index 8703e402cf..fed81b60a4 100644 --- a/src/dialect/mod.rs +++ b/src/dialect/mod.rs @@ -1247,6 +1247,13 @@ pub trait Dialect: Debug + Any { false } + /// Does the dialect support table queries in insertion? + /// + /// e.g. `INSERT INTO () ...` + fn supports_insert_table_query(&self) -> bool { + false + } + /// Does the dialect support insert formats, e.g. `INSERT INTO ...
FORMAT ` fn supports_insert_format(&self) -> bool { false diff --git a/src/dialect/oracle.rs b/src/dialect/oracle.rs index dce0493d3b..ccbef5b621 100644 --- a/src/dialect/oracle.rs +++ b/src/dialect/oracle.rs @@ -114,4 +114,9 @@ impl Dialect for OracleDialect { fn supports_insert_table_alias(&self) -> bool { true } + + /// See + fn supports_insert_table_query(&self) -> bool { + true + } } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 6adecb0c66..f617caddce 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -12688,6 +12688,9 @@ impl<'a> Parser<'a> { let fn_name = self.parse_object_name(false)?; self.parse_function_call(fn_name) .map(TableObject::TableFunction) + } else if self.dialect.supports_insert_table_query() && self.peek_subquery_or_cte_start() { + self.parse_parenthesized(|p| p.parse_query()) + .map(TableObject::TableQuery) } else { self.parse_object_name(false).map(TableObject::TableName) } @@ -17601,9 +17604,44 @@ impl<'a> Parser<'a> { /// Returns true if the immediate tokens look like the /// beginning of a subquery. `(SELECT ...` fn peek_subquery_start(&mut self) -> bool { - let [maybe_lparen, maybe_select] = self.peek_tokens(); - Token::LParen == maybe_lparen - && matches!(maybe_select, Token::Word(w) if w.keyword == Keyword::SELECT) + matches!( + self.peek_tokens_ref(), + [ + TokenWithSpan { + token: Token::LParen, + .. + }, + TokenWithSpan { + token: Token::Word(Word { + keyword: Keyword::SELECT, + .. + }), + .. + }, + ] + ) + } + + /// Returns true if the immediate tokens look like the + /// beginning of a subquery possibly preceded by CTEs; + /// i.e. `(WITH ...` or `(SELECT ...`. + fn peek_subquery_or_cte_start(&mut self) -> bool { + matches!( + self.peek_tokens_ref(), + [ + TokenWithSpan { + token: Token::LParen, + .. + }, + TokenWithSpan { + token: Token::Word(Word { + keyword: Keyword::SELECT | Keyword::WITH, + .. + }), + .. 
+ }, + ] + ) } fn parse_conflict_clause(&mut self) -> Option { diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 08fb6107fc..6f9e469599 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -13525,6 +13525,40 @@ fn insert_into_with_parentheses() { dialects.verified_stmt(r#"INSERT INTO t1 ("select", name) (SELECT t2.name FROM t2)"#); } +#[test] +fn test_insert_with_query_table() { + let dialects = all_dialects_where(|d| d.supports_insert_table_query()); + + // a simple query (block); i.e. SELECT ... + let sql = "INSERT INTO (SELECT employee_id, last_name FROM employees) VALUES (207, 'Gregory')"; + dialects.verified_stmt(sql); + + // a full blown query; i.e. `WITH ... SELECT .. ORDER BY ...` + let sql = "INSERT INTO \ + (WITH cte AS (SELECT 1 AS id, 2 AS val FROM dual) SELECT foo_t.id, foo_t.val FROM foo_t \ + WHERE EXISTS (SELECT 1 FROM cte WHERE cte.id = foo_t.id) ORDER BY 1, 2) \ + (id, val) \ + VALUES (1000, 10101)"; + dialects.verified_stmt(sql); + + // an alias to the insert target query table + let sql = "INSERT INTO \ + (WITH cte AS (SELECT 1 AS id, 2 AS val FROM dual) SELECT foo_t.id, foo_t.val FROM foo_t \ + WHERE EXISTS (SELECT 1 FROM cte WHERE cte.id = foo_t.id)) abc \ + (id, val) \ + VALUES (1000, 10101)"; + dialects.verified_stmt(sql); + + // a query table target and a query source + let sql = "INSERT INTO (SELECT foo_t.id, foo_t.val FROM foo_t) SELECT 10, 20 FROM dual"; + dialects.verified_stmt(sql); + + // a query table target and a query source, with explicit columns + let sql = + "INSERT INTO (SELECT foo_t.id, foo_t.val FROM foo_t) (id, val) SELECT 10, 20 FROM dual"; + dialects.verified_stmt(sql); +} + #[test] fn parse_odbc_scalar_function() { let select = verified_only_select("SELECT {fn my_func(1, 2)}"); From 1097a0d5a6a852737b55f96bd38fba3f2ece9bd0 Mon Sep 17 00:00:00 2001 From: Andriy Romanov Date: Fri, 20 Mar 2026 03:03:55 -0700 Subject: [PATCH 116/121] Fixed BACKUP parsing for redshift (#2270) --- 
src/ast/ddl.rs | 6 ++++++ src/ast/helpers/stmt_create_table.rs | 10 ++++++++++ src/ast/spans.rs | 1 + src/keywords.rs | 2 ++ src/parser/mod.rs | 9 +++++++++ tests/sqlparser_duckdb.rs | 1 + tests/sqlparser_mssql.rs | 2 ++ tests/sqlparser_postgres.rs | 1 + tests/sqlparser_redshift.rs | 11 +++++++++++ 9 files changed, 43 insertions(+) diff --git a/src/ast/ddl.rs b/src/ast/ddl.rs index f0e79e739c..879740f03b 100644 --- a/src/ast/ddl.rs +++ b/src/ast/ddl.rs @@ -3057,6 +3057,9 @@ pub struct CreateTable { /// Redshift `SORTKEY` option /// pub sortkey: Option>, + /// Redshift `BACKUP` option: `BACKUP { YES | NO }` + /// + pub backup: Option, } impl fmt::Display for CreateTable { @@ -3360,6 +3363,9 @@ impl fmt::Display for CreateTable { if self.strict { write!(f, " STRICT")?; } + if let Some(backup) = self.backup { + write!(f, " BACKUP {}", if backup { "YES" } else { "NO" })?; + } if let Some(diststyle) = &self.diststyle { write!(f, " DISTSTYLE {diststyle}")?; } diff --git a/src/ast/helpers/stmt_create_table.rs b/src/ast/helpers/stmt_create_table.rs index 29589e2198..ab2feb6930 100644 --- a/src/ast/helpers/stmt_create_table.rs +++ b/src/ast/helpers/stmt_create_table.rs @@ -181,6 +181,8 @@ pub struct CreateTableBuilder { pub distkey: Option, /// Redshift `SORTKEY` option. pub sortkey: Option>, + /// Redshift `BACKUP` option. + pub backup: Option, } impl CreateTableBuilder { @@ -245,6 +247,7 @@ impl CreateTableBuilder { diststyle: None, distkey: None, sortkey: None, + backup: None, } } /// Set `OR REPLACE` for the CREATE TABLE statement. @@ -548,6 +551,11 @@ impl CreateTableBuilder { self.sortkey = sortkey; self } + /// Set the Redshift `BACKUP` option. + pub fn backup(mut self, backup: Option) -> Self { + self.backup = backup; + self + } /// Consume the builder and produce a `CreateTable`. 
pub fn build(self) -> CreateTable { CreateTable { @@ -609,6 +617,7 @@ impl CreateTableBuilder { diststyle: self.diststyle, distkey: self.distkey, sortkey: self.sortkey, + backup: self.backup, } } } @@ -689,6 +698,7 @@ impl From for CreateTableBuilder { diststyle: table.diststyle, distkey: table.distkey, sortkey: table.sortkey, + backup: table.backup, } } } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 78698bbedb..2af57d98ef 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -589,6 +589,7 @@ impl Spanned for CreateTable { diststyle: _, distkey: _, sortkey: _, + backup: _, } = self; union_spans( diff --git a/src/keywords.rs b/src/keywords.rs index 94458ccb4f..f0f37b1c02 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -145,6 +145,7 @@ define_keywords!( AVG, AVG_ROW_LENGTH, AVRO, + BACKUP, BACKWARD, BASE64, BASE_LOCATION, @@ -1171,6 +1172,7 @@ define_keywords!( XOR, YEAR, YEARS, + YES, ZONE, ZORDER, ZSTD diff --git a/src/parser/mod.rs b/src/parser/mod.rs index f617caddce..cefc0c6f63 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -8443,6 +8443,14 @@ impl<'a> Parser<'a> { let strict = self.parse_keyword(Keyword::STRICT); + // Redshift: BACKUP YES|NO + let backup = if self.parse_keyword(Keyword::BACKUP) { + let keyword = self.expect_one_of_keywords(&[Keyword::YES, Keyword::NO])?; + Some(keyword == Keyword::YES) + } else { + None + }; + // Redshift: DISTSTYLE, DISTKEY, SORTKEY let diststyle = if self.parse_keyword(Keyword::DISTSTYLE) { Some(self.parse_dist_style()?) 
@@ -8505,6 +8513,7 @@ impl<'a> Parser<'a> { .table_options(create_table_config.table_options) .primary_key(primary_key) .strict(strict) + .backup(backup) .diststyle(diststyle) .distkey(distkey) .sortkey(sortkey) diff --git a/tests/sqlparser_duckdb.rs b/tests/sqlparser_duckdb.rs index 671e92b979..df62685808 100644 --- a/tests/sqlparser_duckdb.rs +++ b/tests/sqlparser_duckdb.rs @@ -793,6 +793,7 @@ fn test_duckdb_union_datatype() { diststyle: Default::default(), distkey: Default::default(), sortkey: Default::default(), + backup: Default::default(), }), stmt ); diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 07dd0fcb61..733923f4dc 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -2011,6 +2011,7 @@ fn parse_create_table_with_valid_options() { diststyle: None, distkey: None, sortkey: None, + backup: None, }) ); } @@ -2184,6 +2185,7 @@ fn parse_create_table_with_identity_column() { diststyle: None, distkey: None, sortkey: None, + backup: None, }), ); } diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6b4f35d78b..9a4ff418e9 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -6503,6 +6503,7 @@ fn parse_trigger_related_functions() { diststyle: None, distkey: None, sortkey: None, + backup: None, } ); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 184aa5b69a..319a818cdf 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -500,3 +500,14 @@ fn test_alter_table_alter_sortkey() { redshift().verified_stmt("ALTER TABLE users ALTER SORTKEY(created_at)"); redshift().verified_stmt("ALTER TABLE users ALTER SORTKEY(c1, c2)"); } + +#[test] +fn test_create_table_backup() { + redshift().verified_stmt("CREATE TABLE public.users (id INT, name VARCHAR(255)) BACKUP YES"); + + redshift().verified_stmt("CREATE TABLE staging.events (event_id INT) BACKUP NO"); + + redshift().verified_stmt( + "CREATE TABLE public.users_backup_test BACKUP YES 
DISTSTYLE AUTO AS SELECT id, name, email FROM public.users", + ); +} From 7c4eac3098063d191337ecb9ac0f695ac205de67 Mon Sep 17 00:00:00 2001 From: Dmitrii Blaginin Date: Tue, 24 Mar 2026 04:39:25 +0000 Subject: [PATCH 117/121] recursive protection for `parse_subexpr` (#2282) --- src/parser/mod.rs | 1 + tests/sqlparser_common.rs | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index cefc0c6f63..3a970f7a1a 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1386,6 +1386,7 @@ impl<'a> Parser<'a> { } /// Parse tokens until the precedence changes. + #[cfg_attr(feature = "recursive-protection", recursive::recursive)] pub fn parse_subexpr(&mut self, precedence: u8) -> Result { let _guard = self.recursion_counter.try_decrease()?; debug!("parsing expr"); diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 6f9e469599..cff29bfecd 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -15687,6 +15687,34 @@ fn overflow() { let statement = statements.pop().unwrap(); assert_eq!(statement.to_string(), sql); } + +#[test] +fn parse_deeply_nested_boolean_expr_does_not_stackoverflow() { + fn build_nested_expr(depth: usize) -> String { + if depth == 0 { + return "x = 1".to_string(); + } + format!( + "({} OR {} AND ({}))", + build_nested_expr(0), + build_nested_expr(0), + build_nested_expr(depth - 1) + ) + } + + let depth = 200; + let where_clause = build_nested_expr(depth); + let sql = format!("SELECT pk FROM tab0 WHERE {where_clause}"); + + let mut statements = Parser::new(&GenericDialect {}) + .try_with_sql(&sql) + .expect("tokenize to work") + .with_recursion_limit(depth * 10) + .parse_statements() + .unwrap(); + let statement = statements.pop().unwrap(); + assert_eq!(statement.to_string(), sql); +} #[test] fn parse_select_without_projection() { let dialects = all_dialects_where(|d| d.supports_empty_projections()); From df0d56cfa28ae556a4dd3740c6150af46415bf12 Mon Sep 
17 00:00:00 2001 From: Ayman Elkfrawy <120422207+ayman-sigma@users.noreply.github.com> Date: Thu, 26 Mar 2026 08:24:15 -0700 Subject: [PATCH 118/121] Fix the tokenization of `<` edge cases (#2280) --- src/tokenizer.rs | 110 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 102 insertions(+), 8 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 5ca686d46c..c055db8fe5 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1627,6 +1627,9 @@ impl<'a> Tokenizer<'a> { chars.next(); match chars.peek() { Some('>') => self.consume_for_binop(chars, "<=>", Token::Spaceship), + // `<=+` and `<=-` are not valid combined operators; treat `<=` as + // the operator and leave `+`/`-` to be tokenized separately. + Some('+') | Some('-') => Ok(Some(Token::LtEq)), _ => self.start_binop(chars, "<=", Token::LtEq), } } @@ -1646,13 +1649,15 @@ impl<'a> Tokenizer<'a> { } } Some('<') => self.consume_for_binop(chars, "<<", Token::ShiftLeft), + // `<+` is not a valid combined operator; treat `<` as the operator + // and leave `+` to be tokenized separately. 
+ Some('+') => Ok(Some(Token::Lt)), Some('-') if self.dialect.supports_geometric_types() => { - chars.next(); // consume - match chars.peek() { - Some('>') => { - self.consume_for_binop(chars, "<->", Token::TwoWayArrow) - } - _ => self.start_binop_opt(chars, "<-", None), + if chars.peekable.clone().nth(1) == Some('>') { + chars.next(); // consume `-` + self.consume_for_binop(chars, "<->", Token::TwoWayArrow) + } else { + Ok(Some(Token::Lt)) } } Some('^') if self.dialect.supports_geometric_types() => { @@ -2628,9 +2633,10 @@ fn take_char_from_hex_digits( mod tests { use super::*; use crate::dialect::{ - BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, SQLiteDialect, + BigQueryDialect, ClickHouseDialect, HiveDialect, MsSqlDialect, MySqlDialect, + PostgreSqlDialect, SQLiteDialect, }; - use crate::test_utils::{all_dialects_except, all_dialects_where}; + use crate::test_utils::{all_dialects, all_dialects_except, all_dialects_where}; use core::fmt::Debug; #[test] @@ -4420,4 +4426,92 @@ mod tests { tokens, ); } + + #[test] + fn tokenize_lt() { + all_dialects().tokenizes_to( + "select a <-50", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::Lt, + Token::Minus, + Token::Number("50".to_string(), false), + ], + ); + all_dialects().tokenizes_to( + "select a <+50", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::Lt, + Token::Plus, + Token::Number("50".to_string(), false), + ], + ); + all_dialects().tokenizes_to( + "select a <=-50", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::LtEq, + Token::Minus, + Token::Number("50".to_string(), false), + ], + ); + all_dialects().tokenizes_to( + "select a <=+50", + vec![ + 
Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::LtEq, + Token::Plus, + Token::Number("50".to_string(), false), + ], + ); + all_dialects_where(|d| d.supports_geometric_types()).tokenizes_to( + "select a <->b", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::TwoWayArrow, + Token::make_word("b", None), + ], + ); + + all_dialects().tokenizes_to( + "select a <-b", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::Lt, + Token::Minus, + Token::make_word("b", None), + ], + ); + all_dialects().tokenizes_to( + "select a <+b", + vec![ + Token::make_keyword("select"), + Token::Whitespace(Whitespace::Space), + Token::make_word("a", None), + Token::Whitespace(Whitespace::Space), + Token::Lt, + Token::Plus, + Token::make_word("b", None), + ], + ); + } } From 6f8e7b85c2570bff270989a2dfb93f7c4921e854 Mon Sep 17 00:00:00 2001 From: xitep Date: Fri, 27 Mar 2026 10:37:44 +0100 Subject: [PATCH 119/121] Expose values through ValueWithSpan (#2281) --- src/ast/helpers/key_value_options.rs | 6 +- src/ast/mod.rs | 46 +++++------ src/ast/query.rs | 16 ++-- src/ast/spans.rs | 12 +-- src/ast/value.rs | 31 +++++-- src/ast/visitor.rs | 18 ++--- src/dialect/snowflake.rs | 15 ++-- src/parser/alter.rs | 2 +- src/parser/mod.rs | 109 +++++++++++++------------ tests/sqlparser_common.rs | 80 ++++++++++-------- tests/sqlparser_mssql.rs | 4 +- tests/sqlparser_mysql.rs | 11 ++- tests/sqlparser_postgres.rs | 2 +- tests/sqlparser_redshift.rs | 2 +- tests/sqlparser_snowflake.rs | 116 +++++++++++++++------------ 15 files changed, 256 insertions(+), 214 deletions(-) diff --git a/src/ast/helpers/key_value_options.rs b/src/ast/helpers/key_value_options.rs index e8e543b01d..2aa59d9d7c 100644 
--- a/src/ast/helpers/key_value_options.rs +++ b/src/ast/helpers/key_value_options.rs @@ -29,7 +29,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::{Visit, VisitMut}; -use crate::ast::{display_comma_separated, display_separated, Value}; +use crate::ast::{display_comma_separated, display_separated, ValueWithSpan}; #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -75,9 +75,9 @@ pub struct KeyValueOption { /// The kind of value for a key-value option. pub enum KeyValueOptionKind { /// A single value. - Single(Value), + Single(ValueWithSpan), /// Multiple values. - Multi(Vec), + Multi(Vec), /// A nested list of key-value options. KeyValueOptions(Box), } diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 36c41d6ca2..d7a3679a46 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -624,9 +624,9 @@ impl fmt::Display for MapEntry { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum CastFormat { /// A simple cast format specified by a `Value`. - Value(Value), + Value(ValueWithSpan), /// A cast format with an explicit time zone: `(format, timezone)`. - ValueAtTimeZone(Value, Value), + ValueAtTimeZone(ValueWithSpan, ValueWithSpan), } /// An element of a JSON path. @@ -778,7 +778,7 @@ pub enum CeilFloorKind { /// `CEIL( TO )` DateTimeField(DateTimeField), /// `CEIL( [, ])` - Scale(Value), + Scale(ValueWithSpan), } /// A WHEN clause in a CASE expression containing both @@ -956,7 +956,7 @@ pub enum Expr { /// Pattern expression. pattern: Box, /// Optional escape character. - escape_char: Option, + escape_char: Option, }, /// `ILIKE` (case-insensitive `LIKE`) ILike { @@ -970,7 +970,7 @@ pub enum Expr { /// Pattern expression. pattern: Box, /// Optional escape character. - escape_char: Option, + escape_char: Option, }, /// `SIMILAR TO` regex SimilarTo { @@ -981,7 +981,7 @@ pub enum Expr { /// Pattern expression. 
pattern: Box, /// Optional escape character. - escape_char: Option, + escape_char: Option, }, /// MySQL: `RLIKE` regex or `REGEXP` regex RLike { @@ -1146,12 +1146,12 @@ pub enum Expr { /// TRIM(, [, characters]) -- PostgreSQL, DuckDB, Snowflake, BigQuery, Generic /// ``` Trim { - /// The expression to trim from. - expr: Box, /// Which side to trim: `BOTH`, `LEADING`, or `TRAILING`. trim_where: Option, - /// Optional expression specifying what to trim from the value. + /// Optional expression specifying what to trim from the value `expr`. trim_what: Option>, + /// The expression to trim from. + expr: Box, /// Optional list of characters to trim (dialect-specific). trim_characters: Option>, }, @@ -1292,7 +1292,7 @@ pub enum Expr { /// `(, , ...)`. columns: Vec, /// ``. - match_value: Value, + match_value: ValueWithSpan, /// `` opt_search_modifier: Option, }, @@ -3295,7 +3295,7 @@ pub enum Set { /// Transaction modes (e.g., ISOLATION LEVEL, READ ONLY). modes: Vec, /// Optional snapshot value for transaction snapshot control. - snapshot: Option, + snapshot: Option, /// `true` when the `SESSION` keyword was used. session: bool, }, @@ -4630,7 +4630,7 @@ pub enum Statement { /// Pragma name (possibly qualified). name: ObjectName, /// Optional pragma value. - value: Option, + value: Option, /// Whether the pragma used `=`. is_eq: bool, }, @@ -6752,7 +6752,7 @@ pub enum FetchDirection { /// Fetch a specific count of rows. Count { /// The limit value for the count. - limit: Value, + limit: ValueWithSpan, }, /// Fetch the next row. Next, @@ -6765,12 +6765,12 @@ pub enum FetchDirection { /// Fetch an absolute row by index. Absolute { /// The absolute index value. - limit: Value, + limit: ValueWithSpan, }, /// Fetch a row relative to the current position. Relative { /// The relative offset value. - limit: Value, + limit: ValueWithSpan, }, /// Fetch all rows. All, @@ -6779,7 +6779,7 @@ pub enum FetchDirection { /// Fetch forward by an optional limit. 
Forward { /// Optional forward limit. - limit: Option, + limit: Option, }, /// Fetch all forward rows. ForwardAll, @@ -6788,7 +6788,7 @@ pub enum FetchDirection { /// Fetch backward by an optional limit. Backward { /// Optional backward limit. - limit: Option, + limit: Option, }, /// Fetch all backward rows. BackwardAll, @@ -8116,7 +8116,7 @@ pub enum FunctionArgumentClause { /// The `SEPARATOR` clause to the [`GROUP_CONCAT`] function in MySQL. /// /// [`GROUP_CONCAT`]: https://dev.mysql.com/doc/refman/8.0/en/aggregate-functions.html#function_group-concat - Separator(Value), + Separator(ValueWithSpan), /// The `ON NULL` clause for some JSON functions. /// /// [MSSQL `JSON_ARRAY`](https://learn.microsoft.com/en-us/sql/t-sql/functions/json-array-transact-sql?view=sql-server-ver16) @@ -9465,7 +9465,7 @@ impl fmt::Display for CopyLegacyOption { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct FileSize { /// Numeric size value. - pub size: Value, + pub size: ValueWithSpan, /// Optional unit for the size (MB or GB). pub unit: Option, } @@ -10654,11 +10654,11 @@ pub struct ShowStatementOptions { /// Optional scope to show in (for example: TABLE, SCHEMA). pub show_in: Option, /// Optional `STARTS WITH` filter value. - pub starts_with: Option, + pub starts_with: Option, /// Optional `LIMIT` expression. pub limit: Option, /// Optional `FROM` value used with `LIMIT`. - pub limit_from: Option, + pub limit_from: Option, /// Optional filter position (infix or suffix) for `LIKE`/`FILTER`. pub filter_position: Option, } @@ -11474,7 +11474,7 @@ pub struct AlterUserRemoveRoleDelegation { #[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub struct AlterUserAddMfaMethodOtp { /// Optional OTP count parameter. - pub count: Option, + pub count: Option, } /// ```sql @@ -11795,7 +11795,7 @@ pub struct VacuumStatement { /// Optional table to run `VACUUM` on. pub table_name: Option, /// Optional threshold value (percent) for `TO threshold PERCENT`. 
- pub threshold: Option, + pub threshold: Option, /// Whether `BOOST` was specified. pub boost: bool, } diff --git a/src/ast/query.rs b/src/ast/query.rs index ca74db4405..a52d518b1f 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1552,7 +1552,7 @@ pub enum TableFactor { json_expr: Expr, /// The path to the array or object to be iterated over. /// It must evaluate to a json array or object. - json_path: Value, + json_path: ValueWithSpan, /// The columns to be extracted from each element of the array or object. /// Each column must have a name and a type. columns: Vec, @@ -1573,7 +1573,7 @@ pub enum TableFactor { json_expr: Expr, /// The path to the array or object to be iterated over. /// It must evaluate to a json array or object. - json_path: Option, + json_path: Option, /// The columns to be extracted from each element of the array or object. /// Each column must have a name and a type. columns: Vec, @@ -1833,7 +1833,7 @@ pub struct TableSampleSeed { /// Seed modifier (e.g. `REPEATABLE` or `SEED`). pub modifier: TableSampleSeedModifier, /// The seed value expression. - pub value: Value, + pub value: ValueWithSpan, } impl fmt::Display for TableSampleSeed { @@ -1889,9 +1889,9 @@ impl fmt::Display for TableSampleUnit { /// Bucket-based sampling clause: `BUCKET OUT OF [ON ]`. pub struct TableSampleBucket { /// The bucket index expression. - pub bucket: Value, + pub bucket: ValueWithSpan, /// The total number of buckets expression. - pub total: Value, + pub total: ValueWithSpan, /// Optional `ON ` specification. pub on: Option, } @@ -3979,7 +3979,7 @@ impl fmt::Display for JsonTableColumn { /// A nested column in a `JSON_TABLE` column list. pub struct JsonTableNestedColumn { /// JSON path expression (must be a literal `Value`). - pub path: Value, + pub path: ValueWithSpan, /// Columns extracted from the matched nested array. pub columns: Vec, } @@ -4011,7 +4011,7 @@ pub struct JsonTableNamedColumn { /// The type of the column to be extracted. 
pub r#type: DataType, /// The path to the column to be extracted. Must be a literal string. - pub path: Value, + pub path: ValueWithSpan, /// true if the column is a boolean set to true if the given path exists pub exists: bool, /// The empty handling clause of the column @@ -4050,7 +4050,7 @@ pub enum JsonTableColumnErrorHandling { /// `NULL` — return NULL when the path does not match. Null, /// `DEFAULT ` — use the provided `Value` as a default. - Default(Value), + Default(ValueWithSpan), /// `ERROR` — raise an error. Error, } diff --git a/src/ast/spans.rs b/src/ast/spans.rs index 2af57d98ef..d80a3f4d54 100644 --- a/src/ast/spans.rs +++ b/src/ast/spans.rs @@ -46,8 +46,8 @@ use super::{ RenameSelectItem, ReplaceSelectElement, ReplaceSelectItem, Select, SelectInto, SelectItem, SetExpr, SqlOption, Statement, Subscript, SymbolDefinition, TableAlias, TableAliasColumnDef, TableConstraint, TableFactor, TableObject, TableOptionsClustered, TableWithJoins, Update, - UpdateTableFromKind, Use, Value, Values, ViewColumnDef, WhileStatement, - WildcardAdditionalOptions, With, WithFill, + UpdateTableFromKind, Use, Values, ViewColumnDef, WhileStatement, WildcardAdditionalOptions, + With, WithFill, }; /// Given an iterator of spans, return the [Span::union] of all spans. 
@@ -2185,13 +2185,6 @@ impl Spanned for ValueWithSpan { } } -/// The span is stored in the `ValueWrapper` struct -impl Spanned for Value { - fn span(&self) -> Span { - Span::empty() // # todo: Value needs to store spans before this is possible - } -} - impl Spanned for Join { fn span(&self) -> Span { let Join { @@ -2565,6 +2558,7 @@ impl Spanned for comments::CommentWithSpan { #[cfg(test)] pub mod tests { + use crate::ast::Value; use crate::dialect::{Dialect, GenericDialect, SnowflakeDialect}; use crate::parser::Parser; use crate::tokenizer::{Location, Span}; diff --git a/src/ast/value.rs b/src/ast/value.rs index 8879a252b6..5f069f36cc 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -18,7 +18,10 @@ #[cfg(not(feature = "std"))] use alloc::string::String; -use core::fmt; +use core::{ + fmt, + ops::{Deref, DerefMut}, +}; #[cfg(feature = "bigdecimal")] use bigdecimal::BigDecimal; @@ -67,7 +70,11 @@ use sqlparser_derive::{Visit, VisitMut}; /// A `Value` paired with its source `Span` location. #[derive(Debug, Clone, Eq)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] +#[cfg_attr( + feature = "visitor", + derive(Visit, VisitMut), + visit(with = "visit_value") +)] pub struct ValueWithSpan { /// The wrapped `Value`. 
pub value: Value, @@ -111,14 +118,24 @@ impl From for Value { } } +impl Deref for ValueWithSpan { + type Target = Value; + + fn deref(&self) -> &Self::Target { + &self.value + } +} + +impl DerefMut for ValueWithSpan { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.value + } +} + /// Primitive SQL values such as number and string #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -#[cfg_attr( - feature = "visitor", - derive(Visit, VisitMut), - visit(with = "visit_value") -)] +#[cfg_attr(feature = "visitor", derive(Visit, VisitMut))] pub enum Value { /// Numeric literal #[cfg(not(feature = "bigdecimal"))] diff --git a/src/ast/visitor.rs b/src/ast/visitor.rs index 5f9b374896..30673dfa03 100644 --- a/src/ast/visitor.rs +++ b/src/ast/visitor.rs @@ -17,7 +17,7 @@ //! Recursive visitors for ast Nodes. See [`Visitor`] for more details. -use crate::ast::{Expr, ObjectName, Query, Select, Statement, TableFactor, Value}; +use crate::ast::{Expr, ObjectName, Query, Select, Statement, TableFactor, ValueWithSpan}; use core::ops::ControlFlow; /// A type that can be visited by a [`Visitor`]. 
See [`Visitor`] for @@ -258,12 +258,12 @@ pub trait Visitor { } /// Invoked for any Value that appear in the AST before visiting children - fn pre_visit_value(&mut self, _value: &Value) -> ControlFlow { + fn pre_visit_value(&mut self, _value: &ValueWithSpan) -> ControlFlow { ControlFlow::Continue(()) } /// Invoked for any Value that appear in the AST after visiting children - fn post_visit_value(&mut self, _value: &Value) -> ControlFlow { + fn post_visit_value(&mut self, _value: &ValueWithSpan) -> ControlFlow { ControlFlow::Continue(()) } } @@ -386,12 +386,12 @@ pub trait VisitorMut { } /// Invoked for any value that appear in the AST before visiting children - fn pre_visit_value(&mut self, _value: &mut Value) -> ControlFlow { + fn pre_visit_value(&mut self, _value: &mut ValueWithSpan) -> ControlFlow { ControlFlow::Continue(()) } /// Invoked for any statements that appear in the AST after visiting children - fn post_visit_value(&mut self, _value: &mut Value) -> ControlFlow { + fn post_visit_value(&mut self, _value: &mut ValueWithSpan) -> ControlFlow { ControlFlow::Continue(()) } } @@ -1015,7 +1015,7 @@ mod tests { #[cfg(test)] mod visit_mut_tests { - use crate::ast::{Statement, Value, VisitMut, VisitorMut}; + use crate::ast::{Statement, Value, ValueWithSpan, VisitMut, VisitorMut}; use crate::dialect::GenericDialect; use crate::parser::Parser; use crate::tokenizer::Tokenizer; @@ -1029,13 +1029,13 @@ mod visit_mut_tests { impl VisitorMut for MutatorVisitor { type Break = (); - fn pre_visit_value(&mut self, value: &mut Value) -> ControlFlow { + fn pre_visit_value(&mut self, value: &mut ValueWithSpan) -> ControlFlow { self.index += 1; - *value = Value::SingleQuotedString(format!("REDACTED_{}", self.index)); + value.value = Value::SingleQuotedString(format!("REDACTED_{}", self.index)); ControlFlow::Continue(()) } - fn post_visit_value(&mut self, _value: &mut Value) -> ControlFlow { + fn post_visit_value(&mut self, _value: &mut ValueWithSpan) -> ControlFlow { 
ControlFlow::Continue(()) } } diff --git a/src/dialect/snowflake.rs b/src/dialect/snowflake.rs index 416e5051de..1ac21d0073 100644 --- a/src/dialect/snowflake.rs +++ b/src/dialect/snowflake.rs @@ -39,8 +39,8 @@ use crate::ast::{ use crate::dialect::{Dialect, Precedence}; use crate::keywords::Keyword; use crate::parser::{IsOptional, Parser, ParserError}; -use crate::tokenizer::Token; use crate::tokenizer::TokenWithSpan; +use crate::tokenizer::{Span, Token}; #[cfg(not(feature = "std"))] use alloc::boxed::Box; #[cfg(not(feature = "std"))] @@ -1634,8 +1634,8 @@ fn parse_session_options( let mut options: Vec = Vec::new(); let empty = String::new; loop { - let next_token = parser.peek_token(); - match next_token.token { + let peeked_token = parser.peek_token(); + match peeked_token.token { Token::SemiColon | Token::EOF => break, Token::Comma => { parser.advance_token(); @@ -1649,12 +1649,17 @@ fn parse_session_options( } else { options.push(KeyValueOption { option_name: key.value, - option_value: KeyValueOptionKind::Single(Value::Placeholder(empty())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder(empty()).with_span(Span { + start: peeked_token.span.end, + end: peeked_token.span.end, + }), + ), }); } } _ => { - return parser.expected("another option or end of statement", next_token); + return parser.expected("another option or end of statement", peeked_token); } } } diff --git a/src/parser/alter.rs b/src/parser/alter.rs index ce1220e166..4000eb26ba 100644 --- a/src/parser/alter.rs +++ b/src/parser/alter.rs @@ -219,7 +219,7 @@ impl Parser<'_> { if self.parse_keywords(&[Keyword::ADD, Keyword::MFA, Keyword::METHOD, Keyword::OTP]) { let count = if self.parse_keyword(Keyword::COUNT) { self.expect_token(&Token::Eq)?; - Some(self.parse_value()?.into()) + Some(self.parse_value()?) 
} else { None }; diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 3a970f7a1a..6282ed3d72 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -2689,7 +2689,7 @@ impl<'a> Parser<'a> { /// Parse an optional `FORMAT` clause for `CAST` expressions. pub fn parse_optional_cast_format(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::FORMAT) { - let value = self.parse_value()?.value; + let value = self.parse_value()?; match self.parse_optional_time_zone()? { Some(tz) => Ok(Some(CastFormat::ValueAtTimeZone(value, tz))), None => Ok(Some(CastFormat::Value(value))), @@ -2700,9 +2700,9 @@ impl<'a> Parser<'a> { } /// Parse an optional `AT TIME ZONE` clause. - pub fn parse_optional_time_zone(&mut self) -> Result, ParserError> { + pub fn parse_optional_time_zone(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::AT, Keyword::TIME, Keyword::ZONE]) { - self.parse_value().map(|v| Some(v.value)) + self.parse_value().map(Some) } else { Ok(None) } @@ -2834,13 +2834,13 @@ impl<'a> Parser<'a> { CeilFloorKind::DateTimeField(self.parse_date_time_field()?) } else if self.consume_token(&Token::Comma) { // Parse `CEIL/FLOOR(expr, scale)` - match self.parse_value()?.value { - Value::Number(n, s) => CeilFloorKind::Scale(Value::Number(n, s)), - _ => { - return Err(ParserError::ParserError( - "Scale field can only be of number type".to_string(), - )) - } + let v = self.parse_value()?; + if matches!(v.value, Value::Number(_, _)) { + CeilFloorKind::Scale(v) + } else { + return Err(ParserError::ParserError( + "Scale field can only be of number type".to_string(), + )); } } else { CeilFloorKind::DateTimeField(DateTimeField::NoDateTime) @@ -3192,7 +3192,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; // MySQL is too permissive about the value, IMO we can't validate it perfectly on syntax level. 
- let match_value = self.parse_value()?.value; + let match_value = self.parse_value()?; let in_natural_language_mode_keywords = &[ Keyword::IN, @@ -4088,9 +4088,9 @@ impl<'a> Parser<'a> { } /// Parse the `ESCAPE CHAR` portion of `LIKE`, `ILIKE`, and `SIMILAR TO` - pub fn parse_escape_char(&mut self) -> Result, ParserError> { + pub fn parse_escape_char(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::ESCAPE) { - Ok(Some(self.parse_value()?.into())) + Ok(Some(self.parse_value()?)) } else { Ok(None) } @@ -7846,11 +7846,11 @@ impl<'a> Parser<'a> { FetchDirection::Last } else if self.parse_keyword(Keyword::ABSOLUTE) { FetchDirection::Absolute { - limit: self.parse_number_value()?.value, + limit: self.parse_number_value()?, } } else if self.parse_keyword(Keyword::RELATIVE) { FetchDirection::Relative { - limit: self.parse_number_value()?.value, + limit: self.parse_number_value()?, } } else if self.parse_keyword(Keyword::FORWARD) { if self.parse_keyword(Keyword::ALL) { @@ -7858,7 +7858,7 @@ impl<'a> Parser<'a> { } else { FetchDirection::Forward { // TODO: Support optional - limit: Some(self.parse_number_value()?.value), + limit: Some(self.parse_number_value()?), } } } else if self.parse_keyword(Keyword::BACKWARD) { @@ -7867,14 +7867,14 @@ impl<'a> Parser<'a> { } else { FetchDirection::Backward { // TODO: Support optional - limit: Some(self.parse_number_value()?.value), + limit: Some(self.parse_number_value()?), } } } else if self.parse_keyword(Keyword::ALL) { FetchDirection::All } else { FetchDirection::Count { - limit: self.parse_number_value()?.value, + limit: self.parse_number_value()?, } }; @@ -11392,7 +11392,7 @@ impl<'a> Parser<'a> { } Some(Keyword::MAXFILESIZE) => { let _ = self.parse_keyword(Keyword::AS); - let size = self.parse_number_value()?.value; + let size = self.parse_number_value()?; let unit = match self.parse_one_of_keywords(&[Keyword::MB, Keyword::GB]) { Some(Keyword::MB) => Some(FileSizeUnit::MB), Some(Keyword::GB) => 
Some(FileSizeUnit::GB), @@ -11465,7 +11465,7 @@ impl<'a> Parser<'a> { } fn parse_file_size(&mut self) -> Result { - let size = self.parse_number_value()?.value; + let size = self.parse_number_value()?; let unit = self.maybe_parse_file_size_unit(); Ok(FileSize { size, unit }) } @@ -14847,7 +14847,7 @@ impl<'a> Parser<'a> { .into()); } else if self.parse_keyword(Keyword::TRANSACTION) { if self.parse_keyword(Keyword::SNAPSHOT) { - let snapshot_id = self.parse_value()?.value; + let snapshot_id = self.parse_value()?; return Ok(Set::SetTransaction { modes: vec![], snapshot: Some(snapshot_id), @@ -15682,7 +15682,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword_with_tokens(Keyword::JSON_TABLE, &[Token::LParen]) { let json_expr = self.parse_expr()?; self.expect_token(&Token::Comma)?; - let json_path = self.parse_value()?.value; + let json_path = self.parse_value()?; self.expect_keyword_is(Keyword::COLUMNS)?; self.expect_token(&Token::LParen)?; let columns = self.parse_comma_separated(Parser::parse_json_table_column_def)?; @@ -15866,9 +15866,9 @@ impl<'a> Parser<'a> { let parenthesized = self.consume_token(&Token::LParen); let (quantity, bucket) = if parenthesized && self.parse_keyword(Keyword::BUCKET) { - let selected_bucket = self.parse_number_value()?.value; + let selected_bucket = self.parse_number_value()?; self.expect_keywords(&[Keyword::OUT, Keyword::OF])?; - let total = self.parse_number_value()?.value; + let total = self.parse_number_value()?; let on = if self.parse_keyword(Keyword::ON) { Some(self.parse_expr()?) 
} else { @@ -15946,7 +15946,7 @@ impl<'a> Parser<'a> { modifier: TableSampleSeedModifier, ) -> Result { self.expect_token(&Token::LParen)?; - let value = self.parse_number_value()?.value; + let value = self.parse_number_value()?; self.expect_token(&Token::RParen)?; Ok(TableSampleSeed { modifier, value }) } @@ -15957,7 +15957,7 @@ impl<'a> Parser<'a> { self.expect_token(&Token::LParen)?; let json_expr = self.parse_expr()?; let json_path = if self.consume_token(&Token::Comma) { - Some(self.parse_value()?.value) + Some(self.parse_value()?) } else { None }; @@ -16419,7 +16419,7 @@ impl<'a> Parser<'a> { pub fn parse_json_table_column_def(&mut self) -> Result { if self.parse_keyword(Keyword::NESTED) { let _has_path_keyword = self.parse_keyword(Keyword::PATH); - let path = self.parse_value()?.value; + let path = self.parse_value()?; self.expect_keyword_is(Keyword::COLUMNS)?; let columns = self.parse_parenthesized(|p| { p.parse_comma_separated(Self::parse_json_table_column_def) @@ -16437,7 +16437,7 @@ impl<'a> Parser<'a> { let r#type = self.parse_data_type()?; let exists = self.parse_keyword(Keyword::EXISTS); self.expect_keyword_is(Keyword::PATH)?; - let path = self.parse_value()?.value; + let path = self.parse_value()?; let mut on_empty = None; let mut on_error = None; while let Some(error_handling) = self.parse_json_table_column_error_handling()? { @@ -16494,7 +16494,7 @@ impl<'a> Parser<'a> { } else if self.parse_keyword(Keyword::ERROR) { JsonTableColumnErrorHandling::Error } else if self.parse_keyword(Keyword::DEFAULT) { - JsonTableColumnErrorHandling::Default(self.parse_value()?.value) + JsonTableColumnErrorHandling::Default(self.parse_value()?) 
} else { return Ok(None); }; @@ -17965,7 +17965,7 @@ impl<'a> Parser<'a> { if dialect_of!(self is GenericDialect | MySqlDialect) && self.parse_keyword(Keyword::SEPARATOR) { - clauses.push(FunctionArgumentClause::Separator(self.parse_value()?.value)); + clauses.push(FunctionArgumentClause::Separator(self.parse_value()?)); } if let Some(on_overflow) = self.parse_listagg_on_overflow()? { @@ -19024,12 +19024,13 @@ impl<'a> Parser<'a> { }) } - fn parse_pragma_value(&mut self) -> Result { - match self.parse_value()?.value { - v @ Value::SingleQuotedString(_) => Ok(v), - v @ Value::DoubleQuotedString(_) => Ok(v), - v @ Value::Number(_, _) => Ok(v), - v @ Value::Placeholder(_) => Ok(v), + fn parse_pragma_value(&mut self) -> Result { + let v = self.parse_value()?; + match &v.value { + Value::SingleQuotedString(_) => Ok(v), + Value::DoubleQuotedString(_) => Ok(v), + Value::Number(_, _) => Ok(v), + Value::Placeholder(_) => Ok(v), _ => { self.prev_token(); self.expected_ref("number or string or ? placeholder", self.peek_token_ref()) @@ -19809,7 +19810,7 @@ impl<'a> Parser<'a> { let threshold = if self.parse_keyword(Keyword::TO) { let value = self.parse_value()?; self.expect_keyword(Keyword::PERCENT)?; - Some(value.value) + Some(value) } else { None }; @@ -19937,9 +19938,9 @@ impl<'a> Parser<'a> { })) } - fn maybe_parse_show_stmt_starts_with(&mut self) -> Result, ParserError> { + fn maybe_parse_show_stmt_starts_with(&mut self) -> Result, ParserError> { if self.parse_keywords(&[Keyword::STARTS, Keyword::WITH]) { - Ok(Some(self.parse_value()?.value)) + Ok(Some(self.parse_value()?)) } else { Ok(None) } @@ -19953,9 +19954,9 @@ impl<'a> Parser<'a> { } } - fn maybe_parse_show_stmt_from(&mut self) -> Result, ParserError> { + fn maybe_parse_show_stmt_from(&mut self) -> Result, ParserError> { if self.parse_keyword(Keyword::FROM) { - Ok(Some(self.parse_value()?.value)) + Ok(Some(self.parse_value()?)) } else { Ok(None) } @@ -20018,30 +20019,31 @@ impl<'a> Parser<'a> { key: &Word, ) -> 
Result { self.expect_token(&Token::Eq)?; - match self.peek_token().token { + let peeked_token = self.peek_token(); + match peeked_token.token { Token::SingleQuotedString(_) => Ok(KeyValueOption { option_name: key.value.clone(), - option_value: KeyValueOptionKind::Single(self.parse_value()?.into()), + option_value: KeyValueOptionKind::Single(self.parse_value()?), }), Token::Word(word) if word.keyword == Keyword::TRUE || word.keyword == Keyword::FALSE => { Ok(KeyValueOption { option_name: key.value.clone(), - option_value: KeyValueOptionKind::Single(self.parse_value()?.into()), + option_value: KeyValueOptionKind::Single(self.parse_value()?), }) } Token::Number(..) => Ok(KeyValueOption { option_name: key.value.clone(), - option_value: KeyValueOptionKind::Single(self.parse_value()?.into()), + option_value: KeyValueOptionKind::Single(self.parse_value()?), }), Token::Word(word) => { self.next_token(); Ok(KeyValueOption { option_name: key.value.clone(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - word.value.clone(), - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder(word.value.clone()).with_span(peeked_token.span), + ), }) } Token::LParen => { @@ -20054,13 +20056,10 @@ impl<'a> Parser<'a> { parser.expect_token(&Token::RParen)?; values })? 
{ - Some(values) => { - let values = values.into_iter().map(|v| v.value).collect(); - Ok(KeyValueOption { - option_name: key.value.clone(), - option_value: KeyValueOptionKind::Multi(values), - }) - } + Some(values) => Ok(KeyValueOption { + option_name: key.value.clone(), + option_value: KeyValueOptionKind::Multi(values), + }), None => Ok(KeyValueOption { option_name: key.value.clone(), option_value: KeyValueOptionKind::KeyValueOptions(Box::new( diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index cff29bfecd..17f368bbb7 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -2104,7 +2104,7 @@ fn parse_ilike() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some(Value::SingleQuotedString('^'.to_string())), + escape_char: Some(Value::SingleQuotedString('^'.to_string()).with_empty_span()), any: false, }, select.selection.unwrap() @@ -2168,7 +2168,7 @@ fn parse_like() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some(Value::SingleQuotedString('^'.to_string())), + escape_char: Some(Value::SingleQuotedString('^'.to_string()).with_empty_span()), any: false, }, select.selection.unwrap() @@ -2231,7 +2231,7 @@ fn parse_similar_to() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some(Value::SingleQuotedString('^'.to_string())), + escape_char: Some(Value::SingleQuotedString('^'.to_string()).with_empty_span()), }, select.selection.unwrap() ); @@ -2248,7 +2248,7 @@ fn parse_similar_to() { pattern: Box::new(Expr::Value( (Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some(Value::Null), + escape_char: Some(Value::Null.with_empty_span()), }, select.selection.unwrap() ); @@ -2266,7 +2266,7 @@ fn parse_similar_to() { pattern: Box::new(Expr::Value( 
(Value::SingleQuotedString("%a".to_string())).with_empty_span() )), - escape_char: Some(Value::SingleQuotedString('^'.to_string())), + escape_char: Some(Value::SingleQuotedString('^'.to_string()).with_empty_span()), })), select.selection.unwrap() ); @@ -3324,7 +3324,9 @@ fn parse_ceil_scale() { assert_eq!( &Expr::Ceil { expr: Box::new(Expr::Identifier(Ident::new("d"))), - field: CeilFloorKind::Scale(Value::Number(bigdecimal::BigDecimal::from(2), false)), + field: CeilFloorKind::Scale( + Value::Number(bigdecimal::BigDecimal::from(2), false).with_empty_span() + ), }, expr_from_projection(only(&select.projection)), ); @@ -3333,7 +3335,7 @@ fn parse_ceil_scale() { assert_eq!( &Expr::Ceil { expr: Box::new(Expr::Identifier(Ident::new("d"))), - field: CeilFloorKind::Scale(Value::Number(2.to_string(), false)), + field: CeilFloorKind::Scale(Value::Number(2.to_string(), false).with_empty_span()), }, expr_from_projection(only(&select.projection)), ); @@ -3348,7 +3350,9 @@ fn parse_floor_scale() { assert_eq!( &Expr::Floor { expr: Box::new(Expr::Identifier(Ident::new("d"))), - field: CeilFloorKind::Scale(Value::Number(bigdecimal::BigDecimal::from(2), false)), + field: CeilFloorKind::Scale( + Value::Number(bigdecimal::BigDecimal::from(2), false).with_empty_span() + ), }, expr_from_projection(only(&select.projection)), ); @@ -3357,7 +3361,7 @@ fn parse_floor_scale() { assert_eq!( &Expr::Floor { expr: Box::new(Expr::Identifier(Ident::new("d"))), - field: CeilFloorKind::Scale(Value::Number(2.to_string(), false)), + field: CeilFloorKind::Scale(Value::Number(2.to_string(), false).with_empty_span()), }, expr_from_projection(only(&select.projection)), ); @@ -17666,19 +17670,21 @@ fn parse_create_user() { options: vec![ KeyValueOption { option_name: "PASSWORD".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "secret".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("secret".to_string()).with_empty_span() + ), 
}, KeyValueOption { option_name: "MUST_CHANGE_PASSWORD".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(false)), + option_value: KeyValueOptionKind::Single( + Value::Boolean(false).with_empty_span() + ), }, KeyValueOption { option_name: "TYPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - "SERVICE".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("SERVICE".to_string()).with_empty_span() + ), }, ], }, @@ -17691,15 +17697,15 @@ fn parse_create_user() { options: vec![ KeyValueOption { option_name: "t1".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "v1".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("v1".to_string()).with_empty_span() + ), }, KeyValueOption { option_name: "t2".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "v2".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("v2".to_string()).with_empty_span() + ), }, ] } @@ -18325,9 +18331,9 @@ fn test_parse_alter_user() { alter.set_tag.options, vec![KeyValueOption { option_name: "k1".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "v1".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("v1".to_string()).with_empty_span() + ), },] ); } @@ -18361,17 +18367,21 @@ fn test_parse_alter_user() { options: vec![ KeyValueOption { option_name: "PASSWORD".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "secret".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("secret".to_string()).with_empty_span() + ), }, KeyValueOption { option_name: "MUST_CHANGE_PASSWORD".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(true)), + option_value: KeyValueOptionKind::Single( + Value::Boolean(true).with_empty_span() + ), }, 
KeyValueOption { option_name: "MINS_TO_UNLOCK".to_string(), - option_value: KeyValueOptionKind::Single(number("10")), + option_value: KeyValueOptionKind::Single( + number("10").with_empty_span() + ), }, ] } @@ -18398,7 +18408,8 @@ fn test_parse_alter_user() { option_name: "DEFAULT_SECONDARY_ROLES".to_string(), option_value: KeyValueOptionKind::Multi(vec![Value::SingleQuotedString( "ALL".to_string() - )]) + ) + .with_empty_span()]) }] ); } @@ -18422,9 +18433,9 @@ fn test_parse_alter_user() { options: vec![ KeyValueOption { option_name: "TYPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - "AWS".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("AWS".to_string()).with_empty_span() + ), }, KeyValueOption { option_name: "ARN".to_string(), @@ -18432,6 +18443,7 @@ fn test_parse_alter_user() { Value::SingleQuotedString( "arn:aws:iam::123456789:r1/".to_string() ) + .with_empty_span() ), }, ] diff --git a/tests/sqlparser_mssql.rs b/tests/sqlparser_mssql.rs index 733923f4dc..e8ed79492d 100644 --- a/tests/sqlparser_mssql.rs +++ b/tests/sqlparser_mssql.rs @@ -496,7 +496,7 @@ fn parse_mssql_openjson() { json_expr: Expr::CompoundIdentifier( vec![Ident::new("A"), Ident::new("param"),] ), - json_path: Some(Value::SingleQuotedString("$.config".into())), + json_path: Some(Value::SingleQuotedString("$.config".into()).with_empty_span()), columns: vec![ OpenJsonTableColumn { name: Ident::new("kind"), @@ -658,7 +658,7 @@ fn parse_mssql_openjson() { json_expr: Expr::CompoundIdentifier( vec![Ident::new("A"), Ident::new("param"),] ), - json_path: Some(Value::SingleQuotedString("$.config".into())), + json_path: Some(Value::SingleQuotedString("$.config".into()).with_empty_span()), columns: vec![], alias: table_alias(true, "B") }, diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 6c59997c32..269787c295 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -3810,14 +3810,14 @@ fn 
parse_json_table() { .relation, TableFactor::JsonTable { json_expr: Expr::Value((Value::SingleQuotedString("[1,2]".to_string())).with_empty_span()), - json_path: Value::SingleQuotedString("$[*]".to_string()), + json_path: Value::SingleQuotedString("$[*]".to_string()).with_empty_span(), columns: vec![ JsonTableColumn::Named(JsonTableNamedColumn { name: Ident::new("x"), r#type: DataType::Int(None), - path: Value::SingleQuotedString("$".to_string()), + path: Value::SingleQuotedString("$".to_string()).with_empty_span(), exists: false, - on_empty: Some(JsonTableColumnErrorHandling::Default(Value::SingleQuotedString("0".to_string()))), + on_empty: Some(JsonTableColumnErrorHandling::Default(Value::SingleQuotedString("0".to_string()).with_empty_span())), on_error: Some(JsonTableColumnErrorHandling::Null), }), ], @@ -4233,7 +4233,10 @@ fn parse_match_against_with_alias() { Ident::new("ReferenceID") ])] ); - assert_eq!(match_value, Value::SingleQuotedString("AAA".to_owned())); + assert_eq!( + match_value, + Value::SingleQuotedString("AAA".to_owned()).with_empty_span() + ); assert_eq!(opt_search_modifier, Some(SearchModifier::InBooleanMode)); } _ => unreachable!(), diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 9a4ff418e9..af0f2be334 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -3287,7 +3287,7 @@ fn test_transaction_statement() { statement, Statement::Set(Set::SetTransaction { modes: vec![], - snapshot: Some(Value::SingleQuotedString(String::from("000003A1-1"))), + snapshot: Some(Value::SingleQuotedString(String::from("000003A1-1")).with_empty_span()), session: false }) ); diff --git a/tests/sqlparser_redshift.rs b/tests/sqlparser_redshift.rs index 319a818cdf..e68368fd48 100644 --- a/tests/sqlparser_redshift.rs +++ b/tests/sqlparser_redshift.rs @@ -446,7 +446,7 @@ fn parse_vacuum() { Ident::new("tbl1"), ])) ); - assert_eq!(v.threshold, Some(number("20"))); + assert_eq!(v.threshold, 
Some(number("20").with_empty_span())); assert!(v.boost); } _ => unreachable!(), diff --git a/tests/sqlparser_snowflake.rs b/tests/sqlparser_snowflake.rs index 666876fa72..790bf15151 100644 --- a/tests/sqlparser_snowflake.rs +++ b/tests/sqlparser_snowflake.rs @@ -2043,27 +2043,27 @@ fn test_create_stage_with_stage_params() { ); assert!(stage_params.credentials.options.contains(&KeyValueOption { option_name: "AWS_KEY_ID".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "1a2b3c".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("1a2b3c".to_string()).with_empty_span() + ), })); assert!(stage_params.credentials.options.contains(&KeyValueOption { option_name: "AWS_SECRET_KEY".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "4x5y6z".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("4x5y6z".to_string()).with_empty_span() + ), })); assert!(stage_params.encryption.options.contains(&KeyValueOption { option_name: "MASTER_KEY".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "key".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("key".to_string()).with_empty_span() + ), })); assert!(stage_params.encryption.options.contains(&KeyValueOption { option_name: "TYPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "AWS_SSE_KMS".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("AWS_SSE_KMS".to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2087,17 +2087,17 @@ fn test_create_stage_with_directory_table_params() { } => { assert!(directory_table_params.options.contains(&KeyValueOption { option_name: "ENABLE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(true)), + option_value: KeyValueOptionKind::Single(Value::Boolean(true).with_empty_span()), 
})); assert!(directory_table_params.options.contains(&KeyValueOption { option_name: "REFRESH_ON_CREATE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(false)), + option_value: KeyValueOptionKind::Single(Value::Boolean(false).with_empty_span()), })); assert!(directory_table_params.options.contains(&KeyValueOption { option_name: "NOTIFICATION_INTEGRATION".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "some-string".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("some-string".to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2117,17 +2117,21 @@ fn test_create_stage_with_file_format() { Statement::CreateStage { file_format, .. } => { assert!(file_format.options.contains(&KeyValueOption { option_name: "COMPRESSION".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("AUTO".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("AUTO".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "BINARY_FORMAT".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("HEX".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("HEX".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "ESCAPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - r#"\\"#.to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString(r#"\\"#.to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2149,13 +2153,13 @@ fn test_create_stage_with_copy_options() { Statement::CreateStage { copy_options, .. 
} => { assert!(copy_options.options.contains(&KeyValueOption { option_name: "ON_ERROR".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - "CONTINUE".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("CONTINUE".to_string()).with_empty_span() + ), })); assert!(copy_options.options.contains(&KeyValueOption { option_name: "FORCE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(true)), + option_value: KeyValueOptionKind::Single(Value::Boolean(true).with_empty_span()), })); } _ => unreachable!(), @@ -2286,27 +2290,27 @@ fn test_copy_into_with_stage_params() { ); assert!(stage_params.credentials.options.contains(&KeyValueOption { option_name: "AWS_KEY_ID".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "1a2b3c".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("1a2b3c".to_string()).with_empty_span() + ), })); assert!(stage_params.credentials.options.contains(&KeyValueOption { option_name: "AWS_SECRET_KEY".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "4x5y6z".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("4x5y6z".to_string()).with_empty_span() + ), })); assert!(stage_params.encryption.options.contains(&KeyValueOption { option_name: "MASTER_KEY".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "key".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("key".to_string()).with_empty_span() + ), })); assert!(stage_params.encryption.options.contains(&KeyValueOption { option_name: "TYPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - "AWS_SSE_KMS".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString("AWS_SSE_KMS".to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2457,17 
+2461,21 @@ fn test_copy_into_file_format() { Statement::CopyIntoSnowflake { file_format, .. } => { assert!(file_format.options.contains(&KeyValueOption { option_name: "COMPRESSION".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("AUTO".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("AUTO".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "BINARY_FORMAT".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("HEX".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("HEX".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "ESCAPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - r#"\\"#.to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString(r#"\\"#.to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2495,17 +2503,21 @@ fn test_copy_into_file_format() { Statement::CopyIntoSnowflake { file_format, .. 
} => { assert!(file_format.options.contains(&KeyValueOption { option_name: "COMPRESSION".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("AUTO".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("AUTO".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "BINARY_FORMAT".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder("HEX".to_string())), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("HEX".to_string()).with_empty_span() + ), })); assert!(file_format.options.contains(&KeyValueOption { option_name: "ESCAPE".to_string(), - option_value: KeyValueOptionKind::Single(Value::SingleQuotedString( - r#"\\"#.to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::SingleQuotedString(r#"\\"#.to_string()).with_empty_span() + ), })); } _ => unreachable!(), @@ -2526,13 +2538,13 @@ fn test_copy_into_copy_options() { Statement::CopyIntoSnowflake { copy_options, .. 
} => { assert!(copy_options.options.contains(&KeyValueOption { option_name: "ON_ERROR".to_string(), - option_value: KeyValueOptionKind::Single(Value::Placeholder( - "CONTINUE".to_string() - )), + option_value: KeyValueOptionKind::Single( + Value::Placeholder("CONTINUE".to_string()).with_empty_span() + ), })); assert!(copy_options.options.contains(&KeyValueOption { option_name: "FORCE".to_string(), - option_value: KeyValueOptionKind::Single(Value::Boolean(true)), + option_value: KeyValueOptionKind::Single(Value::Boolean(true).with_empty_span()), })); } _ => unreachable!(), From 913cf0e79b31d494eef15b44068f88d786e6fe8d Mon Sep 17 00:00:00 2001 From: Minjun Kim <48622976+funcpp@users.noreply.github.com> Date: Thu, 2 Apr 2026 13:11:54 +0900 Subject: [PATCH 120/121] Enable `!` as NOT operator for Databricks dialect (#2287) --- src/dialect/databricks.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/dialect/databricks.rs b/src/dialect/databricks.rs index 55e4f56cc5..876eef22f8 100644 --- a/src/dialect/databricks.rs +++ b/src/dialect/databricks.rs @@ -90,4 +90,9 @@ impl Dialect for DatabricksDialect { fn supports_optimize_table(&self) -> bool { true } + + /// See + fn supports_bang_not_operator(&self) -> bool { + true + } } From 6b64dbaa534ccfd69c26acc3efa77b395b2d89dc Mon Sep 17 00:00:00 2001 From: Ayman Elkfrawy Date: Mon, 6 Apr 2026 17:21:50 -0700 Subject: [PATCH 121/121] Fix compilation errors and update tests after upstream sync - Add missing `pos` field to State initializer in tokenizer hint parsing - Remove spurious `sample` reference that leaked into PassThroughQuery Display - Add `sample` AfterTableAlias check back to Derived Display - Add missing doc comments to sigma types (InExpr, PassThroughQuery, TokenWithRange) required by upstream's new #![forbid(missing_docs)] - Remove unfulfilled #![expect(clippy::unnecessary_unwrap)] from lib.rs - Add has_colon: true to JsonPath initializers in Snowflake-style colon-path tests - Update Databricks a:['b'] 
test to use new ColonBracket AST node (upstream change) - Exclude DatabricksDialect from parse_array_subscript test since Databricks uses `:` for JSON paths, conflicting with array slice syntax arr[1:2] Co-Authored-By: Claude Sonnet 4.6 --- src/ast/mod.rs | 3 +++ src/ast/query.rs | 8 +++++--- src/lib.rs | 2 -- src/tokenizer.rs | 7 +++++++ tests/sqlparser_common.rs | 9 +++++++++ tests/sqlparser_databricks.rs | 6 +++--- 6 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 196a4524cf..8ca056ff4a 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -934,8 +934,11 @@ pub enum Expr { /// XXX not valid SQL syntax, this is a hack needed to support parameter substitution /// `[ NOT ] IN ` InExpr { + /// Left-hand expression to test for membership. expr: Box, + /// The expression providing the candidate values (used for parameter substitution). in_expr: Box, + /// `true` when the `NOT` modifier is present. negated: bool, }, /// `[ NOT ] IN UNNEST(array_expression)` diff --git a/src/ast/query.rs b/src/ast/query.rs index f8a9df1946..9fea9e3d1c 100644 --- a/src/ast/query.rs +++ b/src/ast/query.rs @@ -1495,7 +1495,9 @@ pub enum TableFactor { /// A pass-through query string that is not parsed. /// This is useful while building/rewriting queries with a known valid SQL string and to avoid parsing it. PassThroughQuery { + /// The raw SQL query string to pass through without parsing. query: String, + /// Optional alias for the pass-through query. 
alias: Option, }, /// `TABLE()[ AS ]` @@ -2259,6 +2261,9 @@ impl fmt::Display for TableFactor { if let Some(alias) = alias { write!(f, " {alias}")?; } + if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { + write!(f, " {sample}")?; + } Ok(()) } TableFactor::PassThroughQuery { query, alias } => { @@ -2266,9 +2271,6 @@ impl fmt::Display for TableFactor { if let Some(alias) = alias { write!(f, " {alias}")?; } - if let Some(TableSampleKind::AfterTableAlias(sample)) = sample { - write!(f, " {sample}")?; - } Ok(()) } TableFactor::Function { diff --git a/src/lib.rs b/src/lib.rs index e2e2c39ad5..e68d7f93eb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -153,8 +153,6 @@ // Splitting complex nodes (expressions, statements, types) into separate types // would bloat the API and hide intent. Extra memory is a worthwhile tradeoff. #![allow(clippy::large_enum_variant)] -// TODO: Fix and remove this. -#![expect(clippy::unnecessary_unwrap)] #![forbid(clippy::unreachable)] #![forbid(missing_docs)] diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 37cadd2121..a664a81c41 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -788,13 +788,18 @@ impl fmt::Display for TokenWithSpan { } } +/// A token together with its byte-offset range in the source string. pub struct TokenWithRange { + /// The token. pub token: Token, + /// The byte offset of the start of the token in the source string. pub start: usize, + /// The byte offset of the end of the token in the source string. pub end: usize, } impl TokenWithRange { + /// Creates a new [`TokenWithRange`] with the given token and byte-offset range. pub fn new(token: Token, start: usize, end: usize) -> Self { Self { token, start, end } } @@ -957,6 +962,7 @@ impl<'a> Tokenizer<'a> { Ok(twl.into_iter().map(|t| t.token).collect()) } + /// Tokenize the statement and produce a vector of tokens with their byte-offset ranges. 
pub fn tokenize_with_range(&mut self) -> Result, TokenizerError> { let mut tokens = Vec::::new(); let mut state = State { @@ -1057,6 +1063,7 @@ impl<'a> Tokenizer<'a> { // Create a state for tracking position within the hint let mut state = State { peekable: hint_content.chars().peekable(), + pos: 0, line: span.start.line, col: span.start.column, }; diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index e7eb2d5943..9055f93aa0 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -18647,6 +18647,7 @@ fn parse_semi_structured_data_traversal() { SelectItem::UnnamedExpr(Expr::JsonAccess { value: Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { + has_colon: true, path: vec![JsonPathElem::Dot { key: "b".to_owned(), quoted: false @@ -18663,6 +18664,7 @@ fn parse_semi_structured_data_traversal() { SelectItem::UnnamedExpr(Expr::JsonAccess { value: Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { + has_colon: true, path: vec![JsonPathElem::Dot { key: "my long object key name".to_owned(), quoted: true @@ -18682,6 +18684,7 @@ fn parse_semi_structured_data_traversal() { SelectItem::UnnamedExpr(Expr::JsonAccess { value: Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { + has_colon: true, path: vec![JsonPathElem::Dot { key: "select".to_owned(), quoted: false @@ -18691,6 +18694,7 @@ fn parse_semi_structured_data_traversal() { SelectItem::UnnamedExpr(Expr::JsonAccess { value: Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { + has_colon: true, path: vec![JsonPathElem::Dot { key: "from".to_owned(), quoted: false @@ -18709,6 +18713,7 @@ fn parse_semi_structured_data_traversal() { vec![SelectItem::UnnamedExpr(Expr::JsonAccess { value: Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { + has_colon: true, path: vec![ JsonPathElem::Dot { key: "foo".to_owned(), @@ -18736,6 +18741,7 @@ fn parse_semi_structured_data_traversal() { vec![SelectItem::UnnamedExpr(Expr::JsonAccess { value: 
Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { + has_colon: true, path: vec![ JsonPathElem::Dot { key: "foo".to_owned(), @@ -18762,6 +18768,9 @@ fn parse_array_subscript() { || d.is::() || d.is::() || d.is::() + // Databricks uses `:` for JSON path access (high precedence), which conflicts + // with array slice syntax `arr[1:2]`. + || d.is::() }); dialects.verified_stmt("SELECT arr[1]"); diff --git a/tests/sqlparser_databricks.rs b/tests/sqlparser_databricks.rs index 16e9b22c7d..d7e02ce4cc 100644 --- a/tests/sqlparser_databricks.rs +++ b/tests/sqlparser_databricks.rs @@ -718,16 +718,16 @@ fn parse_semi_structured_data_traversal() { select.projection[0] ); - // asterisk for arrays + // colon bracket notation: a:['b'].c let sql = "SELECT a:['b'].c FROM t"; let select = databricks().verified_only_select(sql); assert_eq!( SelectItem::UnnamedExpr(Expr::JsonAccess { value: Box::new(Expr::Identifier(Ident::new("a"))), path: JsonPath { - has_colon: true, + has_colon: false, path: vec![ - JsonPathElem::Bracket { + JsonPathElem::ColonBracket { key: Expr::value(Value::SingleQuotedString("b".to_owned())), }, JsonPathElem::Dot {