Skip to content

Commit 4c69cda

Browse files
authored
Fix 53 parser issues: keyword types, implicit aliases, empty tuples, CAST syntax (#13)
1 parent d84f12f commit 4c69cda

14 files changed

Lines changed: 1182 additions & 187 deletions

File tree

TODO.md

Lines changed: 127 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -2,120 +2,179 @@
22

33
## Current State
44

5-
- **Tests passing:** 5,197 (76.2%)
6-
- **Tests skipped:** 1,627 (23.8%)
7-
- Parser issues: ~675
8-
- Explain mismatches: ~637
5+
- **Tests passing:** 5,933 (86.9%)
6+
- **Tests skipped:** 891 (13.1%)
97

10-
## Parser Issues
8+
## Recently Fixed (explain layer)
9+
10+
- ✅ TableJoin output - removed join type keywords
11+
- ✅ Table function aliases (e.g., `remote('127.1') AS t1`)
12+
- ✅ Table identifier aliases (e.g., `system.one AS xxx`)
13+
- ✅ Array/tuple cast formatting for `::` syntax
14+
- ✅ SETTINGS placement with FORMAT clause
15+
- ✅ Concat operator `||` flattening into single `concat` function
16+
- ✅ Window function (OVER clause) support
17+
- ✅ Float literal formatting
18+
- ✅ Aliased expression handling for binary/unary/function/identifier
19+
- ✅ PARTITION BY support in CREATE TABLE
20+
- ✅ Server error message stripping from expected output
21+
22+
## Parser Issues (High Priority)
1123

1224
These require changes to `parser/parser.go`:
1325

14-
### Table/Database Names Starting with Numbers
15-
Tables and databases with names starting with digits fail to parse:
26+
### DROP TABLE with Multiple Tables
27+
Parser only captures first table when multiple are specified:
1628
```sql
17-
DROP TABLE IF EXISTS 03657_gby_overflow;
18-
DROP DATABASE IF EXISTS 03710_database;
29+
DROP TABLE IF EXISTS t1, t2, t3;
30+
-- Expected: ExpressionList with 3 TableIdentifiers
31+
-- Got: Single Identifier for t1
1932
```
2033

21-
### FORMAT Null
22-
The `FORMAT Null` clause is not recognized:
34+
### Negative Integer Literals
35+
Negative numbers are parsed as `Function negate` instead of negative literals:
2336
```sql
24-
SELECT ... FORMAT Null;
37+
SELECT -1, -10000;
38+
-- Expected: Literal Int64_-1
39+
-- Got: Function negate (children 1) with Literal UInt64_1
2540
```
2641

27-
### FETCH FIRST ... ROW ONLY
28-
SQL standard fetch syntax is not supported:
42+
### CREATE TABLE with INDEX Clause
43+
INDEX definitions in CREATE TABLE are not captured:
2944
```sql
30-
SELECT ... FETCH FIRST 1 ROW ONLY;
45+
CREATE TABLE t (x Array(String), INDEX idx1 x TYPE bloom_filter(0.025)) ENGINE=MergeTree;
3146
```
3247

33-
### INSERT INTO FUNCTION
34-
Function-based inserts are not supported:
48+
### SETTINGS Inside Function Arguments
49+
SETTINGS clause within function calls is not parsed:
3550
```sql
36-
INSERT INTO FUNCTION file('file.parquet') SELECT ...;
51+
SELECT * FROM icebergS3(s3_conn, filename='test', SETTINGS key='value');
52+
-- The SETTINGS should become a Set child of the function
3753
```
3854

39-
### WITH ... AS Subquery Aliases
40-
Subquery aliases in FROM clauses with keyword `AS`:
55+
### CREATE TABLE with Column TTL
56+
TTL expressions on columns are not captured:
4157
```sql
42-
SELECT * FROM (SELECT 1 x) AS alias;
58+
CREATE TABLE t (c Int TTL expr()) ENGINE=MergeTree;
59+
-- Expected: ColumnDeclaration with 2 children (type + TTL function)
4360
```
4461

45-
### String Concatenation Operator ||
46-
The `||` operator in some contexts:
62+
### Empty Tuple in ORDER BY
63+
`ORDER BY ()` should capture empty tuple expression:
4764
```sql
48-
SELECT currentDatabase() || '_test' AS key;
65+
CREATE TABLE t (...) ENGINE=MergeTree ORDER BY ();
66+
-- Expected: Function tuple (children 1) with empty ExpressionList
67+
-- Got: Storage definition with no ORDER BY
4968
```
5069

51-
### MOD/DIV Operators
52-
The MOD and DIV keywords as operators:
70+
### String Escape Handling
71+
Parser stores escaped characters literally instead of unescaping:
5372
```sql
54-
SELECT number MOD 3, number DIV 3 FROM ...;
73+
SELECT 'x\'e2\'';
74+
-- Parser stores: x\'e2\' (with backslashes)
75+
-- Should store: x'e2' (unescaped)
5576
```
5677

57-
### Reserved Keyword Handling
58-
Keywords like `LEFT`, `RIGHT` used as table aliases:
78+
## Parser Issues (Medium Priority)
79+
80+
### CREATE DICTIONARY
81+
Dictionary definitions are not supported:
5982
```sql
60-
SELECT * FROM numbers(10) AS left RIGHT JOIN ...;
83+
CREATE DICTIONARY d0 (c1 UInt64) PRIMARY KEY c1 LAYOUT(FLAT()) SOURCE(...);
6184
```
6285

63-
### Parameterized Settings
64-
Settings with `$` parameters:
86+
### CREATE USER / CREATE FUNCTION
87+
User and function definitions are not supported:
6588
```sql
66-
SET param_$1 = 'Hello';
89+
CREATE USER test_user GRANTEES ...;
90+
CREATE OR REPLACE FUNCTION myFunc AS ...;
6791
```
6892

69-
### Incomplete CASE Expression
70-
CASE without END:
93+
### QUALIFY Clause
94+
Window function filtering clause:
7195
```sql
72-
SELECT CASE number -- missing END
96+
SELECT x QUALIFY row_number() OVER () = 1;
7397
```
7498

75-
## Explain Output Issues
99+
### INTO OUTFILE with TRUNCATE
100+
Extended INTO OUTFILE syntax:
101+
```sql
102+
SELECT 1, 2 INTO OUTFILE '/dev/null' TRUNCATE FORMAT Npy;
103+
```
76104

77-
These require changes to `internal/explain/`:
105+
### GROUPING SETS
106+
Advanced grouping syntax:
107+
```sql
108+
SELECT ... GROUP BY GROUPING SETS ((a), (b));
109+
```
78110

79-
### Double Equals (==) Operator
80-
The `==` operator creates extra nested equals/tuple nodes:
111+
### view() Table Function
112+
The view() table function in FROM:
81113
```sql
82-
SELECT value == '127.0.0.1:9181'
114+
SELECT * FROM view(SELECT 1 as id);
83115
```
84-
Expected: `Function equals` with `Identifier` and `Literal`
85-
Got: Nested `Function equals` with extra `Function tuple`
86116

87-
### CreateQuery Spacing
88-
Some ClickHouse versions output extra space before `(children`:
117+
### CREATE TABLE ... AS SELECT
118+
CREATE TABLE with inline SELECT:
119+
```sql
120+
CREATE TABLE src ENGINE=Memory AS SELECT 1;
89121
```
90-
CreateQuery d1 (children 1) -- two spaces
91-
CreateQuery d1 (children 1) -- one space (our output)
122+
123+
### Variant() Type with PRIMARY KEY
124+
Complex column definitions:
125+
```sql
126+
CREATE TABLE t (c Variant() PRIMARY KEY) ENGINE=Redis(...);
92127
```
93128

94-
### Server Error Messages in Expected Output
95-
Some test expected outputs include trailing messages:
129+
## Parser Issues (Lower Priority)
130+
131+
### INTERVAL with Dynamic Type
132+
INTERVAL with type cast:
133+
```sql
134+
SELECT INTERVAL 1 MINUTE AS c0, INTERVAL c0::Dynamic DAY;
96135
```
97-
The query succeeded but the server error '42' was expected
136+
137+
### ALTER TABLE with Multiple Operations
138+
Multiple ALTER operations in parentheses:
139+
```sql
140+
ALTER TABLE t (DELETE WHERE ...), (MODIFY SETTING ...), (UPDATE ... WHERE ...);
98141
```
99-
These are not part of the actual EXPLAIN output.
100142

101-
## Lower Priority
143+
### Tuple Type in Column with Subfield Access
144+
Tuple column whose subfield is referenced in the table's PRIMARY KEY:
145+
```sql
146+
CREATE TABLE t (t Tuple(a Int32)) ENGINE=EmbeddedRocksDB() PRIMARY KEY (t.a);
147+
```
102148

103-
### DateTime64 with Timezone
104-
Type parameters with string timezone:
149+
### INSERT INTO FUNCTION with input()
150+
INSERT using input() function:
105151
```sql
106-
DateTime64(3,'UTC')
152+
INSERT INTO FUNCTION null() SELECT * FROM input('x Int') ...;
107153
```
108154

109-
### Complex Type Expressions
110-
Nested type expressions in column definitions:
155+
## Explain Issues (Remaining)
156+
157+
### Scientific Notation for Floats
158+
Very small/large floats should use scientific notation:
111159
```sql
112-
CREATE TABLE t (c LowCardinality(UUID));
160+
SELECT 2.2250738585072014e-308;
161+
-- Expected: Float64_2.2250738585072014e-308
162+
-- Got: Float64_0.0000...22250738585072014
113163
```
114164

115-
### Parameterized Views
116-
View definitions with parameters:
165+
### Array Literals with Negative Numbers
166+
Arrays with negative integers expand to Function instead of Literal:
117167
```sql
118-
CREATE VIEW v AS SELECT ... WHERE x={parity:Int8};
168+
SELECT [-10000, 5750];
169+
-- Expected: Literal Array_[Int64_-10000, UInt64_5750]
170+
-- Got: Function array with Function negate for -10000
171+
```
172+
173+
### WithElement for CTE Subqueries
174+
Some CTE subqueries should use WithElement wrapper:
175+
```sql
176+
WITH sub AS (SELECT ...) SELECT ...;
177+
-- Expected: WithElement (children 1) > Subquery > SelectWithUnionQuery
119178
```
120179

121180
## Testing Notes
@@ -127,10 +186,15 @@ go test ./parser -timeout 5s -v
127186

128187
Count test results:
129188
```bash
130-
go test ./parser -timeout 5s -v 2>&1 | grep -E 'PASS:|SKIP:' | cut -d':' -f1 | sort | uniq -c
189+
go test ./parser -v 2>&1 | grep -E 'PASS:|SKIP:' | wc -l
131190
```
132191

133192
View explain mismatches:
134193
```bash
135-
go test ./parser -timeout 5s -v 2>&1 | grep -A 30 "TODO: Explain output mismatch" | head -100
194+
go test ./parser -v 2>&1 | grep -A 30 "TODO: Explain output mismatch" | head -100
195+
```
196+
197+
View parser failures:
198+
```bash
199+
go test ./parser -v 2>&1 | grep "TODO: Parser does not yet support" | head -20
136200
```

ast/ast.go

Lines changed: 63 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22
package ast
33

44
import (
5+
"encoding/json"
6+
"math"
7+
58
"github.com/kyleconroy/doubleclick/token"
69
)
710

@@ -51,6 +54,7 @@ type SelectQuery struct {
5154
Where Expression `json:"where,omitempty"`
5255
GroupBy []Expression `json:"group_by,omitempty"`
5356
WithRollup bool `json:"with_rollup,omitempty"`
57+
WithCube bool `json:"with_cube,omitempty"`
5458
WithTotals bool `json:"with_totals,omitempty"`
5559
Having Expression `json:"having,omitempty"`
5660
Window []*WindowDefinition `json:"window,omitempty"`
@@ -199,13 +203,14 @@ func (s *SettingExpr) End() token.Position { return s.Position }
199203

200204
// InsertQuery represents an INSERT statement.
201205
type InsertQuery struct {
202-
Position token.Position `json:"-"`
203-
Database string `json:"database,omitempty"`
204-
Table string `json:"table,omitempty"`
205-
Function *FunctionCall `json:"function,omitempty"` // For INSERT INTO FUNCTION syntax
206-
Columns []*Identifier `json:"columns,omitempty"`
207-
Select Statement `json:"select,omitempty"`
208-
Format *Identifier `json:"format,omitempty"`
206+
Position token.Position `json:"-"`
207+
Database string `json:"database,omitempty"`
208+
Table string `json:"table,omitempty"`
209+
Function *FunctionCall `json:"function,omitempty"` // For INSERT INTO FUNCTION syntax
210+
Columns []*Identifier `json:"columns,omitempty"`
211+
Select Statement `json:"select,omitempty"`
212+
Format *Identifier `json:"format,omitempty"`
213+
HasSettings bool `json:"has_settings,omitempty"` // For SETTINGS clause
209214
}
210215

211216
func (i *InsertQuery) Pos() token.Position { return i.Position }
@@ -261,15 +266,27 @@ func (c *ColumnDeclaration) End() token.Position { return c.Position }
261266

262267
// DataType represents a data type.
263268
type DataType struct {
264-
Position token.Position `json:"-"`
265-
Name string `json:"name"`
266-
Parameters []Expression `json:"parameters,omitempty"`
269+
Position token.Position `json:"-"`
270+
Name string `json:"name"`
271+
Parameters []Expression `json:"parameters,omitempty"`
272+
HasParentheses bool `json:"has_parentheses,omitempty"`
267273
}
268274

269275
func (d *DataType) Pos() token.Position { return d.Position }
270276
func (d *DataType) End() token.Position { return d.Position }
271277
func (d *DataType) expressionNode() {}
272278

279+
// NameTypePair represents a named type pair, used in Nested types.
// It records a name together with the DataType declared for it.
280+
type NameTypePair struct {
281+
Position token.Position `json:"-"` // excluded from JSON output
282+
Name string `json:"name"` // name half of the pair
283+
Type *DataType `json:"type"` // type half of the pair
284+
}
285+
286+
// Pos returns the position stored on the pair.
func (n *NameTypePair) Pos() token.Position { return n.Position }
287+
// End returns the same position as Pos; no separate end position is stored.
func (n *NameTypePair) End() token.Position { return n.Position }
288+
// expressionNode is an empty marker method (presumably what lets
// NameTypePair be used as an Expression; confirm against the interface).
func (n *NameTypePair) expressionNode() {}
289+
273290
// CodecExpr represents a CODEC expression.
274291
type CodecExpr struct {
275292
Position token.Position `json:"-"`
@@ -589,6 +606,42 @@ func (l *Literal) Pos() token.Position { return l.Position }
589606
func (l *Literal) End() token.Position { return l.Position }
590607
func (l *Literal) expressionNode() {}
591608

609+
// MarshalJSON implements json.Marshaler for Literal.
//
// encoding/json cannot encode NaN or infinite float64 values, so when Value
// holds one of them it is emitted as the string "NaN", "+Inf" or "-Inf" under
// the "value" key. Every other literal is marshaled with the default struct
// encoding.
610+
func (l *Literal) MarshalJSON() ([]byte, error) {
611+
// literalAlias has Literal's fields but none of its methods, so marshaling
// it uses the default encoding instead of recursing into this method.
type literalAlias Literal
612+
// Replace float64 values that JSON cannot represent with a string.
613+
if f, ok := l.Value.(float64); ok {
614+
if math.IsNaN(f) {
615+
// The outer Value field sits at a shallower depth than the embedded
// literalAlias fields, so it takes the "value" key in the output
// (assumes Literal.Value is also tagged "value"; confirm against the struct).
return json.Marshal(&struct {
616+
*literalAlias
617+
Value string `json:"value"`
618+
}{
619+
literalAlias: (*literalAlias)(l),
620+
Value: "NaN",
621+
})
622+
}
623+
// Positive infinity.
if math.IsInf(f, 1) {
624+
return json.Marshal(&struct {
625+
*literalAlias
626+
Value string `json:"value"`
627+
}{
628+
literalAlias: (*literalAlias)(l),
629+
Value: "+Inf",
630+
})
631+
}
632+
// Negative infinity.
if math.IsInf(f, -1) {
633+
return json.Marshal(&struct {
634+
*literalAlias
635+
Value string `json:"value"`
636+
}{
637+
literalAlias: (*literalAlias)(l),
638+
Value: "-Inf",
639+
})
640+
}
641+
}
642+
// Non-float values and finite floats use the default encoding.
return json.Marshal((*literalAlias)(l))
643+
}
644+
592645
// LiteralType represents the type of a literal.
593646
type LiteralType string
594647

internal/explain/explain.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ func Node(sb *strings.Builder, node interface{}, depth int) {
6363
explainSubquery(sb, n, indent, depth)
6464
case *ast.AliasedExpr:
6565
explainAliasedExpr(sb, n, depth)
66+
case *ast.WithElement:
67+
explainWithElement(sb, n, indent, depth)
6668
case *ast.Asterisk:
6769
explainAsterisk(sb, n, indent)
6870

@@ -97,6 +99,8 @@ func Node(sb *strings.Builder, node interface{}, depth int) {
9799
explainExtractExpr(sb, n, indent, depth)
98100

99101
// DDL statements
102+
case *ast.InsertQuery:
103+
explainInsertQuery(sb, n, indent, depth)
100104
case *ast.CreateQuery:
101105
explainCreateQuery(sb, n, indent, depth)
102106
case *ast.DropQuery:
@@ -117,6 +121,8 @@ func Node(sb *strings.Builder, node interface{}, depth int) {
117121
// Types
118122
case *ast.DataType:
119123
explainDataType(sb, n, indent, depth)
124+
case *ast.NameTypePair:
125+
explainNameTypePair(sb, n, indent, depth)
120126
case *ast.Parameter:
121127
explainParameter(sb, n, indent)
122128

0 commit comments

Comments
 (0)