Skip to content

Commit 5d0fdbd

Browse files
authored
Merge pull request #3289 from dolthub/zachmu/join-debugging
[no-release-notes] Trace debugger for join planner, bug fix for Doltgres indexed joins
2 parents (4f87463 + 1633431), commit 5d0fdbd

File tree

15 files changed

+833
-309
lines changed

15 files changed

+833
-309
lines changed

optgen/cmd/support/memo_gen.go

Lines changed: 69 additions & 89 deletions
Original file line number | Diff line number | Diff line change
@@ -42,36 +42,36 @@ func DecodeMemoExprs(path string) (MemoExprs, error) {
4242
var _ GenDefs = (*MemoExprs)(nil)
4343

4444
type MemoGen struct {
45-
defines []ExprDef
46-
w io.Writer
45+
defns []ExprDef
46+
w io.Writer
4747
}
4848

49-
func (g *MemoGen) Generate(defines GenDefs, w io.Writer) {
50-
g.defines = defines.(MemoExprs).Exprs
49+
func (g *MemoGen) Generate(defns GenDefs, w io.Writer) {
50+
g.defns = defns.(MemoExprs).Exprs
5151

5252
g.w = w
5353

5454
g.genImport()
55-
for _, define := range g.defines {
56-
g.genType(define)
57-
g.genRelInterfaces(define)
58-
59-
g.genStringer(define)
60-
if define.SourceType != "" {
61-
g.genSourceRelInterface(define)
55+
for _, defn := range g.defns {
56+
g.genType(defn)
57+
g.genRelInterfaces(defn)
58+
59+
g.genStringer(defn)
60+
g.genFormatter(defn)
61+
if defn.SourceType != "" {
62+
g.genSourceRelInterface(defn)
6263
}
63-
if define.Join {
64-
g.genJoinRelInterface(define)
65-
} else if define.Binary {
66-
g.genBinaryGroupInterface(define)
67-
} else if define.Unary {
68-
g.genUnaryGroupInterface(define)
64+
if defn.Join {
65+
g.genJoinRelInterface(defn)
66+
} else if defn.Binary {
67+
g.genBinaryGroupInterface(defn)
68+
} else if defn.Unary {
69+
g.genUnaryGroupInterface(defn)
6970
} else {
70-
g.genChildlessGroupInterface(define)
71+
g.genChildlessGroupInterface(defn)
7172
}
7273
}
73-
g.genFormatters(g.defines)
74-
74+
g.genBuildRelExpr(g.defns)
7575
}
7676

7777
func (g *MemoGen) genImport() {
@@ -83,108 +83,116 @@ func (g *MemoGen) genImport() {
8383
fmt.Fprintf(g.w, ")\n\n")
8484
}
8585

86-
func (g *MemoGen) genType(define ExprDef) {
87-
fmt.Fprintf(g.w, "type %s struct {\n", strings.Title(define.Name))
88-
if define.SourceType != "" {
86+
func (g *MemoGen) genType(defn ExprDef) {
87+
fmt.Fprintf(g.w, "type %s struct {\n", strings.Title(defn.Name))
88+
if defn.SourceType != "" {
8989
fmt.Fprintf(g.w, " *sourceBase\n")
90-
fmt.Fprintf(g.w, " Table %s\n", define.SourceType)
91-
} else if define.Join {
90+
fmt.Fprintf(g.w, " Table %s\n", defn.SourceType)
91+
} else if defn.Join {
9292
fmt.Fprintf(g.w, " *JoinBase\n")
93-
} else if define.Unary {
93+
} else if defn.Unary {
9494
fmt.Fprintf(g.w, " *relBase\n")
9595
fmt.Fprintf(g.w, " Child *ExprGroup\n")
96-
} else if define.Binary {
96+
} else if defn.Binary {
9797
fmt.Fprintf(g.w, " *relBase\n")
9898
fmt.Fprintf(g.w, " Left *ExprGroup\n")
9999
fmt.Fprintf(g.w, " Right *ExprGroup\n")
100100
}
101-
for _, attr := range define.Attrs {
101+
for _, attr := range defn.Attrs {
102102
fmt.Fprintf(g.w, " %s %s\n", strings.Title(attr[0]), attr[1])
103103
}
104104

105105
fmt.Fprintf(g.w, "}\n\n")
106106
}
107107

108-
func (g *MemoGen) genRelInterfaces(define ExprDef) {
109-
fmt.Fprintf(g.w, "var _ RelExpr = (*%s)(nil)\n", define.Name)
110-
if define.SourceType != "" {
111-
fmt.Fprintf(g.w, "var _ SourceRel = (*%s)(nil)\n", define.Name)
112-
} else if define.Join {
113-
fmt.Fprintf(g.w, "var _ JoinRel = (*%s)(nil)\n", define.Name)
114-
} else if define.Unary || define.Binary {
108+
func (g *MemoGen) genRelInterfaces(defn ExprDef) {
109+
fmt.Fprintf(g.w, "var _ RelExpr = (*%s)(nil)\n", defn.Name)
110+
fmt.Fprintf(g.w, "var _ fmt.Formatter = (*%s)(nil)\n", defn.Name)
111+
fmt.Fprintf(g.w, "var _ fmt.Stringer = (*%s)(nil)\n", defn.Name)
112+
if defn.SourceType != "" {
113+
fmt.Fprintf(g.w, "var _ SourceRel = (*%s)(nil)\n", defn.Name)
114+
} else if defn.Join {
115+
fmt.Fprintf(g.w, "var _ JoinRel = (*%s)(nil)\n", defn.Name)
116+
} else if defn.Unary || defn.Binary {
115117
} else {
116118
panic("unreachable")
117119
}
118120
fmt.Fprintf(g.w, "\n")
119121
}
120122

121-
func (g *MemoGen) genScalarInterfaces(define ExprDef) {
122-
fmt.Fprintf(g.w, "var _ ScalarExpr = (*%s)(nil)\n", define.Name)
123+
func (g *MemoGen) genScalarInterfaces(defn ExprDef) {
124+
fmt.Fprintf(g.w, "var _ ScalarExpr = (*%s)(nil)\n", defn.Name)
123125

124126
fmt.Fprintf(g.w, "\n")
125127

126-
fmt.Fprintf(g.w, "func (r *%s) ExprId() ScalarExprId {\n", define.Name)
127-
fmt.Fprintf(g.w, " return ScalarExpr%s\n", strings.Title(define.Name))
128+
fmt.Fprintf(g.w, "func (r *%s) ExprId() ScalarExprId {\n", defn.Name)
129+
fmt.Fprintf(g.w, " return ScalarExpr%s\n", strings.Title(defn.Name))
130+
fmt.Fprintf(g.w, "}\n\n")
131+
}
132+
133+
func (g *MemoGen) genStringer(defn ExprDef) {
134+
fmt.Fprintf(g.w, "func (r *%s) String() string {\n", defn.Name)
135+
fmt.Fprintf(g.w, " return fmt.Sprintf(\"%%s\", r)\n")
128136
fmt.Fprintf(g.w, "}\n\n")
129137
}
130138

131-
func (g *MemoGen) genStringer(define ExprDef) {
132-
fmt.Fprintf(g.w, "func (r *%s) String() string {\n", define.Name)
133-
fmt.Fprintf(g.w, " return FormatExpr(r)\n")
139+
func (g *MemoGen) genFormatter(defn ExprDef) {
140+
fmt.Fprintf(g.w, "func (r *%s) Format(s fmt.State, verb rune) {\n", defn.Name)
141+
fmt.Fprintf(g.w, " FormatExpr(r, s, verb)\n")
134142
fmt.Fprintf(g.w, "}\n\n")
135143
}
136144

137-
func (g *MemoGen) genSourceRelInterface(define ExprDef) {
138-
fmt.Fprintf(g.w, "func (r *%s) Name() string {\n", define.Name)
139-
if !define.SkipName {
145+
func (g *MemoGen) genSourceRelInterface(defn ExprDef) {
146+
fmt.Fprintf(g.w, "func (r *%s) Name() string {\n", defn.Name)
147+
if !defn.SkipName {
140148
fmt.Fprintf(g.w, " return strings.ToLower(r.Table.Name())\n")
141149
} else {
142150
fmt.Fprintf(g.w, " return \"\"\n")
143151
}
144152
fmt.Fprintf(g.w, "}\n\n")
145153

146-
fmt.Fprintf(g.w, "func (r *%s) TableId() sql.TableId {\n", define.Name)
154+
fmt.Fprintf(g.w, "func (r *%s) TableId() sql.TableId {\n", defn.Name)
147155
fmt.Fprintf(g.w, " return TableIdForSource(r.g.Id)\n")
148156
fmt.Fprintf(g.w, "}\n\n")
149157

150-
fmt.Fprintf(g.w, "func (r *%s) TableIdNode() plan.TableIdNode {\n", define.Name)
151-
if define.SkipTableId {
158+
fmt.Fprintf(g.w, "func (r *%s) TableIdNode() plan.TableIdNode {\n", defn.Name)
159+
if defn.SkipTableId {
152160
fmt.Fprintf(g.w, " return nil\n")
153161
} else {
154162
fmt.Fprintf(g.w, " return r.Table\n")
155163
}
156164
fmt.Fprintf(g.w, "}\n\n")
157165

158-
fmt.Fprintf(g.w, "func (r *%s) OutputCols() sql.Schema {\n", define.Name)
166+
fmt.Fprintf(g.w, "func (r *%s) OutputCols() sql.Schema {\n", defn.Name)
159167
fmt.Fprintf(g.w, " return r.Table.Schema()\n")
160168
fmt.Fprintf(g.w, "}\n\n")
161169
}
162170

163-
func (g *MemoGen) genJoinRelInterface(define ExprDef) {
164-
fmt.Fprintf(g.w, "func (r *%s) JoinPrivate() *JoinBase {\n", define.Name)
171+
func (g *MemoGen) genJoinRelInterface(defn ExprDef) {
172+
fmt.Fprintf(g.w, "func (r *%s) JoinPrivate() *JoinBase {\n", defn.Name)
165173
fmt.Fprintf(g.w, " return r.JoinBase\n")
166174
fmt.Fprintf(g.w, "}\n\n")
167175
}
168176

169-
func (g *MemoGen) genBinaryGroupInterface(define ExprDef) {
170-
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", define.Name)
177+
func (g *MemoGen) genBinaryGroupInterface(defn ExprDef) {
178+
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", defn.Name)
171179
fmt.Fprintf(g.w, " return []*ExprGroup{r.Left, r.Right}\n")
172180
fmt.Fprintf(g.w, "}\n\n")
173181
}
174182

175-
func (g *MemoGen) genChildlessGroupInterface(define ExprDef) {
176-
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", define.Name)
183+
func (g *MemoGen) genChildlessGroupInterface(defn ExprDef) {
184+
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", defn.Name)
177185
fmt.Fprintf(g.w, " return nil\n")
178186
fmt.Fprintf(g.w, "}\n\n")
179187
}
180188

181-
func (g *MemoGen) genUnaryGroupInterface(define ExprDef) {
182-
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", define.Name)
189+
func (g *MemoGen) genUnaryGroupInterface(defn ExprDef) {
190+
fmt.Fprintf(g.w, "func (r *%s) Children() []*ExprGroup {\n", defn.Name)
183191
fmt.Fprintf(g.w, " return []*ExprGroup{r.Child}\n")
184192
fmt.Fprintf(g.w, "}\n\n")
185193

186-
fmt.Fprintf(g.w, "func (r *%s) outputCols() sql.ColSet {\n", define.Name)
187-
switch define.Name {
194+
fmt.Fprintf(g.w, "func (r *%s) outputCols() sql.ColSet {\n", defn.Name)
195+
switch defn.Name {
188196
case "Project":
189197
fmt.Fprintf(g.w, " return getProjectColset(r)\n")
190198

@@ -193,42 +201,14 @@ func (g *MemoGen) genUnaryGroupInterface(define ExprDef) {
193201
}
194202

195203
fmt.Fprintf(g.w, "}\n\n")
196-
197204
}
198205

199-
func (g *MemoGen) genFormatters(defines []ExprDef) {
200-
// printer
201-
fmt.Fprintf(g.w, "func FormatExpr(r exprType) string {\n")
202-
fmt.Fprintf(g.w, " switch r := r.(type) {\n")
203-
for _, d := range defines {
204-
loweredName := strings.ToLower(d.Name)
205-
fmt.Fprintf(g.w, " case *%s:\n", d.Name)
206-
if loweredName == "indexscan" {
207-
fmt.Fprintf(g.w, " if r.Alias != \"\" {\n")
208-
fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%s\", r.Alias)\n", loweredName)
209-
fmt.Fprintf(g.w, " }\n")
210-
}
211-
if d.SourceType != "" {
212-
fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%s\", r.Name())\n", loweredName)
213-
} else if d.Join || d.Binary {
214-
fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s %%d %%d\", r.Left.Id, r.Right.Id)\n", loweredName)
215-
} else if d.Unary {
216-
fmt.Fprintf(g.w, " return fmt.Sprintf(\"%s: %%d\", r.Child.Id)\n", loweredName)
217-
} else {
218-
panic("unreachable")
219-
}
220-
}
221-
fmt.Fprintf(g.w, " default:\n")
222-
fmt.Fprintf(g.w, " panic(fmt.Sprintf(\"unknown RelExpr type: %%T\", r))\n")
223-
fmt.Fprintf(g.w, " }\n")
224-
fmt.Fprintf(g.w, "}\n\n")
225-
226-
// to sqlNode
206+
func (g *MemoGen) genBuildRelExpr(defns []ExprDef) {
227207
fmt.Fprintf(g.w, "func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) {\n")
228208
fmt.Fprintf(g.w, " var result sql.Node\n")
229209
fmt.Fprintf(g.w, " var err error\n\n")
230210
fmt.Fprintf(g.w, " switch r := r.(type) {\n")
231-
for _, d := range defines {
211+
for _, d := range defns {
232212
if d.SkipExec {
233213
continue
234214
}

optgen/cmd/support/memo_gen_test.go

Lines changed: 18 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ func TestMemoGen(t *testing.T) {
1212
expected string
1313
}{
1414
expected: `
15-
import (
15+
import (
1616
"fmt"
1717
"strings"
1818
"github.com/dolthub/go-mysql-server/sql"
@@ -26,10 +26,16 @@ func TestMemoGen(t *testing.T) {
2626
}
2727
2828
var _ RelExpr = (*hashJoin)(nil)
29+
var _ fmt.Formatter = (*hashJoin)(nil)
30+
var _ fmt.Stringer = (*hashJoin)(nil)
2931
var _ JoinRel = (*hashJoin)(nil)
3032
3133
func (r *hashJoin) String() string {
32-
return FormatExpr(r)
34+
return fmt.Sprintf("%s", r)
35+
}
36+
37+
func (r *hashJoin) Format(s fmt.State, verb rune) {
38+
FormatExpr(r, s, verb)
3339
}
3440
3541
func (r *hashJoin) JoinPrivate() *JoinBase {
@@ -42,10 +48,16 @@ func TestMemoGen(t *testing.T) {
4248
}
4349
4450
var _ RelExpr = (*tableScan)(nil)
51+
var _ fmt.Formatter = (*tableScan)(nil)
52+
var _ fmt.Stringer = (*tableScan)(nil)
4553
var _ SourceRel = (*tableScan)(nil)
4654
4755
func (r *tableScan) String() string {
48-
return FormatExpr(r)
56+
return fmt.Sprintf("%s", r)
57+
}
58+
59+
func (r *tableScan) Format(s fmt.State, verb rune) {
60+
FormatExpr(r, s, verb)
4961
}
5062
5163
func (r *tableScan) Name() string {
@@ -68,17 +80,6 @@ func TestMemoGen(t *testing.T) {
6880
return nil
6981
}
7082
71-
func FormatExpr(r exprType) string {
72-
switch r := r.(type) {
73-
case *hashJoin:
74-
return fmt.Sprintf("hashjoin %d %d", r.Left.Id, r.Right.Id)
75-
case *tableScan:
76-
return fmt.Sprintf("tablescan: %s", r.Name())
77-
default:
78-
panic(fmt.Sprintf("unknown RelExpr type: %T", r))
79-
}
80-
}
81-
8283
func buildRelExpr(b *ExecBuilder, r RelExpr, children ...sql.Node) (sql.Node, error) {
8384
var result sql.Node
8485
var err error
@@ -96,9 +97,9 @@ func TestMemoGen(t *testing.T) {
9697
return nil, err
9798
}
9899
99-
if withDescribeStats, ok := result.(sql.WithDescribeStats); ok {
100-
withDescribeStats.SetDescribeStats(*DescribeStats(r))
101-
}
100+
if withDescribeStats, ok := result.(sql.WithDescribeStats); ok {
101+
withDescribeStats.SetDescribeStats(*DescribeStats(r))
102+
}
102103
result, err = r.Group().finalize(result)
103104
if err != nil {
104105
return nil, err

sql/analyzer/analyzer.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@ import (
3636

3737
const debugAnalyzerKey = "DEBUG_ANALYZER"
3838
const verboseAnalyzerKey = "VERBOSE_ANALYZER"
39+
const traceAnalyzerKey = "TRACE_ANALYZER"
3940

4041
const maxAnalysisIterations = 8
4142

@@ -215,6 +216,7 @@ func (s simpleLogFormatter) Format(entry *logrus.Entry) ([]byte, error) {
215216
func (ab *Builder) Build() *Analyzer {
216217
_, debug := os.LookupEnv(debugAnalyzerKey)
217218
_, verbose := os.LookupEnv(verboseAnalyzerKey)
219+
_, trace := os.LookupEnv(traceAnalyzerKey)
218220
var batches = []*Batch{
219221
{
220222
Desc: "pre-analyzer",
@@ -266,6 +268,7 @@ func (ab *Builder) Build() *Analyzer {
266268
return &Analyzer{
267269
Debug: debug || ab.debug,
268270
Verbose: verbose,
271+
Trace: trace,
269272
contextStack: make([]string, 0),
270273
Batches: batches,
271274
Catalog: NewCatalog(ab.provider),
@@ -297,6 +300,8 @@ type Analyzer struct {
297300
Batches []*Batch
298301
// Whether to log various debugging messages
299302
Debug bool
303+
// Whether to output detailed trace logging for join planning
304+
Trace bool
300305
// Whether to output the query plan at each step of the analyzer
301306
Verbose bool
302307
}

sql/analyzer/costed_index_scan.go

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ func costedIndexLookup(ctx *sql.Context, n sql.Node, a *Analyzer, iat sql.IndexA
131131
if err != nil {
132132
return n, transform.SameTree, err
133133
}
134-
// TODO(next): this is getting a GMSCast node and not getting an index assigned here
134+
135135
ita, stats, filters, err := getCostedIndexScan(ctx, a.Catalog, rt, indexes, SplitConjunction(oldFilter), qFlags)
136136
if err != nil || ita == nil {
137137
return n, transform.SameTree, err
@@ -334,6 +334,9 @@ func getCostedIndexScan(ctx *sql.Context, statsProv sql.StatsProvider, rt sql.Ta
334334
}
335335

336336
func addIndexScans(ctx *sql.Context, m *memo.Memo) error {
337+
m.Tracer.PushDebugContext("addIndexScans")
338+
defer m.Tracer.PopDebugContext()
339+
337340
return memo.DfsRel(m.Root(), func(e memo.RelExpr) error {
338341
filter, ok := e.(*memo.Filter)
339342
if !ok {
@@ -928,7 +931,7 @@ func (b *indexScanRangeBuilder) rangeBuildOr(f *iScanOr, inScan bool) (sql.MySQL
928931
// imprecise filters cannot be removed
929932
b.markImprecise(f)
930933

931-
//todo union the or ranges
934+
// todo union the or ranges
932935
var ret sql.MySQLRangeCollection
933936
for _, c := range f.children {
934937
var ranges sql.MySQLRangeCollection

0 commit comments

Comments
 (0)