Skip to content

Commit 14f6534

Browse files
craig[bot]DrewKimball
andcommitted
Merge #148540
148540: sql,opt: implement region-column inference r=michae2,mw5h,mgartner,yuzefovich a=DrewKimball #### sql,builtins: add nil-check for functions that use evalCtx.Regions The optimizer tests do not initialize the `evalCtx.Regions` field. This could previously cause NPE when the optimizer attempted constant folding for a function that uses the field. This commit hardens all such builtin functions against NPE. Epic: None Release note: None #### opt: add support for region column inference to catalog and test catalog This commit adds support for the new storage param `infer_rbr_region_col_using_constraint` to the optimizer and test catalogs. Informs #148243 Release note: None #### opt: implement region-column inference This commit finishes the implementation for the new storage parameter `infer_rbr_region_col_using_constraint`. If a foreign key constraint is specified via that setting, the constraint will be used to plan a lookup join into the parent table to retrieve the correct values for the region column on INSERT, UPDATE, and UPSERT operations. This will make it easier for customers to add the `crdb_region` column to their foreign-key constraints, since they no longer have to guarantee that an insert into the child table happens in the same region as the matching parent row. Fixes #148243 Release note (sql change): Added support for automatically determining the region column for a REGIONAL BY ROW table using a foreign key constraint. The foreign key is specified by setting a new table storage parameter `infer_rbr_region_col_using_constraint`, and must contain the region column. This can be useful for applications that are unable to guarantee that a child row is inserted or updated from the same region as the matching parent row. Co-authored-by: Drew Kimball <drewk@cockroachlabs.com>
2 parents b1bb226 + b845bd8 commit 14f6534

File tree

12 files changed

+3715
-5
lines changed

12 files changed

+3715
-5
lines changed

pkg/ccl/logictestccl/testdata/logic_test/regional_by_row_foreign_key

Lines changed: 1755 additions & 1 deletion
Large diffs are not rendered by default.

pkg/sql/opt/cat/table.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,13 @@ type Table interface {
171171
// different name in a `REGIONAL BY ROW AS` DDL clause.
172172
HomeRegionColName() (colName string, ok bool)
173173

174+
// RegionalByRowUsingConstraint returns the foreign-key constraint that is
175+
// used to look up the region for each row in a REGIONAL BY ROW table.
176+
// This is only set if the infer_rbr_region_col_using_constraint storage param
177+
// is set. If the storage param is not set, or if the table is not
178+
// REGIONAL BY ROW, RegionalByRowUsingConstraint returns nil.
179+
RegionalByRowUsingConstraint() ForeignKeyConstraint
180+
174181
// GetDatabaseID returns the owning database id of the table, or zero, if the
175182
// owning database could not be determined.
176183
GetDatabaseID() descpb.ID

pkg/sql/opt/exec/explain/plan_gist_factory.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -646,6 +646,11 @@ func (u *unknownTable) HomeRegionColName() (colName string, ok bool) {
646646
return "", false
647647
}
648648

649+
// RegionalByRowUsingConstraint is part of the cat.Table interface.
650+
func (ot *unknownTable) RegionalByRowUsingConstraint() cat.ForeignKeyConstraint {
651+
return nil
652+
}
653+
649654
// GetDatabaseID is part of the cat.Table interface.
650655
func (u *unknownTable) GetDatabaseID() descpb.ID {
651656
return 0

pkg/sql/opt/optbuilder/insert.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,10 @@ func (mb *mutationBuilder) buildInputForInsert(inScope *scope, inputRows *tree.S
724724
// Add assignment casts for insert columns.
725725
mb.addAssignmentCasts(mb.insertColIDs)
726726
mb.inputForInsertExpr = mb.outScope.expr
727+
728+
// Track whether the value for the region column is explicitly specified. This
729+
// is a no-op if the table isn't regional-by-row.
730+
mb.setRegionColExplicitlyMutated(mb.insertColIDs)
727731
}
728732

729733
// addSynthesizedColsForInsert wraps an Insert input expression with a Project
@@ -756,6 +760,8 @@ func (mb *mutationBuilder) addSynthesizedColsForInsert() {
756760
func (mb *mutationBuilder) buildInsert(
757761
returning *tree.ReturningExprs, vectorInsert bool, hasOnConflict bool,
758762
) {
763+
mb.maybeAddRegionColLookup(opt.InsertOp)
764+
759765
// Disambiguate names so that references in any expressions, such as a
760766
// check constraint, refer to the correct columns.
761767
mb.disambiguateColumns()
@@ -995,6 +1001,8 @@ func (mb *mutationBuilder) setUpsertCols(insertCols tree.NameList) {
9951001
// buildUpsert constructs an Upsert operator, possibly wrapped by a Project
9961002
// operator that corresponds to the given RETURNING clause.
9971003
func (mb *mutationBuilder) buildUpsert(returning *tree.ReturningExprs) {
1004+
mb.maybeAddRegionColLookup(opt.UpsertOp)
1005+
9981006
// Merge input insert and update columns using CASE expressions.
9991007
mb.projectUpsertColumns()
10001008

pkg/sql/opt/optbuilder/mutation_builder.go

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"github.com/cockroachdb/cockroach/pkg/sql/parser"
2121
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgcode"
2222
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgerror"
23+
"github.com/cockroachdb/cockroach/pkg/sql/privilege"
2324
"github.com/cockroachdb/cockroach/pkg/sql/sem/cast"
2425
"github.com/cockroachdb/cockroach/pkg/sql/sem/idxtype"
2526
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
@@ -244,6 +245,12 @@ type mutationBuilder struct {
244245
// uniqueWithTombstoneIndexes is the set of unique indexes that ensure uniqueness
245246
// by writing tombstones to all partitions
246247
uniqueWithTombstoneIndexes intsets.Fast
248+
249+
// regionColExplicitlyMutated is true if the target table is regional-by-row
250+
// and the value for the region column is explicitly specified for insert or
251+
// update. Example:
252+
// INSERT INTO t (a, b, region) VALUES (1, 2, 'us-east-1');
253+
regionColExplicitlyMutated bool
247254
}
248255

249256
func (mb *mutationBuilder) init(b *Builder, opName string, tab cat.Table, alias tree.TableName) {
@@ -625,6 +632,21 @@ func (mb *mutationBuilder) extractValuesInput(inputRows *tree.Select) *tree.Valu
625632
return nil
626633
}
627634

635+
// setRegionColExplicitlyMutated should be called for the insert and update
636+
// columns of an INSERT, UPDATE, or UPSERT statement before building the
637+
// synthesized columns. It keeps track of whether the region column is
638+
// explicitly mutated in the statement (e.g. with user-provided values).
639+
func (mb *mutationBuilder) setRegionColExplicitlyMutated(explicitCols opt.OptionalColList) {
640+
if !mb.tab.IsRegionalByRow() {
641+
return
642+
}
643+
// The region column is always the first column in the primary index.
644+
regionColOrd := mb.tab.Index(cat.PrimaryIndex).Column(0).Ordinal()
645+
if explicitCols[regionColOrd] != 0 {
646+
mb.regionColExplicitlyMutated = true
647+
}
648+
}
649+
628650
// replaceDefaultExprs looks for DEFAULT specifiers in input value expressions
629651
// and replaces them with the corresponding default value expression for the
630652
// corresponding column. This is only possible when the input is a VALUES
@@ -861,6 +883,165 @@ func (mb *mutationBuilder) addSynthesizedComputedCols(colIDs opt.OptionalColList
861883
mb.outScope = pb.Finish()
862884
}
863885

886+
// maybeAddRegionColLookup adds a lookup join to the target table of a foreign
887+
// key constraint specified by the "infer_rbr_region_col_using_constraint"
888+
// storage param, if any. It is used by INSERT, UPDATE, and UPSERT statements to
889+
// determine the correct value of the region column for a REGIONAL BY ROW table.
890+
func (mb *mutationBuilder) maybeAddRegionColLookup(op opt.Operator) {
891+
switch op {
892+
case opt.InsertOp, opt.UpdateOp, opt.UpsertOp:
893+
default:
894+
panic(errors.AssertionFailedf("maybeAddRegionColLookup called with unexpected operator %s", op))
895+
}
896+
if !mb.tab.IsRegionalByRow() {
897+
return
898+
}
899+
if mb.regionColExplicitlyMutated {
900+
// Allow the user to explicitly set the region column value, overriding the
901+
// storage param.
902+
return
903+
}
904+
lookupFK := mb.tab.RegionalByRowUsingConstraint()
905+
if lookupFK == nil {
906+
return
907+
}
908+
// An UPDATE may not be mutating any of the foreign-key columns, in which case
909+
// we can stop early.
910+
if op == opt.UpdateOp {
911+
fkColIsMutated := false
912+
for colIdx := range lookupFK.ColumnCount() {
913+
if mb.updateColIDs[lookupFK.OriginColumnOrdinal(mb.tab, colIdx)] != 0 {
914+
fkColIsMutated = true
915+
break
916+
}
917+
}
918+
if !fkColIsMutated {
919+
return
920+
}
921+
}
922+
// Resolve the referenced table.
923+
refTabDescID := int64(lookupFK.ReferencedTableID())
924+
refTab := mb.b.resolveTableRef(&tree.TableRef{TableID: refTabDescID}, privilege.SELECT)
925+
refTabMeta := mb.b.addTable(refTab, tree.NewUnqualifiedTableName(refTab.Name()))
926+
refTabID := refTabMeta.MetaID
927+
928+
// Use the foreign-key columns (apart from the region column, if present) to
929+
// plan a join against the referenced table. The schema changer has already
930+
// verified that the foreign-key contains the region column, so performing a
931+
// lookup using the remaining columns allows us to infer the correct value for
932+
// the region column.
933+
//
934+
// NOTE: The region column is always the first column in the primary index.
935+
f := mb.b.factory
936+
joinCond := make(memo.FiltersExpr, 0, lookupFK.ColumnCount())
937+
originRegionColOrd := mb.tab.Index(cat.PrimaryIndex).Column(0).Ordinal()
938+
var refLookupCols opt.ColSet
939+
var originRegionColID, lookupRegionColID opt.ColumnID
940+
for colIdx := range lookupFK.ColumnCount() {
941+
originColID := mb.mapToReturnColID(lookupFK.OriginColumnOrdinal(mb.tab, colIdx))
942+
refColID := refTabID.ColumnID(lookupFK.ReferencedColumnOrdinal(refTab, colIdx))
943+
if lookupFK.OriginColumnOrdinal(mb.tab, colIdx) == originRegionColOrd {
944+
originRegionColID = originColID
945+
lookupRegionColID = refColID
946+
continue
947+
}
948+
eqExpr := f.ConstructEq(f.ConstructVariable(originColID), f.ConstructVariable(refColID))
949+
joinCond = append(joinCond, f.ConstructFiltersItem(eqExpr))
950+
refLookupCols.Add(refColID)
951+
}
952+
if len(joinCond) == 0 {
953+
panic(errors.AssertionFailedf(
954+
"unable to determine lookup columns using constraint %q", lookupFK.Name()))
955+
}
956+
if originRegionColID == 0 || lookupRegionColID == 0 {
957+
panic(errors.AssertionFailedf(
958+
"expected region column to be part of foreign key constraint %q", lookupFK.Name()))
959+
}
960+
md := mb.b.factory.Metadata()
961+
if !md.ColumnMeta(originRegionColID).Type.Identical(md.ColumnMeta(lookupRegionColID).Type) {
962+
panic(errors.AssertionFailedf("expected parent and child region column types to be identical"))
963+
}
964+
// For non-serializable isolation (or when the var is set), take a shared lock
965+
// when reading from the parent table. This prevents concurrent transactions
966+
// from invalidating the looked-up region column value. This isn't necessary
967+
// for correctness since FK checks still run, but avoids returning an error
968+
// unnecessarily to the user.
969+
locking := noRowLocking
970+
if mb.b.evalCtx.TxnIsoLevel != isolation.Serializable ||
971+
mb.b.evalCtx.SessionData().ImplicitFKLockingForSerializable {
972+
locking = lockingSpec{
973+
&lockingItem{
974+
item: &tree.LockingItem{
975+
Strength: tree.ForShare,
976+
WaitPolicy: tree.LockWaitBlock,
977+
},
978+
},
979+
}
980+
}
981+
refScope := mb.b.buildScan(
982+
refTabMeta,
983+
tableOrdinals(refTab, columnKinds{
984+
includeMutations: false,
985+
includeSystem: false,
986+
includeInverted: false,
987+
}),
988+
&tree.IndexFlags{
989+
IgnoreForeignKeys: true,
990+
AvoidFullScan: mb.b.evalCtx.SessionData().AvoidFullTableScansInMutations,
991+
},
992+
locking,
993+
mb.b.allocScope(),
994+
true, /* disableNotVisibleIndex */
995+
// The scan is exempt from RLS to maintain data integrity.
996+
cat.PolicyScopeExempt,
997+
)
998+
if !refScope.expr.Relational().FuncDeps.ColsAreLaxKey(refLookupCols) {
999+
// The lookup columns must be a lax key, otherwise the join may return
1000+
// multiple rows for a single row in the target table. This should already
1001+
// be enforced by the foreign-key constraint.
1002+
panic(errors.AssertionFailedf(
1003+
"lookup columns using constraint %q must be a lax key", lookupFK.Name()))
1004+
}
1005+
var joinFlags memo.JoinFlags
1006+
if mb.b.evalCtx.SessionData().PreferLookupJoinsForFKs {
1007+
joinFlags = memo.PreferLookupJoinIntoRight
1008+
}
1009+
mb.outScope.expr = mb.b.factory.ConstructLeftJoin(
1010+
mb.outScope.expr, refScope.expr, joinCond, &memo.JoinPrivate{Flags: joinFlags},
1011+
)
1012+
// Build a CASE expression to determine the final value of the region column.
1013+
// Use the looked-up value if non-NULL, and otherwise use the default value
1014+
// which was already projected in the input.
1015+
regionColType := md.ColumnMeta(originRegionColID).Type
1016+
caseExpr := mb.b.factory.ConstructCase(
1017+
memo.TrueSingleton,
1018+
memo.ScalarListExpr{
1019+
f.ConstructWhen(
1020+
f.ConstructIs(f.ConstructVariable(lookupRegionColID), f.ConstructNull(regionColType)),
1021+
f.ConstructVariable(originRegionColID),
1022+
)},
1023+
f.ConstructVariable(lookupRegionColID),
1024+
)
1025+
regionColName := mb.tab.Column(originRegionColOrd).ColName()
1026+
colName := scopeColName(regionColName).WithMetadataName(
1027+
fmt.Sprintf("fk_lookup_%s", regionColName),
1028+
)
1029+
newOutScope := mb.outScope.replace()
1030+
newOutScope.appendColumnsFromScope(mb.outScope)
1031+
regionCol := mb.b.synthesizeColumn(newOutScope, colName, regionColType, nil /* expr */, caseExpr)
1032+
mb.b.constructProjectForScope(mb.outScope, newOutScope)
1033+
mb.outScope = newOutScope
1034+
1035+
// Whether a row is inserted or updated, it will use the newly calculated
1036+
// value for the region column.
1037+
if op == opt.InsertOp || op == opt.UpsertOp {
1038+
mb.insertColIDs[originRegionColOrd] = regionCol.id
1039+
}
1040+
if op == opt.UpdateOp || op == opt.UpsertOp {
1041+
mb.updateColIDs[originRegionColOrd] = regionCol.id
1042+
}
1043+
}
1044+
8641045
// addCheckConstraintCols synthesizes a boolean output column for each check
8651046
// constraint defined on the target table. The mutation operator will report a
8661047
// constraint violation error if the value of the column is false.

0 commit comments

Comments
 (0)