Merge #155242 #155390 #155402 #155406

craig[bot] · Uzair5162 · annaw2193 · craig[bot] · commit dae24c3dd4e1 · 2025-10-14T22:25:59.000Z
155242: sql/stats: avoid mutating input buckets in stripOuterBuckets r=Uzair5162 a=Uzair5162 This commit changes `stripOuterBuckets` to modify and return a copy of the given histogram buckets if it finds outer buckets to remove. Previously, we would mutate the caller's histograms with leading outer buckets in-place by zeroing the range counts on the first non-outer bucket. This effectively corrupts the first histogram bucket in stats passed in from the stats cache. Although this bug has existed since 90e311d (which zeroes the first bucket's range counts), it would only impact full statistics that we tried merging with partial stats, as that was the only case in which `stripOuterBuckets` was called. The surface area of this bug increased after db9a344, which calls `stripOuterBuckets` on every full statistic, regardless of whether we end up merging it with partial stats or not. Fixes: #155184 Release note (bug fix): Previously, we could corrupt the first bucket of table statistic histograms in certain cases, causing underestimates for range counts near the lower end of the domain, which is now fixed. 155390: dev: refine behavior of `--cpus` further r=annaw2193 a=annaw2193 - `--local_cpu_resources` changed to `--local_resources=cpu=` - `--cpus` now additionally implies `--local_test_jobs`, which should be set to an equivalent value. Also, using `--cpus` gives: Epic: None Fixes: #151139 Release Note: None 155402: sql: harden recent change about DO block recursion r=yuzefovich a=yuzefovich This commit reduces the max depth of recursion when evaluating routines with `tail-call-optimization-enabled=false` (recently added in 7b879ef) from 10k to 100. We've just seen a few cases where TestRandomSyntaxSQLSmith failed because a DO block didn't respect context cancellation within 5s. 
I've manually tried it out a few times, and things worked, so my hypothesis is that extremely deep stacks (that are produced with the TCO disabled) are the root cause of slow cancellation, so let's just error out sooner. This commit also brings back the skip of DO blocks in TestComposeCompare (the thinking there is that we should stabilize them in other tests first). Fixes: #155208. Fixes: #155210. Release note: None 155406: sql/hints: remove noisy logging r=DrewKimball a=DrewKimball This commit removes a noisy source of log messages by only logging for incremental updates with a non-empty list of events. This prevents spamming log messages every time the resolved timestamp for the rangefeed is incremented. Epic: None Release note: None Co-authored-by: Uzair Ahmad <uzair.ahmad@cockroachlabs.com> Co-authored-by: Anna Wang <anna.wang@cockroachlabs.com> Co-authored-by: Yahor Yuzefovich <yahor@cockroachlabs.com> Co-authored-by: Drew Kimball <drewk@cockroachlabs.com>
diff --git a/dev b/dev
@@ -8,7 +8,7 @@ fi
 set -euo pipefail
 
 # Bump this counter to force rebuilding `dev` on all machines.
-DEV_VERSION=113
+DEV_VERSION=114
 
 THIS_DIR=$(cd "$(dirname "$0")" && pwd)
 BINARY_DIR=$THIS_DIR/bin/dev-versions
diff --git a/pkg/cmd/dev/testdata/datadriven/bench b/pkg/cmd/dev/testdata/datadriven/bench
@@ -26,7 +26,7 @@ exec
 dev bench pkg/spanconfig/spanconfigkvsubscriber -f=BenchmarkSpanConfigDecoder --cpus=10 --ignore-cache=false -v --timeout=50s
 ----
 echo $HOME/.cache
-bazel test --local_cpu_resources=10 --jobs=10 --test_timeout=50 pkg/spanconfig/spanconfigkvsubscriber:all --test_arg -test.run=- --test_arg -test.bench=BenchmarkSpanConfigDecoder --test_sharding_strategy=disabled --test_arg -test.cpu --test_arg 1 --test_arg -test.v --test_arg -test.benchmem --crdb_test_off --crdb_bench --test_env COCKROACH_TEST_FIXTURES_DIR=crdb-mock-test-fixtures/crdb-test-fixtures --sandbox_writable_path=crdb-mock-test-fixtures/crdb-test-fixtures --test_output streamed
+bazel test --local_resources=cpu=10 --jobs=10 --local_test_jobs=10 --test_timeout=50 pkg/spanconfig/spanconfigkvsubscriber:all --test_arg -test.run=- --test_arg -test.bench=BenchmarkSpanConfigDecoder --test_sharding_strategy=disabled --test_arg -test.cpu --test_arg 1 --test_arg -test.v --test_arg -test.benchmem --crdb_test_off --crdb_bench --test_env COCKROACH_TEST_FIXTURES_DIR=crdb-mock-test-fixtures/crdb-test-fixtures --sandbox_writable_path=crdb-mock-test-fixtures/crdb-test-fixtures --test_output streamed
 
 exec
 dev bench pkg/bench -f='BenchmarkTracing/1node/scan/trace=off' --test-args '-test.memprofile=mem.out -test.cpuprofile=cpu.out'
diff --git a/pkg/cmd/dev/testdata/datadriven/dev-build b/pkg/cmd/dev/testdata/datadriven/dev-build
@@ -13,7 +13,7 @@ cp sandbox/pkg/cmd/cockroach-short/cockroach-short_/cockroach-short crdb-checkou
 exec
 dev build cockroach-short --cpus=12
 ----
-bazel build --local_cpu_resources=12 --jobs=12 //pkg/cmd/cockroach-short:cockroach-short --build_event_binary_file=/tmp/path
+bazel build --local_resources=cpu=12 --jobs=12 --local_test_jobs=12 //pkg/cmd/cockroach-short:cockroach-short --build_event_binary_file=/tmp/path
 bazel info workspace --color=no
 mkdir crdb-checkout/bin
 bazel info bazel-bin --color=no
diff --git a/pkg/cmd/dev/testdata/datadriven/testlogic b/pkg/cmd/dev/testdata/datadriven/testlogic
@@ -59,7 +59,7 @@ bazel info workspace --color=no
 bazel info workspace --color=no
 bazel run pkg/cmd/generate-logictest -- -out-dir=crdb-checkout
 bazel run //pkg/gen:schemachanger
-bazel test //pkg/sql/logictest/tests/... --test_env=GOTRACEBACK=all --local_cpu_resources=8 --jobs=8 --test_arg -show-sql --test_timeout=60 --test_env=COCKROACH_STRESS=true --notest_keep_going --runs_per_test=500 --test_filter auto_span_config_reconciliation/ --test_sharding_strategy=disabled --test_output errors
+bazel test //pkg/sql/logictest/tests/... --test_env=GOTRACEBACK=all --local_resources=cpu=8 --jobs=8 --local_test_jobs=8 --test_arg -show-sql --test_timeout=60 --test_env=COCKROACH_STRESS=true --notest_keep_going --runs_per_test=500 --test_filter auto_span_config_reconciliation/ --test_sharding_strategy=disabled --test_output errors
 
 exec
 dev testlogic base --files=auto_span_config_reconciliation --stress
diff --git a/pkg/cmd/dev/util.go b/pkg/cmd/dev/util.go
@@ -264,8 +264,9 @@ func (d *dev) getMergeBaseHash(ctx context.Context) (string, error) {
 
 func addCommonBazelArguments(args *[]string) {
 	if numCPUs != 0 {
-		*args = append(*args, fmt.Sprintf("--local_cpu_resources=%d", numCPUs))
+		*args = append(*args, fmt.Sprintf("--local_resources=cpu=%d", numCPUs))
 		*args = append(*args, fmt.Sprintf("--jobs=%d", numCPUs))
+		*args = append(*args, fmt.Sprintf("--local_test_jobs=%d", numCPUs))
 	}
 	if pgoEnabled {
 		*args = append(*args, "--config=pgo")
diff --git a/pkg/compose/compare/compare/compare_test.go b/pkg/compose/compare/compare/compare_test.go
@@ -79,8 +79,13 @@ func TestCompare(t *testing.T) {
 	}
 	configs := map[string]testConfig{
 		"mutators": {
-			setup:           sqlsmith.Setups[sqlsmith.RandTableSetupName],
-			opts:            []sqlsmith.SmitherOption{sqlsmith.CompareMode()},
+			setup: sqlsmith.Setups[sqlsmith.RandTableSetupName],
+			opts: []sqlsmith.SmitherOption{
+				sqlsmith.CompareMode(),
+				// TODO(yuzefovich): perhaps allow DO blocks again after they
+				// have been stabilized in other tests.
+				sqlsmith.DisableDoBlocks(),
+			},
 			ignoreSQLErrors: true,
 			conns: []testConn{
 				{
diff --git a/pkg/sql/hints/hint_cache.go b/pkg/sql/hints/hint_cache.go
@@ -210,7 +210,8 @@ func (c *StatementHintsCache) onUpdate(
 	if update.Type == rangefeedcache.CompleteUpdate {
 		log.Dev.Info(ctx, "statement_hints rangefeed completed initial scan")
 		c.handleInitialScan(update)
-	} else {
+	} else if len(update.Events) > 0 {
+		// Ignore empty updates that only bump the resolved timestamp.
 		log.Dev.Info(ctx, "statement_hints rangefeed applying incremental update")
 		c.handleIncrementalUpdate(ctx, update)
 	}
@@ -248,10 +249,6 @@ func (c *StatementHintsCache) handleInitialScan(update rangefeedcache.Update[*bu
 func (c *StatementHintsCache) handleIncrementalUpdate(
 	ctx context.Context, update rangefeedcache.Update[*bufferEvent],
 ) {
-	if len(update.Events) == 0 {
-		// Avoid synchronization when we're just bumping the resolved timestamp.
-		return
-	}
 	defer c.generation.Add(1)
 	c.mu.Lock()
 	defer c.mu.Unlock()
diff --git a/pkg/sql/logictest/testdata/logic_test/distsql_stats b/pkg/sql/logictest/testdata/logic_test/distsql_stats
@@ -3890,3 +3890,64 @@ SELECT jsonb_pretty(statistics->0->'histo_buckets') FROM
         "upper_bound": "c"
     }
 ]
+
+# Ensure that stripOuterBuckets doesn't overwrite statistics (see #155184).
+
+statement ok
+CREATE TABLE t155184 (
+  a INT PRIMARY KEY
+) WITH (sql_stats_automatic_collection_enabled = false, sql_stats_histogram_samples_count = 2)
+
+# These stats were created with the following statements:
+#
+#   INSERT INTO t155184 SELECT generate_series(1,10)
+#   ANALYZE t155184
+
+statement ok
+ALTER TABLE t155184 INJECT STATISTICS '[
+    {
+        "avg_size": 1,
+        "columns": [
+            "a"
+        ],
+        "created_at": "2025-10-10 13:41:08.711908",
+        "distinct_count": 10,
+        "histo_buckets": [
+            {
+                "distinct_range": 0,
+                "num_eq": 0,
+                "num_range": 0,
+                "upper_bound": "-9223372036854775808"
+            },
+            {
+                "distinct_range": 3.5,
+                "num_eq": 1,
+                "num_range": 4,
+                "upper_bound": "8"
+            },
+            {
+                "distinct_range": 1,
+                "num_eq": 1,
+                "num_range": 1,
+                "upper_bound": "10"
+            },
+            {
+                "distinct_range": 3.5,
+                "num_eq": 0,
+                "num_range": 4,
+                "upper_bound": "9223372036854775807"
+            }
+        ],
+        "histo_col_type": "INT8",
+        "histo_version": 3,
+        "id": 1114221014922133505,
+        "null_count": 0,
+        "row_count": 10
+    }
+]'
+
+# All we care about is the row counts, so don't error if other parts of the explain output change
+query T
+SELECT info FROM [EXPLAIN SELECT * FROM t155184 WHERE a < 8] WHERE info LIKE '%estimated row count:%'
+----
+  estimated row count: 4 (36% of the table; stats collected <hidden> ago)
diff --git a/pkg/sql/routine.go b/pkg/sql/routine.go
@@ -163,7 +163,7 @@ func (p *planner) EvalRoutineExpr(
 		if routineDepthValue := ctx.Value(routineDepthKey{}); routineDepthValue != nil {
 			routineDepth = routineDepthValue.(int)
 		}
-		const maxDepth = 10000
+		const maxDepth = 100
 		if routineDepth > maxDepth {
 			return nil, pgerror.Newf(pgcode.ProgramLimitExceeded,
 				"routine reached recursion depth limit: %d (probably infinite loop)", maxDepth)
diff --git a/pkg/sql/stats/merge.go b/pkg/sql/stats/merge.go
@@ -86,8 +86,9 @@ func MergedStatistics(
 	return mergedStats
 }
 
-// stripOuterBuckets removes the outer buckets from a histogram without a
-// leading NULL bucket.
+// stripOuterBuckets returns a copy of the histogram buckets with any outer
+// buckets that may have been added by addOuterBuckets removed. histogram must
+// not have a leading NULL bucket.
 func stripOuterBuckets(
 	ctx context.Context, evalCtx *eval.Context, histogram []cat.HistogramBucket,
 ) []cat.HistogramBucket {
@@ -96,17 +97,28 @@ func stripOuterBuckets(
 	}
 	startIdx := 0
 	endIdx := len(histogram)
-	if histogram[0].UpperBound.IsMin(ctx, evalCtx) && histogram[0].NumEq == 0 {
+	hasLowerOuter := histogram[0].UpperBound.IsMin(ctx, evalCtx) && histogram[0].NumEq == 0
+	if hasLowerOuter {
 		startIdx = 1
-		// Set the first range counts to zero to counteract range counts added by
-		// addOuterBuckets.
-		histogram[startIdx].NumRange = 0
-		histogram[startIdx].DistinctRange = 0
 	}
 	if histogram[len(histogram)-1].UpperBound.IsMax(ctx, evalCtx) && histogram[len(histogram)-1].NumEq == 0 {
 		endIdx = len(histogram) - 1
 	}
-	return histogram[startIdx:endIdx]
+	if startIdx == 0 && endIdx == len(histogram) {
+		return histogram
+	}
+	if startIdx >= endIdx {
+		return nil
+	}
+
+	out := append([]cat.HistogramBucket(nil), histogram[startIdx:endIdx]...)
+	if hasLowerOuter {
+		// Set the first range counts to zero to counteract range counts added by
+		// addOuterBuckets.
+		out[0].NumRange = 0
+		out[0].DistinctRange = 0
+	}
+	return out
 }
 
 // mergePartialStatistic merges a full statistic with a more recent partial
diff --git a/pkg/sql/stats/merge_test.go b/pkg/sql/stats/merge_test.go
@@ -15,6 +15,7 @@ import (
 
 	"github.com/cockroachdb/cockroach/pkg/settings/cluster"
 	"github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb"
+	"github.com/cockroachdb/cockroach/pkg/sql/opt/cat"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/eval"
 	"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
 )
@@ -916,6 +917,92 @@ func TestMergedStatistics(t *testing.T) {
 	}
 }
 
+// TestStripOuterBuckets tests stripOuterBuckets which removes outer buckets
+// added by addOuterBuckets before merging partial statistics.
+func TestStripOuterBuckets(t *testing.T) {
+	ctx := context.Background()
+	st := cluster.MakeTestingClusterSettings()
+	evalCtx := eval.NewTestingEvalContext(st)
+	defer evalCtx.Stop(ctx)
+
+	t.Run("no outer buckets returns input", func(t *testing.T) {
+		buckets := []cat.HistogramBucket{
+			{NumEq: 1, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(10)},
+			{NumEq: 2, NumRange: 1, DistinctRange: 1, UpperBound: tree.NewDInt(20)},
+		}
+		strippedBuckets := stripOuterBuckets(ctx, evalCtx, buckets)
+		if !reflect.DeepEqual(strippedBuckets, buckets) {
+			t.Fatalf("expected buckets unchanged: %v", strippedBuckets)
+		}
+		if len(strippedBuckets) > 0 && &strippedBuckets[0] != &buckets[0] {
+			t.Fatalf("unexpected copy of backing array when no outer buckets")
+		}
+	})
+
+	testCases := []struct {
+		name     string
+		buckets  []cat.HistogramBucket
+		expected []cat.HistogramBucket
+	}{
+		{
+			buckets: []cat.HistogramBucket{
+				{NumEq: 0, UpperBound: tree.NewDInt(math.MinInt64)},
+				{NumEq: 1, NumRange: 10, DistinctRange: 5, UpperBound: tree.NewDInt(30)},
+				{NumEq: 0, UpperBound: tree.NewDInt(math.MaxInt64)},
+			},
+			expected: []cat.HistogramBucket{
+				{NumEq: 1, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(30)},
+			},
+		},
+		{
+			buckets: []cat.HistogramBucket{
+				{NumEq: 0, UpperBound: tree.NewDInt(math.MinInt64)},
+				{NumEq: 1, NumRange: 10, DistinctRange: 5, UpperBound: tree.NewDInt(30)},
+				{NumEq: 2, NumRange: 4, DistinctRange: 3, UpperBound: tree.NewDInt(40)},
+			},
+			expected: []cat.HistogramBucket{
+				{NumEq: 1, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(30)},
+				{NumEq: 2, NumRange: 4, DistinctRange: 3, UpperBound: tree.NewDInt(40)},
+			},
+		},
+		{
+			buckets: []cat.HistogramBucket{
+				{NumEq: 3, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(30)},
+				{NumEq: 2, NumRange: 4, DistinctRange: 3, UpperBound: tree.NewDInt(40)},
+				{NumEq: 0, UpperBound: tree.NewDInt(math.MaxInt64)},
+			},
+			expected: []cat.HistogramBucket{
+				{NumEq: 3, NumRange: 0, DistinctRange: 0, UpperBound: tree.NewDInt(30)},
+				{NumEq: 2, NumRange: 4, DistinctRange: 3, UpperBound: tree.NewDInt(40)},
+			},
+		},
+	}
+	for i, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			buckets := append([]cat.HistogramBucket(nil), tc.buckets...)
+			bucketsCopy := append([]cat.HistogramBucket(nil), buckets...)
+			strippedBuckets := stripOuterBuckets(ctx, evalCtx, buckets)
+
+			if !reflect.DeepEqual(strippedBuckets, tc.expected) {
+				t.Fatalf("test case %d incorrect, stripped buckets:\n%v\nexpected:\n%v",
+					i, strippedBuckets, tc.expected)
+			}
+			if !reflect.DeepEqual(buckets, bucketsCopy) {
+				t.Fatalf("test case %d unexpected mutation of input buckets:\n%v\nexpected:\n%v",
+					i, buckets, bucketsCopy)
+			}
+			if len(strippedBuckets) > 0 {
+				for bi := range buckets {
+					if &strippedBuckets[0] == &buckets[bi] {
+						t.Fatalf("test case %d expected stripped buckets result to copy"+
+							" input slice, but shares backing array", i)
+					}
+				}
+			}
+		})
+	}
+}
+
 func (tabStat *TableStatistic) RoundDistinctRanges() {
 	for i := range tabStat.Histogram {
 		tabStat.Histogram[i].DistinctRange = math.Round(tabStat.Histogram[i].DistinctRange)

Original file line number	Diff line number	Diff line change
`@@ -163,7 +163,7 @@ func (p *planner) EvalRoutineExpr(`
`163`	`163`	`if routineDepthValue := ctx.Value(routineDepthKey{}); routineDepthValue != nil {`
`164`	`164`	`routineDepth = routineDepthValue.(int)`
`165`	`165`	`}`
`166`		`- const maxDepth = 10000`
	`166`	`+ const maxDepth = 100`
`167`	`167`	`if routineDepth > maxDepth {`
`168`	`168`	`return nil, pgerror.Newf(pgcode.ProgramLimitExceeded,`
`169`	`169`	`"routine reached recursion depth limit: %d (probably infinite loop)", maxDepth)`