@@ -21,11 +21,13 @@ import (
2121 "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestutil"
2222 "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
2323 "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
24+ "github.com/cockroachdb/cockroach/pkg/jobs"
2425 "github.com/cockroachdb/cockroach/pkg/roachprod/install"
2526 "github.com/cockroachdb/cockroach/pkg/util/timeutil"
2627 "github.com/cockroachdb/cockroach/pkg/workload/histogram"
2728 "github.com/cockroachdb/cockroach/pkg/workload/histogram/exporter"
2829 "github.com/lib/pq"
30+ "github.com/stretchr/testify/require"
2931)
3032
3133func registerInspectThoughput (r registry.Registry ) {
@@ -34,7 +36,7 @@ func registerInspectThoughput(r registry.Registry) {
3436
3537 // Long run: 12 nodes × 8 CPUs, 1B rows, 2 index checks (runs INSPECT twice: 1 index, then 2 indexes), ~5 hours (in v25.4)
3638 const indexesForLongRun = 2
37- r .Add (makeInspectThroughputTest (r , 12 , 8 , 1_000_000_000 , 8 * time .Hour , indexesForLongRun ))
39+ r .Add (makeInspectThroughputTest (r , 12 , 8 , 1_000_000_000 , 11 * time .Hour , indexesForLongRun ))
3840}
3941
4042// initInspectHistograms creates a histogram registry with multiple named metrics.
@@ -225,9 +227,7 @@ func makeInspectThroughputTest(
225227 before := timeutil .Now ()
226228
227229 inspectSQL := fmt .Sprintf ("INSPECT TABLE bulkingest.bulkingest WITH OPTIONS INDEX (%s)" , cfg .indexListSQL )
228- if _ , err := db .Exec (inspectSQL ); err != nil {
229- t .Fatal (err )
230- }
230+ jobID := runInspectInBackground (ctx , t , db , inspectSQL )
231231
232232 // Tick after INSPECT completes to capture elapsed time for this specific metric.
233233 tickHistogram (cfg .metricName )
@@ -249,15 +249,9 @@ func makeInspectThroughputTest(
249249 0
250250 )
251251 FROM system.job_info
252- WHERE job_id = (
253- SELECT job_id
254- FROM [SHOW JOBS]
255- WHERE job_type = 'INSPECT'
256- ORDER BY created DESC
257- LIMIT 1
258- )
252+ WHERE job_id = $1
259253 AND info_key = 'legacy_progress'`
260- err := db .QueryRow (querySQL ).Scan (& jobTotalCheckCount )
254+ err := db .QueryRow (querySQL , jobID ).Scan (& jobTotalCheckCount )
261255 if err != nil {
262256 t .L ().Printf ("Warning: failed to query job total check count: %v" , err )
263257 } else {
@@ -301,3 +295,79 @@ func disableRowCountValidation(t test.Test, db *gosql.DB) {
301295 }
302296 }
303297}
298+
299+ // runInspectInBackground runs an INSPECT command with a short statement timeout,
300+ // forcing it to run as a background job. It then polls the job until completion,
301+ // reporting progress at 10% intervals. Returns the job ID.
302+ func runInspectInBackground (
303+ ctx context.Context , t test.Test , db * gosql.DB , inspectSQL string ,
304+ ) (jobID int64 ) {
305+ // Set a short statement timeout to force INSPECT to run as a background job.
306+ _ , err := db .Exec ("SET statement_timeout = '5s'" )
307+ require .NoError (t , err )
308+
309+ // Reset statement timeout after we're done.
310+ defer func () {
311+ _ , resetErr := db .Exec ("RESET statement_timeout" )
312+ require .NoError (t , resetErr )
313+ }()
314+
315+ _ , err = db .Exec (inspectSQL )
316+
317+ // This may fail if the INSPECT took longer than the statement_timeout to run.
318+ // So, we tolerate only statement timeout errors here.
319+ if err != nil {
320+ require .ErrorContains (t , err , "statement timeout" )
321+ }
322+
323+ // Get the INSPECT job ID.
324+ getJobIDSQL := `
325+ SELECT job_id
326+ FROM [SHOW JOBS]
327+ WHERE job_type = 'INSPECT'
328+ ORDER BY created DESC
329+ LIMIT 1`
330+ if err := db .QueryRow (getJobIDSQL ).Scan (& jobID ); err != nil {
331+ t .Fatalf ("failed to get INSPECT job ID: %v" , err )
332+ }
333+ t .L ().Printf ("INSPECT job ID: %d" , jobID )
334+
335+ // Poll the job until it completes, reporting progress at 10% intervals.
336+ const pollInterval = 5 * time .Second
337+ lastReportedThreshold := - 1
338+ ticker := time .NewTicker (pollInterval )
339+ defer ticker .Stop ()
340+
341+ for {
342+ select {
343+ case <- ctx .Done ():
344+ t .Fatalf ("context canceled while waiting for INSPECT job %d" , jobID )
345+ case <- ticker .C :
346+ var status jobs.State
347+ var fractionCompleted float64
348+ checkJobSQL := `
349+ SELECT status, fraction_completed
350+ FROM [SHOW JOBS]
351+ WHERE job_id = $1`
352+ if err := db .QueryRow (checkJobSQL , jobID ).Scan (& status , & fractionCompleted ); err != nil {
353+ t .Fatalf ("failed to query job %d status: %v" , jobID , err )
354+ }
355+
356+ // Report progress at 10% thresholds (0%, 10%, 20%, ..., 90%).
357+ currentThreshold := int (fractionCompleted * 10 )
358+ if currentThreshold > lastReportedThreshold && currentThreshold < 10 {
359+ t .L ().Printf ("INSPECT job %d: %d%% complete" , jobID , currentThreshold * 10 )
360+ lastReportedThreshold = currentThreshold
361+ }
362+
363+ // Check if job is complete.
364+ switch status {
365+ case jobs .StateSucceeded :
366+ t .L ().Printf ("INSPECT job %d: 100%% complete (succeeded)" , jobID )
367+ return jobID
368+ case jobs .StateFailed , jobs .StateCanceled :
369+ t .Fatalf ("INSPECT job %d finished with status: %s" , jobID , status )
370+ }
371+ }
372+ }
373+ }
0 commit comments