cockroachdb
diff --git a/‎pkg/cmd/roachtest/github.go‎
Lines changed: 34 additions & 13 deletions b/‎pkg/cmd/roachtest/github.go‎
Lines changed: 34 additions & 13 deletions
diff --git a/‎pkg/cmd/roachtest/github_test.go‎
Lines changed: 8 additions & 9 deletions b/‎pkg/cmd/roachtest/github_test.go‎
Lines changed: 8 additions & 9 deletions
diff --git a/‎pkg/cmd/roachtest/main.go‎
Lines changed: 11 additions & 4 deletions b/‎pkg/cmd/roachtest/main.go‎
Lines changed: 11 additions & 4 deletions
diff --git a/‎pkg/cmd/roachtest/run.go‎
Lines changed: 9 additions & 1 deletion b/‎pkg/cmd/roachtest/run.go‎
Lines changed: 9 additions & 1 deletion
diff --git a/‎pkg/cmd/roachtest/test_monitor_test.go‎
Lines changed: 2 additions & 1 deletion b/‎pkg/cmd/roachtest/test_monitor_test.go‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎pkg/cmd/roachtest/test_runner.go‎
Lines changed: 47 additions & 17 deletions b/‎pkg/cmd/roachtest/test_runner.go‎
Lines changed: 47 additions & 17 deletions
@@ -23,21 +23,36 @@ import (
 	"github.com/cockroachdb/cockroach/pkg/roachprod/vm"
 )
 
+// GithubPoster is the interface through which the test runner posts failure
+// issues. Accepting an interface rather than the concrete githubIssues type
+// allows MaybePost to be mocked in unit tests that test failure modes.
+//
+// MaybePost takes the test, the per-test issue information, a logger, the
+// failure message and the test parameters. It returns an
+// *issues.TestFailureIssue and an error.
+type GithubPoster interface {
+	MaybePost(
+		t *testImpl, issueInfo *githubIssueInfo, l *logger.Logger, message string,
+		params map[string]string) (
+		*issues.TestFailureIssue, error)
+}
+
+// githubIssues struct implements GithubPoster
 type githubIssues struct {
-	disable      bool
+	disable     bool
+	issuePoster func(context.Context, issues.Logger, issues.IssueFormatter, issues.PostRequest,
+		*issues.Options) (*issues.TestFailureIssue, error)
+	teamLoader func() (team.Map, error)
+}
+
+// githubIssueInfo contains the information about an issue that is specific
+// to a single worker / test. It is kept separate from githubIssues because
+// githubIssues is shared amongst all workers.
+type githubIssueInfo struct {
 	cluster      *clusterImpl
 	vmCreateOpts *vm.CreateOpts
-	issuePoster  func(context.Context, issues.Logger, issues.IssueFormatter, issues.PostRequest, *issues.Options) (*issues.TestFailureIssue, error)
-	teamLoader   func() (team.Map, error)
 }
 
-func newGithubIssues(disable bool, c *clusterImpl, vmCreateOpts *vm.CreateOpts) *githubIssues {
-	return &githubIssues{
-		disable:      disable,
+// newGithubIssueInfo is the constructor for githubIssueInfo.
+func newGithubIssueInfo(cluster *clusterImpl, vmCreateOpts *vm.CreateOpts) *githubIssueInfo {
+	return &githubIssueInfo{
+		cluster:      cluster,
 		vmCreateOpts: vmCreateOpts,
-		cluster:      c,
-		issuePoster:  issues.Post,
-		teamLoader:   team.DefaultLoadTeams,
 	}
 }
 
@@ -177,6 +192,7 @@ func (g *githubIssues) createPostRequest(
 	runtimeAssertionsBuild bool,
 	coverageBuild bool,
 	params map[string]string,
+	issueInfo *githubIssueInfo,
 ) (issues.PostRequest, error) {
 	var mention []string
 
@@ -265,8 +281,8 @@ func (g *githubIssues) createPostRequest(
 
 	artifacts := fmt.Sprintf("/%s", testName)
 
-	if g.cluster != nil {
-		issueClusterName = g.cluster.name
+	if issueInfo.cluster != nil {
+		issueClusterName = issueInfo.cluster.name
 	}
 
 	issueMessage := messagePrefix + message
@@ -303,8 +319,13 @@ func (g *githubIssues) createPostRequest(
 	}, nil
 }
 
+// MaybePost entry point for POSTing an issue to GitHub
 func (g *githubIssues) MaybePost(
-	t *testImpl, l *logger.Logger, message string, params map[string]string,
+	t *testImpl,
+	issueInfo *githubIssueInfo,
+	l *logger.Logger,
+	message string,
+	params map[string]string,
 ) (*issues.TestFailureIssue, error) {
 	skipReason := g.shouldPost(t)
 	if skipReason != "" {
@@ -315,7 +336,7 @@ func (g *githubIssues) MaybePost(
 	postRequest, err := g.createPostRequest(
 		t.Name(), t.start, t.end, t.spec, t.failures(),
 		message,
-		roachtestutil.UsingRuntimeAssertions(t), t.goCoverEnabled, params,
+		roachtestutil.UsingRuntimeAssertions(t), t.goCoverEnabled, params, issueInfo,
 	)
 
 	if err != nil {
 
@@ -2,7 +2,6 @@
 //
 // Use of this software is governed by the CockroachDB Software License
 // included in the /LICENSE file.
-
 package main
 
 import (
@@ -33,12 +32,12 @@ import (
 
 var (
 	teamsYaml = `cockroachdb/unowned:
-  aliases:
-    cockroachdb/rfc-prs: other
+ aliases:
+   cockroachdb/rfc-prs: other
 cockroachdb/test-eng:
-  label: T-testeng
+ label: T-testeng
 cockroachdb/dev-inf:
-  label: T-dev-inf`
+ label: T-dev-inf`
 
 	validTeamsFn   = func() (team.Map, error) { return loadYamlTeams(teamsYaml) }
 	invalidTeamsFn = func() (team.Map, error) { return loadYamlTeams("invalid yaml") }
@@ -154,10 +153,9 @@ func TestCreatePostRequest(t *testing.T) {
 		datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string {
 			if d.Cmd == "post" {
 				github := &githubIssues{
-					vmCreateOpts: vmOpts,
-					cluster:      testClusterImpl,
-					teamLoader:   teamLoadFn,
+					teamLoader: teamLoadFn,
 				}
+				issueInfo := newGithubIssueInfo(testClusterImpl, vmOpts)
 
 				// See: `formatFailure` which formats failures for roachtests. Try to
 				// follow it here.
@@ -173,10 +171,11 @@ func TestCreatePostRequest(t *testing.T) {
 				}
 				message := b.String()
 
-				params := getTestParameters(ti, github.cluster, github.vmCreateOpts)
+				params := getTestParameters(ti, issueInfo.cluster, issueInfo.vmCreateOpts)
 				req, err := github.createPostRequest(
 					testName, ti.start, ti.end, testSpec, testCase.failures,
 					message, roachtestutil.UsingRuntimeAssertions(ti), ti.goCoverEnabled, params,
+					issueInfo,
 				)
 				if testCase.loadTeamsFailed {
 					// Assert that if TEAMS.yaml cannot be loaded then function errors.
 
@@ -47,6 +47,12 @@ const (
 	// created due to errors during cloud hardware allocation.
 	ExitCodeClusterProvisioningFailed = 11
 
+	// ExitCodeGithubPostFailed is the exit code indicating that posting results
+	// to GitHub failed.
+	// Note: This exit code masks the actual roachtest status, i.e. it can be
+	// returned for a run that would otherwise produce any of the other exit codes.
+	ExitCodeGithubPostFailed = 12
+
 	// runnerLogsDir is the dir under the artifacts root where the test runner log
 	// and other runner-related logs (i.e. cluster creation logs) will be written.
 	runnerLogsDir = "_runner-logs"
@@ -241,11 +247,12 @@ Check --parallelism, --run-forever and --wait-before-next-execution flags`,
 
 	if err := rootCmd.Execute(); err != nil {
 		code := 1
-		if errors.Is(err, errTestsFailed) {
-			code = ExitCodeTestsFailed
-		}
-		if errors.Is(err, errSomeClusterProvisioningFailed) {
+		if errors.Is(err, errGithubPostFailed) {
+			code = ExitCodeGithubPostFailed
+		} else if errors.Is(err, errSomeClusterProvisioningFailed) {
 			code = ExitCodeClusterProvisioningFailed
+		} else if errors.Is(err, errTestsFailed) {
+			code = ExitCodeTestsFailed
 		}
 		// Cobra has already printed the error message.
 		os.Exit(code)
 
@@ -20,6 +20,7 @@ import (
 
 	"github.com/DataDog/datadog-api-client-go/v2/api/datadog"
 	"github.com/DataDog/datadog-api-client-go/v2/api/datadogV1"
+	"github.com/cockroachdb/cockroach/pkg/cmd/bazci/githubpost/issues"
 	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/registry"
 	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/roachtestflags"
 	"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/spec"
+	"github.com/cockroachdb/cockroach/pkg/internal/team"
@@ -138,6 +139,12 @@ func runTests(register func(registry.Registry), filter *registry.TestFilter) err
 		literalArtifactsDir: literalArtifactsDir,
 		runnerLogPath:       runnerLogPath,
 	}
+
+	// A single poster is built here and handed to the runner. teamLoader is
+	// populated explicitly because the removed newGithubIssues constructor always
+	// installed team.DefaultLoadTeams; leaving it nil would leave issue creation
+	// without a way to load teams.
+	github := &githubIssues{
+		disable:     runner.config.disableIssue,
+		issuePoster: issues.Post,
+		teamLoader:  team.DefaultLoadTeams,
+	}
+
 	l.Printf("global random seed: %d", roachtestflags.GlobalSeed)
 	go func() {
 		if err := http.ListenAndServe(
@@ -183,7 +190,8 @@ func runTests(register func(registry.Registry), filter *registry.TestFilter) err
 			goCoverEnabled:         roachtestflags.GoCoverEnabled,
 			exportOpenMetrics:      roachtestflags.ExportOpenmetrics,
 		},
-		lopt)
+		lopt,
+		github)
 
 	// Make sure we attempt to clean up. We run with a non-canceled ctx; the
 	// ctx above might be canceled in case a signal was received. If that's
 
@@ -52,6 +52,7 @@ func TestGlobalMonitorError(t *testing.T) {
 	defer stopper.Stop(ctx)
 	cr := newClusterRegistry()
 	runner := newUnitTestRunner(cr, stopper)
+	github := defaultGithub(runner.config.disableIssue)
 
 	var buf syncedBuffer
 	copt := defaultClusterOpt()
@@ -75,6 +76,6 @@ func TestGlobalMonitorError(t *testing.T) {
 		},
 	}
 	err := runner.Run(ctx, []registry.TestSpec{mockTest}, 1, /* count */
-		defaultParallelism, copt, testOpts{}, lopt)
+		defaultParallelism, copt, testOpts{}, lopt, github)
 	require.Error(t, err)
 }
@@ -62,9 +62,15 @@ func init() {
 var (
 	errTestsFailed = fmt.Errorf("some tests failed")
 
-	// reference error used by main.go at the end of a run of tests
+	// errSomeClusterProvisioningFailed error sent after a run in
+	// [testRunner.Run] if any worker encountered a cluster provisioning error.
+	// Used in main.go to determine the run exit code.
 	errSomeClusterProvisioningFailed = fmt.Errorf("some clusters could not be created")
 
+	// errGithubPostFailed is returned by [testRunner.Run] after a run if any
+	// worker encountered an error when trying to POST to GitHub.
+	errGithubPostFailed = fmt.Errorf("failed to POST to GitHub")
+
 	prometheusNameSpace = "roachtest"
 	// prometheusScrapeInterval should be consistent with the scrape interval defined in
 	// https://grafana.testeng.crdb.io/prometheus/config
@@ -174,8 +180,11 @@ type testRunner struct {
 		completed []completedTestInfo
 	}
 
-	// Counts cluster creation errors across all workers.
+	// numClusterErrs counts cluster creation errors across all workers.
 	numClusterErrs int32
+
+	// numGithubPostErrs counts GitHub post errors across all workers.
+	numGithubPostErrs int32
 }
 
 type perfMetricsCollector struct {
@@ -308,6 +317,7 @@ func (r *testRunner) Run(
 	clustersOpt clustersOpt,
 	topt testOpts,
 	lopt loggingOpt,
+	github GithubPoster,
 ) error {
 	// Validate options.
 	if len(tests) == 0 {
@@ -411,6 +421,7 @@ func (r *testRunner) Run(
 				topt,
 				childLogger,
 				n*count,
+				github,
 			)
 
 			if err != nil {
@@ -448,14 +459,25 @@ func (r *testRunner) Run(
 	passFailLine := r.generateReport()
 	shout(ctx, l, lopt.stdout, passFailLine)
 
+	// For the errors that don't short-circuit the pipeline run, return a joined
+	// error and leave case handling to the caller
+	var err error
+	if r.numGithubPostErrs > 0 {
+		shout(ctx, l, lopt.stdout, "%d errors occurred while posting to github", r.numGithubPostErrs)
+		err = errors.Join(err, errGithubPostFailed)
+	}
 	if r.numClusterErrs > 0 {
 		shout(ctx, l, lopt.stdout, "%d clusters could not be created", r.numClusterErrs)
-		return errSomeClusterProvisioningFailed
+		err = errors.Join(err, errSomeClusterProvisioningFailed)
 	}
-
 	if len(r.status.fail) > 0 {
-		return errTestsFailed
+		// Report the number of failed tests; len() matches the guard above so
+		// that the %d verb receives an integer rather than the collection itself.
+		shout(ctx, l, lopt.stdout, "%d tests failed", len(r.status.fail))
+		err = errors.Join(err, errTestsFailed)
+	}
+	if err != nil {
+		return err
 	}
+
 	// To ensure all prometheus metrics have been scraped, ensure shutdown takes
 	// at least one scrapeInterval, unless the roachtest fails or gets cancelled.
 	requiredShutDownTime := prometheusScrapeInterval
@@ -596,6 +618,7 @@ func (r *testRunner) runWorker(
 	topt testOpts,
 	l *logger.Logger,
 	maxTotalFailures int,
+	github GithubPoster,
 ) error {
 	stdout := lopt.stdout
 
@@ -841,18 +864,22 @@ func (r *testRunner) runWorker(
 			runID:                  generateRunID(clustersOpt),
 		}
 		t.ReplaceL(testL)
-		github := newGithubIssues(r.config.disableIssue, c, vmCreateOpts)
-
+		issueInfo := newGithubIssueInfo(c, vmCreateOpts)
 		// handleClusterCreationFailure can be called when the `err` given
 		// occurred for reasons related to creating or setting up a
 		// cluster for a test.
-		handleClusterCreationFailure := func(err error) {
-			t.Error(errClusterProvisioningFailed(err))
-
-			params := getTestParameters(t, github.cluster, github.vmCreateOpts)
+		handleClusterCreationFailure := func(clusterCreateErr error) {
+			t.Error(errClusterProvisioningFailed(clusterCreateErr))
+
+			// Technically the issueInfo struct is not needed here, because
+			// runWorker() has direct access to the clusterImpl and vm.CreateOpts.
+			// runTest() does not, so the invocation of getTestParameters() is
+			// kept the same in both spots.
+			params := getTestParameters(t, issueInfo.cluster, issueInfo.vmCreateOpts)
 			logTestParameters(l, params)
-			if _, err := github.MaybePost(t, l, t.failureMsg(), params); err != nil {
-				shout(ctx, l, stdout, "failed to post issue: %s", err)
+			if _, githubErr := github.MaybePost(t, issueInfo, l, t.failureMsg(), params); githubErr != nil {
+				atomic.AddInt32(&r.numGithubPostErrs, 1)
+				shout(ctx, l, stdout, "failed to post issue: %s", githubErr)
 			}
 		}
 
@@ -978,7 +1005,8 @@ func (r *testRunner) runWorker(
 				wStatus.SetTest(t, testToRun)
 				wStatus.SetStatus("running test")
 
-				r.runTest(ctx, t, testToRun.runNum, testToRun.runCount, c, stdout, testL, github)
+				r.runTest(ctx, t, testToRun.runNum, testToRun.runCount, c, stdout, testL,
+					github, issueInfo)
 			}
 		}
 
@@ -1135,7 +1163,8 @@ func (r *testRunner) runTest(
 	c *clusterImpl,
 	stdout io.Writer,
 	l *logger.Logger,
-	github *githubIssues,
+	github GithubPoster,
+	issueInfo *githubIssueInfo,
 ) {
 	testRunID := t.Name()
 	if runCount > 1 {
@@ -1238,11 +1267,12 @@ func (r *testRunner) runTest(
 				}
 
 				output := fmt.Sprintf("%s\ntest artifacts and logs in: %s", failureMsg, t.ArtifactsDir())
-				params := getTestParameters(t, github.cluster, github.vmCreateOpts)
+				params := getTestParameters(t, issueInfo.cluster, issueInfo.vmCreateOpts)
 				logTestParameters(l, params)
-				issue, err := github.MaybePost(t, l, output, params)
+				issue, err := github.MaybePost(t, issueInfo, l, output, params)
 				if err != nil {
 					shout(ctx, l, stdout, "failed to post issue: %s", err)
+					atomic.AddInt32(&r.numGithubPostErrs, 1)
 				}
 
 				// If an issue was created (or comment added) on GitHub,
Original file line number	Diff line number	Diff line change
`@@ -52,6 +52,7 @@ func TestGlobalMonitorError(t *testing.T) {`
`52`	`52`	`defer stopper.Stop(ctx)`
`53`	`53`	`cr := newClusterRegistry()`
`54`	`54`	`runner := newUnitTestRunner(cr, stopper)`
	`55`	`+ github := defaultGithub(runner.config.disableIssue)`
`55`	`56`
`56`	`57`	`var buf syncedBuffer`
`57`	`58`	`copt := defaultClusterOpt()`
`@@ -75,6 +76,6 @@ func TestGlobalMonitorError(t *testing.T) {`
`75`	`76`	`},`
`76`	`77`	`}`
`77`	`78`	`err := runner.Run(ctx, []registry.TestSpec{mockTest}, 1, /* count */`
`78`		`- defaultParallelism, copt, testOpts{}, lopt)`
	`79`	`+ defaultParallelism, copt, testOpts{}, lopt, github)`
`79`	`80`	`require.Error(t, err)`
`80`	`81`	`}`