Skip to content

Commit deb2db8

Browse files
committed
feat(perf): cache vcs.GetFiles() to reduce redundant VCS API volume
Calls to `vcs.GetFiles()` only result in one or two API calls (paging aside). However the method is called each time a CEL expression references `"path".pathChanged()` or the `on-path-change` annotation is checked for matches. Because of this, because matching is evaluated several times in a row, and because some users have tens of PipelineRuns in their `.tekton` directory, in some cases a single push event may cause PaC to make hundreds of API requests listing the same files repeatedly.
1 parent 860c901 commit deb2db8

File tree

9 files changed

+265
-273
lines changed

9 files changed

+265
-273
lines changed

pkg/provider/bitbucketdatacenter/bitbucketdatacenter.go

Lines changed: 65 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ type Provider struct {
4242
projectKey string
4343
repo *v1alpha1.Repository
4444
triggerEvent string
45+
changedFiles *changedfiles.ChangedFiles
4546
}
4647

4748
func (v Provider) Client() *scm.Client {
@@ -358,79 +359,82 @@ func (v *Provider) GetConfig() *info.ProviderConfig {
358359
}
359360

360361
func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
361-
OrgAndRepo := fmt.Sprintf("%s/%s", runevent.Organization, runevent.Repository)
362-
if runevent.TriggerTarget == triggertype.PullRequest {
363-
opts := &scm.ListOptions{Page: 1, Size: apiResponseLimit}
364-
changedFiles := changedfiles.ChangedFiles{}
365-
for {
366-
changes, _, err := v.Client().PullRequests.ListChanges(ctx, OrgAndRepo, runevent.PullRequestNumber, opts)
367-
if err != nil {
368-
return changedfiles.ChangedFiles{}, fmt.Errorf("failed to list changes for pull request: %w", err)
369-
}
370-
371-
for _, c := range changes {
372-
changedFiles.All = append(changedFiles.All, c.Path)
373-
if c.Added {
374-
changedFiles.Added = append(changedFiles.Added, c.Path)
375-
}
376-
if c.Modified {
377-
changedFiles.Modified = append(changedFiles.Modified, c.Path)
378-
}
379-
if c.Renamed {
380-
changedFiles.Renamed = append(changedFiles.Renamed, c.Path)
362+
if v.changedFiles == nil {
363+
OrgAndRepo := fmt.Sprintf("%s/%s", runevent.Organization, runevent.Repository)
364+
switch runevent.TriggerTarget {
365+
case triggertype.PullRequest:
366+
opts := &scm.ListOptions{Page: 1, Size: apiResponseLimit}
367+
changedFiles := changedfiles.ChangedFiles{}
368+
for {
369+
changes, _, err := v.Client().PullRequests.ListChanges(ctx, OrgAndRepo, runevent.PullRequestNumber, opts)
370+
if err != nil {
371+
return changedfiles.ChangedFiles{}, fmt.Errorf("failed to list changes for pull request: %w", err)
381372
}
382-
if c.Deleted {
383-
changedFiles.Deleted = append(changedFiles.Deleted, c.Path)
384-
}
385-
}
386373

387-
// In the Jenkins-x/go-scm package, the `isLastPage` field is not available, and the value of
388-
// `response.Page.Last` is set to `0`. Therefore, to determine if there are more items to fetch,
389-
// we can check if the length of the currently fetched items is less than the specified limit.
390-
// If the length is less than the limit, it indicates that there are no more items to retrieve.
391-
if len(changes) < apiResponseLimit {
392-
break
393-
}
374+
for _, c := range changes {
375+
changedFiles.All = append(changedFiles.All, c.Path)
376+
if c.Added {
377+
changedFiles.Added = append(changedFiles.Added, c.Path)
378+
}
379+
if c.Modified {
380+
changedFiles.Modified = append(changedFiles.Modified, c.Path)
381+
}
382+
if c.Renamed {
383+
changedFiles.Renamed = append(changedFiles.Renamed, c.Path)
384+
}
385+
if c.Deleted {
386+
changedFiles.Deleted = append(changedFiles.Deleted, c.Path)
387+
}
388+
}
394389

395-
opts.Page++
396-
}
397-
return changedFiles, nil
398-
}
390+
// In the Jenkins-x/go-scm package, the `isLastPage` field is not available, and the value of
391+
// `response.Page.Last` is set to `0`. Therefore, to determine if there are more items to fetch,
392+
// we can check if the length of the currently fetched items is less than the specified limit.
393+
// If the length is less than the limit, it indicates that there are no more items to retrieve.
394+
if len(changes) < apiResponseLimit {
395+
break
396+
}
399397

400-
if runevent.TriggerTarget == triggertype.Push {
401-
opts := &scm.ListOptions{Page: 1, Size: apiResponseLimit}
402-
changedFiles := changedfiles.ChangedFiles{}
403-
for {
404-
changes, _, err := v.Client().Git.ListChanges(ctx, OrgAndRepo, runevent.SHA, opts)
405-
if err != nil {
406-
return changedfiles.ChangedFiles{}, fmt.Errorf("failed to list changes for commit %s: %w", runevent.SHA, err)
398+
opts.Page++
407399
}
408-
409-
for _, c := range changes {
410-
changedFiles.All = append(changedFiles.All, c.Path)
411-
if c.Added {
412-
changedFiles.Added = append(changedFiles.Added, c.Path)
413-
}
414-
if c.Modified {
415-
changedFiles.Modified = append(changedFiles.Modified, c.Path)
400+
v.changedFiles = &changedFiles
401+
case triggertype.Push:
402+
opts := &scm.ListOptions{Page: 1, Size: apiResponseLimit}
403+
changedFiles := changedfiles.ChangedFiles{}
404+
for {
405+
changes, _, err := v.Client().Git.ListChanges(ctx, OrgAndRepo, runevent.SHA, opts)
406+
if err != nil {
407+
return changedfiles.ChangedFiles{}, fmt.Errorf("failed to list changes for commit %s: %w", runevent.SHA, err)
416408
}
417-
if c.Renamed {
418-
changedFiles.Renamed = append(changedFiles.Renamed, c.Path)
409+
410+
for _, c := range changes {
411+
changedFiles.All = append(changedFiles.All, c.Path)
412+
if c.Added {
413+
changedFiles.Added = append(changedFiles.Added, c.Path)
414+
}
415+
if c.Modified {
416+
changedFiles.Modified = append(changedFiles.Modified, c.Path)
417+
}
418+
if c.Renamed {
419+
changedFiles.Renamed = append(changedFiles.Renamed, c.Path)
420+
}
421+
if c.Deleted {
422+
changedFiles.Deleted = append(changedFiles.Deleted, c.Path)
423+
}
419424
}
420-
if c.Deleted {
421-
changedFiles.Deleted = append(changedFiles.Deleted, c.Path)
425+
426+
if len(changes) < apiResponseLimit {
427+
break
422428
}
423-
}
424429

425-
if len(changes) < apiResponseLimit {
426-
break
430+
opts.Page++
427431
}
428-
429-
opts.Page++
432+
v.changedFiles = &changedFiles
433+
default:
434+
v.changedFiles = &changedfiles.ChangedFiles{}
430435
}
431-
return changedFiles, nil
432436
}
433-
return changedfiles.ChangedFiles{}, nil
437+
return *v.changedFiles, nil
434438
}
435439

436440
func (v *Provider) CreateToken(_ context.Context, _ []string, _ *info.Event) (string, error) {

pkg/provider/bitbucketdatacenter/bitbucketdatacenter_test.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,14 @@ import (
2121
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/triggertype"
2222
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider"
2323
bbtest "github.com/openshift-pipelines/pipelines-as-code/pkg/provider/bitbucketdatacenter/test"
24+
"github.com/openshift-pipelines/pipelines-as-code/pkg/test/metrics"
2425

2526
"github.com/jenkins-x/go-scm/scm"
2627
"go.uber.org/zap"
2728
zapobserver "go.uber.org/zap/zaptest/observer"
2829
"gotest.tools/v3/assert"
30+
"knative.dev/pkg/metrics/metricstest"
31+
_ "knative.dev/pkg/metrics/testing"
2932
rtesting "knative.dev/pkg/reconciler/testing"
3033
)
3134

@@ -709,6 +712,7 @@ func TestGetFiles(t *testing.T) {
709712
ctx, _ := rtesting.SetupFakeContext(t)
710713
client, mux, tearDown, tURL := bbtest.SetupBBDataCenterClient()
711714
defer tearDown()
715+
metrics.ResetMetrics()
712716

713717
stats := &bbtest.DiffStats{
714718
Values: tt.changeFiles,
@@ -734,13 +738,17 @@ func TestGetFiles(t *testing.T) {
734738
}
735739
})
736740
}
737-
v := &Provider{client: client, baseURL: tURL}
741+
742+
metricsTags := map[string]string{"provider": "bitbucket-datacenter", "event-type": string(tt.event.TriggerTarget)}
743+
metricstest.CheckStatsNotReported(t, "pipelines_as_code_git_provider_api_request_count")
744+
745+
v := &Provider{client: client, baseURL: tURL, triggerEvent: string(tt.event.TriggerTarget)}
738746
changedFiles, err := v.GetFiles(ctx, tt.event)
739747
if tt.wantError {
740748
assert.Equal(t, err.Error(), tt.errMsg)
741-
return
749+
} else {
750+
assert.NilError(t, err, nil)
742751
}
743-
assert.NilError(t, err, nil)
744752
assert.Equal(t, tt.wantAddedFilesCount, len(changedFiles.Added))
745753
assert.Equal(t, tt.wantDeletedFilesCount, len(changedFiles.Deleted))
746754
assert.Equal(t, tt.wantModifiedFilesCount, len(changedFiles.Modified))
@@ -757,6 +765,17 @@ func TestGetFiles(t *testing.T) {
757765
assert.Equal(t, tt.changeFiles[i].Path.ToString, changedFiles.All[i])
758766
}
759767
}
768+
769+
// Check caching
770+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 1)
771+
_, _ = v.GetFiles(ctx, tt.event)
772+
if tt.wantError {
773+
// No caching on error
774+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 2)
775+
} else {
776+
// Cache API results on success
777+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 1)
778+
}
760779
})
761780
}
762781
}

pkg/provider/github/github.go

Lines changed: 53 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ type Provider struct {
5656
userType string // The type of user i.e bot or not
5757
skippedRun
5858
triggerEvent string
59+
changedFiles *changedfiles.ChangedFiles
5960
}
6061

6162
type skippedRun struct {
@@ -500,67 +501,70 @@ func (v *Provider) getPullRequest(ctx context.Context, runevent *info.Event) (*i
500501
return runevent, nil
501502
}
502503

503-
// GetFiles get a files from pull request.
504+
// GetFiles gets the files changed by a given event.
504505
func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
505-
if runevent.TriggerTarget == triggertype.PullRequest {
506-
opt := &github.ListOptions{PerPage: v.PaginedNumber}
507-
changedFiles := changedfiles.ChangedFiles{}
508-
for {
509-
repoCommit, resp, err := wrapAPI(v, "list_pull_request_files", func() ([]*github.CommitFile, *github.Response, error) {
510-
return v.Client().PullRequests.ListFiles(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber, opt)
506+
if v.changedFiles == nil {
507+
switch runevent.TriggerTarget {
508+
case triggertype.PullRequest:
509+
opt := &github.ListOptions{PerPage: v.PaginedNumber}
510+
changedFiles := changedfiles.ChangedFiles{}
511+
for {
512+
repoCommit, resp, err := wrapAPI(v, "list_pull_request_files", func() ([]*github.CommitFile, *github.Response, error) {
513+
return v.Client().PullRequests.ListFiles(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber, opt)
514+
})
515+
if err != nil {
516+
return changedfiles.ChangedFiles{}, err
517+
}
518+
for j := range repoCommit {
519+
changedFiles.All = append(changedFiles.All, *repoCommit[j].Filename)
520+
if *repoCommit[j].Status == "added" {
521+
changedFiles.Added = append(changedFiles.Added, *repoCommit[j].Filename)
522+
}
523+
if *repoCommit[j].Status == "removed" {
524+
changedFiles.Deleted = append(changedFiles.Deleted, *repoCommit[j].Filename)
525+
}
526+
if *repoCommit[j].Status == "modified" {
527+
changedFiles.Modified = append(changedFiles.Modified, *repoCommit[j].Filename)
528+
}
529+
if *repoCommit[j].Status == "renamed" {
530+
changedFiles.Renamed = append(changedFiles.Renamed, *repoCommit[j].Filename)
531+
}
532+
}
533+
if resp.NextPage == 0 {
534+
break
535+
}
536+
opt.Page = resp.NextPage
537+
}
538+
v.changedFiles = &changedFiles
539+
case triggertype.Push:
540+
changedFiles := changedfiles.ChangedFiles{}
541+
rC, _, err := wrapAPI(v, "get_commit_files", func() (*github.RepositoryCommit, *github.Response, error) {
542+
return v.Client().Repositories.GetCommit(ctx, runevent.Organization, runevent.Repository, runevent.SHA, &github.ListOptions{})
511543
})
512544
if err != nil {
513545
return changedfiles.ChangedFiles{}, err
514546
}
515-
for j := range repoCommit {
516-
changedFiles.All = append(changedFiles.All, *repoCommit[j].Filename)
517-
if *repoCommit[j].Status == "added" {
518-
changedFiles.Added = append(changedFiles.Added, *repoCommit[j].Filename)
547+
for i := range rC.Files {
548+
changedFiles.All = append(changedFiles.All, *rC.Files[i].Filename)
549+
if *rC.Files[i].Status == "added" {
550+
changedFiles.Added = append(changedFiles.Added, *rC.Files[i].Filename)
519551
}
520-
if *repoCommit[j].Status == "removed" {
521-
changedFiles.Deleted = append(changedFiles.Deleted, *repoCommit[j].Filename)
552+
if *rC.Files[i].Status == "removed" {
553+
changedFiles.Deleted = append(changedFiles.Deleted, *rC.Files[i].Filename)
522554
}
523-
if *repoCommit[j].Status == "modified" {
524-
changedFiles.Modified = append(changedFiles.Modified, *repoCommit[j].Filename)
555+
if *rC.Files[i].Status == "modified" {
556+
changedFiles.Modified = append(changedFiles.Modified, *rC.Files[i].Filename)
525557
}
526-
if *repoCommit[j].Status == "renamed" {
527-
changedFiles.Renamed = append(changedFiles.Renamed, *repoCommit[j].Filename)
558+
if *rC.Files[i].Status == "renamed" {
559+
changedFiles.Renamed = append(changedFiles.Renamed, *rC.Files[i].Filename)
528560
}
529561
}
530-
if resp.NextPage == 0 {
531-
break
532-
}
533-
opt.Page = resp.NextPage
534-
}
535-
return changedFiles, nil
536-
}
537-
538-
if runevent.TriggerTarget == "push" {
539-
changedFiles := changedfiles.ChangedFiles{}
540-
rC, _, err := wrapAPI(v, "get_commit_files", func() (*github.RepositoryCommit, *github.Response, error) {
541-
return v.Client().Repositories.GetCommit(ctx, runevent.Organization, runevent.Repository, runevent.SHA, &github.ListOptions{})
542-
})
543-
if err != nil {
544-
return changedfiles.ChangedFiles{}, err
545-
}
546-
for i := range rC.Files {
547-
changedFiles.All = append(changedFiles.All, *rC.Files[i].Filename)
548-
if *rC.Files[i].Status == "added" {
549-
changedFiles.Added = append(changedFiles.Added, *rC.Files[i].Filename)
550-
}
551-
if *rC.Files[i].Status == "removed" {
552-
changedFiles.Deleted = append(changedFiles.Deleted, *rC.Files[i].Filename)
553-
}
554-
if *rC.Files[i].Status == "modified" {
555-
changedFiles.Modified = append(changedFiles.Modified, *rC.Files[i].Filename)
556-
}
557-
if *rC.Files[i].Status == "renamed" {
558-
changedFiles.Renamed = append(changedFiles.Renamed, *rC.Files[i].Filename)
559-
}
562+
v.changedFiles = &changedFiles
563+
default:
564+
v.changedFiles = &changedfiles.ChangedFiles{}
560565
}
561-
return changedFiles, nil
562566
}
563-
return changedfiles.ChangedFiles{}, nil
567+
return *v.changedFiles, nil
564568
}
565569

566570
// getObject Get an object from a repository.

0 commit comments

Comments
 (0)