Skip to content

Commit ea46004

Browse files
authored
fix: Removed retrieval of full dependency manifest data (#79)
Signed-off-by: Eddie Knight <knight@linux.com>
1 parent 5450368 commit ea46004

File tree

5 files changed

+183
-245
lines changed

5 files changed

+183
-245
lines changed

data/graphql-data.go

Lines changed: 1 addition & 134 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,5 @@
11
package data
22

3-
import (
4-
"context"
5-
"fmt"
6-
"path/filepath"
7-
8-
"github.com/privateerproj/privateer-sdk/config"
9-
"github.com/shurcooL/githubv4"
10-
)
11-
123
// GraphqlRepoData is used in a query to get general repository information
134
type GraphqlRepoData struct {
145
Repository struct {
@@ -85,22 +76,8 @@ type GraphqlRepoData struct {
8576
Description string
8677
}
8778
ContributingGuidelines struct {
88-
Body string
89-
ResourcePath string
79+
Body string
9080
}
91-
DependencyGraphManifests struct {
92-
TotalCount int
93-
Nodes []struct {
94-
Filename string
95-
Dependencies struct {
96-
TotalCount int
97-
Nodes []struct {
98-
PackageName string
99-
Requirements string
100-
}
101-
} `graphql:"dependencies(first: 100)"`
102-
} `graphql:"nodes"`
103-
} `graphql:"dependencyGraphManifests(first: 100)"`
10481
Releases struct {
10582
Nodes []struct {
10683
TagName string
@@ -115,113 +92,3 @@ type GraphqlRepoData struct {
11592
} `graphql:"releases(first: 1, orderBy: {field: CREATED_AT, direction: DESC})"`
11693
} `graphql:"repository(owner: $owner, name: $name)"`
11794
}
118-
119-
// GraphqlRepoTree is used in a query to get top 3 levels of the repository contents
120-
type GraphqlRepoTree struct {
121-
Repository struct {
122-
Object struct {
123-
Tree struct {
124-
Entries []struct {
125-
Name string
126-
Type string // "blob" for files, "tree" for directories
127-
Path string
128-
Object *struct {
129-
Tree struct {
130-
Entries []struct {
131-
Name string
132-
Type string
133-
Path string
134-
Object *struct {
135-
Tree struct {
136-
Entries []struct {
137-
Name string
138-
Type string
139-
Path string
140-
}
141-
} `graphql:"... on Tree"`
142-
} `graphql:"object"`
143-
}
144-
} `graphql:"... on Tree"`
145-
} `graphql:"object"`
146-
}
147-
} `graphql:"... on Tree"`
148-
} `graphql:"object(expression: $branch)"`
149-
} `graphql:"repository(owner: $owner, name: $name)"`
150-
}
151-
152-
func checkTreeForBinaries(tree *GraphqlRepoTree, binariesFound []string) []string {
153-
for _, entry := range tree.Repository.Object.Tree.Entries {
154-
binariesFound = identifyBinaries(binariesFound, entry.Type, entry.Name)
155-
if entry.Type == "tree" {
156-
for _, subEntry := range entry.Object.Tree.Entries {
157-
binariesFound = identifyBinaries(binariesFound, subEntry.Type, subEntry.Name)
158-
if subEntry.Type == "tree" {
159-
for _, subSubEntry := range subEntry.Object.Tree.Entries {
160-
binariesFound = identifyBinaries(binariesFound, subSubEntry.Type, subSubEntry.Name)
161-
// if subSubEntry.Type == "tree" {
162-
// TODO: The current GraphQL call stops after 3 levels of depth.
163-
// Additional API calls will be required for recursion if another tree is found.
164-
// }
165-
}
166-
}
167-
}
168-
}
169-
}
170-
return binariesFound
171-
}
172-
173-
func identifyBinaries(binariesFound []string, filetype string, filename string) []string {
174-
if filetype == "blob" {
175-
if isBinaryFile(filename) {
176-
binariesFound = append(binariesFound, filename)
177-
}
178-
}
179-
return binariesFound
180-
}
181-
182-
// TODO: this is a lightweight check, looking at filenames only.
183-
// GitHub's GraphQL API has an 'isBinary' field that could be used for a more accurate check,
184-
// but I didn't manage to get that query working as expected.
185-
func isBinaryFile(filename string) bool {
186-
binaryExtensions := map[string]bool{
187-
"": true, ".exe": true, ".dll": true, ".so": true, ".pdf": true,
188-
".zip": true, ".tar": true, ".mp4": true, ".mp3": true,
189-
}
190-
knownFilenames := map[string]bool{
191-
// Extend this with more known filenames as needed
192-
"README": true, "LICENSE": true, "CHANGELOG": true, "CONTRIBUTING": true,
193-
"CODE_OF_CONDUCT": true, "TODO": true, "SECURITY": true, "NOTICE": true, "CODEOWNERS": true,
194-
".gitignore": true, ".gitattributes": true, "Makefile": true, "Dockerfile": true,
195-
"Vagrantfile": true, "Gemfile": true, "Procfile": true, "Brewfile": true, "MANIFEST": true,
196-
}
197-
if knownFilenames[filename] {
198-
return false
199-
}
200-
ext := filepath.Ext(filename)
201-
return binaryExtensions[ext]
202-
}
203-
204-
func fetchGraphqlRepoTree(config *config.Config, client *githubv4.Client, branch string) (tree *GraphqlRepoTree, err error) {
205-
path := "" // TODO: I suspected we should be able to target subdirectories this way, but it hasn't succeeded
206-
207-
fullPath := fmt.Sprintf("%s:%s", branch, path) // Ensure correct format
208-
209-
variables := map[string]interface{}{
210-
"owner": githubv4.String(config.GetString("owner")),
211-
"name": githubv4.String(config.GetString("repo")),
212-
"branch": githubv4.String(fullPath),
213-
}
214-
215-
err = client.Query(context.Background(), &tree, variables)
216-
217-
return tree, err
218-
}
219-
220-
func getSuspectedBinaries(client *githubv4.Client, config *config.Config, branchName string) (suspectedBinaries []string, err error) {
221-
tree, err := fetchGraphqlRepoTree(config, client, branchName)
222-
if err != nil {
223-
return nil, err
224-
}
225-
binaryFileNames := checkTreeForBinaries(tree, []string{})
226-
return binaryFileNames, nil
227-
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package data
2+
3+
import (
4+
"context"
5+
6+
"github.com/privateerproj/privateer-sdk/config"
7+
"github.com/shurcooL/githubv4"
8+
)
9+
10+
// DependencyManifestsPage is the GraphQL query shape used to read only the
// total number of dependency graph manifests for a repository. No manifest
// nodes are selected; only TotalCount is requested.
type DependencyManifestsPage struct {
	// Repository is selected using the $owner and $name query variables.
	Repository struct {
		// DependencyGraphManifests selects only the totalCount field.
		DependencyGraphManifests struct {
			TotalCount int
		}
	} `graphql:"repository(owner: $owner, name: $name)"`
}
17+
18+
type ManifestNode struct {
19+
Filename string
20+
Dependencies []Dependency
21+
}
22+
23+
// Dependency holds a package name together with its requirements string.
//
// NOTE(review): presumably Requirements is a version constraint as reported
// by the dependency graph — confirm against the producer of these values.
type Dependency struct {
	PackageName  string
	Requirements string
}
27+
28+
func countDependencyManifests(client *githubv4.Client, cfg *config.Config) (int, error) {
29+
var query DependencyManifestsPage
30+
variables := map[string]interface{}{
31+
"owner": githubv4.String(cfg.GetString("owner")),
32+
"name": githubv4.String(cfg.GetString("repo")),
33+
}
34+
35+
err := client.Query(context.Background(), &query, variables)
36+
if err != nil {
37+
return 0, err
38+
}
39+
40+
return query.Repository.DependencyGraphManifests.TotalCount, nil
41+
}

data/graphql-repo-tree.go

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
package data
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"path/filepath"
7+
8+
"github.com/privateerproj/privateer-sdk/config"
9+
"github.com/shurcooL/githubv4"
10+
)
11+
12+
// GraphqlRepoTree is used in a query to get the top 3 levels of the
// repository contents. Every level selects the same three entry fields
// (Name, Type, Path); the first two levels additionally select the entry's
// object as a Tree so that its children are returned as well.
type GraphqlRepoTree struct {
	Repository struct {
		// Object is resolved from the $branch expression variable.
		Object struct {
			Tree struct {
				// Level 1: entries directly under the requested object.
				Entries []struct {
					Name string
					Type string // "blob" for files, "tree" for directories
					Path string
					// Object is a pointer and stays nil unless the decoder populates it.
					Object *struct {
						Tree struct {
							// Level 2: children of a level-1 entry.
							Entries []struct {
								Name   string
								Type   string
								Path   string
								Object *struct {
									Tree struct {
										// Level 3: children of a level-2 entry; no deeper
										// objects are selected.
										Entries []struct {
											Name string
											Type string
											Path string
										}
									} `graphql:"... on Tree"`
								} `graphql:"object"`
							}
						} `graphql:"... on Tree"`
					} `graphql:"object"`
				}
			} `graphql:"... on Tree"`
		} `graphql:"object(expression: $branch)"`
	} `graphql:"repository(owner: $owner, name: $name)"`
}
44+
45+
func checkTreeForBinaries(tree *GraphqlRepoTree, binariesFound []string) []string {
46+
for _, entry := range tree.Repository.Object.Tree.Entries {
47+
binariesFound = identifyBinaries(binariesFound, entry.Type, entry.Name)
48+
if entry.Type == "tree" {
49+
for _, subEntry := range entry.Object.Tree.Entries {
50+
binariesFound = identifyBinaries(binariesFound, subEntry.Type, subEntry.Name)
51+
if subEntry.Type == "tree" {
52+
for _, subSubEntry := range subEntry.Object.Tree.Entries {
53+
binariesFound = identifyBinaries(binariesFound, subSubEntry.Type, subSubEntry.Name)
54+
// if subSubEntry.Type == "tree" {
55+
// TODO: The current GraphQL call stops after 3 levels of depth.
56+
// Additional API calls will be required for recursion if another tree is found.
57+
// }
58+
}
59+
}
60+
}
61+
}
62+
}
63+
return binariesFound
64+
}
65+
66+
func identifyBinaries(binariesFound []string, filetype string, filename string) []string {
67+
if filetype == "blob" {
68+
if isBinaryFile(filename) {
69+
binariesFound = append(binariesFound, filename)
70+
}
71+
}
72+
return binariesFound
73+
}
74+
75+
// isBinaryFile reports whether filename is suspected to be a binary, judging
// by the name alone.
//
// A name that exactly matches one of the well-known text files below is never
// flagged. Otherwise the file is flagged when its extension is in the binary
// list; the empty extension is in that list, so any other extension-less name
// is flagged too.
//
// TODO: this is a lightweight check, looking at filenames only.
// GitHub's GraphQL API has an 'isBinary' field that could be used for a more
// accurate check, but I didn't manage to get that query working as expected.
func isBinaryFile(filename string) bool {
	// Extend this with more known filenames as needed.
	switch filename {
	case "README", "LICENSE", "CHANGELOG", "CONTRIBUTING",
		"CODE_OF_CONDUCT", "TODO", "SECURITY", "NOTICE", "CODEOWNERS",
		".gitignore", ".gitattributes", "Makefile", "Dockerfile",
		"Vagrantfile", "Gemfile", "Procfile", "Brewfile", "MANIFEST":
		return false
	}

	switch filepath.Ext(filename) {
	case "", ".exe", ".dll", ".so", ".pdf",
		".zip", ".tar", ".mp4", ".mp3":
		return true
	}
	return false
}
96+
97+
func fetchGraphqlRepoTree(config *config.Config, client *githubv4.Client, branch string) (tree *GraphqlRepoTree, err error) {
98+
path := "" // TODO: I suspected we should be able to target subdirectories this way, but it hasn't succeeded
99+
100+
fullPath := fmt.Sprintf("%s:%s", branch, path) // Ensure correct format
101+
102+
variables := map[string]interface{}{
103+
"owner": githubv4.String(config.GetString("owner")),
104+
"name": githubv4.String(config.GetString("repo")),
105+
"branch": githubv4.String(fullPath),
106+
}
107+
108+
err = client.Query(context.Background(), &tree, variables)
109+
110+
return tree, err
111+
}
112+
113+
func getSuspectedBinaries(client *githubv4.Client, config *config.Config, branchName string) (suspectedBinaries []string, err error) {
114+
tree, err := fetchGraphqlRepoTree(config, client, branchName)
115+
if err != nil {
116+
return nil, err
117+
}
118+
binaryFileNames := checkTreeForBinaries(tree, []string{})
119+
return binaryFileNames, nil
120+
}

data/payload.go

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,10 @@ import (
1313
type Payload struct {
1414
*GraphqlRepoData
1515
*RestData
16-
Config *config.Config
17-
SuspectedBinaries []string
18-
RepositoryMetadata RepositoryMetadata
16+
Config *config.Config
17+
SuspectedBinaries []string
18+
RepositoryMetadata RepositoryMetadata
19+
DependencyManifestsCount int
1920
}
2021

2122
func Loader(config *config.Config) (payload interface{}, err error) {
@@ -34,17 +35,22 @@ func Loader(config *config.Config) (payload interface{}, err error) {
3435
if err != nil {
3536
return nil, err
3637
}
38+
dependencyManifestsCount, err := countDependencyManifests(client, config)
39+
if err != nil {
40+
return nil, err
41+
}
3742

3843
rest, err := getRestData(ghClient, config)
3944
if err != nil {
4045
return nil, err
4146
}
4247
return interface{}(Payload{
43-
GraphqlRepoData: graphql,
44-
RestData: rest,
45-
Config: config,
46-
SuspectedBinaries: suspectedBinaries,
47-
RepositoryMetadata: repositoryMetadata,
48+
GraphqlRepoData: graphql,
49+
RestData: rest,
50+
Config: config,
51+
SuspectedBinaries: suspectedBinaries,
52+
RepositoryMetadata: repositoryMetadata,
53+
DependencyManifestsCount: dependencyManifestsCount,
4854
}), nil
4955
}
5056

0 commit comments

Comments
 (0)