Skip to content

Commit 1e03bcb

Browse files
committed
Draft get-contributors script
1 parent 2c86d15 commit 1e03bcb

File tree

9 files changed

+4542
-30
lines changed

9 files changed

+4542
-30
lines changed

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@
9090
},
9191
"devDependencies": {
9292
"@graphql-eslint/eslint-plugin": "4.4.0",
93+
"@graphql-eslint/parser": "^0.1.0",
9394
"@next/eslint-plugin-next": "^15.3.3",
9495
"@playwright/test": "^1.54.2",
9596
"@svgr/webpack": "^8.1.0",

pnpm-lock.yaml

Lines changed: 1363 additions & 29 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
generated
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
import { setTimeout as sleep } from "timers/promises"
2+
import { readFileSync, writeFileSync } from "node:fs"
3+
import { fileURLToPath, pathToFileURL } from "node:url"
4+
import { dirname, resolve } from "node:path"
5+
import { graphql } from "./generated"
6+
import { ExecutionResult } from "graphql"
7+
import { TypedDocumentString } from "./generated/graphql"
8+
9+
/** A repository reference written as two string segments joined by a slash; used below as `owner/name` keys. */
type RepoRef = `${string}/${string}`
10+
11+
/**
 * GraphQL document that reads one page (up to 100 commits) of the commit
 * history of a repository's default branch and, for every commit, selects the
 * GitHub user linked to the commit author (`login` and `websiteUrl`).
 *
 * Pagination: pass the previous page's `pageInfo.endCursor` as `$after`;
 * omit it (or pass null) for the first page.
 */
const QUERY = graphql(`
  query RepoContributors($owner: String!, $name: String!, $after: String) {
    repository(owner: $owner, name: $name) {
      defaultBranchRef {
        target {
          ... on Commit {
            history(first: 100, after: $after) {
              pageInfo {
                hasNextPage
                endCursor
              }
              nodes {
                author {
                  user {
                    login
                    websiteUrl
                  }
                }
              }
            }
          }
        }
      }
    }
  }
`)
37+
38+
/**
 * Maps each GitHub repository (`owner/name`) to the project name its
 * contributors are aggregated under. Several repositories may map to the same
 * project, in which case their contributors are merged.
 */
export const REPO_TO_PROJECT: Record<RepoRef, string> = {
  "graphql/graphql-spec": "GraphQL",
  "graphql/graphql-wg": "GraphQL",
  "graphql/graphql-js": "graphql-js",
  "graphql/graphiql": "GraphiQL",
}
44+
45+
/** One contributor entry in the aggregated output. */
export interface Contributor {
  /** GitHub login of the contributor. */
  id: string
  /** Website URL reported for the GitHub user, when one was available. */
  website?: string
  /** Number of counted commits, summed over every repo mapped to the project. */
  contributions: number
}

/** Contributors grouped by project name; each list is sorted by contributions (descending). */
export type ContributorsByProject = Record<string, Contributor[]>

/**
 * Fetch contributors for the given repos and aggregate them by project.
 *
 * - Reads the GitHub token from the `GITHUB_ACCESS_TOKEN` environment variable.
 *   When it is missing, a warning is logged and an empty object is returned.
 * - Repos are fetched in parallel with a small per-slot stagger. A repo whose
 *   fetch fails is logged and skipped (best effort); it never rejects the call.
 * - Repos that map to the same project are merged: contribution counts are
 *   summed and the first website seen (in `repoToProject` order) is kept.
 * - Per-repo results are merged in `repoToProject` order and ties are broken
 *   by login, so the output does not depend on which request finished first.
 *
 * @param repoToProject Map of `owner/name` to project name. Defaults to `REPO_TO_PROJECT`.
 * @returns A map `{ [projectName]: Contributor[] }`, each list sorted by
 *   contributions (descending), then by login (ascending).
 */
export async function getContributors(
  repoToProject: Record<RepoRef, string> = REPO_TO_PROJECT,
): Promise<ContributorsByProject> {
  const accessToken = process.env.GITHUB_ACCESS_TOKEN
  if (!accessToken) {
    console.warn(
      "No GITHUB_ACCESS_TOKEN environment variable found. Skipping contributors sync.",
    )
    return {}
  }

  const entries = Object.entries(repoToProject) as [RepoRef, string][]

  // Fetch every repo in parallel. Promise.all keeps the input order, so the
  // merge below is independent of network timing.
  const fetched = await Promise.all(
    entries.map(async ([fullName, project], i) => {
      // Linear stagger (50ms per slot, capped at 500ms) to be nicer to rate limits.
      if (i) await sleep(Math.min(50 * i, 500))

      const [owner, name] = fullName.split("/") as [string, string]

      try {
        const counts = await fetchRepoContributors(owner, name, accessToken)
        return { project, counts }
      } catch (err) {
        // Best effort: one failing repo must not abort the whole sync.
        console.warn(`Failed to fetch contributors for ${fullName}:`, err)
        return undefined
      }
    }),
  )

  // project -> login -> contributor
  const perProject = new Map<string, Map<string, Contributor>>()

  for (const repoResult of fetched) {
    if (!repoResult) continue

    let projectMap = perProject.get(repoResult.project)
    if (!projectMap) {
      projectMap = new Map()
      perProject.set(repoResult.project, projectMap)
    }

    for (const [login, info] of repoResult.counts) {
      const existing = projectMap.get(login)
      if (existing) {
        existing.contributions += info.contributions
        // Prefer the first available website if we don't have one yet.
        if (!existing.website && info.website) {
          existing.website = info.website
        }
      } else {
        projectMap.set(login, {
          id: login,
          website: info.website,
          contributions: info.contributions,
        })
      }
    }
  }

  // Convert to the output shape: plain object of sorted arrays.
  const result: ContributorsByProject = {}
  for (const [project, contributors] of perProject) {
    result[project] = Array.from(contributors.values()).sort(
      (a, b) =>
        b.contributions - a.contributions ||
        (a.id < b.id ? -1 : a.id > b.id ? 1 : 0),
    )
  }
  return result
}
129+
130+
/**
 * Fetch contributors (commit authors that are linked to a GitHub user) for a
 * single repo by walking the commit history of its default branch page by page.
 *
 * Commits whose author has no linked GitHub user are ignored.
 *
 * @param owner Repository owner (the part before the slash).
 * @param repo Repository name (the part after the slash).
 * @param accessToken Token sent as `Authorization: Bearer <token>`.
 * @returns A Map: login -> { contributions, website }. If the repository or
 *   its default branch cannot be read, a warning is logged and whatever was
 *   collected so far is returned.
 * @throws Error when a response contains GraphQL `errors`, or when `execute` rejects.
 */
async function fetchRepoContributors(
  owner: string,
  repo: string,
  accessToken: string,
): Promise<Map<string, { contributions: number; website?: string }>> {
  // Result type carried by QUERY. Annotating `response` with it keeps the
  // compiler from inferring the response type through `after`, which is itself
  // assigned from the response inside the loop.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- variables type is irrelevant here
  type QueryResult = typeof QUERY extends TypedDocumentString<infer TResult, any>
    ? TResult
    : never

  // login -> { contributions, website }
  const counts = new Map<string, { contributions: number; website?: string }>()
  const headers: Record<string, string> = {
    Authorization: `Bearer ${accessToken}`,
    "User-Agent": "graphql.org contributors sync client",
  }
  let after: string | null = null
  let page = 0

  for (;;) {
    // `execute` takes (query, headers, variables) — headers come first.
    const response: ExecutionResult<QueryResult> = await execute(QUERY, headers, {
      owner,
      name: repo,
      after,
    })

    if (response.errors?.length) {
      throw new Error(
        `GitHub GraphQL errors for ${owner}/${repo}: ${response.errors
          .map((e: { message: string }) => e.message)
          .join("; ")}`,
      )
    }

    const repository = response.data?.repository
    if (!repository) {
      console.warn(`Repository not found: ${owner}/${repo}`)
      break
    }

    // `history` is only selected on the Commit fragment, so narrow before use.
    const target = repository.defaultBranchRef?.target
    if (!target || !("history" in target)) {
      console.warn(`Default branch not found for ${owner}/${repo}`)
      break
    }

    const { nodes, pageInfo } = target.history

    for (const node of nodes ?? []) {
      const user = node?.author?.user
      if (!user?.login) continue

      const website =
        typeof user.websiteUrl === "string" && user.websiteUrl !== ""
          ? user.websiteUrl
          : undefined

      const entry = counts.get(user.login)
      if (entry) {
        entry.contributions += 1
        // Keep the existing website unless we don't have one yet.
        if (!entry.website && website) entry.website = website
      } else {
        counts.set(user.login, { contributions: 1, website })
      }
    }

    // Stop on the last page. Also stop if no cursor came back: continuing with
    // a null cursor would re-request the first page forever.
    if (!pageInfo.hasNextPage || !pageInfo.endCursor) break
    after = pageInfo.endCursor
    page += 1

    // Brief backoff every few pages to be nicer to the API.
    if (page % 5 === 0) await sleep(200)
  }

  return counts
}
208+
209+
// CLI entrypoint: when this file is executed directly (not imported), fetch the
// contributors and write them to data.json next to this file.
if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
  const outPath = resolve(dirname(fileURLToPath(import.meta.url)), "data.json")

  getContributors()
    .then(data => {
      const projectCount = Object.keys(data).length

      // An empty result means the sync was skipped (no token) or every repo
      // failed. Do not replace previously written data with `{}` in that case.
      if (projectCount === 0) {
        console.warn(
          `No contributors were fetched; leaving ${outPath} untouched.`,
        )
        return
      }

      const contributorCount = Object.values(data).reduce(
        (n, arr) => n + arr.length,
        0,
      )
      writeFileSync(outPath, JSON.stringify(data, null, 2) + "\n", "utf8")
      console.log(
        `Wrote ${contributorCount} contributors across ${projectCount} projects to ${outPath}`,
      )
    })
    .catch(err => {
      console.error("Failed to write contributors data.json:", err)
      // Signal failure without cutting off pending output.
      process.exitCode = 1
    })
}
225+
226+
/**
 * POST a typed GraphQL document to the GitHub GraphQL API and return the
 * parsed JSON response.
 *
 * Note the parameter order: `headers` comes BEFORE `variables`.
 *
 * @param query Document to send; it is serialized into the `query` field of the JSON body.
 * @param headers Extra HTTP headers (e.g. `Authorization`, `User-Agent`);
 *   they are spread after the defaults and therefore override them.
 * @param variables Variables for the document.
 * @returns The parsed response body. It is not validated, and GraphQL-level
 *   `errors` are returned to the caller rather than thrown.
 * @throws Error when the HTTP status is not 2xx, or when `fetch` / JSON parsing rejects.
 */
async function execute<TResult, TVariables>(
  query: TypedDocumentString<TResult, TVariables>,
  headers?: Record<string, string>,
  variables?: TVariables,
): Promise<ExecutionResult<TResult>> {
  // Kept local on purpose: this function is hoisted and can run before
  // module-level constants declared this far down the file are initialized.
  const endpoint = "https://api.github.com/graphql"

  const response = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: "application/graphql-response+json, application/json;q=0.9",
      ...headers,
    },
    body: JSON.stringify({
      query,
      variables,
    }),
  })

  if (!response.ok) {
    throw new Error(
      `Network response was not ok: ${response.status} ${response.statusText}`,
    )
  }

  return (await response.json()) as ExecutionResult<TResult>
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
// `CodegenConfig` is only used as a type, so import it as one.
import type { CodegenConfig } from "@graphql-codegen/cli"

// Fail fast with a clear message: the token is interpolated into the
// Authorization header used to load the schema below.
if (!process.env.GITHUB_ACCESS_TOKEN) {
  throw new Error("GITHUB_ACCESS_TOKEN environment variable is not set")
}

/**
 * GraphQL Code Generator configuration for the contributors sync script.
 *
 * - `schema`: loaded from the GitHub GraphQL endpoint using the token above.
 * - `documents`: GraphQL operations are collected from the `.ts` files in this directory.
 * - `generates`: output goes to `./generated/` via the `client` preset;
 *   `documentMode: "string"` emits documents as strings.
 */
const config: CodegenConfig = {
  overwrite: true,
  schema: {
    "https://api.github.com/graphql": {
      headers: {
        Authorization: `Bearer ${process.env.GITHUB_ACCESS_TOKEN}`,
        "User-Agent": "graphql.org contributors sync script",
      },
    },
  },
  documents: ["./*.ts"],
  generates: {
    "./generated/": {
      preset: "client",
      config: {
        documentMode: "string",
      },
    },
  },
}

export default config
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"name": "@graphql-website/sync-landing-schema",
3+
"private": true,
4+
"scripts": {
5+
"codegen": "gql-gen --config graphql-codegen.ts",
6+
"download": "node get-contributors.ts"
7+
},
8+
"dependencies": {
9+
"@graphql-codegen/cli": "^6.0.0",
10+
"@graphql-codegen/graphql-modules-preset": "^5.0.0"
11+
}
12+
}

0 commit comments

Comments
 (0)