|
| 1 | +import { setTimeout as sleep } from "timers/promises" |
| 2 | +import { readFileSync, writeFileSync } from "node:fs" |
| 3 | +import { fileURLToPath, pathToFileURL } from "node:url" |
| 4 | +import { dirname, resolve } from "node:path" |
| 5 | +import { graphql } from "./generated" |
| 6 | +import { ExecutionResult } from "graphql" |
| 7 | +import { TypedDocumentString } from "./generated/graphql" |
| 8 | + |
/**
 * A GitHub repository reference written as "<owner>/<name>".
 * The template-literal type only enforces that a "/" separator is present.
 */
type RepoRef = `${string}/${string}`
| 10 | + |
/**
 * GraphQL document used to page through the commit history of a repository's
 * default branch, 100 commits per request.
 *
 * Variables: `$owner` and `$name` identify the repository; `$after` is the
 * pagination cursor (omit or pass null for the first page).
 * For every commit it selects the author's linked user `login` and
 * `websiteUrl`, plus `pageInfo` so the caller can request the next page.
 */
const QUERY = graphql(`
  query RepoContributors($owner: String!, $name: String!, $after: String) {
    repository(owner: $owner, name: $name) {
      defaultBranchRef {
        target {
          ... on Commit {
            history(first: 100, after: $after) {
              pageInfo {
                hasNextPage
                endCursor
              }
              nodes {
                author {
                  user {
                    login
                    websiteUrl
                  }
                }
              }
            }
          }
        }
      }
    }
  }
`)
| 37 | + |
/**
 * Default mapping from a GitHub repository ("owner/name") to the project name
 * its contributors are attributed to. Several repositories may share one
 * project name; key order is the order in which repositories are processed.
 */
export const REPO_TO_PROJECT: Record<RepoRef, string> = {
  "graphql/graphql-spec": "GraphQL",
  "graphql/graphql-wg": "GraphQL",
  "graphql/graphql-js": "graphql-js",
  "graphql/graphiql": "GraphiQL",
}
| 44 | + |
/** One contributor entry in the aggregated per-project output. */
type ProjectContributor = {
  /** GitHub login of the contributor. */
  id: string
  /** Website URL from the contributor's GitHub profile, when one was returned. */
  website?: string
  /** Number of default-branch commits attributed to this login. */
  contributions: number
}

/**
 * Fetch contributors for the given repos and aggregate them by project.
 * - Uses the GitHub GraphQL API (v4) with a personal access token read from
 *   the GITHUB_ACCESS_TOKEN environment variable.
 * - Aggregates contributors across multiple repos that map to the same project.
 * - Sorts contributors per project by contributions (descending).
 *
 * A repo whose fetch fails is logged with `console.warn` and skipped; it does
 * not fail the whole sync.
 *
 * @param repoToProject Mapping of "owner/name" to project name. Defaults to
 *   REPO_TO_PROJECT.
 * @returns A map `{ [projectName]: Array<{ id, website?, contributions }> }`.
 *   Returns an empty object (after a warning) when no access token is set.
 */
export async function getContributors(
  repoToProject: Record<RepoRef, string> = REPO_TO_PROJECT,
): Promise<Record<string, ProjectContributor[]>> {
  const accessToken = process.env.GITHUB_ACCESS_TOKEN
  if (!accessToken) {
    console.warn(
      "No GITHUB_ACCESS_TOKEN environment variable found. Skipping contributors sync.",
    )
    return {}
  }

  const repos = Object.keys(repoToProject) as RepoRef[]

  // Fetch every repo in parallel. Each slot starts 50ms after the previous one
  // (capped at 500ms) so the requests do not all hit the API at the same instant.
  const countsPerRepo = await Promise.all(
    repos.map(async (fullName, i) => {
      if (i) await sleep(Math.min(50 * i, 500))

      const [owner, name] = fullName.split("/") as [string, string]
      try {
        return await fetchRepoContributors(owner, name, accessToken)
      } catch (err) {
        console.warn(`Failed to fetch contributors for ${fullName}:`, err)
        return undefined
      }
    }),
  )

  // Merge in the declared repo order rather than in network completion order,
  // so the chosen website and the order of ties are reproducible between runs.
  const perProject = new Map<string, Map<string, ProjectContributor>>()
  repos.forEach((fullName, i) => {
    const counts = countsPerRepo[i]
    if (!counts) return // this repo failed and was already reported

    const project = repoToProject[fullName]
    let projectMap = perProject.get(project)
    if (!projectMap) {
      projectMap = new Map()
      perProject.set(project, projectMap)
    }

    for (const [login, info] of counts) {
      const existing = projectMap.get(login)
      if (existing) {
        existing.contributions += info.contributions
        // Prefer the first available website if we don't have one yet
        if (!existing.website && info.website) {
          existing.website = info.website
        }
      } else {
        projectMap.set(login, {
          id: login,
          website: info.website,
          contributions: info.contributions,
        })
      }
    }
  })

  // Convert to the output shape, most active contributors first
  const result: Record<string, ProjectContributor[]> = {}
  for (const [project, contributors] of perProject) {
    result[project] = Array.from(contributors.values()).sort(
      (a, b) => b.contributions - a.contributions,
    )
  }
  return result
}
| 129 | + |
/**
 * Fetch contributors (commit authors tied to GitHub users) for a single repo.
 * Walks the commit history of the default branch page by page until GitHub
 * reports that there is no next page.
 *
 * Commits whose author is not linked to a GitHub user are ignored.
 *
 * @param owner Repository owner (user or organisation login).
 * @param repo Repository name.
 * @param accessToken Token sent as a `Bearer` Authorization header.
 * @returns A Map: login -> { contributions, website }. The map is empty when
 *   the repository or its default-branch history cannot be found (a warning
 *   is logged in that case).
 * @throws Error when the GraphQL response contains `errors`, or when the
 *   underlying request made by `execute` fails.
 */
async function fetchRepoContributors(
  owner: string,
  repo: string,
  accessToken: string,
): Promise<Map<string, { contributions: number; website?: string }>> {
  // login -> { contributions, website }
  const counts = new Map<string, { contributions: number; website?: string }>()
  const headers = {
    Authorization: `Bearer ${accessToken}`,
    "User-Agent": "graphql.org contributors sync client",
  }
  let after: string | null = null
  let page = 0

  // Reading the cursor inside a closure keeps it at its declared type, which
  // avoids a circular type inference between `after` and the response.
  // Argument order follows execute(query, headers, variables).
  const fetchPage = () => execute(QUERY, headers, { owner, name: repo, after })

  for (;;) {
    const response = await fetchPage()

    if (response.errors?.length) {
      throw new Error(
        `GitHub GraphQL errors for ${owner}/${repo}: ${response.errors
          .map((e: { message: string }) => e.message)
          .join("; ")}`,
      )
    }

    const repository = response.data?.repository
    if (!repository) {
      console.warn(`Repository not found: ${owner}/${repo}`)
      break
    }

    // `history` is only selected on the Commit fragment, so narrow before use.
    const target = repository.defaultBranchRef?.target
    if (!target || !("history" in target)) {
      console.warn(`Default branch not found for ${owner}/${repo}`)
      break
    }
    const history = target.history

    // The node list and its entries may be null in a GraphQL response.
    for (const node of history.nodes ?? []) {
      const user = node?.author?.user
      if (!user?.login) continue
      const prev = counts.get(user.login)
      if (prev) {
        prev.contributions += 1
        // keep existing website unless we don't have one and GitHub provides it
        if (!prev.website && user.websiteUrl) prev.website = user.websiteUrl
      } else {
        counts.set(user.login, {
          contributions: 1,
          website: user.websiteUrl ?? undefined,
        })
      }
    }

    const { hasNextPage, endCursor } = history.pageInfo
    page += 1

    // Without a cursor the next request would fetch the first page again,
    // so stop instead of looping forever.
    if (!hasNextPage || !endCursor) break
    after = endCursor

    // Brief backoff every few pages to be nicer to the API
    if (page % 5 === 0) await sleep(200)
  }

  return counts
}
| 208 | + |
// CLI entrypoint: only runs when this module is the script Node was started
// with. It fetches the contributors and writes them, pretty-printed, to a
// data.json file located next to this module.
if (pathToFileURL(process.argv[1] ?? "").href === import.meta.url) {
  const outPath = resolve(dirname(fileURLToPath(import.meta.url)), "data.json")

  const run = async () => {
    try {
      const data = await getContributors()
      const serialized = `${JSON.stringify(data, null, 2)}\n`
      writeFileSync(outPath, serialized, "utf8")

      const contributorCount = Object.values(data).reduce((n, arr) => n + arr.length, 0)
      const projectCount = Object.keys(data).length
      console.log(
        `Wrote ${contributorCount} contributors across ${projectCount} projects to ${outPath}`,
      )
    } catch (err) {
      // Report the failure through the exit code without aborting abruptly.
      console.error("Failed to write contributors data.json:", err)
      process.exitCode = 1
    }
  }

  void run()
}
| 225 | + |
/**
 * Send a typed GraphQL document to the GitHub GraphQL API and return the
 * parsed response body.
 *
 * @param query The document to execute; it is serialised into the JSON body.
 * @param headers Extra HTTP headers (for example Authorization). They are
 *   spread last, so they override the default Content-Type / Accept headers.
 * @param variables Variables for the document.
 * @returns The decoded JSON body, typed as a GraphQL execution result. The
 *   body is not validated at runtime; callers must check `errors` themselves.
 * @throws Error when the HTTP response status is not in the 2xx range.
 */
async function execute<TResult, TVariables>(
  query: TypedDocumentString<TResult, TVariables>,
  headers?: Record<string, string>,
  variables?: TVariables,
): Promise<ExecutionResult<TResult>> {
  // The token, query schema and error messages in this file all target the
  // GitHub GraphQL API, so the request must go to GitHub's endpoint.
  const response = await fetch("https://api.github.com/graphql", {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      // Offer plain JSON as a fallback for servers that do not emit the
      // newer GraphQL-over-HTTP media type.
      Accept: "application/graphql-response+json, application/json;q=0.9",
      ...headers,
    },
    body: JSON.stringify({
      query,
      variables,
    }),
  })

  if (!response.ok) {
    throw new Error(
      `Network response was not ok (${response.status} ${response.statusText})`,
    )
  }

  // Await before asserting the type: json() returns a Promise, not the result.
  return (await response.json()) as ExecutionResult<TResult>
}
0 commit comments