Skip to content

Commit 49f7ea3

Browse files
committed
wip
1 parent 0642021 commit 49f7ea3

File tree

4 files changed

+247
-87
lines changed

4 files changed

+247
-87
lines changed

scripts/sync-landing-schema/graphql-codegen.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@ const config: CodegenConfig = {
1515
},
1616
},
1717
},
18-
documents: ["./*.ts"],
18+
documents: ["./src/*.ts"],
1919
generates: {
2020
"./generated/": {
2121
preset: "client",
Lines changed: 76 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,44 @@
1-
import { setTimeout as sleep } from "node:timers/promises"
2-
import { execute } from "./execute.ts"
3-
import { QUERY } from "./query.ts"
1+
import { setTimeout as sleep } from "node:timers/promises";
2+
import { execute } from "./execute.ts";
3+
import { QUERY } from "./query.ts";
4+
import { log, warn } from "./logger.ts";
5+
import type { State } from "./state.ts";
6+
import { writeState } from "./state.ts";
47

58
/**
69
* Fetch contributors (by commit authors tied to GitHub users) for a single repo.
710
* Traverses the full commit history of the default branch using pagination.
11+
* Updates state file incrementally.
812
*/
913
export async function fetchRepoContributors(
1014
owner: string,
1115
repo: string,
1216
accessToken: string,
17+
state: State,
18+
options: {
19+
forceRefresh?: boolean;
20+
onProgress?: (data: { page: number; repoSlug: string }) => void;
21+
} = {}
1322
) {
23+
const repoSlug = `${owner}/${repo}`;
24+
const repoState = state.repositories[repoSlug];
25+
1426
const contributors = new Map<
1527
string /* handle */,
1628
{ contributions: number; website?: string }
17-
>()
18-
let after: string | null = null
19-
let page = 0
20-
let hasMore = true
29+
>();
30+
31+
let after: string | null = options.forceRefresh
32+
? null
33+
: repoState?.lastCursor || null;
34+
35+
if (options.forceRefresh) {
36+
log(`Force refreshing ${repoSlug}, deleting existing state.`);
37+
delete state.repositories[repoSlug];
38+
}
39+
40+
let page = 0;
41+
let hasMore = true;
2142

2243
const fetchMore = () =>
2344
execute(
@@ -30,62 +51,82 @@ export async function fetchRepoContributors(
3051
{
3152
Authorization: `Bearer ${accessToken}`,
3253
"User-Agent": "graphql.org contributors sync client",
33-
},
34-
)
54+
}
55+
);
3556

3657
while (hasMore) {
37-
const response = await fetchMore()
58+
const response = await fetchMore();
3859

3960
if (response.errors?.length) {
4061
throw new Error(
41-
`GitHub GraphQL errors for ${owner}/${repo}: ${response.errors
62+
`GitHub GraphQL errors for ${repoSlug}: ${response.errors
4263
.map((e: { message: string }) => e.message)
43-
.join("; ")}`,
44-
)
64+
.join("; ")}`
65+
);
4566
}
4667

47-
const repoData = response.data?.repository
68+
const repoData = response.data?.repository;
4869
if (!repoData) {
49-
console.warn(`Repository not found: ${owner}/${repo}`)
50-
break
70+
warn(`Repository not found: ${repoSlug}`);
71+
break;
5172
}
5273

53-
const defaultBranchRef = repoData.defaultBranchRef
74+
const defaultBranchRef = repoData.defaultBranchRef;
5475
if (!defaultBranchRef?.target) {
55-
console.warn(`Default branch not found for ${owner}/${repo}`)
56-
break
76+
warn(`Default branch not found for ${repoSlug}`);
77+
break;
5778
}
5879

5980
if (!("history" in defaultBranchRef.target)) {
60-
console.warn(`History not found for ${owner}/${repo}`)
61-
break
81+
warn(`History not found for ${repoSlug}`);
82+
break;
6283
}
6384

64-
const history = defaultBranchRef.target.history
85+
const history = defaultBranchRef.target.history;
6586

6687
for (const node of history.nodes || []) {
67-
const user = node?.author?.user
68-
if (!user?.login) continue
69-
const prev = contributors.get(user.login)
88+
const user = node?.author?.user;
89+
if (!user?.login) continue;
90+
const prev = contributors.get(user.login);
7091
if (prev) {
71-
prev.contributions += 1
72-
// keep existing website unless we don't have one and GitHub provides it
73-
prev.website ||= user.websiteUrl
92+
prev.contributions += 1;
93+
prev.website ||= user.websiteUrl;
7494
} else {
7595
contributors.set(user.login, {
7696
contributions: 1,
7797
website: user.websiteUrl ?? undefined,
78-
})
98+
});
7999
}
80100
}
81101

82-
const hasNext = history.pageInfo?.hasNextPage
83-
after = history.pageInfo?.endCursor || null
84-
hasMore = !!hasNext
85-
page += 1
102+
const hasNext = history.pageInfo?.hasNextPage;
103+
after = history.pageInfo?.endCursor || null;
104+
hasMore = !!hasNext;
105+
page += 1;
106+
107+
state.repositories[repoSlug] = {
108+
...state.repositories[repoSlug],
109+
status: "in-progress",
110+
lastCursor: after,
111+
};
112+
await writeState(state);
113+
log(`Processed page ${page} for ${repoSlug}.`);
114+
if (options.onProgress) {
115+
options.onProgress({ page, repoSlug });
116+
}
86117

87-
if (page % 5 === 0) await sleep(200)
118+
if (page % 5 === 0) await sleep(200);
88119
}
89120

90-
return contributors
121+
state.repositories[repoSlug] = {
122+
...state.repositories[repoSlug],
123+
status: "completed",
124+
lastProcessed: new Date().toISOString(),
125+
contributorsCount: contributors.size,
126+
};
127+
await writeState(state);
128+
log(`Finished processing ${repoSlug}.`);
129+
130+
return contributors;
91131
}
132+
Lines changed: 110 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,9 @@
1-
import { setTimeout as sleep } from "node:timers/promises"
21
import { readFile, writeFile } from "node:fs/promises"
32
import { fileURLToPath, pathToFileURL } from "node:url"
43
import { dirname, resolve } from "node:path"
54
import { fetchRepoContributors } from "./fetch-repo-contributors.ts"
5+
import * as logger from "./logger.ts"
6+
import { readState, writeState } from "./state.ts"
67

78
type RepoRef = `${string}/${string}`
89
interface Contributor {
@@ -15,7 +16,7 @@ type ContributorsForProjects = {
1516
}
1617

1718
const __dirname = dirname(fileURLToPath(import.meta.url))
18-
const outPath = resolve(__dirname, "data.json")
19+
const outPath = resolve(__dirname, "../data.json")
1920

2021
export const REPO_TO_PROJECT: Record<RepoRef, string> = {
2122
// "graphql/graphql-spec": "GraphQL",
@@ -32,71 +33,102 @@ export const REPO_TO_PROJECT: Record<RepoRef, string> = {
3233
*/
3334
export async function getContributors(
3435
repoToProject: Record<RepoRef, string> = REPO_TO_PROJECT,
36+
options: {
37+
forceRefresh?: boolean
38+
onProgress?: (data: { page: number; repoSlug: string }) => void
39+
onContributorFound?: (
40+
contributor: Contributor & { project: string },
41+
) => void
42+
} = {},
3543
): Promise<ContributorsForProjects> {
3644
const accessToken = process.env.GITHUB_ACCESS_TOKEN
3745
if (!accessToken) {
38-
console.warn(
46+
logger.warn(
3947
"No GITHUB_ACCESS_TOKEN environment variable found. Skipping contributors sync.",
4048
)
4149
return {}
4250
}
4351

44-
// Aggregate contributors per project
45-
const perProject = new Map<
46-
string,
47-
Map<
48-
string,
49-
{
50-
id: string
51-
website?: string
52-
contributions: number
53-
}
54-
>
55-
>()
52+
let existingData: ContributorsForProjects = {}
53+
try {
54+
existingData = JSON.parse(await readFile(outPath, "utf8"))
55+
} catch (error) {
56+
logger.log("No existing data.json found, starting fresh.")
57+
}
58+
59+
const perProject = new Map<string, Map<string, Contributor>>()
60+
for (const projectName in existingData) {
61+
const projectMap = new Map<string, Contributor>()
62+
for (const contributor of existingData[projectName]) {
63+
projectMap.set(contributor.id, contributor)
64+
}
65+
perProject.set(projectName, projectMap)
66+
}
5667

57-
// Fetch each repo in parallel (a little stagger to be nice to rate limits)
68+
const state = await readState()
5869
const repos = Object.keys(repoToProject) as RepoRef[]
5970

60-
await Promise.all(
61-
repos.map(async (fullName, i) => {
62-
const project = repoToProject[fullName]
63-
// Exponential-ish tiny backoff per parallel slot to reduce throttling
64-
if (i) await sleep(Math.min(50 * i, 500))
71+
for (const fullName of repos) {
72+
const repoState = state.repositories[fullName]
73+
if (repoState?.status === "completed" && !options.forceRefresh) {
74+
logger.log(`Skipping ${fullName}, already completed.`)
75+
continue
76+
}
6577

66-
const [owner, name] = fullName.split("/") as [string, string]
78+
const project = repoToProject[fullName]
79+
const [owner, name] = fullName.split("/") as [string, string]
6780

68-
try {
69-
const counts = await fetchRepoContributors(owner, name, accessToken)
81+
try {
82+
const counts = await fetchRepoContributors(
83+
owner,
84+
name,
85+
accessToken,
86+
state,
87+
options,
88+
)
7089

71-
let projectMap = perProject.get(project)
72-
if (!projectMap) {
73-
projectMap = new Map()
74-
perProject.set(project, projectMap)
90+
if (options.onContributorFound) {
91+
for (const [login, info] of counts) {
92+
options.onContributorFound({
93+
project,
94+
id: login,
95+
contributions: info.contributions,
96+
website: info.website,
97+
})
7598
}
99+
}
76100

77-
for (const [login, info] of counts) {
78-
const existing = projectMap.get(login)
79-
if (existing) {
80-
existing.contributions += info.contributions
81-
// Prefer first available website if we don't have one yet
82-
if (!existing.website && info.website) {
83-
existing.website = info.website
84-
}
85-
} else {
86-
projectMap.set(login, {
87-
id: login,
88-
website: info.website,
89-
contributions: info.contributions,
90-
})
101+
let projectMap = perProject.get(project)
102+
if (!projectMap) {
103+
projectMap = new Map()
104+
perProject.set(project, projectMap)
105+
}
106+
107+
for (const [login, info] of counts) {
108+
const existing = projectMap.get(login)
109+
if (existing) {
110+
existing.contributions += info.contributions
111+
if (!existing.website && info.website) {
112+
existing.website = info.website
91113
}
114+
} else {
115+
projectMap.set(login, {
116+
id: login,
117+
website: info.website,
118+
contributions: info.contributions,
119+
})
92120
}
93-
} catch (err) {
94-
console.warn(`Failed to fetch contributors for ${fullName}:`, err)
95121
}
96-
}),
97-
)
122+
} catch (err) {
123+
logger.warn(`Failed to fetch contributors for ${fullName}:`, err)
124+
state.repositories[fullName] = {
125+
...state.repositories[fullName],
126+
status: "error",
127+
}
128+
await writeState(state)
129+
}
130+
}
98131

99-
// Convert to the requested output shape and sort by contributions
100132
const result: Record<
101133
string,
102134
Array<{ id: string; website?: string; contributions: number }>
@@ -112,15 +144,42 @@ export async function getContributors(
112144

113145
// CLI entrypoint: when executed directly, write contributors to data.json next to this file
114146
if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
115-
getContributors()
147+
const options = {
148+
forceRefresh: process.argv.includes("--force"),
149+
}
150+
151+
const onProgress = (data: { page: number; repoSlug: string }) => {
152+
process.stderr.write(
153+
`Fetching commits for ${data.repoSlug}: page ${data.page}`,
154+
)
155+
}
156+
157+
const onContributorFound = (
158+
contributor: Contributor & { project: string },
159+
) => {
160+
console.log(JSON.stringify(contributor))
161+
}
162+
163+
getContributors(REPO_TO_PROJECT, {
164+
...options,
165+
onProgress,
166+
onContributorFound,
167+
})
116168
.then(async data => {
169+
process.stderr.write("\n")
117170
await writeFile(outPath, JSON.stringify(data, null, 2) + "\n", "utf8")
118-
console.log(
119-
`Wrote ${Object.values(data).reduce((n, arr) => n + arr.length, 0)} contributors across ${Object.keys(data).length} projects to ${outPath}`,
171+
logger.log(
172+
`Wrote ${Object.values(data).reduce(
173+
(n, arr) => n + arr.length,
174+
0,
175+
)} contributors across ${
176+
Object.keys(data).length
177+
} projects to ${outPath}`,
120178
)
121179
})
122180
.catch(err => {
123-
console.error("Failed to write contributors data.json:", err)
181+
process.stderr.write("\n")
182+
logger.error("Failed to write contributors data.json:", err)
124183
process.exitCode = 1
125184
})
126185
}

0 commit comments

Comments (0)