1- import { setTimeout as sleep } from "node:timers/promises"
21import { readFile , writeFile } from "node:fs/promises"
32import { fileURLToPath , pathToFileURL } from "node:url"
43import { dirname , resolve } from "node:path"
54import { fetchRepoContributors } from "./fetch-repo-contributors.ts"
5+ import * as logger from "./logger.ts"
6+ import { readState , writeState } from "./state.ts"
67
// A GitHub repository reference. Values of this type are split on "/" into an
// owner and a repository name before contributors are fetched.
78type RepoRef = `${string } /${string } `
89interface Contributor {
@@ -15,7 +16,7 @@ type ContributorsForProjects = {
1516}
1617
// Directory containing this module, derived from its import.meta.url.
1718const __dirname = dirname ( fileURLToPath ( import . meta. url ) )
// Location of the contributors JSON file, resolved relative to this module's
// directory. It is read as the existing data set and written by the CLI entrypoint.
18- const outPath = resolve ( __dirname , "data.json" )
19+ const outPath = resolve ( __dirname , "../ data.json" )
1920
2021export const REPO_TO_PROJECT : Record < RepoRef , string > = {
2122 // "graphql/graphql-spec": "GraphQL",
@@ -32,71 +33,102 @@ export const REPO_TO_PROJECT: Record<RepoRef, string> = {
3233 */
3334export async function getContributors (
3435 repoToProject : Record < RepoRef , string > = REPO_TO_PROJECT ,
36+ options : {
37+ forceRefresh ?: boolean
38+ onProgress ?: ( data : { page : number ; repoSlug : string } ) => void
39+ onContributorFound ?: (
40+ contributor : Contributor & { project : string } ,
41+ ) => void
42+ } = { } ,
3543) : Promise < ContributorsForProjects > {
3644 const accessToken = process . env . GITHUB_ACCESS_TOKEN
3745 if ( ! accessToken ) {
38- console . warn (
46+ logger . warn (
3947 "No GITHUB_ACCESS_TOKEN environment variable found. Skipping contributors sync." ,
4048 )
4149 return { }
4250 }
4351
44- // Aggregate contributors per project
45- const perProject = new Map <
46- string ,
47- Map <
48- string ,
49- {
50- id : string
51- website ?: string
52- contributions : number
53- }
54- >
55- > ( )
52+ let existingData : ContributorsForProjects = { }
53+ try {
54+ existingData = JSON . parse ( await readFile ( outPath , "utf8" ) )
55+ } catch ( error ) {
56+ logger . log ( "No existing data.json found, starting fresh." )
57+ }
58+
59+ const perProject = new Map < string , Map < string , Contributor > > ( )
60+ for ( const projectName in existingData ) {
61+ const projectMap = new Map < string , Contributor > ( )
62+ for ( const contributor of existingData [ projectName ] ) {
63+ projectMap . set ( contributor . id , contributor )
64+ }
65+ perProject . set ( projectName , projectMap )
66+ }
5667
57- // Fetch each repo in parallel (a little stagger to be nice to rate limits )
68+ const state = await readState ( )
5869 const repos = Object . keys ( repoToProject ) as RepoRef [ ]
5970
60- await Promise . all (
61- repos . map ( async ( fullName , i ) => {
62- const project = repoToProject [ fullName ]
63- // Exponential-ish tiny backoff per parallel slot to reduce throttling
64- if ( i ) await sleep ( Math . min ( 50 * i , 500 ) )
71+ for ( const fullName of repos ) {
72+ const repoState = state . repositories [ fullName ]
73+ if ( repoState ?. status === "completed" && ! options . forceRefresh ) {
74+ logger . log ( `Skipping ${ fullName } , already completed.` )
75+ continue
76+ }
6577
66- const [ owner , name ] = fullName . split ( "/" ) as [ string , string ]
78+ const project = repoToProject [ fullName ]
79+ const [ owner , name ] = fullName . split ( "/" ) as [ string , string ]
6780
68- try {
69- const counts = await fetchRepoContributors ( owner , name , accessToken )
81+ try {
82+ const counts = await fetchRepoContributors (
83+ owner ,
84+ name ,
85+ accessToken ,
86+ state ,
87+ options ,
88+ )
7089
71- let projectMap = perProject . get ( project )
72- if ( ! projectMap ) {
73- projectMap = new Map ( )
74- perProject . set ( project , projectMap )
90+ if ( options . onContributorFound ) {
91+ for ( const [ login , info ] of counts ) {
92+ options . onContributorFound ( {
93+ project,
94+ id : login ,
95+ contributions : info . contributions ,
96+ website : info . website ,
97+ } )
7598 }
99+ }
76100
77- for ( const [ login , info ] of counts ) {
78- const existing = projectMap . get ( login )
79- if ( existing ) {
80- existing . contributions += info . contributions
81- // Prefer first available website if we don't have one yet
82- if ( ! existing . website && info . website ) {
83- existing . website = info . website
84- }
85- } else {
86- projectMap . set ( login , {
87- id : login ,
88- website : info . website ,
89- contributions : info . contributions ,
90- } )
101+ let projectMap = perProject . get ( project )
102+ if ( ! projectMap ) {
103+ projectMap = new Map ( )
104+ perProject . set ( project , projectMap )
105+ }
106+
107+ for ( const [ login , info ] of counts ) {
108+ const existing = projectMap . get ( login )
109+ if ( existing ) {
110+ existing . contributions += info . contributions
111+ if ( ! existing . website && info . website ) {
112+ existing . website = info . website
91113 }
114+ } else {
115+ projectMap . set ( login , {
116+ id : login ,
117+ website : info . website ,
118+ contributions : info . contributions ,
119+ } )
92120 }
93- } catch ( err ) {
94- console . warn ( `Failed to fetch contributors for ${ fullName } :` , err )
95121 }
96- } ) ,
97- )
122+ } catch ( err ) {
123+ logger . warn ( `Failed to fetch contributors for ${ fullName } :` , err )
124+ state . repositories [ fullName ] = {
125+ ...state . repositories [ fullName ] ,
126+ status : "error" ,
127+ }
128+ await writeState ( state )
129+ }
130+ }
98131
99- // Convert to the requested output shape and sort by contributions
100132 const result : Record <
101133 string ,
102134 Array < { id : string ; website ?: string ; contributions : number } >
@@ -112,15 +144,42 @@ export async function getContributors(
112144
// CLI entrypoint: when this module is executed directly (rather than imported),
// fetch contributors and write them to `outPath`, i.e. data.json in the parent
// directory of this file.
//
// Flags:
//   --force   sets `forceRefresh`, so repositories already marked "completed"
//             in the saved state are fetched again instead of being skipped.
//
// Streams:
//   stdout    one JSON object per line for every contributor found
//   stderr    a single, in-place progress line while pages are fetched
if (import.meta.url === pathToFileURL(process.argv[1] ?? "").href) {
  const options = {
    forceRefresh: process.argv.includes("--force"),
  }

  const onProgress = (data: { page: number; repoSlug: string }) => {
    // Start with a carriage return so each update overwrites the previous one.
    // Without it, consecutive messages are glued together on one line
    // ("...page 1Fetching commits for ...page 2").
    process.stderr.write(
      `\rFetching commits for ${data.repoSlug}: page ${data.page}`,
    )
  }

  const onContributorFound = (
    contributor: Contributor & { project: string },
  ) => {
    // Emitted on stdout so the stream stays machine-readable (JSON lines),
    // separate from the progress output on stderr.
    console.log(JSON.stringify(contributor))
  }

  getContributors(REPO_TO_PROJECT, {
    ...options,
    onProgress,
    onContributorFound,
  })
    .then(async data => {
      // Terminate the in-place progress line before logging anything else.
      process.stderr.write("\n")
      await writeFile(outPath, JSON.stringify(data, null, 2) + "\n", "utf8")

      const contributorCount = Object.values(data).reduce(
        (n, arr) => n + arr.length,
        0,
      )
      const projectCount = Object.keys(data).length
      logger.log(
        `Wrote ${contributorCount} contributors across ${projectCount} projects to ${outPath}`,
      )
    })
    .catch(err => {
      // Terminate the in-place progress line so the error starts on its own line.
      process.stderr.write("\n")
      logger.error("Failed to write contributors data.json:", err)
      // Signal failure without cutting off pending output.
      process.exitCode = 1
    })
}
0 commit comments