Skip to content

Commit aa3fc49

Browse files
authored
Update repository.py
1 parent 2833c48 commit aa3fc49

File tree

1 file changed

+13
-71
lines changed

1 file changed

+13
-71
lines changed

github2pandas/repository.py

Lines changed: 13 additions & 71 deletions
Original file line number | Diff line number | Diff line change
@@ -204,62 +204,30 @@ def __extract_repository_data(self, params: Params) -> dict:
204204
else:
205205
last_commit = self.get_save_api_data(commits,0)
206206
last_commit_date = pd.to_datetime(last_commit.commit.committer.date , format="%Y-%m-%d M:%S")
207-
# last_commit_date on main Branch
208-
209-
# commits = self.repo.get_commits()
210-
# try:
211-
# # problem: No commits in repo
212-
# last_commit_date = pd.to_datetime(commits[0].commit.committer.date , format="%Y-%m-%d M:%S")
213-
# commit_count = commits.totalCount
214-
# except GithubException:
215-
# commit_count = 0
216-
# last_commit_date = numpy.nan
217-
# print("No commits found!")
218-
207+
219208
contributors = self.save_api_call(self.repo.get_contributors,"True")
220209
contributors_count = self.get_save_total_count(contributors)
221-
# contributor = self.repo.get_contributors( 'all')
222-
# try:
223-
# # problem: history or contributor is too large to list them via the API.
224-
# contributors_count = len (list (contributor))
225-
# except GithubException:
226-
# print("Too many contributors, not covered by API!")
227-
# contributors_count = numpy.nan
228210

229211
companies = []
230-
contributors_count2 = contributors_count
231-
if contributors_count > 500:
232-
print("Only first 500 Contributor can hold information!")
233-
contributors_count2 = 500
234-
if params.contributor_companies:
235-
for i in self.progress_bar(range(contributors_count2), "Contributor Companies: "):
236-
contributor = self.get_save_api_data(contributors,i)
237-
if not contributor._organizations_url == GithubObject.NotSet:
238-
companies.append(contributor.company)
239-
filtered_companies = list(filter(None.__ne__, companies))
240-
# companies = []
241-
# if contributor_companies_included:
242-
# for contributor in contributor:
243-
# try:
244-
# companies.append(contributor.company)
245-
# except GithubException:
246-
# print('Contributor does not exist anymore')
247-
# continue
248-
# filtered_companies = list(filter(None.__ne__, companies))
212+
exclude_company_identification = True
213+
if exclude_company_identification:
214+
contributors_count2 = contributors_count
215+
if contributors_count > 500:
216+
print("Only first 500 Contributor can hold information!")
217+
contributors_count2 = 500
218+
if params.contributor_companies:
219+
for i in self.progress_bar(range(contributors_count2), "Contributor Companies: "):
220+
contributor = self.get_save_api_data(contributors,i)
221+
if not contributor._organizations_url == GithubObject.NotSet:
222+
companies.append(contributor.company)
223+
filtered_companies = list(filter(None.__ne__, companies))
249224

250225
read_me = self.save_api_call(self.repo.get_readme)
251226
if read_me is None or read_me._content == GithubObject.NotSet:
252227
readme_content = ""
253228
print("Readme does not exist")
254229
else:
255230
readme_content = read_me.content
256-
# try:
257-
# # problem: readme.md does not exist
258-
# readme_content = self.repo.get_readme().content
259-
# except GithubException:
260-
# readme_content = ""
261-
# print("Readme does not exist")
262-
# problem: sometimes get_readme outputs a None result
263231
if readme_content is None:
264232
readme_length = 0
265233
print("Readme does not exist")
@@ -268,12 +236,6 @@ def __extract_repository_data(self, params: Params) -> dict:
268236

269237
tags = self.save_api_call(self.repo.get_tags)
270238
tag_count = self.get_save_total_count(tags)
271-
# try:
272-
# # problem: empty list of tags
273-
# tag_count = self.repo.get_tags().totalCount
274-
# except GithubException:
275-
# tag_count = 0
276-
# print("No tags assigned to repository")
277239

278240
if self.repo._organization == GithubObject.NotSet:
279241
organization_name = "not known"
@@ -282,32 +244,12 @@ def __extract_repository_data(self, params: Params) -> dict:
282244
else:
283245
organization_name = self.repo.organization.name
284246
repo_type = self.repo.organization.type
285-
# try:
286-
# # problem: organization entry empty
287-
# organization_name = self.repo.organization.name
288-
# repo_type = self.repo.organization.type
289-
# except:
290-
# organization_name = "not known"
291-
# repo_type = "not known"
292-
# print("Organization not valid")
293247

294248
pulls_review_comments_obj = self.save_api_call(self.repo.get_pulls_review_comments)
295249
pulls_review_comments = self.get_save_total_count(pulls_review_comments_obj)
296-
# try:
297-
# # problem: no pull request comments
298-
# pulls_review_comments = self.repo.get_pulls_review_comments().totalCount
299-
# except GithubException:
300-
# pulls_review_comments = "not known"
301-
# print("No pull request comments")
302250

303251
releases = self.save_api_call(self.repo.get_releases)
304252
release_count = self.get_save_total_count(releases)
305-
# try:
306-
# # problem: ???
307-
# release_count = self.repo.get_releases().totalCount,
308-
# except GithubException:
309-
# release_count = 0
310-
# print("Wrong release count output")
311253

312254
branches = self.save_api_call(self.repo.get_branches)
313255
branches_count = self.get_save_total_count(branches)

0 commit comments

Comments
 (0)