44import pandas as pd
55import pygit2 as git2
66import git
7+ import sys
78import git2net
89import shutil
910from pathlib import Path
@@ -165,7 +166,7 @@ def clone_repository(repo, data_root_dir, github_token=None, new_clone=False):
165166 print (" -> An exception occurred" )
166167
167168 @staticmethod
168- def generate_data_base (data_root_dir ):
169+ def generate_data_base (data_root_dir , new_extraction = False ):
169170 """
170171 generate_data_base(data_root_dir)
171172
@@ -175,6 +176,8 @@ def generate_data_base(data_root_dir):
175176 ----------
176177 data_root_dir : str
177178 Data root directory for the repository.
179+ new_extraction : bool, default = False
180+ If True, remove an existing database file first and start a new complete extraction run.
178181
179182 Notes
180183 -----
@@ -196,15 +199,15 @@ def mine_git_repo(git_repo_dir, sqlite_db_file, commits=[],
196199 repo_dir = version_folder .joinpath (Version .VERSION_REPOSITORY_DIR )
197200 sqlite_db_file = version_folder .joinpath (Version .VERSION_DB )
198201
199- if os .path .exists (sqlite_db_file ):
202+ if new_extraction & os .path .exists (sqlite_db_file ):
200203 os .remove (sqlite_db_file )
201204
202205 git2net .mine_git_repo (repo_dir , sqlite_db_file ,
203- extract_complexity = True ,
204- extract_text = True ,
205- no_of_processes = Version .no_of_proceses ,
206- all_branches = True ,
207- max_modifications = 1000 )
206+ extract_complexity = True ,
207+ extract_text = True ,
208+ no_of_processes = Version .no_of_proceses ,
209+ all_branches = True ,
210+ max_modifications = 1000 )
208211
209212 @staticmethod
210213 def generate_version_pandas_tables (repo , data_root_dir , check_for_updates = True ):
0 commit comments