Skip to content

Commit 84832b7

Browse files
Remove hardcoded interface for JavaScript tokenizer
This change allows the input and output directories to be provided as command line arguments to the tokenizer script. Previously, this script used hardcoded constants for these directories. Additionally, this change adds error checking code for these inputs and simplifies the logic for iterating over the input project directories.
1 parent 264f608 commit 84832b7

File tree

1 file changed

+34
-16
lines changed

1 file changed

+34
-16
lines changed

tokenizers/javascript/parser.js

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -54,22 +54,40 @@ function extractForFolder(dir, identifiersOnly, outpath, baseDir) {
5454
fs.writeFileSync(outpath, zlib.gzipSync(gzipped));
5555
}
5656

57-
// For each project (folder in folder)
58-
var base_dir = '/mnt/c/Users/t-mialla/Downloads/data';
59-
var all_user_folders = fs.readdirSync(base_dir);
60-
for (var i in all_user_folders) {
61-
var user_folder = path.join(base_dir, all_user_folders[i]);
62-
if (!fs.lstatSync(user_folder).isDirectory()) {
63-
continue;
64-
}
65-
all_project_folders = fs.readdirSync(user_folder);
66-
for (var j in all_project_folders) {
67-
var project_folder = path.join(user_folder, all_project_folders[j]);
68-
if (!fs.statSync(project_folder).isDirectory()) continue;
57+
if (process.argv.length != 4) {
58+
// process.argv[0] is the node executable; argv[1] is the script being run, so show that in the usage line.
console.error(`Usage: node ${path.basename(process.argv[1])} PROJECTS_FOLDER OUTPUT_FOLDER`);
59+
process.exit(1);
60+
}
6961

70-
console.log('Extracting '+ project_folder)
62+
const base_dir = process.argv[2];
63+
if(!fs.existsSync(base_dir)) {
64+
console.error(`Error: Directory "${base_dir}" does not exist.`);
65+
process.exit(1);
66+
}
67+
if(!fs.lstatSync(base_dir).isDirectory()) {
68+
console.error(`Error: "${base_dir}" is not a directory.`);
69+
process.exit(1);
70+
}
7171

72-
var outpath = path.join('/mnt/c/Users/t-mialla/Downloads/parsedJs', all_user_folders[i] + "__" + all_project_folders[j] + '.jsonl.gz'); // TODO
73-
extractForFolder(project_folder, false, outpath, base_dir);
74-
}
72+
const output_dir = process.argv[3];
73+
if(fs.existsSync(output_dir) && !fs.lstatSync(output_dir).isDirectory()) {
74+
console.error(`Error: "${output_dir}" is not a directory.`);
75+
process.exit(1);
7576
}
77+
78+
if(!fs.existsSync(output_dir)) {
79+
// Create any missing parent directories too, so a nested OUTPUT_FOLDER (e.g. out/a/b) does not fail with ENOENT.
fs.mkdirSync(output_dir, { recursive: true });
80+
}
81+
82+
fs.readdirSync(base_dir).forEach(project_name => {
83+
const project_dir = path.join(base_dir, project_name);
84+
if (!fs.lstatSync(project_dir).isDirectory()) {
85+
return;
86+
}
87+
88+
console.log(`Extracting ${project_dir}`);
89+
90+
const project_output_file = path.join(output_dir, project_name + ".json.gz");
91+
extractForFolder(project_dir, false, project_output_file, base_dir);
92+
});
93+

0 commit comments

Comments
 (0)