|
20 | 20 | # ScanCode.io is a free software code scanning tool from nexB Inc. and others. |
21 | 21 | # Visit https://github.com/nexB/scancode.io for support and download. |
22 | 22 |
|
| 23 | +from django.db.models import Q |
| 24 | + |
23 | 25 | from source_inspector import symbols_ctags |
24 | 26 | from source_inspector import symbols_pygments |
| 27 | +from source_inspector import symbols_tree_sitter |
25 | 28 |
|
26 | 29 | from scanpipe.pipes import LoopProgress |
27 | 30 |
|
@@ -103,3 +106,44 @@ def _collect_and_store_pygments_symbols_and_strings(resource): |
103 | 106 | "source_comments": result.get("source_comments"), |
104 | 107 | } |
105 | 108 | ) |
| 109 | + |
| 110 | + |
def collect_and_store_tree_sitter_symbols_and_strings(project, logger=None):
    """
    Collect symbols and strings with tree-sitter from the ``project``
    codebase files and store them in each resource extra data field.

    Only file resources that are not binary, archive, or media, and whose
    ``programming_language`` matches case-insensitively one of the
    ``symbols_tree_sitter.TS_LANGUAGE_WHEELS`` keys are processed.
    The optional ``logger`` is handed to ``LoopProgress``, which wraps the
    iteration over the selected resources.
    """
    # OR together one case-insensitive lookup per supported language.
    supported_languages = Q()
    for language_name in symbols_tree_sitter.TS_LANGUAGE_WHEELS:
        supported_languages = supported_languages | Q(
            programming_language__iexact=language_name
        )

    eligible_files = project.codebaseresources.files().filter(
        is_binary=False,
        is_archive=False,
        is_media=False,
    )
    candidates = eligible_files.filter(supported_languages)

    # Count up front so the progress helper knows the total, then stream the
    # rows in chunks rather than loading the whole queryset at once.
    progress = LoopProgress(candidates.count(), logger)
    for resource in progress.iter(candidates.iterator(chunk_size=2000)):
        _collect_and_store_tree_sitter_symbols_and_strings(resource)
| 136 | + |
| 137 | + |
def _collect_and_store_tree_sitter_symbols_and_strings(resource):
    """
    Collect symbols and strings from a single ``resource`` using tree-sitter
    and store them in its extra data field.

    The file at ``resource.location`` is passed to
    ``symbols_tree_sitter.get_treesitter_symbols``; the ``source_symbols``
    and ``source_strings`` entries of its result are saved through
    ``resource.update_extra_data`` (``None`` is stored for a missing entry).
    """
    extracted = symbols_tree_sitter.get_treesitter_symbols(resource.location)
    stored_keys = ("source_symbols", "source_strings")
    extra_data = {key: extracted.get(key) for key in stored_keys}
    resource.update_extra_data(extra_data)
0 commit comments