44#include < filesystem>
55
66#include < dlfcn.h>
7+ #include < unistd.h>
78#include < mutex>
89#include < optional>
910#include < cassert>
11+ #include < iostream>
1012
11- #include " swift/extractor/infra/file/FileHash.h"
12- #include " swift/extractor/infra/file/FileHash.h"
13+ #include < picosha2.h>
14+
15+ #include " swift/extractor/infra/file/PathHash.h"
16+ #include " swift/extractor/infra/file/Path.h"
1317
1418#ifdef __APPLE__
1519// path is hardcoded as otherwise redirection could break when setting DYLD_FALLBACK_LIBRARY_PATH
@@ -64,6 +68,28 @@ bool mayBeRedirected(const char* path, int flags = O_RDONLY) {
6468 endsWith (path, " .swiftmodule" ));
6569}
6670
71+ std::optional<std::string> hashFile (const fs::path& path) {
72+ auto fd = original::open (path.c_str (), O_RDONLY | O_CLOEXEC);
73+ if (fd < 0 ) {
74+ auto ec = std::make_error_code (static_cast <std::errc>(errno));
75+ std::cerr << " unable to open " << path << " for reading (" << ec.message () << " )\n " ;
76+ return std::nullopt ;
77+ }
78+ auto hasher = picosha2::hash256_one_by_one ();
79+ constexpr size_t bufferSize = 16 * 1024 ;
80+ char buffer[bufferSize];
81+ ssize_t bytesRead = 0 ;
82+ while ((bytesRead = ::read (fd, buffer, bufferSize)) > 0 ) {
83+ hasher.process (buffer, buffer + bytesRead);
84+ }
85+ ::close (fd);
86+ if (bytesRead < 0 ) {
87+ return std::nullopt ;
88+ }
89+ hasher.finish ();
90+ return get_hash_hex_string (hasher);
91+ }
92+
6793} // namespace
6894
6995namespace codeql {
@@ -72,22 +98,15 @@ class FileInterceptor {
7298 public:
7399 FileInterceptor (fs::path&& workingDir) : workingDir{std::move (workingDir)} {
74100 fs::create_directories (hashesPath ());
75- fs::create_directories (storePath ());
76101 }
77102
78103 int open (const char * path, int flags, mode_t mode = 0 ) const {
79104 fs::path fsPath{path};
80105 assert ((flags & O_ACCMODE) == O_RDONLY);
106+ // try to use the hash map first
81107 errno = 0 ;
82- // first, try the same path underneath the artifact store
83- if (auto ret = original::open (redirectedPath (path).c_str (), flags);
84- ret >= 0 || errno != ENOENT) {
85- return ret;
86- }
87- errno = 0 ;
88- // then try to use the hash map
89108 if (auto hashed = hashPath (path)) {
90- if (auto ret = original::open (hashed->c_str (), flags); ret >= 0 || errno != ENOENT) {
109+ if (auto ret = original::open (hashed->c_str (), flags); errno != ENOENT) {
91110 return ret;
92111 }
93112 }
@@ -96,17 +115,18 @@ class FileInterceptor {
96115
97116 fs::path redirect (const fs::path& target) const {
98117 assert (mayBeRedirected (target.c_str ()));
99- auto ret = redirectedPath (target);
100- fs::create_directories (ret .parent_path ());
118+ auto redirected = redirectedPath (target);
119+ fs::create_directories (redirected .parent_path ());
101120 if (auto hashed = hashPath (target)) {
102121 std::error_code ec;
103- fs::create_symlink (ret, *hashed, ec);
122+ fs::create_symlink (*hashed, redirected , ec);
104123 if (ec) {
105- std::cerr << " Cannot remap file " << ret << " -> " << *hashed << " : " << ec.message ()
124+ std::cerr << " Cannot remap file " << *hashed << " -> " << redirected << " : " << ec.message ()
106125 << " \n " ;
107126 }
127+ return *hashed;
108128 }
109- return ret ;
129+ return redirected ;
110130 }
111131
112132 private:
@@ -119,17 +139,27 @@ class FileInterceptor {
119139 }
120140
121141 std::optional<fs::path> hashPath (const fs::path& target) const {
122- if (auto fd = original::open (target. c_str (), O_RDONLY | O_CLOEXEC); fd >= 0 ) {
123- return hashesPath () / hashFile (fd) ;
142+ if (auto hashed = getHashOfRealFile (target) ) {
143+ return hashesPath () / *hashed ;
124144 }
125145 return std::nullopt ;
126146 }
127147
128148 fs::path workingDir;
129149};
130150
131- int openReal (const fs::path& path) {
132- return original::open (path.c_str (), O_RDONLY | O_CLOEXEC);
151+ std::optional<std::string> getHashOfRealFile (const fs::path& path) {
152+ static std::unordered_map<fs::path, std::string> cache;
153+ auto resolved = resolvePath (path);
154+ if (auto found = cache.find (resolved); found != cache.end ()) {
155+ return found->second ;
156+ }
157+
158+ if (auto hashed = hashFile (resolved)) {
159+ cache.emplace (resolved, *hashed);
160+ return hashed;
161+ }
162+ return std::nullopt ;
133163}
134164
135165fs::path redirect (const fs::path& target) {
@@ -140,8 +170,9 @@ fs::path redirect(const fs::path& target) {
140170 }
141171}
142172
143- std::shared_ptr<FileInterceptor> setupFileInterception (fs::path workginDir) {
144- auto ret = std::make_shared<FileInterceptor>(std::move (workginDir));
173+ std::shared_ptr<FileInterceptor> setupFileInterception (
174+ const SwiftExtractorConfiguration& configuration) {
175+ auto ret = std::make_shared<FileInterceptor>(configuration.getTempArtifactDir ());
145176 fileInterceptorInstance () = ret;
146177 return ret;
147178}
0 commit comments