Skip to content

Commit ff1d40a

Browse files
authored
add web_vfs (#8)
1 parent 06742c5 commit ff1d40a

File tree

12 files changed

+511
-684
lines changed

12 files changed

+511
-684
lines changed

.github/workflows/tests.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,11 @@ jobs:
2323
run: |
2424
brew install python sqlite zstd samtools aria2
2525
/usr/local/bin/pip3 install pytest pytest-xdist
26+
# test web_vfs using the older system libcurl
27+
brew uninstall --ignore-dependencies curl
28+
curl --version
29+
which curl
30+
otool -L $(which curl)
2631
- name: build
2732
run: |
2833
CXXFLAGS="-I$(brew --prefix)/include -I$(brew --prefix)/opt/sqlite/include" \

CMakeLists.txt

Lines changed: 28 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,36 @@ if(${CMAKE_VERSION} VERSION_LESS 3.14)
1515
endmacro()
1616
endif()
1717

18+
set(SQLITECPP_IN_EXTENSION ON CACHE INTERNAL "")
1819
set(SQLITECPP_INTERNAL_SQLITE OFF CACHE INTERNAL "")
1920
set(SQLITE_ENABLE_COLUMN_METADATA OFF CACHE INTERNAL "")
2021
FetchContent_Declare(
2122
sqlitecpp
22-
GIT_REPOSITORY https://github.com/SRombauts/SQLiteCpp.git
23-
GIT_TAG 3.1.1
23+
GIT_REPOSITORY https://github.com/mlin/SQLiteCpp.git
24+
GIT_TAG 49a568f15d025ddcd61d54c437a4c5ef773d2389
2425
)
2526
FetchContent_MakeAvailable(sqlitecpp)
2627
include_directories(${sqlitecpp_SOURCE_DIR}/include)
2728

2829
FetchContent_Declare(
29-
concurrentqueue
30-
GIT_REPOSITORY https://github.com/cameron314/readerwriterqueue.git
31-
GIT_TAG v1.0.3
30+
sqlite_web_vfs
31+
GIT_REPOSITORY https://github.com/mlin/sqlite_web_vfs.git
32+
GIT_TAG 818e4c2
3233
)
34+
FetchContent_MakeAvailable(sqlite_web_vfs)
3335
FetchContent_MakeAvailable(concurrentqueue)
3436
include_directories(${concurrentqueue_SOURCE_DIR})
37+
include_directories(${sqlite_web_vfs_SOURCE_DIR}/src)
38+
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
39+
# Don't dlopen() libcurl until first attempt to use web_vfs -- avoids loading its large tree of
40+
# shared library dependencies in most cases. Enabled only for Linux because it seemed to cause
41+
# libcurl routines to intermittently segfault in the macOS Python tests, which we haven't had
42+
# time to track down yet.
43+
add_definitions(-DHTTP_LAZYCURL)
44+
set(LINK_LIBCURL "")
45+
else()
46+
set(LINK_LIBCURL curl)
47+
ENDIF()
3548

3649
project(sqlite_nested_vfs VERSION 1.0
3750
DESCRIPTION "SQLite VFS extension storing database pages in...a SQLite database"
@@ -41,15 +54,15 @@ set(CMAKE_CXX_STANDARD 11)
4154
set(CMAKE_CXX_STANDARD_REQUIRED ON)
4255
set(CMAKE_CXX_EXTENSIONS OFF)
4356

44-
add_library(nested_vfs SHARED src/nested_vfs.cc src/SQLiteNestedVFS.h src/SQLiteVFS.h src/ThreadPool.h)
45-
SET_TARGET_PROPERTIES(nested_vfs PROPERTIES PREFIX "")
46-
target_link_libraries(nested_vfs PRIVATE SQLiteCpp)
57+
if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
58+
add_library(nested_vfs SHARED src/nested_vfs.cc src/SQLiteNestedVFS.h)
59+
SET_TARGET_PROPERTIES(nested_vfs PROPERTIES PREFIX "")
60+
target_link_libraries(nested_vfs PRIVATE SQLiteCpp ${LINK_LIBCURL})
4761

48-
add_library(zstd_vfs SHARED src/zstd_vfs.cc src/SQLiteNestedVFS.h src/SQLiteVFS.h src/zstd_vfs.h src/ThreadPool.h)
49-
SET_TARGET_PROPERTIES(zstd_vfs PROPERTIES PREFIX "")
50-
target_link_libraries(zstd_vfs PRIVATE SQLiteCpp zstd)
62+
add_library(zstd_vfs SHARED src/zstd_vfs.cc src/SQLiteNestedVFS.h src/zstd_vfs.h)
63+
SET_TARGET_PROPERTIES(zstd_vfs PROPERTIES PREFIX "")
64+
target_link_libraries(zstd_vfs PRIVATE SQLiteCpp zstd ${LINK_LIBCURL})
5165

52-
if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
5366
FetchContent_Declare(
5467
catch
5568
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
@@ -58,11 +71,11 @@ if(CMAKE_PROJECT_NAME STREQUAL PROJECT_NAME)
5871
FetchContent_MakeAvailable(catch)
5972
include_directories(${catch_SOURCE_DIR}/single_include)
6073

61-
add_executable(test_exe test/test.cc test/test_vfstrace.c src/SQLiteNestedVFS.h src/SQLiteVFS.h src/ThreadPool.h)
62-
target_link_libraries(test_exe PRIVATE SQLiteCpp)
74+
add_executable(test_exe test/test.cc test/test_vfstrace.c src/SQLiteNestedVFS.h)
75+
target_link_libraries(test_exe PRIVATE SQLiteCpp sqlite3)
6376

6477
include(CTest)
6578
enable_testing()
6679
add_test(NAME test_exe COMMAND ./test_exe -d yes)
67-
add_test(NAME pytest COMMAND python3 -m pytest -sv ${CMAKE_CURRENT_SOURCE_DIR}/test/test.py)
80+
add_test(NAME pytest COMMAND env SQLITE_WEB_LOG=99 python3 -m pytest -sv ${CMAKE_CURRENT_SOURCE_DIR}/test/test.py)
6881
endif()

Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ RUN apt-get -qq update && \
88
apt-get -qq install -y --no-install-recommends --no-install-suggests \
99
ca-certificates curl wget git-core \
1010
build-essential cmake valgrind clang-format cppcheck \
11-
sqlite3 libsqlite3-dev libzstd-dev \
11+
sqlite3 libsqlite3-dev libzstd-dev libcurl4-openssl-dev \
1212
python3-pytest pylint black aria2 zstd samtools
1313

1414
ADD . /work

README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ sqlite3 :memory: -bail \
8585

8686
Repeat the query to see the update.
8787

88+
The extension also *includes* [web_vfs](https://github.com/mlin/sqlite_web_vfs). A compressed database can be read from an HTTP(S) URL by opening the URI `file:/__web__?vfs=zstd&mode=ro&immutable=1&web_url={{PERCENT_ENCODED_URL}}`.
89+
8890
## Limitations
8991

9092
* Linux x86-64 oriented; help wanted for other targets.

src/SQLiteNestedVFS.h

Lines changed: 56 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -949,7 +949,7 @@ class InnerDatabaseFile : public SQLiteVFS::File {
949949
public:
950950
InnerDatabaseFile(std::unique_ptr<SQLite::Database> &&outer_db,
951951
const std::string &inner_db_tablename_prefix, bool read_only, size_t threads,
952-
bool noprefetch)
952+
bool noprefetch, bool web)
953953
: outer_db_(std::move(outer_db)),
954954
inner_db_pages_table_(inner_db_tablename_prefix + "pages"), read_only_(read_only),
955955
// MAX(pageno) instead of COUNT(pageno) because the latter would trigger table scan
@@ -962,14 +962,34 @@ class InnerDatabaseFile : public SQLiteVFS::File {
962962
assert(threads);
963963
fetch_jobs_.reserve(MAX_FETCH_CURSORS); // important! ensure fetch_jobs_.data() never moves
964964
methods_.iVersion = 1;
965-
assert(outer_db_->execAndGet("PRAGMA quick_check").getString() == "ok");
965+
assert(web || outer_db_->execAndGet("PRAGMA quick_check").getString() == "ok");
966966
}
967967
}; // namespace SQLiteNested
968968

969969
// issue when write performance is prioritized over transaction safety / possible corruption
970970
const char *UNSAFE_PRAGMAS =
971971
"PRAGMA journal_mode=OFF; PRAGMA synchronous=OFF; PRAGMA locking_mode=EXCLUSIVE";
972972

973+
// originally found: http://codepad.org/lCypTglt
974+
// Percent-encode a string for embedding in a URI.
//
// Every byte of `s` that is not an RFC 3986 "unreserved" character is replaced by '%' followed
// by exactly two uppercase hexadecimal digits. When `keep_slash` is true, '/' is also passed
// through unchanged, so a filesystem path keeps its separators.
//
// s           input bytes; treated as raw octets (no character-set interpretation)
// keep_slash  leave '/' unescaped (default: false, i.e. '/' becomes "%2F")
// returns     the encoded string; empty input yields an empty string
//
// Declared inline because this definition lives in a header.
inline std::string urlencode(const std::string &s, bool keep_slash = false) {
    // RFC 3986 section 2.3 Unreserved Characters (January 2005)
    static const std::string unreserved =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~";
    static const char hex_digits[] = "0123456789ABCDEF";

    std::string escaped;
    escaped.reserve(s.size());
    for (size_t i = 0; i < s.length(); i++) {
        const char c = s[i];
        if (unreserved.find(c) != std::string::npos || (keep_slash && c == '/')) {
            escaped.push_back(c);
        } else {
            // Go through unsigned char: where plain char is signed, a byte >= 0x80 would
            // otherwise sign-extend and produce more than two hex digits.
            const unsigned char byte = static_cast<unsigned char>(c);
            escaped.push_back('%');
            escaped.push_back(hex_digits[byte >> 4]);
            escaped.push_back(hex_digits[byte & 0x0F]);
        }
    }
    return escaped;
}
992+
973993
class VFS : public SQLiteVFS::Wrapper {
974994
protected:
975995
// subclass may override inner_db_tablename_prefix_ with something encoding-specific, to
@@ -1004,41 +1024,53 @@ class VFS : public SQLiteVFS::Wrapper {
10041024

10051025
virtual std::unique_ptr<SQLiteVFS::File>
10061026
NewInnerDatabaseFile(const char *zName, std::unique_ptr<SQLite::Database> &&outer_db,
1007-
bool read_only, size_t threads, bool noprefetch) {
1027+
bool read_only, size_t threads, bool noprefetch, bool web) {
10081028
return std::unique_ptr<SQLiteVFS::File>(new InnerDatabaseFile(
1009-
std::move(outer_db), inner_db_tablename_prefix_, read_only, threads, noprefetch));
1029+
std::move(outer_db), inner_db_tablename_prefix_, read_only, threads, noprefetch, web));
10101030
}
10111031

10121032
int Open(const char *zName, sqlite3_file *pFile, int flags, int *pOutFlags) override {
10131033
if (zName && zName[0]) {
10141034
std::string sName(zName);
10151035
if (flags & SQLITE_OPEN_MAIN_DB) {
10161036
// strip inner_db_filename_suffix_ to get filename of outer database
1017-
std::string outer_db_filename =
1018-
sName.size() > inner_db_filename_suffix_.size()
1019-
? sName.substr(0, sName.size() - inner_db_filename_suffix_.size())
1020-
: "";
1021-
if (outer_db_filename.empty() ||
1022-
sName.substr(outer_db_filename.size()) != inner_db_filename_suffix_) {
1023-
last_error_ = "inner database filename unexpectedly missing suffix " +
1024-
inner_db_filename_suffix_;
1025-
return SQLITE_CANTOPEN_FULLPATH;
1037+
std::string outer_db_filename = sName;
1038+
bool web = sName == "/__web__";
1039+
if (!web) {
1040+
if (sName.size() > inner_db_filename_suffix_.size()) {
1041+
outer_db_filename =
1042+
sName.substr(0, sName.size() - inner_db_filename_suffix_.size());
1043+
} else {
1044+
outer_db_filename.clear();
1045+
}
1046+
if (outer_db_filename.empty() ||
1047+
sName.substr(outer_db_filename.size()) != inner_db_filename_suffix_) {
1048+
last_error_ = "inner database filename unexpectedly missing suffix " +
1049+
inner_db_filename_suffix_;
1050+
return SQLITE_CANTOPEN_FULLPATH;
1051+
}
10261052
}
10271053

1028-
// TODO: URI-encode outer_db_filename
1029-
std::string outer_db_uri = "file:" + outer_db_filename;
1054+
std::string vfs = outer_vfs_;
1055+
std::string outer_db_uri = "file:" + urlencode(outer_db_filename, true);
10301056
bool unsafe = sqlite3_uri_boolean(zName, "outer_unsafe", 0);
1031-
if (unsafe) {
1057+
if (web) {
1058+
outer_db_uri += "?immutable=1&web_url=";
1059+
outer_db_uri += urlencode(sqlite3_uri_parameter(zName, "web_url"));
1060+
flags &= ~(SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE);
1061+
flags |= SQLITE_OPEN_READONLY;
1062+
vfs = "web";
1063+
} else if (unsafe) {
10321064
outer_db_uri += "?nolock=1&psow=1";
10331065
} else if (sqlite3_uri_boolean(zName, "immutable", 0)) {
10341066
outer_db_uri += "?immutable=1";
10351067
}
1068+
_DBG << outer_db_uri << _EOL;
10361069

10371070
try {
10381071
// open outer database
10391072
std::unique_ptr<SQLite::Database> outer_db(new SQLite::Database(
1040-
outer_db_uri, flags | SQLITE_OPEN_NOMUTEX | SQLITE_OPEN_URI, 0,
1041-
outer_vfs_));
1073+
outer_db_uri, flags | SQLITE_OPEN_NOMUTEX | SQLITE_OPEN_URI, 0, vfs));
10421074
// see comment in Lock() about possible future relaxation of exclusive
10431075
// locking
10441076
outer_db->exec("PRAGMA locking_mode=EXCLUSIVE");
@@ -1082,7 +1114,7 @@ class VFS : public SQLiteVFS::Wrapper {
10821114

10831115
auto idbf = NewInnerDatabaseFile(zName, std::move(outer_db),
10841116
(flags & SQLITE_OPEN_READONLY),
1085-
(size_t)threads, noprefetch);
1117+
(size_t)threads, noprefetch, web);
10861118
idbf->InitHandle(pFile);
10871119
assert(pFile->pMethods);
10881120
idbf.release();
@@ -1118,6 +1150,10 @@ class VFS : public SQLiteVFS::Wrapper {
11181150
// filesystem, but xOpen() will recognize).
11191151
int FullPathname(const char *zName, int nPathOut, char *zPathOut) override {
11201152
std::string zName2(zName);
1153+
if (zName2 == "/__web__") {
1154+
strncpy(zPathOut, zName, nPathOut);
1155+
return SQLITE_OK;
1156+
}
11211157
if (!zName2.empty() && zName2[0] != '/') {
11221158
if (getcwd(zPathOut, nPathOut) && !strcmp(zPathOut, "/")) {
11231159
// evading bug in sqlite3 os_unix.c unixFullPathname, when given a relative path
@@ -1127,7 +1163,7 @@ class VFS : public SQLiteVFS::Wrapper {
11271163
}
11281164
int rc = SQLiteVFS::Wrapper::FullPathname(zName2.c_str(), nPathOut, zPathOut);
11291165
if (rc != SQLITE_OK && rc != SQLITE_OK_SYMLINK) {
1130-
_DBG << "FullPathNameE " << rc << " " << sqlite3_errstr(rc) << _EOL;
1166+
_DBG << "FullPathName " << rc << " " << sqlite3_errstr(rc) << _EOL;
11311167
return rc;
11321168
}
11331169
std::string outer_db_filename(zPathOut);

0 commit comments

Comments
 (0)