From c57453572dbb98b374912de1223f18ee2f997682 Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Tue, 3 Sep 2024 08:33:00 +0200 Subject: [PATCH 1/8] Copy Files From External Storage To Local Storage for Import --- src/Snapshot.php | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/Snapshot.php b/src/Snapshot.php index dcde77b..e247f4c 100644 --- a/src/Snapshot.php +++ b/src/Snapshot.php @@ -5,11 +5,13 @@ use Carbon\Carbon; use Illuminate\Filesystem\FilesystemAdapter as Disk; use Illuminate\Support\Facades\DB; +use Illuminate\Support\Facades\Storage; use Illuminate\Support\LazyCollection; use Spatie\DbSnapshots\Events\DeletedSnapshot; use Spatie\DbSnapshots\Events\DeletingSnapshot; use Spatie\DbSnapshots\Events\LoadedSnapshot; use Spatie\DbSnapshots\Events\LoadingSnapshot; +use Spatie\TemporaryDirectory\TemporaryDirectory; class Snapshot { @@ -90,10 +92,22 @@ protected function shouldIgnoreLine(string $line): bool protected function loadStream(string $connectionName = null) { + $directory = (new TemporaryDirectory(config('db-snapshots.temporary_directory_path')))->create(); + + config([ + 'filesystems.disks.' . self::class => [ + 'driver' => 'local', + 'root' => $directory->path(), + 'throw' => false, + ] + ]); + LazyCollection::make(function () { + Storage::disk(self::class)->writeStream($this->fileName, $this->disk->readStream($this->fileName)); + $stream = $this->compressionExtension === 'gz' - ? gzopen($this->disk->path($this->fileName), 'r') - : $this->disk->readStream($this->fileName); + ? gzopen(Storage::disk(self::class)->path($this->fileName), 'r') + : Storage::disk(self::class)->readStream($this->fileName); $statement = ''; while (! feof($stream)) { @@ -128,6 +142,8 @@ protected function loadStream(string $connectionName = null) } })->each(function (string $statement) use ($connectionName) { DB::connection($connectionName)->unprepared($statement); + })->after(function () use ($directory) { + $directory->delete(); }); } From 4ca38f1998e7d573d2f0d62e1df0ee1cf6a28160 Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Sat, 30 Nov 2024 20:58:29 +0100 Subject: [PATCH 2/8] Fix Errors during Testing --- src/Commands/Create.php | 2 +- src/Events/CreatingSnapshot.php | 3 ++- src/SnapshotFactory.php | 7 ++++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/Commands/Create.php b/src/Commands/Create.php index d683606..d0690aa 100644 --- a/src/Commands/Create.php +++ b/src/Commands/Create.php @@ -36,7 +36,7 @@ public function handle() } $extraOptions = $this->option('extraOptions') ?: config('db-snapshots.extraOptions', []); - $extraOptions = is_string($extraOptions) ? explode(',', $exclude) : $exclude; + $extraOptions = is_string($extraOptions) ? explode(',', $extraOptions) : $extraOptions; $snapshot = app(SnapshotFactory::class)->create( diff --git a/src/Events/CreatingSnapshot.php b/src/Events/CreatingSnapshot.php index 35be24c..4e7059a 100644 --- a/src/Events/CreatingSnapshot.php +++ b/src/Events/CreatingSnapshot.php @@ -11,7 +11,8 @@ public function __construct( public FilesystemAdapter $disk, public string $connectionName, public ?array $tables = null, - public ?array $exclude = null + public ?array $exclude = null, + public ?array $extraOptions = null ) { // } diff --git a/src/SnapshotFactory.php b/src/SnapshotFactory.php index c3270e0..3946de1 100644 --- a/src/SnapshotFactory.php +++ b/src/SnapshotFactory.php @@ -36,10 +36,11 @@ public function create(string $snapshotName, string $diskName, string $connectio $disk, $connectionName, $tables, - $exclude + $exclude, + $extraOptions )); - $this->createDump($connectionName, $fileName, $disk, $compress, $tables, $exclude); + $this->createDump($connectionName, $fileName, $disk, $compress, $tables, $exclude, $extraOptions); $snapshot = new Snapshot($disk, $fileName); @@ -64,7 +65,7 @@ protected function getDbDumper(string $connectionName): DbDumper return $factory::createForConnection($connectionName); } - protected function createDump(string $connectionName, string $fileName, FilesystemAdapter $disk, bool $compress = false, ?array $tables = null, ?array $exclude = null): void + protected function createDump(string $connectionName, string $fileName, FilesystemAdapter $disk, bool $compress = false, ?array $tables = null, ?array $exclude = null, array $extraOptions = []): void { $directory = (new TemporaryDirectory(config('db-snapshots.temporary_directory_path')))->create(); From 685f1ca6e055097821ba20e4b71e5c12083f95aa Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Thu, 13 Feb 2025 10:18:54 +0100 Subject: [PATCH 3/8] Update Snapshot.php --- src/Snapshot.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Snapshot.php b/src/Snapshot.php index e247f4c..502a95a 100644 --- a/src/Snapshot.php +++ b/src/Snapshot.php @@ -112,7 +112,7 @@ protected function loadStream(string $connectionName = null) $statement = ''; while (! feof($stream)) { $chunk = $this->compressionExtension === 'gz' - ? gzread($stream, self::STREAM_BUFFER_SIZE) + ? gzgets($stream, self::STREAM_BUFFER_SIZE) : fread($stream, self::STREAM_BUFFER_SIZE); $lines = explode("\n", $chunk); From c347488b66b577e26e1a4c9c77b59d7b4189237e Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Thu, 30 Oct 2025 09:56:15 +0100 Subject: [PATCH 4/8] Refactor method signatures and improve line checks --- src/Snapshot.php | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/src/Snapshot.php b/src/Snapshot.php index 91d39cd..2a024e8 100644 --- a/src/Snapshot.php +++ b/src/Snapshot.php @@ -43,14 +43,14 @@ public function __construct(Disk $disk, string $fileName) $this->name = pathinfo($fileName, PATHINFO_FILENAME); } - public function useStream(): self + public function useStream() { $this->useStream = true; return $this; } - public function load(?string $connectionName = null, bool $dropTables = true): void + public function load(string $connectionName = null, bool $dropTables = true): void { event(new LoadingSnapshot($this)); @@ -67,7 +67,7 @@ public function load(?string $connectionName = null, bool $dropTables = true): v event(new LoadedSnapshot($this)); } - protected function loadAsync(?string $connectionName = null): void + protected function loadAsync(string $connectionName = null) { $dbDumpContents = $this->disk->get($this->fileName); @@ -80,17 +80,27 @@ protected function loadAsync(?string $connectionName = null): void protected function isASqlComment(string $line): bool { - return str_starts_with($line, '--'); + return substr($line, 0, 2) === '--'; } protected function shouldIgnoreLine(string $line): bool { $line = trim($line); - return empty($line) || $this->isASqlComment($line); + // Ignore empty lines, SQL comments, and psql meta-commands (e.g. \\connect, \\., etc.) + if ($line === '' || $this->isASqlComment($line)) { + return true; + } + + // Skip psql meta commands and COPY terminator from pg_dump-like files + if (str_starts_with($line, '\\')) { + return true; + } + + return false; } - protected function loadStream(?string $connectionName = null): void + protected function loadStream(string $connectionName = null) { $directory = (new TemporaryDirectory(config('db-snapshots.temporary_directory_path')))->create(); @@ -112,7 +122,7 @@ protected function loadStream(?string $connectionName = null): void $statement = ''; while (! feof($stream)) { $chunk = $this->compressionExtension === 'gz' - ? gzgets($stream, self::STREAM_BUFFER_SIZE) + ? gzread($stream, self::STREAM_BUFFER_SIZE) : fread($stream, self::STREAM_BUFFER_SIZE); $lines = explode("\n", $chunk); @@ -130,14 +140,14 @@ protected function loadStream(?string $connectionName = null): void break; } - if (str_ends_with(trim($statement), ';')) { + if (substr(trim($statement), -1, 1) === ';') { yield $statement; $statement = ''; } } } - if (str_ends_with(trim($statement), ';')) { + if (substr(trim($statement), -1, 1) === ';') { yield $statement; } })->each(function (string $statement) use ($connectionName) { @@ -166,7 +176,7 @@ public function createdAt(): Carbon return Carbon::createFromTimestamp($this->disk->lastModified($this->fileName)); } - protected function dropAllCurrentTables(): void + protected function dropAllCurrentTables() { DB::connection(DB::getDefaultConnection()) ->getSchemaBuilder() From acd2baecb4e5e7e847e2223f0e518c16dc4e6cb8 Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Thu, 30 Oct 2025 12:05:41 +0100 Subject: [PATCH 5/8] Refactor after method to use tap for directory deletion --- src/Snapshot.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Snapshot.php b/src/Snapshot.php index 2a024e8..66a8565 100644 --- a/src/Snapshot.php +++ b/src/Snapshot.php @@ -152,8 +152,8 @@ protected function loadStream(string $connectionName = null) } })->each(function (string $statement) use ($connectionName) { DB::connection($connectionName)->unprepared($statement); - })->after(function () use ($directory) { - $directory->delete(); + })->tap(function () use ($directory) { + $directory->delete(); }); } From 1c31f3421fb586f000d7899b1e3fdac92f4d7f2d Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Thu, 30 Oct 2025 15:01:03 +0100 Subject: [PATCH 6/8] Fix Left --- src/Snapshot.php | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) diff --git a/src/Snapshot.php b/src/Snapshot.php index 66a8565..de85ada 100644 --- a/src/Snapshot.php +++ b/src/Snapshot.php @@ -97,6 +97,12 @@ protected function shouldIgnoreLine(string $line): bool return true; } + // Some dump tools include non-SQL metadata lines like: + // These are not valid SQL statements and must be skipped. + if (str_contains($line, '; Type:') && str_contains($line, 'Schema:')) { + return true; + } + return false; } @@ -120,26 +126,38 @@ protected function loadStream(string $connectionName = null) : Storage::disk(self::class)->readStream($this->fileName); $statement = ''; + $leftover = ''; + while (! feof($stream)) { $chunk = $this->compressionExtension === 'gz' - ? gzread($stream, self::STREAM_BUFFER_SIZE) - : fread($stream, self::STREAM_BUFFER_SIZE); + ? gzread($stream, self::STREAM_BUFFER_SIZE) + : fread($stream, self::STREAM_BUFFER_SIZE); + + if ($chunk === false || $chunk === '') { + continue; + } + + // Prepend any leftover from previous chunk to ensure lines are complete + $chunk = $leftover . $chunk; + $leftover = ''; $lines = explode("\n", $chunk); - foreach ($lines as $idx => $line) { + + // If the chunk didn't end with a newline, the last element is a partial line. + // Save it for the next iteration so that we don't accidentally treat a mid-line + // piece (like the tail of a comment) as a new statement. + if (substr($chunk, -1) !== "\n") { + $leftover = array_pop($lines); + } + + foreach ($lines as $line) { + // Now that we reconstructed full lines, we can correctly ignore comments/meta if ($this->shouldIgnoreLine($line)) { continue; } $statement .= $line; - // Carry-over the last line to the next chunk since it - // is possible that this chunk finished mid-line right on - // a semi-colon. - if (count($lines) == $idx + 1) { - break; - } - if (substr(trim($statement), -1, 1) === ';') { yield $statement; $statement = ''; @@ -147,6 +165,13 @@ protected function loadStream(string $connectionName = null) } } + // Process any leftover line after EOF + if ($leftover !== '') { + if (! $this->shouldIgnoreLine($leftover)) { + $statement .= $leftover; + } + } + if (substr(trim($statement), -1, 1) === ';') { yield $statement; } From 9ef8e911892ef64e8c4736b8667dc0397a6d74f1 Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Sun, 9 Nov 2025 09:48:32 +0100 Subject: [PATCH 7/8] Enhance SQL streaming parser for better line handling Refactor SQL streaming parser to handle various line states and comments more effectively. --- src/Snapshot.php | 245 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 221 insertions(+), 24 deletions(-) diff --git a/src/Snapshot.php b/src/Snapshot.php index de85ada..fccd3d3 100644 --- a/src/Snapshot.php +++ b/src/Snapshot.php @@ -125,8 +125,55 @@ protected function loadStream(string $connectionName = null) ? gzopen(Storage::disk(self::class)->path($this->fileName), 'r') : Storage::disk(self::class)->readStream($this->fileName); + // Stateful, PostgreSQL-aware streaming parser $statement = ''; $leftover = ''; + $lineBuffer = ''; + $atLineStart = true; + $skipLine = false; // for psql meta-commands starting with '\\' + + $inSingle = false; // inside '...' + $inDollarTag = null; // holds the full $tag$ delimiter when inside dollar-quoted string + $inBlockComment = false;// inside /* ... */ + $inLineComment = false; // inside -- ... until \n + $inCopy = false; // inside COPY ... FROM stdin data section + $copyLineBuffer = ''; + + $flushLineIfNotIgnored = function () use (&$lineBuffer, &$statement) { + $line = $lineBuffer; + $lineBuffer = ''; + // Decide whether to ignore this line (comments/meta). We only evaluate in neutral state. + if ($this->shouldIgnoreLine($line)) { + return; // drop + } + $statement .= $line; + }; + + $yieldIfTerminated = function () use (&$statement, &$lineBuffer, &$inCopy) { + // Append any remaining buffered line (if not ignored); caller must ensure neutral state + // Evaluate ignore after full line only if a newline was already encountered. + // If semicolon occurs mid-line, we can't defer the decision to EOL. Apply a quick metadata guard. + $trimmedLine = trim($lineBuffer); + if ($trimmedLine !== '' && str_contains($trimmedLine, '; Type:') && str_contains($trimmedLine, 'Schema:')) { + // This is a pg_dump metadata line; drop it entirely and do not terminate. + $lineBuffer = ''; + return false; + } + $statement .= $lineBuffer; + $lineBuffer = ''; + $sql = trim($statement); + if ($sql === '') { + $statement = ''; + return false; + } + // Detect COPY ... FROM stdin; header and enter copy mode. We do NOT yield this to DB + if (preg_match('/^copy\s+.+\s+from\s+stdin;\s*$/is', $sql)) { + $inCopy = true; + $statement = ''; + return false; + } + return $sql; // return the SQL to be yielded by caller + }; while (! feof($stream)) { $chunk = $this->compressionExtension === 'gz' @@ -137,43 +184,193 @@ protected function loadStream(string $connectionName = null) continue; } - // Prepend any leftover from previous chunk to ensure lines are complete - $chunk = $leftover . $chunk; + $data = $leftover . $chunk; $leftover = ''; + $len = strlen($data); + + for ($i = 0; $i < $len; $i++) { + $ch = $data[$i]; + $next = ($i + 1 < $len) ? $data[$i + 1] : null; + + // COPY data mode: consume lines verbatim until a line with "\\." terminator + if ($inCopy) { + $copyLineBuffer .= $ch; + if ($ch === "\n") { + $line = rtrim($copyLineBuffer, "\r\n"); + $copyLineBuffer = ''; + if ($line === '\\.') { + // End of COPY data. Return to neutral state. + $inCopy = false; + $atLineStart = true; + } else { + // Stay in COPY mode; ignore data lines. + $atLineStart = true; + } + } + continue; + } - $lines = explode("\n", $chunk); + // Handle pending line-comment + if ($inLineComment) { + if ($ch === "\n") { + $inLineComment = false; + $atLineStart = true; + $lineBuffer .= "\n"; // preserve newline to keep statement spacing stable + // End of line: commit or drop buffered line + $flushLineIfNotIgnored(); + } + continue; + } - // If the chunk didn't end with a newline, the last element is a partial line. - // Save it for the next iteration so that we don't accidentally treat a mid-line - // piece (like the tail of a comment) as a new statement. - if (substr($chunk, -1) !== "\n") { - $leftover = array_pop($lines); - } + // Handle block comment + if ($inBlockComment) { + if ($ch === '*' && $next === '/') { + $inBlockComment = false; + $i++; // consume '/' + } + if ($ch === "\n") { + $atLineStart = true; + } + continue; + } - foreach ($lines as $line) { - // Now that we reconstructed full lines, we can correctly ignore comments/meta - if ($this->shouldIgnoreLine($line)) { + // Handle inside single-quoted string + if ($inSingle) { + $statement .= $ch; + if ($ch === "'" && $next === "'") { + // escaped quote + $statement .= $next; + $i++; + } elseif ($ch === "'") { + $inSingle = false; + } + if ($ch === "\n") { + $atLineStart = true; + } else { + $atLineStart = false; + } continue; } - $statement .= $line; + // Handle inside dollar-quoted string + if ($inDollarTag !== null) { + // Lookahead for closing tag + $tagLen = strlen($inDollarTag); + if ($ch === '$' && $tagLen > 0) { + if ($i + $tagLen <= $len && substr($data, $i, $tagLen) === $inDollarTag) { + $statement .= $inDollarTag; + $i += $tagLen - 1; + $inDollarTag = null; + $atLineStart = false; + continue; + } + } + // otherwise just append + $statement .= $ch; + if ($ch === "\n") { + $atLineStart = true; + } else { + $atLineStart = false; + } + continue; + } - if (substr(trim($statement), -1, 1) === ';') { - yield $statement; - $statement = ''; + // Neutral state (not in string/comment) + // Start of psql meta-command line (e.g., "\\connect", "\\.") → skip entire line + if ($atLineStart && $ch === '\\') { + $skipLine = true; + } + if ($skipLine) { + if ($ch === "\n") { + $skipLine = false; + $atLineStart = true; + $lineBuffer = ''; + } + continue; } - } - } - // Process any leftover line after EOF - if ($leftover !== '') { - if (! $this->shouldIgnoreLine($leftover)) { - $statement .= $leftover; + // Detect start of line comment + if ($ch === '-' && $next === '-') { + $inLineComment = true; + $i++; // consume second '-' + continue; + } + + // Detect start of block comment + if ($ch === '/' && $next === '*') { + $inBlockComment = true; + $i++; // consume '*' + continue; + } + + // Detect start of single-quoted string + if ($ch === "'") { + $inSingle = true; + $statement .= $ch; + $atLineStart = false; + continue; + } + + // Detect start of dollar-quoted string: $tag$ + if ($ch === '$') { + // find next '$' + $j = $i + 1; + while ($j < $len && $data[$j] !== '$' && preg_match('/[A-Za-z0-9_]/', $data[$j])) { + $j++; + } + if ($j < $len && $data[$j] === '$') { + $tag = substr($data, $i, $j - $i + 1); // includes both '$' + // validate all chars between are [A-Za-z0-9_]* + $between = substr($tag, 1, -1); + if ($between === '' || preg_match('/^[A-Za-z0-9_]+$/', $between)) { + $inDollarTag = $tag; + $statement .= $tag; + $i = $j; + $atLineStart = false; + continue; + } + } + // fallthrough: it's just a '$' char + } + + // Normal character in neutral state + if ($ch === ';') { + // Potential statement terminator + $lineBuffer .= $ch; + $sql = $yieldIfTerminated(); + if ($sql !== false) { + yield $sql; + $statement = ''; + } + $atLineStart = false; + continue; + } + + // Regular char accumulation into current logical line + $lineBuffer .= $ch; + if ($ch === "\n") { + // End of physical line: decide to keep or drop it + $atLineStart = true; + $flushLineIfNotIgnored(); + } else { + $atLineStart = false; + } } + + // Preserve any partial multibyte or token between chunks + // We simply carry over the tail which may cut a token; to be safe carry last few bytes + // However, here we can't easily know token boundaries, so just keep nothing special. + // We'll use $leftover only for incomplete dollar-tag lookahead or similar by setting it explicitly. + // Not needed now. } - if (substr(trim($statement), -1, 1) === ';') { - yield $statement; + // EOF: flush any remaining buffered content safely + if ($lineBuffer !== '') { + $flushLineIfNotIgnored(); + } + $final = trim($statement); + if ($final !== '' && substr($final, -1) === ';') { + yield $final; } })->each(function (string $statement) use ($connectionName) { DB::connection($connectionName)->unprepared($statement); From 15cf5b1699c19283539041b25fb92c63094864f8 Mon Sep 17 00:00:00 2001 From: Johannes Rudolph Date: Sun, 9 Nov 2025 10:13:13 +0100 Subject: [PATCH 8/8] Enhance SQL statement handling for quoted literals Add logic to drop standalone quoted literals in SQL statements. --- src/Snapshot.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Snapshot.php b/src/Snapshot.php index fccd3d3..4125f7b 100644 --- a/src/Snapshot.php +++ b/src/Snapshot.php @@ -352,6 +352,16 @@ protected function loadStream(string $connectionName = null) // End of physical line: decide to keep or drop it $atLineStart = true; $flushLineIfNotIgnored(); + + // If the current accumulated statement is just a standalone quoted literal + // (e.g. a marker line like 'snapshot4'), drop it to avoid concatenation with + // the next real SQL statement. + $trimStmt = trim($statement); + if ($trimStmt !== '' + && !str_contains($trimStmt, ';') + && (preg_match("/^'(?:[^']|'')*'$/", $trimStmt) || preg_match('/^"[^"]*"$/', $trimStmt))) { + $statement = ''; + } } else { $atLineStart = false; }