diff --git a/src/CodecZstd.jl b/src/CodecZstd.jl index dffbcc1..8965204 100644 --- a/src/CodecZstd.jl +++ b/src/CodecZstd.jl @@ -7,6 +7,17 @@ export ZstdDecompressor, ZstdDecompressorStream +if VERSION >= v"1.11.0-DEV.469" + eval(Meta.parse(""" + public + level_bounds, + windowLog_bounds, + windowLogMax_bounds, + DEFAULT_COMPRESSION_LEVEL, + ZSTD_WINDOWLOG_LIMIT_DEFAULT + """)) +end + import TranscodingStreams: TranscodingStreams, TranscodingStream, diff --git a/src/compression.jl b/src/compression.jl index 3f51cc6..6f2c84d 100644 --- a/src/compression.jl +++ b/src/compression.jl @@ -4,6 +4,7 @@ struct ZstdCompressor <: TranscodingStreams.Codec cstream::CStream level::Int + windowLog::Int32 endOp::LibZstd.ZSTD_EndDirective end @@ -11,13 +12,43 @@ function Base.show(io::IO, codec::ZstdCompressor) if codec.endOp == LibZstd.ZSTD_e_end print(io, "ZstdFrameCompressor(level=$(codec.level))") else - print(io, summary(codec), "(level=$(codec.level))") + print(io, summary(codec), "(") + print(io, "level=$(codec.level)") + if codec.windowLog != Int32(0) + print(io, ", windowLog=Int32($(codec.windowLog))") + end + print(io, ")") end end # Same as the zstd command line tool (v1.2.0). const DEFAULT_COMPRESSION_LEVEL = 3 +# This is technically part of the static api, but I don't see how this could be changed easily. +const ZSTD_WINDOWLOG_LIMIT_DEFAULT = Int32(27) + +""" + level_bounds() -> min::Int32, max::Int32 + +Return the minimum and maximum compression levels available. +""" +function level_bounds() + bounds = LibZstd.ZSTD_cParam_getBounds(LibZstd.ZSTD_c_compressionLevel) + @assert !iserror(bounds.error) + Int32(bounds.lowerBound), Int32(bounds.upperBound) +end + +""" + windowLog_bounds() -> min::Int32, max::Int32 + +Return the minimum and maximum windowLog available. 
+""" +function windowLog_bounds() + bounds = LibZstd.ZSTD_cParam_getBounds(LibZstd.ZSTD_c_windowLog) + @assert !iserror(bounds.error) + Int32(bounds.lowerBound), Int32(bounds.upperBound) +end + """ ZstdCompressor(;level=$(DEFAULT_COMPRESSION_LEVEL)) @@ -31,11 +62,36 @@ Arguments The library also offers negative compression levels, which extend the range of speed vs. ratio preferences. The lower the level, the faster the speed (at the cost of compression). - 0 is a special value for `ZSTD_defaultCLevel()`. - The level will be clamped to the range `ZSTD_minCLevel()` to `ZSTD_maxCLevel()`. + 0 is a special value for the default level of the c library. + The level will be clamped by `level_bounds()`. + +Advanced compression parameters. + +- `windowLog::Int32= Int32(0)`: Maximum allowed back-reference distance, expressed as power of 2. + + This will set a memory budget for streaming decompression, + with larger values requiring more memory + and typically compressing more. + Must be between `windowLog_bounds()[1]` and `windowLog_bounds()[2]` inclusive; out-of-range values throw an `ArgumentError` instead of being clamped. + Special: value 0 means "use default windowLog". + Note: Using a windowLog greater than $(ZSTD_WINDOWLOG_LIMIT_DEFAULT) + requires explicitly allowing such size at streaming decompression stage. """ -function ZstdCompressor(;level::Integer=DEFAULT_COMPRESSION_LEVEL) - ZstdCompressor(CStream(), clamp(level, LibZstd.ZSTD_minCLevel(), LibZstd.ZSTD_maxCLevel())) +function ZstdCompressor(; + level::Integer=DEFAULT_COMPRESSION_LEVEL, + windowLog::Int32=Int32(0), + ) + windowLog_range = (:)(windowLog_bounds()...) + if !iszero(windowLog) && windowLog ∉ windowLog_range + # Since this has to be matched on the decompression side, throw instead of clamping. + throw(ArgumentError("windowLog ∈ $(windowLog_range) must hold. 
Got\nwindowLog => $(windowLog)")) + end + ZstdCompressor( + CStream(), + clamp(level, level_bounds()...), + windowLog, + LibZstd.ZSTD_e_continue, + ) end ZstdCompressor(cstream, level) = ZstdCompressor(cstream, level, :continue) @@ -54,10 +110,15 @@ Arguments which extend the range of speed vs. ratio preferences. The lower the level, the faster the speed (at the cost of compression). 0 is a special value for `ZSTD_defaultCLevel()`. - The level will be clamped to the range `ZSTD_minCLevel()` to `ZSTD_maxCLevel()`. + The level will be clamped by `level_bounds()`. """ function ZstdFrameCompressor(;level::Integer=DEFAULT_COMPRESSION_LEVEL) - ZstdCompressor(CStream(), clamp(level, LibZstd.ZSTD_minCLevel(), LibZstd.ZSTD_maxCLevel()), :end) + ZstdCompressor( + CStream(), + clamp(level, level_bounds()...), + Int32(0), + LibZstd.ZSTD_e_end, + ) end # pretend that ZstdFrameCompressor is a compressor type function TranscodingStreams.transcode(C::typeof(ZstdFrameCompressor), args...) @@ -78,7 +139,7 @@ const ZstdCompressorStream{S} = TranscodingStream{ZstdCompressor,S} where S<:IO Create a new zstd compression stream (see `ZstdCompressor` for `kwargs`). """ function ZstdCompressorStream(stream::IO; kwargs...) - x, y = splitkwargs(kwargs, (:level,)) + x, y = splitkwargs(kwargs, (:level, :windowLog)) return TranscodingStream(ZstdCompressor(;x...), stream; y...) end @@ -105,12 +166,20 @@ function TranscodingStreams.startproc(codec::ZstdCompressor, mode::Symbol, err:: throw(OutOfMemoryError()) end ret = LibZstd.ZSTD_CCtx_setParameter(codec.cstream, LibZstd.ZSTD_c_compressionLevel, clamp(codec.level, Cint)) - # TODO Allow setting other parameters here. 
if iserror(ret) # This is unreachable according to zstd.h - err[] = ErrorException("zstd initialization error") + err[] = ErrorException("zstd error setting compressionLevel") return :error end + if !iszero(codec.windowLog) + ret = LibZstd.ZSTD_CCtx_setParameter(codec.cstream, LibZstd.ZSTD_c_windowLog, Cint(codec.windowLog)) + if iserror(ret) + # This should be unreachable because windowLog is checked in the constructor. + err[] = ErrorException("zstd error setting windowLog to $(codec.windowLog)") + return :error + end + end + # TODO Allow setting other parameters here. end code = reset!(codec.cstream, 0 #=unknown source size=#) if iserror(code) diff --git a/src/decompression.jl b/src/decompression.jl index d096524..7baf1c7 100644 --- a/src/decompression.jl +++ b/src/decompression.jl @@ -1,21 +1,56 @@ # Decompressor Codec # ================== +""" + windowLogMax_bounds() -> min::Int32, max::Int32 + +Return the minimum and maximum windowLogMax available. +""" +function windowLogMax_bounds() + bounds = LibZstd.ZSTD_dParam_getBounds(LibZstd.ZSTD_d_windowLogMax) + @assert !iserror(bounds.error) + Int32(bounds.lowerBound), Int32(bounds.upperBound) +end + struct ZstdDecompressor <: TranscodingStreams.Codec dstream::DStream + windowLogMax::Int32 end function Base.show(io::IO, codec::ZstdDecompressor) - print(io, summary(codec), "()") + print(io, summary(codec), "(") + if codec.windowLogMax != Int32(0) + print(io, "windowLogMax=Int32($(codec.windowLogMax))") + end + print(io, ")") end """ ZstdDecompressor() Create a new zstd decompression codec. + +Arguments +--------- + +Advanced decompression parameters. + +- `windowLogMax::Int32= Int32(0)`: Select a size limit (in power of 2) beyond which + the streaming API will refuse to allocate memory buffer + in order to protect the host from unreasonable memory requirements. + This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. 
+ By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). + Must be between `windowLogMax_bounds()[1]` and `windowLogMax_bounds()[2]` inclusive; out-of-range values throw an `ArgumentError` instead of being clamped. + Special: value 0 means "use default maximum windowLog". """ -function ZstdDecompressor() - return ZstdDecompressor(DStream()) +function ZstdDecompressor(; + windowLogMax::Int32=Int32(0), + ) + windowLogMax_range = (:)(windowLogMax_bounds()...) + if !iszero(windowLogMax) && windowLogMax ∉ windowLogMax_range + throw(ArgumentError("windowLogMax ∈ $(windowLogMax_range) must hold. Got\nwindowLogMax => $(windowLogMax)")) + end + return ZstdDecompressor(DStream(), windowLogMax) end const ZstdDecompressorStream{S} = TranscodingStream{ZstdDecompressor,S} where S<:IO @@ -26,7 +61,8 @@ const ZstdDecompressorStream{S} = TranscodingStream{ZstdDecompressor,S} where S< Create a new zstd decompression stream (`kwargs` are passed to `TranscodingStream`). """ function ZstdDecompressorStream(stream::IO; kwargs...) - return TranscodingStream(ZstdDecompressor(), stream; kwargs...) + x, y = splitkwargs(kwargs, (:windowLogMax,)) + return TranscodingStream(ZstdDecompressor(;x...), stream; y...) end @@ -49,7 +85,14 @@ function TranscodingStreams.startproc(codec::ZstdDecompressor, mode::Symbol, err if codec.dstream.ptr == C_NULL throw(OutOfMemoryError()) end - # TODO Allow setting other parameters here. + if !iszero(codec.windowLogMax) + ret = LibZstd.ZSTD_DCtx_setParameter(codec.dstream, LibZstd.ZSTD_d_windowLogMax, Cint(codec.windowLogMax)) + if iserror(ret) + # This should be unreachable because windowLogMax is checked in the constructor. 
+ err[] = ErrorException("zstd error setting windowLogMax") + return :error + end + end end code = reset!(codec.dstream) if iserror(code) @@ -77,7 +120,13 @@ function TranscodingStreams.process(codec::ZstdDecompressor, input::Memory, outp if error_code(code) == Integer(LibZstd.ZSTD_error_memory_allocation) throw(OutOfMemoryError()) end - err[] = ErrorException("zstd decompression error: " * error_name(code)) + err[] = if error_code(code) == Integer(LibZstd.ZSTD_error_frameParameter_windowTooLarge) + ErrorException("zstd decompression error: Window size larger than maximum.\nHint: try increasing `windowLogMax` when constructing the `ZstdDecompressor`") + # TODO It is possible to find the requested window size by parsing the frame header. + # This could be used to get a better error message. + else + ErrorException("zstd decompression error: " * error_name(code)) + end return Δin, Δout, :error else if code == 0 diff --git a/test/runtests.jl b/test/runtests.jl index 73cf82f..e403bf2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -226,4 +226,82 @@ include("utils.jl") end end end + + @testset "windowLog" begin + # out of bounds values + cbounds = CodecZstd.windowLog_bounds() + dbounds = CodecZstd.windowLogMax_bounds() + @test_throws ArgumentError ZstdCompressor(;windowLog=cbounds[2]+Int32(1)) + @test_throws ArgumentError ZstdCompressor(;windowLog=cbounds[1]-Int32(1)) + @test_throws ArgumentError ZstdDecompressor(;windowLogMax=dbounds[2]+Int32(1)) + @test_throws ArgumentError ZstdDecompressor(;windowLogMax=dbounds[1]-Int32(1)) + + codec = ZstdCompressor(;level=10, windowLog=cbounds[1]) + @test codec isa ZstdCompressor + @test sprint(show, codec) == "ZstdCompressor(level=10, windowLog=Int32($(cbounds[1])))" + + codec = ZstdDecompressor(;windowLogMax=dbounds[1]) + @test codec isa ZstdDecompressor + @test sprint(show, codec) == "ZstdDecompressor(windowLogMax=Int32($(dbounds[1])))" + + @test CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT ∈ (:)(dbounds...) 
+ @test CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT ∈ (:)(cbounds...) + + windowLogs = Int32[ + cbounds[1], + Int32(0), + CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT-1, + CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT, + CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT+1, + cbounds[2] + ] + windowLogMaxs = Int32[ + dbounds[1], + Int32(0), + CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT-1, + CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT, + CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT+1, + dbounds[2] + ] + # 32 bit systems don't have enough memory to test upper bound windowLog + if Sys.WORD_SIZE == 32 + pop!(windowLogs) + pop!(windowLogMaxs) + end + + for windowLog in windowLogs + for windowLogMax in windowLogMaxs + uncompressed = rand(UInt8, 3000) + sink = IOBuffer() + # level 22 is needed to get compression to use the full + # ZSTD_WINDOWLOG_LIMIT_DEFAULT when windowLog is 0 + compressor = TranscodingStream(ZstdCompressor(;level = 22, windowLog), sink; stop_on_end=true) + write(compressor, uncompressed) + close(compressor) + compressed = take!(sink) + + decompressor = TranscodingStream(ZstdDecompressor(;windowLogMax), IOBuffer(compressed)) + + actual_windowLog = if iszero(windowLog) + CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT + else + windowLog + end + actual_windowLogMax = if iszero(windowLogMax) + CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT + else + windowLogMax + end + if actual_windowLogMax ≥ actual_windowLog + @test read(decompressor) == uncompressed + else + @test_throws( + ErrorException("zstd decompression error: Window size larger than maximum.\nHint: try increasing `windowLogMax` when constructing the `ZstdDecompressor`"), + read(decompressor), + ) + end + close(decompressor) + end + end + end end