From eab717488fd88f2c7ddc15a4e3270fad95166626 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 1 Dec 2025 12:37:17 +0100 Subject: [PATCH 1/6] prevent re-tagging of `GzState` --- libz-rs-sys-cdylib/src/gz.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/libz-rs-sys-cdylib/src/gz.rs b/libz-rs-sys-cdylib/src/gz.rs index f3ca173b..db69293a 100644 --- a/libz-rs-sys-cdylib/src/gz.rs +++ b/libz-rs-sys-cdylib/src/gz.rs @@ -2588,7 +2588,7 @@ pub unsafe extern "C-unwind" fn gzseek64( // Rewind, then skip to offset. // Safety: `file` points to an initialized `GzState`. - if unsafe { gzrewind(file) } == -1 { + if unsafe { gzrewind_help(state) } == -1 { return -1; } } @@ -2665,6 +2665,10 @@ pub unsafe extern "C-unwind" fn gzrewind(file: gzFile) -> c_int { return -1; }; + unsafe { gzrewind_help(state) } +} + +unsafe fn gzrewind_help(state: &mut GzState) -> c_int { // Check that we're reading and that there's no error. if state.mode != GzMode::GZ_READ || (state.err != Z_OK && state.err != Z_BUF_ERROR) { return -1; From 1d218abf3df41658c9b7e6e25b4a133e738da6a6 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 1 Dec 2025 13:12:23 +0100 Subject: [PATCH 2/6] use `std::fs::metadata` to get file size in tests --- test-libz-rs-sys/src/gz.rs | 21 ++------------------- 1 file changed, 2 insertions(+), 19 deletions(-) diff --git a/test-libz-rs-sys/src/gz.rs b/test-libz-rs-sys/src/gz.rs index f26ee364..5fc96857 100644 --- a/test-libz-rs-sys/src/gz.rs +++ b/test-libz-rs-sys/src/gz.rs @@ -2033,25 +2033,8 @@ fn gzrewind_error() { // // - `Ok(size)` on success. // - `Err` on error. -fn file_size(path: &str) -> Result { - let mut result = Err(()); - let stat_ptr = unsafe { libc::calloc(1, core::mem::size_of::() as _) }; - if stat_ptr.is_null() { - return result; - } - let ret = unsafe { - libc::stat( - CString::new(path).unwrap().as_ptr(), - stat_ptr.cast::(), - ) - }; - if ret == 0 { - if let Some(stat_info) = unsafe { stat_ptr.cast::().as_ref() } { - result = Ok(stat_info.st_size as usize); - } - } - unsafe { libc::free(stat_ptr) }; - result +fn file_size(path: &str) -> Result { + Ok(std::fs::metadata(path)?.len() as usize) } #[cfg(feature = "gzprintf")] From 2dbc2b4ac1ac24fd50b8bfe8eacef420cbcc98ab Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 1 Dec 2025 13:13:39 +0100 Subject: [PATCH 3/6] work around gz functions not being posix-compatible The C source assumes that when a write succeeds it writes all bytes. that is not strictly required, but effectively true in most cases where zlib is used. However, miri does not respect this. So when running miri, explicitly loop until all bytes are written --- libz-rs-sys-cdylib/src/gz.rs | 39 +++++++++++++++++++++++++++++-- test-libz-rs-sys/src/gz.rs | 45 ++++++++++++++++++++++++++++++++---- 2 files changed, 78 insertions(+), 6 deletions(-) diff --git a/libz-rs-sys-cdylib/src/gz.rs b/libz-rs-sys-cdylib/src/gz.rs index db69293a..88504b1d 100644 --- a/libz-rs-sys-cdylib/src/gz.rs +++ b/libz-rs-sys-cdylib/src/gz.rs @@ -1808,6 +1808,41 @@ fn gz_init(state: &mut GzState) -> Result<(), ()> { Ok(()) } +#[cfg(not(miri))] +unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { + unsafe { libc::write(fd, buf, count) } +} + +/// Deal with miri often not writing the full buffer in one go. That is valid, but zlib assumes +/// that writes are to true files and succeed unless e.g. the disk is full. +#[cfg(miri)] +unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { + let mut total_written = 0isize; + loop { + let ret = unsafe { + libc::write( + fd, + buf.add(total_written as usize), + count - total_written as usize, + ) + }; + + match ret.cmp(&0) { + Ordering::Less => return ret, + Ordering::Equal => return total_written, + Ordering::Greater => { + total_written += ret; + + if total_written as usize == count { + break; + } + } + } + } + + total_written +} + // Compress whatever is at avail_in and next_in (unless in direct mode) and write // to the output file. // @@ -1824,7 +1859,7 @@ fn gz_comp(state: &mut GzState, flush: c_int) -> Result<(), ()> { // Write directly if requested. if state.direct { let got = unsafe { - libc::write( + write( state.fd, state.stream.next_in.cast::(), state.stream.avail_in as _, @@ -1868,7 +1903,7 @@ fn gz_comp(state: &mut GzState, flush: c_int) -> Result<(), ()> { return Err(()); } if have != 0 { - let ret = unsafe { libc::write(state.fd, state.next.cast::(), have as _) }; + let ret = unsafe { write(state.fd, state.next.cast::(), have as _) }; if ret != have as _ { unsafe { gz_error(state, Some((Z_ERRNO, "write error"))) }; return Err(()); diff --git a/test-libz-rs-sys/src/gz.rs b/test-libz-rs-sys/src/gz.rs index 5fc96857..744fecf1 100644 --- a/test-libz-rs-sys/src/gz.rs +++ b/test-libz-rs-sys/src/gz.rs @@ -35,6 +35,43 @@ fn binary_mode(mode: c_int) -> c_int { } } +#[cfg(not(miri))] +unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { + unsafe { libc::write(fd, buf, count) } +} + +/// Deal with miri often not writing the full buffer in one go. That is valid, but zlib assumes +/// that writes are to true files and succeed unless e.g. the disk is full. +#[cfg(miri)] +unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { + use std::cmp::Ordering; + + let mut total_written = 0isize; + loop { + let ret = unsafe { + libc::write( + fd, + buf.add(total_written as usize), + count - total_written as usize, + ) + }; + + match ret.cmp(&0) { + Ordering::Less => return ret, + Ordering::Equal => return total_written, + Ordering::Greater => { + total_written += ret; + + if total_written as usize == count { + break; + } + } + } + } + + total_written +} + // Try to gzopen a file path with a specified mode, and panic if the result is unexpected. // NOTE: This is a macro, instead of a function, so that the test runner will report errors // with the line number where it is invoked. @@ -331,7 +368,7 @@ fn gzread_special_cases() { }; assert_ne!(fd, -1); assert_eq!( - unsafe { libc::write(fd, STRING2.as_ptr().cast::(), STRING2.len() as _) }, + unsafe { write(fd, STRING2.as_ptr().cast::(), STRING2.len() as _) }, STRING2.len() as _ ); assert_eq!(unsafe { libc::close(fd) }, 0); @@ -793,7 +830,7 @@ fn gzoffset_gztell_read() { }; assert_ne!(fd, -1); assert_eq!( - unsafe { libc::write(fd, PADDING.as_ptr().cast::(), OFFSET as _) }, + unsafe { write(fd, PADDING.as_ptr().cast::(), OFFSET as _) }, OFFSET as _ ); let source_name = crate_path("src/test-data/issue-109.gz"); @@ -809,7 +846,7 @@ fn gzoffset_gztell_read() { break; } assert_eq!( - unsafe { libc::write(fd, buf.as_ptr().cast(), ret as _) }, + unsafe { write(fd, buf.as_ptr().cast(), ret as _) }, ret as _ ); } @@ -1703,7 +1740,7 @@ fn gzseek_read() { break; } assert_eq!( - unsafe { libc::write(fd, buf.as_mut_ptr().cast::(), ret as _) }, + unsafe { write(fd, buf.as_mut_ptr().cast::(), ret as _) }, ret as _ ); } From 936245fe5704ebaba893ef5a6177820aaf303b93 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 1 Dec 2025 13:56:43 +0100 Subject: [PATCH 4/6] use std read to read file contents --- test-libz-rs-sys/src/gz.rs | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/test-libz-rs-sys/src/gz.rs b/test-libz-rs-sys/src/gz.rs index 744fecf1..8fde7e90 100644 --- a/test-libz-rs-sys/src/gz.rs +++ b/test-libz-rs-sys/src/gz.rs @@ -1001,15 +1001,8 @@ fn gzputc_basic() { assert_eq!(unsafe { gzclose(file) }, Z_OK); // Validate that the file contains the expected bytes. - let mode = binary_mode(libc::O_RDONLY); - let fd = unsafe { libc::open(CString::new(file_name.as_str()).unwrap().as_ptr(), mode) }; - assert_ne!(fd, -1); - // Try to read more than the expected amount of data, to ensure we get everything. - let mut buf = [0u8; CONTENT.len() + 1]; - let bytes_read = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut c_void, buf.len() as _) }; - assert_ne!(bytes_read, -1); - assert_eq!(&buf[..bytes_read as usize], CONTENT); - assert_eq!(unsafe { libc::close(fd) }, 0); + let actual = std::fs::read(file_name).unwrap(); + assert_eq!(actual, CONTENT); } #[test] @@ -1073,15 +1066,8 @@ fn gzputs_basic() { // Validate that the file contains the expected bytes. const EXPECTED: &str = "zlib string larger than the buffer size"; - let mode = binary_mode(libc::O_RDONLY); - let fd = unsafe { libc::open(CString::new(file_name.as_str()).unwrap().as_ptr(), mode) }; - assert_ne!(fd, -1); - // Try to read more than the expected amount of data, to ensure we get everything. - let mut buf = [0u8; EXPECTED.len() + 1]; - let bytes_read = unsafe { libc::read(fd, buf.as_mut_ptr() as *mut c_void, buf.len() as _) }; - assert_ne!(bytes_read, -1); - assert_eq!(&buf[..bytes_read as usize], EXPECTED.as_bytes()); - assert_eq!(unsafe { libc::close(fd) }, 0); + let actual = std::fs::read(file_name).unwrap(); + assert_eq!(actual, EXPECTED.as_bytes()); } #[test] @@ -1526,16 +1512,9 @@ fn gzfwrite_basic() { assert_eq!(unsafe { gzclose(file) }, Z_OK); // Read in the file and verify that the contents match what was passed to gzfwrite. - let mode = binary_mode(libc::O_RDONLY); - let fd = unsafe { libc::open(CString::new(file_name.as_str()).unwrap().as_ptr(), mode) }; - assert_ne!(fd, -1); const EXPECTED: &[u8] = b"test of gzfwrite"; - let mut buf = [0u8; EXPECTED.len() + 1]; - let ret = unsafe { libc::read(fd, buf.as_mut_ptr().cast(), buf.len() as _) }; - assert_eq!(ret, EXPECTED.len() as _); - assert_eq!(&buf[..EXPECTED.len()], EXPECTED); - - assert_eq!(unsafe { libc::close(fd) }, 0); + let actual = std::fs::read(file_name).unwrap(); + assert_eq!(actual, EXPECTED); } #[test] From 86392168c4bc9b66343c73f7493dd2881a24da36 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 1 Dec 2025 13:18:53 +0100 Subject: [PATCH 5/6] run gz api tests with miri --- .github/workflows/checks.yaml | 7 ++++++- libz-rs-sys-cdylib/src/gz.rs | 12 +++++------- test-libz-rs-sys/src/gz.rs | 12 +++++------- 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index 30a51c02..0a6baa71 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -490,11 +490,16 @@ jobs: RUSTFLAGS: "-Ctarget-feature=+avx2,+bmi2,+bmi1" - name: Test allocator with miri run: "cargo +nightly miri nextest run -j4 -p zlib-rs --target ${{ matrix.target }} allocate::" - - name: Test gz logic with miri + - name: Test gz internals with miri working-directory: libz-rs-sys-cdylib run: "cargo +nightly miri nextest run -j4 -p libz-rs-sys-cdylib --target ${{ matrix.target }} --features=gz" env: MIRIFLAGS: "-Zmiri-tree-borrows -Zmiri-disable-isolation" + - name: Test gz api with miri + if: ${{ contains(matrix.target, 'linux') }} + run: "cargo +nightly miri nextest run -j4 -p test-libz-rs-sys --target ${{ matrix.target }} gz::" + env: + MIRIFLAGS: "-Zmiri-disable-isolation" miri-avx512: name: "Miri avx512" diff --git a/libz-rs-sys-cdylib/src/gz.rs b/libz-rs-sys-cdylib/src/gz.rs index 88504b1d..0a394b93 100644 --- a/libz-rs-sys-cdylib/src/gz.rs +++ b/libz-rs-sys-cdylib/src/gz.rs @@ -1809,29 +1809,27 @@ fn gz_init(state: &mut GzState) -> Result<(), ()> { } #[cfg(not(miri))] -unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { - unsafe { libc::write(fd, buf, count) } -} +use libc::write; /// Deal with miri often not writing the full buffer in one go. That is valid, but zlib assumes /// that writes are to true files and succeed unless e.g. the disk is full. #[cfg(miri)] unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { - let mut total_written = 0isize; + let mut total_written: libc::ssize_t = 0; loop { let ret = unsafe { libc::write( fd, buf.add(total_written as usize), - count - total_written as usize, + (count - total_written as size_t) as _, ) }; match ret.cmp(&0) { - Ordering::Less => return ret, + Ordering::Less => return ret as libc::ssize_t, Ordering::Equal => return total_written, Ordering::Greater => { - total_written += ret; + total_written += ret as libc::ssize_t; if total_written as usize == count { break; diff --git a/test-libz-rs-sys/src/gz.rs b/test-libz-rs-sys/src/gz.rs index 8fde7e90..88393865 100644 --- a/test-libz-rs-sys/src/gz.rs +++ b/test-libz-rs-sys/src/gz.rs @@ -36,9 +36,7 @@ fn binary_mode(mode: c_int) -> c_int { } #[cfg(not(miri))] -unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { - unsafe { libc::write(fd, buf, count) } -} +use libc::write; /// Deal with miri often not writing the full buffer in one go. That is valid, but zlib assumes /// that writes are to true files and succeed unless e.g. the disk is full. @@ -46,21 +44,21 @@ unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { use std::cmp::Ordering; - let mut total_written = 0isize; + let mut total_written: libc::ssize_t = 0; loop { let ret = unsafe { libc::write( fd, buf.add(total_written as usize), - count - total_written as usize, + (count - total_written as size_t) as _, ) }; match ret.cmp(&0) { - Ordering::Less => return ret, + Ordering::Less => return ret as libc::ssize_t, Ordering::Equal => return total_written, Ordering::Greater => { - total_written += ret; + total_written += ret as libc::ssize_t; if total_written as usize == count { break; From df9e1b435bf4689dc071a8d8b42322ed1ed8747b Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 1 Dec 2025 16:13:01 +0100 Subject: [PATCH 6/6] pass `-Zmiri-no-short-fd-operations` --- .github/workflows/checks.yaml | 2 +- libz-rs-sys-cdylib/src/gz.rs | 37 ++---------------------------- test-libz-rs-sys/src/gz.rs | 43 ++++------------------------------- 3 files changed, 7 insertions(+), 75 deletions(-) diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index 0a6baa71..ed7d3eb6 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -499,7 +499,7 @@ jobs: if: ${{ contains(matrix.target, 'linux') }} run: "cargo +nightly miri nextest run -j4 -p test-libz-rs-sys --target ${{ matrix.target }} gz::" env: - MIRIFLAGS: "-Zmiri-disable-isolation" + MIRIFLAGS: "-Zmiri-no-short-fd-operations -Zmiri-disable-isolation" miri-avx512: name: "Miri avx512" diff --git a/libz-rs-sys-cdylib/src/gz.rs b/libz-rs-sys-cdylib/src/gz.rs index 0a394b93..db69293a 100644 --- a/libz-rs-sys-cdylib/src/gz.rs +++ b/libz-rs-sys-cdylib/src/gz.rs @@ -1808,39 +1808,6 @@ fn gz_init(state: &mut GzState) -> Result<(), ()> { Ok(()) } -#[cfg(not(miri))] -use libc::write; - -/// Deal with miri often not writing the full buffer in one go. That is valid, but zlib assumes -/// that writes are to true files and succeed unless e.g. the disk is full. -#[cfg(miri)] -unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { - let mut total_written: libc::ssize_t = 0; - loop { - let ret = unsafe { - libc::write( - fd, - buf.add(total_written as usize), - (count - total_written as size_t) as _, - ) - }; - - match ret.cmp(&0) { - Ordering::Less => return ret as libc::ssize_t, - Ordering::Equal => return total_written, - Ordering::Greater => { - total_written += ret as libc::ssize_t; - - if total_written as usize == count { - break; - } - } - } - } - - total_written -} - // Compress whatever is at avail_in and next_in (unless in direct mode) and write // to the output file. // @@ -1857,7 +1824,7 @@ fn gz_comp(state: &mut GzState, flush: c_int) -> Result<(), ()> { // Write directly if requested. if state.direct { let got = unsafe { - write( + libc::write( state.fd, state.stream.next_in.cast::(), state.stream.avail_in as _, @@ -1901,7 +1868,7 @@ fn gz_comp(state: &mut GzState, flush: c_int) -> Result<(), ()> { return Err(()); } if have != 0 { - let ret = unsafe { write(state.fd, state.next.cast::(), have as _) }; + let ret = unsafe { libc::write(state.fd, state.next.cast::(), have as _) }; if ret != have as _ { unsafe { gz_error(state, Some((Z_ERRNO, "write error"))) }; return Err(()); diff --git a/test-libz-rs-sys/src/gz.rs b/test-libz-rs-sys/src/gz.rs index 88393865..913801e1 100644 --- a/test-libz-rs-sys/src/gz.rs +++ b/test-libz-rs-sys/src/gz.rs @@ -35,41 +35,6 @@ fn binary_mode(mode: c_int) -> c_int { } } -#[cfg(not(miri))] -use libc::write; - -/// Deal with miri often not writing the full buffer in one go. That is valid, but zlib assumes -/// that writes are to true files and succeed unless e.g. the disk is full. -#[cfg(miri)] -unsafe fn write(fd: c_int, buf: *const c_void, count: size_t) -> libc::ssize_t { - use std::cmp::Ordering; - - let mut total_written: libc::ssize_t = 0; - loop { - let ret = unsafe { - libc::write( - fd, - buf.add(total_written as usize), - (count - total_written as size_t) as _, - ) - }; - - match ret.cmp(&0) { - Ordering::Less => return ret as libc::ssize_t, - Ordering::Equal => return total_written, - Ordering::Greater => { - total_written += ret as libc::ssize_t; - - if total_written as usize == count { - break; - } - } - } - } - - total_written -} - // Try to gzopen a file path with a specified mode, and panic if the result is unexpected. // NOTE: This is a macro, instead of a function, so that the test runner will report errors // with the line number where it is invoked. @@ -366,7 +331,7 @@ fn gzread_special_cases() { }; assert_ne!(fd, -1); assert_eq!( - unsafe { write(fd, STRING2.as_ptr().cast::(), STRING2.len() as _) }, + unsafe { libc::write(fd, STRING2.as_ptr().cast::(), STRING2.len() as _) }, STRING2.len() as _ ); assert_eq!(unsafe { libc::close(fd) }, 0); @@ -828,7 +793,7 @@ fn gzoffset_gztell_read() { }; assert_ne!(fd, -1); assert_eq!( - unsafe { write(fd, PADDING.as_ptr().cast::(), OFFSET as _) }, + unsafe { libc::write(fd, PADDING.as_ptr().cast::(), OFFSET as _) }, OFFSET as _ ); let source_name = crate_path("src/test-data/issue-109.gz"); @@ -844,7 +809,7 @@ fn gzoffset_gztell_read() { break; } assert_eq!( - unsafe { write(fd, buf.as_ptr().cast(), ret as _) }, + unsafe { libc::write(fd, buf.as_ptr().cast(), ret as _) }, ret as _ ); } @@ -1717,7 +1682,7 @@ fn gzseek_read() { break; } assert_eq!( - unsafe { write(fd, buf.as_mut_ptr().cast::(), ret as _) }, + unsafe { libc::write(fd, buf.as_mut_ptr().cast::(), ret as _) }, ret as _ ); }