Skip to content

Commit 6aa3570

Browse files
committed
windows: Make readLinkW APIs output WTF-16, reduce stack usage of callers
- Affects the following functions:
  + `std.fs.Dir.readLinkW`
  + `std.os.windows.ReadLink`
  + `std.os.windows.ntToWin32Namespace`
  + `std.posix.readlinkW`
  + `std.posix.readlinkatW`

Each of these functions (except `ntToWin32Namespace`) took WTF-16 as input and would output WTF-8, which makes optimal buffer re-use difficult at callsites and could force unnecessary WTF-16 <-> WTF-8 conversion during an intermediate step.

The functions have been updated to output WTF-16, and also allow for the path and the output to re-use the same buffer (i.e. in-place modification), which can reduce the stack usage at callsites. For example, all of `std.fs.Dir.readLink`/`readLinkZ`/`std.posix.readlink`/`readlinkZ`/`readlinkat`/`readlinkatZ` have had their stack usage reduced by one PathSpace struct (64 KiB) when targeting Windows.

The new `ntToWin32Namespace` takes an output buffer and returns a slice from that instead of returning a PathSpace, which is necessary to make the above possible.
1 parent 06a7597 commit 6aa3570

File tree

4 files changed

+119
-65
lines changed

4 files changed

+119
-65
lines changed

lib/std/fs/Dir.zig

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,8 +1354,14 @@ pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ReadLinkError![]u
13541354
return self.readLinkWasi(sub_path, buffer);
13551355
}
13561356
if (native_os == .windows) {
1357-
const sub_path_w = try windows.sliceToPrefixedFileW(self.fd, sub_path);
1358-
return self.readLinkW(sub_path_w.span(), buffer);
1357+
var sub_path_w = try windows.sliceToPrefixedFileW(self.fd, sub_path);
1358+
const result_w = try self.readLinkW(sub_path_w.span(), &sub_path_w.data);
1359+
1360+
const len = std.unicode.calcWtf8Len(result_w);
1361+
if (len > buffer.len) return error.NameTooLong;
1362+
1363+
const end_index = std.unicode.wtf16LeToWtf8(buffer, result_w);
1364+
return buffer[0..end_index];
13591365
}
13601366
const sub_path_c = try posix.toPosixPath(sub_path);
13611367
return self.readLinkZ(&sub_path_c, buffer);
@@ -1369,15 +1375,24 @@ pub fn readLinkWasi(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
13691375
/// Same as `readLink`, except the `sub_path_c` parameter is null-terminated.
13701376
pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
13711377
if (native_os == .windows) {
1372-
const sub_path_w = try windows.cStrToPrefixedFileW(self.fd, sub_path_c);
1373-
return self.readLinkW(sub_path_w.span(), buffer);
1378+
var sub_path_w = try windows.cStrToPrefixedFileW(self.fd, sub_path_c);
1379+
const result_w = try self.readLinkW(sub_path_w.span(), &sub_path_w.data);
1380+
1381+
const len = std.unicode.calcWtf8Len(result_w);
1382+
if (len > buffer.len) return error.NameTooLong;
1383+
1384+
const end_index = std.unicode.wtf16LeToWtf8(buffer, result_w);
1385+
return buffer[0..end_index];
13741386
}
13751387
return posix.readlinkatZ(self.fd, sub_path_c, buffer);
13761388
}
13771389

1378-
/// Windows-only. Same as `readLink` except the pathname parameter
1379-
/// is WTF16 LE encoded.
1380-
pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 {
1390+
/// Windows-only. Same as `readLink` except the path parameter
1391+
/// is WTF-16 LE encoded, NT-prefixed.
1392+
///
1393+
/// `sub_path_w` will never be accessed after `buffer` has been written to, so it
1394+
/// is safe to reuse a single buffer for both.
1395+
pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u16) ![]u16 {
13811396
return windows.ReadLink(self.fd, sub_path_w, buffer);
13821397
}
13831398

lib/std/fs/test.zig

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,10 +193,16 @@ test "Dir.readLink" {
193193
// test 1: symlink to a file
194194
try setupSymlink(ctx.dir, file_target_path, "symlink1", .{});
195195
try testReadLink(ctx.dir, canonical_file_target_path, "symlink1");
196+
if (builtin.os.tag == .windows) {
197+
try testReadLinkW(testing.allocator, ctx.dir, canonical_file_target_path, "symlink1");
198+
}
196199

197200
// test 2: symlink to a directory (can be different on Windows)
198201
try setupSymlink(ctx.dir, dir_target_path, "symlink2", .{ .is_directory = true });
199202
try testReadLink(ctx.dir, canonical_dir_target_path, "symlink2");
203+
if (builtin.os.tag == .windows) {
204+
try testReadLinkW(testing.allocator, ctx.dir, canonical_dir_target_path, "symlink2");
205+
}
200206

201207
// test 3: relative path symlink
202208
const parent_file = ".." ++ fs.path.sep_str ++ "target.txt";
@@ -205,6 +211,9 @@ test "Dir.readLink" {
205211
defer subdir.close();
206212
try setupSymlink(subdir, canonical_parent_file, "relative-link.txt", .{});
207213
try testReadLink(subdir, canonical_parent_file, "relative-link.txt");
214+
if (builtin.os.tag == .windows) {
215+
try testReadLinkW(testing.allocator, subdir, canonical_parent_file, "relative-link.txt");
216+
}
208217
}
209218
}.impl);
210219
}
@@ -215,6 +224,17 @@ fn testReadLink(dir: Dir, target_path: []const u8, symlink_path: []const u8) !vo
215224
try testing.expectEqualStrings(target_path, actual);
216225
}
217226

227+
fn testReadLinkW(allocator: mem.Allocator, dir: Dir, target_path: []const u8, symlink_path: []const u8) !void {
228+
const target_path_w = try std.unicode.wtf8ToWtf16LeAlloc(allocator, target_path);
229+
defer allocator.free(target_path_w);
230+
// Calling the W functions directly requires the path to be NT-prefixed
231+
const symlink_path_w = try std.os.windows.sliceToPrefixedFileW(dir.fd, symlink_path);
232+
const wtf16_buffer = try allocator.alloc(u16, target_path_w.len);
233+
defer allocator.free(wtf16_buffer);
234+
const actual = try dir.readLinkW(symlink_path_w.span(), wtf16_buffer);
235+
try testing.expectEqualSlices(u16, target_path_w, actual);
236+
}
237+
218238
fn testReadLinkAbsolute(target_path: []const u8, symlink_path: []const u8) !void {
219239
var buffer: [fs.max_path_bytes]u8 = undefined;
220240
const given = try fs.readLinkAbsolute(symlink_path, buffer[0..]);

lib/std/os/windows.zig

Lines changed: 31 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -894,7 +894,9 @@ pub const ReadLinkError = error{
894894
UnsupportedReparsePointType,
895895
};
896896

897-
pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
897+
/// `sub_path_w` will never be accessed after `out_buffer` has been written to, so it
898+
/// is safe to reuse a single buffer for both.
899+
pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u16) ReadLinkError![]u16 {
898900
const result_handle = OpenFile(sub_path_w, .{
899901
.access_mask = FILE_READ_ATTRIBUTES | SYNCHRONIZE,
900902
.dir = dir,
@@ -926,34 +928,33 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin
926928
const len = buf.SubstituteNameLength >> 1;
927929
const path_buf = @as([*]const u16, &buf.PathBuffer);
928930
const is_relative = buf.Flags & SYMLINK_FLAG_RELATIVE != 0;
929-
return parseReadlinkPath(path_buf[offset..][0..len], is_relative, out_buffer);
931+
return parseReadLinkPath(path_buf[offset..][0..len], is_relative, out_buffer);
930932
},
931933
IO_REPARSE_TAG_MOUNT_POINT => {
932934
const buf: *const MOUNT_POINT_REPARSE_BUFFER = @ptrCast(@alignCast(&reparse_struct.DataBuffer[0]));
933935
const offset = buf.SubstituteNameOffset >> 1;
934936
const len = buf.SubstituteNameLength >> 1;
935937
const path_buf = @as([*]const u16, &buf.PathBuffer);
936-
return parseReadlinkPath(path_buf[offset..][0..len], false, out_buffer);
938+
return parseReadLinkPath(path_buf[offset..][0..len], false, out_buffer);
937939
},
938940
else => {
939941
return error.UnsupportedReparsePointType;
940942
},
941943
}
942944
}
943945

944-
/// Asserts that there is enough space is `out_buffer`.
945-
/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
946-
fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u8 {
947-
const win32_namespace_path = path: {
948-
if (is_relative) break :path path;
949-
const win32_path = ntToWin32Namespace(path) catch |err| switch (err) {
950-
error.NameTooLong => unreachable,
951-
error.NotNtPath => break :path path,
946+
fn parseReadLinkPath(path: []const u16, is_relative: bool, out_buffer: []u16) error{NameTooLong}![]u16 {
947+
path: {
948+
if (is_relative) break :path;
949+
return ntToWin32Namespace(path, out_buffer) catch |err| switch (err) {
950+
error.NameTooLong => |e| return e,
951+
error.NotNtPath => break :path,
952952
};
953-
break :path win32_path.span();
954-
};
955-
const out_len = std.unicode.wtf16LeToWtf8(out_buffer, win32_namespace_path);
956-
return out_buffer[0..out_len];
953+
}
954+
if (out_buffer.len < path.len) return error.NameTooLong;
955+
const dest = out_buffer[0..path.len];
956+
@memcpy(dest, path);
957+
return dest;
957958
}
958959

959960
pub const DeleteFileError = error{
@@ -2584,34 +2585,31 @@ test getUnprefixedPathType {
25842585
/// https://github.com/reactos/reactos/blob/master/modules/rostests/apitests/ntdll/RtlNtPathNameToDosPathName.c
25852586
///
25862587
/// `path` should be encoded as WTF-16LE.
2587-
pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
2588+
///
2589+
/// Supports in-place modification (`path` and `out` may refer to the same slice).
2590+
pub fn ntToWin32Namespace(path: []const u16, out: []u16) error{ NameTooLong, NotNtPath }![]u16 {
25882591
if (path.len > PATH_MAX_WIDE) return error.NameTooLong;
25892592

2590-
var path_space: PathSpace = undefined;
25912593
const namespace_prefix = getNamespacePrefix(u16, path);
25922594
switch (namespace_prefix) {
25932595
.nt => {
25942596
var dest_index: usize = 0;
25952597
var after_prefix = path[4..]; // after the `\??\`
25962598
// The prefix \??\UNC\ means this is a UNC path, in which case the
25972599
// `\??\UNC\` should be replaced by `\\` (two backslashes)
2598-
// TODO: the "UNC" should technically be matched case-insensitively, but
2599-
// it's unlikely to matter since most/all paths passed into this
2600-
// function will have come from the OS meaning it should have
2601-
// the 'canonical' uppercase UNC.
26022600
const is_unc = after_prefix.len >= 4 and
2603-
std.mem.eql(u16, after_prefix[0..3], std.unicode.utf8ToUtf16LeStringLiteral("UNC")) and
2601+
eqlIgnoreCaseWTF16(after_prefix[0..3], std.unicode.utf8ToUtf16LeStringLiteral("UNC")) and
26042602
std.fs.path.PathType.windows.isSep(u16, std.mem.littleToNative(u16, after_prefix[3]));
2603+
const win32_len = path.len - @as(usize, if (is_unc) 6 else 4);
2604+
if (out.len < win32_len) return error.NameTooLong;
26052605
if (is_unc) {
2606-
path_space.data[0] = comptime std.mem.nativeToLittle(u16, '\\');
2606+
out[0] = comptime std.mem.nativeToLittle(u16, '\\');
26072607
dest_index += 1;
26082608
// We want to include the last `\` of `\??\UNC\`
26092609
after_prefix = path[7..];
26102610
}
2611-
@memcpy(path_space.data[dest_index..][0..after_prefix.len], after_prefix);
2612-
path_space.len = dest_index + after_prefix.len;
2613-
path_space.data[path_space.len] = 0;
2614-
return path_space;
2611+
@memmove(out[dest_index..][0..after_prefix.len], after_prefix);
2612+
return out[0..win32_len];
26152613
},
26162614
else => return error.NotNtPath,
26172615
}
@@ -2620,25 +2618,14 @@ pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
26202618
test ntToWin32Namespace {
26212619
const L = std.unicode.utf8ToUtf16LeStringLiteral;
26222620

2623-
try testNtToWin32Namespace(L("UNC"), L("\\??\\UNC"));
2624-
try testNtToWin32Namespace(L("\\\\"), L("\\??\\UNC\\"));
2625-
try testNtToWin32Namespace(L("\\\\path1"), L("\\??\\UNC\\path1"));
2626-
try testNtToWin32Namespace(L("\\\\path1\\path2"), L("\\??\\UNC\\path1\\path2"));
2621+
var mutable_unc_path_buf = L("\\??\\UNC\\path1\\path2").*;
2622+
try std.testing.expectEqualSlices(u16, L("\\\\path1\\path2"), try ntToWin32Namespace(&mutable_unc_path_buf, &mutable_unc_path_buf));
26272623

2628-
try testNtToWin32Namespace(L(""), L("\\??\\"));
2629-
try testNtToWin32Namespace(L("C:"), L("\\??\\C:"));
2630-
try testNtToWin32Namespace(L("C:\\"), L("\\??\\C:\\"));
2631-
try testNtToWin32Namespace(L("C:\\test"), L("\\??\\C:\\test"));
2632-
try testNtToWin32Namespace(L("C:\\test\\"), L("\\??\\C:\\test\\"));
2633-
2634-
try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("foo")));
2635-
try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("C:\\test")));
2636-
try std.testing.expectError(error.NotNtPath, ntToWin32Namespace(L("\\\\.\\test")));
2637-
}
2624+
var mutable_path_buf = L("\\??\\C:\\test\\").*;
2625+
try std.testing.expectEqualSlices(u16, L("C:\\test\\"), try ntToWin32Namespace(&mutable_path_buf, &mutable_path_buf));
26382626

2639-
fn testNtToWin32Namespace(expected: []const u16, path: []const u16) !void {
2640-
const converted = try ntToWin32Namespace(path);
2641-
try std.testing.expectEqualSlices(u16, expected, converted.span());
2627+
var too_small_buf: [6]u16 = undefined;
2628+
try std.testing.expectError(error.NameTooLong, ntToWin32Namespace(L("\\??\\C:\\test"), &too_small_buf));
26422629
}
26432630

26442631
fn getFullPathNameW(path: [*:0]const u16, out: []u16) !usize {

lib/std/posix.zig

Lines changed: 46 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3021,26 +3021,42 @@ pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {
30213021
if (native_os == .wasi and !builtin.link_libc) {
30223022
return readlinkat(AT.FDCWD, file_path, out_buffer);
30233023
} else if (native_os == .windows) {
3024-
const file_path_w = try windows.sliceToPrefixedFileW(null, file_path);
3025-
return readlinkW(file_path_w.span(), out_buffer);
3024+
var file_path_w = try windows.sliceToPrefixedFileW(null, file_path);
3025+
const result_w = try readlinkW(file_path_w.span(), &file_path_w.data);
3026+
3027+
const len = std.unicode.calcWtf8Len(result_w);
3028+
if (len > out_buffer.len) return error.NameTooLong;
3029+
3030+
const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
3031+
return out_buffer[0..end_index];
30263032
} else {
30273033
const file_path_c = try toPosixPath(file_path);
30283034
return readlinkZ(&file_path_c, out_buffer);
30293035
}
30303036
}
30313037

3032-
/// Windows-only. Same as `readlink` except `file_path` is WTF16 LE encoded.
3033-
/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
3038+
/// Windows-only. Same as `readlink` except `file_path` is WTF-16 LE encoded, NT-prefixed.
3039+
/// The result is encoded as WTF-16 LE.
3040+
///
3041+
/// `file_path` will never be accessed after `out_buffer` has been written to, so it
3042+
/// is safe to reuse a single buffer for both.
3043+
///
30343044
/// See also `readlinkZ`.
3035-
pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
3045+
pub fn readlinkW(file_path: []const u16, out_buffer: []u16) ReadLinkError![]u16 {
30363046
return windows.ReadLink(fs.cwd().fd, file_path, out_buffer);
30373047
}
30383048

30393049
/// Same as `readlink` except `file_path` is null-terminated.
30403050
pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 {
30413051
if (native_os == .windows) {
3042-
const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
3043-
return readlinkW(file_path_w.span(), out_buffer);
3052+
var file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
3053+
const result_w = try readlinkW(file_path_w.span(), &file_path_w.data);
3054+
3055+
const len = std.unicode.calcWtf8Len(result_w);
3056+
if (len > out_buffer.len) return error.NameTooLong;
3057+
3058+
const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
3059+
return out_buffer[0..end_index];
30443060
} else if (native_os == .wasi and !builtin.link_libc) {
30453061
return readlink(mem.sliceTo(file_path, 0), out_buffer);
30463062
}
@@ -3075,8 +3091,14 @@ pub fn readlinkat(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) ReadLink
30753091
return readlinkatWasi(dirfd, file_path, out_buffer);
30763092
}
30773093
if (native_os == .windows) {
3078-
const file_path_w = try windows.sliceToPrefixedFileW(dirfd, file_path);
3079-
return readlinkatW(dirfd, file_path_w.span(), out_buffer);
3094+
var file_path_w = try windows.sliceToPrefixedFileW(dirfd, file_path);
3095+
const result_w = try readlinkatW(dirfd, file_path_w.span(), &file_path_w.data);
3096+
3097+
const len = std.unicode.calcWtf8Len(result_w);
3098+
if (len > out_buffer.len) return error.NameTooLong;
3099+
3100+
const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
3101+
return out_buffer[0..end_index];
30803102
}
30813103
const file_path_c = try toPosixPath(file_path);
30823104
return readlinkatZ(dirfd, &file_path_c, out_buffer);
@@ -3103,19 +3125,29 @@ pub fn readlinkatWasi(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) Read
31033125
}
31043126
}
31053127

3106-
/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 LE encoded.
3107-
/// The result is encoded as [WTF-8](https://wtf-8.codeberg.page/).
3128+
/// Windows-only. Same as `readlinkat` except `file_path` is WTF-16 LE encoded, NT-prefixed.
3129+
/// The result is encoded as WTF-16 LE.
3130+
///
3131+
/// `file_path` will never be accessed after `out_buffer` has been written to, so it
3132+
/// is safe to reuse a single buffer for both.
3133+
///
31083134
/// See also `readlinkat`.
3109-
pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
3135+
pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u16) ReadLinkError![]u16 {
31103136
return windows.ReadLink(dirfd, file_path, out_buffer);
31113137
}
31123138

31133139
/// Same as `readlinkat` except `file_path` is null-terminated.
31143140
/// See also `readlinkat`.
31153141
pub fn readlinkatZ(dirfd: fd_t, file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 {
31163142
if (native_os == .windows) {
3117-
const file_path_w = try windows.cStrToPrefixedFileW(dirfd, file_path);
3118-
return readlinkatW(dirfd, file_path_w.span(), out_buffer);
3143+
var file_path_w = try windows.cStrToPrefixedFileW(dirfd, file_path);
3144+
const result_w = try readlinkatW(dirfd, file_path_w.span(), &file_path_w.data);
3145+
3146+
const len = std.unicode.calcWtf8Len(result_w);
3147+
if (len > out_buffer.len) return error.NameTooLong;
3148+
3149+
const end_index = std.unicode.wtf16LeToWtf8(out_buffer, result_w);
3150+
return out_buffer[0..end_index];
31193151
} else if (native_os == .wasi and !builtin.link_libc) {
31203152
return readlinkat(dirfd, mem.sliceTo(file_path, 0), out_buffer);
31213153
}

0 commit comments

Comments
 (0)