Skip to content

Commit bd06987

Browse files
pthrasher and WerWolv authored
patterns/zip: Add zip64 support (#395)
Co-authored-by: Nik <werwolv98@gmail.com>
1 parent 74e0862 commit bd06987

File tree

1 file changed

+130
-41
lines changed

1 file changed

+130
-41
lines changed

patterns/zip.hexpat

Lines changed: 130 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,20 +4,55 @@
44

55
import std.mem;
66
import std.math;
7+
import std.core;
78
import type.time;
89

9-
struct EndOfCentralDirectory {
10-
u32 headerSignature [[comment("EoCD magic"), name("EoCD PK\\5\\6")]];
11-
u16 diskNum [[comment("Number of this disk "), name("Disk Number")]];
12-
u16 diskStart [[comment("Disk where central directory starts "), name("Central Directory Disk Number")]];
13-
u16 CDRCount [[comment("Number of central directory records on this disk"), name("Central Directory Entries")]];
14-
u16 CentralDirectoryRecordCount [[comment("Total number of entries in the central directory"), name("Total Central Directory Entries")]];
15-
u32 CDSize [[comment("Size of central directory (bytes)"), name("Central Directory Size")]];
16-
u32 CDOffset [[comment("Offset of start of central directory, relative to start of archive"), name("Central Directory Offset")]];
17-
u16 commentLength [[color("00000000")]];
18-
char coment[commentLength] [[name("Comment")]];
10+
11+
using CentralDirectoryFileHeader;
12+
13+
14+
// Zip64 end-of-central-directory locator: a 20-byte record (4 + 4 + 8 + 4).
// find_eocd() looks for its 0x07064B50 signature 20 bytes before a classic EOCD
// candidate and follows eocdOffset to the zip64 EOCD record.
struct EOCD64Locator {
15+
u32 headerSignature [[comment("Zip64 EoCD locator magic"), name("EoCD PK\\6\\7")]];
16+
u32 cdrDisk [[comment("Disk number containing the end of central directory record"), name("CDR Disk")]];
17+
u64 eocdOffset [[comment("Offset of end of central directory record"), name("End of Central Directory Record Offset")]];
18+
u32 totalDisks [[comment("Total number of disks"), name("Total Disks")]];
1919
};
2020

21+
// End-of-central-directory record. The layout is selected by the leading 4-byte magic:
//   0x06064b50 - 64-bit counts/offsets, followed by the EOCD64Locator, 20 raw bytes
//                and the trailing comment.
//   0x06054B50 - 16/32-bit counts/offsets and the trailing comment.
// Any other magic aborts evaluation through std::error.
// Each branch also places the CentralDirectoryFileHeader array at CDOffset.
struct EndOfCentralDirectory {
22+
u32 magic;
23+
24+
// 64-bit record.
if (magic == 0x06064b50) {
25+
u64 eocdSize [[comment("Size of fixed fields + size of variable data - 12"), name("EOCD Size")]];
26+
u16 madeByVersion [[comment("The version of zip this was authored by"), name("Made By Version")]];
27+
u16 versionNeeded [[comment("The minimum supported ZIP version needed to extract the file"), name("Version Needed")]];
28+
u32 diskNum [[comment("number of this disk"), name("Disk Number")]];
29+
u32 diskStart [[comment("Disk where central directory starts "), name("Central Directory Disk Number")]];
30+
u64 CDRCount [[comment("Number of central directory records on this disk"), name("Central Directory Entries")]];
31+
u64 CentralDirectoryRecordCount [[comment("Total number of entries in the central directory"), name("Total Central Directory Entries")]];
32+
u64 CDSize [[comment("Size of central directory (bytes)"), name("Central Directory Size")]];
33+
u64 CDOffset [[comment("Offset of start of central directory, relative to start of archive"), name("Central Directory Offset")]];
34+
// 44 = combined size of the eight fixed fields declared after eocdSize (2+2+4+4+8+8+8+8).
char extra[eocdSize - 44] [[comment("zip64 extensible data sector"), name("Extra Data")]];
35+
EOCD64Locator locator [[name("EOCD Locator")]];
36+
// 20 raw bytes: the size of magic plus the six fixed fields of the 0x06054B50 branch
// below (4+2+2+2+2+4+4). Presumably this is the classic EOCD that follows the locator,
// so the next field lines up with its comment length - TODO confirm.
char eocd32[20] [[name("EOCD32")]];
37+
u16 commentLength [[color("00000000")]];
38+
char coment[commentLength] [[name("Comment")]];
39+
CentralDirectoryFileHeader centralDirHeaders[CDRCount] @ (CDOffset) [[name("Files")]];
40+
// Classic (32-bit) record.
} else if (magic == 0x06054B50) {
41+
u16 diskNum [[comment("Number of this disk "), name("Disk Number")]];
42+
u16 diskStart [[comment("Disk where central directory starts "), name("Central Directory Disk Number")]];
43+
u16 CDRCount [[comment("Number of central directory records on this disk"), name("Central Directory Entries")]];
44+
u16 CentralDirectoryRecordCount [[comment("Total number of entries in the central directory"), name("Total Central Directory Entries")]];
45+
u32 CDSize [[comment("Size of central directory (bytes)"), name("Central Directory Size")]];
46+
u32 CDOffset [[comment("Offset of start of central directory, relative to start of archive"), name("Central Directory Offset")]];
47+
u16 commentLength [[color("00000000")]];
48+
char coment[commentLength] [[name("Comment")]];
49+
// Skip placing the headers when either field holds its all-ones value;
// find_eocd() treats those same values as the zip64 case.
if (CDOffset != 0xFFFFFFFF && CentralDirectoryRecordCount != 0xFFFF) {
50+
CentralDirectoryFileHeader centralDirHeaders[CDRCount] @ (CDOffset) [[name("Files")]];
51+
}
52+
} else {
53+
std::error("Invalid EOCD magic!");
54+
}
55+
};
2156

2257
namespace extra {
2358

@@ -59,11 +94,24 @@ namespace extra {
5994
}
6095
};
6196

97+
// Payload of an ExtraField whose tag is 0x0001.
// The two 64-bit sizes are always read. The trailing fields are read only when the
// enclosing ExtraField's TSize indicates the payload is long enough to contain them.
struct ZIP64_SizeInfo {
98+
u64 uncompressedSize;
99+
u64 compressedSize;
100+
// More than the 16 bytes taken by the two sizes above.
if (parent.TSize > 16){
101+
u64 localHeaderOffset;
102+
}
103+
// More than 24 bytes (two sizes plus localHeaderOffset).
if (parent.TSize > 24){
104+
u32 diskStartNumber;
105+
}
106+
};
107+
62108
struct ExtraField {
63109
u16 tag;
64110
u16 TSize;
65111

66-
if (tag == 0x5455) {
112+
if (tag == 0x0001) {
113+
extra::ZIP64_SizeInfo ZIP64_SizeInfo;
114+
} else if (tag == 0x5455) {
67115
// 0x5455 needs parsed with TSize in context to prevent overshoot from mismatched TSize/flags set
68116
UTFlags Flags;
69117
u64 extraEndFromFlags = $ + 4*(Flags.modification_time_set + Flags.access_time_set + Flags.creation_time_set);
@@ -102,39 +150,41 @@ namespace extra {
102150
}
103151

104152
// Locates the end-of-central-directory record and returns its offset.
// (Lines whose diff marker ends in "-" are the previous version removed by this commit;
// the description below covers the lines marked "+" and the unmarked context lines.)
//
// Starting at offset_search_from, the file is searched up to std::mem::size() for the
// byte sequence 50 4B 05 06. For every hit an EndOfCentralDirectory is placed there:
//   * If its CDOffset is 0xFFFFFFFF or its CentralDirectoryRecordCount is 0xFFFF, the
//     archive is treated as zip64: the 4 bytes located 20 bytes before the hit must be
//     0x07064B50, an EOCD64Locator is read from there, and a second
//     EndOfCentralDirectory is placed at locator.eocdOffset. locator.eocdOffset is
//     returned when the u32 at that record's CDOffset equals 0x2014B50.
//   * Otherwise the hit itself is returned when the u32 at its CDOffset equals 0x2014B50.
// A hit that fails these checks is skipped and the search resumes one byte later.
// std::error("Could not find EOCD.") is raised once the search returns -1.
//
// NOTE(review): the removed version started the search at size-65536-22, the new one at
// size-65536. A record followed by a comment near the u16 maximum length might begin
// before the new window - confirm.
// NOTE(review): prev_address is assigned but never read in the visible code.
fn find_eocd() {
105-
// If there is no zip comment, which is the common case,
106-
// the end-of-central-directory record will be 22 bytes long
107-
// at the end of the file; check if size-22 has the signature.
108-
if (std::mem::read_unsigned(std::mem::size()-22, 4, std::mem::Endian::Little) == 0x06054B50) {
109-
return std::mem::size()-22;
110-
} else {
111-
// If it's not there, then there's probably a zip comment;
112-
// search the last 64KB of the file for the signature.
113-
u128 offset_search_from = std::math::max(0, std::mem::size()-65536-22);
114-
u128 prev_address;
115-
while(1){
116-
s128 current_address = std::mem::find_sequence_in_range(0, offset_search_from, std::mem::size(), 0x50,0x4B,0x05,0x06);
117-
118-
//Reached EOF and did not find valid eocd.
119-
if (current_address == -1) {
120-
std::error("Could not find EOCD.");
121-
}
153+
u128 offset_search_from = std::math::max(0, std::mem::size()-65536);
154+
u128 prev_address;
155+
while(1){
156+
s128 current_address = std::mem::find_sequence_in_range(0, offset_search_from, std::mem::size(), 0x50,0x4B,0x05,0x06);
122157

123-
//Potential eocd found. Create a eocd struct
124-
EndOfCentralDirectory EOCD @ current_address;
158+
//Reached EOF and did not find valid eocd.
159+
if (current_address == -1) {
160+
std::error("Could not find EOCD.");
161+
}
162+
163+
//Potential eocd found. Create a eocd struct
164+
EndOfCentralDirectory EOCD32 @ current_address;
125165

166+
if (EOCD32.CDOffset == 0xFFFFFFFF || EOCD32.CentralDirectoryRecordCount == 0xFFFF) {
167+
// this is a zip64 file
168+
if (std::mem::read_unsigned(current_address - 20, 4, std::mem::Endian::Little) == 0x07064B50){
169+
EOCD64Locator locator @ current_address - 20;
170+
EndOfCentralDirectory EOCD64 @ locator.eocdOffset;
171+
//If central directory file header is valid, then we know the eocd offset is valid.
172+
if (std::mem::read_unsigned(EOCD64.CDOffset, 4, std::mem::Endian::Little) == 0x2014B50){
173+
return locator.eocdOffset;
174+
}
175+
}
176+
} else {
126177
//If central directory file header is valid, then we know the eocd offset is valid.
127-
if (std::mem::read_unsigned(EOCD.CDOffset, 4, std::mem::Endian::Little) == 0x2014B50){
178+
if (std::mem::read_unsigned(EOCD32.CDOffset, 4, std::mem::Endian::Little) == 0x2014B50){
128179
return current_address;
129180
}
130-
131-
offset_search_from = current_address + 1;
132-
prev_address = current_address;
133181
}
182+
183+
offset_search_from = current_address + 1;
184+
prev_address = current_address;
134185
}
135186
};
136187

137-
EndOfCentralDirectory fileInfo @ find_eocd() [[name("End of Central Directory Record")]];
138188

139189
enum CompressionMethod : u16 {
140190
None = 0, // The file is stored (no compression)
@@ -181,6 +231,7 @@ bitfield GeneralPurposeBitFlags {
181231
reservedPKWARE_1 : 2;
182232
};
183233

234+
184235
struct LocalFileHeader {
185236
u32 headerSignature [[name("LCF PK\\3\\4")]];
186237
u16 version [[ comment("The minimum supported ZIP specification version needed to extract the file") ]];
@@ -197,12 +248,32 @@ struct LocalFileHeader {
197248
u64 extraEnd = $ + extraFieldLength;
198249
extra::ExtraField extraFields[while (extra::has_extra_field(extraEnd))] [[comment("Extra Fields")]];
199250
padding[extraEnd - $];
200-
u8 data[compressedSize] [[name("File Data")]];
251+
u8 data[get_file_data_size(compressionMethod, compressedSize, uncompressedSize, extraFields)] [[name("File Data")]];
201252
};
202253

203-
union File {
204-
u32 fileOffset [[comment("Offset of local file header, relative to the start of the first disk on which the file occurs.")]];
205-
LocalFileHeader *fileHeader : u32;
254+
// Computes the element count used for LocalFileHeader's data array.
//   compressionMethod - selects which size applies: uncompressedSize for
//                       CompressionMethod::None, compressedSize otherwise.
//   compressedSize / uncompressedSize - the 32-bit sizes from the header.
//   extraFields - the header's parsed extra fields, consulted only when the selected
//                 32-bit size is 0xFFFFFFFF.
// Returns the selected 32-bit size when it is not 0xFFFFFFFF. Otherwise returns the
// matching 64-bit size from the first extra field tagged 0x0001, or 0 when no such
// field is present.
fn get_file_data_size(CompressionMethod compressionMethod, u32 compressedSize, u32 uncompressedSize, ref extra::ExtraField extraFields) {
255+
u32 size = 0;
256+
if (compressionMethod == CompressionMethod::None) {
257+
size = uncompressedSize;
258+
} else {
259+
size = compressedSize;
260+
}
261+
262+
// Anything other than the all-ones value is used as-is.
if (size != 0xFFFFFFFF) {
263+
return size;
264+
}
265+
266+
// Fall back to the ZIP64_SizeInfo carried by the extra field with tag 0x0001.
u32 extraSize = std::core::member_count(extraFields);
267+
for (u32 i = 0, i < extraSize, i += 1) {
268+
if (extraFields[i].tag == 0x0001) {
269+
if (compressionMethod == CompressionMethod::None) {
270+
return extraFields[i].ZIP64_SizeInfo.uncompressedSize;
271+
} else {
272+
return extraFields[i].ZIP64_SizeInfo.compressedSize;
273+
}
274+
}
275+
}
276+
// No 0x0001 extra field found: report an empty data region.
return 0;
206277
};
207278

208279
struct CentralDirectoryFileHeader {
@@ -222,12 +293,30 @@ struct CentralDirectoryFileHeader {
222293
u16 diskNumber [[comment("Disk number where file starts")]];
223294
u16 internalFileAttributes;
224295
u32 externalFileAttributes;
225-
File file;
296+
u32 localHeaderOffset;
226297
char fileName[fileNameLength];
227298
u64 extraEnd = $ + extraFieldLength;
228299
extra::ExtraField extraFields[while (extra::has_extra_field(extraEnd))] [[comment("Extra Fields")]];
229300
padding[extraEnd - $];
230301
char comment[fileCommentLength] @ extraEnd;
302+
LocalFileHeader localFileHeader @ get_local_header_offset(localHeaderOffset, extraFields) [[name("Local File Header")]];
303+
};
304+
305+
// Resolves the offset at which CentralDirectoryFileHeader places its LocalFileHeader.
//   localHeaderOffset - the 32-bit offset stored in the central directory header.
//   extraFields       - that header's parsed extra fields.
// Returns localHeaderOffset unless it is 0xFFFFFFFF; in that case returns the
// localHeaderOffset of the first extra field tagged 0x0001.
// Raises std::error when the 32-bit value is 0xFFFFFFFF and no 0x0001 field exists.
// NOTE(review): ZIP64_SizeInfo only declares localHeaderOffset when the field's
// TSize > 16; behavior for shorter 0x0001 fields is not shown here - confirm.
fn get_local_header_offset(u32 localHeaderOffset, ref extra::ExtraField extraFields) {
307+
if (localHeaderOffset != 0xFFFFFFFF) {
308+
return localHeaderOffset;
309+
}
310+
311+
u32 extraSize = std::core::member_count(extraFields);
312+
for (u32 i = 0, i < extraSize, i += 1) {
313+
if (extraFields[i].tag == 0x0001) {
314+
return extraFields[i].ZIP64_SizeInfo.localHeaderOffset;
315+
}
316+
}
317+
318+
std::error("No valid local header offset found!");
231319
};
232320

233-
CentralDirectoryFileHeader centralDirHeaders[fileInfo.CDRCount] @ (fileInfo.CDOffset) [[name("Files")]];
321+
322+
// Root placement of the pattern: the EOCD record is placed at the offset returned by
// find_eocd(). EndOfCentralDirectory places the central directory headers itself, and
// each CentralDirectoryFileHeader places its own LocalFileHeader.
EndOfCentralDirectory fileInfo @ find_eocd() [[name("End of Central Directory Record")]];

0 commit comments

Comments
 (0)