Skip to content

Commit b2a4beb

Browse files
committed
Resolved bad end stream checks that slowed stream object reading.
1 parent dc61080 commit b2a4beb

File tree

1 file changed

+37
-10
lines changed

1 file changed

+37
-10
lines changed

src/PdfSharp/Pdf.IO/Lexer.cs

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -230,22 +230,49 @@ public byte[] ReadStream(int length)
230230
else
231231
pos = _idxChar + 1;
232232

233-
// Verify stream length and resolve if bad
234-
string post_stream = ReadRawString(pos + length, ("endstream").Length);
235-
if (post_stream != "endstream")
233+
// Producer:
234+
// Problem: Incorrect stream length
235+
// Fix: Find the endstream keyword and measure the length
236+
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
237+
238+
// Producer:
239+
// Problem: Not all PDF producers add an EOL marker before endstream
240+
// Fix: double-check for endstream without the EOL marker
241+
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
242+
243+
// Verify stream length and resolve if bad
244+
string nendstream = $"{'\n'}endstream";
245+
string endstream = "endstream";
246+
247+
string postStream = ReadRawString(pos + length, nendstream.Length);
248+
249+
bool bValid = postStream == nendstream ||
250+
postStream.StartsWith(endstream); // Not all pdf producers add a eol marker before endstream
251+
252+
if (!bValid)
236253
{
237254
// find the first endstream occurrence
238255
// first check to see if it is within the specified stream length.
239-
int endstream_idx = post_stream.IndexOf("endstream", StringComparison.Ordinal);
240-
if (endstream_idx == -1)
256+
int endStreamIdx = postStream.IndexOf(nendstream, StringComparison.Ordinal);
257+
if (endStreamIdx == -1)
258+
{
259+
endStreamIdx = postStream.IndexOf(endstream, StringComparison.Ordinal);
260+
}
261+
262+
if (endStreamIdx == -1)
241263
{
242-
post_stream = ReadRawString(pos, _pdfLength - pos);
243-
endstream_idx = post_stream.IndexOf("endstream", StringComparison.Ordinal);
244-
}
264+
// TODO: read in chunks
265+
postStream = ReadRawString(pos, _pdfLength - pos);
266+
endStreamIdx = postStream.IndexOf(nendstream, StringComparison.Ordinal);
267+
if (endStreamIdx == -1)
268+
{
269+
endStreamIdx = postStream.IndexOf(endstream, StringComparison.Ordinal);
270+
}
271+
}
245272

246-
if (endstream_idx != -1)
273+
if (endStreamIdx != -1)
247274
{
248-
length = endstream_idx;
275+
length = endStreamIdx;
249276
}
250277
}
251278

0 commit comments

Comments
 (0)