Skip to content

Commit 3f65706

Browse files
committed
Resolved bad end stream checks that slowed stream object reading.
1 parent dc61080 commit 3f65706

File tree

1 file changed

+62
-13
lines changed

1 file changed

+62
-13
lines changed

src/PdfSharp/Pdf.IO/Lexer.cs

Lines changed: 62 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -230,23 +230,72 @@ public byte[] ReadStream(int length)
230230
else
231231
pos = _idxChar + 1;
232232

233-
// Verify stream length and resolve if bad
234-
string post_stream = ReadRawString(pos + length, ("endstream").Length);
235-
if (post_stream != "endstream")
236-
{
233+
// Producer:
234+
// Problem: Incorrect stream length
235+
// Fix: Find the endstream keyword and measure the length
236+
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
237+
238+
// Producer:
239+
// Problem: Not all PDF producers add an EOL marker before endstream
240+
// Fix: double check for endstream without the eol marker
241+
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
242+
243+
// Producer:
244+
// Problem: Some pdf producers replace the eol marker with a carriage return
245+
// Fix: also accept endstream preceded by a carriage return
246+
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
247+
248+
// Verify stream length and resolve if bad
249+
string nendstream = $"{'\n'}endstream";
250+
string rendstream = $"{'\r'}endstream";
251+
string endstream = "endstream";
252+
253+
string postStream = ReadRawString(pos + length, nendstream.Length);
254+
255+
bool bValid = postStream == nendstream ||
256+
postStream == rendstream ||
257+
postStream.StartsWith(endstream); // Not all pdf producers add a eol marker before endstream
258+
259+
if (!bValid)
260+
{
261+
string[] endstreamValues = { nendstream, rendstream, endstream };
262+
263+
// Returns the smallest index at which any of the markers in endstreamValues
// occurs in val, or -1 when none of them occurs.
int IndexOfEndStream(string val)
{
    int offset = -1;

    foreach (var es in endstreamValues)
    {
        int o = val.IndexOf(es, StringComparison.Ordinal);

        // IndexOf reports "not found" as -1. Such a result must never replace
        // an index that an earlier marker already matched, otherwise the
        // smallest real match is lost.
        if (o != -1 && (offset == -1 || o < offset))
        {
            offset = o;
        }
    }

    return offset;
}
279+
280+
237281
// find the first endstream occurrence
238282
// first check to see if it is within the specified stream length.
239-
int endstream_idx = post_stream.IndexOf("endstream", StringComparison.Ordinal);
240-
if (endstream_idx == -1)
241-
{
242-
post_stream = ReadRawString(pos, _pdfLength - pos);
243-
endstream_idx = post_stream.IndexOf("endstream", StringComparison.Ordinal);
244-
}
283+
int idxOffset = IndexOfEndStream(postStream);
284+
if (idxOffset != -1)
285+
{
286+
length = length + idxOffset;
287+
}
245288

246-
if (endstream_idx != -1)
289+
if (idxOffset == -1)
247290
{
248-
length = endstream_idx;
249-
}
291+
// TODO: read in chunks
292+
postStream = ReadRawString(pos, _pdfLength - pos);
293+
idxOffset = IndexOfEndStream(postStream);
294+
if (idxOffset != -1)
295+
{
296+
length = idxOffset;
297+
}
298+
}
250299
}
251300

252301
_pdfSteam.Position = pos;

0 commit comments

Comments
 (0)