Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 69 additions & 1 deletion src/PdfSharp/Pdf.IO/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,57 @@ public byte[] ReadStream(int length)
else
pos = _idxChar + 1;

_pdfSteam.Position = pos;
// Producer:
// Problem: Incorrect stream length
// Fix: Find the endstream keyword and measure the length
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8

// Producer:
// Problem: Not all pdf producers add a eol marker before endstream
// Fix: double check for endstream without the eol marker
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8

// Producer:
// Problem: Some pdf producers replace the eol marker with a carriage return
// Fix: also accept a lone carriage return (\r) directly before endstream
// https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8

// Verify stream length and resolve if bad
string nendstream = "\nendstream";
string rendstream = "\rendstream";
string rnendstream = "\r\nendstream";
string endstream = "endstream";

string postStream = ReadRawString(pos + length, rnendstream.Length);

bool bValid = postStream.StartsWith(nendstream) ||
postStream.StartsWith(rendstream) ||
postStream.StartsWith(rnendstream) ||
postStream.StartsWith(endstream); // Not all pdf producers add a eol marker before endstream

if (!bValid)
{
// find the first endstream occurrence
// first check to see if it is within the specified stream length.
int idxOffset = IndexOfEndStream(postStream);
if (idxOffset != -1)
{
length = length + idxOffset;
}

if (idxOffset == -1)
{
// TODO:: read in chunks
postStream = ReadRawString(pos, _pdfLength - pos);
idxOffset = IndexOfEndStream(postStream);
if (idxOffset != -1)
{
length = idxOffset;
}
}
}

_pdfSteam.Position = pos;
byte[] bytes = new byte[length];
int read = _pdfSteam.Read(bytes, 0, length);
Debug.Assert(read == length);
Expand All @@ -205,6 +255,24 @@ public byte[] ReadStream(int length)
return bytes;
}

// Spellings of the stream terminator that are searched for: the endstream
// keyword preceded by CR LF, LF, CR, or no end-of-line marker at all.
private static readonly string[] endstreamValues = { "\r\nendstream", "\nendstream", "\rendstream", "endstream" };

/// <summary>
/// Finds the earliest position in <paramref name="val"/> at which an endstream
/// terminator (including its preceding end-of-line marker, if any) begins.
/// </summary>
/// <param name="val">The raw text to search.</param>
/// <returns>
/// The smallest zero-based index at which any entry of <c>endstreamValues</c>
/// occurs, or -1 if none occurs or <paramref name="val"/> is null or empty.
/// </returns>
private int IndexOfEndStream(string val)
{
    if (string.IsNullOrEmpty(val))
    {
        return -1;
    }

    // Smallest hit seen so far; -1 means "nothing found yet".
    int offset = -1;

    foreach (var es in endstreamValues)
    {
        int o = val.IndexOf(es, StringComparison.Ordinal);

        // A miss (-1) must never replace a previous hit; only a real match
        // that is earlier than the current best may update the result.
        if (o != -1 && (offset == -1 || o < offset))
        {
            offset = o;
        }
    }

    return offset;
}

/// <summary>
/// Reads a string in raw encoding.
/// </summary>
Expand Down
43 changes: 9 additions & 34 deletions src/PdfSharp/Pdf.IO/Parser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -266,41 +266,8 @@ public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool incl
{
PdfDictionary dict = (PdfDictionary)pdfObject;
Debug.Assert(checkForStream, "Unexpected stream...");
#if true_
ReadStream(dict);
#else
int length = GetStreamLength(dict);
byte[] bytes = _lexer.ReadStream(length);
#if true_
if (dict.Elements.GetString("/Filter") == "/FlateDecode")
{
if (dict.Elements["/Subtype"] == null)
{
try
{
byte[] decoded = Filtering.FlateDecode.Decode(bytes);
if (decoded.Length == 0)
goto End;
string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
if (pageContent.Length > 100)
pageContent = pageContent.Substring(pageContent.Length - 100);
pageContent.GetType();
bytes = decoded;
dict.Elements.Remove("/Filter");
dict.Elements.SetInteger("/Length", bytes.Length);
}
catch
{
}
}
End: ;
}
#endif
PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
dict.Stream = stream;
ReadSymbol(Symbol.EndStream);
symbol = ScanNextToken();
#endif
symbol = _lexer.Symbol;
}
if (!fromObjecStream && symbol != Symbol.EndObj)
ParserDiagnostics.ThrowParserException(PSSR.UnexpectedToken(_lexer.Token));
Expand All @@ -316,8 +283,16 @@ private void ReadStream(PdfDictionary dict)
{
Symbol symbol = _lexer.Symbol;
Debug.Assert(symbol == Symbol.BeginStream);

int length = GetStreamLength(dict);
byte[] bytes = _lexer.ReadStream(length);

if (bytes.Length != length)
{
// The file is corrupted, but still readable.
dict.Elements["/Length"] = new PdfInteger(bytes.Length);
}

PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
Debug.Assert(dict.Stream == null, "Dictionary already has a stream.");
dict.Stream = stream;
Expand Down