From 4efdd298fdedb4e724dbeff6e73498141af2a8a4 Mon Sep 17 00:00:00 2001
From: Ben Callaghan <bcallaghan@etogy.com>
Date: Wed, 24 Jul 2019 16:45:18 -0600
Subject: [PATCH 1/3] Verify stream length when reading

---
 src/PdfSharp/Pdf.IO/Lexer.cs | 70 +++++++++++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)
diff --git a/src/PdfSharp/Pdf.IO/Lexer.cs b/src/PdfSharp/Pdf.IO/Lexer.cs
index 5bff4193..b7abe0e5 100644
--- a/src/PdfSharp/Pdf.IO/Lexer.cs
+++ b/src/PdfSharp/Pdf.IO/Lexer.cs
@@ -190,7 +190,57 @@ public byte[] ReadStream(int length)
             else
                 pos = _idxChar + 1;
 
-            _pdfSteam.Position = pos;
+            // Producer: 
+            // Problem: Incorrect stream length
+            // Fix: Find the endstream keyword and measure the length
+            // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
+
+            // Producer: 
+            // Problem: Not all pdf producers add a eol marker before endstream
+            // Fix: double check for endstream without the eol marker
+            // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
+
+            // Producer: 
+            // Problem: Some pdf producers replace the eol marker with a carriage return
+            // Fix: also accept a lone carriage return in front of endstream
+            // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
+
+            // Verify stream length and resolve if bad
+            string nendstream = "\nendstream";
+            string rendstream = "\rendstream";
+            string rnendstream = "\r\nendstream";
+            string endstream = "endstream";
+
+			      string postStream = ReadRawString(pos + length, rnendstream.Length);
+
+            bool bValid = postStream.StartsWith(nendstream) ||
+                          postStream.StartsWith(rendstream) ||
+                          postStream.StartsWith(rnendstream) ||
+                          postStream.StartsWith(endstream); // Not all pdf producers add a eol marker before endstream
+
+            if (!bValid)
+            {
+				        // find the first endstream occurrence
+				        // first check the short window read just past the declared stream length.
+                int idxOffset = IndexOfEndStream(postStream);
+                if (idxOffset != -1)
+                {
+                    length = length + idxOffset;
+                }
+
+                if (idxOffset == -1)
+				        {
+                    // TODO:: read in chunks
+					          postStream = ReadRawString(pos, _pdfLength - pos);
+                    idxOffset = IndexOfEndStream(postStream);
+                    if (idxOffset != -1)
+                    {
+                        length = idxOffset;
+                    }
+                }
+			      }
+
+			      _pdfSteam.Position = pos;
             byte[] bytes = new byte[length];
             int read = _pdfSteam.Read(bytes, 0, length);
             Debug.Assert(read == length);
@@ -205,6 +255,24 @@ public byte[] ReadStream(int length)
             return bytes;
         }
 
+        private static readonly string[] endstreamValues = { "\nendstream", "\rendstream", "endstream" };
+        /// <summary>
+        /// Returns the smallest index in val at which an endstream marker (with or without a leading EOL character) starts, or -1 if none occurs.
+        /// </summary>
+        private int IndexOfEndStream(string val)
+        {
+            int offset = -1;
+            foreach (var es in endstreamValues)
+            {
+                int o = val.IndexOf(es, StringComparison.Ordinal);
+                if (o != -1 && (offset == -1 || o < offset)) // IndexOf yields -1 for "not found"; that must never replace a real match
+                {
+                    offset = o;
+                }
+            }
+            return offset;
+        }
+
         /// <summary>
         /// Reads a string in raw encoding.
         /// </summary>

From b6f478acb0ebf154be463bd63459c8d8bb79a91a Mon Sep 17 00:00:00 2001
From: Ben Callaghan <bcallaghan@etogy.com>
Date: Wed, 24 Jul 2019 16:48:46 -0600
Subject: [PATCH 2/3] Simplified ReadStream

---
 src/PdfSharp/Pdf.IO/Parser.cs | 35 +----------------------------------
 1 file changed, 1 insertion(+), 34 deletions(-)

diff --git a/src/PdfSharp/Pdf.IO/Parser.cs b/src/PdfSharp/Pdf.IO/Parser.cs
index 07b353a9..ef63cfa5 100644
--- a/src/PdfSharp/Pdf.IO/Parser.cs
+++ b/src/PdfSharp/Pdf.IO/Parser.cs
@@ -266,41 +266,8 @@ public PdfObject ReadObject(PdfObject pdfObject, PdfObjectID objectID, bool incl
             {
                 PdfDictionary dict = (PdfDictionary)pdfObject;
                 Debug.Assert(checkForStream, "Unexpected stream...");
-#if true_
                 ReadStream(dict);
-#else
-                int length = GetStreamLength(dict);
-                byte[] bytes = _lexer.ReadStream(length);
-#if true_
-                if (dict.Elements.GetString("/Filter") == "/FlateDecode")
-                {
-                    if (dict.Elements["/Subtype"] == null)
-                    {
-                        try
-                        {
-                            byte[] decoded = Filtering.FlateDecode.Decode(bytes);
-                            if (decoded.Length == 0)
-                                goto End;
-                            string pageContent = Filtering.FlateDecode.DecodeToString(bytes);
-                            if (pageContent.Length > 100)
-                                pageContent = pageContent.Substring(pageContent.Length - 100);
-                            pageContent.GetType();
-                            bytes = decoded;
-                            dict.Elements.Remove("/Filter");
-                            dict.Elements.SetInteger("/Length", bytes.Length);
-                        }
-                        catch
-                        {
-                        }
-                    }
-                End: ;
-                }
-#endif
-                PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
-                dict.Stream = stream;
-                ReadSymbol(Symbol.EndStream);
-                symbol = ScanNextToken();
-#endif
+                symbol = _lexer.Symbol;
             }
             if (!fromObjecStream && symbol != Symbol.EndObj)
                 ParserDiagnostics.ThrowParserException(PSSR.UnexpectedToken(_lexer.Token));

From 7c662b28dcd8cd789b90d6a7e1ccd5f13b6c9c89 Mon Sep 17 00:00:00 2001
From: Ben Callaghan <bcallaghan@etogy.com>
Date: Wed, 24 Jul 2019 17:16:32 -0600
Subject: [PATCH 3/3] Automatically fix incorrect stream length

---
 src/PdfSharp/Pdf.IO/Parser.cs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/PdfSharp/Pdf.IO/Parser.cs b/src/PdfSharp/Pdf.IO/Parser.cs
index ef63cfa5..cb3af9a4 100644
--- a/src/PdfSharp/Pdf.IO/Parser.cs
+++ b/src/PdfSharp/Pdf.IO/Parser.cs
@@ -283,8 +283,16 @@ private void ReadStream(PdfDictionary dict)
         {
             Symbol symbol = _lexer.Symbol;
             Debug.Assert(symbol == Symbol.BeginStream);
+
             int length = GetStreamLength(dict);
             byte[] bytes = _lexer.ReadStream(length);
+
+            if (bytes.Length != length)
+            {
+                // The file is corrupted, but still readable.
+                dict.Elements["/Length"] = new PdfInteger(bytes.Length);
+            }
+
             PdfDictionary.PdfStream stream = new PdfDictionary.PdfStream(bytes, dict);
             Debug.Assert(dict.Stream == null, "Dictionary already has a stream.");
             dict.Stream = stream;