@@ -230,23 +230,72 @@ public byte[] ReadStream(int length)
230230 else
231231 pos = _idxChar + 1 ;
232232
233- // Verify stream length and resolve if bad
234- string post_stream = ReadRawString ( pos + length , ( "endstream" ) . Length ) ;
235- if ( post_stream != "endstream" )
236- {
233+ // Producer:
234+ // Problem: Incorrect stream length
235+ // Fix: Find the endstream keyword and measure the length
236+ // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
237+
238+ // Producer:
239+ // Problem: Not all PDF producers add an EOL marker before endstream
240+ // Fix: also accept endstream without a preceding EOL marker
241+ // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
242+
243+ // Producer:
244+ // Problem: Some PDF producers write a carriage return instead of a line feed before endstream
245+ // Fix: also accept endstream preceded by a carriage return
246+ // https://www.adobe.com/content/dam/acom/en/devnet/acrobat/pdfs/PDF32000_2008.pdf 7.3.8
247+
248+ // Verify stream length and resolve if bad
249+ string nendstream = $ "{ '\n ' } endstream";
250+ string rendstream = $ "{ '\r ' } endstream";
251+ string endstream = "endstream" ;
252+
253+ string postStream = ReadRawString ( pos + length , nendstream . Length ) ;
254+
255+ bool bValid = postStream == nendstream ||
256+ postStream == rendstream ||
257+ postStream . StartsWith ( endstream ) ; // Not all pdf producers add a eol marker before endstream
258+
259+ if ( ! bValid )
260+ {
261+ string [ ] endstreamValues = { nendstream , rendstream , endstream } ;
262+
// Returns the smallest index at which any marker in endstreamValues
// (captured from the enclosing scope) occurs in val, using ordinal
// comparison, or -1 when none of the markers occurs.
int IndexOfEndStream(string val)
{
    int offset = -1;

    foreach (var es in endstreamValues)
    {
        int o = val.IndexOf(es, StringComparison.Ordinal);

        // IndexOf reports "not found" as -1. Because -1 is smaller than
        // every valid index, a miss must be skipped explicitly; otherwise
        // it would overwrite a match already recorded for another marker.
        if (o == -1)
        {
            continue;
        }

        // Keep the earliest match seen so far.
        if (offset == -1 || o < offset)
        {
            offset = o;
        }
    }

    return offset;
}
279+
280+
237281 // find the first endstream occurrence
238282 // first check to see if it is within the specified stream length.
239- int endstream_idx = post_stream . IndexOf ( "endstream" , StringComparison . Ordinal ) ;
240- if ( endstream_idx == - 1 )
241- {
242- post_stream = ReadRawString ( pos , _pdfLength - pos ) ;
243- endstream_idx = post_stream . IndexOf ( "endstream" , StringComparison . Ordinal ) ;
244- }
283+ int idxOffset = IndexOfEndStream ( postStream ) ;
284+ if ( idxOffset != - 1 )
285+ {
286+ length = length + idxOffset ;
287+ }
245288
246- if ( endstream_idx ! = - 1 )
289+ if ( idxOffset = = - 1 )
247290 {
248- length = endstream_idx ;
249- }
291+ // TODO:: read in chunks
292+ postStream = ReadRawString ( pos , _pdfLength - pos ) ;
293+ idxOffset = IndexOfEndStream ( postStream ) ;
294+ if ( idxOffset != - 1 )
295+ {
296+ length = idxOffset ;
297+ }
298+ }
250299 }
251300
252301 _pdfSteam . Position = pos ;
0 commit comments