4646 */
4747public abstract class CompressedAnalyzer extends FileAnalyzer {
4848
49+ private static final int CHUNK_SIZE = 8 * 1024 ;
50+
4951 protected Genre g ;
5052
5153 @ Override
@@ -93,10 +95,17 @@ private boolean meetsHugeTextThreshold(StreamSource compressedSrc) throws IOExce
9395 return false ;
9496 }
9597
96- byte [] buf = new byte [8 * 1024 ];
97- int bytesRead = 0 ;
98- int n ;
9998 try (InputStream in = compressedSrc .getStream ()) {
99+ // Try skip first.
100+ SkipResult result = meetsHugeTextThresholdBySkip (in , hugeTextThresholdBytes );
101+ if (result .didMeet ) {
102+ return true ;
103+ }
104+
105+ // Even if some skipped, only read==-1 is a true indicator of EOF.
106+ long bytesRead = result .bytesSkipped ;
107+ byte [] buf = new byte [CHUNK_SIZE ];
108+ long n ;
100109 while ((n = in .read (buf , 0 , buf .length )) != -1 ) {
101110 bytesRead += n ;
102111 if (bytesRead >= hugeTextThresholdBytes ) {
@@ -106,4 +115,30 @@ private boolean meetsHugeTextThreshold(StreamSource compressedSrc) throws IOExce
106115 }
107116 return false ;
108117 }
118+
119+ private SkipResult meetsHugeTextThresholdBySkip (InputStream in , int hugeTextThresholdBytes ) {
120+ long bytesSkipped = 0 ;
121+ long n ;
122+ try {
123+ while ((n = in .skip (CHUNK_SIZE )) > 0 ) {
124+ bytesSkipped += n ;
125+ if (bytesSkipped >= hugeTextThresholdBytes ) {
126+ return new SkipResult (bytesSkipped , true );
127+ }
128+ }
129+ } catch (IOException ignored ) {
130+ // Ignore and assume not capable of skip.
131+ }
132+ return new SkipResult (bytesSkipped , false );
133+ }
134+
135+ private static class SkipResult {
136+ final long bytesSkipped ;
137+ final boolean didMeet ;
138+
139+ SkipResult (long bytesSkipped , boolean didMeet ) {
140+ this .bytesSkipped = bytesSkipped ;
141+ this .didMeet = didMeet ;
142+ }
143+ }
109144}
0 commit comments