@@ -1211,6 +1211,8 @@ private abstract static class BigQuerySourceBase extends BoundedSource<TableRow>
     protected final BigQueryServices bqServices;
     protected final ValueProvider<String> executingProject;
 
+    private List<BoundedSource<TableRow>> cachedSplitResult;
+
     private BigQuerySourceBase(
         String jobIdToken,
         String extractDestinationDir,
@@ -1225,19 +1227,30 @@ private BigQuerySourceBase(
     @Override
     public List<BoundedSource<TableRow>> splitIntoBundles(
         long desiredBundleSizeBytes, PipelineOptions options) throws Exception {
-      BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
-      TableReference tableToExtract = getTableToExtract(bqOptions);
-      JobService jobService = bqServices.getJobService(bqOptions);
-      String extractJobId = getExtractJobId(jobIdToken);
-      List<String> tempFiles = executeExtract(extractJobId, tableToExtract, jobService);
-
-      TableSchema tableSchema = bqServices.getDatasetService(bqOptions).getTable(
-          tableToExtract.getProjectId(),
-          tableToExtract.getDatasetId(),
-          tableToExtract.getTableId()).getSchema();
-
-      cleanupTempResource(bqOptions);
-      return createSources(tempFiles, tableSchema);
+      // splitIntoBundles() can be called multiple times, e.g. the Dataflow runner may call it
+      // again with a different desiredBundleSizeBytes if the previous call produced too many
+      // sources. We ignore desiredBundleSizeBytes anyway, but in any case we should not start
+      // another BigQuery extract job for repeated splitIntoBundles() calls.
+      if (cachedSplitResult == null) {
+        BigQueryOptions bqOptions = options.as(BigQueryOptions.class);
+        TableReference tableToExtract = getTableToExtract(bqOptions);
+        JobService jobService = bqServices.getJobService(bqOptions);
+        String extractJobId = getExtractJobId(jobIdToken);
+        List<String> tempFiles = executeExtract(extractJobId, tableToExtract, jobService);
+
+        TableSchema tableSchema =
+            bqServices
+                .getDatasetService(bqOptions)
+                .getTable(
+                    tableToExtract.getProjectId(),
+                    tableToExtract.getDatasetId(),
+                    tableToExtract.getTableId())
+                .getSchema();
+
+        cleanupTempResource(bqOptions);
+        cachedSplitResult = createSources(tempFiles, tableSchema);
+      }
+      return cachedSplitResult;
     }
 
     protected abstract TableReference getTableToExtract(BigQueryOptions bqOptions) throws Exception;
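
The change above is a straightforward lazy initialization (memoization) of the split result: the BigQuery extract job and the schema lookup run at most once, and every subsequent call to splitIntoBundles() returns the cached list. A minimal standalone sketch of the same pattern, with illustrative names (CachedSplitter and expensiveSplit are not part of the SDK):

```java
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;

// Minimal sketch of the memoization pattern used in the patch: the expensive
// computation (standing in for the BigQuery extract job) runs at most once;
// every later call returns the cached result. Names are illustrative only.
final class CachedSplitter<T> {
  private final Callable<List<T>> expensiveSplit;
  private List<T> cachedResult;

  CachedSplitter(Callable<List<T>> expensiveSplit) {
    this.expensiveSplit = expensiveSplit;
  }

  // Not synchronized, mirroring the patch: this assumes split() is not
  // called concurrently.
  List<T> split() throws Exception {
    if (cachedResult == null) {
      cachedResult = expensiveSplit.call();
    }
    return cachedResult;
  }

  public static void main(String[] args) throws Exception {
    CachedSplitter<String> splitter =
        new CachedSplitter<>(() -> {
          System.out.println("expensive split runs once");
          return Arrays.asList("bundle-1", "bundle-2");
        });
    // Both calls return the very same list instance; the Callable ran once.
    System.out.println(splitter.split() == splitter.split());  // prints true
  }
}
```

Like the patch, the sketch trades a small amount of mutable state for not re-running the extract job when the runner retries splitting with a different desiredBundleSizeBytes, a parameter this source ignores anyway.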