@@ -66,38 +66,35 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging
     val sourceOptions = StateSourceOptions.modifySourceOptions(hadoopConf,
       StateSourceOptions.apply(session, hadoopConf, properties))
     val stateConf = buildStateStoreConf(sourceOptions.resolvedCpLocation, sourceOptions.batchId)
-    if (sourceOptions.readAllColumnFamilies) {
-      // For readAllColumnFamilies mode, we don't need specific encoder because it returns raw data
-      val keyStateEncoderSpec = NoPrefixKeyStateEncoderSpec(new StructType())
-      new StateTable(session, schema, sourceOptions, stateConf, keyStateEncoderSpec,
-        None, None, None, None)
-    } else {
-      val stateStoreReaderInfo: StateStoreReaderInfo = getStoreMetadataAndRunChecks(
-        sourceOptions)
+    if (sourceOptions.internalOnlyReadAllColumnFamilies
+      && !stateConf.providerClass.contains("RocksDB")) {
+      throw StateDataSourceErrors.invalidOptionValue(
+        StateSourceOptions.INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES,
+        "internalOnlyReadAllColumnFamilies is only supported with RocksDBStateStoreProvider. " +
+        s"Current provider: ${stateConf.providerClass}")
+    }
+    val stateStoreReaderInfo: StateStoreReaderInfo = getStoreMetadataAndRunChecks(
+      sourceOptions)
 
-      // The key state encoder spec should be available for all operators except stream-stream joins
-      val keyStateEncoderSpec = if (stateStoreReaderInfo.keyStateEncoderSpecOpt.isDefined) {
-        stateStoreReaderInfo.keyStateEncoderSpecOpt.get
-      } else {
-        val keySchema = SchemaUtil.getSchemaAsDataType(schema, "key").asInstanceOf[StructType]
-        NoPrefixKeyStateEncoderSpec(keySchema)
-      }
-      new StateTable(session, schema, sourceOptions, stateConf, keyStateEncoderSpec,
-        stateStoreReaderInfo.transformWithStateVariableInfoOpt,
-        stateStoreReaderInfo.stateStoreColFamilySchemaOpt,
-        stateStoreReaderInfo.stateSchemaProviderOpt,
-        stateStoreReaderInfo.joinColFamilyOpt)
+    // The key state encoder spec should be available for all operators except stream-stream joins
+    val keyStateEncoderSpec = if (stateStoreReaderInfo.keyStateEncoderSpecOpt.isDefined) {
+      stateStoreReaderInfo.keyStateEncoderSpecOpt.get
+    } else {
+      val keySchema = SchemaUtil.getSchemaAsDataType(schema, "key").asInstanceOf[StructType]
+      NoPrefixKeyStateEncoderSpec(keySchema)
     }
+
+    new StateTable(session, schema, sourceOptions, stateConf, keyStateEncoderSpec,
+      stateStoreReaderInfo.transformWithStateVariableInfoOpt,
+      stateStoreReaderInfo.stateStoreColFamilySchemaOpt,
+      stateStoreReaderInfo.stateSchemaProviderOpt,
+      stateStoreReaderInfo.joinColFamilyOpt)
   }
 
   override def inferSchema(options: CaseInsensitiveStringMap): StructType = {
     val sourceOptions = StateSourceOptions.modifySourceOptions(hadoopConf,
       StateSourceOptions.apply(session, hadoopConf, options))
-    if (sourceOptions.readAllColumnFamilies) {
-      // For readAllColumnFamilies mode, return the binary schema directly
-      return SchemaUtil.getSourceSchema(
-        sourceOptions, new StructType(), new StructType(), None, None)
-    }
+
     val stateStoreReaderInfo: StateStoreReaderInfo = getStoreMetadataAndRunChecks(
       sourceOptions)
     val oldSchemaFilePaths = StateDataSource.getOldSchemaFilePaths(sourceOptions, hadoopConf)
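
For readers following along, here is a minimal sketch of how the renamed flag is exercised end to end. The option key and the new provider guard come from this diff; the `"statestore"` format name and `checkpointDir` are assumptions for illustration:

```scala
// Illustrative sketch, not part of this change. It assumes the checkpoint
// was written by a query running RocksDBStateStoreProvider; with any other
// provider, getTable above now fails with invalidOptionValue.
val allColumnFamilies = spark.read
  .format("statestore")                                // assumed short name
  .option("path", checkpointDir)                       // placeholder path
  .option("internalOnlyReadAllColumnFamilies", "true") // renamed option
  .load()
```
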
@@ -382,7 +379,7 @@ case class StateSourceOptions(
     stateVarName: Option[String],
     readRegisteredTimers: Boolean,
     flattenCollectionTypes: Boolean,
-    readAllColumnFamilies: Boolean,
+    internalOnlyReadAllColumnFamilies: Boolean,
     startOperatorStateUniqueIds: Option[Array[Array[String]]] = None,
     endOperatorStateUniqueIds: Option[Array[Array[String]]] = None) {
   def stateCheckpointLocation: Path = new Path(resolvedCpLocation, DIR_NAME_STATE)
@@ -392,7 +389,7 @@ case class StateSourceOptions(
392389 s " operatorId= $operatorId, storeName= $storeName, joinSide= $joinSide, " +
393390 s " stateVarName= ${stateVarName.getOrElse(" None" )}, + " +
394391 s " flattenCollectionTypes= $flattenCollectionTypes" +
395- s " readAllColumnFamilies= $readAllColumnFamilies "
392+ s " internalOnlyReadAllColumnFamilies= $internalOnlyReadAllColumnFamilies "
396393 if (fromSnapshotOptions.isDefined) {
397394 desc += s " , snapshotStartBatchId= ${fromSnapshotOptions.get.snapshotStartBatchId}"
398395 desc += s " , snapshotPartitionId= ${fromSnapshotOptions.get.snapshotPartitionId}"
@@ -419,7 +416,7 @@ object StateSourceOptions extends DataSourceOptions {
   val STATE_VAR_NAME = newOption("stateVarName")
   val READ_REGISTERED_TIMERS = newOption("readRegisteredTimers")
   val FLATTEN_COLLECTION_TYPES = newOption("flattenCollectionTypes")
-  val READ_ALL_COLUMN_FAMILIES = newOption("readAllColumnFamilies")
+  val INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES = newOption("internalOnlyReadAllColumnFamilies")
 
   object JoinSideValues extends Enumeration {
     type JoinSideValues = Value
@@ -505,25 +502,28 @@ object StateSourceOptions extends DataSourceOptions {
 
     val readChangeFeed = Option(options.get(READ_CHANGE_FEED)).exists(_.toBoolean)
 
-    val readAllColumnFamilies = try {
-      Option(options.get(READ_ALL_COLUMN_FAMILIES))
+    val internalOnlyReadAllColumnFamilies = try {
+      Option(options.get(INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES))
         .map(_.toBoolean).getOrElse(false)
     } catch {
       case _: IllegalArgumentException =>
-        throw StateDataSourceErrors.invalidOptionValue(READ_ALL_COLUMN_FAMILIES,
+        throw StateDataSourceErrors.invalidOptionValue(INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES,
           "Boolean value is expected")
     }
 
-    if (readAllColumnFamilies && stateVarName.isDefined) {
-      throw StateDataSourceErrors.conflictOptions(Seq(READ_ALL_COLUMN_FAMILIES, STATE_VAR_NAME))
+    if (internalOnlyReadAllColumnFamilies && stateVarName.isDefined) {
+      throw StateDataSourceErrors.conflictOptions(
+        Seq(INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES, STATE_VAR_NAME))
     }
 
-    if (readAllColumnFamilies && joinSide != JoinSideValues.none) {
-      throw StateDataSourceErrors.conflictOptions(Seq(READ_ALL_COLUMN_FAMILIES, JOIN_SIDE))
+    if (internalOnlyReadAllColumnFamilies && joinSide != JoinSideValues.none) {
+      throw StateDataSourceErrors.conflictOptions(
+        Seq(INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES, JOIN_SIDE))
     }
 
-    if (readAllColumnFamilies && readChangeFeed) {
-      throw StateDataSourceErrors.conflictOptions(Seq(READ_ALL_COLUMN_FAMILIES, READ_CHANGE_FEED))
+    if (internalOnlyReadAllColumnFamilies && readChangeFeed) {
+      throw StateDataSourceErrors.conflictOptions(
+        Seq(INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES, READ_CHANGE_FEED))
     }
 
     val changeStartBatchId = Option(options.get(CHANGE_START_BATCH_ID)).map(_.toLong)
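
The try/catch above exists because Scala's `StringOps.toBoolean` accepts only `"true"` and `"false"` (case-insensitively) and throws `IllegalArgumentException` for anything else, which the parser remaps to the source's own `invalidOptionValue` error:

```scala
// Standard-library behavior the parser relies on:
"TRUE".toBoolean // true; matching is case-insensitive
"yes".toBoolean  // throws IllegalArgumentException: For input string: "yes"
```
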
@@ -650,7 +650,7 @@ object StateSourceOptions extends DataSourceOptions {
       resolvedCpLocation, batchId.get, operatorId, storeName, joinSide,
       readChangeFeed, fromSnapshotOptions, readChangeFeedOptions,
       stateVarName, readRegisteredTimers, flattenCollectionTypes,
-      readAllColumnFamilies, startOperatorStateUniqueIds, endOperatorStateUniqueIds)
+      internalOnlyReadAllColumnFamilies, startOperatorStateUniqueIds, endOperatorStateUniqueIds)
   }
 
   private def getLastCommittedBatch(session: SparkSession, checkpointLocation: String): Long = {
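
A hedged sketch of the conflict checks from the caller's side; the option keys are from this diff, while the reader setup and `checkpointDir` remain illustrative:

```scala
// Illustrative only: the flag cannot be combined with stateVarName,
// joinSide, or readChangeFeed. StateSourceOptions.apply now raises
// conflictOptions naming both offending options.
spark.read
  .format("statestore") // assumed short name
  .option("internalOnlyReadAllColumnFamilies", "true")
  .option("joinSide", "left") // conflicts with the flag
  .load(checkpointDir)
```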