@@ -65,38 +65,35 @@ class StateDataSource extends TableProvider with DataSourceRegister with Logging
6565 val sourceOptions = StateSourceOptions .modifySourceOptions(hadoopConf,
6666 StateSourceOptions .apply(session, hadoopConf, properties))
6767 val stateConf = buildStateStoreConf(sourceOptions.resolvedCpLocation, sourceOptions.batchId)
68- if (sourceOptions.readAllColumnFamilies) {
69- // For readAllColumnFamilies mode, we don't need specific encoder because it returns raw data
70- val keyStateEncoderSpec = NoPrefixKeyStateEncoderSpec (new StructType ())
71- new StateTable (session, schema, sourceOptions, stateConf, keyStateEncoderSpec,
72- None , None , None , None )
73- } else {
74- val stateStoreReaderInfo : StateStoreReaderInfo = getStoreMetadataAndRunChecks(
75- sourceOptions)
68+ if (sourceOptions.internalOnlyReadAllColumnFamilies
69+ && ! stateConf.providerClass.contains(" RocksDB" )) {
70+ throw StateDataSourceErrors .invalidOptionValue(
71+ StateSourceOptions .INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES ,
72+ " internalOnlyReadAllColumnFamilies is only supported with RocksDBStateStoreProvider. " +
73+ s " Current provider: ${stateConf.providerClass}" )
74+ }
75+ val stateStoreReaderInfo : StateStoreReaderInfo = getStoreMetadataAndRunChecks(
76+ sourceOptions)
7677
77- // The key state encoder spec should be available for all operators except stream-stream joins
78- val keyStateEncoderSpec = if (stateStoreReaderInfo.keyStateEncoderSpecOpt.isDefined) {
79- stateStoreReaderInfo.keyStateEncoderSpecOpt.get
80- } else {
81- val keySchema = SchemaUtil .getSchemaAsDataType(schema, " key" ).asInstanceOf [StructType ]
82- NoPrefixKeyStateEncoderSpec (keySchema)
83- }
84- new StateTable (session, schema, sourceOptions, stateConf, keyStateEncoderSpec,
85- stateStoreReaderInfo.transformWithStateVariableInfoOpt,
86- stateStoreReaderInfo.stateStoreColFamilySchemaOpt,
87- stateStoreReaderInfo.stateSchemaProviderOpt,
88- stateStoreReaderInfo.joinColFamilyOpt)
78+ // The key state encoder spec should be available for all operators except stream-stream joins
79+ val keyStateEncoderSpec = if (stateStoreReaderInfo.keyStateEncoderSpecOpt.isDefined) {
80+ stateStoreReaderInfo.keyStateEncoderSpecOpt.get
81+ } else {
82+ val keySchema = SchemaUtil .getSchemaAsDataType(schema, " key" ).asInstanceOf [StructType ]
83+ NoPrefixKeyStateEncoderSpec (keySchema)
8984 }
85+
86+ new StateTable (session, schema, sourceOptions, stateConf, keyStateEncoderSpec,
87+ stateStoreReaderInfo.transformWithStateVariableInfoOpt,
88+ stateStoreReaderInfo.stateStoreColFamilySchemaOpt,
89+ stateStoreReaderInfo.stateSchemaProviderOpt,
90+ stateStoreReaderInfo.joinColFamilyOpt)
9091 }
9192
9293 override def inferSchema (options : CaseInsensitiveStringMap ): StructType = {
9394 val sourceOptions = StateSourceOptions .modifySourceOptions(hadoopConf,
9495 StateSourceOptions .apply(session, hadoopConf, options))
95- if (sourceOptions.readAllColumnFamilies) {
96- // For readAllColumnFamilies mode, return the binary schema directly
97- return SchemaUtil .getSourceSchema(
98- sourceOptions, new StructType (), new StructType (), None , None )
99- }
96+
10097 val stateStoreReaderInfo : StateStoreReaderInfo = getStoreMetadataAndRunChecks(
10198 sourceOptions)
10299 val oldSchemaFilePaths = StateDataSource .getOldSchemaFilePaths(sourceOptions, hadoopConf)
@@ -381,7 +378,7 @@ case class StateSourceOptions(
381378 stateVarName : Option [String ],
382379 readRegisteredTimers : Boolean ,
383380 flattenCollectionTypes : Boolean ,
384- readAllColumnFamilies : Boolean ,
381+ internalOnlyReadAllColumnFamilies : Boolean ,
385382 startOperatorStateUniqueIds : Option [Array [Array [String ]]] = None ,
386383 endOperatorStateUniqueIds : Option [Array [Array [String ]]] = None ) {
387384 def stateCheckpointLocation : Path = new Path (resolvedCpLocation, DIR_NAME_STATE )
@@ -391,7 +388,7 @@ case class StateSourceOptions(
391388 s " operatorId= $operatorId, storeName= $storeName, joinSide= $joinSide, " +
392389 s " stateVarName= ${stateVarName.getOrElse(" None" )}, + " +
393390 s " flattenCollectionTypes= $flattenCollectionTypes" +
394- s " readAllColumnFamilies= $readAllColumnFamilies "
391+ s " internalOnlyReadAllColumnFamilies= $internalOnlyReadAllColumnFamilies "
395392 if (fromSnapshotOptions.isDefined) {
396393 desc += s " , snapshotStartBatchId= ${fromSnapshotOptions.get.snapshotStartBatchId}"
397394 desc += s " , snapshotPartitionId= ${fromSnapshotOptions.get.snapshotPartitionId}"
@@ -418,7 +415,7 @@ object StateSourceOptions extends DataSourceOptions {
418415 val STATE_VAR_NAME = newOption(" stateVarName" )
419416 val READ_REGISTERED_TIMERS = newOption(" readRegisteredTimers" )
420417 val FLATTEN_COLLECTION_TYPES = newOption(" flattenCollectionTypes" )
421- val READ_ALL_COLUMN_FAMILIES = newOption(" readAllColumnFamilies " )
418+ val INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES = newOption(" internalOnlyReadAllColumnFamilies " )
422419
423420 object JoinSideValues extends Enumeration {
424421 type JoinSideValues = Value
@@ -503,25 +500,28 @@ object StateSourceOptions extends DataSourceOptions {
503500
504501 val readChangeFeed = Option (options.get(READ_CHANGE_FEED )).exists(_.toBoolean)
505502
506- val readAllColumnFamilies = try {
507- Option (options.get(READ_ALL_COLUMN_FAMILIES ))
503+ val internalOnlyReadAllColumnFamilies = try {
504+ Option (options.get(INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES ))
508505 .map(_.toBoolean).getOrElse(false )
509506 } catch {
510507 case _ : IllegalArgumentException =>
511- throw StateDataSourceErrors .invalidOptionValue(READ_ALL_COLUMN_FAMILIES ,
508+ throw StateDataSourceErrors .invalidOptionValue(INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES ,
512509 " Boolean value is expected" )
513510 }
514511
515- if (readAllColumnFamilies && stateVarName.isDefined) {
516- throw StateDataSourceErrors .conflictOptions(Seq (READ_ALL_COLUMN_FAMILIES , STATE_VAR_NAME ))
512+ if (internalOnlyReadAllColumnFamilies && stateVarName.isDefined) {
513+ throw StateDataSourceErrors .conflictOptions(
514+ Seq (INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES , STATE_VAR_NAME ))
517515 }
518516
519- if (readAllColumnFamilies && joinSide != JoinSideValues .none) {
520- throw StateDataSourceErrors .conflictOptions(Seq (READ_ALL_COLUMN_FAMILIES , JOIN_SIDE ))
517+ if (internalOnlyReadAllColumnFamilies && joinSide != JoinSideValues .none) {
518+ throw StateDataSourceErrors .conflictOptions(
519+ Seq (INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES , JOIN_SIDE ))
521520 }
522521
523- if (readAllColumnFamilies && readChangeFeed) {
524- throw StateDataSourceErrors .conflictOptions(Seq (READ_ALL_COLUMN_FAMILIES , READ_CHANGE_FEED ))
522+ if (internalOnlyReadAllColumnFamilies && readChangeFeed) {
523+ throw StateDataSourceErrors .conflictOptions(
524+ Seq (INTERNAL_ONLY_READ_ALL_COLUMN_FAMILIES , READ_CHANGE_FEED ))
525525 }
526526
527527 val changeStartBatchId = Option (options.get(CHANGE_START_BATCH_ID )).map(_.toLong)
@@ -648,7 +648,7 @@ object StateSourceOptions extends DataSourceOptions {
648648 resolvedCpLocation, batchId.get, operatorId, storeName, joinSide,
649649 readChangeFeed, fromSnapshotOptions, readChangeFeedOptions,
650650 stateVarName, readRegisteredTimers, flattenCollectionTypes,
651- readAllColumnFamilies , startOperatorStateUniqueIds, endOperatorStateUniqueIds)
651+ internalOnlyReadAllColumnFamilies , startOperatorStateUniqueIds, endOperatorStateUniqueIds)
652652 }
653653
654654 private def resolvedCheckpointLocation (
0 commit comments