 */
 package org.apache.spark.sql.execution.datasources.v2.state

+import scala.collection.mutable
+
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql.catalyst.InternalRow
 import org.apache.spark.sql.catalyst.expressions.{GenericInternalRow, UnsafeRow}
@@ -50,17 +52,8 @@ class StatePartitionReaderFactory(
   override def createReader(partition: InputPartition): PartitionReader[InternalRow] = {
     val stateStoreInputPartition = partition.asInstanceOf[StateStoreInputPartition]
     if (stateStoreInputPartition.sourceOptions.internalOnlyReadAllColumnFamilies) {
-      // Disable format validation because the schema returned by
-      // StatePartitionAllColumnFamiliesReader does not contain the corresponding
-      // keySchema or valueSchema.
-      // It's safe to do so because we also don't expect the caller of
-      // StatePartitionAllColumnFamiliesReader to extract specific fields out of the returned row.
-      val modifiedStoreConf = storeConf.withExtraOptions(Map(
-        StateStoreConf.FORMAT_VALIDATION_ENABLED_CONFIG -> "false",
-        StateStoreConf.FORMAT_VALIDATION_CHECK_VALUE_CONFIG -> "false"
-      ))
-      new StatePartitionAllColumnFamiliesReader(modifiedStoreConf, hadoopConf,
-        stateStoreInputPartition, schema, keyStateEncoderSpec)
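+      // StatePartitionAllColumnFamiliesReader builds its own provider from the
+      // operator's state schema file, so the store conf is passed through unchanged.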
+      new StatePartitionAllColumnFamiliesReader(storeConf, hadoopConf,
+        stateStoreInputPartition, schema, keyStateEncoderSpec, joinColFamilyOpt)
     } else if (stateStoreInputPartition.sourceOptions.readChangeFeed) {
       new StateStoreChangeDataPartitionReader(storeConf, hadoopConf,
         stateStoreInputPartition, schema, keyStateEncoderSpec, stateVariableInfoOpt,
@@ -99,12 +92,14 @@ abstract class StatePartitionReaderBase(
     if (SchemaUtil.checkVariableType(stateVariableInfoOpt, StateVariableType.MapState)) {
       SchemaUtil.getCompositeKeySchema(schema, partition.sourceOptions)
     } else if (partition.sourceOptions.internalOnlyReadAllColumnFamilies) {
+      // StatePartitionAllColumnFamiliesReader has its own provider and won't use this keySchema
       placeholderSchema
     } else {
       SchemaUtil.getSchemaAsDataType(schema, "key").asInstanceOf[StructType]
     }
   }

+  // StatePartitionAllColumnFamiliesReader has its own provider and won't use this valueSchema
   protected val valueSchema = if (stateVariableInfoOpt.isDefined ||
     partition.sourceOptions.internalOnlyReadAllColumnFamilies) {
     placeholderSchema
@@ -265,7 +260,8 @@ class StatePartitionAllColumnFamiliesReader(
     hadoopConf: SerializableConfiguration,
     partition: StateStoreInputPartition,
     schema: StructType,
-    keyStateEncoderSpec: KeyStateEncoderSpec)
+    keyStateEncoderSpec: KeyStateEncoderSpec,
+    joinColFamilyOpt: Option[String])
   extends StatePartitionReaderBase(
     storeConf,
     hadoopConf, partition, schema,
@@ -280,6 +276,37 @@ class StatePartitionAllColumnFamiliesReader(
     )
   }

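+  // Column family name -> schema, loaded once from the operator's state schema
+  // file (read from the dedicated schema-check partition).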
+  private val colFamilyToSchema: mutable.HashMap[String, StateStoreColFamilySchema] = {
+    val stateStoreId = StateStoreId(
+      partition.sourceOptions.stateCheckpointLocation.toString,
+      partition.sourceOptions.operatorId,
+      StateStore.PARTITION_ID_TO_CHECK_SCHEMA,
+      partition.sourceOptions.storeName)
+    val stateStoreProviderId = StateStoreProviderId(stateStoreId, partition.queryId)
+    val manager = new StateSchemaCompatibilityChecker(stateStoreProviderId, hadoopConf.value)
+    val schemaFile = manager.readSchemaFile()
+    val schemaMap = mutable.HashMap[String, StateStoreColFamilySchema]()
+    schemaFile.foreach { schema => schemaMap.put(schema.colFamilyName, schema) }
+    schemaMap
+  }
+
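+  // Create a provider for this partition initialized with the default column
+  // family's key/value schemas from the schema file rather than the read schema.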
+  override lazy val provider: StateStoreProvider = {
+    val stateStoreId = StateStoreId(
+      partition.sourceOptions.stateCheckpointLocation.toString,
+      partition.sourceOptions.operatorId,
+      partition.partition,
+      partition.sourceOptions.storeName)
+    val stateStoreProviderId = StateStoreProviderId(stateStoreId, partition.queryId)
+    val provider = StateStoreProvider.createAndInit(
+      stateStoreProviderId,
+      colFamilyToSchema(StateStore.DEFAULT_COL_FAMILY_NAME).keySchema,
+      colFamilyToSchema(StateStore.DEFAULT_COL_FAMILY_NAME).valueSchema,
+      keyStateEncoderSpec,
+      useColumnFamilies = false, storeConf, hadoopConf.value,
+      useMultipleValuesPerKey = false, None)
+    provider
+  }
+
   override lazy val iter: Iterator[InternalRow] = {
     store
       .iterator()