1313from quixstreams .models .types import Headers
1414from quixstreams .state .base import StorePartition
1515from quixstreams .utils .dicts import dict_values
16- from quixstreams .utils .json import loads as json_loads
1716
1817from .exceptions import (
1918 ChangelogTopicPartitionNotAssigned ,
2019 ColumnFamilyHeaderMissing ,
2120 InvalidStoreChangelogOffset ,
2221)
23- from .metadata import (
24- CHANGELOG_CF_MESSAGE_HEADER ,
25- CHANGELOG_PROCESSED_OFFSETS_MESSAGE_HEADER ,
26- )
22+ from .metadata import CHANGELOG_CF_MESSAGE_HEADER
2723
2824logger = logging .getLogger (__name__ )
2925
@@ -50,7 +46,6 @@ def __init__(
5046 changelog_name : str ,
5147 partition_num : int ,
5248 store_partition : StorePartition ,
53- committed_offsets : dict [str , int ],
5449 lowwater : int ,
5550 highwater : int ,
5651 ):
@@ -59,7 +54,6 @@ def __init__(
5954 self ._store_partition = store_partition
6055 self ._changelog_lowwater = lowwater
6156 self ._changelog_highwater = highwater
62- self ._committed_offsets = committed_offsets
6357 self ._recovery_consume_position : Optional [int ] = None
6458 self ._initial_offset : Optional [int ] = None
6559
@@ -154,40 +148,23 @@ def recover_from_changelog_message(
154148 f"Header '{ CHANGELOG_CF_MESSAGE_HEADER } ' missing from changelog message"
155149 )
156150
157- # Parse the processed topic-partition-offset info from the changelog message
158- # headers to determine whether the update should be applied or skipped.
159- # It can be empty if the message was produced by the older version of the lib.
160- processed_offsets = json_loads (
161- headers .get (CHANGELOG_PROCESSED_OFFSETS_MESSAGE_HEADER , b"null" )
162- )
163- if processed_offsets is None or self ._should_apply_changelog (
164- processed_offsets = processed_offsets
165- ):
166- key = changelog_message .key ()
167- if not isinstance (key , bytes ):
168- raise TypeError (
169- f'Invalid changelog key type { type (key )} , expected "bytes"'
170- )
171-
172- value = changelog_message .value ()
173- if not isinstance (value , (bytes , _NoneType )):
174- raise TypeError (
175- f'Invalid changelog value type { type (value )} , expected "bytes"'
176- )
151+ key = changelog_message .key ()
152+ if not isinstance (key , bytes ):
153+ raise TypeError (f'Invalid changelog key type { type (key )} , expected "bytes"' )
177154
178- self ._store_partition .recover_from_changelog_message (
179- cf_name = cf_name ,
180- key = key ,
181- value = value ,
182- offset = changelog_message .offset (),
183- )
184- else :
185- # Even if the changelog update is skipped, roll the changelog offset
186- # to move forward within the changelog topic
187- self ._store_partition .write_changelog_offset (
188- offset = changelog_message .offset (),
155+ value = changelog_message .value ()
156+ if not isinstance (value , (bytes , _NoneType )):
157+ raise TypeError (
158+ f'Invalid changelog value type { type (value )} , expected "bytes"'
189159 )
190160
161+ self ._store_partition .recover_from_changelog_message (
162+ cf_name = cf_name ,
163+ key = key ,
164+ value = value ,
165+ offset = changelog_message .offset (),
166+ )
167+
191168 def set_recovery_consume_position (self , offset : int ):
192169 """
193170 Update the recovery partition with the consumer's position (whenever
@@ -199,26 +176,6 @@ def set_recovery_consume_position(self, offset: int):
199176 """
200177 self ._recovery_consume_position = offset
201178
202- def _should_apply_changelog (self , processed_offsets : dict [str , int ]) -> bool :
203- """
204- Determine whether the changelog update should be skipped.
205-
206- :param processed_offsets: a dict with processed offsets
207- from the changelog message header processed offset.
208-
209- :return: True if update should be applied, else False.
210- """
211- committed_offsets = self ._committed_offsets
212- for topic , processed_offset in processed_offsets .items ():
213- # Skip recovering from the message if its processed offset is ahead of the
214- # current committed offset.
215- # This is a best-effort to recover to a consistent state
216- # if the checkpointing code produced the changelog messages
217- # but failed to commit the source topic offset.
218- if processed_offset >= committed_offsets [topic ]:
219- return False
220- return True
221-
222179
223180class ChangelogProducerFactory :
224181 """
@@ -411,7 +368,6 @@ def _generate_recovery_partitions(
411368 topic_name : Optional [str ],
412369 partition_num : int ,
413370 store_partitions : Dict [str , StorePartition ],
414- committed_offsets : dict [str , int ],
415371 ) -> List [RecoveryPartition ]:
416372 partitions = []
417373 for store_name , store_partition in store_partitions .items ():
@@ -432,7 +388,6 @@ def _generate_recovery_partitions(
432388 changelog_name = changelog_topic .name ,
433389 partition_num = partition_num ,
434390 store_partition = store_partition ,
435- committed_offsets = committed_offsets ,
436391 lowwater = lowwater ,
437392 highwater = highwater ,
438393 )
@@ -443,7 +398,6 @@ def assign_partition(
443398 self ,
444399 topic : Optional [str ],
445400 partition : int ,
446- committed_offsets : dict [str , int ],
447401 store_partitions : Dict [str , StorePartition ],
448402 ):
449403 """
@@ -455,7 +409,6 @@ def assign_partition(
455409 topic_name = topic ,
456410 partition_num = partition ,
457411 store_partitions = store_partitions ,
458- committed_offsets = committed_offsets ,
459412 )
460413
461414 assigned_tps = set (
0 commit comments