From 0a7c919e7097d1adc29a91cf93e4f2c3f1e88ae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Tue, 21 Oct 2025 13:24:57 +0200 Subject: [PATCH 01/16] WIP Remove CQv1 --- deps/rabbit/src/rabbit_classic_queue.erl | 1 + .../src/rabbit_classic_queue_index_v2.erl | 275 +--- .../src/rabbit_classic_queue_store_v2.erl | 1 + deps/rabbit/src/rabbit_queue_index.erl | 1417 ----------------- deps/rabbit/src/rabbit_variable_queue.erl | 171 +- deps/rabbit/test/backing_queue_SUITE.erl | 18 +- 6 files changed, 95 insertions(+), 1788 deletions(-) delete mode 100644 deps/rabbit/src/rabbit_queue_index.erl diff --git a/deps/rabbit/src/rabbit_classic_queue.erl b/deps/rabbit/src/rabbit_classic_queue.erl index 97115a07ac8a..d3399685b817 100644 --- a/deps/rabbit/src/rabbit_classic_queue.erl +++ b/deps/rabbit/src/rabbit_classic_queue.erl @@ -430,6 +430,7 @@ supports_stateful_delivery() -> true. deliver(Qs0, Msg0, Options) -> %% add guid to content here instead of in rabbit_basic:message/3, %% as classic queues are the only ones that need it + %% @todo Only if multiple queues. Msg = mc:prepare(store, mc:set_annotation(id, rabbit_guid:gen(), Msg0)), Mandatory = maps:get(mandatory, Options, false), MsgSeqNo = maps:get(correlation, Options, undefined), diff --git a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl index 087e8e355916..4412c77d51c7 100644 --- a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl +++ b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl @@ -7,9 +7,9 @@ -module(rabbit_classic_queue_index_v2). --export([erase/1, init/3, reset_state/1, recover/7, +-export([erase/1, init/2, reset_state/1, recover/5, terminate/3, delete_and_terminate/1, - info/1, publish/7, publish/8, ack/2, read/3]). + info/1, publish/7, ack/2, read/3]). %% Recovery. Unlike other functions in this module, these %% apply to all queues all at once. @@ -18,15 +18,10 @@ %% rabbit_queue_index/rabbit_variable_queue-specific functions. %% Implementation details from the queue index leaking into the %% queue implementation itself. --export([pre_publish/7, flush_pre_publish_cache/2, - sync/1, needs_sync/1, flush/1, +%% @todo TODO +-export([sync/1, needs_sync/1, flush/1, bounds/2, next_segment_boundary/1]). -%% Used to upgrade/downgrade from/to the v1 index. --export([init_for_conversion/3]). --export([init_args/1]). --export([delete_segment_file_for_seq_id/2]). - %% Shared with rabbit_classic_queue_store_v2. -export([queue_dir/2]). @@ -151,17 +146,11 @@ %% This fun must be called when messages that expect %% confirms have either an ack or their entry %% written to disk and file:sync/1 has been called. - on_sync :: on_sync_fun(), - - %% This fun is never called. It is kept so that we - %% can downgrade the queue back to v1. - on_sync_msg :: fun() + on_sync :: on_sync_fun() }). -type state() :: #qi{}. -%% Types copied from rabbit_queue_index. - -type on_sync_fun() :: fun ((sets:set()) -> ok). -type contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean()). -type shutdown_terms() :: list() | 'non_clean_shutdown'. @@ -176,37 +165,24 @@ erase(#resource{ virtual_host = VHost } = Name) -> Dir = queue_dir(VHostDir, Name), erase_index_dir(Dir). --spec init(rabbit_amqqueue:name(), - on_sync_fun(), on_sync_fun()) -> state(). +-spec init(rabbit_amqqueue:name(), on_sync_fun()) -> state(). %% We do not embed messages and as a result never need the OnSyncMsgFun. -init(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) -> - ?DEBUG("~0p ~0p ~0p", [Name, OnSyncFun, OnSyncMsgFun]), +init(#resource{ virtual_host = VHost } = Name, OnSyncFun) -> + ?DEBUG("~0p ~0p ~0p", [Name, OnSyncFun]), VHostDir = rabbit_vhost:msg_store_dir_path(VHost), Dir = queue_dir(VHostDir, Name), false = rabbit_file:is_file(Dir), %% is_file == is file or dir - init1(Name, Dir, OnSyncFun, OnSyncMsgFun). - -init_args(#qi{ queue_name = QueueName, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun }) -> - {QueueName, OnSyncFun, OnSyncMsgFun}. - -init_for_conversion(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) -> - ?DEBUG("~0p ~0p ~0p", [Name, OnSyncFun, OnSyncMsgFun]), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - Dir = queue_dir(VHostDir, Name), - init1(Name, Dir, OnSyncFun, OnSyncMsgFun). + init1(Name, Dir, OnSyncFun). -init1(Name, Dir, OnSyncFun, OnSyncMsgFun) -> +init1(Name, Dir, OnSyncFun) -> ensure_queue_name_stub_file(Name, Dir), DirBin = rabbit_file:filename_to_binary(Dir), #qi{ queue_name = Name, dir = << DirBin/binary, "/" >>, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun + on_sync = OnSyncFun }. ensure_queue_name_stub_file(#resource{virtual_host = VHost, name = QName}, Dir) -> @@ -219,16 +195,14 @@ ensure_queue_name_stub_file(#resource{virtual_host = VHost, name = QName}, Dir) reset_state(State = #qi{ queue_name = Name, dir = Dir, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun }) -> + on_sync = OnSyncFun }) -> ?DEBUG("~0p", [State]), _ = delete_and_terminate(State), - init1(Name, rabbit_file:binary_to_filename(Dir), OnSyncFun, OnSyncMsgFun). + init1(Name, rabbit_file:binary_to_filename(Dir), OnSyncFun). -spec recover(rabbit_amqqueue:name(), shutdown_terms(), boolean(), contains_predicate(), - on_sync_fun(), on_sync_fun(), - main | convert) -> + on_sync_fun()) -> {'undefined' | non_neg_integer(), 'undefined' | non_neg_integer(), state()}. @@ -241,12 +215,12 @@ reset_state(State = #qi{ queue_name = Name, -define(RECOVER_COUNTER_SIZE, 6). recover(#resource{ virtual_host = VHost, name = QueueName } = Name, Terms, - IsMsgStoreClean, ContainsCheckFun, OnSyncFun, OnSyncMsgFun, Context) -> - ?DEBUG("~0p ~0p ~0p ~0p ~0p ~0p", [Name, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun]), + IsMsgStoreClean, ContainsCheckFun, OnSyncFun) -> + ?DEBUG("~0p ~0p ~0p ~0p ~0p", [Name, Terms, IsMsgStoreClean, + ContainsCheckFun, OnSyncFun]), VHostDir = rabbit_vhost:msg_store_dir_path(VHost), Dir = queue_dir(VHostDir, Name), - State0 = init1(Name, Dir, OnSyncFun, OnSyncMsgFun), + State0 = init1(Name, Dir, OnSyncFun), %% We go over all segments if either the index or the %% message store has/had to recover. Otherwise we just %% take our state from Terms. @@ -254,10 +228,6 @@ recover(#resource{ virtual_host = VHost, name = QueueName } = Name, Terms, case IsIndexClean andalso IsMsgStoreClean of true -> State = case proplists:get_value(v2_index_state, Terms, undefined) of - %% We are recovering a queue that was using the v1 index. - undefined when Context =:= main -> - recover_index_v1_clean(State0, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun); {?VERSION, Segments} -> State0#qi{ segments = Segments } end, @@ -268,9 +238,7 @@ recover(#resource{ virtual_host = VHost, name = QueueName } = Name, Terms, State}; false -> CountersRef = counters:new(?RECOVER_COUNTER_SIZE, []), - State = recover_segments(State0, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - CountersRef, Context), + State = recover_segments(State0, ContainsCheckFun, CountersRef), ?LOG_WARNING("Queue ~ts in vhost ~ts dropped ~b/~b/~b persistent messages " "and ~b transient messages after unclean shutdown", [QueueName, VHost, @@ -283,11 +251,11 @@ recover(#resource{ virtual_host = VHost, name = QueueName } = Name, Terms, State} end. -recover_segments(State0 = #qi { queue_name = Name, dir = DirBin }, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, CountersRef, Context) -> +recover_segments(State0 = #qi { queue_name = Name, dir = DirBin }, + ContainsCheckFun, CountersRef) -> Dir = rabbit_file:binary_to_filename(DirBin), SegmentFiles = rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir), - State = case SegmentFiles of + case SegmentFiles of %% No segments found. [] -> State0; @@ -298,26 +266,9 @@ recover_segments(State0 = #qi { queue_name = Name, dir = DirBin }, Terms, IsMsgS || F <- SegmentFiles]), %% We use a temporary store state to check that messages do exist. StoreState0 = rabbit_classic_queue_store_v2:init(Name), - {State1, StoreState} = recover_segments(State0, ContainsCheckFun, StoreState0, CountersRef, Segments), + {State, StoreState} = recover_segments(State0, ContainsCheckFun, StoreState0, CountersRef, Segments), _ = rabbit_classic_queue_store_v2:terminate(StoreState), - State1 - end, - case Context of - convert -> - State; - main -> - %% We try to see if there are segment files from the v1 index. - case rabbit_file:wildcard(".*\\.idx", Dir) of - %% We are recovering a dirty queue that was using the v1 index or in - %% the process of converting from v1 to v2. - [_|_] -> - recover_index_v1_dirty(State, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - CountersRef); - %% Otherwise keep default values. - [] -> - State - end + State end. recover_segments(State, _, StoreState, _, []) -> @@ -449,89 +400,6 @@ recover_segment(State, ContainsCheckFun, StoreState0, CountersRef, Fd, Unacked - (SegmentEntryCount - ThisEntry), LocBytes0) end. -recover_index_v1_clean(State0 = #qi{ queue_name = Name }, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun) -> - #resource{virtual_host = VHost, name = QName} = Name, - ?LOG_INFO("Converting queue ~ts in vhost ~ts from v1 to v2 after clean shutdown", [QName, VHost]), - {_, _, V1State} = rabbit_queue_index:recover(Name, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - convert), - %% We will ignore the counter results because on clean shutdown - %% we do not need to calculate the values again. This lets us - %% share code with dirty recovery. - CountersRef = counters:new(?RECOVER_COUNTER_SIZE, []), - State = recover_index_v1_common(State0, V1State, CountersRef), - ?LOG_INFO("Queue ~ts in vhost ~ts converted ~b total messages from v1 to v2", - [QName, VHost, counters:get(CountersRef, ?RECOVER_COUNT)]), - State. - -recover_index_v1_dirty(State0 = #qi{ queue_name = Name }, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - CountersRef) -> - #resource{virtual_host = VHost, name = QName} = Name, - ?LOG_INFO("Converting queue ~ts in vhost ~ts from v1 to v2 after unclean shutdown", [QName, VHost]), - %% We ignore the count and bytes returned here because we cannot trust - %% rabbit_queue_index: it has a bug that may lead to more bytes being - %% returned than it really has. - %% - %% On top of that some messages may also be in both the v1 and v2 indexes - %% after a crash. - {_, _, V1State} = rabbit_queue_index:recover(Name, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - convert), - State = recover_index_v1_common(State0, V1State, CountersRef), - ?LOG_INFO("Queue ~ts in vhost ~ts converted ~b total messages from v1 to v2", - [QName, VHost, counters:get(CountersRef, ?RECOVER_COUNT)]), - State. - -%% At this point all messages are persistent because transient messages -%% were dropped during the v1 index recovery. -recover_index_v1_common(State0 = #qi{ queue_name = Name, dir = DirBin }, - V1State, CountersRef) -> - Dir = rabbit_file:binary_to_filename(DirBin), - %% Use a temporary per-queue store state to store embedded messages. - StoreState0 = rabbit_classic_queue_store_v2:init(Name), - %% Go through the v1 index and publish messages to the v2 index. - {LoSeqId, HiSeqId, _} = rabbit_queue_index:bounds(V1State), - %% When resuming after a crash we need to double check the messages that are both - %% in the v1 and v2 index (effectively the messages below the upper bound of the - %% v2 index that are about to be written to it). - {_, V2HiSeqId, _} = bounds(State0, undefined), - SkipFun = fun - (SeqId, FunState0) when SeqId < V2HiSeqId -> - case read(SeqId, SeqId + 1, FunState0) of - %% Message already exists, skip. - {[_], FunState} -> - {skip, FunState}; - %% Message doesn't exist, write. - {[], FunState} -> - {write, FunState} - end; - %% Message is out of bounds of the v1 index. - (_, FunState) -> - {write, FunState} - end, - %% We use a common function also used with conversion on policy change. - {State1, StoreState} = rabbit_variable_queue:convert_from_v1_to_v2_loop(Name, V1State, State0, StoreState0, - {CountersRef, ?RECOVER_COUNT, ?RECOVER_BYTES}, - LoSeqId, HiSeqId, SkipFun), - %% Terminate the v2 store client. - _ = rabbit_classic_queue_store_v2:terminate(StoreState), - %% Close the v1 index journal handle if any. - JournalHdl = element(4, V1State), - ok = case JournalHdl of - undefined -> ok; - _ -> file_handle_cache:close(JournalHdl) - end, - %% Delete the v1 index files. - OldFiles = ["journal.jif"|rabbit_file:wildcard(".*\\.idx", Dir)], - _ = [rabbit_file:delete(filename:join(Dir, F)) || F <- OldFiles], - %% Ensure that everything in the v2 index is written to disk. - State = flush(State1), - %% Clean up all the garbage that we have surely been creating. - garbage_collect(), - State. - -spec terminate(rabbit_types:vhost(), [any()], State) -> State when State::state(). terminate(VHost, Terms, State0 = #qi { dir = Dir, @@ -579,15 +447,12 @@ info(#qi{ write_buffer = WriteBuffer, write_buffer_updates = NumUpdates }) -> rabbit_types:message_properties(), boolean(), non_neg_integer() | infinity, State) -> State when State::state(). -publish(MsgId, SeqId, Location, Props, IsPersistent, TargetRamCount, State) -> - publish(MsgId, SeqId, Location, Props, IsPersistent, true, TargetRamCount, State). - %% Because we always persist to the msg_store, the Msg(Or)Id argument %% here is always a binary, never a record. -publish(MsgId, SeqId, Location, Props, IsPersistent, ShouldConfirm, TargetRamCount, +publish(MsgId, SeqId, Location, Props, IsPersistent, ShouldConfirm, State0 = #qi { write_buffer = WriteBuffer0, segments = Segments }) -> - ?DEBUG("~0p ~0p ~0p ~0p ~0p ~0p ~0p", [MsgId, SeqId, Location, Props, IsPersistent, TargetRamCount, State0]), + ?DEBUG("~0p ~0p ~0p ~0p ~0p ~0p", [MsgId, SeqId, Location, Props, IsPersistent, State0]), %% Add the entry to the write buffer. WriteBuffer = WriteBuffer0#{SeqId => {MsgId, SeqId, Location, Props, IsPersistent}}, State1 = State0#qi{ write_buffer = WriteBuffer }, @@ -1090,16 +955,47 @@ flush(State) -> sync(State). %% ---- -%% -%% Defer to rabbit_queue_index for recovery for the time being. -%% We can move the functions here when the v1 index is removed. + +-type walker(A) :: fun ((A) -> 'finished' | + {rabbit_types:msg_id(), non_neg_integer(), A}). + +-spec start(rabbit_types:vhost(), [rabbit_amqqueue:name()]) -> {[[any()]], {walker(A), A}}. start(VHost, DurableQueueNames) -> ?DEBUG("~0p ~0p", [VHost, DurableQueueNames]), - %% We replace the queue_index_walker function with our own. - %% Everything else remains the same. - {OrderedTerms, {_QueueIndexWalkerFun, FunState}} = rabbit_queue_index:start(VHost, DurableQueueNames), - {OrderedTerms, {fun queue_index_walker/1, FunState}}. + {ok, RecoveryTermsPid} = rabbit_recovery_terms:start(VHost), + rabbit_vhost_sup_sup:save_vhost_recovery_terms(VHost, RecoveryTermsPid), + {DurableTerms, DurableDirectories} = + lists:foldl( + fun(QName, {RecoveryTerms, ValidDirectories}) -> + DirName = queue_name_to_dir_name(QName), + RecoveryInfo = case rabbit_recovery_terms:read(VHost, DirName) of + {error, _} -> non_clean_shutdown; + {ok, Terms} -> Terms + end, + {[RecoveryInfo | RecoveryTerms], + sets:add_element(DirName, ValidDirectories)} + end, {[], sets:new()}, DurableQueueNames), + %% Any queue directory we've not been asked to recover is considered garbage + ToDelete = [filename:join([rabbit_vhost:msg_store_dir_path(VHost), "queues", Dir]) + || Dir <- lists:subtract(all_queue_directory_names(VHost), + sets:to_list(DurableDirectories))], + ?LOG_DEBUG("Deleting unknown files/folders: ~p", [ToDelete]), + _ = rabbit_file:recursive_delete(ToDelete), + rabbit_recovery_terms:clear(VHost), + %% The backing queue interface requires that the queue recovery terms + %% which come back from start/1 are in the same order as DurableQueueNames + OrderedTerms = lists:reverse(DurableTerms), + {OrderedTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. + +all_queue_directory_names(VHost) -> + VHostQueuesPath = filename:join([rabbit_vhost:msg_store_dir_path(VHost), "queues"]), + case filelib:is_dir(VHostQueuesPath) of + true -> + {ok, Dirs} = file:list_dir(VHostQueuesPath), + Dirs; + false -> [] + end. queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> ?DEBUG("~0p", [{start, DurableQueues}]), @@ -1120,9 +1016,6 @@ queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> empty -> ok = gatherer:stop(Gatherer), finished; - %% From v1 index walker. @todo Remove when no longer possible to convert from v1. - {value, {MsgId, Count}} -> - {MsgId, Count, {next, Gatherer}}; {value, MsgIds} -> {MsgIds, {next, Gatherer}} end. @@ -1133,16 +1026,7 @@ queue_index_walker_reader(#resource{ virtual_host = VHost } = Name, Gatherer) -> Dir = queue_dir(VHostDir, Name), SegmentFiles = rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir), _ = [queue_index_walker_segment(filename:join(Dir, F), Gatherer) || F <- SegmentFiles], - %% When there are files belonging to the v1 index, we go through - %% the v1 index walker function as well. - case rabbit_file:wildcard(".*\\.(idx|jif)", Dir) of - [_|_] -> - %% This function will call gatherer:finish/1, we do not - %% need to call it here. - rabbit_queue_index:queue_index_walker_reader(Name, Gatherer); - [] -> - ok = gatherer:finish(Gatherer) - end. + ok = gatherer:finish(Gatherer). queue_index_walker_segment(F, Gatherer) -> ?DEBUG("~0p ~0p", [F, Gatherer]), @@ -1180,27 +1064,11 @@ queue_index_walker_segment(Fd, Gatherer, N, Total, Acc) -> stop(VHost) -> ?DEBUG("~0p", [VHost]), - rabbit_queue_index:stop(VHost). + rabbit_recovery_terms:stop(VHost). %% ---- %% -%% These functions either call the normal functions or are no-ops. -%% They relate to specific optimizations of rabbit_queue_index and -%% rabbit_variable_queue. -%% -%% @todo The way pre_publish works is still fairly puzzling. -%% When the v1 index gets removed we can just drop -%% these functions. - -pre_publish(MsgOrId, SeqId, Location, Props, IsPersistent, TargetRamCount, State) -> - ?DEBUG("~0p ~0p ~0p ~0p ~0p ~0p ~0p", [MsgOrId, SeqId, Location, Props, IsPersistent, TargetRamCount, State]), - publish(MsgOrId, SeqId, Location, Props, IsPersistent, false, TargetRamCount, State). - -flush_pre_publish_cache(TargetRamCount, State) -> - ?DEBUG("~0p ~0p", [TargetRamCount, State]), - State. - -%% See comment in rabbit_queue_index:bounds/1. We do not need to be +%% Technical leftover from CQv1. We do not need to be %% accurate about these values because they are simply used as lowest %% and highest possible bounds. In fact we HAVE to be inaccurate for %% the test suite to pass. This can probably be made more accurate @@ -1237,15 +1105,6 @@ next_segment_boundary(SeqId) -> SegmentEntryCount = segment_entry_count(), (1 + (SeqId div SegmentEntryCount)) * SegmentEntryCount. -%% This function is only used when downgrading to the v1 index. -%% We potentially close the relevant fd and then delete the -%% segment file. -delete_segment_file_for_seq_id(SeqId, State0) -> - SegmentEntryCount = segment_entry_count(), - Segment = SeqId div SegmentEntryCount, - State = delete_segment(Segment, State0), - {[Segment], State}. - %% ---- %% %% Internal. diff --git a/deps/rabbit/src/rabbit_classic_queue_store_v2.erl b/deps/rabbit/src/rabbit_classic_queue_store_v2.erl index 7c28ceb7a377..ab73d926925c 100644 --- a/deps/rabbit/src/rabbit_classic_queue_store_v2.erl +++ b/deps/rabbit/src/rabbit_classic_queue_store_v2.erl @@ -145,6 +145,7 @@ info(#qs{ write_buffer = WriteBuffer }) -> rabbit_types:message_properties(), State) -> {msg_location(), State} when State::state(). +%% TODO!! %% @todo I think we can disable the old message store at the same %% place where we create MsgId. If many queues receive the %% message, then we create an MsgId. If not, we don't. But diff --git a/deps/rabbit/src/rabbit_queue_index.erl b/deps/rabbit/src/rabbit_queue_index.erl deleted file mode 100644 index c8a084bd414a..000000000000 --- a/deps/rabbit/src/rabbit_queue_index.erl +++ /dev/null @@ -1,1417 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved. -%% - --module(rabbit_queue_index). - --compile({inline, [segment_entry_count/0]}). - --export([erase/1, init/3, reset_state/1, recover/7, - terminate/3, delete_and_terminate/1, info/1, - pre_publish/7, flush_pre_publish_cache/2, - publish/7, publish/8, deliver/2, ack/2, sync/1, needs_sync/1, flush/1, - read/3, next_segment_boundary/1, bounds/1, start/2, stop/1]). - -%% Used by rabbit_vhost to set the segment_entry_count. --export([all_queue_directory_names/1]). - -%% Used by rabbit_classic_queue_index_v2 when upgrading -%% after a non-clean shutdown. --export([queue_index_walker_reader/2]). - -%% Used to upgrade/downgrade to/from the v2 index. --export([init_args/1]). --export([init_for_conversion/3]). --export([delete_segment_file_for_seq_id/2]). --export([delete_journal/1]). - --define(CLEAN_FILENAME, "clean.dot"). - -%%---------------------------------------------------------------------------- - -%% The queue index is responsible for recording the order of messages -%% within a queue on disk. As such it contains records of messages -%% being published, delivered and acknowledged. The publish record -%% includes the sequence ID, message ID and a small quantity of -%% metadata about the message; the delivery and acknowledgement -%% records just contain the sequence ID. A publish record may also -%% contain the complete message if provided to publish/5; this allows -%% the message store to be avoided altogether for small messages. In -%% either case the publish record is stored in memory in the same -%% serialised format it will take on disk. -%% -%% Because of the fact that the queue can decide at any point to send -%% a queue entry to disk, you can not rely on publishes appearing in -%% order. The only thing you can rely on is a message being published, -%% then delivered, then ack'd. -%% -%% In order to be able to clean up ack'd messages, we write to segment -%% files. These files have a fixed number of entries: segment_entry_count() -%% publishes, delivers and acknowledgements. They are numbered, and so -%% it is known that the 0th segment contains messages 0 -> -%% segment_entry_count() - 1, the 1st segment contains messages -%% segment_entry_count() -> 2*segment_entry_count() - 1 and so on. As -%% such, in the segment files, we only refer to message sequence ids -%% by the LSBs as SeqId rem segment_entry_count(). This gives them a -%% fixed size. -%% -%% However, transient messages which are not sent to disk at any point -%% will cause gaps to appear in segment files. Therefore, we delete a -%% segment file whenever the number of publishes == number of acks -%% (note that although it is not fully enforced, it is assumed that a -%% message will never be ackd before it is delivered, thus this test -%% also implies == number of delivers). In practise, this does not -%% cause disk churn in the pathological case because of the journal -%% and caching (see below). -%% -%% Because of the fact that publishes, delivers and acks can occur all -%% over, we wish to avoid lots of seeking. Therefore we have a fixed -%% sized journal to which all actions are appended. When the number of -%% entries in this journal reaches max_journal_entries, the journal -%% entries are scattered out to their relevant files, and the journal -%% is truncated to zero size. Note that entries in the journal must -%% carry the full sequence id, thus the format of entries in the -%% journal is different to that in the segments. -%% -%% The journal is also kept fully in memory, pre-segmented: the state -%% contains a mapping from segment numbers to state-per-segment (this -%% state is held for all segments which have been "seen": thus a -%% segment which has been read but has no pending entries in the -%% journal is still held in this mapping. Also note that a map is -%% used for this mapping, not an array because with an array, you will -%% always have entries from 0). Actions are stored directly in this -%% state. Thus at the point of flushing the journal, firstly no -%% reading from disk is necessary, but secondly if the known number of -%% acks and publishes in a segment are equal, given the known state of -%% the segment file combined with the journal, no writing needs to be -%% done to the segment file either (in fact it is deleted if it exists -%% at all). This is safe given that the set of acks is a subset of the -%% set of publishes. When it is necessary to sync messages, it is -%% sufficient to fsync on the journal: when entries are distributed -%% from the journal to segment files, those segments appended to are -%% fsync'd prior to the journal being truncated. -%% -%% This module is also responsible for scanning the queue index files -%% and seeding the message store on start up. -%% -%% Note that in general, the representation of a message's state as -%% the tuple: {('no_pub'|{IsPersistent, Bin, MsgBin}), -%% ('del'|'no_del'), ('ack'|'no_ack')} is richer than strictly -%% necessary for most operations. However, for startup, and to ensure -%% the safe and correct combination of journal entries with entries -%% read from the segment on disk, this richer representation vastly -%% simplifies and clarifies the code. -%% -%% For notes on Clean Shutdown and startup, see documentation in -%% rabbit_variable_queue. -%% -%% v2 UPDATE: The queue index is still keeping track of delivers -%% as noted in the above comment. However the queue will immediately -%% mark messages as delivered, because it now keeps track of delivers -%% at the queue level. The index still needs to keep track of deliver -%% entries because of its pub->del->ack logic. -%% -%%---------------------------------------------------------------------------- - -%% ---- Journal details ---- - --define(JOURNAL_FILENAME, "journal.jif"). --define(QUEUE_NAME_STUB_FILE, ".queue_name"). - --define(PUB_PERSIST_JPREFIX, 2#00). --define(PUB_TRANS_JPREFIX, 2#01). --define(DEL_JPREFIX, 2#10). --define(ACK_JPREFIX, 2#11). --define(JPREFIX_BITS, 2). --define(SEQ_BYTES, 8). --define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)). - -%% ---- Segment details ---- - --define(SEGMENT_EXTENSION, ".idx"). - -%% TODO: The segment size would be configurable, but deriving all the -%% other values is quite hairy and quite possibly noticeably less -%% efficient, depending on how clever the compiler is when it comes to -%% binary generation/matching with constant vs variable lengths. - --define(REL_SEQ_BITS, 14). - -%% seq only is binary 01 followed by 14 bits of rel seq id -%% (range: 0 - 16383) --define(REL_SEQ_ONLY_PREFIX, 01). --define(REL_SEQ_ONLY_PREFIX_BITS, 2). --define(REL_SEQ_ONLY_RECORD_BYTES, 2). - -%% publish record is binary 1 followed by a bit for is_persistent, -%% then 14 bits of rel seq id, 64 bits for message expiry, 32 bits of -%% size and then 128 bits of md5sum msg id. --define(PUB_PREFIX, 1). --define(PUB_PREFIX_BITS, 1). - --define(EXPIRY_BYTES, 8). --define(EXPIRY_BITS, (?EXPIRY_BYTES * 8)). --define(NO_EXPIRY, 0). - --define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes --define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)). - -%% This is the size of the message body content, for stats --define(SIZE_BYTES, 4). --define(SIZE_BITS, (?SIZE_BYTES * 8)). - -%% This is the size of the message record embedded in the queue -%% index. If 0, the message can be found in the message store. --define(EMBEDDED_SIZE_BYTES, 4). --define(EMBEDDED_SIZE_BITS, (?EMBEDDED_SIZE_BYTES * 8)). - -%% 16 bytes for md5sum + 8 for expiry --define(PUB_RECORD_BODY_BYTES, (?MSG_ID_BYTES + ?EXPIRY_BYTES + ?SIZE_BYTES)). -%% + 4 for size --define(PUB_RECORD_SIZE_BYTES, (?PUB_RECORD_BODY_BYTES + ?EMBEDDED_SIZE_BYTES)). - -%% + 2 for seq, bits and prefix --define(PUB_RECORD_PREFIX_BYTES, 2). - -%% ---- misc ---- - --define(PUB, {_, _, _}). %% {IsPersistent, Bin, MsgBin} - --define(READ_MODE, [binary, raw, read]). --define(WRITE_MODE, [write | ?READ_MODE]). - -%%---------------------------------------------------------------------------- - --record(qistate, { - %% queue directory where segment and journal files are stored - dir, - %% map of #segment records - segments, - %% journal file handle obtained from/used by file_handle_cache - journal_handle, - %% how many not yet flushed entries are there - dirty_count, - %% this many not yet flushed journal entries will force a flush - max_journal_entries, - %% callback function invoked when a message is "handled" - %% by the index and potentially can be confirmed to the publisher - on_sync, - on_sync_msg, - %% set of IDs of unconfirmed [to publishers] messages - unconfirmed, - unconfirmed_msg, - %% optimisation - pre_publish_cache, - %% optimisation - delivered_cache, - %% queue name resource record - queue_name}). - --record(segment, { - %% segment ID (an integer) - num, - %% segment file path (see also ?SEGMENT_EXTENSION) - path, - %% index operation log entries in this segment - journal_entries, - entries_to_segment, - %% counter of unacknowledged messages - unacked -}). - --include_lib("rabbit_common/include/rabbit.hrl"). --include_lib("kernel/include/logger.hrl"). - -%%---------------------------------------------------------------------------- - --type hdl() :: ('undefined' | any()). --type segment() :: ('undefined' | - #segment { num :: non_neg_integer(), - path :: file:filename(), - journal_entries :: array:array(), - entries_to_segment :: array:array(), - unacked :: non_neg_integer() - }). --type seg_map() :: {map(), [segment()]}. --type on_sync_fun() :: fun ((sets:set()) -> ok). --type qistate() :: #qistate { dir :: file:filename(), - segments :: 'undefined' | seg_map(), - journal_handle :: hdl(), - dirty_count :: integer(), - max_journal_entries :: non_neg_integer(), - on_sync :: on_sync_fun(), - on_sync_msg :: on_sync_fun(), - unconfirmed :: sets:set(), - unconfirmed_msg :: sets:set(), - pre_publish_cache :: list(), - delivered_cache :: list() - }. --type contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean()). --type walker(A) :: fun ((A) -> 'finished' | - {rabbit_types:msg_id(), non_neg_integer(), A}). --type shutdown_terms() :: [term()] | 'non_clean_shutdown'. - -%%---------------------------------------------------------------------------- -%% public API -%%---------------------------------------------------------------------------- - --spec erase(rabbit_amqqueue:name()) -> 'ok'. - -erase(#resource{ virtual_host = VHost } = Name) -> - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - #qistate { dir = Dir } = blank_state(VHostDir, Name), - erase_index_dir(Dir). - -%% used during variable queue purge when there are no pending acks - --spec reset_state(qistate()) -> qistate(). - -reset_state(#qistate{ queue_name = Name, - dir = Dir, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun, - journal_handle = JournalHdl }) -> - ok = case JournalHdl of - undefined -> ok; - _ -> file_handle_cache:close(JournalHdl) - end, - ok = erase_index_dir(Dir), - blank_state_name_dir_funs(Name, Dir, OnSyncFun, OnSyncMsgFun). - --spec init(rabbit_amqqueue:name(), - on_sync_fun(), on_sync_fun()) -> qistate(). - -init(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) -> - #{segment_entry_count := SegmentEntryCount} = rabbit_vhost:read_config(VHost), - put(segment_entry_count, SegmentEntryCount), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - State = #qistate { dir = Dir } = blank_state(VHostDir, Name), - false = rabbit_file:is_file(Dir), %% is_file == is file or dir - State#qistate{on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun}. - -init_args(#qistate{ queue_name = QueueName, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun }) -> - {QueueName, OnSyncFun, OnSyncMsgFun}. - -init_for_conversion(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) -> - #{segment_entry_count := SegmentEntryCount} = rabbit_vhost:read_config(VHost), - put(segment_entry_count, SegmentEntryCount), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - State = blank_state(VHostDir, Name), - State#qistate{on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun}. - --spec recover(rabbit_amqqueue:name(), shutdown_terms(), boolean(), - contains_predicate(), - on_sync_fun(), on_sync_fun(), - main | convert) -> - {'undefined' | non_neg_integer(), - 'undefined' | non_neg_integer(), qistate()}. - -recover(#resource{ virtual_host = VHost } = Name, Terms, MsgStoreRecovered, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - %% We only allow using this module when converting to v2. - convert) -> - #{segment_entry_count := SegmentEntryCount} = rabbit_vhost:read_config(VHost), - put(segment_entry_count, SegmentEntryCount), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - State = blank_state(VHostDir, Name), - State1 = State #qistate{on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun}, - CleanShutdown = Terms /= non_clean_shutdown, - case CleanShutdown andalso MsgStoreRecovered of - true -> case proplists:get_value(segments, Terms, non_clean_shutdown) of - non_clean_shutdown -> init_dirty(false, ContainsCheckFun, State1); - RecoveredCounts -> init_clean(RecoveredCounts, State1) - end; - false -> init_dirty(CleanShutdown, ContainsCheckFun, State1) - end. - --spec terminate(rabbit_types:vhost(), [any()], qistate()) -> qistate(). - -terminate(VHost, Terms, State = #qistate { dir = Dir }) -> - {SegmentCounts, State1} = terminate(State), - _ = rabbit_recovery_terms:store(VHost, filename:basename(Dir), - [{segments, SegmentCounts} | Terms]), - State1. - --spec delete_and_terminate(qistate()) -> qistate(). - -delete_and_terminate(State) -> - {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), - ok = rabbit_file:recursive_delete([Dir]), - State1. - --spec info(qistate()) -> []. - -%% No info is implemented for v1 at this time. -info(_) -> []. - -pre_publish(MsgOrId, SeqId, MsgProps, IsPersistent, IsDelivered, JournalSizeHint, - State = #qistate{pre_publish_cache = PPC, - delivered_cache = DC}) -> - State1 = maybe_needs_confirming(MsgProps, MsgOrId, State), - - {Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps), - - PPC1 = - [[<<(case IsPersistent of - true -> ?PUB_PERSIST_JPREFIX; - false -> ?PUB_TRANS_JPREFIX - end):?JPREFIX_BITS, - SeqId:?SEQ_BITS, Bin/binary, - (size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin] | PPC], - - DC1 = - case IsDelivered of - true -> - [SeqId | DC]; - false -> - DC - end, - - State2 = add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1), - maybe_flush_pre_publish_cache( - JournalSizeHint, - State2#qistate{pre_publish_cache = PPC1, - delivered_cache = DC1}). - -%% pre_publish_cache is the entry with most elements when compared to -%% delivered_cache so we only check the former in the guard. -maybe_flush_pre_publish_cache(JournalSizeHint, - #qistate{pre_publish_cache = PPC} = State) -> - case length(PPC) >= segment_entry_count() of - true -> flush_pre_publish_cache(JournalSizeHint, State); - false -> State - end. - -flush_pre_publish_cache(JournalSizeHint, State) -> - State1 = flush_pre_publish_cache(State), - State2 = flush_delivered_cache(State1), - maybe_flush_journal(JournalSizeHint, State2). - -flush_pre_publish_cache(#qistate{pre_publish_cache = []} = State) -> - State; -flush_pre_publish_cache(State = #qistate{pre_publish_cache = PPC}) -> - {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append(JournalHdl, lists:reverse(PPC)), - State1#qistate{pre_publish_cache = []}. - -flush_delivered_cache(#qistate{delivered_cache = []} = State) -> - State; -flush_delivered_cache(State = #qistate{delivered_cache = DC}) -> - State1 = deliver(lists:reverse(DC), State), - State1#qistate{delivered_cache = []}. - -publish(MsgOrId, SeqId, _Location, MsgProps, IsPersistent, JournalSizeHint, State) -> - {JournalHdl, State1} = - get_journal_handle( - maybe_needs_confirming(MsgProps, MsgOrId, State)), - {Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps), - ok = file_handle_cache:append( - JournalHdl, [<<(case IsPersistent of - true -> ?PUB_PERSIST_JPREFIX; - false -> ?PUB_TRANS_JPREFIX - end):?JPREFIX_BITS, - SeqId:?SEQ_BITS, Bin/binary, - (byte_size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin]), - maybe_flush_journal( - JournalSizeHint, - add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1)). - -publish(MsgOrId, SeqId, Location, MsgProps, IsPersistent, _, JournalSizeHint, State) -> - publish(MsgOrId, SeqId, Location, MsgProps, IsPersistent, JournalSizeHint, State). - -maybe_needs_confirming(MsgProps, MsgOrId, - State = #qistate{unconfirmed = UC, - unconfirmed_msg = UCM}) -> - MsgId = case MsgOrId of - Id when is_binary(Id) -> Id; - Msg -> - mc:get_annotation(id, Msg) - end, - ?MSG_ID_BYTES = byte_size(MsgId), - case {MsgProps#message_properties.needs_confirming, MsgOrId} of - {true, MsgId} -> UC1 = sets:add_element(MsgId, UC), - State#qistate{unconfirmed = UC1}; - {true, _} -> UCM1 = sets:add_element(MsgId, UCM), - State#qistate{unconfirmed_msg = UCM1}; - {false, _} -> State - end. - --spec deliver([rabbit_variable_queue:seq_id()], qistate()) -> qistate(). - -deliver(SeqIds, State) -> - deliver_or_ack(del, SeqIds, State). - --spec ack([rabbit_variable_queue:seq_id()], qistate()) -> {[], qistate()}. - -ack(SeqIds, State) -> - {[], deliver_or_ack(ack, SeqIds, State)}. - -%% This is called when there are outstanding confirms or when the -%% queue is idle and the journal needs syncing (see needs_sync/1). - --spec sync(qistate()) -> qistate(). - -sync(State = #qistate { journal_handle = undefined }) -> - State; -sync(State = #qistate { journal_handle = JournalHdl }) -> - ok = file_handle_cache:sync(JournalHdl), - notify_sync(State). - --spec needs_sync(qistate()) -> 'confirms' | 'other' | 'false'. - -needs_sync(#qistate{journal_handle = undefined}) -> - false; -needs_sync(#qistate{journal_handle = JournalHdl, - unconfirmed = UC, - unconfirmed_msg = UCM}) -> - case sets:is_empty(UC) andalso sets:is_empty(UCM) of - true -> case file_handle_cache:needs_sync(JournalHdl) of - true -> other; - false -> false - end; - false -> confirms - end. - --spec flush(qistate()) -> qistate(). - -flush(State = #qistate { dirty_count = 0 }) -> State; -flush(State) -> flush_journal(State). - --spec read(rabbit_variable_queue:seq_id(), - rabbit_variable_queue:seq_id(), - qistate()) -> - {[{rabbit_types:msg_id(), rabbit_variable_queue:seq_id(), - rabbit_variable_queue:msg_location(), - rabbit_types:message_properties(), - boolean()}], qistate()}. - -read(StartEnd, StartEnd, State) -> - {[], State}; -read(Start, End, State = #qistate { segments = Segments, - dir = Dir }) when Start =< End -> - %% Start is inclusive, End is exclusive. - LowerB = {StartSeg, _StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), - UpperB = {EndSeg, _EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End - 1), - {Messages, Segments1} = - lists:foldr(fun (Seg, Acc) -> - read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir) - end, {[], Segments}, lists:seq(StartSeg, EndSeg)), - {Messages, State #qistate { segments = Segments1 }}. - --spec next_segment_boundary(rabbit_variable_queue:seq_id()) -> rabbit_variable_queue:seq_id(). - -next_segment_boundary(SeqId) -> - {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - reconstruct_seq_id(Seg + 1, 0). - --spec bounds(qistate()) -> - {non_neg_integer(), non_neg_integer(), qistate()}. - -bounds(State = #qistate { segments = Segments }) -> - %% This is not particularly efficient, but only gets invoked on - %% queue initialisation. - SegNums = lists:sort(segment_nums(Segments)), - %% Don't bother trying to figure out the lowest seq_id, merely the - %% seq_id of the start of the lowest segment. That seq_id may not - %% actually exist, but that's fine. The important thing is that - %% the segment exists and the seq_id reported is on a segment - %% boundary. - %% - %% We also don't really care about the max seq_id. Just start the - %% next segment: it makes life much easier. - %% - %% SegNums is sorted, ascending. - {LowSeqId, NextSeqId} = - case SegNums of - [] -> {0, 0}; - [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), - reconstruct_seq_id(1 + lists:last(SegNums), 0)} - end, - {LowSeqId, NextSeqId, State}. - --spec start(rabbit_types:vhost(), [rabbit_amqqueue:name()]) -> {[[any()]], {walker(A), A}}. - -start(VHost, DurableQueueNames) -> - {ok, RecoveryTermsPid} = rabbit_recovery_terms:start(VHost), - rabbit_vhost_sup_sup:save_vhost_recovery_terms(VHost, RecoveryTermsPid), - {DurableTerms, DurableDirectories} = - lists:foldl( - fun(QName, {RecoveryTerms, ValidDirectories}) -> - DirName = queue_name_to_dir_name(QName), - RecoveryInfo = case rabbit_recovery_terms:read(VHost, DirName) of - {error, _} -> non_clean_shutdown; - {ok, Terms} -> Terms - end, - {[RecoveryInfo | RecoveryTerms], - sets:add_element(DirName, ValidDirectories)} - end, {[], sets:new()}, DurableQueueNames), - %% Any queue directory we've not been asked to recover is considered garbage - ToDelete = [filename:join([rabbit_vhost:msg_store_dir_path(VHost), "queues", Dir]) - || Dir <- lists:subtract(all_queue_directory_names(VHost), - sets:to_list(DurableDirectories))], - ?LOG_DEBUG("Deleting unknown files/folders: ~p", [ToDelete]), - _ = rabbit_file:recursive_delete(ToDelete), - - rabbit_recovery_terms:clear(VHost), - - %% The backing queue interface requires that the queue recovery terms - %% which come back from start/1 are in the same order as DurableQueueNames - OrderedTerms = lists:reverse(DurableTerms), - {OrderedTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. - - -stop(VHost) -> rabbit_recovery_terms:stop(VHost). - -all_queue_directory_names(VHost) -> - VHostQueuesPath = filename:join([rabbit_vhost:msg_store_dir_path(VHost), "queues"]), - case filelib:is_dir(VHostQueuesPath) of - true -> - {ok, Dirs} = file:list_dir(VHostQueuesPath), - Dirs; - false -> [] - end. - -%%---------------------------------------------------------------------------- -%% startup and shutdown -%%---------------------------------------------------------------------------- - -erase_index_dir(Dir) -> - case rabbit_file:is_dir(Dir) of - true -> rabbit_file:recursive_delete([Dir]); - false -> ok - end. - -blank_state(VHostDir, QueueName) -> - Dir = queue_dir(VHostDir, QueueName), - blank_state_name_dir_funs(QueueName, - Dir, - fun (_) -> ok end, - fun (_) -> ok end). - -queue_dir(VHostDir, QueueName) -> - %% Queue directory is - %% {node_database_dir}/msg_stores/vhosts/{vhost}/queues/{queue} - QueueDir = queue_name_to_dir_name(QueueName), - filename:join([VHostDir, "queues", QueueDir]). - -queue_name_to_dir_name(#resource { kind = queue, - virtual_host = VHost, - name = QName }) -> - <> = erlang:md5(<<"queue", VHost/binary, QName/binary>>), - rabbit_misc:format("~.36B", [Num]). - -blank_state_name_dir_funs(Name, Dir, OnSyncFun, OnSyncMsgFun) -> - {ok, MaxJournal} = - application:get_env(rabbit, queue_index_max_journal_entries), - #qistate { dir = Dir, - segments = segments_new(), - journal_handle = undefined, - dirty_count = 0, - max_journal_entries = MaxJournal, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun, - unconfirmed = sets:new([{version,2}]), - unconfirmed_msg = sets:new([{version,2}]), - pre_publish_cache = [], - delivered_cache = [], - queue_name = Name }. - -init_clean(RecoveredCounts, State) -> - %% Load the journal. Since this is a clean recovery this (almost) - %% gets us back to where we were on shutdown. - State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State), - %% The journal loading only creates records for segments touched - %% by the journal, and the counts are based on the journal entries - %% only. We need *complete* counts for *all* segments. By an - %% amazing coincidence we stored that information on shutdown. - Segments1 = - lists:foldl( - fun ({Seg, UnackedCount}, SegmentsN) -> - Segment = segment_find_or_new(Seg, Dir, SegmentsN), - segment_store(Segment #segment { unacked = UnackedCount }, - SegmentsN) - end, Segments, RecoveredCounts), - %% the counts above include transient messages, which would be the - %% wrong thing to return - {undefined, undefined, State1 # qistate { segments = Segments1 }}. - --define(RECOVER_COUNT, 1). --define(RECOVER_BYTES, 2). --define(RECOVER_COUNTER_SIZE, 2). - -init_dirty(CleanShutdown, ContainsCheckFun, State) -> - %% Recover the journal completely. This will also load segments - %% which have entries in the journal and remove duplicates. The - %% counts will correctly reflect the combination of the segment - %% and the journal. - State1 = #qistate { dir = Dir, segments = Segments } = - recover_journal(State), - {Segments1, Count, Bytes, DirtyCount} = - %% Load each segment in turn and filter out messages that are - %% not in the msg_store, by adding acks to the journal. These - %% acks only go to the RAM journal as it doesn't matter if we - %% lose them. Also mark delivered if not clean shutdown. Also - %% find the number of unacked messages. Also accumulate the - %% dirty count here, so we can call maybe_flush_journal below - %% and avoid unnecessary file system operations. - lists:foldl( - fun (Seg, {Segments2, CountAcc, BytesAcc, DirtyCount}) -> - {{Segment = #segment { unacked = UnackedCount }, Dirty}, - UnackedBytes} = - recover_segment(ContainsCheckFun, CleanShutdown, - segment_find_or_new(Seg, Dir, Segments2), - State1#qistate.max_journal_entries), - {segment_store(Segment, Segments2), - CountAcc + UnackedCount, - BytesAcc + UnackedBytes, DirtyCount + Dirty} - end, {Segments, 0, 0, 0}, all_segment_nums(State1)), - %% We force flush the journal to avoid getting into a bad state - %% when the node gets shut down immediately after init. It takes - %% a few restarts for the problem to materialize itself, with - %% at least one message published, followed by the process crashing, - %% followed by a recovery that is dirty due to term mismatch in the - %% message store, followed by two clean recoveries. This last - %% recovery fails with a crash. - State2 = flush_journal(State1 #qistate { segments = Segments1, - dirty_count = DirtyCount }), - {Count, Bytes, State2}. - -terminate(State = #qistate { journal_handle = JournalHdl, - segments = Segments }) -> - ok = case JournalHdl of - undefined -> ok; - _ -> file_handle_cache:close(JournalHdl) - end, - SegmentCounts = - segment_fold( - fun (#segment { num = Seg, unacked = UnackedCount }, Acc) -> - [{Seg, UnackedCount} | Acc] - end, [], Segments), - {SegmentCounts, State #qistate { journal_handle = undefined, - segments = undefined }}. - -recover_segment(ContainsCheckFun, CleanShutdown, - Segment = #segment { journal_entries = JEntries }, MaxJournal) -> - {SegEntries, UnackedCount} = load_segment(false, Segment), - {SegEntries1, UnackedCountDelta} = - segment_plus_journal(SegEntries, JEntries), - array:sparse_foldl( - fun (RelSeq, {{IsPersistent, Bin, MsgBin}, Del, no_ack}, - {SegmentAndDirtyCount, Bytes}) -> - {MsgOrId, MsgProps} = parse_pub_record_body(Bin, MsgBin), - {recover_message(ContainsCheckFun(MsgOrId), CleanShutdown, - Del, RelSeq, SegmentAndDirtyCount, MaxJournal), - %% @todo If the message is dropped we shouldn't add the size? - Bytes + case IsPersistent of - true -> MsgProps#message_properties.size; - false -> 0 - end} - end, - {{Segment #segment { unacked = UnackedCount + UnackedCountDelta }, 0}, 0}, - SegEntries1). - -recover_message( true, true, _Del, _RelSeq, SegmentAndDirtyCount, _MaxJournal) -> - SegmentAndDirtyCount; -recover_message( true, false, del, _RelSeq, SegmentAndDirtyCount, _MaxJournal) -> - SegmentAndDirtyCount; -recover_message( true, false, no_del, RelSeq, {Segment, _DirtyCount}, MaxJournal) -> - %% force to flush the segment - {add_to_journal(RelSeq, del, Segment), MaxJournal + 1}; -recover_message(false, _, del, RelSeq, {Segment, DirtyCount}, _MaxJournal) -> - {add_to_journal(RelSeq, ack, Segment), DirtyCount + 1}; -recover_message(false, _, no_del, RelSeq, {Segment, DirtyCount}, _MaxJournal) -> - {add_to_journal(RelSeq, ack, - add_to_journal(RelSeq, del, Segment)), - DirtyCount + 2}. - -%%---------------------------------------------------------------------------- -%% msg store startup delta function -%%---------------------------------------------------------------------------- - -queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> - {ok, Gatherer} = gatherer:start_link(), - [begin - ok = gatherer:fork(Gatherer), - ok = worker_pool:submit_async( - fun () -> link(Gatherer), - ok = queue_index_walker_reader(QueueName, Gatherer), - unlink(Gatherer), - ok - end) - end || QueueName <- DurableQueues], - queue_index_walker({next, Gatherer}); - -queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> - case gatherer:out(Gatherer) of - empty -> - ok = gatherer:stop(Gatherer), - finished; - {value, {MsgId, Count}} -> - {MsgId, Count, {next, Gatherer}} - end. - -queue_index_walker_reader(QueueName, Gatherer) -> - ok = scan_queue_segments( - fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, ok) - when is_binary(MsgId) -> - gatherer:sync_in(Gatherer, {MsgId, 1}); - (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered, - _IsAcked, Acc) -> - Acc - end, ok, QueueName), - ok = gatherer:finish(Gatherer). - -scan_queue_segments(Fun, Acc, #resource{ virtual_host = VHost } = QueueName) -> - %% Set the segment_entry_count for this worker process. - #{segment_entry_count := SegmentEntryCount} = rabbit_vhost:read_config(VHost), - put(segment_entry_count, SegmentEntryCount), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - scan_queue_segments(Fun, Acc, VHostDir, QueueName). - -scan_queue_segments(Fun, Acc, VHostDir, QueueName) -> - State = #qistate { segments = Segments, dir = Dir } = - recover_journal(blank_state(VHostDir, QueueName)), - Result = lists:foldr( - fun (Seg, AccN) -> - segment_entries_foldr( - fun (RelSeq, {{MsgOrId, MsgProps, IsPersistent}, - IsDelivered, IsAcked}, AccM) -> - Fun(reconstruct_seq_id(Seg, RelSeq), MsgOrId, MsgProps, - IsPersistent, IsDelivered, IsAcked, AccM) - end, AccN, segment_find_or_new(Seg, Dir, Segments)) - end, Acc, all_segment_nums(State)), - {_SegmentCounts, _State} = terminate(State), - Result. - -%%---------------------------------------------------------------------------- -%% expiry/binary manipulation -%%---------------------------------------------------------------------------- - -create_pub_record_body(MsgOrId, #message_properties { expiry = Expiry, - size = Size }) -> - ExpiryBin = expiry_to_binary(Expiry), - case MsgOrId of - MsgId when is_binary(MsgId) -> - {<>, <<>>}; - Msg -> - MsgId = mc:get_annotation(id, Msg), - MsgBin = term_to_binary(MsgOrId), - {<>, MsgBin} - end. - -expiry_to_binary(undefined) -> <>; -expiry_to_binary(Expiry) -> <>. - -parse_pub_record_body(<>, MsgBin) -> - %% work around for binary data fragmentation. See - %% rabbit_msg_file:read_next/2 - <> = <>, - Props = #message_properties{expiry = case Expiry of - ?NO_EXPIRY -> undefined; - X -> X - end, - size = Size}, - case MsgBin of - <<>> -> {MsgId, Props}; - _ -> - Msg = binary_to_term(MsgBin), - %% assertion - MsgId = mc:get_annotation(id, Msg), - {Msg, Props} - end. - -%%---------------------------------------------------------------------------- -%% journal manipulation -%%---------------------------------------------------------------------------- - -add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, - segments = Segments, - dir = Dir }) -> - {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - Segment = segment_find_or_new(Seg, Dir, Segments), - Segment1 = add_to_journal(RelSeq, Action, Segment), - State #qistate { dirty_count = DCount + 1, - segments = segment_store(Segment1, Segments) }; - -add_to_journal(RelSeq, Action, - Segment = #segment { journal_entries = JEntries, - entries_to_segment = EToSeg, - unacked = UnackedCount }) -> - - {Fun, Entry} = action_to_entry(RelSeq, Action, JEntries), - - {JEntries1, EToSeg1} = - case Fun of - set -> - {array:set(RelSeq, Entry, JEntries), - array:set(RelSeq, entry_to_segment(RelSeq, Entry, []), - EToSeg)}; - reset -> - {array:reset(RelSeq, JEntries), - array:reset(RelSeq, EToSeg)} - end, - - Segment #segment { - journal_entries = JEntries1, - entries_to_segment = EToSeg1, - unacked = UnackedCount + case Action of - ?PUB -> +1; - del -> 0; - ack -> -1 - end}. - -action_to_entry(RelSeq, Action, JEntries) -> - case array:get(RelSeq, JEntries) of - undefined -> - {set, - case Action of - ?PUB -> {Action, no_del, no_ack}; - del -> {no_pub, del, no_ack}; - ack -> {no_pub, no_del, ack} - end}; - ({Pub, no_del, no_ack}) when Action == del -> - {set, {Pub, del, no_ack}}; - ({no_pub, del, no_ack}) when Action == ack -> - {set, {no_pub, del, ack}}; - ({?PUB, del, no_ack}) when Action == ack -> - {reset, none}; - %% Special case, missing del - %% See journal_minus_segment1/2 - ({?PUB, no_del, no_ack}) when Action == ack -> - {reset, none} - end. - -maybe_flush_journal(State) -> - maybe_flush_journal(infinity, State). - -maybe_flush_journal(Hint, State = #qistate { dirty_count = DCount, - max_journal_entries = MaxJournal }) - when DCount > MaxJournal orelse (Hint =/= infinity andalso DCount > Hint) -> - flush_journal(State); -maybe_flush_journal(_Hint, State) -> - State. - -flush_journal(State = #qistate { segments = Segments }) -> - Segments1 = - segment_fold( - fun (#segment { unacked = 0, path = Path }, SegmentsN) -> - case rabbit_file:is_file(Path) of - true -> ok = rabbit_file:delete(Path); - false -> ok - end, - SegmentsN; - (#segment {} = Segment, SegmentsN) -> - segment_store(append_journal_to_segment(Segment), SegmentsN) - end, segments_new(), Segments), - {JournalHdl, State1} = - get_journal_handle(State #qistate { segments = Segments1 }), - ok = file_handle_cache:clear(JournalHdl), - notify_sync(State1 #qistate { dirty_count = 0 }). - -append_journal_to_segment(#segment { journal_entries = JEntries, - entries_to_segment = EToSeg, - path = Path } = Segment) -> - case array:sparse_size(JEntries) of - 0 -> Segment; - _ -> - {ok, Hdl} = file_handle_cache:open_with_absolute_path( - Path, ?WRITE_MODE, - [{write_buffer, infinity}]), - %% the file_handle_cache also does a list reverse, so this - %% might not be required here, but before we were doing a - %% sparse_foldr, a lists:reverse/1 seems to be the correct - %% thing to do for now. - _ = file_handle_cache:append(Hdl, lists:reverse(array:to_list(EToSeg))), - ok = file_handle_cache:close(Hdl), - Segment #segment { journal_entries = array_new(), - entries_to_segment = array_new([]) } - end. - -get_journal_handle(State = #qistate { journal_handle = undefined, - dir = Dir, - queue_name = Name }) -> - Path = filename:join(Dir, ?JOURNAL_FILENAME), - ok = rabbit_file:ensure_dir(Path), - ok = ensure_queue_name_stub_file(Dir, Name), - {ok, Hdl} = file_handle_cache:open_with_absolute_path( - Path, ?WRITE_MODE, [{write_buffer, infinity}]), - {Hdl, State #qistate { journal_handle = Hdl }}; -get_journal_handle(State = #qistate { journal_handle = Hdl }) -> - {Hdl, State}. - -%% Loading Journal. This isn't idempotent and will mess up the counts -%% if you call it more than once on the same state. Assumes the counts -%% are 0 to start with. -load_journal(State = #qistate { dir = Dir }) -> - Path = filename:join(Dir, ?JOURNAL_FILENAME), - case rabbit_file:is_file(Path) of - true -> {JournalHdl, State1} = get_journal_handle(State), - Size = rabbit_file:file_size(Path), - {ok, 0} = file_handle_cache:position(JournalHdl, 0), - {ok, JournalBin} = file_handle_cache:read(JournalHdl, Size), - parse_journal_entries(JournalBin, State1); - false -> State - end. - -%% ditto -recover_journal(State) -> - State1 = #qistate { segments = Segments } = load_journal(State), - Segments1 = - segment_map( - fun (Segment = #segment { journal_entries = JEntries, - entries_to_segment = EToSeg, - unacked = UnackedCountInJournal }) -> - %% We want to keep ack'd entries in so that we can - %% remove them if duplicates are in the journal. The - %% counts here are purely from the segment itself. - {SegEntries, UnackedCountInSeg} = load_segment(true, Segment), - {JEntries1, EToSeg1, UnackedCountDuplicates} = - journal_minus_segment(JEntries, EToSeg, SegEntries), - Segment #segment { journal_entries = JEntries1, - entries_to_segment = EToSeg1, - unacked = (UnackedCountInJournal + - UnackedCountInSeg - - UnackedCountDuplicates) } - end, Segments), - State1 #qistate { segments = Segments1 }. - -parse_journal_entries(<>, State) -> - parse_journal_entries(Rest, add_to_journal(SeqId, del, State)); - -parse_journal_entries(<>, State) -> - parse_journal_entries(Rest, add_to_journal(SeqId, ack, State)); -parse_journal_entries(<<0:?JPREFIX_BITS, 0:?SEQ_BITS, - 0:?PUB_RECORD_SIZE_BYTES/unit:8, _/binary>>, State) -> - %% Journal entry composed only of zeroes was probably - %% produced during a dirty shutdown so stop reading - State; -parse_journal_entries(<>, State) -> - IsPersistent = case Prefix of - ?PUB_PERSIST_JPREFIX -> true; - ?PUB_TRANS_JPREFIX -> false - end, - parse_journal_entries( - Rest, add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State)); -parse_journal_entries(_ErrOrEoF, State) -> - State. - -deliver_or_ack(_Kind, [], State) -> - State; -deliver_or_ack(Kind, SeqIds, State) -> - JPrefix = case Kind of ack -> ?ACK_JPREFIX; del -> ?DEL_JPREFIX end, - {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append( - JournalHdl, - [<> || SeqId <- SeqIds]), - maybe_flush_journal(lists:foldl(fun (SeqId, StateN) -> - add_to_journal(SeqId, Kind, StateN) - end, State1, SeqIds)). - -notify_sync(State = #qistate{unconfirmed = UC, - unconfirmed_msg = UCM, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun}) -> - State1 = case sets:is_empty(UC) of - true -> State; - false -> OnSyncFun(UC), - State#qistate{unconfirmed = sets:new([{version,2}])} - end, - case sets:is_empty(UCM) of - true -> State1; - false -> OnSyncMsgFun(UCM), - State1#qistate{unconfirmed_msg = sets:new([{version,2}])} - end. - -%%---------------------------------------------------------------------------- -%% segment manipulation -%%---------------------------------------------------------------------------- - -seq_id_to_seg_and_rel_seq_id(SeqId) -> - SegmentEntryCount = segment_entry_count(), - { SeqId div SegmentEntryCount, SeqId rem SegmentEntryCount }. - -reconstruct_seq_id(Seg, RelSeq) -> - (Seg * segment_entry_count()) + RelSeq. - -all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> - lists:sort( - sets:to_list( - lists:foldl( - fun (SegName, Set) -> - sets:add_element( - list_to_integer( - lists:takewhile(fun (C) -> $0 =< C andalso C =< $9 end, - SegName)), Set) - end, sets:from_list(segment_nums(Segments)), - rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir)))). - -segment_find_or_new(Seg, Dir, Segments) -> - case segment_find(Seg, Segments) of - {ok, Segment} -> Segment; - error -> SegName = integer_to_list(Seg) ++ ?SEGMENT_EXTENSION, - Path = filename:join(Dir, SegName), - #segment { num = Seg, - path = Path, - journal_entries = array_new(), - entries_to_segment = array_new([]), - unacked = 0 } - end. - -segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) -> - {ok, Segment}; %% 1 or (2, matches head) -segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) -> - {ok, Segment}; %% 2, matches tail -segment_find(Seg, {Segments, _}) -> %% no match - maps:find(Seg, Segments). - -segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head) - {Segments, [#segment { num = Seg } | Tail]}) -> - {Segments, [Segment | Tail]}; -segment_store(Segment = #segment { num = Seg }, %% 2, matches tail - {Segments, [SegmentA, #segment { num = Seg }]}) -> - {Segments, [Segment, SegmentA]}; -segment_store(Segment = #segment { num = Seg }, {Segments, []}) -> - {maps:remove(Seg, Segments), [Segment]}; -segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) -> - {maps:remove(Seg, Segments), [Segment, SegmentA]}; -segment_store(Segment = #segment { num = Seg }, - {Segments, [SegmentA, SegmentB]}) -> - {maps:put(SegmentB#segment.num, SegmentB, maps:remove(Seg, Segments)), - [Segment, SegmentA]}. - -segment_fold(Fun, Acc, {Segments, CachedSegments}) -> - maps:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc1) end, - lists:foldl(Fun, Acc, CachedSegments), Segments). - -segment_map(Fun, {Segments, CachedSegments}) -> - {maps:map(fun (_Seg, Segment) -> Fun(Segment) end, Segments), - lists:map(Fun, CachedSegments)}. - -segment_nums({Segments, CachedSegments}) -> - lists:map(fun (#segment { num = Num }) -> Num end, CachedSegments) ++ - maps:keys(Segments). - -segments_new() -> - {#{}, []}. - -entry_to_segment(_RelSeq, {?PUB, del, ack}, Initial) -> - Initial; -entry_to_segment(RelSeq, {Pub, Del, Ack}, Initial) -> - %% NB: we are assembling the segment in reverse order here, so - %% del/ack comes first. - Buf1 = case {Del, Ack} of - {no_del, no_ack} -> - Initial; - _ -> - Binary = <>, - case {Del, Ack} of - {del, ack} -> [[Binary, Binary] | Initial]; - _ -> [Binary | Initial] - end - end, - case Pub of - no_pub -> - Buf1; - {IsPersistent, Bin, MsgBin} -> - [[<>, MsgBin] | Buf1] - end. - -read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq}, - {Messages, Segments}, Dir) -> - Segment = segment_find_or_new(Seg, Dir, Segments), - {segment_entries_foldr( - fun (RelSeq, {{MsgOrId, MsgProps, IsPersistent}, _IsDelivered, no_ack}, - Acc) - when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso - (Seg < EndSeg orelse EndRelSeq >= RelSeq) -> - MsgLocation = case is_tuple(MsgOrId) of - true -> rabbit_queue_index; - false -> rabbit_msg_store - end, - [{MsgOrId, reconstruct_seq_id(StartSeg, RelSeq), MsgLocation, MsgProps, - IsPersistent} | Acc]; - (_RelSeq, _Value, Acc) -> - Acc - end, Messages, Segment), - segment_store(Segment, Segments)}. - -segment_entries_foldr(Fun, Init, - Segment = #segment { journal_entries = JEntries }) -> - {SegEntries, _UnackedCount} = load_segment(false, Segment), - {SegEntries1, _UnackedCountD} = segment_plus_journal(SegEntries, JEntries), - array:sparse_foldr( - fun (RelSeq, {{IsPersistent, Bin, MsgBin}, Del, Ack}, Acc) -> - {MsgOrId, MsgProps} = parse_pub_record_body(Bin, MsgBin), - Fun(RelSeq, {{MsgOrId, MsgProps, IsPersistent}, Del, Ack}, Acc) - end, Init, SegEntries1). - -%% Loading segments -%% -%% Does not do any combining with the journal at all. -load_segment(KeepAcked, #segment { path = Path }) -> - Empty = {array_new(), 0}, - case rabbit_file:is_file(Path) of - false -> Empty; - true -> Size = rabbit_file:file_size(Path), - {ok, Hdl} = file_handle_cache:open_with_absolute_path( - Path, ?READ_MODE, []), - {ok, 0} = file_handle_cache:position(Hdl, bof), - {ok, SegBin} = file_handle_cache:read(Hdl, Size), - ok = file_handle_cache:close(Hdl), - %% We check if the file is full of 0s. I do not know why this can happen - %% but this happens AT LEAST during v2->v1 conversion when resuming after - %% a crash has happened. Since the file is invalid, we delete it and - %% return no entries instead of just crashing (just like if the file - %% was missing above). We also log some information. - case SegBin of - <<0:Size/unit:8>> -> - ?LOG_WARNING("Deleting invalid v1 segment file ~ts (file only contains NUL bytes)", - [Path]), - _ = rabbit_file:delete(Path), - Empty; - _ -> - Res = parse_segment_entries(SegBin, KeepAcked, Empty), - Res - end - end. - -parse_segment_entries(<>, - KeepAcked, Acc) -> - parse_segment_publish_entry( - Rest, 1 == IsPersistNum, RelSeq, KeepAcked, Acc); -parse_segment_entries(<>, KeepAcked, Acc) -> - parse_segment_entries( - Rest, KeepAcked, add_segment_relseq_entry(KeepAcked, RelSeq, Acc)); -parse_segment_entries(<<>>, _KeepAcked, Acc) -> - Acc. - -parse_segment_publish_entry(<>, - IsPersistent, RelSeq, KeepAcked, - {SegEntries, Unacked}) -> - Obj = {{IsPersistent, Bin, MsgBin}, no_del, no_ack}, - SegEntries1 = array:set(RelSeq, Obj, SegEntries), - parse_segment_entries(Rest, KeepAcked, {SegEntries1, Unacked + 1}); -parse_segment_publish_entry(Rest, _IsPersistent, _RelSeq, KeepAcked, Acc) -> - parse_segment_entries(Rest, KeepAcked, Acc). - -add_segment_relseq_entry(KeepAcked, RelSeq, {SegEntries, Unacked}) -> - case array:get(RelSeq, SegEntries) of - {Pub, no_del, no_ack} -> - {array:set(RelSeq, {Pub, del, no_ack}, SegEntries), Unacked}; - {Pub, del, no_ack} when KeepAcked -> - {array:set(RelSeq, {Pub, del, ack}, SegEntries), Unacked - 1}; - {_Pub, del, no_ack} -> - {array:reset(RelSeq, SegEntries), Unacked - 1} - end. - -array_new() -> - array_new(undefined). - -array_new(Default) -> - array:new([{default, Default}, fixed, {size, segment_entry_count()}]). - -segment_entry_count() -> - get(segment_entry_count). - -bool_to_int(true ) -> 1; -bool_to_int(false) -> 0. - -%%---------------------------------------------------------------------------- -%% journal & segment combination -%%---------------------------------------------------------------------------- - -%% Combine what we have just read from a segment file with what we're -%% holding for that segment in memory. There must be no duplicates. -segment_plus_journal(SegEntries, JEntries) -> - array:sparse_foldl( - fun (RelSeq, JObj, {SegEntriesOut, AdditionalUnacked}) -> - SegEntry = array:get(RelSeq, SegEntriesOut), - {Obj, AdditionalUnackedDelta} = - segment_plus_journal1(SegEntry, JObj), - {case Obj of - undefined -> array:reset(RelSeq, SegEntriesOut); - _ -> array:set(RelSeq, Obj, SegEntriesOut) - end, - AdditionalUnacked + AdditionalUnackedDelta} - end, {SegEntries, 0}, JEntries). - -%% Here, the result is a tuple with the first element containing the -%% item which we may be adding to (for items only in the journal), -%% modifying in (bits in both), or, when returning 'undefined', -%% erasing from (ack in journal, not segment) the segment array. The -%% other element of the tuple is the delta for AdditionalUnacked. -segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) -> - {Obj, 1}; -segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) -> - {Obj, 1}; -segment_plus_journal1(undefined, {?PUB, del, ack}) -> - {undefined, 0}; - -segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) -> - {{Pub, del, no_ack}, 0}; -segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, del, ack}) -> - {undefined, -1}; -segment_plus_journal1({?PUB, del, no_ack}, {no_pub, no_del, ack}) -> - {undefined, -1}; - -%% Special case, missing del -%% See journal_minus_segment1/2 -segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, no_del, ack}) -> - {undefined, -1}. - -%% Remove from the journal entries for a segment, items that are -%% duplicates of entries found in the segment itself. Used on start up -%% to clean up the journal. -%% -%% We need to update the entries_to_segment since they are just a -%% cache of what's on the journal. -journal_minus_segment(JEntries, EToSeg, SegEntries) -> - array:sparse_foldl( - fun (RelSeq, JObj, {JEntriesOut, EToSegOut, UnackedRemoved}) -> - SegEntry = array:get(RelSeq, SegEntries), - {Obj, UnackedRemovedDelta} = - journal_minus_segment1(JObj, SegEntry), - {JEntriesOut1, EToSegOut1} = - case Obj of - keep -> - {JEntriesOut, EToSegOut}; - undefined -> - {array:reset(RelSeq, JEntriesOut), - array:reset(RelSeq, EToSegOut)}; - _ -> - {array:set(RelSeq, Obj, JEntriesOut), - array:set(RelSeq, entry_to_segment(RelSeq, Obj, []), - EToSegOut)} - end, - {JEntriesOut1, EToSegOut1, UnackedRemoved + UnackedRemovedDelta} - end, {JEntries, EToSeg, 0}, JEntries). - -%% Here, the result is a tuple with the first element containing the -%% item we are adding to or modifying in the (initially fresh) journal -%% array. If the item is 'undefined' we leave the journal array -%% alone. The other element of the tuple is the deltas for -%% UnackedRemoved. - -%% Both the same. Must be at least the publish -journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> - {undefined, 1}; -journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> - {undefined, 0}; - -%% Just publish in journal -journal_minus_segment1({?PUB, no_del, no_ack}, undefined) -> - {keep, 0}; - -%% Publish and deliver in journal -journal_minus_segment1({?PUB, del, no_ack}, undefined) -> - {keep, 0}; -journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> - {{no_pub, del, no_ack}, 1}; - -%% Publish, deliver and ack in journal -journal_minus_segment1({?PUB, del, ack}, undefined) -> - {keep, 0}; -journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> - {{no_pub, del, ack}, 1}; -journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> - {{no_pub, no_del, ack}, 1}; - -%% Just deliver in journal -journal_minus_segment1({no_pub, del, no_ack}, {?PUB, no_del, no_ack}) -> - {keep, 0}; -journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) -> - {undefined, 0}; - -%% Just ack in journal -journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> - {keep, 0}; -journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> - {undefined, -1}; - -%% Just ack in journal, missing del -%% Since 3.10 message delivery is tracked per-queue, not per-message, -%% but to keep queue index v1 format messages are always marked as -%% delivered on publish. But for a message that was published before -%% 3.10 this is not the case and the delivery marker can be missing. -%% As a workaround we add the del marker because if a message is acked -%% it must have been delivered as well. -journal_minus_segment1({no_pub, no_del, ack}, {?PUB, no_del, no_ack}) -> - {{no_pub, del, ack}, 0}; - -%% Deliver and ack in journal -journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> - {keep, 0}; -journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> - {{no_pub, no_del, ack}, 0}; -journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> - {undefined, -1}; - -%% Missing segment. If flush_journal/1 is interrupted after deleting -%% the segment but before truncating the journal we can get these -%% cases: a delivery and an acknowledgement in the journal, or just an -%% acknowledgement in the journal, but with no segment. In both cases -%% we have really forgotten the message; so ignore what's in the -%% journal. -journal_minus_segment1({no_pub, no_del, ack}, undefined) -> - {undefined, 0}; -journal_minus_segment1({no_pub, del, ack}, undefined) -> - {undefined, 0}. - -%%---------------------------------------------------------------------------- -%% Migration functions -%%---------------------------------------------------------------------------- - -ensure_queue_name_stub_file(Dir, #resource{virtual_host = VHost, name = QName}) -> - QueueNameFile = filename:join(Dir, ?QUEUE_NAME_STUB_FILE), - file:write_file(QueueNameFile, <<"VHOST: ", VHost/binary, "\n", - "QUEUE: ", QName/binary, "\n">>). - -%% This function is only used when upgrading to the v2 index. -%% We delete the segment file without updating the state. -%% We will drop the state later on so we don't care much -%% about how accurate it is as long as we can read from -%% subsequent segment files. -delete_segment_file_for_seq_id(SeqId, #qistate { segments = Segments }) -> - {Seg, _} = seq_id_to_seg_and_rel_seq_id(SeqId), - case segment_find(Seg, Segments) of - {ok, #segment { path = Path }} -> - case rabbit_file:delete(Path) of - ok -> ok; - %% The file may not exist on disk yet. - {error, enoent} -> ok - end; - error -> - ok - end. - -delete_journal(#qistate { dir = Dir, journal_handle = JournalHdl }) -> - %% Close the journal handle if any. - ok = case JournalHdl of - undefined -> ok; - _ -> file_handle_cache:close(JournalHdl) - end, - %% Delete the journal file. - _ = rabbit_file:delete(filename:join(Dir, "journal.jif")), - ok. diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 0c2ec75767a4..e6dcb2a2d449 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -21,12 +21,8 @@ -export([start/2, stop/1]). -%% This function is used by rabbit_classic_queue_index_v2 -%% to convert v1 queues to v2 after an upgrade to 4.0. --export([convert_from_v1_to_v2_loop/8]). - %% exported for testing only --export([start_msg_store/3, stop_msg_store/1, init/5]). +-export([start_msg_store/3, stop_msg_store/1, init/4]). -include("mc.hrl"). -include_lib("stdlib/include/qlc.hrl"). @@ -187,7 +183,7 @@ persistent_bytes, %% w unacked delta_transient_bytes, %% - target_ram_count, + target_ram_count, %% Unused. ram_msg_count, %% w/o unacked ram_msg_count_prev, ram_ack_count_prev, @@ -196,7 +192,7 @@ in_counter, rates, %% There are two confirms paths: either store/index produce confirms - %% separately (v1 and v2 with per-vhost message store) or the confirms + %% separately (v2 with per-vhost message store) or the confirms %% are produced all at once while syncing/flushing (v2 with per-queue %% message store). The latter is more efficient as it avoids many %% sets operations. @@ -319,7 +315,7 @@ persistent_count :: non_neg_integer(), persistent_bytes :: non_neg_integer(), - target_ram_count :: non_neg_integer() | 'infinity', + target_ram_count :: 'infinity', ram_msg_count :: non_neg_integer(), ram_msg_count_prev :: non_neg_integer(), ram_ack_count_prev :: non_neg_integer(), @@ -367,7 +363,6 @@ %%---------------------------------------------------------------------------- start(VHost, DurableQueues) -> - %% The v2 index walker function covers both v1 and v2 index files. {AllTerms, StartFunState} = rabbit_classic_queue_index_v2:start(VHost, DurableQueues), %% Group recovery terms by vhost. ClientRefs = [Ref || Terms <- AllTerms, @@ -417,14 +412,12 @@ init(Queue, Recover, Callback) -> fun (MsgIds, ActionTaken) -> msgs_written_to_disk(Callback, MsgIds, ActionTaken) end, - fun (MsgIds) -> msg_indices_written_to_disk(Callback, MsgIds) end, - fun (MsgIds) -> msgs_and_indices_written_to_disk(Callback, MsgIds) end). + fun (MsgIds) -> msg_indices_written_to_disk(Callback, MsgIds) end). -init(Q, new, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueue(Q) -> +init(Q, new, MsgOnDiskFun, MsgIdxOnDiskFun) when ?is_amqqueue(Q) -> QueueName = amqqueue:get_name(Q), IsDurable = amqqueue:is_durable(Q), - IndexState = rabbit_classic_queue_index_v2:init(QueueName, - MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), + IndexState = rabbit_classic_queue_index_v2:init(QueueName, MsgIdxOnDiskFun), StoreState = rabbit_classic_queue_store_v2:init(QueueName), VHost = QueueName#resource.virtual_host, init(IsDurable, IndexState, StoreState, 0, 0, [], @@ -437,7 +430,7 @@ init(Q, new, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueu VHost), VHost); %% We can be recovering a transient queue if it crashed -init(Q, Terms, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueue(Q) -> +init(Q, Terms, MsgOnDiskFun, MsgIdxOnDiskFun) when ?is_amqqueue(Q) -> QueueName = amqqueue:get_name(Q), IsDurable = amqqueue:is_durable(Q), {PRef, RecoveryTerms} = process_recovery_terms(Terms), @@ -461,8 +454,7 @@ init(Q, Terms, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqu rabbit_vhost_msg_store:successfully_recovered_state( VHost, ?PERSISTENT_MSG_STORE), - ContainsCheckFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun, - main), + ContainsCheckFun, MsgIdxOnDiskFun), StoreState = rabbit_classic_queue_store_v2:init(QueueName), init(IsDurable, IndexState, StoreState, DeltaCount, DeltaBytes, RecoveryTerms, @@ -637,17 +629,19 @@ requeue(AckTags, #vqstate { delta = Delta, len = Len } = State) -> %% @todo This can be heavily simplified: if the message falls into delta, %% add it there. Otherwise just add it to q3 in the correct position. + %% @todo I think if the message falls within Q3 we must add it back there, + %% otherwise there's nothing to do? Except update stats. {SeqIds, Q3a, MsgIds, State1} = requeue_merge(lists:sort(AckTags), Q3, [], delta_limit(Delta), State), {Delta1, MsgIds1, State2} = delta_merge(SeqIds, Delta, MsgIds, State1), MsgCount = length(MsgIds1), {MsgIds1, a( - maybe_update_rates(ui( + maybe_update_rates( State2 #vqstate { delta = Delta1, q3 = Q3a, in_counter = InCounter + MsgCount, - len = Len + MsgCount })))}. + len = Len + MsgCount }))}. ackfold(MsgFun, Acc, State, AckTags) -> {AccN, StateN} = @@ -789,7 +783,6 @@ info(backing_queue_status, #vqstate { delta = Delta, q3 = Q3, mode = Mode, len = Len, - target_ram_count = TargetRamCount, next_seq_id = NextSeqId, next_deliver_seq_id = NextDeliverSeqId, ram_pending_ack = RPA, @@ -810,7 +803,7 @@ info(backing_queue_status, #vqstate { {q3 , ?QUEUE:len(Q3)}, {q4 , 0}, {len , Len}, - {target_ram_count , TargetRamCount}, + {target_ram_count , infinity}, {next_seq_id , NextSeqId}, {next_deliver_seq_id , NextDeliverSeqId}, {num_pending_acks , map_size(RPA) + map_size(DPA)}, @@ -843,60 +836,6 @@ zip_msgs_and_acks(Msgs, AckTags, Accumulator, _State) -> set_queue_version(_, State) -> State. -%% This function is used by rabbit_classic_queue_index_v2 -%% to convert v1 queues to v2 after an upgrade to 4.0. -convert_from_v1_to_v2_loop(_, _, V2Index, V2Store, _, HiSeqId, HiSeqId, _) -> - {V2Index, V2Store}; -convert_from_v1_to_v2_loop(QueueName, V1Index0, V2Index0, V2Store0, - Counters = {CountersRef, CountIx, BytesIx}, - LoSeqId, HiSeqId, SkipFun) -> - UpSeqId = lists:min([rabbit_queue_index:next_segment_boundary(LoSeqId), - HiSeqId]), - {Messages, V1Index} = rabbit_queue_index:read(LoSeqId, UpSeqId, V1Index0), - %% We do a garbage collect immediately after the old index read - %% because that may have created a lot of garbage. - garbage_collect(), - {V2Index3, V2Store3} = lists:foldl(fun - %% Move embedded messages to the per-queue store. - ({Msg, SeqId, rabbit_queue_index, Props, IsPersistent}, - {V2Index1, V2Store1}) -> - MsgId = mc:get_annotation(id, Msg), - {MsgLocation, V2Store2} = rabbit_classic_queue_store_v2:write(SeqId, Msg, Props, V2Store1), - V2Index2 = case SkipFun(SeqId, V2Index1) of - {skip, V2Index1a} -> - V2Index1a; - {write, V2Index1a} -> - counters:add(CountersRef, CountIx, 1), - counters:add(CountersRef, BytesIx, Props#message_properties.size), - rabbit_classic_queue_index_v2:publish(MsgId, SeqId, MsgLocation, Props, IsPersistent, infinity, V2Index1a) - end, - {V2Index2, V2Store2}; - %% Keep messages in the per-vhost store where they are. - ({MsgId, SeqId, rabbit_msg_store, Props, IsPersistent}, - {V2Index1, V2Store1}) -> - V2Index2 = case SkipFun(SeqId, V2Index1) of - {skip, V2Index1a} -> - V2Index1a; - {write, V2Index1a} -> - counters:add(CountersRef, CountIx, 1), - counters:add(CountersRef, BytesIx, Props#message_properties.size), - rabbit_classic_queue_index_v2:publish(MsgId, SeqId, rabbit_msg_store, Props, IsPersistent, infinity, V2Index1a) - end, - {V2Index2, V2Store1} - end, {V2Index0, V2Store0}, Messages), - %% Flush to disk to avoid keeping too much in memory between segments. - V2Index = rabbit_classic_queue_index_v2:flush(V2Index3), - V2Store = rabbit_classic_queue_store_v2:sync(V2Store3), - %% We have written everything to disk. We can delete the old segment file - %% to free up much needed space, to avoid doubling disk usage during the upgrade. - rabbit_queue_index:delete_segment_file_for_seq_id(LoSeqId, V1Index), - %% Log some progress to keep the user aware of what's going on, as moving - %% embedded messages can take quite some time. - #resource{virtual_host = VHost, name = Name} = QueueName, - ?LOG_INFO("Queue ~ts in vhost ~ts converted ~b messages from v1 to v2", - [Name, VHost, length(Messages)]), - convert_from_v1_to_v2_loop(QueueName, V1Index, V2Index, V2Store, Counters, UpSeqId, HiSeqId, SkipFun). - %% Get the Timestamp property of the first msg, if present. This is %% the one with the oldest timestamp among the heads of the pending %% acks and unread queues. We can't check disk_pending_acks as these @@ -1382,15 +1321,12 @@ stats_requeued_memory(MS, St) -> St#vqstate{?UP(len, ram_msg_count, +1), ?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}. +%% TODO!!! %% @todo For v2 since we don't remove from disk until we ack, we don't need %% to write to disk again on requeue. If the message falls within delta %% we can just drop the MsgStatus. Otherwise we just put it in q3 and %% we don't do any disk writes. %% -%% For v1 I'm not sure? I don't think we need to write to the index -%% at least, but maybe we need to write the message if not embedded? -%% I don't think we need to... -%% %% So we don't need to change anything except how we count stats as %% well as delta stats if the message falls within delta. stats_requeued_disk(MS = #msg_status{is_persistent = true}, St) -> @@ -1786,44 +1722,6 @@ maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { maybe_write_msg_to_disk(_Force, MsgStatus, State) -> {MsgStatus, State}. -%% Due to certain optimisations made inside -%% rabbit_queue_index:pre_publish/7 we need to have two separate -%% functions for index persistence. This one is only used when paging -%% during memory pressure. We didn't want to modify -%% maybe_write_index_to_disk/3 because that function is used in other -%% places. -maybe_batch_write_index_to_disk(_Force, - MsgStatus = #msg_status { - index_on_disk = true }, State) -> - {MsgStatus, State}; -maybe_batch_write_index_to_disk(Force, - MsgStatus = #msg_status { - msg = Msg, - msg_id = MsgId, - seq_id = SeqId, - is_persistent = IsPersistent, - msg_location = MsgLocation, - msg_props = MsgProps}, - State = #vqstate { - target_ram_count = TargetRamCount, - disk_write_count = DiskWriteCount, - index_state = IndexState}) - when Force orelse IsPersistent -> - {MsgOrId, DiskWriteCount1} = - case persist_to(MsgStatus) of - msg_store -> {MsgId, DiskWriteCount}; - queue_store -> {MsgId, DiskWriteCount}; - queue_index -> {prepare_to_store(Msg), DiskWriteCount + 1} - end, - IndexState1 = rabbit_classic_queue_index_v2:pre_publish( - MsgOrId, SeqId, MsgLocation, MsgProps, - IsPersistent, TargetRamCount, IndexState), - {MsgStatus#msg_status{index_on_disk = true}, - State#vqstate{index_state = IndexState1, - disk_write_count = DiskWriteCount1}}; -maybe_batch_write_index_to_disk(_Force, MsgStatus, State) -> - {MsgStatus, State}. - maybe_write_index_to_disk(_Force, MsgStatus = #msg_status { index_on_disk = true }, State) -> {MsgStatus, State}; @@ -1834,8 +1732,7 @@ maybe_write_index_to_disk(Force, MsgStatus = #msg_status { is_persistent = IsPersistent, msg_location = MsgLocation, msg_props = MsgProps}, - State = #vqstate{target_ram_count = TargetRamCount, - disk_write_count = DiskWriteCount, + State = #vqstate{disk_write_count = DiskWriteCount, index_state = IndexState}) when Force orelse IsPersistent -> {MsgOrId, DiskWriteCount1} = @@ -1846,7 +1743,7 @@ maybe_write_index_to_disk(Force, MsgStatus = #msg_status { end, IndexState2 = rabbit_classic_queue_index_v2:publish( MsgOrId, SeqId, MsgLocation, MsgProps, IsPersistent, - persist_to(MsgStatus) =:= msg_store, TargetRamCount, + persist_to(MsgStatus) =:= msg_store, IndexState), {MsgStatus#msg_status{index_on_disk = true}, State#vqstate{index_state = IndexState2, @@ -1859,18 +1756,6 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) -> {MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State), maybe_write_index_to_disk(ForceIndex, MsgStatus1, State1). -maybe_prepare_write_to_disk(ForceMsg, ForceIndex0, MsgStatus, State) -> - {MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State), - %% We want messages written to the v2 per-queue store to also - %% be written to the index for proper accounting. The situation - %% where a message can be in the store but not in the index can - %% only occur when going through this function (not via maybe_write_to_disk). - ForceIndex = case persist_to(MsgStatus) of - queue_store -> true; - _ -> ForceIndex0 - end, - maybe_batch_write_index_to_disk(ForceIndex, MsgStatus1, State1). - determine_persist_to(Msg, #message_properties{size = BodySize}, IndexMaxSize) -> @@ -2071,6 +1956,7 @@ msgs_written_to_disk(Callback, MsgIdSet, written) -> sets:union(MOD, Confirmed) }) end). +%% @todo Having to call run_backing_queue is probably reducing performance... msg_indices_written_to_disk(Callback, MsgIdSet) -> Callback(?MODULE, fun (?MODULE, State = #vqstate { msgs_on_disk = MOD, @@ -2083,11 +1969,6 @@ msg_indices_written_to_disk(Callback, MsgIdSet) -> sets:union(MIOD, Confirmed) }) end). -%% @todo Having to call run_backing_queue is probably reducing performance... -msgs_and_indices_written_to_disk(Callback, MsgIdSet) -> - Callback(?MODULE, - fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end). - %%---------------------------------------------------------------------------- %% Internal plumbing for requeue %%---------------------------------------------------------------------------- @@ -2130,8 +2011,9 @@ delta_merge(SeqIds, Delta, MsgIds, State) -> Acc; {#msg_status { msg_id = MsgId, is_persistent = IsPersistent } = MsgStatus, State1} -> - {_MsgStatus, State2} = - maybe_prepare_write_to_disk(true, true, MsgStatus, State1), +% {_MsgStatus, State2} = +% maybe_prepare_write_to_disk(true, true, MsgStatus, State1), + State2 = State1, {expand_delta(SeqId, Delta0, IsPersistent), [MsgId | MsgIds0], stats_requeued_disk(MsgStatus, State2)} end @@ -2296,9 +2178,7 @@ maybe_deltas_to_betas(DelsAndAcksFun, {List, StoreState3, MCStateP3, MCStateT3} = case WhatToRead of messages -> %% We try to read messages from disk all at once instead of - %% 1 by 1 at fetch time. When v1 is used and messages are - %% embedded, then the message content is already read from - %% disk at this point. For v2 embedded we must do a separate + %% 1 by 1 at fetch time. For v2 embedded we must do a separate %% call to obtain the contents and then merge the contents %% back into the #msg_status records. %% @@ -2419,13 +2299,6 @@ merge_sh_read_msgs([M = {MsgId, _, _, _, _}|MTail], Reads) -> merge_sh_read_msgs(MTail, _Reads) -> MTail. -%% Flushes queue index batch caches and updates queue index state. -ui(#vqstate{index_state = IndexState, - target_ram_count = TargetRamCount} = State) -> - IndexState1 = rabbit_classic_queue_index_v2:flush_pre_publish_cache( - TargetRamCount, IndexState), - State#vqstate{index_state = IndexState1}. - maybe_client_terminate(MSCStateP) -> %% Queue might have been asked to stop by the supervisor, it needs a clean %% shutdown in order for the supervising strategy to work - if it reaches max diff --git a/deps/rabbit/test/backing_queue_SUITE.erl b/deps/rabbit/test/backing_queue_SUITE.erl index 01ff9f5aa259..bb8c4b1a1f92 100644 --- a/deps/rabbit/test/backing_queue_SUITE.erl +++ b/deps/rabbit/test/backing_queue_SUITE.erl @@ -806,7 +806,6 @@ index_mod() -> rabbit_classic_queue_index_v2. bq_queue_index1(_Config) -> - init_queue_index(), IndexMod = index_mod(), SegmentSize = IndexMod:next_segment_boundary(0), TwoSegs = SegmentSize + SegmentSize, @@ -984,7 +983,7 @@ bq_queue_index_props1(_Config) -> MsgId = rabbit_guid:gen(), Props = #message_properties{expiry=12345, size = 10}, Qi1 = IndexMod:publish( - MsgId, 0, memory, Props, true, infinity, Qi0), + MsgId, 0, memory, Props, true, true, Qi0), {[{MsgId, 0, _, Props, _}], Qi2} = IndexMod:read(0, 1, Qi1), Qi2 @@ -1115,7 +1114,6 @@ bq_queue_recover(Config) -> ?MODULE, bq_queue_recover1, [Config]). bq_queue_recover1(Config) -> - init_queue_index(), IndexMod = index_mod(), Count = 2 * IndexMod:next_segment_boundary(0), QName0 = queue_name(Config, <<"bq_queue_recover-q">>), @@ -1684,8 +1682,7 @@ init_test_queue(QName) -> fun (MsgId) -> rabbit_msg_store:contains(MsgId, PersistentClient) end, - fun nop/1, fun nop/1, - main), + fun nop/1), ok = rabbit_msg_store:client_delete_and_terminate(PersistentClient), Res. @@ -1717,13 +1714,6 @@ with_empty_test_queue(Fun) -> IndexMod = index_mod(), IndexMod:delete_and_terminate(Fun(Qi, QName)). -init_queue_index() -> - %% We must set the segment entry count in the process dictionary - %% for tests that call the v1 queue index directly to have a correct - %% value. - put(segment_entry_count, 2048), - ok. - restart_app() -> rabbit:stop(), rabbit:start(). @@ -1743,7 +1733,7 @@ queue_index_publish(SeqIds, Persistent, Qi) -> QiM = IndexMod:publish( MsgId, SeqId, rabbit_msg_store, #message_properties{size = 10}, - Persistent, infinity, QiN), + Persistent, true, QiN), ok = rabbit_msg_store:write(SeqId, MsgId, MsgId, MSCState), {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc]} end, {Qi, []}, SeqIds), @@ -1764,7 +1754,7 @@ variable_queue_init(Q, Recover) -> true -> non_clean_shutdown; false -> new; Terms -> Terms - end, fun nop/2, fun nop/1, fun nop/1). + end, fun nop/2, fun nop/1). variable_queue_read_terms(QName) -> #resource { kind = queue, From b2fadc82fe4655d243211ebab186339c13b47cf1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Tue, 21 Oct 2025 14:04:24 +0200 Subject: [PATCH 02/16] fixup! WIP Remove CQv1 --- .../src/rabbit_classic_queue_index_v2.erl | 3 +++ deps/rabbit/src/rabbit_variable_queue.erl | 24 ++++++------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl index 4412c77d51c7..a1649293088c 100644 --- a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl +++ b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl @@ -22,6 +22,9 @@ -export([sync/1, needs_sync/1, flush/1, bounds/2, next_segment_boundary/1]). +%% Called by rabbit_vhost. +-export([all_queue_directory_names/1]). + %% Shared with rabbit_classic_queue_store_v2. -export([queue_dir/2]). diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index e6dcb2a2d449..6fdea935c206 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -229,7 +229,6 @@ -type msg_location() :: memory | rabbit_msg_store - | rabbit_queue_index | rabbit_classic_queue_store_v2:msg_location(). -export_type([msg_location/0]). @@ -239,7 +238,7 @@ msg, is_persistent, is_delivered, - msg_location, %% ?IN_SHARED_STORE | ?IN_QUEUE_STORE | ?IN_QUEUE_INDEX | ?IN_MEMORY + msg_location, %% ?IN_SHARED_STORE | ?IN_QUEUE_STORE | ?IN_MEMORY index_on_disk, persist_to, msg_props @@ -260,7 +259,6 @@ -define(IN_SHARED_STORE, rabbit_msg_store). -define(IN_QUEUE_STORE, {rabbit_classic_queue_store_v2, _, _}). --define(IN_QUEUE_INDEX, rabbit_queue_index). -define(IN_MEMORY, memory). -include_lib("rabbit_common/include/rabbit.hrl"). @@ -975,14 +973,10 @@ beta_msg_status({Msg, SeqId, MsgLocation, MsgProps, IsPersistent}) -> MS0#msg_status{msg_id = MsgId, msg = Msg, persist_to = case MsgLocation of - rabbit_queue_index -> queue_index; {rabbit_classic_queue_store_v2, _, _} -> queue_store; rabbit_msg_store -> msg_store end, - msg_location = case MsgLocation of - rabbit_queue_index -> memory; - _ -> MsgLocation - end}. + msg_location = MsgLocation}. beta_msg_status0(SeqId, MsgProps, IsPersistent) -> #msg_status{seq_id = SeqId, @@ -1396,7 +1390,6 @@ remove_from_disk(#msg_status { {StoreState0, record_confirms(sets:add_element(MsgId, sets:new([{version,2}])), State)} end; ?IN_QUEUE_STORE -> {rabbit_classic_queue_store_v2:remove(SeqId, StoreState0), State}; - ?IN_QUEUE_INDEX -> {StoreState0, State}; ?IN_MEMORY -> {StoreState0, State} end, StoreState = rabbit_classic_queue_store_v2:delete_segments(DeletedSegments, StoreState1), @@ -1407,7 +1400,7 @@ remove_from_disk(#msg_status { %% This function exists as a way to improve dropwhile/2 %% performance. The idea of having this function is to optimise calls -%% to rabbit_queue_index by batching delivers and acks, instead of +%% to the queue index by batching delivers and acks, instead of %% sending them one by one. %% %% Instead of removing every message as their are popped from the @@ -1449,7 +1442,7 @@ remove_by_predicate(Pred, State = #vqstate {out_counter = OutCount}) -> %% This function exists as a way to improve fetchwhile/4 %% performance. The idea of having this function is to optimise calls -%% to rabbit_queue_index by batching delivers, instead of sending them +%% to the queue index by batching delivers, instead of sending them %% one by one. %% %% Fun is the function passed to fetchwhile/4 that's @@ -1472,7 +1465,7 @@ fetch_by_predicate(Pred, Fun, FetchAcc, %% We try to do here the same as what remove(true, State) does but %% processing several messages at the same time. The idea is to -%% optimize rabbit_queue_index:deliver/2 calls by sending a list of +%% optimize IndexMod:deliver/2 calls by sending a list of %% SeqIds instead of one by one, thus process_queue_entries1 will %% accumulate the required deliveries, will record_pending_ack for %% each message, and will update stats, like remove/2 does. @@ -1716,8 +1709,7 @@ maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { queue_store -> {MsgLocation, StoreState} = rabbit_classic_queue_store_v2:write(SeqId, prepare_to_store(Msg), Props, StoreState0), {MsgStatus#msg_status{ msg_location = MsgLocation }, State#vqstate{ store_state = StoreState, - disk_write_count = Count + 1}}; - queue_index -> {MsgStatus, State} + disk_write_count = Count + 1}} end; maybe_write_msg_to_disk(_Force, MsgStatus, State) -> {MsgStatus, State}. @@ -1738,8 +1730,7 @@ maybe_write_index_to_disk(Force, MsgStatus = #msg_status { {MsgOrId, DiskWriteCount1} = case persist_to(MsgStatus) of msg_store -> {MsgId, DiskWriteCount}; - queue_store -> {MsgId, DiskWriteCount}; - queue_index -> {prepare_to_store(Msg), DiskWriteCount + 1} + queue_store -> {MsgId, DiskWriteCount} end, IndexState2 = rabbit_classic_queue_index_v2:publish( MsgOrId, SeqId, MsgLocation, MsgProps, IsPersistent, @@ -1907,7 +1898,6 @@ accumulate_ack(#msg_status { seq_id = SeqId, end, case MsgLocation of ?IN_QUEUE_STORE -> [SeqId|SeqIdsInStore]; - ?IN_QUEUE_INDEX -> [SeqId|SeqIdsInStore]; _ -> SeqIdsInStore end, [MsgId | AllMsgIds]}. From e14c44d429f3769e97fe9717f063a6b334c53038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Tue, 21 Oct 2025 14:31:13 +0200 Subject: [PATCH 03/16] fixup! WIP Remove CQv1 --- deps/rabbit/src/rabbit_vhost.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/rabbit/src/rabbit_vhost.erl b/deps/rabbit/src/rabbit_vhost.erl index 7b08e3fec706..2c14a35ad712 100644 --- a/deps/rabbit/src/rabbit_vhost.erl +++ b/deps/rabbit/src/rabbit_vhost.erl @@ -110,7 +110,7 @@ ensure_config_file(VHost) -> %% The config file does not exist. %% Check if there are queues in this vhost. false -> - QueueDirs = rabbit_queue_index:all_queue_directory_names(VHost), + QueueDirs = rabbit_classic_queue_index_v2:all_queue_directory_names(VHost), SegmentEntryCount = case QueueDirs of %% There are no queues. Write the configured value for %% the segment entry count, or the new RabbitMQ default From 815159e4e6ea004b66e44b21830579ee2ec2ffe0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Tue, 21 Oct 2025 15:29:01 +0200 Subject: [PATCH 04/16] fixup! WIP Remove CQv1 --- deps/rabbit/src/rabbit_variable_queue.erl | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 6fdea935c206..f596fad1023d 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -1718,27 +1718,19 @@ maybe_write_index_to_disk(_Force, MsgStatus = #msg_status { index_on_disk = true }, State) -> {MsgStatus, State}; maybe_write_index_to_disk(Force, MsgStatus = #msg_status { - msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, msg_location = MsgLocation, msg_props = MsgProps}, - State = #vqstate{disk_write_count = DiskWriteCount, - index_state = IndexState}) + State = #vqstate{index_state = IndexState}) when Force orelse IsPersistent -> - {MsgOrId, DiskWriteCount1} = - case persist_to(MsgStatus) of - msg_store -> {MsgId, DiskWriteCount}; - queue_store -> {MsgId, DiskWriteCount} - end, IndexState2 = rabbit_classic_queue_index_v2:publish( - MsgOrId, SeqId, MsgLocation, MsgProps, IsPersistent, + MsgId, SeqId, MsgLocation, MsgProps, IsPersistent, persist_to(MsgStatus) =:= msg_store, IndexState), {MsgStatus#msg_status{index_on_disk = true}, - State#vqstate{index_state = IndexState2, - disk_write_count = DiskWriteCount1}}; + State#vqstate{index_state = IndexState2}}; maybe_write_index_to_disk(_Force, MsgStatus, State) -> {MsgStatus, State}. From dcba8405b73757bfdd327b50a3f57e64b3b70a0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Tue, 21 Oct 2025 15:46:07 +0200 Subject: [PATCH 05/16] fixup! WIP Remove CQv1 --- deps/rabbit/src/rabbit_classic_queue_index_v2.erl | 2 +- deps/rabbit/src/rabbit_variable_queue.erl | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl index a1649293088c..a4b6977eff2e 100644 --- a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl +++ b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl @@ -448,7 +448,7 @@ info(#qi{ write_buffer = WriteBuffer, write_buffer_updates = NumUpdates }) -> -spec publish(rabbit_types:msg_id(), rabbit_variable_queue:seq_id(), rabbit_variable_queue:msg_location(), rabbit_types:message_properties(), boolean(), - non_neg_integer() | infinity, State) -> State when State::state(). + boolean(), State) -> State when State::state(). %% Because we always persist to the msg_store, the Msg(Or)Id argument %% here is always a binary, never a record. diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index f596fad1023d..a3506e132942 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -1993,11 +1993,8 @@ delta_merge(SeqIds, Delta, MsgIds, State) -> Acc; {#msg_status { msg_id = MsgId, is_persistent = IsPersistent } = MsgStatus, State1} -> -% {_MsgStatus, State2} = -% maybe_prepare_write_to_disk(true, true, MsgStatus, State1), - State2 = State1, {expand_delta(SeqId, Delta0, IsPersistent), [MsgId | MsgIds0], - stats_requeued_disk(MsgStatus, State2)} + stats_requeued_disk(MsgStatus, State1)} end end, {Delta, MsgIds, State}, SeqIds). From fa9c88d6deb88f167689dbe1a7bb5421104a6d77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Wed, 22 Oct 2025 14:59:30 +0200 Subject: [PATCH 06/16] fixup! WIP Remove CQv1 --- deps/rabbit/src/rabbit_classic_queue.erl | 1 - .../src/rabbit_classic_queue_store_v2.erl | 5 ++- deps/rabbit/src/rabbit_guid.erl | 1 + deps/rabbit/src/rabbit_variable_queue.erl | 31 +++---------------- 4 files changed, 8 insertions(+), 30 deletions(-) diff --git a/deps/rabbit/src/rabbit_classic_queue.erl b/deps/rabbit/src/rabbit_classic_queue.erl index d3399685b817..97115a07ac8a 100644 --- a/deps/rabbit/src/rabbit_classic_queue.erl +++ b/deps/rabbit/src/rabbit_classic_queue.erl @@ -430,7 +430,6 @@ supports_stateful_delivery() -> true. deliver(Qs0, Msg0, Options) -> %% add guid to content here instead of in rabbit_basic:message/3, %% as classic queues are the only ones that need it - %% @todo Only if multiple queues. Msg = mc:prepare(store, mc:set_annotation(id, rabbit_guid:gen(), Msg0)), Mandatory = maps:get(mandatory, Options, false), MsgSeqNo = maps:get(correlation, Options, undefined), diff --git a/deps/rabbit/src/rabbit_classic_queue_store_v2.erl b/deps/rabbit/src/rabbit_classic_queue_store_v2.erl index ab73d926925c..354f4e1189aa 100644 --- a/deps/rabbit/src/rabbit_classic_queue_store_v2.erl +++ b/deps/rabbit/src/rabbit_classic_queue_store_v2.erl @@ -145,11 +145,10 @@ info(#qs{ write_buffer = WriteBuffer }) -> rabbit_types:message_properties(), State) -> {msg_location(), State} when State::state(). -%% TODO!! %% @todo I think we can disable the old message store at the same %% place where we create MsgId. If many queues receive the -%% message, then we create an MsgId. If not, we don't. But -%% we can only do this after removing support for v1. +%% message, then we create an MsgId. If not, we don't until +%% strictly necessary (large messages). write(SeqId, Msg, Props, State0 = #qs{ write_buffer = WriteBuffer0, write_buffer_size = WriteBufferSize }) -> ?DEBUG("~0p ~0p ~0p ~0p", [SeqId, Msg, Props, State0]), diff --git a/deps/rabbit/src/rabbit_guid.erl b/deps/rabbit/src/rabbit_guid.erl index d33081c8d865..fd525e5606ab 100644 --- a/deps/rabbit/src/rabbit_guid.erl +++ b/deps/rabbit/src/rabbit_guid.erl @@ -31,6 +31,7 @@ -spec start_link() -> rabbit_types:ok_pid_or_error(). +%% @todo Serial can be in persistent_term instead of process. start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [update_disk_serial()], []). diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index a3506e132942..e2d6aa459dcd 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -251,7 +251,6 @@ end_seq_id %% end_seq_id is exclusive }). --define(HEADER_GUESS_SIZE, 100). %% see determine_persist_to/2 -define(PERSISTENT_MSG_STORE, msg_store_persistent). -define(TRANSIENT_MSG_STORE, msg_store_transient). @@ -953,7 +952,7 @@ msg_status(IsPersistent, IsDelivered, SeqId, is_delivered = IsDelivered, msg_location = memory, index_on_disk = false, - persist_to = determine_persist_to(Msg, MsgProps, IndexMaxSize), + persist_to = determine_persist_to(Msg, IndexMaxSize), msg_props = MsgProps}. beta_msg_status({MsgId, SeqId, MsgLocation, MsgProps, IsPersistent}) @@ -1739,33 +1738,13 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) -> {MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State), maybe_write_index_to_disk(ForceIndex, MsgStatus1, State1). -determine_persist_to(Msg, - #message_properties{size = BodySize}, - IndexMaxSize) -> +determine_persist_to(Msg, IndexMaxSize) -> %% The >= is so that you can set the env to 0 and never persist %% to the index. - %% - %% We want this to be fast, so we avoid size(term_to_binary()) - %% here, or using the term size estimation from truncate.erl, both - %% of which are too slow. So instead, if the message body size - %% goes over the limit then we avoid any other checks. - %% - %% If it doesn't we need to decide if the properties will push - %% it past the limit. If we have the encoded properties (usual - %% case) we can just check their size. If we don't (message came - %% via the direct client), we make a guess based on the number of - %% headers. - - %% @todo We can probably simplify this. - {MetaSize, _BodySize} = mc:size(Msg), - case BodySize >= IndexMaxSize of + {MetaSize, BodySize} = mc:size(Msg), + case MetaSize + BodySize >= IndexMaxSize of true -> msg_store; - false -> - Est = MetaSize + BodySize, - case Est >= IndexMaxSize of - true -> msg_store; - false -> queue_store - end + false -> queue_store end. persist_to(#msg_status{persist_to = To}) -> To. From 9edbe65fae563ce298139f112ad3fe8a5292bf0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Wed, 22 Oct 2025 15:18:07 +0200 Subject: [PATCH 07/16] fixup! WIP Remove CQv1 --- .../src/rabbit_classic_queue_index_v2.erl | 11 +------- deps/rabbit/src/rabbit_variable_queue.erl | 28 ++++--------------- 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl index a4b6977eff2e..67922c57885c 100644 --- a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl +++ b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl @@ -19,7 +19,7 @@ %% Implementation details from the queue index leaking into the %% queue implementation itself. %% @todo TODO --export([sync/1, needs_sync/1, flush/1, +-export([sync/1, needs_sync/1, bounds/2, next_segment_boundary/1]). %% Called by rabbit_vhost. @@ -948,15 +948,6 @@ needs_sync(State = #qi{ confirms = Confirms }) -> false -> confirms end. --spec flush(State) -> State when State::state(). - -flush(State) -> - ?DEBUG("~0p", [State]), - %% Flushing to disk is the same operation as sync - %% except it is called before hibernating or when - %% reducing memory use. - sync(State). - %% ---- -type walker(A) :: fun ((A) -> 'finished' | diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index e2d6aa459dcd..8a5e1a074029 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -710,30 +710,12 @@ needs_timeout(#vqstate { index_state = IndexState, {false, true} -> false end. -timeout(State = #vqstate { index_state = IndexState0, - store_state = StoreState0, - unconfirmed_simple = UCS, - confirmed = C }) -> - IndexState = rabbit_classic_queue_index_v2:sync(IndexState0), - StoreState = rabbit_classic_queue_store_v2:sync(StoreState0), - State #vqstate { index_state = IndexState, - store_state = StoreState, - unconfirmed_simple = sets:new([{version,2}]), - confirmed = sets:union(C, UCS) }. - -handle_pre_hibernate(State = #vqstate { index_state = IndexState0, - store_state = StoreState0, - msg_store_clients = MSCState0, - unconfirmed_simple = UCS, - confirmed = C }) -> +timeout(State) -> + sync(State). + +handle_pre_hibernate(State = #vqstate{ msg_store_clients = MSCState0 }) -> MSCState = msg_store_pre_hibernate(MSCState0), - IndexState = rabbit_classic_queue_index_v2:flush(IndexState0), - StoreState = rabbit_classic_queue_store_v2:sync(StoreState0), - State #vqstate { index_state = IndexState, - store_state = StoreState, - msg_store_clients = MSCState, - unconfirmed_simple = sets:new([{version,2}]), - confirmed = sets:union(C, UCS) }. + sync(State#vqstate{ msg_store_clients = MSCState }). resume(State) -> a(timeout(State)). From 3d5985e3ab966554888ac2f02c2d01cd11752234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Wed, 22 Oct 2025 15:36:34 +0200 Subject: [PATCH 08/16] fixup! WIP Remove CQv1 --- deps/rabbit/src/rabbit_variable_queue.erl | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 8a5e1a074029..154a9c280b48 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -717,6 +717,17 @@ handle_pre_hibernate(State = #vqstate{ msg_store_clients = MSCState0 }) -> MSCState = msg_store_pre_hibernate(MSCState0), sync(State#vqstate{ msg_store_clients = MSCState }). +sync(State = #vqstate { index_state = IndexState0, + store_state = StoreState0, + unconfirmed_simple = UCS, + confirmed = C }) -> + IndexState = rabbit_classic_queue_index_v2:sync(IndexState0), + StoreState = rabbit_classic_queue_store_v2:sync(StoreState0), + State #vqstate { index_state = IndexState, + store_state = StoreState, + unconfirmed_simple = sets:new([{version,2}]), + confirmed = sets:union(C, UCS) }. + resume(State) -> a(timeout(State)). msg_rates(#vqstate { rates = #rates { in = AvgIngressRate, From 892bbee8ed48d081c13746e3856a76862c7a9408 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Wed, 22 Oct 2025 16:15:46 +0200 Subject: [PATCH 09/16] fixup! WIP Remove CQv1 --- deps/rabbit/src/rabbit_variable_queue.erl | 1 + deps/rabbit/test/backing_queue_SUITE.erl | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 154a9c280b48..9484bc103f82 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -649,6 +649,7 @@ ackfold(MsgFun, Acc, State, AckTags) -> end, {Acc, State}, AckTags), {AccN, a(StateN)}. +%% @todo I think this is never used. Was CMQ. fold(Fun, Acc, State = #vqstate{index_state = IndexState}) -> {Its, IndexState1} = lists:foldl(fun inext/2, {[], IndexState}, [msg_iterator(State), diff --git a/deps/rabbit/test/backing_queue_SUITE.erl b/deps/rabbit/test/backing_queue_SUITE.erl index bb8c4b1a1f92..cb125552a3f3 100644 --- a/deps/rabbit/test/backing_queue_SUITE.erl +++ b/deps/rabbit/test/backing_queue_SUITE.erl @@ -851,7 +851,7 @@ bq_queue_index1(_Config) -> Qi13 end, {_DeletedSegments, Qi16} = IndexMod:ack(SeqIdsB, Qi15), - Qi17 = IndexMod:flush(Qi16), + Qi17 = IndexMod:sync(Qi16), %% Everything will have gone now because #pubs == #acks {NextSeqIdB, NextSeqIdB, Qi18} = IndexMod:bounds(Qi17, NextSeqIdB), %% should get length back as 0 because all persistent @@ -872,7 +872,7 @@ bq_queue_index1(_Config) -> _ -> Qi1 end, {_DeletedSegments, Qi3} = IndexMod:ack(SeqIdsC, Qi2), - Qi4 = IndexMod:flush(Qi3), + Qi4 = IndexMod:sync(Qi3), {Qi5, _SeqIdsMsgIdsC1} = queue_index_publish([SegmentSize], false, Qi4), Qi5 @@ -890,7 +890,7 @@ bq_queue_index1(_Config) -> {Qi3, _SeqIdsMsgIdsC3} = queue_index_publish([SegmentSize], false, Qi2), {_DeletedSegments, Qi4} = IndexMod:ack(SeqIdsC, Qi3), - IndexMod:flush(Qi4) + IndexMod:sync(Qi4) end), %% c) just fill up several segments of all pubs, then +acks @@ -903,7 +903,7 @@ bq_queue_index1(_Config) -> _ -> Qi1 end, {_DeletedSegments, Qi3} = IndexMod:ack(SeqIdsD, Qi2), - IndexMod:flush(Qi3) + IndexMod:sync(Qi3) end), %% d) get messages in all states to a segment, then flush, then do @@ -917,7 +917,7 @@ bq_queue_index1(_Config) -> _ -> Qi1 end, {_DeletedSegments3, Qi3} = IndexMod:ack([0], Qi2), - Qi4 = IndexMod:flush(Qi3), + Qi4 = IndexMod:sync(Qi3), {Qi5, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi4), Qi6 = case IndexMod of rabbit_queue_index -> IndexMod:deliver([2,3,5,6], Qi5); From 32e93695c910b4fec4a1f49f5f3433ea7a7b1d91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Wed, 22 Oct 2025 16:56:06 +0200 Subject: [PATCH 10/16] Remove rabbit_variable_queue:fold/3 It was only used by CMQs and they have been removed a while back. --- deps/rabbit/src/rabbit_priority_queue.erl | 7 +- deps/rabbit/src/rabbit_variable_queue.erl | 93 +---------------------- deps/rabbit/test/backing_queue_SUITE.erl | 42 +--------- 3 files changed, 3 insertions(+), 139 deletions(-) diff --git a/deps/rabbit/src/rabbit_priority_queue.erl b/deps/rabbit/src/rabbit_priority_queue.erl index 6777ec31bc6a..ef60d05ee6ec 100644 --- a/deps/rabbit/src/rabbit_priority_queue.erl +++ b/deps/rabbit/src/rabbit_priority_queue.erl @@ -29,7 +29,7 @@ purge/1, purge_acks/1, publish/5, publish_delivered/4, discard/3, drain_confirmed/1, dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2, - ackfold/4, fold/3, len/1, is_empty/1, depth/1, + ackfold/4, len/1, is_empty/1, depth/1, update_rates/1, needs_timeout/1, timeout/1, handle_pre_hibernate/1, resume/1, msg_rates/1, info/2, invoke/3, is_duplicate/2, set_queue_mode/2, @@ -302,11 +302,6 @@ ackfold(MsgFun, Acc, State = #state{bq = BQ}, AckTags) -> ackfold(MsgFun, Acc, State = #passthrough{bq = BQ, bqs = BQS}, AckTags) -> ?passthrough2(ackfold(MsgFun, Acc, BQS, AckTags)). -fold(Fun, Acc, State = #state{bq = BQ}) -> - fold2(fun (_P, BQSN, AccN) -> BQ:fold(Fun, AccN, BQSN) end, Acc, State); -fold(Fun, Acc, State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough2(fold(Fun, Acc, BQS)). - len(#state{bq = BQ, bqss = BQSs}) -> add0(fun (_P, BQSN) -> BQ:len(BQSN) end, BQSs); len(#passthrough{bq = BQ, bqs = BQS}) -> diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 9484bc103f82..16d76e27ebcd 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -12,7 +12,7 @@ publish/5, publish_delivered/4, discard/3, drain_confirmed/1, dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2, - ackfold/4, fold/3, len/1, is_empty/1, depth/1, + ackfold/4, len/1, is_empty/1, depth/1, update_rates/1, needs_timeout/1, timeout/1, handle_pre_hibernate/1, resume/1, msg_rates/1, info/2, invoke/3, is_duplicate/2, set_queue_mode/2, @@ -649,14 +649,6 @@ ackfold(MsgFun, Acc, State, AckTags) -> end, {Acc, State}, AckTags), {AccN, a(StateN)}. -%% @todo I think this is never used. Was CMQ. -fold(Fun, Acc, State = #vqstate{index_state = IndexState}) -> - {Its, IndexState1} = lists:foldl(fun inext/2, {[], IndexState}, - [msg_iterator(State), - disk_ack_iterator(State), - ram_ack_iterator(State)]), - ifold(Fun, Acc, Its, State#vqstate{index_state = IndexState1}). - len(#vqstate { len = Len }) -> Len. is_empty(State) -> 0 == len(State). @@ -1985,89 +1977,6 @@ msg_from_pending_ack(SeqId, State) -> delta_limit(?BLANK_DELTA_PATTERN(_)) -> undefined; delta_limit(#delta { start_seq_id = StartSeqId }) -> StartSeqId. -%%---------------------------------------------------------------------------- -%% Iterator -%%---------------------------------------------------------------------------- - -ram_ack_iterator(State) -> - {ack, maps:iterator(State#vqstate.ram_pending_ack)}. - -disk_ack_iterator(State) -> - {ack, maps:iterator(State#vqstate.disk_pending_ack)}. - -msg_iterator(State) -> istate(start, State). - -istate(start, State) -> {q3, State#vqstate.q3, State}; -istate(q3, State) -> {delta, State#vqstate.delta, State}; -istate(delta, _State) -> done. - -next({ack, It}, IndexState) -> - case maps:next(It) of - none -> {empty, IndexState}; - {_SeqId, MsgStatus, It1} -> Next = {ack, It1}, - {value, MsgStatus, true, Next, IndexState} - end; -next(done, IndexState) -> {empty, IndexState}; -next({delta, #delta{start_seq_id = SeqId, - end_seq_id = SeqId}, State}, IndexState) -> - next(istate(delta, State), IndexState); -next({delta, #delta{start_seq_id = SeqId, - end_seq_id = SeqIdEnd} = Delta, State}, IndexState) -> - SeqIdB = rabbit_classic_queue_index_v2:next_segment_boundary(SeqId), - %% It may make sense to limit this based on rate. But this - %% is not called outside of CMQs so I will leave it alone - %% for the time being. - SeqId1 = lists:min([SeqIdB, - %% We must limit the number of messages read at once - %% otherwise the queue will attempt to read up to segment_entry_count() - %% messages from the index each time. The value - %% chosen here is arbitrary. - SeqId + 2048, - SeqIdEnd]), - {List, IndexState1} = rabbit_classic_queue_index_v2:read(SeqId, SeqId1, IndexState), - next({delta, Delta#delta{start_seq_id = SeqId1}, List, State}, IndexState1); -next({delta, Delta, [], State}, IndexState) -> - next({delta, Delta, State}, IndexState); -next({delta, Delta, [{_, SeqId, _, _, _} = M | Rest], State}, IndexState) -> - case is_msg_in_pending_acks(SeqId, State) of - false -> Next = {delta, Delta, Rest, State}, - {value, beta_msg_status(M), false, Next, IndexState}; - true -> next({delta, Delta, Rest, State}, IndexState) - end; -next({Key, Q, State}, IndexState) -> - case ?QUEUE:out(Q) of - {empty, _Q} -> next(istate(Key, State), IndexState); - {{value, MsgStatus}, QN} -> Next = {Key, QN, State}, - {value, MsgStatus, false, Next, IndexState} - end. - -inext(It, {Its, IndexState}) -> - case next(It, IndexState) of - {empty, IndexState1} -> - {Its, IndexState1}; - {value, MsgStatus1, Unacked, It1, IndexState1} -> - {[{MsgStatus1, Unacked, It1} | Its], IndexState1} - end. - -ifold(_Fun, Acc, [], State0) -> - {Acc, State0}; -ifold(Fun, Acc, Its0, State0) -> - [{MsgStatus, Unacked, It} | Rest] = - lists:sort(fun ({#msg_status{seq_id = SeqId1}, _, _}, - {#msg_status{seq_id = SeqId2}, _, _}) -> - SeqId1 =< SeqId2 - end, Its0), - {Msg, State1} = read_msg(MsgStatus, State0), - case Fun(Msg, MsgStatus#msg_status.msg_props, Unacked, Acc) of - {stop, Acc1} -> - {Acc1, State1}; - {cont, Acc1} -> - IndexState0 = State1#vqstate.index_state, - {Its1, IndexState1} = inext(It, {Rest, IndexState0}), - State2 = State1#vqstate{index_state = IndexState1}, - ifold(Fun, Acc1, Its1, State2) - end. - %%---------------------------------------------------------------------------- %% Phase changes %%---------------------------------------------------------------------------- diff --git a/deps/rabbit/test/backing_queue_SUITE.erl b/deps/rabbit/test/backing_queue_SUITE.erl index cb125552a3f3..bda171c2ce76 100644 --- a/deps/rabbit/test/backing_queue_SUITE.erl +++ b/deps/rabbit/test/backing_queue_SUITE.erl @@ -33,8 +33,7 @@ variable_queue_ack_limiting, variable_queue_purge, variable_queue_requeue, - variable_queue_requeue_ram_beta, - variable_queue_fold + variable_queue_requeue_ram_beta ]). -define(BACKING_QUEUE_TESTCASES, [ @@ -162,15 +161,9 @@ orelse Group =:= backing_queue_embed_limit_1024 -> end_per_group1(_, Config) -> Config. -init_per_testcase(Testcase, Config) when Testcase == variable_queue_requeue; - Testcase == variable_queue_fold -> - rabbit_ct_helpers:testcase_started(Config, Testcase); init_per_testcase(Testcase, Config) -> rabbit_ct_helpers:testcase_started(Config, Testcase). -end_per_testcase(Testcase, Config) when Testcase == variable_queue_requeue; - Testcase == variable_queue_fold -> - rabbit_ct_helpers:testcase_finished(Config, Testcase); end_per_testcase(Testcase, Config) -> rabbit_ct_helpers:testcase_finished(Config, Testcase). @@ -1576,39 +1569,6 @@ variable_queue_requeue_ram_beta2(VQ0, _Config) -> {_, VQ8} = rabbit_variable_queue:ack(AcksAll, VQ7), VQ8. -variable_queue_fold(Config) -> - passed = rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, variable_queue_fold1, [Config]). - -variable_queue_fold1(Config) -> - with_fresh_variable_queue( - fun variable_queue_fold2/2, - ?config(variable_queue_type, Config)). - -variable_queue_fold2(VQ0, _Config) -> - {PendingMsgs, RequeuedMsgs, FreshMsgs, VQ1} = - variable_queue_with_holes(VQ0), - Count = rabbit_variable_queue:depth(VQ1), - Msgs = lists:sort(PendingMsgs ++ RequeuedMsgs ++ FreshMsgs), - lists:foldl(fun (Cut, VQ2) -> - test_variable_queue_fold(Cut, Msgs, PendingMsgs, VQ2) - end, VQ1, [0, 1, 2, Count div 2, - Count - 1, Count, Count + 1, Count * 2]). - -test_variable_queue_fold(Cut, Msgs, PendingMsgs, VQ0) -> - {Acc, VQ1} = rabbit_variable_queue:fold( - fun (M, _, Pending, A) -> - MInt = msg2int(M), - Pending = lists:member(MInt, PendingMsgs), %% assert - case MInt =< Cut of - true -> {cont, [MInt | A]}; - false -> {stop, A} - end - end, [], VQ0), - Expected = lists:takewhile(fun (I) -> I =< Cut end, Msgs), - Expected = lists:reverse(Acc), %% assertion - VQ1. - %% same as test_variable_queue_requeue_ram_beta but randomly changing %% the queue mode after every step. variable_queue_mode_change(Config) -> From f8fd4bf6b4126b56c3baa397ff7e9478e4062e2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Wed, 22 Oct 2025 17:09:33 +0200 Subject: [PATCH 11/16] fixup! Remove rabbit_variable_queue:fold/3 --- deps/rabbit/src/rabbit_backing_queue.erl | 7 ------- 1 file changed, 7 deletions(-) diff --git a/deps/rabbit/src/rabbit_backing_queue.erl b/deps/rabbit/src/rabbit_backing_queue.erl index 5bae9eef6067..37bd7d4967ee 100644 --- a/deps/rabbit/src/rabbit_backing_queue.erl +++ b/deps/rabbit/src/rabbit_backing_queue.erl @@ -173,13 +173,6 @@ %% each message, its ack tag, and an accumulator. -callback ackfold(msg_fun(A), A, state(), [ack()]) -> {A, state()}. -%% Fold over all the messages in a queue and return the accumulated -%% results, leaving the queue undisturbed. --callback fold(fun((mc:state(), - rabbit_types:message_properties(), - boolean(), A) -> {('stop' | 'cont'), A}), - A, state()) -> {A, state()}. - %% How long is my queue? -callback len(state()) -> non_neg_integer(). From 179f68734744f545d6917f3e5a1ffedf986f3b87 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Tue, 4 Nov 2025 13:16:03 +0100 Subject: [PATCH 12/16] CQ: Simplify handling of confirms from index writes We avoid an extra unnecessary message to the queue. --- .../src/rabbit_classic_queue_index_v2.erl | 54 ++++++------------- deps/rabbit/src/rabbit_variable_queue.erl | 54 ++++++++++--------- deps/rabbit/test/backing_queue_SUITE.erl | 20 ++++--- 3 files changed, 54 insertions(+), 74 deletions(-) diff --git a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl index 67922c57885c..f36813354a20 100644 --- a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl +++ b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl @@ -7,7 +7,7 @@ -module(rabbit_classic_queue_index_v2). --export([erase/1, init/2, reset_state/1, recover/5, +-export([erase/1, init/1, reset_state/1, recover/4, terminate/3, delete_and_terminate/1, info/1, publish/7, ack/2, read/3]). @@ -144,17 +144,11 @@ %% File descriptors. We will keep up to 4 FDs %% at a time. See comments in reduce_fd_usage/2. - fds = #{} :: #{non_neg_integer() => file:fd()}, - - %% This fun must be called when messages that expect - %% confirms have either an ack or their entry - %% written to disk and file:sync/1 has been called. - on_sync :: on_sync_fun() + fds = #{} :: #{non_neg_integer() => file:fd()} }). -type state() :: #qi{}. --type on_sync_fun() :: fun ((sets:set()) -> ok). -type contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean()). -type shutdown_terms() :: list() | 'non_clean_shutdown'. @@ -168,24 +162,21 @@ erase(#resource{ virtual_host = VHost } = Name) -> Dir = queue_dir(VHostDir, Name), erase_index_dir(Dir). --spec init(rabbit_amqqueue:name(), on_sync_fun()) -> state(). - -%% We do not embed messages and as a result never need the OnSyncMsgFun. +-spec init(rabbit_amqqueue:name()) -> state(). -init(#resource{ virtual_host = VHost } = Name, OnSyncFun) -> - ?DEBUG("~0p ~0p ~0p", [Name, OnSyncFun]), +init(#resource{ virtual_host = VHost } = Name) -> + ?DEBUG("~0p", [Name]), VHostDir = rabbit_vhost:msg_store_dir_path(VHost), Dir = queue_dir(VHostDir, Name), false = rabbit_file:is_file(Dir), %% is_file == is file or dir - init1(Name, Dir, OnSyncFun). + init1(Name, Dir). -init1(Name, Dir, OnSyncFun) -> +init1(Name, Dir) -> ensure_queue_name_stub_file(Name, Dir), DirBin = rabbit_file:filename_to_binary(Dir), #qi{ queue_name = Name, - dir = << DirBin/binary, "/" >>, - on_sync = OnSyncFun + dir = << DirBin/binary, "/" >> }. ensure_queue_name_stub_file(#resource{virtual_host = VHost, name = QName}, Dir) -> @@ -197,15 +188,13 @@ ensure_queue_name_stub_file(#resource{virtual_host = VHost, name = QName}, Dir) -spec reset_state(State) -> State when State::state(). reset_state(State = #qi{ queue_name = Name, - dir = Dir, - on_sync = OnSyncFun }) -> + dir = Dir }) -> ?DEBUG("~0p", [State]), _ = delete_and_terminate(State), - init1(Name, rabbit_file:binary_to_filename(Dir), OnSyncFun). + init1(Name, rabbit_file:binary_to_filename(Dir)). -spec recover(rabbit_amqqueue:name(), shutdown_terms(), boolean(), - contains_predicate(), - on_sync_fun()) -> + contains_predicate()) -> {'undefined' | non_neg_integer(), 'undefined' | non_neg_integer(), state()}. @@ -218,12 +207,11 @@ reset_state(State = #qi{ queue_name = Name, -define(RECOVER_COUNTER_SIZE, 6). recover(#resource{ virtual_host = VHost, name = QueueName } = Name, Terms, - IsMsgStoreClean, ContainsCheckFun, OnSyncFun) -> - ?DEBUG("~0p ~0p ~0p ~0p ~0p", [Name, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun]), + IsMsgStoreClean, ContainsCheckFun) -> + ?DEBUG("~0p ~0p ~0p ~0p", [Name, Terms, IsMsgStoreClean, ContainsCheckFun]), VHostDir = rabbit_vhost:msg_store_dir_path(VHost), Dir = queue_dir(VHostDir, Name), - State0 = init1(Name, Dir, OnSyncFun), + State0 = init1(Name, Dir), %% We go over all segments if either the index or the %% message store has/had to recover. Otherwise we just %% take our state from Terms. @@ -922,22 +910,14 @@ parse_entries(<< Status:8, %% ---- %% -%% Syncing and flushing to disk requested by the queue. -%% Note: the v2 no longer calls fsync, it only flushes. +%% Flushing to disk requested by the queue. -spec sync(State) -> State when State::state(). -sync(State0 = #qi{ confirms = Confirms, - on_sync = OnSyncFun }) -> +sync(State0 = #qi{ confirms = Confirms }) -> ?DEBUG("~0p", [State0]), State = flush_buffer(State0, full, segment_entry_count()), - _ = case sets:is_empty(Confirms) of - true -> - ok; - false -> - OnSyncFun(Confirms) - end, - State#qi{ confirms = sets:new([{version,2}]) }. + {Confirms, State#qi{ confirms = sets:new([{version,2}]) }}. -spec needs_sync(state()) -> 'false' | 'confirms'. diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 16d76e27ebcd..86764eebf68f 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -22,7 +22,7 @@ -export([start/2, stop/1]). %% exported for testing only --export([start_msg_store/3, stop_msg_store/1, init/4]). +-export([start_msg_store/3, stop_msg_store/1, init/3]). -include("mc.hrl"). -include_lib("stdlib/include/qlc.hrl"). @@ -404,17 +404,16 @@ stop_msg_store(VHost) -> ok. init(Queue, Recover, Callback) -> - init( + init1( Queue, Recover, fun (MsgIds, ActionTaken) -> msgs_written_to_disk(Callback, MsgIds, ActionTaken) - end, - fun (MsgIds) -> msg_indices_written_to_disk(Callback, MsgIds) end). + end). -init(Q, new, MsgOnDiskFun, MsgIdxOnDiskFun) when ?is_amqqueue(Q) -> +init1(Q, new, MsgOnDiskFun) when ?is_amqqueue(Q) -> QueueName = amqqueue:get_name(Q), IsDurable = amqqueue:is_durable(Q), - IndexState = rabbit_classic_queue_index_v2:init(QueueName, MsgIdxOnDiskFun), + IndexState = rabbit_classic_queue_index_v2:init(QueueName), StoreState = rabbit_classic_queue_store_v2:init(QueueName), VHost = QueueName#resource.virtual_host, init(IsDurable, IndexState, StoreState, 0, 0, [], @@ -427,7 +426,7 @@ init(Q, new, MsgOnDiskFun, MsgIdxOnDiskFun) when ?is_amqqueue(Q) -> VHost), VHost); %% We can be recovering a transient queue if it crashed -init(Q, Terms, MsgOnDiskFun, MsgIdxOnDiskFun) when ?is_amqqueue(Q) -> +init1(Q, Terms, MsgOnDiskFun) when ?is_amqqueue(Q) -> QueueName = amqqueue:get_name(Q), IsDurable = amqqueue:is_durable(Q), {PRef, RecoveryTerms} = process_recovery_terms(Terms), @@ -451,7 +450,7 @@ init(Q, Terms, MsgOnDiskFun, MsgIdxOnDiskFun) when ?is_amqqueue(Q) -> rabbit_vhost_msg_store:successfully_recovered_state( VHost, ?PERSISTENT_MSG_STORE), - ContainsCheckFun, MsgIdxOnDiskFun), + ContainsCheckFun), StoreState = rabbit_classic_queue_store_v2:init(QueueName), init(IsDurable, IndexState, StoreState, DeltaCount, DeltaBytes, RecoveryTerms, @@ -714,12 +713,28 @@ sync(State = #vqstate { index_state = IndexState0, store_state = StoreState0, unconfirmed_simple = UCS, confirmed = C }) -> - IndexState = rabbit_classic_queue_index_v2:sync(IndexState0), + {MsgIdSet, IndexState} = rabbit_classic_queue_index_v2:sync(IndexState0), StoreState = rabbit_classic_queue_store_v2:sync(StoreState0), - State #vqstate { index_state = IndexState, - store_state = StoreState, - unconfirmed_simple = sets:new([{version,2}]), - confirmed = sets:union(C, UCS) }. + State1 = State #vqstate { index_state = IndexState, + store_state = StoreState, + unconfirmed_simple = sets:new([{version,2}]), + confirmed = sets:union(C, UCS) }, + index_synced(MsgIdSet, State1). + +index_synced(MsgIdSet, State = #vqstate{ + msgs_on_disk = MOD, + msg_indices_on_disk = MIOD, + unconfirmed = UC }) -> + case sets:is_empty(MsgIdSet) of + true -> + State; + false -> + Confirmed = sets:intersection(UC, MsgIdSet), + record_confirms(sets:intersection(MsgIdSet, MOD), + State #vqstate { + msg_indices_on_disk = + sets:union(MIOD, Confirmed) }) + end. resume(State) -> a(timeout(State)). @@ -1903,19 +1918,6 @@ msgs_written_to_disk(Callback, MsgIdSet, written) -> sets:union(MOD, Confirmed) }) end). -%% @todo Having to call run_backing_queue is probably reducing performance... -msg_indices_written_to_disk(Callback, MsgIdSet) -> - Callback(?MODULE, - fun (?MODULE, State = #vqstate { msgs_on_disk = MOD, - msg_indices_on_disk = MIOD, - unconfirmed = UC }) -> - Confirmed = sets:intersection(UC, MsgIdSet), - record_confirms(sets:intersection(MsgIdSet, MOD), - State #vqstate { - msg_indices_on_disk = - sets:union(MIOD, Confirmed) }) - end). - %%---------------------------------------------------------------------------- %% Internal plumbing for requeue %%---------------------------------------------------------------------------- diff --git a/deps/rabbit/test/backing_queue_SUITE.erl b/deps/rabbit/test/backing_queue_SUITE.erl index bda171c2ce76..130d276d6ae8 100644 --- a/deps/rabbit/test/backing_queue_SUITE.erl +++ b/deps/rabbit/test/backing_queue_SUITE.erl @@ -844,7 +844,7 @@ bq_queue_index1(_Config) -> Qi13 end, {_DeletedSegments, Qi16} = IndexMod:ack(SeqIdsB, Qi15), - Qi17 = IndexMod:sync(Qi16), + {_Confirms, Qi17} = IndexMod:sync(Qi16), %% Everything will have gone now because #pubs == #acks {NextSeqIdB, NextSeqIdB, Qi18} = IndexMod:bounds(Qi17, NextSeqIdB), %% should get length back as 0 because all persistent @@ -865,7 +865,7 @@ bq_queue_index1(_Config) -> _ -> Qi1 end, {_DeletedSegments, Qi3} = IndexMod:ack(SeqIdsC, Qi2), - Qi4 = IndexMod:sync(Qi3), + {_Confirms, Qi4} = IndexMod:sync(Qi3), {Qi5, _SeqIdsMsgIdsC1} = queue_index_publish([SegmentSize], false, Qi4), Qi5 @@ -883,7 +883,8 @@ bq_queue_index1(_Config) -> {Qi3, _SeqIdsMsgIdsC3} = queue_index_publish([SegmentSize], false, Qi2), {_DeletedSegments, Qi4} = IndexMod:ack(SeqIdsC, Qi3), - IndexMod:sync(Qi4) + {_Confirms, Qi5} = IndexMod:sync(Qi4), + Qi5 end), %% c) just fill up several segments of all pubs, then +acks @@ -896,7 +897,8 @@ bq_queue_index1(_Config) -> _ -> Qi1 end, {_DeletedSegments, Qi3} = IndexMod:ack(SeqIdsD, Qi2), - IndexMod:sync(Qi3) + {_Confirms, Qi4} = IndexMod:sync(Qi3), + Qi4 end), %% d) get messages in all states to a segment, then flush, then do @@ -910,7 +912,7 @@ bq_queue_index1(_Config) -> _ -> Qi1 end, {_DeletedSegments3, Qi3} = IndexMod:ack([0], Qi2), - Qi4 = IndexMod:sync(Qi3), + {_Confirms, Qi4} = IndexMod:sync(Qi3), {Qi5, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi4), Qi6 = case IndexMod of rabbit_queue_index -> IndexMod:deliver([2,3,5,6], Qi5); @@ -1641,8 +1643,7 @@ init_test_queue(QName) -> QName, [], false, fun (MsgId) -> rabbit_msg_store:contains(MsgId, PersistentClient) - end, - fun nop/1), + end), ok = rabbit_msg_store:client_delete_and_terminate(PersistentClient), Res. @@ -1702,9 +1703,6 @@ queue_index_publish(SeqIds, Persistent, Qi) -> ok = rabbit_msg_store:client_delete_and_terminate(MSCState), {A, B}. -nop(_) -> ok. -nop(_, _) -> ok. - msg_store_client_init(MsgStore, Ref) -> rabbit_vhost_msg_store:client_init(?VHOST, MsgStore, Ref, undefined). @@ -1714,7 +1712,7 @@ variable_queue_init(Q, Recover) -> true -> non_clean_shutdown; false -> new; Terms -> Terms - end, fun nop/2, fun nop/1). + end, fun(_, _) -> ok end). variable_queue_read_terms(QName) -> #resource { kind = queue, From 588947fd9dc85c2f2cb21b18f5bfd7797abff648 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Fri, 7 Nov 2025 13:31:56 +0100 Subject: [PATCH 13/16] CQ: Rename q3/delta into q_head/q_tail And drop all terminology from antique CQ. Some terminology remains in the backing_queue_SUITE, to be addressed later. --- deps/rabbit/src/rabbit_variable_queue.erl | 487 ++++++++++------------ deps/rabbit/test/backing_queue_SUITE.erl | 50 ++- 2 files changed, 236 insertions(+), 301 deletions(-) diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 86764eebf68f..dd80a580d292 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -119,56 +119,19 @@ %% message in the queue (effectively marking all messages as %% delivered, like the v1 index was doing). %% -%% Previous versions of classic queues had a much more complex -%% way of working. Messages were categorized into four groups, -%% and remnants of these terms remain in the code at the time -%% of writing: -%% -%% alpha: this is a message where both the message itself, and its -%% position within the queue are held in RAM -%% -%% beta: this is a message where the message itself is only held on -%% disk (if persisted to the message store) but its position -%% within the queue is held in RAM. -%% -%% gamma: this is a message where the message itself is only held on -%% disk, but its position is both in RAM and on disk. -%% -%% delta: this is a collection of messages, represented by a single -%% term, where the messages and their position are only held on -%% disk. -%% -%% Messages may have been stored in q1, q2, delta, q3 or q4 depending -%% on their location in the queue. The current version of classic -%% queues only use delta (on-disk, for the tail of the queue) or -%% q3 (in-memory, head of the queue). Messages used to move from -%% q1 -> q2 -> delta -> q3 -> q4 (and sometimes q3 -> delta or -%% q4 -> delta to reduce memory use). Now messages only move -%% from delta to q3. Full details on the old mechanisms can be -%% found in previous versions of this file (such as the 3.11 version). -%% -%% In the current version of classic queues, there is no distinction -%% between default and lazy queues. The current behavior is close to -%% lazy queues, except we avoid some write to disks when queues are -%% empty. %%---------------------------------------------------------------------------- -behaviour(rabbit_backing_queue). -record(vqstate, - { q1, %% Unused. - q2, %% Unused. - delta, - q3, - q4, %% Unused. + { q_head, + q_tail, next_seq_id, %% seq_id() of first undelivered message %% everything before this seq_id() was delivered at least once next_deliver_seq_id, ram_pending_ack, %% msgs still in RAM disk_pending_ack, %% msgs in store, paged out - qi_pending_ack, %% Unused. - index_mod, %% Unused. index_state, store_state, msg_store_clients, @@ -176,14 +139,13 @@ transient_threshold, qi_embed_msgs_below, - len, %% w/o unacked @todo No longer needed, is delta+q3. + len, %% w/o unacked @todo No longer needed, is q_head+q_tail. bytes, %% w/o unacked unacked_bytes, persistent_count, %% w unacked persistent_bytes, %% w unacked delta_transient_bytes, %% - target_ram_count, %% Unused. ram_msg_count, %% w/o unacked ram_msg_count_prev, ram_ack_count_prev, @@ -207,11 +169,6 @@ disk_read_count, disk_write_count, - io_batch_size, %% Unused. - - %% default queue or lazy queue - mode, %% Unused. - version = 2, %% Unused. %% Fast path for confirms handling. Instead of having %% index/store keep track of confirms separately and %% doing intersect/subtract/union we just put the messages @@ -244,7 +201,7 @@ msg_props }). --record(delta, +-record(q_tail, { start_seq_id, %% start_seq_id is inclusive count, transient, @@ -275,9 +232,9 @@ ack_out :: float(), timestamp :: rabbit_types:timestamp()}. --type delta() :: #delta { start_seq_id :: non_neg_integer(), - count :: non_neg_integer(), - end_seq_id :: non_neg_integer() }. +-type q_tail() :: #q_tail { start_seq_id :: non_neg_integer(), + count :: non_neg_integer(), + end_seq_id :: non_neg_integer() }. %% The compiler (rightfully) complains that ack() and state() are %% unused. For this reason we duplicate a -spec from @@ -287,16 +244,12 @@ %% these here for documentation purposes. -type ack() :: seq_id(). -type state() :: #vqstate { - q1 :: ?QUEUE:?QUEUE(), - q2 :: ?QUEUE:?QUEUE(), - delta :: delta(), - q3 :: ?QUEUE:?QUEUE(), - q4 :: ?QUEUE:?QUEUE(), + q_head :: ?QUEUE:?QUEUE(), + q_tail :: q_tail(), next_seq_id :: seq_id(), next_deliver_seq_id :: seq_id(), ram_pending_ack :: map(), disk_pending_ack :: map(), - qi_pending_ack :: undefined, index_state :: any(), store_state :: any(), msg_store_clients :: 'undefined' | {{any(), binary()}, @@ -312,7 +265,6 @@ persistent_count :: non_neg_integer(), persistent_bytes :: non_neg_integer(), - target_ram_count :: 'infinity', ram_msg_count :: non_neg_integer(), ram_msg_count_prev :: non_neg_integer(), ram_ack_count_prev :: non_neg_integer(), @@ -329,19 +281,16 @@ disk_read_count :: non_neg_integer(), disk_write_count :: non_neg_integer(), - io_batch_size :: 0, - mode :: 'default' | 'lazy', - version :: 2, unconfirmed_simple :: sets:set()}. --define(BLANK_DELTA, #delta { start_seq_id = undefined, - count = 0, - transient = 0, - end_seq_id = undefined }). --define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z, - count = 0, - transient = 0, - end_seq_id = Z }). +-define(BLANK_Q_TAIL, #q_tail { start_seq_id = undefined, + count = 0, + transient = 0, + end_seq_id = undefined }). +-define(BLANK_Q_TAIL_PATTERN(Z), #q_tail { start_seq_id = Z, + count = 0, + transient = 0, + end_seq_id = Z }). -define(MICROS_PER_SECOND, 1000000.0). @@ -444,7 +393,7 @@ init1(Q, Terms, MsgOnDiskFun) when ?is_amqqueue(Q) -> end, TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, VHost), - {DeltaCount, DeltaBytes, IndexState} = + {DiskCount, DiskBytes, IndexState} = rabbit_classic_queue_index_v2:recover( QueueName, RecoveryTerms, rabbit_vhost_msg_store:successfully_recovered_state( @@ -453,7 +402,7 @@ init1(Q, Terms, MsgOnDiskFun) when ?is_amqqueue(Q) -> ContainsCheckFun), StoreState = rabbit_classic_queue_store_v2:init(QueueName), init(IsDurable, IndexState, StoreState, - DeltaCount, DeltaBytes, RecoveryTerms, + DiskCount, DiskBytes, RecoveryTerms, PersistentClient, TransientClient, VHost). process_recovery_terms(Terms=non_clean_shutdown) -> @@ -619,23 +568,22 @@ ack(AckTags, State) -> store_state = StoreState, ack_out_counter = AckOutCount + length(AckTags) })}. -requeue(AckTags, #vqstate { delta = Delta, - q3 = Q3, +requeue(AckTags, #vqstate { q_head = QHead0, + q_tail = QTail, in_counter = InCounter, len = Len } = State) -> - %% @todo This can be heavily simplified: if the message falls into delta, - %% add it there. Otherwise just add it to q3 in the correct position. - %% @todo I think if the message falls within Q3 we must add it back there, + %% @todo This can be heavily simplified: if the message falls into q_tail, + %% add it there. Otherwise just add it to q_head in the correct position. + %% @todo I think if the message falls within q_head we must add it back there, %% otherwise there's nothing to do? Except update stats. - {SeqIds, Q3a, MsgIds, State1} = requeue_merge(lists:sort(AckTags), Q3, [], - delta_limit(Delta), State), - {Delta1, MsgIds1, State2} = delta_merge(SeqIds, Delta, MsgIds, - State1), + {SeqIds, QHead, MsgIds, State1} = requeue_merge(lists:sort(AckTags), QHead0, [], + q_tail_limit(QTail), State), + {QTail1, MsgIds1, State2} = q_tail_merge(SeqIds, QTail, MsgIds, State1), MsgCount = length(MsgIds1), {MsgIds1, a( maybe_update_rates( - State2 #vqstate { delta = Delta1, - q3 = Q3a, + State2 #vqstate { q_head = QHead, + q_tail = QTail1, in_counter = InCounter + MsgCount, len = Len + MsgCount }))}. @@ -750,7 +698,8 @@ info(messages_ram, State) -> info(messages_ready_ram, State) + info(messages_unacknowledged_ram, State); info(messages_persistent, #vqstate{persistent_count = PersistentCount}) -> PersistentCount; -info(messages_paged_out, #vqstate{delta = #delta{transient = Count}}) -> +%% @todo Remove. +info(messages_paged_out, #vqstate{q_tail = #q_tail{transient = Count}}) -> Count; info(message_bytes, #vqstate{bytes = Bytes, unacked_bytes = UBytes}) -> @@ -763,23 +712,24 @@ info(message_bytes_ram, #vqstate{ram_bytes = RamBytes}) -> RamBytes; info(message_bytes_persistent, #vqstate{persistent_bytes = PersistentBytes}) -> PersistentBytes; +%% @todo Remove. info(message_bytes_paged_out, #vqstate{delta_transient_bytes = PagedOutBytes}) -> PagedOutBytes; info(head_message_timestamp, #vqstate{ - q3 = Q3, - ram_pending_ack = RPA}) -> - head_message_timestamp(Q3, RPA); + q_head = QHead, + ram_pending_ack = RPA}) -> + head_message_timestamp(QHead, RPA); info(oldest_message_received_timestamp, #vqstate{ - q3 = Q3, - ram_pending_ack = RPA}) -> - oldest_message_received_timestamp(Q3, RPA); + q_head = QHead, + ram_pending_ack = RPA}) -> + oldest_message_received_timestamp(QHead, RPA); info(disk_reads, #vqstate{disk_read_count = Count}) -> Count; info(disk_writes, #vqstate{disk_write_count = Count}) -> Count; info(backing_queue_status, #vqstate { - delta = Delta, q3 = Q3, - mode = Mode, + q_head = QHead, + q_tail = QTail, len = Len, next_seq_id = NextSeqId, next_deliver_seq_id = NextDeliverSeqId, @@ -793,15 +743,10 @@ info(backing_queue_status, #vqstate { out = AvgEgressRate, ack_in = AvgAckIngressRate, ack_out = AvgAckEgressRate }}) -> - [ {mode , Mode}, - {version , 2}, - {q1 , 0}, - {q2 , 0}, - {delta , Delta}, - {q3 , ?QUEUE:len(Q3)}, - {q4 , 0}, + [ {version , 2}, + {q_head , ?QUEUE:len(QHead)}, + {q_tail , QTail}, {len , Len}, - {target_ram_count , infinity}, {next_seq_id , NextSeqId}, {next_deliver_seq_id , NextDeliverSeqId}, {num_pending_acks , map_size(RPA) + map_size(DPA)}, @@ -842,10 +787,10 @@ set_queue_version(_, State) -> %% regarded as unprocessed until acked, this also prevents the result %% apparently oscillating during repeated rejects. %% -head_message_timestamp(Q3, RPA) -> +head_message_timestamp(QHead, RPA) -> HeadMsgs = [ HeadMsgStatus#msg_status.msg || HeadMsgStatus <- - [ get_q_head(Q3), + [ get_q_head(QHead), get_pa_head(RPA) ], HeadMsgStatus /= undefined, HeadMsgStatus#msg_status.msg /= undefined ], @@ -862,10 +807,10 @@ head_message_timestamp(Q3, RPA) -> false -> lists:min(Timestamps) end. -oldest_message_received_timestamp(Q3, RPA) -> +oldest_message_received_timestamp(QHead, RPA) -> HeadMsgs = [ HeadMsgStatus#msg_status.msg || HeadMsgStatus <- - [ get_q_head(Q3), + [ get_q_head(QHead), get_pa_head(RPA) ], HeadMsgStatus /= undefined, HeadMsgStatus#msg_status.msg /= undefined ], @@ -893,7 +838,8 @@ get_pa_head(PA) -> map_get(Smallest, PA) end. -a(State = #vqstate { delta = Delta, q3 = Q3, +a(State = #vqstate { q_head = QHead, + q_tail = QTail, len = Len, bytes = Bytes, unacked_bytes = UnackedBytes, @@ -901,16 +847,16 @@ a(State = #vqstate { delta = Delta, q3 = Q3, persistent_bytes = PersistentBytes, ram_msg_count = RamMsgCount, ram_bytes = RamBytes}) -> - ED = Delta#delta.count == 0, - E3 = ?QUEUE:is_empty(Q3), + ED = QTail#q_tail.count == 0, + E3 = ?QUEUE:is_empty(QHead), LZ = Len == 0, - L3 = ?QUEUE:len(Q3), + L3 = ?QUEUE:len(QHead), - %% if the queue is empty, then delta is empty and q3 is empty. + %% If the queue is empty, then q_head and q_tail are both empty. true = LZ == (ED and E3), - %% There should be no messages in q1, q2, and q4 - true = Delta#delta.count + L3 == Len, + %% All messages are in q_head or q_tail. + true = QTail#q_tail.count + L3 == Len, true = Len >= 0, true = Bytes >= 0, @@ -924,9 +870,9 @@ a(State = #vqstate { delta = Delta, q3 = Q3, State. -d(Delta = #delta { start_seq_id = Start, count = Count, end_seq_id = End }) +qt(QTail = #q_tail { start_seq_id = Start, count = Count, end_seq_id = End }) when Start + Count =< End -> - Delta. + QTail. m(MsgStatus = #msg_status { is_persistent = IsPersistent, msg_location = MsgLocation, @@ -956,10 +902,10 @@ msg_status(IsPersistent, IsDelivered, SeqId, persist_to = determine_persist_to(Msg, IndexMaxSize), msg_props = MsgProps}. -beta_msg_status({MsgId, SeqId, MsgLocation, MsgProps, IsPersistent}) +msg_status({MsgId, SeqId, MsgLocation, MsgProps, IsPersistent}) when is_binary(MsgId) orelse MsgId =:= undefined -> - MS0 = beta_msg_status0(SeqId, MsgProps, IsPersistent), + MS0 = msg_status0(SeqId, MsgProps, IsPersistent), MS0#msg_status{msg_id = MsgId, msg = undefined, persist_to = case is_tuple(MsgLocation) of @@ -967,9 +913,9 @@ beta_msg_status({MsgId, SeqId, MsgLocation, MsgProps, IsPersistent}) false -> msg_store end, msg_location = MsgLocation}; -beta_msg_status({Msg, SeqId, MsgLocation, MsgProps, IsPersistent}) -> +msg_status({Msg, SeqId, MsgLocation, MsgProps, IsPersistent}) -> MsgId = mc:get_annotation(id, Msg), - MS0 = beta_msg_status0(SeqId, MsgProps, IsPersistent), + MS0 = msg_status0(SeqId, MsgProps, IsPersistent), MS0#msg_status{msg_id = MsgId, msg = Msg, persist_to = case MsgLocation of @@ -978,7 +924,7 @@ beta_msg_status({Msg, SeqId, MsgLocation, MsgProps, IsPersistent}) -> end, msg_location = MsgLocation}. -beta_msg_status0(SeqId, MsgProps, IsPersistent) -> +msg_status0(SeqId, MsgProps, IsPersistent) -> #msg_status{seq_id = SeqId, msg = undefined, is_persistent = IsPersistent, @@ -1035,36 +981,6 @@ msg_store_remove(MSCState, IsPersistent, MsgIds) -> rabbit_msg_store:remove(MsgIds, MCSState1) end). -betas_from_index_entries(List, TransientThreshold, DelsAndAcksFun, State = #vqstate{ next_deliver_seq_id = NextDeliverSeqId0 }) -> - {Filtered, NextDeliverSeqId, Acks, RamReadyCount, RamBytes, TransientCount, TransientBytes} = - lists:foldr( - fun ({_MsgOrId, SeqId, _MsgLocation, _MsgProps, IsPersistent} = M, - {Filtered1, NextDeliverSeqId1, Acks1, RRC, RB, TC, TB} = Acc) -> - case SeqId < TransientThreshold andalso not IsPersistent of - true -> {Filtered1, - next_deliver_seq_id(SeqId, NextDeliverSeqId1), - [SeqId | Acks1], RRC, RB, TC, TB}; - false -> MsgStatus = m(beta_msg_status(M)), - HaveMsg = msg_in_ram(MsgStatus), - Size = msg_size(MsgStatus), - case is_msg_in_pending_acks(SeqId, State) of - false -> {?QUEUE:in_r(MsgStatus, Filtered1), - NextDeliverSeqId1, Acks1, - RRC + one_if(HaveMsg), - RB + one_if(HaveMsg) * Size, - TC + one_if(not IsPersistent), - TB + one_if(not IsPersistent) * Size}; - true -> Acc %% [0] - end - end - end, {?QUEUE:new(), NextDeliverSeqId0, [], 0, 0, 0, 0}, List), - {Filtered, RamReadyCount, RamBytes, DelsAndAcksFun(NextDeliverSeqId, Acks, State), - TransientCount, TransientBytes}. -%% [0] We don't increase RamBytes here, even though it pertains to -%% unacked messages too, since if HaveMsg then the message must have -%% been stored in the QI, thus the message must have been in -%% qi_pending_ack, thus it must already have been in RAM. - %% We increase the next_deliver_seq_id only when the next %% message (next seq_id) was delivered. next_deliver_seq_id(SeqId, NextDeliverSeqId) @@ -1078,34 +994,34 @@ is_msg_in_pending_acks(SeqId, #vqstate { ram_pending_ack = RPA, maps:is_key(SeqId, RPA) orelse maps:is_key(SeqId, DPA). -expand_delta(SeqId, ?BLANK_DELTA_PATTERN(X), IsPersistent) -> - d(#delta { start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1, - transient = one_if(not IsPersistent)}); -expand_delta(SeqId, #delta { start_seq_id = StartSeqId, - count = Count, - transient = Transient } = Delta, +expand_q_tail(SeqId, ?BLANK_Q_TAIL_PATTERN(X), IsPersistent) -> + qt(#q_tail{ start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1, + transient = one_if(not IsPersistent)}); +expand_q_tail(SeqId, #q_tail{ start_seq_id = StartSeqId, + count = Count, + transient = Transient } = QTail, IsPersistent ) when SeqId < StartSeqId -> - d(Delta #delta { start_seq_id = SeqId, count = Count + 1, - transient = Transient + one_if(not IsPersistent)}); -expand_delta(SeqId, #delta { count = Count, - end_seq_id = EndSeqId, - transient = Transient } = Delta, + qt(QTail #q_tail{ start_seq_id = SeqId, count = Count + 1, + transient = Transient + one_if(not IsPersistent)}); +expand_q_tail(SeqId, #q_tail{ count = Count, + end_seq_id = EndSeqId, + transient = Transient } = QTail, IsPersistent) when SeqId >= EndSeqId -> - d(Delta #delta { count = Count + 1, end_seq_id = SeqId + 1, - transient = Transient + one_if(not IsPersistent)}); -expand_delta(_SeqId, #delta { count = Count, - transient = Transient } = Delta, + qt(QTail #q_tail{ count = Count + 1, end_seq_id = SeqId + 1, + transient = Transient + one_if(not IsPersistent)}); +expand_q_tail(_SeqId, #q_tail{ count = Count, + transient = Transient } = QTail, IsPersistent ) -> - d(Delta #delta { count = Count + 1, - transient = Transient + one_if(not IsPersistent) }). + qt(QTail #q_tail{ count = Count + 1, + transient = Transient + one_if(not IsPersistent) }). %%---------------------------------------------------------------------------- %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -init(IsDurable, IndexState, StoreState, DeltaCount, DeltaBytes, Terms, +init(IsDurable, IndexState, StoreState, DiskCount, DiskBytes, Terms, PersistentClient, TransientClient, VHost) -> NextSeqIdHint = case Terms of @@ -1115,36 +1031,33 @@ init(IsDurable, IndexState, StoreState, DeltaCount, DeltaBytes, Terms, {LowSeqId, HiSeqId, IndexState1} = rabbit_classic_queue_index_v2:bounds(IndexState, NextSeqIdHint), - {NextSeqId, NextDeliverSeqId, DeltaCount1, DeltaBytes1} = + {NextSeqId, NextDeliverSeqId, DiskCount1, DiskBytes1} = case Terms of - non_clean_shutdown -> {HiSeqId, HiSeqId, DeltaCount, DeltaBytes}; + non_clean_shutdown -> {HiSeqId, HiSeqId, DiskCount, DiskBytes}; _ -> NextSeqId0 = proplists:get_value(next_seq_id, Terms, HiSeqId), {NextSeqId0, proplists:get_value(next_deliver_seq_id, Terms, NextSeqId0), proplists:get_value(persistent_count, - Terms, DeltaCount), + Terms, DiskCount), proplists:get_value(persistent_bytes, - Terms, DeltaBytes)} + Terms, DiskBytes)} end, - Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of - true -> ?BLANK_DELTA; - false -> d(#delta { start_seq_id = LowSeqId, - count = DeltaCount1, - transient = 0, - end_seq_id = NextSeqId }) + QTail = case DiskCount1 == 0 andalso DiskCount /= undefined of + true -> ?BLANK_Q_TAIL; + false -> qt(#q_tail { start_seq_id = LowSeqId, + count = DiskCount1, + transient = 0, + end_seq_id = NextSeqId }) end, Now = erlang:monotonic_time(), {ok, IndexMaxSize} = application:get_env( rabbit, queue_index_embed_msgs_below), State = #vqstate { - q1 = ?QUEUE:new(), - q2 = ?QUEUE:new(), - delta = Delta, - q3 = ?QUEUE:new(), - q4 = ?QUEUE:new(), + q_head = ?QUEUE:new(), + q_tail = QTail, next_seq_id = NextSeqId, next_deliver_seq_id = NextDeliverSeqId, ram_pending_ack = #{}, @@ -1156,13 +1069,12 @@ init(IsDurable, IndexState, StoreState, DeltaCount, DeltaBytes, Terms, transient_threshold = NextSeqId, qi_embed_msgs_below = IndexMaxSize, - len = DeltaCount1, - persistent_count = DeltaCount1, - bytes = DeltaBytes1, - persistent_bytes = DeltaBytes1, + len = DiskCount1, + persistent_count = DiskCount1, + bytes = DiskBytes1, + persistent_bytes = DiskBytes1, delta_transient_bytes = 0, - target_ram_count = infinity, ram_msg_count = 0, ram_msg_count_prev = 0, ram_ack_count_prev = 0, @@ -1181,11 +1093,8 @@ init(IsDurable, IndexState, StoreState, DeltaCount, DeltaBytes, Terms, disk_read_count = 0, disk_write_count = 0, - io_batch_size = 0, - - mode = default, virtual_host = VHost}, - a(maybe_deltas_to_betas(State)). + a(read_from_q_tail(State)). blank_rates(Now) -> #rates { in = 0.0, @@ -1194,11 +1103,11 @@ blank_rates(Now) -> ack_out = 0.0, timestamp = Now}. -in_r(MsgStatus = #msg_status {}, State = #vqstate { q3 = Q3 }) -> - State #vqstate { q3 = ?QUEUE:in_r(MsgStatus, Q3) }. +in_r(MsgStatus = #msg_status {}, State = #vqstate { q_head = QHead }) -> + State #vqstate { q_head = ?QUEUE:in_r(MsgStatus, QHead) }. queue_out(State) -> - case fetch_from_q3(State) of + case fetch_from_q_head(State) of {empty, _State1} = Result -> Result; {loaded, {MsgStatus, State1}} -> {{value, set_deliver_flag(State, MsgStatus)}, State1} end. @@ -1278,7 +1187,7 @@ stats_pending_acks(MS, St) -> %% Message may or may not be persistent and the contents %% may or may not be in memory. %% -%% Removal from delta_transient_bytes is done by maybe_deltas_to_betas. +%% Removal from delta_transient_bytes is done by read_from_q_tail. stats_removed(MS = #msg_status{is_persistent = true, msg = undefined}, St) -> St#vqstate{?UP(len, persistent_count, -1), ?UP(bytes, persistent_bytes, -msg_size(MS))}; @@ -1317,12 +1226,12 @@ stats_requeued_memory(MS, St) -> %% TODO!!! %% @todo For v2 since we don't remove from disk until we ack, we don't need -%% to write to disk again on requeue. If the message falls within delta -%% we can just drop the MsgStatus. Otherwise we just put it in q3 and +%% to write to disk again on requeue. If the message falls within q_tail +%% we can just drop the MsgStatus. Otherwise we just put it in q_head and %% we don't do any disk writes. %% %% So we don't need to change anything except how we count stats as -%% well as delta stats if the message falls within delta. +%% well as q_tail stats if the message falls within q_tail. stats_requeued_disk(MS = #msg_status{is_persistent = true}, St) -> St#vqstate{?UP(len, +1), ?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; @@ -1518,16 +1427,6 @@ purge_and_index_reset(State) -> State1 = purge1(process_delivers_and_acks_fun(none), State), a(reset_qi_state(State1)). -%% This function removes messages from each of delta and q3. -%% -%% purge_betas_and_deltas/2 loads messages from the queue index, -%% filling up q3. The messages loaded into q3 are removed by calling -%% remove_queue_entries/3 until there are no more messages to be read -%% from the queue index. Messages are read in batches from the queue -%% index. -purge1(AfterFun, State) -> - a(purge_betas_and_deltas(AfterFun, State)). - reset_qi_state(State = #vqstate{ index_state = IndexState0, store_state = StoreState0 }) -> StoreState = rabbit_classic_queue_store_v2:terminate(StoreState0), @@ -1545,18 +1444,25 @@ count_pending_acks(#vqstate { ram_pending_ack = RPA, disk_pending_ack = DPA }) -> map_size(RPA) + map_size(DPA). -%% @todo When doing maybe_deltas_to_betas stats are updated. Then stats +%% This function removes messages from each of q_head and q_tail. +%% +%% It loads messages from the queue index in batches, +%% filling up q_head. The messages loaded into q_head are removed by calling +%% remove_queue_entries/3 until there are no more messages to be read +%% from the queue index. +%% +%% @todo When doing read_from_q_tail stats are updated. Then stats %% are updated again in remove_queue_entries1. All unnecessary since %% we are purging anyway? -purge_betas_and_deltas(DelsAndAcksFun, State) -> +purge1(DelsAndAcksFun, State) -> %% We use the maximum memory limit when purging to get greater performance. MemoryLimit = 2048, - State0 = #vqstate { q3 = Q3 } = maybe_deltas_to_betas(DelsAndAcksFun, State, MemoryLimit, metadata_only), + State0 = #vqstate { q_head = QHead } = read_from_q_tail(DelsAndAcksFun, State, MemoryLimit, metadata_only), - case ?QUEUE:is_empty(Q3) of + case ?QUEUE:is_empty(QHead) of true -> State0; - false -> State1 = remove_queue_entries(Q3, DelsAndAcksFun, State0), - purge_betas_and_deltas(DelsAndAcksFun, State1#vqstate{q3 = ?QUEUE:new()}) + false -> State1 = remove_queue_entries(QHead, DelsAndAcksFun, State0), + purge1(DelsAndAcksFun, State1#vqstate{q_head = ?QUEUE:new()}) end. remove_queue_entries(Q, DelsAndAcksFun, @@ -1608,7 +1514,8 @@ process_delivers_and_acks_fun(_) -> publish1(Msg, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, IsDelivered, _ChPid, PersistFun, - State = #vqstate { q3 = Q3, delta = Delta = #delta { count = DeltaCount }, + State = #vqstate { q_head = QHead, + q_tail = QTail = #q_tail { count = QTailCount }, len = Len, qi_embed_msgs_below = IndexMaxSize, next_seq_id = SeqId, @@ -1626,16 +1533,16 @@ publish1(Msg, %% limit is at 1 because the queue process will need to access this message to know %% expiration information. MemoryLimit = min(1 + floor(2 * OutRate), 2048), - State3 = case DeltaCount of - %% Len is the same as Q3Len when DeltaCount =:= 0. + State3 = case QTailCount of + %% Len is the same as QHead length when QTailCount =:= 0. 0 when Len < MemoryLimit -> {MsgStatus1, State1} = PersistFun(false, false, MsgStatus, State), - State2 = State1 #vqstate { q3 = ?QUEUE:in(m(MsgStatus1), Q3) }, + State2 = State1 #vqstate { q_head = ?QUEUE:in(m(MsgStatus1), QHead) }, stats_published_memory(MsgStatus1, State2); _ -> {MsgStatus1, State1} = PersistFun(true, true, MsgStatus, State), - Delta1 = expand_delta(SeqId, Delta, IsPersistent), - State2 = State1 #vqstate { delta = Delta1 }, + QTail1 = expand_q_tail(SeqId, QTail, IsPersistent), + State2 = State1 #vqstate { q_tail = QTail1 }, stats_published_disk(MsgStatus1, State2) end, {UC1, UCS1} = maybe_needs_confirming(NeedsConfirming, persist_to(MsgStatus), @@ -1951,19 +1858,19 @@ requeue_merge(SeqIds, Q, Front, MsgIds, _Limit, State) -> {SeqIds, ?QUEUE:join(Front, Q), MsgIds, State}. -delta_merge([], Delta, MsgIds, State) -> - {Delta, MsgIds, State}; -delta_merge(SeqIds, Delta, MsgIds, State) -> - lists:foldl(fun (SeqId, {Delta0, MsgIds0, State0} = Acc) -> +q_tail_merge([], QTail, MsgIds, State) -> + {QTail, MsgIds, State}; +q_tail_merge(SeqIds, QTail, MsgIds, State) -> + lists:foldl(fun (SeqId, {QTail0, MsgIds0, State0} = Acc) -> case msg_from_pending_ack(SeqId, State0) of {none, _} -> Acc; {#msg_status { msg_id = MsgId, is_persistent = IsPersistent } = MsgStatus, State1} -> - {expand_delta(SeqId, Delta0, IsPersistent), [MsgId | MsgIds0], + {expand_q_tail(SeqId, QTail0, IsPersistent), [MsgId | MsgIds0], stats_requeued_disk(MsgStatus, State1)} end - end, {Delta, MsgIds, State}, SeqIds). + end, {QTail, MsgIds, State}, SeqIds). %% Mostly opposite of record_pending_ack/2 msg_from_pending_ack(SeqId, State) -> @@ -1976,22 +1883,22 @@ msg_from_pending_ack(SeqId, State) -> State1} end. -delta_limit(?BLANK_DELTA_PATTERN(_)) -> undefined; -delta_limit(#delta { start_seq_id = StartSeqId }) -> StartSeqId. +q_tail_limit(?BLANK_Q_TAIL_PATTERN(_)) -> undefined; +q_tail_limit(#q_tail{ start_seq_id = StartSeqId }) -> StartSeqId. %%---------------------------------------------------------------------------- %% Phase changes %%---------------------------------------------------------------------------- -fetch_from_q3(State = #vqstate { delta = #delta { count = DeltaCount }, - q3 = Q3 }) -> - case ?QUEUE:out(Q3) of - {empty, _Q3} when DeltaCount =:= 0 -> +fetch_from_q_head(State = #vqstate { q_head = QHead, + q_tail = #q_tail { count = QTailCount }}) -> + case ?QUEUE:out(QHead) of + {empty, _QHead} when QTailCount =:= 0 -> {empty, State}; - {empty, _Q3} -> - fetch_from_q3(maybe_deltas_to_betas(State)); - {{value, MsgStatus}, Q3a} -> - State1 = State #vqstate { q3 = Q3a }, + {empty, _QHead} -> + fetch_from_q_head(read_from_q_tail(State)); + {{value, MsgStatus}, QHead1} -> + State1 = State #vqstate { q_head = QHead1 }, {loaded, {MsgStatus, State1}} end. @@ -2003,20 +1910,20 @@ fetch_from_q3(State = #vqstate { delta = #delta { count = DeltaCount }, -define(SHARED_READ_MANY_SIZE_THRESHOLD, 12000). -define(SHARED_READ_MANY_COUNT_THRESHOLD, 10). -maybe_deltas_to_betas(State = #vqstate { rates = #rates{ out = OutRate }}) -> +read_from_q_tail(State = #vqstate { rates = #rates{ out = OutRate }}) -> AfterFun = process_delivers_and_acks_fun(deliver_and_ack), %% We allow from 1 to 2048 messages in memory depending on the consume rate. MemoryLimit = min(1 + floor(2 * OutRate), 2048), - maybe_deltas_to_betas(AfterFun, State, MemoryLimit, messages). + read_from_q_tail(AfterFun, State, MemoryLimit, messages). -maybe_deltas_to_betas(_DelsAndAcksFun, - State = #vqstate {delta = ?BLANK_DELTA_PATTERN(X) }, +read_from_q_tail(_DelsAndAcksFun, + State = #vqstate {q_tail = ?BLANK_Q_TAIL_PATTERN(X) }, _MemoryLimit, _WhatToRead) -> State; -maybe_deltas_to_betas(DelsAndAcksFun, +read_from_q_tail(DelsAndAcksFun, State = #vqstate { - delta = Delta, - q3 = Q3, + q_head = QHead0, + q_tail = QTail, index_state = IndexState, store_state = StoreState, msg_store_clients = {MCStateP, MCStateT}, @@ -2026,18 +1933,18 @@ maybe_deltas_to_betas(DelsAndAcksFun, delta_transient_bytes = DeltaTransientBytes, transient_threshold = TransientThreshold }, MemoryLimit, WhatToRead) -> - #delta { start_seq_id = DeltaSeqId, - count = DeltaCount, - transient = Transient, - end_seq_id = DeltaSeqIdEnd } = Delta, + #q_tail { start_seq_id = QTailSeqId, + count = QTailCount, + transient = Transient, + end_seq_id = QTailSeqIdEnd } = QTail, %% For v2 we want to limit the number of messages read at once to lower %% the memory footprint. We use the consume rate to determine how many %% messages we read. - DeltaSeqLimit = DeltaSeqId + MemoryLimit, - DeltaSeqId1 = - lists:min([rabbit_classic_queue_index_v2:next_segment_boundary(DeltaSeqId), - DeltaSeqLimit, DeltaSeqIdEnd]), - {List0, IndexState1} = rabbit_classic_queue_index_v2:read(DeltaSeqId, DeltaSeqId1, IndexState), + QTailSeqLimit = QTailSeqId + MemoryLimit, + QTailSeqId1 = + lists:min([rabbit_classic_queue_index_v2:next_segment_boundary(QTailSeqId), + QTailSeqLimit, QTailSeqIdEnd]), + {List0, IndexState1} = rabbit_classic_queue_index_v2:read(QTailSeqId, QTailSeqId1, IndexState), {List, StoreState3, MCStateP3, MCStateT3} = case WhatToRead of messages -> %% We try to read messages from disk all at once instead of @@ -2105,40 +2012,40 @@ maybe_deltas_to_betas(DelsAndAcksFun, metadata_only -> {List0, StoreState, MCStateP, MCStateT} end, - {Q3a, RamCountsInc, RamBytesInc, State1, TransientCount, TransientBytes} = - betas_from_index_entries(List, TransientThreshold, - DelsAndAcksFun, - State #vqstate { index_state = IndexState1, - store_state = StoreState3, - msg_store_clients = {MCStateP3, MCStateT3}}), + {QHead1, RamCountsInc, RamBytesInc, State1, TransientCount, TransientBytes} = + become_q_head(List, TransientThreshold, + DelsAndAcksFun, + State #vqstate { index_state = IndexState1, + store_state = StoreState3, + msg_store_clients = {MCStateP3, MCStateT3}}), State2 = State1 #vqstate { ram_msg_count = RamMsgCount + RamCountsInc, ram_bytes = RamBytes + RamBytesInc, disk_read_count = DiskReadCount + RamCountsInc }, - case ?QUEUE:len(Q3a) of + case ?QUEUE:len(QHead1) of 0 -> %% we ignored every message in the segment due to it being %% transient and below the threshold - maybe_deltas_to_betas( + read_from_q_tail( DelsAndAcksFun, State2 #vqstate { - delta = d(Delta #delta { start_seq_id = DeltaSeqId1 })}, + q_tail = qt(QTail #q_tail { start_seq_id = QTailSeqId1 })}, MemoryLimit, WhatToRead); - Q3aLen -> - Q3b = ?QUEUE:join(Q3, Q3a), - case DeltaCount - Q3aLen of + QHead1Len -> + QHead = ?QUEUE:join(QHead0, QHead1), + case QTailCount - QHead1Len of 0 -> - %% delta is now empty - State2 #vqstate { delta = ?BLANK_DELTA, - q3 = Q3b, + %% q_tail is now empty + State2 #vqstate { q_tail = ?BLANK_Q_TAIL, + q_head = QHead, delta_transient_bytes = 0}; N when N > 0 -> - Delta1 = d(#delta { start_seq_id = DeltaSeqId1, - count = N, - %% @todo Probably something wrong, seen it become negative... - transient = Transient - TransientCount, - end_seq_id = DeltaSeqIdEnd }), - State2 #vqstate { delta = Delta1, - q3 = Q3b, + QTail1 = qt(#q_tail { start_seq_id = QTailSeqId1, + count = N, + %% @todo Probably something wrong, seen it become negative... + transient = Transient - TransientCount, + end_seq_id = QTailSeqIdEnd }), + State2 #vqstate { q_head = QHead, + q_tail = QTail1, delta_transient_bytes = DeltaTransientBytes - TransientBytes } end end. @@ -2162,6 +2069,36 @@ merge_sh_read_msgs([M = {MsgId, _, _, _, _}|MTail], Reads) -> merge_sh_read_msgs(MTail, _Reads) -> MTail. +become_q_head(List, TransientThreshold, DelsAndAcksFun, State = #vqstate{ next_deliver_seq_id = NextDeliverSeqId0 }) -> + {Filtered, NextDeliverSeqId, Acks, RamReadyCount, RamBytes, TransientCount, TransientBytes} = + lists:foldr( + fun ({_MsgOrId, SeqId, _MsgLocation, _MsgProps, IsPersistent} = M, + {Filtered1, NextDeliverSeqId1, Acks1, RRC, RB, TC, TB} = Acc) -> + case SeqId < TransientThreshold andalso not IsPersistent of + true -> {Filtered1, + next_deliver_seq_id(SeqId, NextDeliverSeqId1), + [SeqId | Acks1], RRC, RB, TC, TB}; + false -> MsgStatus = m(msg_status(M)), + HaveMsg = msg_in_ram(MsgStatus), + Size = msg_size(MsgStatus), + case is_msg_in_pending_acks(SeqId, State) of + false -> {?QUEUE:in_r(MsgStatus, Filtered1), + NextDeliverSeqId1, Acks1, + RRC + one_if(HaveMsg), + RB + one_if(HaveMsg) * Size, + TC + one_if(not IsPersistent), + TB + one_if(not IsPersistent) * Size}; + true -> Acc %% [0] + end + end + end, {?QUEUE:new(), NextDeliverSeqId0, [], 0, 0, 0, 0}, List), + {Filtered, RamReadyCount, RamBytes, DelsAndAcksFun(NextDeliverSeqId, Acks, State), + TransientCount, TransientBytes}. +%% [0] We don't increase RamBytes here, even though it pertains to +%% unacked messages too, since if HaveMsg then the message must have +%% been stored in the QI, thus the message must have been in +%% qi_pending_ack, thus it must already have been in RAM. + maybe_client_terminate(MSCStateP) -> %% Queue might have been asked to stop by the supervisor, it needs a clean %% shutdown in order for the supervising strategy to work - if it reaches max @@ -2178,16 +2115,16 @@ format_state(#vqstate{} = S) -> format_state(false, #vqstate{} = S) -> S; -format_state(true, #vqstate{q3 = Q3, +format_state(true, #vqstate{q_head = QHead, ram_pending_ack = RamPendingAck, disk_pending_ack = DiskPendingAck, index_state = IndexState, store_state = StoreState} = S) -> - S#vqstate{q3 = format_q3(Q3), + S#vqstate{q_head = format_q_head(QHead), ram_pending_ack = maps:keys(RamPendingAck), disk_pending_ack = maps:keys(DiskPendingAck), index_state = rabbit_classic_queue_index_v2:format_state(IndexState), store_state = rabbit_classic_queue_store_v2:format_state(StoreState)}. -format_q3(Q3) -> - [SeqId || #msg_status{seq_id = SeqId} <- ?QUEUE:to_list(Q3)]. +format_q_head(QHead) -> + [SeqId || #msg_status{seq_id = SeqId} <- ?QUEUE:to_list(QHead)]. diff --git a/deps/rabbit/test/backing_queue_SUITE.erl b/deps/rabbit/test/backing_queue_SUITE.erl index 130d276d6ae8..27cde10fe127 100644 --- a/deps/rabbit/test/backing_queue_SUITE.erl +++ b/deps/rabbit/test/backing_queue_SUITE.erl @@ -21,7 +21,7 @@ -define(VHOST, <<"/">>). -define(VARIABLE_QUEUE_TESTCASES, [ - variable_queue_partial_segments_delta_thing, + variable_queue_partial_segments_q_tail_thing, variable_queue_all_the_bits_not_covered_elsewhere_A, variable_queue_all_the_bits_not_covered_elsewhere_B, variable_queue_drop, @@ -1164,16 +1164,16 @@ get_queue_sup_pid([{_, SupPid, _, _} | Rest], QueuePid) -> get_queue_sup_pid([], _QueuePid) -> undefined. -variable_queue_partial_segments_delta_thing(Config) -> +variable_queue_partial_segments_q_tail_thing(Config) -> passed = rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, variable_queue_partial_segments_delta_thing1, [Config]). + ?MODULE, variable_queue_partial_segments_q_tail_thing1, [Config]). -variable_queue_partial_segments_delta_thing1(Config) -> +variable_queue_partial_segments_q_tail_thing1(Config) -> with_fresh_variable_queue( - fun variable_queue_partial_segments_delta_thing2/2, + fun variable_queue_partial_segments_q_tail_thing2/2, ?config(variable_queue_type, Config)). -variable_queue_partial_segments_delta_thing2(VQ0, _QName) -> +variable_queue_partial_segments_q_tail_thing2(VQ0, _QName) -> IndexMod = index_mod(), SegmentSize = IndexMod:next_segment_boundary(0), HalfSegment = SegmentSize div 2, @@ -1184,25 +1184,25 @@ variable_queue_partial_segments_delta_thing2(VQ0, _QName) -> VQ2, %% We only have one message in memory because the amount in memory %% depends on the consume rate, which is nil in this test. - [{delta, {delta, 1, OneAndAHalfSegment - 1, 0, OneAndAHalfSegment}}, - {q3, 1}, + [{q_head, 1}, + {q_tail, {q_tail, 1, OneAndAHalfSegment - 1, 0, OneAndAHalfSegment}}, {len, OneAndAHalfSegment}]), VQ5 = check_variable_queue_status( variable_queue_publish(true, 1, VQ3), - %% one alpha, but it's in the same segment as the deltas + %% one alpha, but it's in the same segment as the q_tail %% @todo That's wrong now! v1/v2 - [{delta, {delta, 1, OneAndAHalfSegment, 0, OneAndAHalfSegment + 1}}, - {q3, 1}, + [{q_head, 1}, + {q_tail, {q_tail, 1, OneAndAHalfSegment, 0, OneAndAHalfSegment + 1}}, {len, OneAndAHalfSegment + 1}]), {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, SegmentSize + HalfSegment + 1, VQ5), VQ7 = check_variable_queue_status( VQ6, - %% We only read from delta up to the end of the segment, so + %% We only read from q_tail up to the end of the segment, so %% after fetching exactly one segment, we should have no %% messages in memory. - [{delta, {delta, SegmentSize, HalfSegment + 1, 0, OneAndAHalfSegment + 1}}, - {q3, 0}, + [{q_head, 0}, + {q_tail, {q_tail, SegmentSize, HalfSegment + 1, 0, OneAndAHalfSegment + 1}}, {len, HalfSegment + 1}]), {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, HalfSegment + 1, VQ7), @@ -1548,7 +1548,7 @@ variable_queue_requeue2(VQ0, _Config) -> {empty, VQ3} = rabbit_variable_queue:fetch(true, VQ2), VQ3. -%% requeue from ram_pending_ack into q3, move to delta and then empty queue +%% requeue from ram_pending_ack into q_head, move to q_tail and then empty queue variable_queue_requeue_ram_beta(Config) -> passed = rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, variable_queue_requeue_ram_beta1, [Config]). @@ -1768,10 +1768,8 @@ with_fresh_variable_queue(Fun, Mode) -> ok = unin_empty_test_queue(QName), VQ = variable_queue_init(test_amqqueue(QName, true), false), S0 = variable_queue_status(VQ), - assert_props(S0, [{q1, 0}, {q2, 0}, - {delta, - {delta, undefined, 0, 0, undefined}}, - {q3, 0}, {q4, 0}, + assert_props(S0, [{q_head, 0}, + {q_tail, {q_tail, undefined, 0, 0, undefined}}, {len, 0}]), VQ1 = set_queue_mode(Mode, VQ), try @@ -1878,8 +1876,8 @@ requeue_one_by_one(Acks, VQ) -> VQM end, VQ, Acks). -%% Create a vq with messages in q1, delta, and q3, and holes (in the -%% form of pending acks) in the latter two. +%% Historical test case that exercised the many different +%% internal queues. Kept for completeness. variable_queue_with_holes(VQ0) -> Interval = 2048, %% should match vq:IO_BATCH_SIZE IndexMod = index_mod(), @@ -1898,7 +1896,7 @@ variable_queue_with_holes(VQ0) -> {_MsgIds, VQ4} = rabbit_variable_queue:requeue( Acks -- (Subset1 ++ Subset2 ++ Subset3), VQ3), VQ5 = requeue_one_by_one(Subset1, VQ4), - %% by now we have some messages (and holes) in delta + %% by now we have some messages (and holes) in q_tail VQ6 = requeue_one_by_one(Subset2, VQ5), %% add the q1 tail VQ8 = variable_queue_publish( @@ -1916,11 +1914,11 @@ variable_queue_with_holes(VQ0) -> vq_with_holes_assertions(VQ) -> [false = case V of - {delta, _, 0, _, _} -> true; - 0 -> true; - _ -> false + {q_tail, _, 0, _, _} -> true; + 0 -> true; + _ -> false end || {K, V} <- variable_queue_status(VQ), - lists:member(K, [delta, q3])]. + lists:member(K, [q_head, q_tail])]. check_variable_queue_status(VQ0, Props) -> VQ1 = variable_queue_wait_for_shuffling_end(VQ0), From be28f74054d0cc40ffac33daba04b325a3bf55c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Mon, 10 Nov 2025 12:36:34 +0100 Subject: [PATCH 14/16] CQ: Remove paged out transient metrics These metrics make a lot less sense in today's CQ implementation. Today messages are either in memory about to be sent to the consumer, or on disk because they are further down the queue. Messages are no longer paged out to disk depending on memory pressure. So knowing how many transient messages are on disk is not as important. In practice almost all of them will be. --- deps/rabbit/src/rabbit_backing_queue.erl | 3 +- deps/rabbit/src/rabbit_variable_queue.erl | 83 ++++++------------- deps/rabbit/test/backing_queue_SUITE.erl | 14 ++-- .../priv/www/js/tmpl/classic-queue-stats.ejs | 7 -- deps/rabbitmq_prometheus/metrics.md | 2 - ...etheus_rabbitmq_core_metrics_collector.erl | 19 ++--- 6 files changed, 39 insertions(+), 89 deletions(-) diff --git a/deps/rabbit/src/rabbit_backing_queue.erl b/deps/rabbit/src/rabbit_backing_queue.erl index 37bd7d4967ee..6c0fe97e0f71 100644 --- a/deps/rabbit/src/rabbit_backing_queue.erl +++ b/deps/rabbit/src/rabbit_backing_queue.erl @@ -14,8 +14,7 @@ message_bytes, message_bytes_ready, message_bytes_unacknowledged, message_bytes_ram, message_bytes_persistent, head_message_timestamp, - disk_reads, disk_writes, backing_queue_status, - messages_paged_out, message_bytes_paged_out]). + disk_reads, disk_writes, backing_queue_status]). %% We can't specify a per-queue ack/state with callback signatures -type ack() :: any(). diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index dd80a580d292..587d539db5a2 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -22,7 +22,7 @@ -export([start/2, stop/1]). %% exported for testing only --export([start_msg_store/3, stop_msg_store/1, init/3]). +-export([start_msg_store/3, stop_msg_store/1]). -include("mc.hrl"). -include_lib("stdlib/include/qlc.hrl"). @@ -144,7 +144,6 @@ unacked_bytes, persistent_count, %% w unacked persistent_bytes, %% w unacked - delta_transient_bytes, %% ram_msg_count, %% w/o unacked ram_msg_count_prev, @@ -204,7 +203,6 @@ -record(q_tail, { start_seq_id, %% start_seq_id is inclusive count, - transient, end_seq_id %% end_seq_id is exclusive }). @@ -285,11 +283,9 @@ -define(BLANK_Q_TAIL, #q_tail { start_seq_id = undefined, count = 0, - transient = 0, end_seq_id = undefined }). -define(BLANK_Q_TAIL_PATTERN(Z), #q_tail { start_seq_id = Z, count = 0, - transient = 0, end_seq_id = Z }). -define(MICROS_PER_SECOND, 1000000.0). @@ -698,9 +694,6 @@ info(messages_ram, State) -> info(messages_ready_ram, State) + info(messages_unacknowledged_ram, State); info(messages_persistent, #vqstate{persistent_count = PersistentCount}) -> PersistentCount; -%% @todo Remove. -info(messages_paged_out, #vqstate{q_tail = #q_tail{transient = Count}}) -> - Count; info(message_bytes, #vqstate{bytes = Bytes, unacked_bytes = UBytes}) -> Bytes + UBytes; @@ -712,9 +705,6 @@ info(message_bytes_ram, #vqstate{ram_bytes = RamBytes}) -> RamBytes; info(message_bytes_persistent, #vqstate{persistent_bytes = PersistentBytes}) -> PersistentBytes; -%% @todo Remove. -info(message_bytes_paged_out, #vqstate{delta_transient_bytes = PagedOutBytes}) -> - PagedOutBytes; info(head_message_timestamp, #vqstate{ q_head = QHead, ram_pending_ack = RPA}) -> @@ -994,28 +984,18 @@ is_msg_in_pending_acks(SeqId, #vqstate { ram_pending_ack = RPA, maps:is_key(SeqId, RPA) orelse maps:is_key(SeqId, DPA). -expand_q_tail(SeqId, ?BLANK_Q_TAIL_PATTERN(X), IsPersistent) -> - qt(#q_tail{ start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1, - transient = one_if(not IsPersistent)}); +expand_q_tail(SeqId, ?BLANK_Q_TAIL_PATTERN(X)) -> + qt(#q_tail{ start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1 }); expand_q_tail(SeqId, #q_tail{ start_seq_id = StartSeqId, - count = Count, - transient = Transient } = QTail, - IsPersistent ) + count = Count } = QTail) when SeqId < StartSeqId -> - qt(QTail #q_tail{ start_seq_id = SeqId, count = Count + 1, - transient = Transient + one_if(not IsPersistent)}); + qt(QTail #q_tail{ start_seq_id = SeqId, count = Count + 1 }); expand_q_tail(SeqId, #q_tail{ count = Count, - end_seq_id = EndSeqId, - transient = Transient } = QTail, - IsPersistent) + end_seq_id = EndSeqId } = QTail) when SeqId >= EndSeqId -> - qt(QTail #q_tail{ count = Count + 1, end_seq_id = SeqId + 1, - transient = Transient + one_if(not IsPersistent)}); -expand_q_tail(_SeqId, #q_tail{ count = Count, - transient = Transient } = QTail, - IsPersistent ) -> - qt(QTail #q_tail{ count = Count + 1, - transient = Transient + one_if(not IsPersistent) }). + qt(QTail #q_tail{ count = Count + 1, end_seq_id = SeqId + 1 }); +expand_q_tail(_SeqId, #q_tail{ count = Count } = QTail) -> + qt(QTail #q_tail{ count = Count + 1 }). %%---------------------------------------------------------------------------- %% Internal major helpers for Public API @@ -1048,7 +1028,6 @@ init(IsDurable, IndexState, StoreState, DiskCount, DiskBytes, Terms, true -> ?BLANK_Q_TAIL; false -> qt(#q_tail { start_seq_id = LowSeqId, count = DiskCount1, - transient = 0, end_seq_id = NextSeqId }) end, Now = erlang:monotonic_time(), @@ -1073,7 +1052,6 @@ init(IsDurable, IndexState, StoreState, DiskCount, DiskBytes, Terms, persistent_count = DiskCount1, bytes = DiskBytes1, persistent_bytes = DiskBytes1, - delta_transient_bytes = 0, ram_msg_count = 0, ram_msg_count_prev = 0, @@ -1163,7 +1141,7 @@ stats_published_disk(MS = #msg_status{is_persistent = true}, St) -> ?UP(bytes, persistent_bytes, +msg_size(MS))}; stats_published_disk(MS = #msg_status{is_persistent = false}, St) -> St#vqstate{?UP(len, +1), - ?UP(bytes, delta_transient_bytes, +msg_size(MS))}. + ?UP(bytes, +msg_size(MS))}. %% Pending acks do not add to len. Messages are kept in memory. stats_published_pending_acks(MS = #msg_status{is_persistent = true}, St) -> @@ -1186,8 +1164,6 @@ stats_pending_acks(MS, St) -> %% Message may or may not be persistent and the contents %% may or may not be in memory. -%% -%% Removal from delta_transient_bytes is done by read_from_q_tail. stats_removed(MS = #msg_status{is_persistent = true, msg = undefined}, St) -> St#vqstate{?UP(len, persistent_count, -1), ?UP(bytes, persistent_bytes, -msg_size(MS))}; @@ -1237,7 +1213,6 @@ stats_requeued_disk(MS = #msg_status{is_persistent = true}, St) -> ?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; stats_requeued_disk(MS = #msg_status{is_persistent = false}, St) -> St#vqstate{?UP(len, +1), - ?UP(bytes, delta_transient_bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}. msg_size(#msg_status{msg_props = #message_properties{size = Size}}) -> Size. @@ -1541,7 +1516,7 @@ publish1(Msg, stats_published_memory(MsgStatus1, State2); _ -> {MsgStatus1, State1} = PersistFun(true, true, MsgStatus, State), - QTail1 = expand_q_tail(SeqId, QTail, IsPersistent), + QTail1 = expand_q_tail(SeqId, QTail), State2 = State1 #vqstate { q_tail = QTail1 }, stats_published_disk(MsgStatus1, State2) end, @@ -1865,9 +1840,8 @@ q_tail_merge(SeqIds, QTail, MsgIds, State) -> case msg_from_pending_ack(SeqId, State0) of {none, _} -> Acc; - {#msg_status { msg_id = MsgId, - is_persistent = IsPersistent } = MsgStatus, State1} -> - {expand_q_tail(SeqId, QTail0, IsPersistent), [MsgId | MsgIds0], + {#msg_status { msg_id = MsgId } = MsgStatus, State1} -> + {expand_q_tail(SeqId, QTail0), [MsgId | MsgIds0], stats_requeued_disk(MsgStatus, State1)} end end, {QTail, MsgIds, State}, SeqIds). @@ -1930,12 +1904,10 @@ read_from_q_tail(DelsAndAcksFun, ram_msg_count = RamMsgCount, ram_bytes = RamBytes, disk_read_count = DiskReadCount, - delta_transient_bytes = DeltaTransientBytes, transient_threshold = TransientThreshold }, MemoryLimit, WhatToRead) -> #q_tail { start_seq_id = QTailSeqId, count = QTailCount, - transient = Transient, end_seq_id = QTailSeqIdEnd } = QTail, %% For v2 we want to limit the number of messages read at once to lower %% the memory footprint. We use the consume rate to determine how many @@ -2012,7 +1984,7 @@ read_from_q_tail(DelsAndAcksFun, metadata_only -> {List0, StoreState, MCStateP, MCStateT} end, - {QHead1, RamCountsInc, RamBytesInc, State1, TransientCount, TransientBytes} = + {QHead1, RamCountsInc, RamBytesInc, State1} = become_q_head(List, TransientThreshold, DelsAndAcksFun, State #vqstate { index_state = IndexState1, @@ -2036,17 +2008,13 @@ read_from_q_tail(DelsAndAcksFun, 0 -> %% q_tail is now empty State2 #vqstate { q_tail = ?BLANK_Q_TAIL, - q_head = QHead, - delta_transient_bytes = 0}; + q_head = QHead }; N when N > 0 -> QTail1 = qt(#q_tail { start_seq_id = QTailSeqId1, - count = N, - %% @todo Probably something wrong, seen it become negative... - transient = Transient - TransientCount, - end_seq_id = QTailSeqIdEnd }), + count = N, + end_seq_id = QTailSeqIdEnd }), State2 #vqstate { q_head = QHead, - q_tail = QTail1, - delta_transient_bytes = DeltaTransientBytes - TransientBytes } + q_tail = QTail1 } end end. @@ -2070,14 +2038,14 @@ merge_sh_read_msgs(MTail, _Reads) -> MTail. become_q_head(List, TransientThreshold, DelsAndAcksFun, State = #vqstate{ next_deliver_seq_id = NextDeliverSeqId0 }) -> - {Filtered, NextDeliverSeqId, Acks, RamReadyCount, RamBytes, TransientCount, TransientBytes} = + {Filtered, NextDeliverSeqId, Acks, RamReadyCount, RamBytes} = lists:foldr( fun ({_MsgOrId, SeqId, _MsgLocation, _MsgProps, IsPersistent} = M, - {Filtered1, NextDeliverSeqId1, Acks1, RRC, RB, TC, TB} = Acc) -> + {Filtered1, NextDeliverSeqId1, Acks1, RRC, RB} = Acc) -> case SeqId < TransientThreshold andalso not IsPersistent of true -> {Filtered1, next_deliver_seq_id(SeqId, NextDeliverSeqId1), - [SeqId | Acks1], RRC, RB, TC, TB}; + [SeqId | Acks1], RRC, RB}; false -> MsgStatus = m(msg_status(M)), HaveMsg = msg_in_ram(MsgStatus), Size = msg_size(MsgStatus), @@ -2085,15 +2053,12 @@ become_q_head(List, TransientThreshold, DelsAndAcksFun, State = #vqstate{ next_d false -> {?QUEUE:in_r(MsgStatus, Filtered1), NextDeliverSeqId1, Acks1, RRC + one_if(HaveMsg), - RB + one_if(HaveMsg) * Size, - TC + one_if(not IsPersistent), - TB + one_if(not IsPersistent) * Size}; + RB + one_if(HaveMsg) * Size}; true -> Acc %% [0] end end - end, {?QUEUE:new(), NextDeliverSeqId0, [], 0, 0, 0, 0}, List), - {Filtered, RamReadyCount, RamBytes, DelsAndAcksFun(NextDeliverSeqId, Acks, State), - TransientCount, TransientBytes}. + end, {?QUEUE:new(), NextDeliverSeqId0, [], 0, 0}, List), + {Filtered, RamReadyCount, RamBytes, DelsAndAcksFun(NextDeliverSeqId, Acks, State)}. %% [0] We don't increase RamBytes here, even though it pertains to %% unacked messages too, since if HaveMsg then the message must have %% been stored in the QI, thus the message must have been in diff --git a/deps/rabbit/test/backing_queue_SUITE.erl b/deps/rabbit/test/backing_queue_SUITE.erl index 27cde10fe127..a15b6069f403 100644 --- a/deps/rabbit/test/backing_queue_SUITE.erl +++ b/deps/rabbit/test/backing_queue_SUITE.erl @@ -1185,14 +1185,14 @@ variable_queue_partial_segments_q_tail_thing2(VQ0, _QName) -> %% We only have one message in memory because the amount in memory %% depends on the consume rate, which is nil in this test. [{q_head, 1}, - {q_tail, {q_tail, 1, OneAndAHalfSegment - 1, 0, OneAndAHalfSegment}}, + {q_tail, {q_tail, 1, OneAndAHalfSegment - 1, OneAndAHalfSegment}}, {len, OneAndAHalfSegment}]), VQ5 = check_variable_queue_status( variable_queue_publish(true, 1, VQ3), %% one alpha, but it's in the same segment as the q_tail %% @todo That's wrong now! v1/v2 [{q_head, 1}, - {q_tail, {q_tail, 1, OneAndAHalfSegment, 0, OneAndAHalfSegment + 1}}, + {q_tail, {q_tail, 1, OneAndAHalfSegment, OneAndAHalfSegment + 1}}, {len, OneAndAHalfSegment + 1}]), {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, SegmentSize + HalfSegment + 1, VQ5), @@ -1202,7 +1202,7 @@ variable_queue_partial_segments_q_tail_thing2(VQ0, _QName) -> %% after fetching exactly one segment, we should have no %% messages in memory. [{q_head, 0}, - {q_tail, {q_tail, SegmentSize, HalfSegment + 1, 0, OneAndAHalfSegment + 1}}, + {q_tail, {q_tail, SegmentSize, HalfSegment + 1, OneAndAHalfSegment + 1}}, {len, HalfSegment + 1}]), {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, HalfSegment + 1, VQ7), @@ -1769,7 +1769,7 @@ with_fresh_variable_queue(Fun, Mode) -> VQ = variable_queue_init(test_amqqueue(QName, true), false), S0 = variable_queue_status(VQ), assert_props(S0, [{q_head, 0}, - {q_tail, {q_tail, undefined, 0, 0, undefined}}, + {q_tail, {q_tail, undefined, 0, undefined}}, {len, 0}]), VQ1 = set_queue_mode(Mode, VQ), try @@ -1914,9 +1914,9 @@ variable_queue_with_holes(VQ0) -> vq_with_holes_assertions(VQ) -> [false = case V of - {q_tail, _, 0, _, _} -> true; - 0 -> true; - _ -> false + {q_tail, _, 0, _} -> true; + 0 -> true; + _ -> false end || {K, V} <- variable_queue_status(VQ), lists:member(K, [q_head, q_tail])]. diff --git a/deps/rabbitmq_management/priv/www/js/tmpl/classic-queue-stats.ejs b/deps/rabbitmq_management/priv/www/js/tmpl/classic-queue-stats.ejs index d779d6cca7ff..2c5b0e6b943c 100644 --- a/deps/rabbitmq_management/priv/www/js/tmpl/classic-queue-stats.ejs +++ b/deps/rabbitmq_management/priv/www/js/tmpl/classic-queue-stats.ejs @@ -34,7 +34,6 @@ Unacked In memory Persistent - Transient, Paged Out @@ -56,9 +55,6 @@ <%= fmt_num_thousands(queue.messages_persistent) %> - - <%= fmt_num_thousands(queue.messages_paged_out) %> - @@ -80,9 +76,6 @@ <%= fmt_bytes(queue.message_bytes_persistent) %> - - <%= fmt_bytes(queue.message_bytes_paged_out) %> - diff --git a/deps/rabbitmq_prometheus/metrics.md b/deps/rabbitmq_prometheus/metrics.md index 7f61b0d3af94..e54ae4151a9f 100644 --- a/deps/rabbitmq_prometheus/metrics.md +++ b/deps/rabbitmq_prometheus/metrics.md @@ -188,8 +188,6 @@ These metrics are specific to the stream protocol. | rabbitmq_queue_disk_writes_total | Total number of times queue wrote messages to disk | | rabbitmq_queue_messages | Sum of ready and unacknowledged messages - total queue depth | | rabbitmq_queue_messages_bytes | Size in bytes of ready and unacknowledged messages | -| rabbitmq_queue_messages_paged_out | Messages paged out to disk | -| rabbitmq_queue_messages_paged_out_bytes | Size in bytes of messages paged out to disk | | rabbitmq_queue_messages_persistent | Persistent messages | | rabbitmq_queue_messages_persistent_bytes | Size in bytes of persistent messages | | rabbitmq_queue_messages_published_total | Total number of messages published to queues | diff --git a/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl b/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl index 7f6ed70d56dc..759b6b90ed26 100644 --- a/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl +++ b/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl @@ -144,8 +144,6 @@ {2, undefined, queue_messages_bytes, gauge, "Size in bytes of ready and unacknowledged messages", message_bytes}, {2, undefined, queue_messages_ready_bytes, gauge, "Size in bytes of ready messages", message_bytes_ready}, {2, undefined, queue_messages_unacked_bytes, gauge, "Size in bytes of all unacknowledged messages", message_bytes_unacknowledged}, - {2, undefined, queue_messages_paged_out, gauge, "Messages paged out to disk", messages_paged_out}, - {2, undefined, queue_messages_paged_out_bytes, gauge, "Size in bytes of messages paged out to disk", message_bytes_paged_out}, {2, undefined, queue_head_message_timestamp, gauge, "Timestamp of the first message in the queue, if any", head_message_timestamp}, {2, undefined, queue_disk_reads_total, counter, "Total number of times queue read messages from disk", disk_reads}, {2, undefined, queue_disk_writes_total, counter, "Total number of times queue wrote messages to disk", disk_writes}, @@ -673,7 +671,7 @@ get_data(queue_consumer_count = MF, false, VHostsFilter) -> end, empty(MF), Table), [{Table, [{consumers, A1}]}]; get_data(queue_metrics = Table, false, VHostsFilter) -> - {Table, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17} = + {Table, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15} = ets:foldl(fun ({#resource{kind = queue, virtual_host = VHost}, _, _}, Acc) when is_map(VHostsFilter), map_get(VHost, VHostsFilter) == false -> Acc; @@ -685,8 +683,7 @@ get_data(queue_metrics = Table, false, VHostsFilter) -> {messages_unacknowledged_ram, A7}, {messages_persistent, A8}, {messages_bytes_persistent, A9}, {message_bytes, A10}, {message_bytes_ready, A11}, {message_bytes_unacknowledged, A12}, - {messages_paged_out, A13}, {message_bytes_paged_out, A14}, - {disk_reads, A15}, {disk_writes, A16}, {segments, A17}]}]; + {disk_reads, A13}, {disk_writes, A14}, {segments, A15}]}]; get_data(Table, false, VHostsFilter) when Table == channel_exchange_metrics; Table == queue_coarse_metrics; Table == queue_delivery_metrics; @@ -861,7 +858,7 @@ get_data(Table, _, _) -> sum_queue_metrics(Props, {T, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, - A12, A13, A14, A15, A16, A17}) -> + A12, A13, A14, A15}) -> {T, sum(proplists:get_value(consumers, Props), A1), sum(proplists:get_value(consumer_utilisation, Props), A2), @@ -875,11 +872,9 @@ sum_queue_metrics(Props, {T, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, sum(proplists:get_value(message_bytes, Props), A10), sum(proplists:get_value(message_bytes_ready, Props), A11), sum(proplists:get_value(message_bytes_unacknowledged, Props), A12), - sum(proplists:get_value(messages_paged_out, Props), A13), - sum(proplists:get_value(message_bytes_paged_out, Props), A14), - sum(proplists:get_value(disk_reads, Props), A15), - sum(proplists:get_value(disk_writes, Props), A16), - sum(proplists:get_value(segments, Props), A17) + sum(proplists:get_value(disk_reads, Props), A13), + sum(proplists:get_value(disk_writes, Props), A14), + sum(proplists:get_value(segments, Props), A15) }. empty(T) when T == channel_queue_exchange_metrics; T == queue_exchange_metrics; T == channel_process_metrics; T == queue_consumer_count -> @@ -891,7 +886,7 @@ empty(T) when T == channel_exchange_metrics; T == exchange_metrics; T == queue_c empty(T) when T == channel_queue_metrics; T == queue_delivery_metrics; T == channel_metrics -> {T, 0, 0, 0, 0, 0, 0, 0}; empty(queue_metrics = T) -> - {T, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}. + {T, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}. sum(undefined, B) -> B; From 9e8ff487efc11b0936569a49c4297a28c1609a82 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Mon, 10 Nov 2025 14:49:05 +0100 Subject: [PATCH 15/16] CQ: Remove the internal 'len' field It duplicates the ?QUEUE:len and the #q_tail.count which together provide the same value. --- deps/rabbit/src/rabbit_variable_queue.erl | 96 +++++++---------------- 1 file changed, 28 insertions(+), 68 deletions(-) diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 587d539db5a2..7080f75e0656 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -139,7 +139,6 @@ transient_threshold, qi_embed_msgs_below, - len, %% w/o unacked @todo No longer needed, is q_head+q_tail. bytes, %% w/o unacked unacked_bytes, persistent_count, %% w unacked @@ -256,10 +255,8 @@ transient_threshold :: non_neg_integer(), qi_embed_msgs_below :: non_neg_integer(), - len :: non_neg_integer(), bytes :: non_neg_integer(), unacked_bytes :: non_neg_integer(), - persistent_count :: non_neg_integer(), persistent_bytes :: non_neg_integer(), @@ -455,12 +452,12 @@ delete_crashed(Q) when ?is_amqqueue(Q) -> QName = amqqueue:get_name(Q), ok = rabbit_classic_queue_index_v2:erase(QName). -purge(State = #vqstate { len = Len }) -> +purge(State) -> case is_pending_ack_empty(State) and is_unconfirmed_empty(State) of true -> - {Len, purge_and_index_reset(State)}; + {len(State), purge_and_index_reset(State)}; false -> - {Len, purge_when_pending_acks(State)} + {len(State), purge_when_pending_acks(State)} end. purge_acks(State) -> a(purge_pending_ack(false, State)). @@ -566,12 +563,7 @@ ack(AckTags, State) -> requeue(AckTags, #vqstate { q_head = QHead0, q_tail = QTail, - in_counter = InCounter, - len = Len } = State) -> - %% @todo This can be heavily simplified: if the message falls into q_tail, - %% add it there. Otherwise just add it to q_head in the correct position. - %% @todo I think if the message falls within q_head we must add it back there, - %% otherwise there's nothing to do? Except update stats. + in_counter = InCounter } = State) -> {SeqIds, QHead, MsgIds, State1} = requeue_merge(lists:sort(AckTags), QHead0, [], q_tail_limit(QTail), State), {QTail1, MsgIds1, State2} = q_tail_merge(SeqIds, QTail, MsgIds, State1), @@ -580,8 +572,7 @@ requeue(AckTags, #vqstate { q_head = QHead0, maybe_update_rates( State2 #vqstate { q_head = QHead, q_tail = QTail1, - in_counter = InCounter + MsgCount, - len = Len + MsgCount }))}. + in_counter = InCounter + MsgCount }))}. ackfold(MsgFun, Acc, State, AckTags) -> {AccN, StateN} = @@ -592,7 +583,8 @@ ackfold(MsgFun, Acc, State, AckTags) -> end, {Acc, State}, AckTags), {AccN, a(StateN)}. -len(#vqstate { len = Len }) -> Len. +len(#vqstate { q_head = QHead, q_tail = #q_tail{ count = QTailCount }}) -> + ?QUEUE:len(QHead) + QTailCount. is_empty(State) -> 0 == len(State). @@ -717,10 +709,9 @@ info(disk_reads, #vqstate{disk_read_count = Count}) -> Count; info(disk_writes, #vqstate{disk_write_count = Count}) -> Count; -info(backing_queue_status, #vqstate { +info(backing_queue_status, State = #vqstate { q_head = QHead, q_tail = QTail, - len = Len, next_seq_id = NextSeqId, next_deliver_seq_id = NextDeliverSeqId, ram_pending_ack = RPA, @@ -736,7 +727,7 @@ info(backing_queue_status, #vqstate { [ {version , 2}, {q_head , ?QUEUE:len(QHead)}, {q_tail , QTail}, - {len , Len}, + {len , len(State)}, {next_seq_id , NextSeqId}, {next_deliver_seq_id , NextDeliverSeqId}, {num_pending_acks , map_size(RPA) + map_size(DPA)}, @@ -828,33 +819,19 @@ get_pa_head(PA) -> map_get(Smallest, PA) end. -a(State = #vqstate { q_head = QHead, - q_tail = QTail, - len = Len, - bytes = Bytes, +a(State = #vqstate { bytes = Bytes, unacked_bytes = UnackedBytes, persistent_count = PersistentCount, persistent_bytes = PersistentBytes, ram_msg_count = RamMsgCount, ram_bytes = RamBytes}) -> - ED = QTail#q_tail.count == 0, - E3 = ?QUEUE:is_empty(QHead), - LZ = Len == 0, - L3 = ?QUEUE:len(QHead), - - %% If the queue is empty, then q_head and q_tail are both empty. - true = LZ == (ED and E3), - - %% All messages are in q_head or q_tail. - true = QTail#q_tail.count + L3 == Len, - true = Len >= 0, true = Bytes >= 0, true = UnackedBytes >= 0, true = PersistentCount >= 0, true = PersistentBytes >= 0, true = RamMsgCount >= 0, - true = RamMsgCount =< Len, + %% Requeues may lead to RamMsgCount > 2048. true = RamBytes >= 0, true = RamBytes =< Bytes + UnackedBytes, @@ -1048,7 +1025,6 @@ init(IsDurable, IndexState, StoreState, DiskCount, DiskBytes, Terms, transient_threshold = NextSeqId, qi_embed_msgs_below = IndexMaxSize, - len = DiskCount1, persistent_count = DiskCount1, bytes = DiskBytes1, persistent_bytes = DiskBytes1, @@ -1129,21 +1105,20 @@ read_msg(_, MsgId, IsPersistent, rabbit_msg_store, State = #vqstate{msg_store_cl %% When publishing to memory, transient messages do not get written to disk. %% On the other hand, persistent messages are kept in memory as well as disk. stats_published_memory(MS = #msg_status{is_persistent = true}, St) -> - St#vqstate{?UP(len, ram_msg_count, persistent_count, +1), + St#vqstate{?UP(ram_msg_count, persistent_count, +1), ?UP(bytes, ram_bytes, persistent_bytes, +msg_size(MS))}; stats_published_memory(MS = #msg_status{is_persistent = false}, St) -> - St#vqstate{?UP(len, ram_msg_count, +1), + St#vqstate{?UP(ram_msg_count, +1), ?UP(bytes, ram_bytes, +msg_size(MS))}. %% Messages published directly to disk are not kept in memory. stats_published_disk(MS = #msg_status{is_persistent = true}, St) -> - St#vqstate{?UP(len, persistent_count, +1), + St#vqstate{?UP(persistent_count, +1), ?UP(bytes, persistent_bytes, +msg_size(MS))}; stats_published_disk(MS = #msg_status{is_persistent = false}, St) -> - St#vqstate{?UP(len, +1), - ?UP(bytes, +msg_size(MS))}. + St#vqstate{?UP(bytes, +msg_size(MS))}. -%% Pending acks do not add to len. Messages are kept in memory. +%% Pending acks messages are kept in memory. stats_published_pending_acks(MS = #msg_status{is_persistent = true}, St) -> St#vqstate{?UP(persistent_count, +1), ?UP(persistent_bytes, unacked_bytes, ram_bytes, +msg_size(MS))}; @@ -1156,24 +1131,23 @@ stats_published_pending_acks(MS = #msg_status{is_persistent = false}, St) -> %% was fully on disk the content will not be read immediately). %% The contents stay where they are during this operation. stats_pending_acks(MS = #msg_status{msg = undefined}, St) -> - St#vqstate{?UP(len, -1), - ?UP(bytes, -msg_size(MS)), ?UP(unacked_bytes, +msg_size(MS))}; + St#vqstate{?UP(bytes, -msg_size(MS)), ?UP(unacked_bytes, +msg_size(MS))}; stats_pending_acks(MS, St) -> - St#vqstate{?UP(len, ram_msg_count, -1), + St#vqstate{?UP(ram_msg_count, -1), ?UP(bytes, -msg_size(MS)), ?UP(unacked_bytes, +msg_size(MS))}. %% Message may or may not be persistent and the contents %% may or may not be in memory. stats_removed(MS = #msg_status{is_persistent = true, msg = undefined}, St) -> - St#vqstate{?UP(len, persistent_count, -1), + St#vqstate{?UP(persistent_count, -1), ?UP(bytes, persistent_bytes, -msg_size(MS))}; stats_removed(MS = #msg_status{is_persistent = true}, St) -> - St#vqstate{?UP(len, ram_msg_count, persistent_count, -1), + St#vqstate{?UP(ram_msg_count, persistent_count, -1), ?UP(bytes, ram_bytes, persistent_bytes, -msg_size(MS))}; stats_removed(MS = #msg_status{is_persistent = false, msg = undefined}, St) -> - St#vqstate{?UP(len, -1), ?UP(bytes, -msg_size(MS))}; + St#vqstate{?UP(bytes, -msg_size(MS))}; stats_removed(MS = #msg_status{is_persistent = false}, St) -> - St#vqstate{?UP(len, ram_msg_count, -1), + St#vqstate{?UP(ram_msg_count, -1), ?UP(bytes, ram_bytes, -msg_size(MS))}. %% @todo Very confusing that ram_msg_count is without unacked but ram_bytes is with. @@ -1194,26 +1168,15 @@ stats_acked_pending(MS = #msg_status{is_persistent = false}, St) -> %% Notice that this is the reverse of stats_pending_acks. stats_requeued_memory(MS = #msg_status{msg = undefined}, St) -> - St#vqstate{?UP(len, +1), - ?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; + St#vqstate{?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; stats_requeued_memory(MS, St) -> - St#vqstate{?UP(len, ram_msg_count, +1), + St#vqstate{?UP(ram_msg_count, +1), ?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}. -%% TODO!!! -%% @todo For v2 since we don't remove from disk until we ack, we don't need -%% to write to disk again on requeue. If the message falls within q_tail -%% we can just drop the MsgStatus. Otherwise we just put it in q_head and -%% we don't do any disk writes. -%% -%% So we don't need to change anything except how we count stats as -%% well as q_tail stats if the message falls within q_tail. stats_requeued_disk(MS = #msg_status{is_persistent = true}, St) -> - St#vqstate{?UP(len, +1), - ?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; + St#vqstate{?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; stats_requeued_disk(MS = #msg_status{is_persistent = false}, St) -> - St#vqstate{?UP(len, +1), - ?UP(unacked_bytes, -msg_size(MS))}. + St#vqstate{?UP(unacked_bytes, -msg_size(MS))}. msg_size(#msg_status{msg_props = #message_properties{size = Size}}) -> Size. @@ -1491,7 +1454,6 @@ publish1(Msg, IsDelivered, _ChPid, PersistFun, State = #vqstate { q_head = QHead, q_tail = QTail = #q_tail { count = QTailCount }, - len = Len, qi_embed_msgs_below = IndexMaxSize, next_seq_id = SeqId, next_deliver_seq_id = NextDeliverSeqId, @@ -1508,9 +1470,9 @@ publish1(Msg, %% limit is at 1 because the queue process will need to access this message to know %% expiration information. MemoryLimit = min(1 + floor(2 * OutRate), 2048), + QHeadLen = ?QUEUE:len(QHead), State3 = case QTailCount of - %% Len is the same as QHead length when QTailCount =:= 0. - 0 when Len < MemoryLimit -> + 0 when QHeadLen < MemoryLimit -> {MsgStatus1, State1} = PersistFun(false, false, MsgStatus, State), State2 = State1 #vqstate { q_head = ?QUEUE:in(m(MsgStatus1), QHead) }, stats_published_memory(MsgStatus1, State2); @@ -1791,8 +1753,6 @@ msgs_written_to_disk(Callback, MsgIdSet, written) -> %% for all message IDs. This is a waste. We should only %% call it for messages that need confirming, and avoid %% this intersection call. - %% - %% The same may apply to msg_indices_written_to_disk as well. Confirmed = sets:intersection(UC, MsgIdSet), record_confirms(sets:intersection(MsgIdSet, MIOD), State #vqstate { From 89707d741c1afd46dc39a3285fe0ea53df766510 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Hoguin?= Date: Mon, 10 Nov 2025 15:35:34 +0100 Subject: [PATCH 16/16] CQ: Remove set_queue_mode/set_queue_version --- deps/rabbit/src/rabbit_amqqueue_process.erl | 20 +--- deps/rabbit/src/rabbit_backing_queue.erl | 4 - deps/rabbit/src/rabbit_priority_queue.erl | 13 +-- deps/rabbit/src/rabbit_variable_queue.erl | 13 +-- deps/rabbit/test/backing_queue_SUITE.erl | 104 +++----------------- 5 files changed, 21 insertions(+), 133 deletions(-) diff --git a/deps/rabbit/src/rabbit_amqqueue_process.erl b/deps/rabbit/src/rabbit_amqqueue_process.erl index 50ef5dd974b2..a658747318d8 100644 --- a/deps/rabbit/src/rabbit_amqqueue_process.erl +++ b/deps/rabbit/src/rabbit_amqqueue_process.erl @@ -490,9 +490,7 @@ process_args_policy(State = #q{q = Q, {<<"message-ttl">>, fun res_min/2, fun init_ttl/2}, {<<"max-length">>, fun res_min/2, fun init_max_length/2}, {<<"max-length-bytes">>, fun res_min/2, fun init_max_bytes/2}, - {<<"overflow">>, fun res_arg/2, fun init_overflow/2}, - {<<"queue-mode">>, fun res_arg/2, fun init_queue_mode/2}, - {<<"queue-version">>, fun res_arg/2, fun init_queue_version/2}], + {<<"overflow">>, fun res_arg/2, fun init_overflow/2}], drop_expired_msgs( lists:foldl(fun({Name, Resolve, Fun}, StateN) -> Fun(rabbit_queue_type_util:args_policy_lookup(Name, Resolve, Q), StateN) @@ -543,22 +541,6 @@ init_overflow(Overflow, State) -> State#q{overflow = OverflowVal} end. -init_queue_mode(undefined, State) -> - State; -init_queue_mode(Mode, State = #q {backing_queue = BQ, - backing_queue_state = BQS}) -> - BQS1 = BQ:set_queue_mode(binary_to_existing_atom(Mode, utf8), BQS), - State#q{backing_queue_state = BQS1}. - -init_queue_version(Version0, State = #q {backing_queue = BQ, - backing_queue_state = BQS}) -> - Version = case Version0 of - undefined -> 2; - _ -> Version0 - end, - BQS1 = BQ:set_queue_version(Version, BQS), - State#q{backing_queue_state = BQS1}. - reply(Reply, NewState) -> {NewState1, Timeout} = next_state(NewState), {reply, Reply, ensure_stats_timer(ensure_rate_timer(NewState1)), Timeout}. diff --git a/deps/rabbit/src/rabbit_backing_queue.erl b/deps/rabbit/src/rabbit_backing_queue.erl index 6c0fe97e0f71..2668777dbd1e 100644 --- a/deps/rabbit/src/rabbit_backing_queue.erl +++ b/deps/rabbit/src/rabbit_backing_queue.erl @@ -215,10 +215,6 @@ %% or discarded previously). -callback is_duplicate(mc:state(), state()) -> {boolean(), state()}. --callback set_queue_mode(queue_mode(), state()) -> state(). - --callback set_queue_version(queue_version(), state()) -> state(). - -callback zip_msgs_and_acks([delivered_publish()], [ack()], Acc, state()) -> Acc. diff --git a/deps/rabbit/src/rabbit_priority_queue.erl b/deps/rabbit/src/rabbit_priority_queue.erl index ef60d05ee6ec..ead0dca42f25 100644 --- a/deps/rabbit/src/rabbit_priority_queue.erl +++ b/deps/rabbit/src/rabbit_priority_queue.erl @@ -32,8 +32,7 @@ ackfold/4, len/1, is_empty/1, depth/1, update_rates/1, needs_timeout/1, timeout/1, handle_pre_hibernate/1, resume/1, msg_rates/1, - info/2, invoke/3, is_duplicate/2, set_queue_mode/2, - set_queue_version/2, + info/2, invoke/3, is_duplicate/2, zip_msgs_and_acks/4, format_state/1]). @@ -390,16 +389,6 @@ is_duplicate(Msg, State = #state{bq = BQ}) -> is_duplicate(Msg, State = #passthrough{bq = BQ, bqs = BQS}) -> ?passthrough2(is_duplicate(Msg, BQS)). -set_queue_mode(Mode, State = #state{bq = BQ}) -> - foreach1(fun (_P, BQSN) -> BQ:set_queue_mode(Mode, BQSN) end, State); -set_queue_mode(Mode, State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough1(set_queue_mode(Mode, BQS)). - -set_queue_version(Version, State = #state{bq = BQ}) -> - foreach1(fun (_P, BQSN) -> BQ:set_queue_version(Version, BQSN) end, State); -set_queue_version(Version, State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough1(set_queue_version(Version, BQS)). - zip_msgs_and_acks(Msgs, AckTags, Accumulator, #state{bqss = [{MaxP, _} |_]}) -> MsgsByPriority = partition_publish_delivered_batch(Msgs, MaxP), lists:foldl(fun (Acks, MAs) -> diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 7080f75e0656..df9f5036eed1 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -15,8 +15,8 @@ ackfold/4, len/1, is_empty/1, depth/1, update_rates/1, needs_timeout/1, timeout/1, handle_pre_hibernate/1, resume/1, msg_rates/1, - info/2, invoke/3, is_duplicate/2, set_queue_mode/2, - set_queue_version/2, zip_msgs_and_acks/4, + info/2, invoke/3, is_duplicate/2, + zip_msgs_and_acks/4, format_state/1]). -export([start/2, stop/1]). @@ -746,20 +746,12 @@ invoke( _, _, State) -> State. is_duplicate(_Msg, State) -> {false, State}. -%% Queue mode has been unified. -set_queue_mode(_, State) -> - State. - zip_msgs_and_acks(Msgs, AckTags, Accumulator, _State) -> lists:foldl(fun ({{Msg, _Props}, AckTag}, Acc) -> Id = mc:get_annotation(id, Msg), [{Id, AckTag} | Acc] end, Accumulator, lists:zip(Msgs, AckTags)). -%% Queue version now ignored; only v2 is available. -set_queue_version(_, State) -> - State. - %% Get the Timestamp property of the first msg, if present. This is %% the one with the oldest timestamp among the heads of the pending %% acks and unread queues. We can't check disk_pending_acks as these @@ -1872,6 +1864,7 @@ read_from_q_tail(DelsAndAcksFun, %% For v2 we want to limit the number of messages read at once to lower %% the memory footprint. We use the consume rate to determine how many %% messages we read. + %% @todo Simply ask for N messages instead of low/high bounds. QTailSeqLimit = QTailSeqId + MemoryLimit, QTailSeqId1 = lists:min([rabbit_classic_queue_index_v2:next_segment_boundary(QTailSeqId), diff --git a/deps/rabbit/test/backing_queue_SUITE.erl b/deps/rabbit/test/backing_queue_SUITE.erl index a15b6069f403..71f88ad7f896 100644 --- a/deps/rabbit/test/backing_queue_SUITE.erl +++ b/deps/rabbit/test/backing_queue_SUITE.erl @@ -39,7 +39,7 @@ -define(BACKING_QUEUE_TESTCASES, [ bq_queue_index, bq_queue_index_props, - {variable_queue_default, [parallel], ?VARIABLE_QUEUE_TESTCASES}, + {variable_queue, [parallel], ?VARIABLE_QUEUE_TESTCASES}, bq_variable_queue_delete_msg_store_files_callback, bq_queue_recover ]). @@ -127,8 +127,6 @@ init_per_group1(backing_queue_embed_limit_1024, Config) -> ok = rabbit_ct_broker_helpers:rpc(Config, 0, application, set_env, [rabbit, queue_index_embed_msgs_below, 1024]), Config; -init_per_group1(variable_queue_default, Config) -> - rabbit_ct_helpers:set_config(Config, {variable_queue_type, default}); %% @todo These groups are no longer used? init_per_group1(from_cluster_node1, Config) -> rabbit_ct_helpers:set_config(Config, {test_direction, {0, 1}}); @@ -1169,9 +1167,7 @@ variable_queue_partial_segments_q_tail_thing(Config) -> ?MODULE, variable_queue_partial_segments_q_tail_thing1, [Config]). variable_queue_partial_segments_q_tail_thing1(Config) -> - with_fresh_variable_queue( - fun variable_queue_partial_segments_q_tail_thing2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_partial_segments_q_tail_thing2/2). variable_queue_partial_segments_q_tail_thing2(VQ0, _QName) -> IndexMod = index_mod(), @@ -1216,9 +1212,7 @@ variable_queue_all_the_bits_not_covered_elsewhere_A(Config) -> ?MODULE, variable_queue_all_the_bits_not_covered_elsewhere_A1, [Config]). variable_queue_all_the_bits_not_covered_elsewhere_A1(Config) -> - with_fresh_variable_queue( - fun variable_queue_all_the_bits_not_covered_elsewhere_A2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_all_the_bits_not_covered_elsewhere_A2/2). variable_queue_all_the_bits_not_covered_elsewhere_A2(VQ0, QName) -> IndexMod = index_mod(), @@ -1243,9 +1237,7 @@ variable_queue_all_the_bits_not_covered_elsewhere_B(Config) -> ?MODULE, variable_queue_all_the_bits_not_covered_elsewhere_B1, [Config]). variable_queue_all_the_bits_not_covered_elsewhere_B1(Config) -> - with_fresh_variable_queue( - fun variable_queue_all_the_bits_not_covered_elsewhere_B2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_all_the_bits_not_covered_elsewhere_B2/2). variable_queue_all_the_bits_not_covered_elsewhere_B2(VQ1, QName) -> VQ2 = variable_queue_publish(false, 4, VQ1), @@ -1263,9 +1255,7 @@ variable_queue_drop(Config) -> ?MODULE, variable_queue_drop1, [Config]). variable_queue_drop1(Config) -> - with_fresh_variable_queue( - fun variable_queue_drop2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_drop2/2). variable_queue_drop2(VQ0, _QName) -> %% start by sending a messages @@ -1288,9 +1278,7 @@ variable_queue_fold_msg_on_disk(Config) -> ?MODULE, variable_queue_fold_msg_on_disk1, [Config]). variable_queue_fold_msg_on_disk1(Config) -> - with_fresh_variable_queue( - fun variable_queue_fold_msg_on_disk2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_fold_msg_on_disk2/2). variable_queue_fold_msg_on_disk2(VQ0, _QName) -> VQ1 = variable_queue_publish(true, 1, VQ0), @@ -1304,9 +1292,7 @@ variable_queue_dropfetchwhile(Config) -> ?MODULE, variable_queue_dropfetchwhile1, [Config]). variable_queue_dropfetchwhile1(Config) -> - with_fresh_variable_queue( - fun variable_queue_dropfetchwhile2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_dropfetchwhile2/2). variable_queue_dropfetchwhile2(VQ0, _QName) -> Count = 10, @@ -1352,9 +1338,7 @@ variable_queue_dropwhile_restart(Config) -> ?MODULE, variable_queue_dropwhile_restart1, [Config]). variable_queue_dropwhile_restart1(Config) -> - with_fresh_variable_queue( - fun variable_queue_dropwhile_restart2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_dropwhile_restart2/2). variable_queue_dropwhile_restart2(VQ0, QName) -> Count = 10000, @@ -1391,9 +1375,7 @@ variable_queue_dropwhile_sync_restart(Config) -> ?MODULE, variable_queue_dropwhile_sync_restart1, [Config]). variable_queue_dropwhile_sync_restart1(Config) -> - with_fresh_variable_queue( - fun variable_queue_dropwhile_sync_restart2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_dropwhile_sync_restart2/2). variable_queue_dropwhile_sync_restart2(VQ0, QName) -> Count = 10000, @@ -1433,9 +1415,7 @@ variable_queue_restart_large_seq_id(Config) -> ?MODULE, variable_queue_restart_large_seq_id1, [Config]). variable_queue_restart_large_seq_id1(Config) -> - with_fresh_variable_queue( - fun variable_queue_restart_large_seq_id2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_restart_large_seq_id2/2). variable_queue_restart_large_seq_id2(VQ0, QName) -> Count = 1, @@ -1472,9 +1452,7 @@ variable_queue_ack_limiting(Config) -> ?MODULE, variable_queue_ack_limiting1, [Config]). variable_queue_ack_limiting1(Config) -> - with_fresh_variable_queue( - fun variable_queue_ack_limiting2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_ack_limiting2/2). variable_queue_ack_limiting2(VQ0, _Config) -> %% start by sending in a bunch of messages @@ -1502,9 +1480,7 @@ variable_queue_purge(Config) -> ?MODULE, variable_queue_purge1, [Config]). variable_queue_purge1(Config) -> - with_fresh_variable_queue( - fun variable_queue_purge2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_purge2/2). variable_queue_purge2(VQ0, _Config) -> LenDepth = fun (VQ) -> @@ -1526,9 +1502,7 @@ variable_queue_requeue(Config) -> ?MODULE, variable_queue_requeue1, [Config]). variable_queue_requeue1(Config) -> - with_fresh_variable_queue( - fun variable_queue_requeue2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_requeue2/2). variable_queue_requeue2(VQ0, _Config) -> {_PendingMsgs, RequeuedMsgs, FreshMsgs, VQ1} = @@ -1554,9 +1528,7 @@ variable_queue_requeue_ram_beta(Config) -> ?MODULE, variable_queue_requeue_ram_beta1, [Config]). variable_queue_requeue_ram_beta1(Config) -> - with_fresh_variable_queue( - fun variable_queue_requeue_ram_beta2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_requeue_ram_beta2/2). variable_queue_requeue_ram_beta2(VQ0, _Config) -> IndexMod = index_mod(), @@ -1571,46 +1543,6 @@ variable_queue_requeue_ram_beta2(VQ0, _Config) -> {_, VQ8} = rabbit_variable_queue:ack(AcksAll, VQ7), VQ8. -%% same as test_variable_queue_requeue_ram_beta but randomly changing -%% the queue mode after every step. -variable_queue_mode_change(Config) -> - passed = rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, variable_queue_mode_change1, [Config]). - -variable_queue_mode_change1(Config) -> - with_fresh_variable_queue( - fun variable_queue_mode_change2/2, - ?config(variable_queue_type, Config)). - -variable_queue_mode_change2(VQ0, _Config) -> - IndexMod = index_mod(), - Count = IndexMod:next_segment_boundary(0)*2 + 2, - VQ1 = variable_queue_publish(false, Count, VQ0), - VQ2 = maybe_switch_queue_mode(VQ1), - {VQ3, AcksR} = variable_queue_fetch(Count, false, false, Count, VQ2), - VQ4 = maybe_switch_queue_mode(VQ3), - {Back, Front} = lists:split(Count div 2, AcksR), - {_, VQ5} = rabbit_variable_queue:requeue(erlang:tl(Back), VQ4), - VQ6 = maybe_switch_queue_mode(VQ5), - VQ8 = maybe_switch_queue_mode(VQ6), - {_, VQ9} = rabbit_variable_queue:requeue([erlang:hd(Back)], VQ8), - VQ10 = maybe_switch_queue_mode(VQ9), - VQ11 = requeue_one_by_one(Front, VQ10), - VQ12 = maybe_switch_queue_mode(VQ11), - {VQ13, AcksAll} = variable_queue_fetch(Count, false, true, Count, VQ12), - VQ14 = maybe_switch_queue_mode(VQ13), - {_, VQ15} = rabbit_variable_queue:ack(AcksAll, VQ14), - VQ16 = maybe_switch_queue_mode(VQ15), - VQ16. - -maybe_switch_queue_mode(VQ) -> - Mode = random_queue_mode(), - set_queue_mode(Mode, VQ). - -random_queue_mode() -> - Modes = [lazy, default], - lists:nth(rand:uniform(length(Modes)), Modes). - pub_res({_, VQS}) -> VQS; pub_res(VQS) -> @@ -1758,7 +1690,7 @@ wait_for_confirms(Unconfirmed) -> end end. -with_fresh_variable_queue(Fun, Mode) -> +with_fresh_variable_queue(Fun) -> Ref = make_ref(), Me = self(), %% Run in a separate process since rabbit_msg_store will send @@ -1771,10 +1703,9 @@ with_fresh_variable_queue(Fun, Mode) -> assert_props(S0, [{q_head, 0}, {q_tail, {q_tail, undefined, 0, undefined}}, {len, 0}]), - VQ1 = set_queue_mode(Mode, VQ), try _ = rabbit_variable_queue:delete_and_terminate( - shutdown, Fun(VQ1, QName)), + shutdown, Fun(VQ, QName)), Me ! Ref catch Type:Error:Stacktrace -> @@ -1787,9 +1718,6 @@ with_fresh_variable_queue(Fun, Mode) -> end, passed. -set_queue_mode(Mode, VQ) -> - rabbit_variable_queue:set_queue_mode(Mode, VQ). - variable_queue_publish(IsPersistent, Count, VQ) -> variable_queue_publish(IsPersistent, Count, fun (_N, P) -> P end, VQ).