diff --git a/deps/rabbit/src/rabbit_amqqueue_process.erl b/deps/rabbit/src/rabbit_amqqueue_process.erl index 50ef5dd974b2..a658747318d8 100644 --- a/deps/rabbit/src/rabbit_amqqueue_process.erl +++ b/deps/rabbit/src/rabbit_amqqueue_process.erl @@ -490,9 +490,7 @@ process_args_policy(State = #q{q = Q, {<<"message-ttl">>, fun res_min/2, fun init_ttl/2}, {<<"max-length">>, fun res_min/2, fun init_max_length/2}, {<<"max-length-bytes">>, fun res_min/2, fun init_max_bytes/2}, - {<<"overflow">>, fun res_arg/2, fun init_overflow/2}, - {<<"queue-mode">>, fun res_arg/2, fun init_queue_mode/2}, - {<<"queue-version">>, fun res_arg/2, fun init_queue_version/2}], + {<<"overflow">>, fun res_arg/2, fun init_overflow/2}], drop_expired_msgs( lists:foldl(fun({Name, Resolve, Fun}, StateN) -> Fun(rabbit_queue_type_util:args_policy_lookup(Name, Resolve, Q), StateN) @@ -543,22 +541,6 @@ init_overflow(Overflow, State) -> State#q{overflow = OverflowVal} end. -init_queue_mode(undefined, State) -> - State; -init_queue_mode(Mode, State = #q {backing_queue = BQ, - backing_queue_state = BQS}) -> - BQS1 = BQ:set_queue_mode(binary_to_existing_atom(Mode, utf8), BQS), - State#q{backing_queue_state = BQS1}. - -init_queue_version(Version0, State = #q {backing_queue = BQ, - backing_queue_state = BQS}) -> - Version = case Version0 of - undefined -> 2; - _ -> Version0 - end, - BQS1 = BQ:set_queue_version(Version, BQS), - State#q{backing_queue_state = BQS1}. - reply(Reply, NewState) -> {NewState1, Timeout} = next_state(NewState), {reply, Reply, ensure_stats_timer(ensure_rate_timer(NewState1)), Timeout}. 
diff --git a/deps/rabbit/src/rabbit_backing_queue.erl b/deps/rabbit/src/rabbit_backing_queue.erl index 5bae9eef6067..2668777dbd1e 100644 --- a/deps/rabbit/src/rabbit_backing_queue.erl +++ b/deps/rabbit/src/rabbit_backing_queue.erl @@ -14,8 +14,7 @@ message_bytes, message_bytes_ready, message_bytes_unacknowledged, message_bytes_ram, message_bytes_persistent, head_message_timestamp, - disk_reads, disk_writes, backing_queue_status, - messages_paged_out, message_bytes_paged_out]). + disk_reads, disk_writes, backing_queue_status]). %% We can't specify a per-queue ack/state with callback signatures -type ack() :: any(). @@ -173,13 +172,6 @@ %% each message, its ack tag, and an accumulator. -callback ackfold(msg_fun(A), A, state(), [ack()]) -> {A, state()}. -%% Fold over all the messages in a queue and return the accumulated -%% results, leaving the queue undisturbed. --callback fold(fun((mc:state(), - rabbit_types:message_properties(), - boolean(), A) -> {('stop' | 'cont'), A}), - A, state()) -> {A, state()}. - %% How long is my queue? -callback len(state()) -> non_neg_integer(). @@ -223,10 +215,6 @@ %% or discarded previously). -callback is_duplicate(mc:state(), state()) -> {boolean(), state()}. --callback set_queue_mode(queue_mode(), state()) -> state(). - --callback set_queue_version(queue_version(), state()) -> state(). - -callback zip_msgs_and_acks([delivered_publish()], [ack()], Acc, state()) -> Acc. diff --git a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl index 087e8e355916..f36813354a20 100644 --- a/deps/rabbit/src/rabbit_classic_queue_index_v2.erl +++ b/deps/rabbit/src/rabbit_classic_queue_index_v2.erl @@ -7,9 +7,9 @@ -module(rabbit_classic_queue_index_v2). --export([erase/1, init/3, reset_state/1, recover/7, +-export([erase/1, init/1, reset_state/1, recover/4, terminate/3, delete_and_terminate/1, - info/1, publish/7, publish/8, ack/2, read/3]). + info/1, publish/7, ack/2, read/3]). %% Recovery. 
Unlike other functions in this module, these %% apply to all queues all at once. @@ -18,14 +18,12 @@ %% rabbit_queue_index/rabbit_variable_queue-specific functions. %% Implementation details from the queue index leaking into the %% queue implementation itself. --export([pre_publish/7, flush_pre_publish_cache/2, - sync/1, needs_sync/1, flush/1, +%% @todo TODO +-export([sync/1, needs_sync/1, bounds/2, next_segment_boundary/1]). -%% Used to upgrade/downgrade from/to the v1 index. --export([init_for_conversion/3]). --export([init_args/1]). --export([delete_segment_file_for_seq_id/2]). +%% Called by rabbit_vhost. +-export([all_queue_directory_names/1]). %% Shared with rabbit_classic_queue_store_v2. -export([queue_dir/2]). @@ -146,23 +144,11 @@ %% File descriptors. We will keep up to 4 FDs %% at a time. See comments in reduce_fd_usage/2. - fds = #{} :: #{non_neg_integer() => file:fd()}, - - %% This fun must be called when messages that expect - %% confirms have either an ack or their entry - %% written to disk and file:sync/1 has been called. - on_sync :: on_sync_fun(), - - %% This fun is never called. It is kept so that we - %% can downgrade the queue back to v1. - on_sync_msg :: fun() + fds = #{} :: #{non_neg_integer() => file:fd()} }). -type state() :: #qi{}. -%% Types copied from rabbit_queue_index. - --type on_sync_fun() :: fun ((sets:set()) -> ok). -type contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean()). -type shutdown_terms() :: list() | 'non_clean_shutdown'. @@ -176,37 +162,21 @@ erase(#resource{ virtual_host = VHost } = Name) -> Dir = queue_dir(VHostDir, Name), erase_index_dir(Dir). --spec init(rabbit_amqqueue:name(), - on_sync_fun(), on_sync_fun()) -> state(). +-spec init(rabbit_amqqueue:name()) -> state(). -%% We do not embed messages and as a result never need the OnSyncMsgFun. 
- -init(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) -> - ?DEBUG("~0p ~0p ~0p", [Name, OnSyncFun, OnSyncMsgFun]), +init(#resource{ virtual_host = VHost } = Name) -> + ?DEBUG("~0p", [Name]), VHostDir = rabbit_vhost:msg_store_dir_path(VHost), Dir = queue_dir(VHostDir, Name), false = rabbit_file:is_file(Dir), %% is_file == is file or dir - init1(Name, Dir, OnSyncFun, OnSyncMsgFun). - -init_args(#qi{ queue_name = QueueName, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun }) -> - {QueueName, OnSyncFun, OnSyncMsgFun}. + init1(Name, Dir). -init_for_conversion(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) -> - ?DEBUG("~0p ~0p ~0p", [Name, OnSyncFun, OnSyncMsgFun]), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - Dir = queue_dir(VHostDir, Name), - init1(Name, Dir, OnSyncFun, OnSyncMsgFun). - -init1(Name, Dir, OnSyncFun, OnSyncMsgFun) -> +init1(Name, Dir) -> ensure_queue_name_stub_file(Name, Dir), DirBin = rabbit_file:filename_to_binary(Dir), #qi{ queue_name = Name, - dir = << DirBin/binary, "/" >>, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun + dir = << DirBin/binary, "/" >> }. ensure_queue_name_stub_file(#resource{virtual_host = VHost, name = QName}, Dir) -> @@ -218,17 +188,13 @@ ensure_queue_name_stub_file(#resource{virtual_host = VHost, name = QName}, Dir) -spec reset_state(State) -> State when State::state(). reset_state(State = #qi{ queue_name = Name, - dir = Dir, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun }) -> + dir = Dir }) -> ?DEBUG("~0p", [State]), _ = delete_and_terminate(State), - init1(Name, rabbit_file:binary_to_filename(Dir), OnSyncFun, OnSyncMsgFun). + init1(Name, rabbit_file:binary_to_filename(Dir)). -spec recover(rabbit_amqqueue:name(), shutdown_terms(), boolean(), - contains_predicate(), - on_sync_fun(), on_sync_fun(), - main | convert) -> + contains_predicate()) -> {'undefined' | non_neg_integer(), 'undefined' | non_neg_integer(), state()}. 
@@ -241,12 +207,11 @@ reset_state(State = #qi{ queue_name = Name, -define(RECOVER_COUNTER_SIZE, 6). recover(#resource{ virtual_host = VHost, name = QueueName } = Name, Terms, - IsMsgStoreClean, ContainsCheckFun, OnSyncFun, OnSyncMsgFun, Context) -> - ?DEBUG("~0p ~0p ~0p ~0p ~0p ~0p", [Name, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun]), + IsMsgStoreClean, ContainsCheckFun) -> + ?DEBUG("~0p ~0p ~0p ~0p", [Name, Terms, IsMsgStoreClean, ContainsCheckFun]), VHostDir = rabbit_vhost:msg_store_dir_path(VHost), Dir = queue_dir(VHostDir, Name), - State0 = init1(Name, Dir, OnSyncFun, OnSyncMsgFun), + State0 = init1(Name, Dir), %% We go over all segments if either the index or the %% message store has/had to recover. Otherwise we just %% take our state from Terms. @@ -254,10 +219,6 @@ recover(#resource{ virtual_host = VHost, name = QueueName } = Name, Terms, case IsIndexClean andalso IsMsgStoreClean of true -> State = case proplists:get_value(v2_index_state, Terms, undefined) of - %% We are recovering a queue that was using the v1 index. - undefined when Context =:= main -> - recover_index_v1_clean(State0, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun); {?VERSION, Segments} -> State0#qi{ segments = Segments } end, @@ -268,9 +229,7 @@ recover(#resource{ virtual_host = VHost, name = QueueName } = Name, Terms, State}; false -> CountersRef = counters:new(?RECOVER_COUNTER_SIZE, []), - State = recover_segments(State0, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - CountersRef, Context), + State = recover_segments(State0, ContainsCheckFun, CountersRef), ?LOG_WARNING("Queue ~ts in vhost ~ts dropped ~b/~b/~b persistent messages " "and ~b transient messages after unclean shutdown", [QueueName, VHost, @@ -283,11 +242,11 @@ recover(#resource{ virtual_host = VHost, name = QueueName } = Name, Terms, State} end. 
-recover_segments(State0 = #qi { queue_name = Name, dir = DirBin }, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, CountersRef, Context) -> +recover_segments(State0 = #qi { queue_name = Name, dir = DirBin }, + ContainsCheckFun, CountersRef) -> Dir = rabbit_file:binary_to_filename(DirBin), SegmentFiles = rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir), - State = case SegmentFiles of + case SegmentFiles of %% No segments found. [] -> State0; @@ -298,26 +257,9 @@ recover_segments(State0 = #qi { queue_name = Name, dir = DirBin }, Terms, IsMsgS || F <- SegmentFiles]), %% We use a temporary store state to check that messages do exist. StoreState0 = rabbit_classic_queue_store_v2:init(Name), - {State1, StoreState} = recover_segments(State0, ContainsCheckFun, StoreState0, CountersRef, Segments), + {State, StoreState} = recover_segments(State0, ContainsCheckFun, StoreState0, CountersRef, Segments), _ = rabbit_classic_queue_store_v2:terminate(StoreState), - State1 - end, - case Context of - convert -> - State; - main -> - %% We try to see if there are segment files from the v1 index. - case rabbit_file:wildcard(".*\\.idx", Dir) of - %% We are recovering a dirty queue that was using the v1 index or in - %% the process of converting from v1 to v2. - [_|_] -> - recover_index_v1_dirty(State, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - CountersRef); - %% Otherwise keep default values. - [] -> - State - end + State end. recover_segments(State, _, StoreState, _, []) -> @@ -449,89 +391,6 @@ recover_segment(State, ContainsCheckFun, StoreState0, CountersRef, Fd, Unacked - (SegmentEntryCount - ThisEntry), LocBytes0) end. 
-recover_index_v1_clean(State0 = #qi{ queue_name = Name }, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun) -> - #resource{virtual_host = VHost, name = QName} = Name, - ?LOG_INFO("Converting queue ~ts in vhost ~ts from v1 to v2 after clean shutdown", [QName, VHost]), - {_, _, V1State} = rabbit_queue_index:recover(Name, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - convert), - %% We will ignore the counter results because on clean shutdown - %% we do not need to calculate the values again. This lets us - %% share code with dirty recovery. - CountersRef = counters:new(?RECOVER_COUNTER_SIZE, []), - State = recover_index_v1_common(State0, V1State, CountersRef), - ?LOG_INFO("Queue ~ts in vhost ~ts converted ~b total messages from v1 to v2", - [QName, VHost, counters:get(CountersRef, ?RECOVER_COUNT)]), - State. - -recover_index_v1_dirty(State0 = #qi{ queue_name = Name }, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - CountersRef) -> - #resource{virtual_host = VHost, name = QName} = Name, - ?LOG_INFO("Converting queue ~ts in vhost ~ts from v1 to v2 after unclean shutdown", [QName, VHost]), - %% We ignore the count and bytes returned here because we cannot trust - %% rabbit_queue_index: it has a bug that may lead to more bytes being - %% returned than it really has. - %% - %% On top of that some messages may also be in both the v1 and v2 indexes - %% after a crash. - {_, _, V1State} = rabbit_queue_index:recover(Name, Terms, IsMsgStoreClean, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - convert), - State = recover_index_v1_common(State0, V1State, CountersRef), - ?LOG_INFO("Queue ~ts in vhost ~ts converted ~b total messages from v1 to v2", - [QName, VHost, counters:get(CountersRef, ?RECOVER_COUNT)]), - State. - -%% At this point all messages are persistent because transient messages -%% were dropped during the v1 index recovery. 
-recover_index_v1_common(State0 = #qi{ queue_name = Name, dir = DirBin }, - V1State, CountersRef) -> - Dir = rabbit_file:binary_to_filename(DirBin), - %% Use a temporary per-queue store state to store embedded messages. - StoreState0 = rabbit_classic_queue_store_v2:init(Name), - %% Go through the v1 index and publish messages to the v2 index. - {LoSeqId, HiSeqId, _} = rabbit_queue_index:bounds(V1State), - %% When resuming after a crash we need to double check the messages that are both - %% in the v1 and v2 index (effectively the messages below the upper bound of the - %% v2 index that are about to be written to it). - {_, V2HiSeqId, _} = bounds(State0, undefined), - SkipFun = fun - (SeqId, FunState0) when SeqId < V2HiSeqId -> - case read(SeqId, SeqId + 1, FunState0) of - %% Message already exists, skip. - {[_], FunState} -> - {skip, FunState}; - %% Message doesn't exist, write. - {[], FunState} -> - {write, FunState} - end; - %% Message is out of bounds of the v1 index. - (_, FunState) -> - {write, FunState} - end, - %% We use a common function also used with conversion on policy change. - {State1, StoreState} = rabbit_variable_queue:convert_from_v1_to_v2_loop(Name, V1State, State0, StoreState0, - {CountersRef, ?RECOVER_COUNT, ?RECOVER_BYTES}, - LoSeqId, HiSeqId, SkipFun), - %% Terminate the v2 store client. - _ = rabbit_classic_queue_store_v2:terminate(StoreState), - %% Close the v1 index journal handle if any. - JournalHdl = element(4, V1State), - ok = case JournalHdl of - undefined -> ok; - _ -> file_handle_cache:close(JournalHdl) - end, - %% Delete the v1 index files. - OldFiles = ["journal.jif"|rabbit_file:wildcard(".*\\.idx", Dir)], - _ = [rabbit_file:delete(filename:join(Dir, F)) || F <- OldFiles], - %% Ensure that everything in the v2 index is written to disk. - State = flush(State1), - %% Clean up all the garbage that we have surely been creating. - garbage_collect(), - State. 
- -spec terminate(rabbit_types:vhost(), [any()], State) -> State when State::state(). terminate(VHost, Terms, State0 = #qi { dir = Dir, @@ -577,17 +436,14 @@ info(#qi{ write_buffer = WriteBuffer, write_buffer_updates = NumUpdates }) -> -spec publish(rabbit_types:msg_id(), rabbit_variable_queue:seq_id(), rabbit_variable_queue:msg_location(), rabbit_types:message_properties(), boolean(), - non_neg_integer() | infinity, State) -> State when State::state(). - -publish(MsgId, SeqId, Location, Props, IsPersistent, TargetRamCount, State) -> - publish(MsgId, SeqId, Location, Props, IsPersistent, true, TargetRamCount, State). + boolean(), State) -> State when State::state(). %% Because we always persist to the msg_store, the Msg(Or)Id argument %% here is always a binary, never a record. -publish(MsgId, SeqId, Location, Props, IsPersistent, ShouldConfirm, TargetRamCount, +publish(MsgId, SeqId, Location, Props, IsPersistent, ShouldConfirm, State0 = #qi { write_buffer = WriteBuffer0, segments = Segments }) -> - ?DEBUG("~0p ~0p ~0p ~0p ~0p ~0p ~0p", [MsgId, SeqId, Location, Props, IsPersistent, TargetRamCount, State0]), + ?DEBUG("~0p ~0p ~0p ~0p ~0p ~0p", [MsgId, SeqId, Location, Props, IsPersistent, State0]), %% Add the entry to the write buffer. WriteBuffer = WriteBuffer0#{SeqId => {MsgId, SeqId, Location, Props, IsPersistent}}, State1 = State0#qi{ write_buffer = WriteBuffer }, @@ -1054,22 +910,14 @@ parse_entries(<< Status:8, %% ---- %% -%% Syncing and flushing to disk requested by the queue. -%% Note: the v2 no longer calls fsync, it only flushes. +%% Flushing to disk requested by the queue. -spec sync(State) -> State when State::state(). 
-sync(State0 = #qi{ confirms = Confirms, - on_sync = OnSyncFun }) -> +sync(State0 = #qi{ confirms = Confirms }) -> ?DEBUG("~0p", [State0]), State = flush_buffer(State0, full, segment_entry_count()), - _ = case sets:is_empty(Confirms) of - true -> - ok; - false -> - OnSyncFun(Confirms) - end, - State#qi{ confirms = sets:new([{version,2}]) }. + {Confirms, State#qi{ confirms = sets:new([{version,2}]) }}. -spec needs_sync(state()) -> 'false' | 'confirms'. @@ -1080,26 +928,48 @@ needs_sync(State = #qi{ confirms = Confirms }) -> false -> confirms end. --spec flush(State) -> State when State::state(). +%% ---- -flush(State) -> - ?DEBUG("~0p", [State]), - %% Flushing to disk is the same operation as sync - %% except it is called before hibernating or when - %% reducing memory use. - sync(State). +-type walker(A) :: fun ((A) -> 'finished' | + {rabbit_types:msg_id(), non_neg_integer(), A}). -%% ---- -%% -%% Defer to rabbit_queue_index for recovery for the time being. -%% We can move the functions here when the v1 index is removed. +-spec start(rabbit_types:vhost(), [rabbit_amqqueue:name()]) -> {[[any()]], {walker(A), A}}. start(VHost, DurableQueueNames) -> ?DEBUG("~0p ~0p", [VHost, DurableQueueNames]), - %% We replace the queue_index_walker function with our own. - %% Everything else remains the same. - {OrderedTerms, {_QueueIndexWalkerFun, FunState}} = rabbit_queue_index:start(VHost, DurableQueueNames), - {OrderedTerms, {fun queue_index_walker/1, FunState}}. 
+ {ok, RecoveryTermsPid} = rabbit_recovery_terms:start(VHost), + rabbit_vhost_sup_sup:save_vhost_recovery_terms(VHost, RecoveryTermsPid), + {DurableTerms, DurableDirectories} = + lists:foldl( + fun(QName, {RecoveryTerms, ValidDirectories}) -> + DirName = queue_name_to_dir_name(QName), + RecoveryInfo = case rabbit_recovery_terms:read(VHost, DirName) of + {error, _} -> non_clean_shutdown; + {ok, Terms} -> Terms + end, + {[RecoveryInfo | RecoveryTerms], + sets:add_element(DirName, ValidDirectories)} + end, {[], sets:new()}, DurableQueueNames), + %% Any queue directory we've not been asked to recover is considered garbage + ToDelete = [filename:join([rabbit_vhost:msg_store_dir_path(VHost), "queues", Dir]) + || Dir <- lists:subtract(all_queue_directory_names(VHost), + sets:to_list(DurableDirectories))], + ?LOG_DEBUG("Deleting unknown files/folders: ~p", [ToDelete]), + _ = rabbit_file:recursive_delete(ToDelete), + rabbit_recovery_terms:clear(VHost), + %% The backing queue interface requires that the queue recovery terms + %% which come back from start/2 are in the same order as DurableQueueNames + OrderedTerms = lists:reverse(DurableTerms), + {OrderedTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. + +all_queue_directory_names(VHost) -> + VHostQueuesPath = filename:join([rabbit_vhost:msg_store_dir_path(VHost), "queues"]), + case filelib:is_dir(VHostQueuesPath) of + true -> + {ok, Dirs} = file:list_dir(VHostQueuesPath), + Dirs; + false -> [] + end. queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> ?DEBUG("~0p", [{start, DurableQueues}]), @@ -1120,9 +990,6 @@ queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> empty -> ok = gatherer:stop(Gatherer), finished; - %% From v1 index walker. @todo Remove when no longer possible to convert from v1. - {value, {MsgId, Count}} -> - {MsgId, Count, {next, Gatherer}}; {value, MsgIds} -> {MsgIds, {next, Gatherer}} end. 
@@ -1133,16 +1000,7 @@ queue_index_walker_reader(#resource{ virtual_host = VHost } = Name, Gatherer) -> Dir = queue_dir(VHostDir, Name), SegmentFiles = rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir), _ = [queue_index_walker_segment(filename:join(Dir, F), Gatherer) || F <- SegmentFiles], - %% When there are files belonging to the v1 index, we go through - %% the v1 index walker function as well. - case rabbit_file:wildcard(".*\\.(idx|jif)", Dir) of - [_|_] -> - %% This function will call gatherer:finish/1, we do not - %% need to call it here. - rabbit_queue_index:queue_index_walker_reader(Name, Gatherer); - [] -> - ok = gatherer:finish(Gatherer) - end. + ok = gatherer:finish(Gatherer). queue_index_walker_segment(F, Gatherer) -> ?DEBUG("~0p ~0p", [F, Gatherer]), @@ -1180,27 +1038,11 @@ queue_index_walker_segment(Fd, Gatherer, N, Total, Acc) -> stop(VHost) -> ?DEBUG("~0p", [VHost]), - rabbit_queue_index:stop(VHost). + rabbit_recovery_terms:stop(VHost). %% ---- %% -%% These functions either call the normal functions or are no-ops. -%% They relate to specific optimizations of rabbit_queue_index and -%% rabbit_variable_queue. -%% -%% @todo The way pre_publish works is still fairly puzzling. -%% When the v1 index gets removed we can just drop -%% these functions. - -pre_publish(MsgOrId, SeqId, Location, Props, IsPersistent, TargetRamCount, State) -> - ?DEBUG("~0p ~0p ~0p ~0p ~0p ~0p ~0p", [MsgOrId, SeqId, Location, Props, IsPersistent, TargetRamCount, State]), - publish(MsgOrId, SeqId, Location, Props, IsPersistent, false, TargetRamCount, State). - -flush_pre_publish_cache(TargetRamCount, State) -> - ?DEBUG("~0p ~0p", [TargetRamCount, State]), - State. - -%% See comment in rabbit_queue_index:bounds/1. We do not need to be +%% Technical leftover from CQv1. We do not need to be %% accurate about these values because they are simply used as lowest %% and highest possible bounds. In fact we HAVE to be inaccurate for %% the test suite to pass. 
This can probably be made more accurate @@ -1237,15 +1079,6 @@ next_segment_boundary(SeqId) -> SegmentEntryCount = segment_entry_count(), (1 + (SeqId div SegmentEntryCount)) * SegmentEntryCount. -%% This function is only used when downgrading to the v1 index. -%% We potentially close the relevant fd and then delete the -%% segment file. -delete_segment_file_for_seq_id(SeqId, State0) -> - SegmentEntryCount = segment_entry_count(), - Segment = SeqId div SegmentEntryCount, - State = delete_segment(Segment, State0), - {[Segment], State}. - %% ---- %% %% Internal. diff --git a/deps/rabbit/src/rabbit_classic_queue_store_v2.erl b/deps/rabbit/src/rabbit_classic_queue_store_v2.erl index 7c28ceb7a377..354f4e1189aa 100644 --- a/deps/rabbit/src/rabbit_classic_queue_store_v2.erl +++ b/deps/rabbit/src/rabbit_classic_queue_store_v2.erl @@ -147,8 +147,8 @@ info(#qs{ write_buffer = WriteBuffer }) -> %% @todo I think we can disable the old message store at the same %% place where we create MsgId. If many queues receive the -%% message, then we create an MsgId. If not, we don't. But -%% we can only do this after removing support for v1. +%% message, then we create an MsgId. If not, we defer creating +%% one until strictly necessary (large messages). write(SeqId, Msg, Props, State0 = #qs{ write_buffer = WriteBuffer0, write_buffer_size = WriteBufferSize }) -> ?DEBUG("~0p ~0p ~0p ~0p", [SeqId, Msg, Props, State0]), diff --git a/deps/rabbit/src/rabbit_guid.erl b/deps/rabbit/src/rabbit_guid.erl index d33081c8d865..fd525e5606ab 100644 --- a/deps/rabbit/src/rabbit_guid.erl +++ b/deps/rabbit/src/rabbit_guid.erl @@ -31,6 +31,7 @@ -spec start_link() -> rabbit_types:ok_pid_or_error(). +%% @todo Serial can be in persistent_term instead of process. start_link() -> gen_server:start_link({local, ?SERVER}, ?MODULE, [update_disk_serial()], []). 
diff --git a/deps/rabbit/src/rabbit_priority_queue.erl b/deps/rabbit/src/rabbit_priority_queue.erl index 6777ec31bc6a..ead0dca42f25 100644 --- a/deps/rabbit/src/rabbit_priority_queue.erl +++ b/deps/rabbit/src/rabbit_priority_queue.erl @@ -29,11 +29,10 @@ purge/1, purge_acks/1, publish/5, publish_delivered/4, discard/3, drain_confirmed/1, dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2, - ackfold/4, fold/3, len/1, is_empty/1, depth/1, + ackfold/4, len/1, is_empty/1, depth/1, update_rates/1, needs_timeout/1, timeout/1, handle_pre_hibernate/1, resume/1, msg_rates/1, - info/2, invoke/3, is_duplicate/2, set_queue_mode/2, - set_queue_version/2, + info/2, invoke/3, is_duplicate/2, zip_msgs_and_acks/4, format_state/1]). @@ -302,11 +301,6 @@ ackfold(MsgFun, Acc, State = #state{bq = BQ}, AckTags) -> ackfold(MsgFun, Acc, State = #passthrough{bq = BQ, bqs = BQS}, AckTags) -> ?passthrough2(ackfold(MsgFun, Acc, BQS, AckTags)). -fold(Fun, Acc, State = #state{bq = BQ}) -> - fold2(fun (_P, BQSN, AccN) -> BQ:fold(Fun, AccN, BQSN) end, Acc, State); -fold(Fun, Acc, State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough2(fold(Fun, Acc, BQS)). - len(#state{bq = BQ, bqss = BQSs}) -> add0(fun (_P, BQSN) -> BQ:len(BQSN) end, BQSs); len(#passthrough{bq = BQ, bqs = BQS}) -> @@ -395,16 +389,6 @@ is_duplicate(Msg, State = #state{bq = BQ}) -> is_duplicate(Msg, State = #passthrough{bq = BQ, bqs = BQS}) -> ?passthrough2(is_duplicate(Msg, BQS)). -set_queue_mode(Mode, State = #state{bq = BQ}) -> - foreach1(fun (_P, BQSN) -> BQ:set_queue_mode(Mode, BQSN) end, State); -set_queue_mode(Mode, State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough1(set_queue_mode(Mode, BQS)). - -set_queue_version(Version, State = #state{bq = BQ}) -> - foreach1(fun (_P, BQSN) -> BQ:set_queue_version(Version, BQSN) end, State); -set_queue_version(Version, State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough1(set_queue_version(Version, BQS)). 
- zip_msgs_and_acks(Msgs, AckTags, Accumulator, #state{bqss = [{MaxP, _} |_]}) -> MsgsByPriority = partition_publish_delivered_batch(Msgs, MaxP), lists:foldl(fun (Acks, MAs) -> diff --git a/deps/rabbit/src/rabbit_queue_index.erl b/deps/rabbit/src/rabbit_queue_index.erl deleted file mode 100644 index c8a084bd414a..000000000000 --- a/deps/rabbit/src/rabbit_queue_index.erl +++ /dev/null @@ -1,1417 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2025 Broadcom. All Rights Reserved. The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. All rights reserved. -%% - --module(rabbit_queue_index). - --compile({inline, [segment_entry_count/0]}). - --export([erase/1, init/3, reset_state/1, recover/7, - terminate/3, delete_and_terminate/1, info/1, - pre_publish/7, flush_pre_publish_cache/2, - publish/7, publish/8, deliver/2, ack/2, sync/1, needs_sync/1, flush/1, - read/3, next_segment_boundary/1, bounds/1, start/2, stop/1]). - -%% Used by rabbit_vhost to set the segment_entry_count. --export([all_queue_directory_names/1]). - -%% Used by rabbit_classic_queue_index_v2 when upgrading -%% after a non-clean shutdown. --export([queue_index_walker_reader/2]). - -%% Used to upgrade/downgrade to/from the v2 index. --export([init_args/1]). --export([init_for_conversion/3]). --export([delete_segment_file_for_seq_id/2]). --export([delete_journal/1]). - --define(CLEAN_FILENAME, "clean.dot"). - -%%---------------------------------------------------------------------------- - -%% The queue index is responsible for recording the order of messages -%% within a queue on disk. As such it contains records of messages -%% being published, delivered and acknowledged. 
The publish record -%% includes the sequence ID, message ID and a small quantity of -%% metadata about the message; the delivery and acknowledgement -%% records just contain the sequence ID. A publish record may also -%% contain the complete message if provided to publish/5; this allows -%% the message store to be avoided altogether for small messages. In -%% either case the publish record is stored in memory in the same -%% serialised format it will take on disk. -%% -%% Because of the fact that the queue can decide at any point to send -%% a queue entry to disk, you can not rely on publishes appearing in -%% order. The only thing you can rely on is a message being published, -%% then delivered, then ack'd. -%% -%% In order to be able to clean up ack'd messages, we write to segment -%% files. These files have a fixed number of entries: segment_entry_count() -%% publishes, delivers and acknowledgements. They are numbered, and so -%% it is known that the 0th segment contains messages 0 -> -%% segment_entry_count() - 1, the 1st segment contains messages -%% segment_entry_count() -> 2*segment_entry_count() - 1 and so on. As -%% such, in the segment files, we only refer to message sequence ids -%% by the LSBs as SeqId rem segment_entry_count(). This gives them a -%% fixed size. -%% -%% However, transient messages which are not sent to disk at any point -%% will cause gaps to appear in segment files. Therefore, we delete a -%% segment file whenever the number of publishes == number of acks -%% (note that although it is not fully enforced, it is assumed that a -%% message will never be ackd before it is delivered, thus this test -%% also implies == number of delivers). In practise, this does not -%% cause disk churn in the pathological case because of the journal -%% and caching (see below). -%% -%% Because of the fact that publishes, delivers and acks can occur all -%% over, we wish to avoid lots of seeking. 
Therefore we have a fixed -%% sized journal to which all actions are appended. When the number of -%% entries in this journal reaches max_journal_entries, the journal -%% entries are scattered out to their relevant files, and the journal -%% is truncated to zero size. Note that entries in the journal must -%% carry the full sequence id, thus the format of entries in the -%% journal is different to that in the segments. -%% -%% The journal is also kept fully in memory, pre-segmented: the state -%% contains a mapping from segment numbers to state-per-segment (this -%% state is held for all segments which have been "seen": thus a -%% segment which has been read but has no pending entries in the -%% journal is still held in this mapping. Also note that a map is -%% used for this mapping, not an array because with an array, you will -%% always have entries from 0). Actions are stored directly in this -%% state. Thus at the point of flushing the journal, firstly no -%% reading from disk is necessary, but secondly if the known number of -%% acks and publishes in a segment are equal, given the known state of -%% the segment file combined with the journal, no writing needs to be -%% done to the segment file either (in fact it is deleted if it exists -%% at all). This is safe given that the set of acks is a subset of the -%% set of publishes. When it is necessary to sync messages, it is -%% sufficient to fsync on the journal: when entries are distributed -%% from the journal to segment files, those segments appended to are -%% fsync'd prior to the journal being truncated. -%% -%% This module is also responsible for scanning the queue index files -%% and seeding the message store on start up. -%% -%% Note that in general, the representation of a message's state as -%% the tuple: {('no_pub'|{IsPersistent, Bin, MsgBin}), -%% ('del'|'no_del'), ('ack'|'no_ack')} is richer than strictly -%% necessary for most operations. 
However, for startup, and to ensure -%% the safe and correct combination of journal entries with entries -%% read from the segment on disk, this richer representation vastly -%% simplifies and clarifies the code. -%% -%% For notes on Clean Shutdown and startup, see documentation in -%% rabbit_variable_queue. -%% -%% v2 UPDATE: The queue index is still keeping track of delivers -%% as noted in the above comment. However the queue will immediately -%% mark messages as delivered, because it now keeps track of delivers -%% at the queue level. The index still needs to keep track of deliver -%% entries because of its pub->del->ack logic. -%% -%%---------------------------------------------------------------------------- - -%% ---- Journal details ---- - --define(JOURNAL_FILENAME, "journal.jif"). --define(QUEUE_NAME_STUB_FILE, ".queue_name"). - --define(PUB_PERSIST_JPREFIX, 2#00). --define(PUB_TRANS_JPREFIX, 2#01). --define(DEL_JPREFIX, 2#10). --define(ACK_JPREFIX, 2#11). --define(JPREFIX_BITS, 2). --define(SEQ_BYTES, 8). --define(SEQ_BITS, ((?SEQ_BYTES * 8) - ?JPREFIX_BITS)). - -%% ---- Segment details ---- - --define(SEGMENT_EXTENSION, ".idx"). - -%% TODO: The segment size would be configurable, but deriving all the -%% other values is quite hairy and quite possibly noticeably less -%% efficient, depending on how clever the compiler is when it comes to -%% binary generation/matching with constant vs variable lengths. - --define(REL_SEQ_BITS, 14). - -%% seq only is binary 01 followed by 14 bits of rel seq id -%% (range: 0 - 16383) --define(REL_SEQ_ONLY_PREFIX, 01). --define(REL_SEQ_ONLY_PREFIX_BITS, 2). --define(REL_SEQ_ONLY_RECORD_BYTES, 2). - -%% publish record is binary 1 followed by a bit for is_persistent, -%% then 14 bits of rel seq id, 64 bits for message expiry, 32 bits of -%% size and then 128 bits of md5sum msg id. --define(PUB_PREFIX, 1). --define(PUB_PREFIX_BITS, 1). - --define(EXPIRY_BYTES, 8). --define(EXPIRY_BITS, (?EXPIRY_BYTES * 8)). 
--define(NO_EXPIRY, 0). - --define(MSG_ID_BYTES, 16). %% md5sum is 128 bit or 16 bytes --define(MSG_ID_BITS, (?MSG_ID_BYTES * 8)). - -%% This is the size of the message body content, for stats --define(SIZE_BYTES, 4). --define(SIZE_BITS, (?SIZE_BYTES * 8)). - -%% This is the size of the message record embedded in the queue -%% index. If 0, the message can be found in the message store. --define(EMBEDDED_SIZE_BYTES, 4). --define(EMBEDDED_SIZE_BITS, (?EMBEDDED_SIZE_BYTES * 8)). - -%% 16 bytes for md5sum + 8 for expiry --define(PUB_RECORD_BODY_BYTES, (?MSG_ID_BYTES + ?EXPIRY_BYTES + ?SIZE_BYTES)). -%% + 4 for size --define(PUB_RECORD_SIZE_BYTES, (?PUB_RECORD_BODY_BYTES + ?EMBEDDED_SIZE_BYTES)). - -%% + 2 for seq, bits and prefix --define(PUB_RECORD_PREFIX_BYTES, 2). - -%% ---- misc ---- - --define(PUB, {_, _, _}). %% {IsPersistent, Bin, MsgBin} - --define(READ_MODE, [binary, raw, read]). --define(WRITE_MODE, [write | ?READ_MODE]). - -%%---------------------------------------------------------------------------- - --record(qistate, { - %% queue directory where segment and journal files are stored - dir, - %% map of #segment records - segments, - %% journal file handle obtained from/used by file_handle_cache - journal_handle, - %% how many not yet flushed entries are there - dirty_count, - %% this many not yet flushed journal entries will force a flush - max_journal_entries, - %% callback function invoked when a message is "handled" - %% by the index and potentially can be confirmed to the publisher - on_sync, - on_sync_msg, - %% set of IDs of unconfirmed [to publishers] messages - unconfirmed, - unconfirmed_msg, - %% optimisation - pre_publish_cache, - %% optimisation - delivered_cache, - %% queue name resource record - queue_name}). 
- --record(segment, { - %% segment ID (an integer) - num, - %% segment file path (see also ?SEGMENT_EXTENSION) - path, - %% index operation log entries in this segment - journal_entries, - entries_to_segment, - %% counter of unacknowledged messages - unacked -}). - --include_lib("rabbit_common/include/rabbit.hrl"). --include_lib("kernel/include/logger.hrl"). - -%%---------------------------------------------------------------------------- - --type hdl() :: ('undefined' | any()). --type segment() :: ('undefined' | - #segment { num :: non_neg_integer(), - path :: file:filename(), - journal_entries :: array:array(), - entries_to_segment :: array:array(), - unacked :: non_neg_integer() - }). --type seg_map() :: {map(), [segment()]}. --type on_sync_fun() :: fun ((sets:set()) -> ok). --type qistate() :: #qistate { dir :: file:filename(), - segments :: 'undefined' | seg_map(), - journal_handle :: hdl(), - dirty_count :: integer(), - max_journal_entries :: non_neg_integer(), - on_sync :: on_sync_fun(), - on_sync_msg :: on_sync_fun(), - unconfirmed :: sets:set(), - unconfirmed_msg :: sets:set(), - pre_publish_cache :: list(), - delivered_cache :: list() - }. --type contains_predicate() :: fun ((rabbit_types:msg_id()) -> boolean()). --type walker(A) :: fun ((A) -> 'finished' | - {rabbit_types:msg_id(), non_neg_integer(), A}). --type shutdown_terms() :: [term()] | 'non_clean_shutdown'. - -%%---------------------------------------------------------------------------- -%% public API -%%---------------------------------------------------------------------------- - --spec erase(rabbit_amqqueue:name()) -> 'ok'. - -erase(#resource{ virtual_host = VHost } = Name) -> - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - #qistate { dir = Dir } = blank_state(VHostDir, Name), - erase_index_dir(Dir). - -%% used during variable queue purge when there are no pending acks - --spec reset_state(qistate()) -> qistate(). 
- -reset_state(#qistate{ queue_name = Name, - dir = Dir, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun, - journal_handle = JournalHdl }) -> - ok = case JournalHdl of - undefined -> ok; - _ -> file_handle_cache:close(JournalHdl) - end, - ok = erase_index_dir(Dir), - blank_state_name_dir_funs(Name, Dir, OnSyncFun, OnSyncMsgFun). - --spec init(rabbit_amqqueue:name(), - on_sync_fun(), on_sync_fun()) -> qistate(). - -init(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) -> - #{segment_entry_count := SegmentEntryCount} = rabbit_vhost:read_config(VHost), - put(segment_entry_count, SegmentEntryCount), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - State = #qistate { dir = Dir } = blank_state(VHostDir, Name), - false = rabbit_file:is_file(Dir), %% is_file == is file or dir - State#qistate{on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun}. - -init_args(#qistate{ queue_name = QueueName, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun }) -> - {QueueName, OnSyncFun, OnSyncMsgFun}. - -init_for_conversion(#resource{ virtual_host = VHost } = Name, OnSyncFun, OnSyncMsgFun) -> - #{segment_entry_count := SegmentEntryCount} = rabbit_vhost:read_config(VHost), - put(segment_entry_count, SegmentEntryCount), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - State = blank_state(VHostDir, Name), - State#qistate{on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun}. - --spec recover(rabbit_amqqueue:name(), shutdown_terms(), boolean(), - contains_predicate(), - on_sync_fun(), on_sync_fun(), - main | convert) -> - {'undefined' | non_neg_integer(), - 'undefined' | non_neg_integer(), qistate()}. - -recover(#resource{ virtual_host = VHost } = Name, Terms, MsgStoreRecovered, - ContainsCheckFun, OnSyncFun, OnSyncMsgFun, - %% We only allow using this module when converting to v2. 
- convert) -> - #{segment_entry_count := SegmentEntryCount} = rabbit_vhost:read_config(VHost), - put(segment_entry_count, SegmentEntryCount), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - State = blank_state(VHostDir, Name), - State1 = State #qistate{on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun}, - CleanShutdown = Terms /= non_clean_shutdown, - case CleanShutdown andalso MsgStoreRecovered of - true -> case proplists:get_value(segments, Terms, non_clean_shutdown) of - non_clean_shutdown -> init_dirty(false, ContainsCheckFun, State1); - RecoveredCounts -> init_clean(RecoveredCounts, State1) - end; - false -> init_dirty(CleanShutdown, ContainsCheckFun, State1) - end. - --spec terminate(rabbit_types:vhost(), [any()], qistate()) -> qistate(). - -terminate(VHost, Terms, State = #qistate { dir = Dir }) -> - {SegmentCounts, State1} = terminate(State), - _ = rabbit_recovery_terms:store(VHost, filename:basename(Dir), - [{segments, SegmentCounts} | Terms]), - State1. - --spec delete_and_terminate(qistate()) -> qistate(). - -delete_and_terminate(State) -> - {_SegmentCounts, State1 = #qistate { dir = Dir }} = terminate(State), - ok = rabbit_file:recursive_delete([Dir]), - State1. - --spec info(qistate()) -> []. - -%% No info is implemented for v1 at this time. -info(_) -> []. 
- -pre_publish(MsgOrId, SeqId, MsgProps, IsPersistent, IsDelivered, JournalSizeHint, - State = #qistate{pre_publish_cache = PPC, - delivered_cache = DC}) -> - State1 = maybe_needs_confirming(MsgProps, MsgOrId, State), - - {Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps), - - PPC1 = - [[<<(case IsPersistent of - true -> ?PUB_PERSIST_JPREFIX; - false -> ?PUB_TRANS_JPREFIX - end):?JPREFIX_BITS, - SeqId:?SEQ_BITS, Bin/binary, - (size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin] | PPC], - - DC1 = - case IsDelivered of - true -> - [SeqId | DC]; - false -> - DC - end, - - State2 = add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1), - maybe_flush_pre_publish_cache( - JournalSizeHint, - State2#qistate{pre_publish_cache = PPC1, - delivered_cache = DC1}). - -%% pre_publish_cache is the entry with most elements when compared to -%% delivered_cache so we only check the former in the guard. -maybe_flush_pre_publish_cache(JournalSizeHint, - #qistate{pre_publish_cache = PPC} = State) -> - case length(PPC) >= segment_entry_count() of - true -> flush_pre_publish_cache(JournalSizeHint, State); - false -> State - end. - -flush_pre_publish_cache(JournalSizeHint, State) -> - State1 = flush_pre_publish_cache(State), - State2 = flush_delivered_cache(State1), - maybe_flush_journal(JournalSizeHint, State2). - -flush_pre_publish_cache(#qistate{pre_publish_cache = []} = State) -> - State; -flush_pre_publish_cache(State = #qistate{pre_publish_cache = PPC}) -> - {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append(JournalHdl, lists:reverse(PPC)), - State1#qistate{pre_publish_cache = []}. - -flush_delivered_cache(#qistate{delivered_cache = []} = State) -> - State; -flush_delivered_cache(State = #qistate{delivered_cache = DC}) -> - State1 = deliver(lists:reverse(DC), State), - State1#qistate{delivered_cache = []}. 
- -publish(MsgOrId, SeqId, _Location, MsgProps, IsPersistent, JournalSizeHint, State) -> - {JournalHdl, State1} = - get_journal_handle( - maybe_needs_confirming(MsgProps, MsgOrId, State)), - {Bin, MsgBin} = create_pub_record_body(MsgOrId, MsgProps), - ok = file_handle_cache:append( - JournalHdl, [<<(case IsPersistent of - true -> ?PUB_PERSIST_JPREFIX; - false -> ?PUB_TRANS_JPREFIX - end):?JPREFIX_BITS, - SeqId:?SEQ_BITS, Bin/binary, - (byte_size(MsgBin)):?EMBEDDED_SIZE_BITS>>, MsgBin]), - maybe_flush_journal( - JournalSizeHint, - add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State1)). - -publish(MsgOrId, SeqId, Location, MsgProps, IsPersistent, _, JournalSizeHint, State) -> - publish(MsgOrId, SeqId, Location, MsgProps, IsPersistent, JournalSizeHint, State). - -maybe_needs_confirming(MsgProps, MsgOrId, - State = #qistate{unconfirmed = UC, - unconfirmed_msg = UCM}) -> - MsgId = case MsgOrId of - Id when is_binary(Id) -> Id; - Msg -> - mc:get_annotation(id, Msg) - end, - ?MSG_ID_BYTES = byte_size(MsgId), - case {MsgProps#message_properties.needs_confirming, MsgOrId} of - {true, MsgId} -> UC1 = sets:add_element(MsgId, UC), - State#qistate{unconfirmed = UC1}; - {true, _} -> UCM1 = sets:add_element(MsgId, UCM), - State#qistate{unconfirmed_msg = UCM1}; - {false, _} -> State - end. - --spec deliver([rabbit_variable_queue:seq_id()], qistate()) -> qistate(). - -deliver(SeqIds, State) -> - deliver_or_ack(del, SeqIds, State). - --spec ack([rabbit_variable_queue:seq_id()], qistate()) -> {[], qistate()}. - -ack(SeqIds, State) -> - {[], deliver_or_ack(ack, SeqIds, State)}. - -%% This is called when there are outstanding confirms or when the -%% queue is idle and the journal needs syncing (see needs_sync/1). - --spec sync(qistate()) -> qistate(). - -sync(State = #qistate { journal_handle = undefined }) -> - State; -sync(State = #qistate { journal_handle = JournalHdl }) -> - ok = file_handle_cache:sync(JournalHdl), - notify_sync(State). 
- --spec needs_sync(qistate()) -> 'confirms' | 'other' | 'false'. - -needs_sync(#qistate{journal_handle = undefined}) -> - false; -needs_sync(#qistate{journal_handle = JournalHdl, - unconfirmed = UC, - unconfirmed_msg = UCM}) -> - case sets:is_empty(UC) andalso sets:is_empty(UCM) of - true -> case file_handle_cache:needs_sync(JournalHdl) of - true -> other; - false -> false - end; - false -> confirms - end. - --spec flush(qistate()) -> qistate(). - -flush(State = #qistate { dirty_count = 0 }) -> State; -flush(State) -> flush_journal(State). - --spec read(rabbit_variable_queue:seq_id(), - rabbit_variable_queue:seq_id(), - qistate()) -> - {[{rabbit_types:msg_id(), rabbit_variable_queue:seq_id(), - rabbit_variable_queue:msg_location(), - rabbit_types:message_properties(), - boolean()}], qistate()}. - -read(StartEnd, StartEnd, State) -> - {[], State}; -read(Start, End, State = #qistate { segments = Segments, - dir = Dir }) when Start =< End -> - %% Start is inclusive, End is exclusive. - LowerB = {StartSeg, _StartRelSeq} = seq_id_to_seg_and_rel_seq_id(Start), - UpperB = {EndSeg, _EndRelSeq} = seq_id_to_seg_and_rel_seq_id(End - 1), - {Messages, Segments1} = - lists:foldr(fun (Seg, Acc) -> - read_bounded_segment(Seg, LowerB, UpperB, Acc, Dir) - end, {[], Segments}, lists:seq(StartSeg, EndSeg)), - {Messages, State #qistate { segments = Segments1 }}. - --spec next_segment_boundary(rabbit_variable_queue:seq_id()) -> rabbit_variable_queue:seq_id(). - -next_segment_boundary(SeqId) -> - {Seg, _RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - reconstruct_seq_id(Seg + 1, 0). - --spec bounds(qistate()) -> - {non_neg_integer(), non_neg_integer(), qistate()}. - -bounds(State = #qistate { segments = Segments }) -> - %% This is not particularly efficient, but only gets invoked on - %% queue initialisation. - SegNums = lists:sort(segment_nums(Segments)), - %% Don't bother trying to figure out the lowest seq_id, merely the - %% seq_id of the start of the lowest segment. 
That seq_id may not - %% actually exist, but that's fine. The important thing is that - %% the segment exists and the seq_id reported is on a segment - %% boundary. - %% - %% We also don't really care about the max seq_id. Just start the - %% next segment: it makes life much easier. - %% - %% SegNums is sorted, ascending. - {LowSeqId, NextSeqId} = - case SegNums of - [] -> {0, 0}; - [MinSeg|_] -> {reconstruct_seq_id(MinSeg, 0), - reconstruct_seq_id(1 + lists:last(SegNums), 0)} - end, - {LowSeqId, NextSeqId, State}. - --spec start(rabbit_types:vhost(), [rabbit_amqqueue:name()]) -> {[[any()]], {walker(A), A}}. - -start(VHost, DurableQueueNames) -> - {ok, RecoveryTermsPid} = rabbit_recovery_terms:start(VHost), - rabbit_vhost_sup_sup:save_vhost_recovery_terms(VHost, RecoveryTermsPid), - {DurableTerms, DurableDirectories} = - lists:foldl( - fun(QName, {RecoveryTerms, ValidDirectories}) -> - DirName = queue_name_to_dir_name(QName), - RecoveryInfo = case rabbit_recovery_terms:read(VHost, DirName) of - {error, _} -> non_clean_shutdown; - {ok, Terms} -> Terms - end, - {[RecoveryInfo | RecoveryTerms], - sets:add_element(DirName, ValidDirectories)} - end, {[], sets:new()}, DurableQueueNames), - %% Any queue directory we've not been asked to recover is considered garbage - ToDelete = [filename:join([rabbit_vhost:msg_store_dir_path(VHost), "queues", Dir]) - || Dir <- lists:subtract(all_queue_directory_names(VHost), - sets:to_list(DurableDirectories))], - ?LOG_DEBUG("Deleting unknown files/folders: ~p", [ToDelete]), - _ = rabbit_file:recursive_delete(ToDelete), - - rabbit_recovery_terms:clear(VHost), - - %% The backing queue interface requires that the queue recovery terms - %% which come back from start/1 are in the same order as DurableQueueNames - OrderedTerms = lists:reverse(DurableTerms), - {OrderedTerms, {fun queue_index_walker/1, {start, DurableQueueNames}}}. - - -stop(VHost) -> rabbit_recovery_terms:stop(VHost). 
- -all_queue_directory_names(VHost) -> - VHostQueuesPath = filename:join([rabbit_vhost:msg_store_dir_path(VHost), "queues"]), - case filelib:is_dir(VHostQueuesPath) of - true -> - {ok, Dirs} = file:list_dir(VHostQueuesPath), - Dirs; - false -> [] - end. - -%%---------------------------------------------------------------------------- -%% startup and shutdown -%%---------------------------------------------------------------------------- - -erase_index_dir(Dir) -> - case rabbit_file:is_dir(Dir) of - true -> rabbit_file:recursive_delete([Dir]); - false -> ok - end. - -blank_state(VHostDir, QueueName) -> - Dir = queue_dir(VHostDir, QueueName), - blank_state_name_dir_funs(QueueName, - Dir, - fun (_) -> ok end, - fun (_) -> ok end). - -queue_dir(VHostDir, QueueName) -> - %% Queue directory is - %% {node_database_dir}/msg_stores/vhosts/{vhost}/queues/{queue} - QueueDir = queue_name_to_dir_name(QueueName), - filename:join([VHostDir, "queues", QueueDir]). - -queue_name_to_dir_name(#resource { kind = queue, - virtual_host = VHost, - name = QName }) -> - <> = erlang:md5(<<"queue", VHost/binary, QName/binary>>), - rabbit_misc:format("~.36B", [Num]). - -blank_state_name_dir_funs(Name, Dir, OnSyncFun, OnSyncMsgFun) -> - {ok, MaxJournal} = - application:get_env(rabbit, queue_index_max_journal_entries), - #qistate { dir = Dir, - segments = segments_new(), - journal_handle = undefined, - dirty_count = 0, - max_journal_entries = MaxJournal, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun, - unconfirmed = sets:new([{version,2}]), - unconfirmed_msg = sets:new([{version,2}]), - pre_publish_cache = [], - delivered_cache = [], - queue_name = Name }. - -init_clean(RecoveredCounts, State) -> - %% Load the journal. Since this is a clean recovery this (almost) - %% gets us back to where we were on shutdown. 
- State1 = #qistate { dir = Dir, segments = Segments } = load_journal(State), - %% The journal loading only creates records for segments touched - %% by the journal, and the counts are based on the journal entries - %% only. We need *complete* counts for *all* segments. By an - %% amazing coincidence we stored that information on shutdown. - Segments1 = - lists:foldl( - fun ({Seg, UnackedCount}, SegmentsN) -> - Segment = segment_find_or_new(Seg, Dir, SegmentsN), - segment_store(Segment #segment { unacked = UnackedCount }, - SegmentsN) - end, Segments, RecoveredCounts), - %% the counts above include transient messages, which would be the - %% wrong thing to return - {undefined, undefined, State1 # qistate { segments = Segments1 }}. - --define(RECOVER_COUNT, 1). --define(RECOVER_BYTES, 2). --define(RECOVER_COUNTER_SIZE, 2). - -init_dirty(CleanShutdown, ContainsCheckFun, State) -> - %% Recover the journal completely. This will also load segments - %% which have entries in the journal and remove duplicates. The - %% counts will correctly reflect the combination of the segment - %% and the journal. - State1 = #qistate { dir = Dir, segments = Segments } = - recover_journal(State), - {Segments1, Count, Bytes, DirtyCount} = - %% Load each segment in turn and filter out messages that are - %% not in the msg_store, by adding acks to the journal. These - %% acks only go to the RAM journal as it doesn't matter if we - %% lose them. Also mark delivered if not clean shutdown. Also - %% find the number of unacked messages. Also accumulate the - %% dirty count here, so we can call maybe_flush_journal below - %% and avoid unnecessary file system operations. 
- lists:foldl( - fun (Seg, {Segments2, CountAcc, BytesAcc, DirtyCount}) -> - {{Segment = #segment { unacked = UnackedCount }, Dirty}, - UnackedBytes} = - recover_segment(ContainsCheckFun, CleanShutdown, - segment_find_or_new(Seg, Dir, Segments2), - State1#qistate.max_journal_entries), - {segment_store(Segment, Segments2), - CountAcc + UnackedCount, - BytesAcc + UnackedBytes, DirtyCount + Dirty} - end, {Segments, 0, 0, 0}, all_segment_nums(State1)), - %% We force flush the journal to avoid getting into a bad state - %% when the node gets shut down immediately after init. It takes - %% a few restarts for the problem to materialize itself, with - %% at least one message published, followed by the process crashing, - %% followed by a recovery that is dirty due to term mismatch in the - %% message store, followed by two clean recoveries. This last - %% recovery fails with a crash. - State2 = flush_journal(State1 #qistate { segments = Segments1, - dirty_count = DirtyCount }), - {Count, Bytes, State2}. - -terminate(State = #qistate { journal_handle = JournalHdl, - segments = Segments }) -> - ok = case JournalHdl of - undefined -> ok; - _ -> file_handle_cache:close(JournalHdl) - end, - SegmentCounts = - segment_fold( - fun (#segment { num = Seg, unacked = UnackedCount }, Acc) -> - [{Seg, UnackedCount} | Acc] - end, [], Segments), - {SegmentCounts, State #qistate { journal_handle = undefined, - segments = undefined }}. 
- -recover_segment(ContainsCheckFun, CleanShutdown, - Segment = #segment { journal_entries = JEntries }, MaxJournal) -> - {SegEntries, UnackedCount} = load_segment(false, Segment), - {SegEntries1, UnackedCountDelta} = - segment_plus_journal(SegEntries, JEntries), - array:sparse_foldl( - fun (RelSeq, {{IsPersistent, Bin, MsgBin}, Del, no_ack}, - {SegmentAndDirtyCount, Bytes}) -> - {MsgOrId, MsgProps} = parse_pub_record_body(Bin, MsgBin), - {recover_message(ContainsCheckFun(MsgOrId), CleanShutdown, - Del, RelSeq, SegmentAndDirtyCount, MaxJournal), - %% @todo If the message is dropped we shouldn't add the size? - Bytes + case IsPersistent of - true -> MsgProps#message_properties.size; - false -> 0 - end} - end, - {{Segment #segment { unacked = UnackedCount + UnackedCountDelta }, 0}, 0}, - SegEntries1). - -recover_message( true, true, _Del, _RelSeq, SegmentAndDirtyCount, _MaxJournal) -> - SegmentAndDirtyCount; -recover_message( true, false, del, _RelSeq, SegmentAndDirtyCount, _MaxJournal) -> - SegmentAndDirtyCount; -recover_message( true, false, no_del, RelSeq, {Segment, _DirtyCount}, MaxJournal) -> - %% force to flush the segment - {add_to_journal(RelSeq, del, Segment), MaxJournal + 1}; -recover_message(false, _, del, RelSeq, {Segment, DirtyCount}, _MaxJournal) -> - {add_to_journal(RelSeq, ack, Segment), DirtyCount + 1}; -recover_message(false, _, no_del, RelSeq, {Segment, DirtyCount}, _MaxJournal) -> - {add_to_journal(RelSeq, ack, - add_to_journal(RelSeq, del, Segment)), - DirtyCount + 2}. 
- -%%---------------------------------------------------------------------------- -%% msg store startup delta function -%%---------------------------------------------------------------------------- - -queue_index_walker({start, DurableQueues}) when is_list(DurableQueues) -> - {ok, Gatherer} = gatherer:start_link(), - [begin - ok = gatherer:fork(Gatherer), - ok = worker_pool:submit_async( - fun () -> link(Gatherer), - ok = queue_index_walker_reader(QueueName, Gatherer), - unlink(Gatherer), - ok - end) - end || QueueName <- DurableQueues], - queue_index_walker({next, Gatherer}); - -queue_index_walker({next, Gatherer}) when is_pid(Gatherer) -> - case gatherer:out(Gatherer) of - empty -> - ok = gatherer:stop(Gatherer), - finished; - {value, {MsgId, Count}} -> - {MsgId, Count, {next, Gatherer}} - end. - -queue_index_walker_reader(QueueName, Gatherer) -> - ok = scan_queue_segments( - fun (_SeqId, MsgId, _MsgProps, true, _IsDelivered, no_ack, ok) - when is_binary(MsgId) -> - gatherer:sync_in(Gatherer, {MsgId, 1}); - (_SeqId, _MsgId, _MsgProps, _IsPersistent, _IsDelivered, - _IsAcked, Acc) -> - Acc - end, ok, QueueName), - ok = gatherer:finish(Gatherer). - -scan_queue_segments(Fun, Acc, #resource{ virtual_host = VHost } = QueueName) -> - %% Set the segment_entry_count for this worker process. - #{segment_entry_count := SegmentEntryCount} = rabbit_vhost:read_config(VHost), - put(segment_entry_count, SegmentEntryCount), - VHostDir = rabbit_vhost:msg_store_dir_path(VHost), - scan_queue_segments(Fun, Acc, VHostDir, QueueName). 
- -scan_queue_segments(Fun, Acc, VHostDir, QueueName) -> - State = #qistate { segments = Segments, dir = Dir } = - recover_journal(blank_state(VHostDir, QueueName)), - Result = lists:foldr( - fun (Seg, AccN) -> - segment_entries_foldr( - fun (RelSeq, {{MsgOrId, MsgProps, IsPersistent}, - IsDelivered, IsAcked}, AccM) -> - Fun(reconstruct_seq_id(Seg, RelSeq), MsgOrId, MsgProps, - IsPersistent, IsDelivered, IsAcked, AccM) - end, AccN, segment_find_or_new(Seg, Dir, Segments)) - end, Acc, all_segment_nums(State)), - {_SegmentCounts, _State} = terminate(State), - Result. - -%%---------------------------------------------------------------------------- -%% expiry/binary manipulation -%%---------------------------------------------------------------------------- - -create_pub_record_body(MsgOrId, #message_properties { expiry = Expiry, - size = Size }) -> - ExpiryBin = expiry_to_binary(Expiry), - case MsgOrId of - MsgId when is_binary(MsgId) -> - {<>, <<>>}; - Msg -> - MsgId = mc:get_annotation(id, Msg), - MsgBin = term_to_binary(MsgOrId), - {<>, MsgBin} - end. - -expiry_to_binary(undefined) -> <>; -expiry_to_binary(Expiry) -> <>. - -parse_pub_record_body(<>, MsgBin) -> - %% work around for binary data fragmentation. See - %% rabbit_msg_file:read_next/2 - <> = <>, - Props = #message_properties{expiry = case Expiry of - ?NO_EXPIRY -> undefined; - X -> X - end, - size = Size}, - case MsgBin of - <<>> -> {MsgId, Props}; - _ -> - Msg = binary_to_term(MsgBin), - %% assertion - MsgId = mc:get_annotation(id, Msg), - {Msg, Props} - end. 
- -%%---------------------------------------------------------------------------- -%% journal manipulation -%%---------------------------------------------------------------------------- - -add_to_journal(SeqId, Action, State = #qistate { dirty_count = DCount, - segments = Segments, - dir = Dir }) -> - {Seg, RelSeq} = seq_id_to_seg_and_rel_seq_id(SeqId), - Segment = segment_find_or_new(Seg, Dir, Segments), - Segment1 = add_to_journal(RelSeq, Action, Segment), - State #qistate { dirty_count = DCount + 1, - segments = segment_store(Segment1, Segments) }; - -add_to_journal(RelSeq, Action, - Segment = #segment { journal_entries = JEntries, - entries_to_segment = EToSeg, - unacked = UnackedCount }) -> - - {Fun, Entry} = action_to_entry(RelSeq, Action, JEntries), - - {JEntries1, EToSeg1} = - case Fun of - set -> - {array:set(RelSeq, Entry, JEntries), - array:set(RelSeq, entry_to_segment(RelSeq, Entry, []), - EToSeg)}; - reset -> - {array:reset(RelSeq, JEntries), - array:reset(RelSeq, EToSeg)} - end, - - Segment #segment { - journal_entries = JEntries1, - entries_to_segment = EToSeg1, - unacked = UnackedCount + case Action of - ?PUB -> +1; - del -> 0; - ack -> -1 - end}. - -action_to_entry(RelSeq, Action, JEntries) -> - case array:get(RelSeq, JEntries) of - undefined -> - {set, - case Action of - ?PUB -> {Action, no_del, no_ack}; - del -> {no_pub, del, no_ack}; - ack -> {no_pub, no_del, ack} - end}; - ({Pub, no_del, no_ack}) when Action == del -> - {set, {Pub, del, no_ack}}; - ({no_pub, del, no_ack}) when Action == ack -> - {set, {no_pub, del, ack}}; - ({?PUB, del, no_ack}) when Action == ack -> - {reset, none}; - %% Special case, missing del - %% See journal_minus_segment1/2 - ({?PUB, no_del, no_ack}) when Action == ack -> - {reset, none} - end. - -maybe_flush_journal(State) -> - maybe_flush_journal(infinity, State). 
- -maybe_flush_journal(Hint, State = #qistate { dirty_count = DCount, - max_journal_entries = MaxJournal }) - when DCount > MaxJournal orelse (Hint =/= infinity andalso DCount > Hint) -> - flush_journal(State); -maybe_flush_journal(_Hint, State) -> - State. - -flush_journal(State = #qistate { segments = Segments }) -> - Segments1 = - segment_fold( - fun (#segment { unacked = 0, path = Path }, SegmentsN) -> - case rabbit_file:is_file(Path) of - true -> ok = rabbit_file:delete(Path); - false -> ok - end, - SegmentsN; - (#segment {} = Segment, SegmentsN) -> - segment_store(append_journal_to_segment(Segment), SegmentsN) - end, segments_new(), Segments), - {JournalHdl, State1} = - get_journal_handle(State #qistate { segments = Segments1 }), - ok = file_handle_cache:clear(JournalHdl), - notify_sync(State1 #qistate { dirty_count = 0 }). - -append_journal_to_segment(#segment { journal_entries = JEntries, - entries_to_segment = EToSeg, - path = Path } = Segment) -> - case array:sparse_size(JEntries) of - 0 -> Segment; - _ -> - {ok, Hdl} = file_handle_cache:open_with_absolute_path( - Path, ?WRITE_MODE, - [{write_buffer, infinity}]), - %% the file_handle_cache also does a list reverse, so this - %% might not be required here, but before we were doing a - %% sparse_foldr, a lists:reverse/1 seems to be the correct - %% thing to do for now. - _ = file_handle_cache:append(Hdl, lists:reverse(array:to_list(EToSeg))), - ok = file_handle_cache:close(Hdl), - Segment #segment { journal_entries = array_new(), - entries_to_segment = array_new([]) } - end. 
- -get_journal_handle(State = #qistate { journal_handle = undefined, - dir = Dir, - queue_name = Name }) -> - Path = filename:join(Dir, ?JOURNAL_FILENAME), - ok = rabbit_file:ensure_dir(Path), - ok = ensure_queue_name_stub_file(Dir, Name), - {ok, Hdl} = file_handle_cache:open_with_absolute_path( - Path, ?WRITE_MODE, [{write_buffer, infinity}]), - {Hdl, State #qistate { journal_handle = Hdl }}; -get_journal_handle(State = #qistate { journal_handle = Hdl }) -> - {Hdl, State}. - -%% Loading Journal. This isn't idempotent and will mess up the counts -%% if you call it more than once on the same state. Assumes the counts -%% are 0 to start with. -load_journal(State = #qistate { dir = Dir }) -> - Path = filename:join(Dir, ?JOURNAL_FILENAME), - case rabbit_file:is_file(Path) of - true -> {JournalHdl, State1} = get_journal_handle(State), - Size = rabbit_file:file_size(Path), - {ok, 0} = file_handle_cache:position(JournalHdl, 0), - {ok, JournalBin} = file_handle_cache:read(JournalHdl, Size), - parse_journal_entries(JournalBin, State1); - false -> State - end. - -%% ditto -recover_journal(State) -> - State1 = #qistate { segments = Segments } = load_journal(State), - Segments1 = - segment_map( - fun (Segment = #segment { journal_entries = JEntries, - entries_to_segment = EToSeg, - unacked = UnackedCountInJournal }) -> - %% We want to keep ack'd entries in so that we can - %% remove them if duplicates are in the journal. The - %% counts here are purely from the segment itself. - {SegEntries, UnackedCountInSeg} = load_segment(true, Segment), - {JEntries1, EToSeg1, UnackedCountDuplicates} = - journal_minus_segment(JEntries, EToSeg, SegEntries), - Segment #segment { journal_entries = JEntries1, - entries_to_segment = EToSeg1, - unacked = (UnackedCountInJournal + - UnackedCountInSeg - - UnackedCountDuplicates) } - end, Segments), - State1 #qistate { segments = Segments1 }. 
- -parse_journal_entries(<>, State) -> - parse_journal_entries(Rest, add_to_journal(SeqId, del, State)); - -parse_journal_entries(<>, State) -> - parse_journal_entries(Rest, add_to_journal(SeqId, ack, State)); -parse_journal_entries(<<0:?JPREFIX_BITS, 0:?SEQ_BITS, - 0:?PUB_RECORD_SIZE_BYTES/unit:8, _/binary>>, State) -> - %% Journal entry composed only of zeroes was probably - %% produced during a dirty shutdown so stop reading - State; -parse_journal_entries(<>, State) -> - IsPersistent = case Prefix of - ?PUB_PERSIST_JPREFIX -> true; - ?PUB_TRANS_JPREFIX -> false - end, - parse_journal_entries( - Rest, add_to_journal(SeqId, {IsPersistent, Bin, MsgBin}, State)); -parse_journal_entries(_ErrOrEoF, State) -> - State. - -deliver_or_ack(_Kind, [], State) -> - State; -deliver_or_ack(Kind, SeqIds, State) -> - JPrefix = case Kind of ack -> ?ACK_JPREFIX; del -> ?DEL_JPREFIX end, - {JournalHdl, State1} = get_journal_handle(State), - ok = file_handle_cache:append( - JournalHdl, - [<> || SeqId <- SeqIds]), - maybe_flush_journal(lists:foldl(fun (SeqId, StateN) -> - add_to_journal(SeqId, Kind, StateN) - end, State1, SeqIds)). - -notify_sync(State = #qistate{unconfirmed = UC, - unconfirmed_msg = UCM, - on_sync = OnSyncFun, - on_sync_msg = OnSyncMsgFun}) -> - State1 = case sets:is_empty(UC) of - true -> State; - false -> OnSyncFun(UC), - State#qistate{unconfirmed = sets:new([{version,2}])} - end, - case sets:is_empty(UCM) of - true -> State1; - false -> OnSyncMsgFun(UCM), - State1#qistate{unconfirmed_msg = sets:new([{version,2}])} - end. - -%%---------------------------------------------------------------------------- -%% segment manipulation -%%---------------------------------------------------------------------------- - -seq_id_to_seg_and_rel_seq_id(SeqId) -> - SegmentEntryCount = segment_entry_count(), - { SeqId div SegmentEntryCount, SeqId rem SegmentEntryCount }. - -reconstruct_seq_id(Seg, RelSeq) -> - (Seg * segment_entry_count()) + RelSeq. 
- -all_segment_nums(#qistate { dir = Dir, segments = Segments }) -> - lists:sort( - sets:to_list( - lists:foldl( - fun (SegName, Set) -> - sets:add_element( - list_to_integer( - lists:takewhile(fun (C) -> $0 =< C andalso C =< $9 end, - SegName)), Set) - end, sets:from_list(segment_nums(Segments)), - rabbit_file:wildcard(".*\\" ++ ?SEGMENT_EXTENSION, Dir)))). - -segment_find_or_new(Seg, Dir, Segments) -> - case segment_find(Seg, Segments) of - {ok, Segment} -> Segment; - error -> SegName = integer_to_list(Seg) ++ ?SEGMENT_EXTENSION, - Path = filename:join(Dir, SegName), - #segment { num = Seg, - path = Path, - journal_entries = array_new(), - entries_to_segment = array_new([]), - unacked = 0 } - end. - -segment_find(Seg, {_Segments, [Segment = #segment { num = Seg } |_]}) -> - {ok, Segment}; %% 1 or (2, matches head) -segment_find(Seg, {_Segments, [_, Segment = #segment { num = Seg }]}) -> - {ok, Segment}; %% 2, matches tail -segment_find(Seg, {Segments, _}) -> %% no match - maps:find(Seg, Segments). - -segment_store(Segment = #segment { num = Seg }, %% 1 or (2, matches head) - {Segments, [#segment { num = Seg } | Tail]}) -> - {Segments, [Segment | Tail]}; -segment_store(Segment = #segment { num = Seg }, %% 2, matches tail - {Segments, [SegmentA, #segment { num = Seg }]}) -> - {Segments, [Segment, SegmentA]}; -segment_store(Segment = #segment { num = Seg }, {Segments, []}) -> - {maps:remove(Seg, Segments), [Segment]}; -segment_store(Segment = #segment { num = Seg }, {Segments, [SegmentA]}) -> - {maps:remove(Seg, Segments), [Segment, SegmentA]}; -segment_store(Segment = #segment { num = Seg }, - {Segments, [SegmentA, SegmentB]}) -> - {maps:put(SegmentB#segment.num, SegmentB, maps:remove(Seg, Segments)), - [Segment, SegmentA]}. - -segment_fold(Fun, Acc, {Segments, CachedSegments}) -> - maps:fold(fun (_Seg, Segment, Acc1) -> Fun(Segment, Acc1) end, - lists:foldl(Fun, Acc, CachedSegments), Segments). 
- -segment_map(Fun, {Segments, CachedSegments}) -> - {maps:map(fun (_Seg, Segment) -> Fun(Segment) end, Segments), - lists:map(Fun, CachedSegments)}. - -segment_nums({Segments, CachedSegments}) -> - lists:map(fun (#segment { num = Num }) -> Num end, CachedSegments) ++ - maps:keys(Segments). - -segments_new() -> - {#{}, []}. - -entry_to_segment(_RelSeq, {?PUB, del, ack}, Initial) -> - Initial; -entry_to_segment(RelSeq, {Pub, Del, Ack}, Initial) -> - %% NB: we are assembling the segment in reverse order here, so - %% del/ack comes first. - Buf1 = case {Del, Ack} of - {no_del, no_ack} -> - Initial; - _ -> - Binary = <>, - case {Del, Ack} of - {del, ack} -> [[Binary, Binary] | Initial]; - _ -> [Binary | Initial] - end - end, - case Pub of - no_pub -> - Buf1; - {IsPersistent, Bin, MsgBin} -> - [[<>, MsgBin] | Buf1] - end. - -read_bounded_segment(Seg, {StartSeg, StartRelSeq}, {EndSeg, EndRelSeq}, - {Messages, Segments}, Dir) -> - Segment = segment_find_or_new(Seg, Dir, Segments), - {segment_entries_foldr( - fun (RelSeq, {{MsgOrId, MsgProps, IsPersistent}, _IsDelivered, no_ack}, - Acc) - when (Seg > StartSeg orelse StartRelSeq =< RelSeq) andalso - (Seg < EndSeg orelse EndRelSeq >= RelSeq) -> - MsgLocation = case is_tuple(MsgOrId) of - true -> rabbit_queue_index; - false -> rabbit_msg_store - end, - [{MsgOrId, reconstruct_seq_id(StartSeg, RelSeq), MsgLocation, MsgProps, - IsPersistent} | Acc]; - (_RelSeq, _Value, Acc) -> - Acc - end, Messages, Segment), - segment_store(Segment, Segments)}. - -segment_entries_foldr(Fun, Init, - Segment = #segment { journal_entries = JEntries }) -> - {SegEntries, _UnackedCount} = load_segment(false, Segment), - {SegEntries1, _UnackedCountD} = segment_plus_journal(SegEntries, JEntries), - array:sparse_foldr( - fun (RelSeq, {{IsPersistent, Bin, MsgBin}, Del, Ack}, Acc) -> - {MsgOrId, MsgProps} = parse_pub_record_body(Bin, MsgBin), - Fun(RelSeq, {{MsgOrId, MsgProps, IsPersistent}, Del, Ack}, Acc) - end, Init, SegEntries1). 
- -%% Loading segments -%% -%% Does not do any combining with the journal at all. -load_segment(KeepAcked, #segment { path = Path }) -> - Empty = {array_new(), 0}, - case rabbit_file:is_file(Path) of - false -> Empty; - true -> Size = rabbit_file:file_size(Path), - {ok, Hdl} = file_handle_cache:open_with_absolute_path( - Path, ?READ_MODE, []), - {ok, 0} = file_handle_cache:position(Hdl, bof), - {ok, SegBin} = file_handle_cache:read(Hdl, Size), - ok = file_handle_cache:close(Hdl), - %% We check if the file is full of 0s. I do not know why this can happen - %% but this happens AT LEAST during v2->v1 conversion when resuming after - %% a crash has happened. Since the file is invalid, we delete it and - %% return no entries instead of just crashing (just like if the file - %% was missing above). We also log some information. - case SegBin of - <<0:Size/unit:8>> -> - ?LOG_WARNING("Deleting invalid v1 segment file ~ts (file only contains NUL bytes)", - [Path]), - _ = rabbit_file:delete(Path), - Empty; - _ -> - Res = parse_segment_entries(SegBin, KeepAcked, Empty), - Res - end - end. - -parse_segment_entries(<>, - KeepAcked, Acc) -> - parse_segment_publish_entry( - Rest, 1 == IsPersistNum, RelSeq, KeepAcked, Acc); -parse_segment_entries(<>, KeepAcked, Acc) -> - parse_segment_entries( - Rest, KeepAcked, add_segment_relseq_entry(KeepAcked, RelSeq, Acc)); -parse_segment_entries(<<>>, _KeepAcked, Acc) -> - Acc. - -parse_segment_publish_entry(<>, - IsPersistent, RelSeq, KeepAcked, - {SegEntries, Unacked}) -> - Obj = {{IsPersistent, Bin, MsgBin}, no_del, no_ack}, - SegEntries1 = array:set(RelSeq, Obj, SegEntries), - parse_segment_entries(Rest, KeepAcked, {SegEntries1, Unacked + 1}); -parse_segment_publish_entry(Rest, _IsPersistent, _RelSeq, KeepAcked, Acc) -> - parse_segment_entries(Rest, KeepAcked, Acc). 
- -add_segment_relseq_entry(KeepAcked, RelSeq, {SegEntries, Unacked}) -> - case array:get(RelSeq, SegEntries) of - {Pub, no_del, no_ack} -> - {array:set(RelSeq, {Pub, del, no_ack}, SegEntries), Unacked}; - {Pub, del, no_ack} when KeepAcked -> - {array:set(RelSeq, {Pub, del, ack}, SegEntries), Unacked - 1}; - {_Pub, del, no_ack} -> - {array:reset(RelSeq, SegEntries), Unacked - 1} - end. - -array_new() -> - array_new(undefined). - -array_new(Default) -> - array:new([{default, Default}, fixed, {size, segment_entry_count()}]). - -segment_entry_count() -> - get(segment_entry_count). - -bool_to_int(true ) -> 1; -bool_to_int(false) -> 0. - -%%---------------------------------------------------------------------------- -%% journal & segment combination -%%---------------------------------------------------------------------------- - -%% Combine what we have just read from a segment file with what we're -%% holding for that segment in memory. There must be no duplicates. -segment_plus_journal(SegEntries, JEntries) -> - array:sparse_foldl( - fun (RelSeq, JObj, {SegEntriesOut, AdditionalUnacked}) -> - SegEntry = array:get(RelSeq, SegEntriesOut), - {Obj, AdditionalUnackedDelta} = - segment_plus_journal1(SegEntry, JObj), - {case Obj of - undefined -> array:reset(RelSeq, SegEntriesOut); - _ -> array:set(RelSeq, Obj, SegEntriesOut) - end, - AdditionalUnacked + AdditionalUnackedDelta} - end, {SegEntries, 0}, JEntries). - -%% Here, the result is a tuple with the first element containing the -%% item which we may be adding to (for items only in the journal), -%% modifying in (bits in both), or, when returning 'undefined', -%% erasing from (ack in journal, not segment) the segment array. The -%% other element of the tuple is the delta for AdditionalUnacked. 
-segment_plus_journal1(undefined, {?PUB, no_del, no_ack} = Obj) -> - {Obj, 1}; -segment_plus_journal1(undefined, {?PUB, del, no_ack} = Obj) -> - {Obj, 1}; -segment_plus_journal1(undefined, {?PUB, del, ack}) -> - {undefined, 0}; - -segment_plus_journal1({?PUB = Pub, no_del, no_ack}, {no_pub, del, no_ack}) -> - {{Pub, del, no_ack}, 0}; -segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, del, ack}) -> - {undefined, -1}; -segment_plus_journal1({?PUB, del, no_ack}, {no_pub, no_del, ack}) -> - {undefined, -1}; - -%% Special case, missing del -%% See journal_minus_segment1/2 -segment_plus_journal1({?PUB, no_del, no_ack}, {no_pub, no_del, ack}) -> - {undefined, -1}. - -%% Remove from the journal entries for a segment, items that are -%% duplicates of entries found in the segment itself. Used on start up -%% to clean up the journal. -%% -%% We need to update the entries_to_segment since they are just a -%% cache of what's on the journal. -journal_minus_segment(JEntries, EToSeg, SegEntries) -> - array:sparse_foldl( - fun (RelSeq, JObj, {JEntriesOut, EToSegOut, UnackedRemoved}) -> - SegEntry = array:get(RelSeq, SegEntries), - {Obj, UnackedRemovedDelta} = - journal_minus_segment1(JObj, SegEntry), - {JEntriesOut1, EToSegOut1} = - case Obj of - keep -> - {JEntriesOut, EToSegOut}; - undefined -> - {array:reset(RelSeq, JEntriesOut), - array:reset(RelSeq, EToSegOut)}; - _ -> - {array:set(RelSeq, Obj, JEntriesOut), - array:set(RelSeq, entry_to_segment(RelSeq, Obj, []), - EToSegOut)} - end, - {JEntriesOut1, EToSegOut1, UnackedRemoved + UnackedRemovedDelta} - end, {JEntries, EToSeg, 0}, JEntries). - -%% Here, the result is a tuple with the first element containing the -%% item we are adding to or modifying in the (initially fresh) journal -%% array. If the item is 'undefined' we leave the journal array -%% alone. The other element of the tuple is the deltas for -%% UnackedRemoved. - -%% Both the same. 
Must be at least the publish -journal_minus_segment1({?PUB, _Del, no_ack} = Obj, Obj) -> - {undefined, 1}; -journal_minus_segment1({?PUB, _Del, ack} = Obj, Obj) -> - {undefined, 0}; - -%% Just publish in journal -journal_minus_segment1({?PUB, no_del, no_ack}, undefined) -> - {keep, 0}; - -%% Publish and deliver in journal -journal_minus_segment1({?PUB, del, no_ack}, undefined) -> - {keep, 0}; -journal_minus_segment1({?PUB = Pub, del, no_ack}, {Pub, no_del, no_ack}) -> - {{no_pub, del, no_ack}, 1}; - -%% Publish, deliver and ack in journal -journal_minus_segment1({?PUB, del, ack}, undefined) -> - {keep, 0}; -journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, no_del, no_ack}) -> - {{no_pub, del, ack}, 1}; -journal_minus_segment1({?PUB = Pub, del, ack}, {Pub, del, no_ack}) -> - {{no_pub, no_del, ack}, 1}; - -%% Just deliver in journal -journal_minus_segment1({no_pub, del, no_ack}, {?PUB, no_del, no_ack}) -> - {keep, 0}; -journal_minus_segment1({no_pub, del, no_ack}, {?PUB, del, no_ack}) -> - {undefined, 0}; - -%% Just ack in journal -journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, no_ack}) -> - {keep, 0}; -journal_minus_segment1({no_pub, no_del, ack}, {?PUB, del, ack}) -> - {undefined, -1}; - -%% Just ack in journal, missing del -%% Since 3.10 message delivery is tracked per-queue, not per-message, -%% but to keep queue index v1 format messages are always marked as -%% delivered on publish. But for a message that was published before -%% 3.10 this is not the case and the delivery marker can be missing. -%% As a workaround we add the del marker because if a message is acked -%% it must have been delivered as well. 
-journal_minus_segment1({no_pub, no_del, ack}, {?PUB, no_del, no_ack}) -> - {{no_pub, del, ack}, 0}; - -%% Deliver and ack in journal -journal_minus_segment1({no_pub, del, ack}, {?PUB, no_del, no_ack}) -> - {keep, 0}; -journal_minus_segment1({no_pub, del, ack}, {?PUB, del, no_ack}) -> - {{no_pub, no_del, ack}, 0}; -journal_minus_segment1({no_pub, del, ack}, {?PUB, del, ack}) -> - {undefined, -1}; - -%% Missing segment. If flush_journal/1 is interrupted after deleting -%% the segment but before truncating the journal we can get these -%% cases: a delivery and an acknowledgement in the journal, or just an -%% acknowledgement in the journal, but with no segment. In both cases -%% we have really forgotten the message; so ignore what's in the -%% journal. -journal_minus_segment1({no_pub, no_del, ack}, undefined) -> - {undefined, 0}; -journal_minus_segment1({no_pub, del, ack}, undefined) -> - {undefined, 0}. - -%%---------------------------------------------------------------------------- -%% Migration functions -%%---------------------------------------------------------------------------- - -ensure_queue_name_stub_file(Dir, #resource{virtual_host = VHost, name = QName}) -> - QueueNameFile = filename:join(Dir, ?QUEUE_NAME_STUB_FILE), - file:write_file(QueueNameFile, <<"VHOST: ", VHost/binary, "\n", - "QUEUE: ", QName/binary, "\n">>). - -%% This function is only used when upgrading to the v2 index. -%% We delete the segment file without updating the state. -%% We will drop the state later on so we don't care much -%% about how accurate it is as long as we can read from -%% subsequent segment files. -delete_segment_file_for_seq_id(SeqId, #qistate { segments = Segments }) -> - {Seg, _} = seq_id_to_seg_and_rel_seq_id(SeqId), - case segment_find(Seg, Segments) of - {ok, #segment { path = Path }} -> - case rabbit_file:delete(Path) of - ok -> ok; - %% The file may not exist on disk yet. - {error, enoent} -> ok - end; - error -> - ok - end. 
- -delete_journal(#qistate { dir = Dir, journal_handle = JournalHdl }) -> - %% Close the journal handle if any. - ok = case JournalHdl of - undefined -> ok; - _ -> file_handle_cache:close(JournalHdl) - end, - %% Delete the journal file. - _ = rabbit_file:delete(filename:join(Dir, "journal.jif")), - ok. diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 0c2ec75767a4..df9f5036eed1 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -12,21 +12,17 @@ publish/5, publish_delivered/4, discard/3, drain_confirmed/1, dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2, - ackfold/4, fold/3, len/1, is_empty/1, depth/1, + ackfold/4, len/1, is_empty/1, depth/1, update_rates/1, needs_timeout/1, timeout/1, handle_pre_hibernate/1, resume/1, msg_rates/1, - info/2, invoke/3, is_duplicate/2, set_queue_mode/2, - set_queue_version/2, zip_msgs_and_acks/4, + info/2, invoke/3, is_duplicate/2, + zip_msgs_and_acks/4, format_state/1]). -export([start/2, stop/1]). -%% This function is used by rabbit_classic_queue_index_v2 -%% to convert v1 queues to v2 after an upgrade to 4.0. --export([convert_from_v1_to_v2_loop/8]). - %% exported for testing only --export([start_msg_store/3, stop_msg_store/1, init/5]). +-export([start_msg_store/3, stop_msg_store/1]). -include("mc.hrl"). -include_lib("stdlib/include/qlc.hrl"). @@ -123,56 +119,19 @@ %% message in the queue (effectively marking all messages as %% delivered, like the v1 index was doing). %% -%% Previous versions of classic queues had a much more complex -%% way of working. 
Messages were categorized into four groups, -%% and remnants of these terms remain in the code at the time -%% of writing: -%% -%% alpha: this is a message where both the message itself, and its -%% position within the queue are held in RAM -%% -%% beta: this is a message where the message itself is only held on -%% disk (if persisted to the message store) but its position -%% within the queue is held in RAM. -%% -%% gamma: this is a message where the message itself is only held on -%% disk, but its position is both in RAM and on disk. -%% -%% delta: this is a collection of messages, represented by a single -%% term, where the messages and their position are only held on -%% disk. -%% -%% Messages may have been stored in q1, q2, delta, q3 or q4 depending -%% on their location in the queue. The current version of classic -%% queues only use delta (on-disk, for the tail of the queue) or -%% q3 (in-memory, head of the queue). Messages used to move from -%% q1 -> q2 -> delta -> q3 -> q4 (and sometimes q3 -> delta or -%% q4 -> delta to reduce memory use). Now messages only move -%% from delta to q3. Full details on the old mechanisms can be -%% found in previous versions of this file (such as the 3.11 version). -%% -%% In the current version of classic queues, there is no distinction -%% between default and lazy queues. The current behavior is close to -%% lazy queues, except we avoid some write to disks when queues are -%% empty. %%---------------------------------------------------------------------------- -behaviour(rabbit_backing_queue). -record(vqstate, - { q1, %% Unused. - q2, %% Unused. - delta, - q3, - q4, %% Unused. + { q_head, + q_tail, next_seq_id, %% seq_id() of first undelivered message %% everything before this seq_id() was delivered at least once next_deliver_seq_id, ram_pending_ack, %% msgs still in RAM disk_pending_ack, %% msgs in store, paged out - qi_pending_ack, %% Unused. - index_mod, %% Unused. 
index_state, store_state, msg_store_clients, @@ -180,14 +139,11 @@ transient_threshold, qi_embed_msgs_below, - len, %% w/o unacked @todo No longer needed, is delta+q3. bytes, %% w/o unacked unacked_bytes, persistent_count, %% w unacked persistent_bytes, %% w unacked - delta_transient_bytes, %% - target_ram_count, ram_msg_count, %% w/o unacked ram_msg_count_prev, ram_ack_count_prev, @@ -196,7 +152,7 @@ in_counter, rates, %% There are two confirms paths: either store/index produce confirms - %% separately (v1 and v2 with per-vhost message store) or the confirms + %% separately (v2 with per-vhost message store) or the confirms %% are produced all at once while syncing/flushing (v2 with per-queue %% message store). The latter is more efficient as it avoids many %% sets operations. @@ -211,11 +167,6 @@ disk_read_count, disk_write_count, - io_batch_size, %% Unused. - - %% default queue or lazy queue - mode, %% Unused. - version = 2, %% Unused. %% Fast path for confirms handling. Instead of having %% index/store keep track of confirms separately and %% doing intersect/subtract/union we just put the messages @@ -233,7 +184,6 @@ -type msg_location() :: memory | rabbit_msg_store - | rabbit_queue_index | rabbit_classic_queue_store_v2:msg_location(). -export_type([msg_location/0]). @@ -243,20 +193,18 @@ msg, is_persistent, is_delivered, - msg_location, %% ?IN_SHARED_STORE | ?IN_QUEUE_STORE | ?IN_QUEUE_INDEX | ?IN_MEMORY + msg_location, %% ?IN_SHARED_STORE | ?IN_QUEUE_STORE | ?IN_MEMORY index_on_disk, persist_to, msg_props }). --record(delta, +-record(q_tail, { start_seq_id, %% start_seq_id is inclusive count, - transient, end_seq_id %% end_seq_id is exclusive }). --define(HEADER_GUESS_SIZE, 100). %% see determine_persist_to/2 -define(PERSISTENT_MSG_STORE, msg_store_persistent). -define(TRANSIENT_MSG_STORE, msg_store_transient). @@ -264,7 +212,6 @@ -define(IN_SHARED_STORE, rabbit_msg_store). -define(IN_QUEUE_STORE, {rabbit_classic_queue_store_v2, _, _}). 
--define(IN_QUEUE_INDEX, rabbit_queue_index). -define(IN_MEMORY, memory). -include_lib("rabbit_common/include/rabbit.hrl"). @@ -282,9 +229,9 @@ ack_out :: float(), timestamp :: rabbit_types:timestamp()}. --type delta() :: #delta { start_seq_id :: non_neg_integer(), - count :: non_neg_integer(), - end_seq_id :: non_neg_integer() }. +-type q_tail() :: #q_tail { start_seq_id :: non_neg_integer(), + count :: non_neg_integer(), + end_seq_id :: non_neg_integer() }. %% The compiler (rightfully) complains that ack() and state() are %% unused. For this reason we duplicate a -spec from @@ -294,16 +241,12 @@ %% these here for documentation purposes. -type ack() :: seq_id(). -type state() :: #vqstate { - q1 :: ?QUEUE:?QUEUE(), - q2 :: ?QUEUE:?QUEUE(), - delta :: delta(), - q3 :: ?QUEUE:?QUEUE(), - q4 :: ?QUEUE:?QUEUE(), + q_head :: ?QUEUE:?QUEUE(), + q_tail :: q_tail(), next_seq_id :: seq_id(), next_deliver_seq_id :: seq_id(), ram_pending_ack :: map(), disk_pending_ack :: map(), - qi_pending_ack :: undefined, index_state :: any(), store_state :: any(), msg_store_clients :: 'undefined' | {{any(), binary()}, @@ -312,14 +255,11 @@ transient_threshold :: non_neg_integer(), qi_embed_msgs_below :: non_neg_integer(), - len :: non_neg_integer(), bytes :: non_neg_integer(), unacked_bytes :: non_neg_integer(), - persistent_count :: non_neg_integer(), persistent_bytes :: non_neg_integer(), - target_ram_count :: non_neg_integer() | 'infinity', ram_msg_count :: non_neg_integer(), ram_msg_count_prev :: non_neg_integer(), ram_ack_count_prev :: non_neg_integer(), @@ -336,19 +276,14 @@ disk_read_count :: non_neg_integer(), disk_write_count :: non_neg_integer(), - io_batch_size :: 0, - mode :: 'default' | 'lazy', - version :: 2, unconfirmed_simple :: sets:set()}. --define(BLANK_DELTA, #delta { start_seq_id = undefined, - count = 0, - transient = 0, - end_seq_id = undefined }). --define(BLANK_DELTA_PATTERN(Z), #delta { start_seq_id = Z, - count = 0, - transient = 0, - end_seq_id = Z }). 
+-define(BLANK_Q_TAIL, #q_tail { start_seq_id = undefined, + count = 0, + end_seq_id = undefined }). +-define(BLANK_Q_TAIL_PATTERN(Z), #q_tail { start_seq_id = Z, + count = 0, + end_seq_id = Z }). -define(MICROS_PER_SECOND, 1000000.0). @@ -367,7 +302,6 @@ %%---------------------------------------------------------------------------- start(VHost, DurableQueues) -> - %% The v2 index walker function covers both v1 and v2 index files. {AllTerms, StartFunState} = rabbit_classic_queue_index_v2:start(VHost, DurableQueues), %% Group recovery terms by vhost. ClientRefs = [Ref || Terms <- AllTerms, @@ -412,19 +346,16 @@ stop_msg_store(VHost) -> ok. init(Queue, Recover, Callback) -> - init( + init1( Queue, Recover, fun (MsgIds, ActionTaken) -> msgs_written_to_disk(Callback, MsgIds, ActionTaken) - end, - fun (MsgIds) -> msg_indices_written_to_disk(Callback, MsgIds) end, - fun (MsgIds) -> msgs_and_indices_written_to_disk(Callback, MsgIds) end). + end). -init(Q, new, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueue(Q) -> +init1(Q, new, MsgOnDiskFun) when ?is_amqqueue(Q) -> QueueName = amqqueue:get_name(Q), IsDurable = amqqueue:is_durable(Q), - IndexState = rabbit_classic_queue_index_v2:init(QueueName, - MsgIdxOnDiskFun, MsgAndIdxOnDiskFun), + IndexState = rabbit_classic_queue_index_v2:init(QueueName), StoreState = rabbit_classic_queue_store_v2:init(QueueName), VHost = QueueName#resource.virtual_host, init(IsDurable, IndexState, StoreState, 0, 0, [], @@ -437,7 +368,7 @@ init(Q, new, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueu VHost), VHost); %% We can be recovering a transient queue if it crashed -init(Q, Terms, MsgOnDiskFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun) when ?is_amqqueue(Q) -> +init1(Q, Terms, MsgOnDiskFun) when ?is_amqqueue(Q) -> QueueName = amqqueue:get_name(Q), IsDurable = amqqueue:is_durable(Q), {PRef, RecoveryTerms} = process_recovery_terms(Terms), @@ -455,17 +386,16 @@ init(Q, Terms, MsgOnDiskFun, MsgIdxOnDiskFun, 
MsgAndIdxOnDiskFun) when ?is_amqqu end, TransientClient = msg_store_client_init(?TRANSIENT_MSG_STORE, undefined, VHost), - {DeltaCount, DeltaBytes, IndexState} = + {DiskCount, DiskBytes, IndexState} = rabbit_classic_queue_index_v2:recover( QueueName, RecoveryTerms, rabbit_vhost_msg_store:successfully_recovered_state( VHost, ?PERSISTENT_MSG_STORE), - ContainsCheckFun, MsgIdxOnDiskFun, MsgAndIdxOnDiskFun, - main), + ContainsCheckFun), StoreState = rabbit_classic_queue_store_v2:init(QueueName), init(IsDurable, IndexState, StoreState, - DeltaCount, DeltaBytes, RecoveryTerms, + DiskCount, DiskBytes, RecoveryTerms, PersistentClient, TransientClient, VHost). process_recovery_terms(Terms=non_clean_shutdown) -> @@ -522,12 +452,12 @@ delete_crashed(Q) when ?is_amqqueue(Q) -> QName = amqqueue:get_name(Q), ok = rabbit_classic_queue_index_v2:erase(QName). -purge(State = #vqstate { len = Len }) -> +purge(State) -> case is_pending_ack_empty(State) and is_unconfirmed_empty(State) of true -> - {Len, purge_and_index_reset(State)}; + {len(State), purge_and_index_reset(State)}; false -> - {Len, purge_when_pending_acks(State)} + {len(State), purge_when_pending_acks(State)} end. purge_acks(State) -> a(purge_pending_ack(false, State)). @@ -631,23 +561,18 @@ ack(AckTags, State) -> store_state = StoreState, ack_out_counter = AckOutCount + length(AckTags) })}. -requeue(AckTags, #vqstate { delta = Delta, - q3 = Q3, - in_counter = InCounter, - len = Len } = State) -> - %% @todo This can be heavily simplified: if the message falls into delta, - %% add it there. Otherwise just add it to q3 in the correct position. 
- {SeqIds, Q3a, MsgIds, State1} = requeue_merge(lists:sort(AckTags), Q3, [], - delta_limit(Delta), State), - {Delta1, MsgIds1, State2} = delta_merge(SeqIds, Delta, MsgIds, - State1), +requeue(AckTags, #vqstate { q_head = QHead0, + q_tail = QTail, + in_counter = InCounter } = State) -> + {SeqIds, QHead, MsgIds, State1} = requeue_merge(lists:sort(AckTags), QHead0, [], + q_tail_limit(QTail), State), + {QTail1, MsgIds1, State2} = q_tail_merge(SeqIds, QTail, MsgIds, State1), MsgCount = length(MsgIds1), {MsgIds1, a( - maybe_update_rates(ui( - State2 #vqstate { delta = Delta1, - q3 = Q3a, - in_counter = InCounter + MsgCount, - len = Len + MsgCount })))}. + maybe_update_rates( + State2 #vqstate { q_head = QHead, + q_tail = QTail1, + in_counter = InCounter + MsgCount }))}. ackfold(MsgFun, Acc, State, AckTags) -> {AccN, StateN} = @@ -658,14 +583,8 @@ ackfold(MsgFun, Acc, State, AckTags) -> end, {Acc, State}, AckTags), {AccN, a(StateN)}. -fold(Fun, Acc, State = #vqstate{index_state = IndexState}) -> - {Its, IndexState1} = lists:foldl(fun inext/2, {[], IndexState}, - [msg_iterator(State), - disk_ack_iterator(State), - ram_ack_iterator(State)]), - ifold(Fun, Acc, Its, State#vqstate{index_state = IndexState1}). - -len(#vqstate { len = Len }) -> Len. +len(#vqstate { q_head = QHead, q_tail = #q_tail{ count = QTailCount }}) -> + ?QUEUE:len(QHead) + QTailCount. is_empty(State) -> 0 == len(State). @@ -719,30 +638,39 @@ needs_timeout(#vqstate { index_state = IndexState, {false, true} -> false end. -timeout(State = #vqstate { index_state = IndexState0, - store_state = StoreState0, - unconfirmed_simple = UCS, - confirmed = C }) -> - IndexState = rabbit_classic_queue_index_v2:sync(IndexState0), - StoreState = rabbit_classic_queue_store_v2:sync(StoreState0), - State #vqstate { index_state = IndexState, - store_state = StoreState, - unconfirmed_simple = sets:new([{version,2}]), - confirmed = sets:union(C, UCS) }. 
- -handle_pre_hibernate(State = #vqstate { index_state = IndexState0, - store_state = StoreState0, - msg_store_clients = MSCState0, - unconfirmed_simple = UCS, - confirmed = C }) -> +timeout(State) -> + sync(State). + +handle_pre_hibernate(State = #vqstate{ msg_store_clients = MSCState0 }) -> MSCState = msg_store_pre_hibernate(MSCState0), - IndexState = rabbit_classic_queue_index_v2:flush(IndexState0), + sync(State#vqstate{ msg_store_clients = MSCState }). + +sync(State = #vqstate { index_state = IndexState0, + store_state = StoreState0, + unconfirmed_simple = UCS, + confirmed = C }) -> + {MsgIdSet, IndexState} = rabbit_classic_queue_index_v2:sync(IndexState0), StoreState = rabbit_classic_queue_store_v2:sync(StoreState0), - State #vqstate { index_state = IndexState, - store_state = StoreState, - msg_store_clients = MSCState, - unconfirmed_simple = sets:new([{version,2}]), - confirmed = sets:union(C, UCS) }. + State1 = State #vqstate { index_state = IndexState, + store_state = StoreState, + unconfirmed_simple = sets:new([{version,2}]), + confirmed = sets:union(C, UCS) }, + index_synced(MsgIdSet, State1). + +index_synced(MsgIdSet, State = #vqstate{ + msgs_on_disk = MOD, + msg_indices_on_disk = MIOD, + unconfirmed = UC }) -> + case sets:is_empty(MsgIdSet) of + true -> + State; + false -> + Confirmed = sets:intersection(UC, MsgIdSet), + record_confirms(sets:intersection(MsgIdSet, MOD), + State #vqstate { + msg_indices_on_disk = + sets:union(MIOD, Confirmed) }) + end. resume(State) -> a(timeout(State)). 
@@ -758,8 +686,6 @@ info(messages_ram, State) -> info(messages_ready_ram, State) + info(messages_unacknowledged_ram, State); info(messages_persistent, #vqstate{persistent_count = PersistentCount}) -> PersistentCount; -info(messages_paged_out, #vqstate{delta = #delta{transient = Count}}) -> - Count; info(message_bytes, #vqstate{bytes = Bytes, unacked_bytes = UBytes}) -> Bytes + UBytes; @@ -771,25 +697,21 @@ info(message_bytes_ram, #vqstate{ram_bytes = RamBytes}) -> RamBytes; info(message_bytes_persistent, #vqstate{persistent_bytes = PersistentBytes}) -> PersistentBytes; -info(message_bytes_paged_out, #vqstate{delta_transient_bytes = PagedOutBytes}) -> - PagedOutBytes; info(head_message_timestamp, #vqstate{ - q3 = Q3, - ram_pending_ack = RPA}) -> - head_message_timestamp(Q3, RPA); + q_head = QHead, + ram_pending_ack = RPA}) -> + head_message_timestamp(QHead, RPA); info(oldest_message_received_timestamp, #vqstate{ - q3 = Q3, - ram_pending_ack = RPA}) -> - oldest_message_received_timestamp(Q3, RPA); + q_head = QHead, + ram_pending_ack = RPA}) -> + oldest_message_received_timestamp(QHead, RPA); info(disk_reads, #vqstate{disk_read_count = Count}) -> Count; info(disk_writes, #vqstate{disk_write_count = Count}) -> Count; -info(backing_queue_status, #vqstate { - delta = Delta, q3 = Q3, - mode = Mode, - len = Len, - target_ram_count = TargetRamCount, +info(backing_queue_status, State = #vqstate { + q_head = QHead, + q_tail = QTail, next_seq_id = NextSeqId, next_deliver_seq_id = NextDeliverSeqId, ram_pending_ack = RPA, @@ -802,15 +724,10 @@ info(backing_queue_status, #vqstate { out = AvgEgressRate, ack_in = AvgAckIngressRate, ack_out = AvgAckEgressRate }}) -> - [ {mode , Mode}, - {version , 2}, - {q1 , 0}, - {q2 , 0}, - {delta , Delta}, - {q3 , ?QUEUE:len(Q3)}, - {q4 , 0}, - {len , Len}, - {target_ram_count , TargetRamCount}, + [ {version , 2}, + {q_head , ?QUEUE:len(QHead)}, + {q_tail , QTail}, + {len , len(State)}, {next_seq_id , NextSeqId}, {next_deliver_seq_id , 
NextDeliverSeqId}, {num_pending_acks , map_size(RPA) + map_size(DPA)}, @@ -829,74 +746,12 @@ invoke( _, _, State) -> State. is_duplicate(_Msg, State) -> {false, State}. -%% Queue mode has been unified. -set_queue_mode(_, State) -> - State. - zip_msgs_and_acks(Msgs, AckTags, Accumulator, _State) -> lists:foldl(fun ({{Msg, _Props}, AckTag}, Acc) -> Id = mc:get_annotation(id, Msg), [{Id, AckTag} | Acc] end, Accumulator, lists:zip(Msgs, AckTags)). -%% Queue version now ignored; only v2 is available. -set_queue_version(_, State) -> - State. - -%% This function is used by rabbit_classic_queue_index_v2 -%% to convert v1 queues to v2 after an upgrade to 4.0. -convert_from_v1_to_v2_loop(_, _, V2Index, V2Store, _, HiSeqId, HiSeqId, _) -> - {V2Index, V2Store}; -convert_from_v1_to_v2_loop(QueueName, V1Index0, V2Index0, V2Store0, - Counters = {CountersRef, CountIx, BytesIx}, - LoSeqId, HiSeqId, SkipFun) -> - UpSeqId = lists:min([rabbit_queue_index:next_segment_boundary(LoSeqId), - HiSeqId]), - {Messages, V1Index} = rabbit_queue_index:read(LoSeqId, UpSeqId, V1Index0), - %% We do a garbage collect immediately after the old index read - %% because that may have created a lot of garbage. - garbage_collect(), - {V2Index3, V2Store3} = lists:foldl(fun - %% Move embedded messages to the per-queue store. - ({Msg, SeqId, rabbit_queue_index, Props, IsPersistent}, - {V2Index1, V2Store1}) -> - MsgId = mc:get_annotation(id, Msg), - {MsgLocation, V2Store2} = rabbit_classic_queue_store_v2:write(SeqId, Msg, Props, V2Store1), - V2Index2 = case SkipFun(SeqId, V2Index1) of - {skip, V2Index1a} -> - V2Index1a; - {write, V2Index1a} -> - counters:add(CountersRef, CountIx, 1), - counters:add(CountersRef, BytesIx, Props#message_properties.size), - rabbit_classic_queue_index_v2:publish(MsgId, SeqId, MsgLocation, Props, IsPersistent, infinity, V2Index1a) - end, - {V2Index2, V2Store2}; - %% Keep messages in the per-vhost store where they are. 
- ({MsgId, SeqId, rabbit_msg_store, Props, IsPersistent}, - {V2Index1, V2Store1}) -> - V2Index2 = case SkipFun(SeqId, V2Index1) of - {skip, V2Index1a} -> - V2Index1a; - {write, V2Index1a} -> - counters:add(CountersRef, CountIx, 1), - counters:add(CountersRef, BytesIx, Props#message_properties.size), - rabbit_classic_queue_index_v2:publish(MsgId, SeqId, rabbit_msg_store, Props, IsPersistent, infinity, V2Index1a) - end, - {V2Index2, V2Store1} - end, {V2Index0, V2Store0}, Messages), - %% Flush to disk to avoid keeping too much in memory between segments. - V2Index = rabbit_classic_queue_index_v2:flush(V2Index3), - V2Store = rabbit_classic_queue_store_v2:sync(V2Store3), - %% We have written everything to disk. We can delete the old segment file - %% to free up much needed space, to avoid doubling disk usage during the upgrade. - rabbit_queue_index:delete_segment_file_for_seq_id(LoSeqId, V1Index), - %% Log some progress to keep the user aware of what's going on, as moving - %% embedded messages can take quite some time. - #resource{virtual_host = VHost, name = Name} = QueueName, - ?LOG_INFO("Queue ~ts in vhost ~ts converted ~b messages from v1 to v2", - [Name, VHost, length(Messages)]), - convert_from_v1_to_v2_loop(QueueName, V1Index, V2Index, V2Store, Counters, UpSeqId, HiSeqId, SkipFun). - %% Get the Timestamp property of the first msg, if present. This is %% the one with the oldest timestamp among the heads of the pending %% acks and unread queues. We can't check disk_pending_acks as these @@ -905,10 +760,10 @@ convert_from_v1_to_v2_loop(QueueName, V1Index0, V2Index0, V2Store0, %% regarded as unprocessed until acked, this also prevents the result %% apparently oscillating during repeated rejects. 
%% -head_message_timestamp(Q3, RPA) -> +head_message_timestamp(QHead, RPA) -> HeadMsgs = [ HeadMsgStatus#msg_status.msg || HeadMsgStatus <- - [ get_q_head(Q3), + [ get_q_head(QHead), get_pa_head(RPA) ], HeadMsgStatus /= undefined, HeadMsgStatus#msg_status.msg /= undefined ], @@ -925,10 +780,10 @@ head_message_timestamp(Q3, RPA) -> false -> lists:min(Timestamps) end. -oldest_message_received_timestamp(Q3, RPA) -> +oldest_message_received_timestamp(QHead, RPA) -> HeadMsgs = [ HeadMsgStatus#msg_status.msg || HeadMsgStatus <- - [ get_q_head(Q3), + [ get_q_head(QHead), get_pa_head(RPA) ], HeadMsgStatus /= undefined, HeadMsgStatus#msg_status.msg /= undefined ], @@ -956,40 +811,27 @@ get_pa_head(PA) -> map_get(Smallest, PA) end. -a(State = #vqstate { delta = Delta, q3 = Q3, - len = Len, - bytes = Bytes, +a(State = #vqstate { bytes = Bytes, unacked_bytes = UnackedBytes, persistent_count = PersistentCount, persistent_bytes = PersistentBytes, ram_msg_count = RamMsgCount, ram_bytes = RamBytes}) -> - ED = Delta#delta.count == 0, - E3 = ?QUEUE:is_empty(Q3), - LZ = Len == 0, - L3 = ?QUEUE:len(Q3), - %% if the queue is empty, then delta is empty and q3 is empty. - true = LZ == (ED and E3), - - %% There should be no messages in q1, q2, and q4 - true = Delta#delta.count + L3 == Len, - - true = Len >= 0, true = Bytes >= 0, true = UnackedBytes >= 0, true = PersistentCount >= 0, true = PersistentBytes >= 0, true = RamMsgCount >= 0, - true = RamMsgCount =< Len, + %% Requeues may lead to RamMsgCount > 2048. true = RamBytes >= 0, true = RamBytes =< Bytes + UnackedBytes, State. -d(Delta = #delta { start_seq_id = Start, count = Count, end_seq_id = End }) +qt(QTail = #q_tail { start_seq_id = Start, count = Count, end_seq_id = End }) when Start + Count =< End -> - Delta. + QTail. 
m(MsgStatus = #msg_status { is_persistent = IsPersistent, msg_location = MsgLocation, @@ -1016,13 +858,13 @@ msg_status(IsPersistent, IsDelivered, SeqId, is_delivered = IsDelivered, msg_location = memory, index_on_disk = false, - persist_to = determine_persist_to(Msg, MsgProps, IndexMaxSize), + persist_to = determine_persist_to(Msg, IndexMaxSize), msg_props = MsgProps}. -beta_msg_status({MsgId, SeqId, MsgLocation, MsgProps, IsPersistent}) +msg_status({MsgId, SeqId, MsgLocation, MsgProps, IsPersistent}) when is_binary(MsgId) orelse MsgId =:= undefined -> - MS0 = beta_msg_status0(SeqId, MsgProps, IsPersistent), + MS0 = msg_status0(SeqId, MsgProps, IsPersistent), MS0#msg_status{msg_id = MsgId, msg = undefined, persist_to = case is_tuple(MsgLocation) of @@ -1030,22 +872,18 @@ beta_msg_status({MsgId, SeqId, MsgLocation, MsgProps, IsPersistent}) false -> msg_store end, msg_location = MsgLocation}; -beta_msg_status({Msg, SeqId, MsgLocation, MsgProps, IsPersistent}) -> +msg_status({Msg, SeqId, MsgLocation, MsgProps, IsPersistent}) -> MsgId = mc:get_annotation(id, Msg), - MS0 = beta_msg_status0(SeqId, MsgProps, IsPersistent), + MS0 = msg_status0(SeqId, MsgProps, IsPersistent), MS0#msg_status{msg_id = MsgId, msg = Msg, persist_to = case MsgLocation of - rabbit_queue_index -> queue_index; {rabbit_classic_queue_store_v2, _, _} -> queue_store; rabbit_msg_store -> msg_store end, - msg_location = case MsgLocation of - rabbit_queue_index -> memory; - _ -> MsgLocation - end}. + msg_location = MsgLocation}. -beta_msg_status0(SeqId, MsgProps, IsPersistent) -> +msg_status0(SeqId, MsgProps, IsPersistent) -> #msg_status{seq_id = SeqId, msg = undefined, is_persistent = IsPersistent, @@ -1102,36 +940,6 @@ msg_store_remove(MSCState, IsPersistent, MsgIds) -> rabbit_msg_store:remove(MsgIds, MCSState1) end). 
-betas_from_index_entries(List, TransientThreshold, DelsAndAcksFun, State = #vqstate{ next_deliver_seq_id = NextDeliverSeqId0 }) -> - {Filtered, NextDeliverSeqId, Acks, RamReadyCount, RamBytes, TransientCount, TransientBytes} = - lists:foldr( - fun ({_MsgOrId, SeqId, _MsgLocation, _MsgProps, IsPersistent} = M, - {Filtered1, NextDeliverSeqId1, Acks1, RRC, RB, TC, TB} = Acc) -> - case SeqId < TransientThreshold andalso not IsPersistent of - true -> {Filtered1, - next_deliver_seq_id(SeqId, NextDeliverSeqId1), - [SeqId | Acks1], RRC, RB, TC, TB}; - false -> MsgStatus = m(beta_msg_status(M)), - HaveMsg = msg_in_ram(MsgStatus), - Size = msg_size(MsgStatus), - case is_msg_in_pending_acks(SeqId, State) of - false -> {?QUEUE:in_r(MsgStatus, Filtered1), - NextDeliverSeqId1, Acks1, - RRC + one_if(HaveMsg), - RB + one_if(HaveMsg) * Size, - TC + one_if(not IsPersistent), - TB + one_if(not IsPersistent) * Size}; - true -> Acc %% [0] - end - end - end, {?QUEUE:new(), NextDeliverSeqId0, [], 0, 0, 0, 0}, List), - {Filtered, RamReadyCount, RamBytes, DelsAndAcksFun(NextDeliverSeqId, Acks, State), - TransientCount, TransientBytes}. -%% [0] We don't increase RamBytes here, even though it pertains to -%% unacked messages too, since if HaveMsg then the message must have -%% been stored in the QI, thus the message must have been in -%% qi_pending_ack, thus it must already have been in RAM. - %% We increase the next_deliver_seq_id only when the next %% message (next seq_id) was delivered. next_deliver_seq_id(SeqId, NextDeliverSeqId) @@ -1145,34 +953,24 @@ is_msg_in_pending_acks(SeqId, #vqstate { ram_pending_ack = RPA, maps:is_key(SeqId, RPA) orelse maps:is_key(SeqId, DPA). 
-expand_delta(SeqId, ?BLANK_DELTA_PATTERN(X), IsPersistent) -> - d(#delta { start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1, - transient = one_if(not IsPersistent)}); -expand_delta(SeqId, #delta { start_seq_id = StartSeqId, - count = Count, - transient = Transient } = Delta, - IsPersistent ) +expand_q_tail(SeqId, ?BLANK_Q_TAIL_PATTERN(X)) -> + qt(#q_tail{ start_seq_id = SeqId, count = 1, end_seq_id = SeqId + 1 }); +expand_q_tail(SeqId, #q_tail{ start_seq_id = StartSeqId, + count = Count } = QTail) when SeqId < StartSeqId -> - d(Delta #delta { start_seq_id = SeqId, count = Count + 1, - transient = Transient + one_if(not IsPersistent)}); -expand_delta(SeqId, #delta { count = Count, - end_seq_id = EndSeqId, - transient = Transient } = Delta, - IsPersistent) + qt(QTail #q_tail{ start_seq_id = SeqId, count = Count + 1 }); +expand_q_tail(SeqId, #q_tail{ count = Count, + end_seq_id = EndSeqId } = QTail) when SeqId >= EndSeqId -> - d(Delta #delta { count = Count + 1, end_seq_id = SeqId + 1, - transient = Transient + one_if(not IsPersistent)}); -expand_delta(_SeqId, #delta { count = Count, - transient = Transient } = Delta, - IsPersistent ) -> - d(Delta #delta { count = Count + 1, - transient = Transient + one_if(not IsPersistent) }). + qt(QTail #q_tail{ count = Count + 1, end_seq_id = SeqId + 1 }); +expand_q_tail(_SeqId, #q_tail{ count = Count } = QTail) -> + qt(QTail #q_tail{ count = Count + 1 }). 
%%---------------------------------------------------------------------------- %% Internal major helpers for Public API %%---------------------------------------------------------------------------- -init(IsDurable, IndexState, StoreState, DeltaCount, DeltaBytes, Terms, +init(IsDurable, IndexState, StoreState, DiskCount, DiskBytes, Terms, PersistentClient, TransientClient, VHost) -> NextSeqIdHint = case Terms of @@ -1182,36 +980,32 @@ init(IsDurable, IndexState, StoreState, DeltaCount, DeltaBytes, Terms, {LowSeqId, HiSeqId, IndexState1} = rabbit_classic_queue_index_v2:bounds(IndexState, NextSeqIdHint), - {NextSeqId, NextDeliverSeqId, DeltaCount1, DeltaBytes1} = + {NextSeqId, NextDeliverSeqId, DiskCount1, DiskBytes1} = case Terms of - non_clean_shutdown -> {HiSeqId, HiSeqId, DeltaCount, DeltaBytes}; + non_clean_shutdown -> {HiSeqId, HiSeqId, DiskCount, DiskBytes}; _ -> NextSeqId0 = proplists:get_value(next_seq_id, Terms, HiSeqId), {NextSeqId0, proplists:get_value(next_deliver_seq_id, Terms, NextSeqId0), proplists:get_value(persistent_count, - Terms, DeltaCount), + Terms, DiskCount), proplists:get_value(persistent_bytes, - Terms, DeltaBytes)} + Terms, DiskBytes)} end, - Delta = case DeltaCount1 == 0 andalso DeltaCount /= undefined of - true -> ?BLANK_DELTA; - false -> d(#delta { start_seq_id = LowSeqId, - count = DeltaCount1, - transient = 0, - end_seq_id = NextSeqId }) + QTail = case DiskCount1 == 0 andalso DiskCount /= undefined of + true -> ?BLANK_Q_TAIL; + false -> qt(#q_tail { start_seq_id = LowSeqId, + count = DiskCount1, + end_seq_id = NextSeqId }) end, Now = erlang:monotonic_time(), {ok, IndexMaxSize} = application:get_env( rabbit, queue_index_embed_msgs_below), State = #vqstate { - q1 = ?QUEUE:new(), - q2 = ?QUEUE:new(), - delta = Delta, - q3 = ?QUEUE:new(), - q4 = ?QUEUE:new(), + q_head = ?QUEUE:new(), + q_tail = QTail, next_seq_id = NextSeqId, next_deliver_seq_id = NextDeliverSeqId, ram_pending_ack = #{}, @@ -1223,13 +1017,10 @@ init(IsDurable, IndexState, 
StoreState, DeltaCount, DeltaBytes, Terms, transient_threshold = NextSeqId, qi_embed_msgs_below = IndexMaxSize, - len = DeltaCount1, - persistent_count = DeltaCount1, - bytes = DeltaBytes1, - persistent_bytes = DeltaBytes1, - delta_transient_bytes = 0, + persistent_count = DiskCount1, + bytes = DiskBytes1, + persistent_bytes = DiskBytes1, - target_ram_count = infinity, ram_msg_count = 0, ram_msg_count_prev = 0, ram_ack_count_prev = 0, @@ -1248,11 +1039,8 @@ init(IsDurable, IndexState, StoreState, DeltaCount, DeltaBytes, Terms, disk_read_count = 0, disk_write_count = 0, - io_batch_size = 0, - - mode = default, virtual_host = VHost}, - a(maybe_deltas_to_betas(State)). + a(read_from_q_tail(State)). blank_rates(Now) -> #rates { in = 0.0, @@ -1261,11 +1049,11 @@ blank_rates(Now) -> ack_out = 0.0, timestamp = Now}. -in_r(MsgStatus = #msg_status {}, State = #vqstate { q3 = Q3 }) -> - State #vqstate { q3 = ?QUEUE:in_r(MsgStatus, Q3) }. +in_r(MsgStatus = #msg_status {}, State = #vqstate { q_head = QHead }) -> + State #vqstate { q_head = ?QUEUE:in_r(MsgStatus, QHead) }. queue_out(State) -> - case fetch_from_q3(State) of + case fetch_from_q_head(State) of {empty, _State1} = Result -> Result; {loaded, {MsgStatus, State1}} -> {{value, set_deliver_flag(State, MsgStatus)}, State1} end. @@ -1309,21 +1097,20 @@ read_msg(_, MsgId, IsPersistent, rabbit_msg_store, State = #vqstate{msg_store_cl %% When publishing to memory, transient messages do not get written to disk. %% On the other hand, persistent messages are kept in memory as well as disk. stats_published_memory(MS = #msg_status{is_persistent = true}, St) -> - St#vqstate{?UP(len, ram_msg_count, persistent_count, +1), + St#vqstate{?UP(ram_msg_count, persistent_count, +1), ?UP(bytes, ram_bytes, persistent_bytes, +msg_size(MS))}; stats_published_memory(MS = #msg_status{is_persistent = false}, St) -> - St#vqstate{?UP(len, ram_msg_count, +1), + St#vqstate{?UP(ram_msg_count, +1), ?UP(bytes, ram_bytes, +msg_size(MS))}. 
%% Messages published directly to disk are not kept in memory. stats_published_disk(MS = #msg_status{is_persistent = true}, St) -> - St#vqstate{?UP(len, persistent_count, +1), + St#vqstate{?UP(persistent_count, +1), ?UP(bytes, persistent_bytes, +msg_size(MS))}; stats_published_disk(MS = #msg_status{is_persistent = false}, St) -> - St#vqstate{?UP(len, +1), - ?UP(bytes, delta_transient_bytes, +msg_size(MS))}. + St#vqstate{?UP(bytes, +msg_size(MS))}. -%% Pending acks do not add to len. Messages are kept in memory. +%% Pending acks messages are kept in memory. stats_published_pending_acks(MS = #msg_status{is_persistent = true}, St) -> St#vqstate{?UP(persistent_count, +1), ?UP(persistent_bytes, unacked_bytes, ram_bytes, +msg_size(MS))}; @@ -1336,26 +1123,23 @@ stats_published_pending_acks(MS = #msg_status{is_persistent = false}, St) -> %% was fully on disk the content will not be read immediately). %% The contents stay where they are during this operation. stats_pending_acks(MS = #msg_status{msg = undefined}, St) -> - St#vqstate{?UP(len, -1), - ?UP(bytes, -msg_size(MS)), ?UP(unacked_bytes, +msg_size(MS))}; + St#vqstate{?UP(bytes, -msg_size(MS)), ?UP(unacked_bytes, +msg_size(MS))}; stats_pending_acks(MS, St) -> - St#vqstate{?UP(len, ram_msg_count, -1), + St#vqstate{?UP(ram_msg_count, -1), ?UP(bytes, -msg_size(MS)), ?UP(unacked_bytes, +msg_size(MS))}. %% Message may or may not be persistent and the contents %% may or may not be in memory. -%% -%% Removal from delta_transient_bytes is done by maybe_deltas_to_betas. 
stats_removed(MS = #msg_status{is_persistent = true, msg = undefined}, St) -> - St#vqstate{?UP(len, persistent_count, -1), + St#vqstate{?UP(persistent_count, -1), ?UP(bytes, persistent_bytes, -msg_size(MS))}; stats_removed(MS = #msg_status{is_persistent = true}, St) -> - St#vqstate{?UP(len, ram_msg_count, persistent_count, -1), + St#vqstate{?UP(ram_msg_count, persistent_count, -1), ?UP(bytes, ram_bytes, persistent_bytes, -msg_size(MS))}; stats_removed(MS = #msg_status{is_persistent = false, msg = undefined}, St) -> - St#vqstate{?UP(len, -1), ?UP(bytes, -msg_size(MS))}; + St#vqstate{?UP(bytes, -msg_size(MS))}; stats_removed(MS = #msg_status{is_persistent = false}, St) -> - St#vqstate{?UP(len, ram_msg_count, -1), + St#vqstate{?UP(ram_msg_count, -1), ?UP(bytes, ram_bytes, -msg_size(MS))}. %% @todo Very confusing that ram_msg_count is without unacked but ram_bytes is with. @@ -1376,30 +1160,15 @@ stats_acked_pending(MS = #msg_status{is_persistent = false}, St) -> %% Notice that this is the reverse of stats_pending_acks. stats_requeued_memory(MS = #msg_status{msg = undefined}, St) -> - St#vqstate{?UP(len, +1), - ?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; + St#vqstate{?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; stats_requeued_memory(MS, St) -> - St#vqstate{?UP(len, ram_msg_count, +1), + St#vqstate{?UP(ram_msg_count, +1), ?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}. -%% @todo For v2 since we don't remove from disk until we ack, we don't need -%% to write to disk again on requeue. If the message falls within delta -%% we can just drop the MsgStatus. Otherwise we just put it in q3 and -%% we don't do any disk writes. -%% -%% For v1 I'm not sure? I don't think we need to write to the index -%% at least, but maybe we need to write the message if not embedded? -%% I don't think we need to... -%% -%% So we don't need to change anything except how we count stats as -%% well as delta stats if the message falls within delta. 
stats_requeued_disk(MS = #msg_status{is_persistent = true}, St) -> - St#vqstate{?UP(len, +1), - ?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; + St#vqstate{?UP(bytes, +msg_size(MS)), ?UP(unacked_bytes, -msg_size(MS))}; stats_requeued_disk(MS = #msg_status{is_persistent = false}, St) -> - St#vqstate{?UP(len, +1), - ?UP(bytes, delta_transient_bytes, +msg_size(MS)), - ?UP(unacked_bytes, -msg_size(MS))}. + St#vqstate{?UP(unacked_bytes, -msg_size(MS))}. msg_size(#msg_status{msg_props = #message_properties{size = Size}}) -> Size. @@ -1460,7 +1229,6 @@ remove_from_disk(#msg_status { {StoreState0, record_confirms(sets:add_element(MsgId, sets:new([{version,2}])), State)} end; ?IN_QUEUE_STORE -> {rabbit_classic_queue_store_v2:remove(SeqId, StoreState0), State}; - ?IN_QUEUE_INDEX -> {StoreState0, State}; ?IN_MEMORY -> {StoreState0, State} end, StoreState = rabbit_classic_queue_store_v2:delete_segments(DeletedSegments, StoreState1), @@ -1471,7 +1239,7 @@ remove_from_disk(#msg_status { %% This function exists as a way to improve dropwhile/2 %% performance. The idea of having this function is to optimise calls -%% to rabbit_queue_index by batching delivers and acks, instead of +%% to the queue index by batching delivers and acks, instead of %% sending them one by one. %% %% Instead of removing every message as their are popped from the @@ -1513,7 +1281,7 @@ remove_by_predicate(Pred, State = #vqstate {out_counter = OutCount}) -> %% This function exists as a way to improve fetchwhile/4 %% performance. The idea of having this function is to optimise calls -%% to rabbit_queue_index by batching delivers, instead of sending them +%% to the queue index by batching delivers, instead of sending them %% one by one. %% %% Fun is the function passed to fetchwhile/4 that's @@ -1536,7 +1304,7 @@ fetch_by_predicate(Pred, Fun, FetchAcc, %% We try to do here the same as what remove(true, State) does but %% processing several messages at the same time. 
The idea is to -%% optimize rabbit_queue_index:deliver/2 calls by sending a list of +%% optimize IndexMod:deliver/2 calls by sending a list of %% SeqIds instead of one by one, thus process_queue_entries1 will %% accumulate the required deliveries, will record_pending_ack for %% each message, and will update stats, like remove/2 does. @@ -1589,16 +1357,6 @@ purge_and_index_reset(State) -> State1 = purge1(process_delivers_and_acks_fun(none), State), a(reset_qi_state(State1)). -%% This function removes messages from each of delta and q3. -%% -%% purge_betas_and_deltas/2 loads messages from the queue index, -%% filling up q3. The messages loaded into q3 are removed by calling -%% remove_queue_entries/3 until there are no more messages to be read -%% from the queue index. Messages are read in batches from the queue -%% index. -purge1(AfterFun, State) -> - a(purge_betas_and_deltas(AfterFun, State)). - reset_qi_state(State = #vqstate{ index_state = IndexState0, store_state = StoreState0 }) -> StoreState = rabbit_classic_queue_store_v2:terminate(StoreState0), @@ -1616,18 +1374,25 @@ count_pending_acks(#vqstate { ram_pending_ack = RPA, disk_pending_ack = DPA }) -> map_size(RPA) + map_size(DPA). -%% @todo When doing maybe_deltas_to_betas stats are updated. Then stats +%% This function removes messages from each of q_head and q_tail. +%% +%% It loads messages from the queue index in batches, +%% filling up q_head. The messages loaded into q_head are removed by calling +%% remove_queue_entries/3 until there are no more messages to be read +%% from the queue index. +%% +%% @todo When doing read_from_q_tail stats are updated. Then stats %% are updated again in remove_queue_entries1. All unnecessary since %% we are purging anyway? -purge_betas_and_deltas(DelsAndAcksFun, State) -> +purge1(DelsAndAcksFun, State) -> %% We use the maximum memory limit when purging to get greater performance. 
MemoryLimit = 2048, - State0 = #vqstate { q3 = Q3 } = maybe_deltas_to_betas(DelsAndAcksFun, State, MemoryLimit, metadata_only), + State0 = #vqstate { q_head = QHead } = read_from_q_tail(DelsAndAcksFun, State, MemoryLimit, metadata_only), - case ?QUEUE:is_empty(Q3) of + case ?QUEUE:is_empty(QHead) of true -> State0; - false -> State1 = remove_queue_entries(Q3, DelsAndAcksFun, State0), - purge_betas_and_deltas(DelsAndAcksFun, State1#vqstate{q3 = ?QUEUE:new()}) + false -> State1 = remove_queue_entries(QHead, DelsAndAcksFun, State0), + purge1(DelsAndAcksFun, State1#vqstate{q_head = ?QUEUE:new()}) end. remove_queue_entries(Q, DelsAndAcksFun, @@ -1679,8 +1444,8 @@ process_delivers_and_acks_fun(_) -> publish1(Msg, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, IsDelivered, _ChPid, PersistFun, - State = #vqstate { q3 = Q3, delta = Delta = #delta { count = DeltaCount }, - len = Len, + State = #vqstate { q_head = QHead, + q_tail = QTail = #q_tail { count = QTailCount }, qi_embed_msgs_below = IndexMaxSize, next_seq_id = SeqId, next_deliver_seq_id = NextDeliverSeqId, @@ -1697,16 +1462,16 @@ publish1(Msg, %% limit is at 1 because the queue process will need to access this message to know %% expiration information. MemoryLimit = min(1 + floor(2 * OutRate), 2048), - State3 = case DeltaCount of - %% Len is the same as Q3Len when DeltaCount =:= 0. 
- 0 when Len < MemoryLimit -> + QHeadLen = ?QUEUE:len(QHead), + State3 = case QTailCount of + 0 when QHeadLen < MemoryLimit -> {MsgStatus1, State1} = PersistFun(false, false, MsgStatus, State), - State2 = State1 #vqstate { q3 = ?QUEUE:in(m(MsgStatus1), Q3) }, + State2 = State1 #vqstate { q_head = ?QUEUE:in(m(MsgStatus1), QHead) }, stats_published_memory(MsgStatus1, State2); _ -> {MsgStatus1, State1} = PersistFun(true, true, MsgStatus, State), - Delta1 = expand_delta(SeqId, Delta, IsPersistent), - State2 = State1 #vqstate { delta = Delta1 }, + QTail1 = expand_q_tail(SeqId, QTail), + State2 = State1 #vqstate { q_tail = QTail1 }, stats_published_disk(MsgStatus1, State2) end, {UC1, UCS1} = maybe_needs_confirming(NeedsConfirming, persist_to(MsgStatus), @@ -1780,77 +1545,28 @@ maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { queue_store -> {MsgLocation, StoreState} = rabbit_classic_queue_store_v2:write(SeqId, prepare_to_store(Msg), Props, StoreState0), {MsgStatus#msg_status{ msg_location = MsgLocation }, State#vqstate{ store_state = StoreState, - disk_write_count = Count + 1}}; - queue_index -> {MsgStatus, State} + disk_write_count = Count + 1}} end; maybe_write_msg_to_disk(_Force, MsgStatus, State) -> {MsgStatus, State}. -%% Due to certain optimisations made inside -%% rabbit_queue_index:pre_publish/7 we need to have two separate -%% functions for index persistence. This one is only used when paging -%% during memory pressure. We didn't want to modify -%% maybe_write_index_to_disk/3 because that function is used in other -%% places. 
-maybe_batch_write_index_to_disk(_Force, - MsgStatus = #msg_status { - index_on_disk = true }, State) -> - {MsgStatus, State}; -maybe_batch_write_index_to_disk(Force, - MsgStatus = #msg_status { - msg = Msg, - msg_id = MsgId, - seq_id = SeqId, - is_persistent = IsPersistent, - msg_location = MsgLocation, - msg_props = MsgProps}, - State = #vqstate { - target_ram_count = TargetRamCount, - disk_write_count = DiskWriteCount, - index_state = IndexState}) - when Force orelse IsPersistent -> - {MsgOrId, DiskWriteCount1} = - case persist_to(MsgStatus) of - msg_store -> {MsgId, DiskWriteCount}; - queue_store -> {MsgId, DiskWriteCount}; - queue_index -> {prepare_to_store(Msg), DiskWriteCount + 1} - end, - IndexState1 = rabbit_classic_queue_index_v2:pre_publish( - MsgOrId, SeqId, MsgLocation, MsgProps, - IsPersistent, TargetRamCount, IndexState), - {MsgStatus#msg_status{index_on_disk = true}, - State#vqstate{index_state = IndexState1, - disk_write_count = DiskWriteCount1}}; -maybe_batch_write_index_to_disk(_Force, MsgStatus, State) -> - {MsgStatus, State}. 
- maybe_write_index_to_disk(_Force, MsgStatus = #msg_status { index_on_disk = true }, State) -> {MsgStatus, State}; maybe_write_index_to_disk(Force, MsgStatus = #msg_status { - msg = Msg, msg_id = MsgId, seq_id = SeqId, is_persistent = IsPersistent, msg_location = MsgLocation, msg_props = MsgProps}, - State = #vqstate{target_ram_count = TargetRamCount, - disk_write_count = DiskWriteCount, - index_state = IndexState}) + State = #vqstate{index_state = IndexState}) when Force orelse IsPersistent -> - {MsgOrId, DiskWriteCount1} = - case persist_to(MsgStatus) of - msg_store -> {MsgId, DiskWriteCount}; - queue_store -> {MsgId, DiskWriteCount}; - queue_index -> {prepare_to_store(Msg), DiskWriteCount + 1} - end, IndexState2 = rabbit_classic_queue_index_v2:publish( - MsgOrId, SeqId, MsgLocation, MsgProps, IsPersistent, - persist_to(MsgStatus) =:= msg_store, TargetRamCount, + MsgId, SeqId, MsgLocation, MsgProps, IsPersistent, + persist_to(MsgStatus) =:= msg_store, IndexState), {MsgStatus#msg_status{index_on_disk = true}, - State#vqstate{index_state = IndexState2, - disk_write_count = DiskWriteCount1}}; + State#vqstate{index_state = IndexState2}}; maybe_write_index_to_disk(_Force, MsgStatus, State) -> {MsgStatus, State}. @@ -1859,45 +1575,13 @@ maybe_write_to_disk(ForceMsg, ForceIndex, MsgStatus, State) -> {MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State), maybe_write_index_to_disk(ForceIndex, MsgStatus1, State1). -maybe_prepare_write_to_disk(ForceMsg, ForceIndex0, MsgStatus, State) -> - {MsgStatus1, State1} = maybe_write_msg_to_disk(ForceMsg, MsgStatus, State), - %% We want messages written to the v2 per-queue store to also - %% be written to the index for proper accounting. The situation - %% where a message can be in the store but not in the index can - %% only occur when going through this function (not via maybe_write_to_disk). 
- ForceIndex = case persist_to(MsgStatus) of - queue_store -> true; - _ -> ForceIndex0 - end, - maybe_batch_write_index_to_disk(ForceIndex, MsgStatus1, State1). - -determine_persist_to(Msg, - #message_properties{size = BodySize}, - IndexMaxSize) -> +determine_persist_to(Msg, IndexMaxSize) -> %% The >= is so that you can set the env to 0 and never persist %% to the index. - %% - %% We want this to be fast, so we avoid size(term_to_binary()) - %% here, or using the term size estimation from truncate.erl, both - %% of which are too slow. So instead, if the message body size - %% goes over the limit then we avoid any other checks. - %% - %% If it doesn't we need to decide if the properties will push - %% it past the limit. If we have the encoded properties (usual - %% case) we can just check their size. If we don't (message came - %% via the direct client), we make a guess based on the number of - %% headers. - - %% @todo We can probably simplify this. - {MetaSize, _BodySize} = mc:size(Msg), - case BodySize >= IndexMaxSize of + {MetaSize, BodySize} = mc:size(Msg), + case MetaSize + BodySize >= IndexMaxSize of true -> msg_store; - false -> - Est = MetaSize + BodySize, - case Est >= IndexMaxSize of - true -> msg_store; - false -> queue_store - end + false -> queue_store end. persist_to(#msg_status{persist_to = To}) -> To. @@ -2022,7 +1706,6 @@ accumulate_ack(#msg_status { seq_id = SeqId, end, case MsgLocation of ?IN_QUEUE_STORE -> [SeqId|SeqIdsInStore]; - ?IN_QUEUE_INDEX -> [SeqId|SeqIdsInStore]; _ -> SeqIdsInStore end, [MsgId | AllMsgIds]}. @@ -2062,8 +1745,6 @@ msgs_written_to_disk(Callback, MsgIdSet, written) -> %% for all message IDs. This is a waste. We should only %% call it for messages that need confirming, and avoid %% this intersection call. - %% - %% The same may apply to msg_indices_written_to_disk as well. 
Confirmed = sets:intersection(UC, MsgIdSet), record_confirms(sets:intersection(MsgIdSet, MIOD), State #vqstate { @@ -2071,23 +1752,6 @@ msgs_written_to_disk(Callback, MsgIdSet, written) -> sets:union(MOD, Confirmed) }) end). -msg_indices_written_to_disk(Callback, MsgIdSet) -> - Callback(?MODULE, - fun (?MODULE, State = #vqstate { msgs_on_disk = MOD, - msg_indices_on_disk = MIOD, - unconfirmed = UC }) -> - Confirmed = sets:intersection(UC, MsgIdSet), - record_confirms(sets:intersection(MsgIdSet, MOD), - State #vqstate { - msg_indices_on_disk = - sets:union(MIOD, Confirmed) }) - end). - -%% @todo Having to call run_backing_queue is probably reducing performance... -msgs_and_indices_written_to_disk(Callback, MsgIdSet) -> - Callback(?MODULE, - fun (?MODULE, State) -> record_confirms(MsgIdSet, State) end). - %%---------------------------------------------------------------------------- %% Internal plumbing for requeue %%---------------------------------------------------------------------------- @@ -2121,21 +1785,18 @@ requeue_merge(SeqIds, Q, Front, MsgIds, _Limit, State) -> {SeqIds, ?QUEUE:join(Front, Q), MsgIds, State}. 
-delta_merge([], Delta, MsgIds, State) -> - {Delta, MsgIds, State}; -delta_merge(SeqIds, Delta, MsgIds, State) -> - lists:foldl(fun (SeqId, {Delta0, MsgIds0, State0} = Acc) -> +q_tail_merge([], QTail, MsgIds, State) -> + {QTail, MsgIds, State}; +q_tail_merge(SeqIds, QTail, MsgIds, State) -> + lists:foldl(fun (SeqId, {QTail0, MsgIds0, State0} = Acc) -> case msg_from_pending_ack(SeqId, State0) of {none, _} -> Acc; - {#msg_status { msg_id = MsgId, - is_persistent = IsPersistent } = MsgStatus, State1} -> - {_MsgStatus, State2} = - maybe_prepare_write_to_disk(true, true, MsgStatus, State1), - {expand_delta(SeqId, Delta0, IsPersistent), [MsgId | MsgIds0], - stats_requeued_disk(MsgStatus, State2)} + {#msg_status { msg_id = MsgId } = MsgStatus, State1} -> + {expand_q_tail(SeqId, QTail0), [MsgId | MsgIds0], + stats_requeued_disk(MsgStatus, State1)} end - end, {Delta, MsgIds, State}, SeqIds). + end, {QTail, MsgIds, State}, SeqIds). %% Mostly opposite of record_pending_ack/2 msg_from_pending_ack(SeqId, State) -> @@ -2148,105 +1809,22 @@ msg_from_pending_ack(SeqId, State) -> State1} end. -delta_limit(?BLANK_DELTA_PATTERN(_)) -> undefined; -delta_limit(#delta { start_seq_id = StartSeqId }) -> StartSeqId. - -%%---------------------------------------------------------------------------- -%% Iterator -%%---------------------------------------------------------------------------- - -ram_ack_iterator(State) -> - {ack, maps:iterator(State#vqstate.ram_pending_ack)}. - -disk_ack_iterator(State) -> - {ack, maps:iterator(State#vqstate.disk_pending_ack)}. - -msg_iterator(State) -> istate(start, State). - -istate(start, State) -> {q3, State#vqstate.q3, State}; -istate(q3, State) -> {delta, State#vqstate.delta, State}; -istate(delta, _State) -> done. 
- -next({ack, It}, IndexState) -> - case maps:next(It) of - none -> {empty, IndexState}; - {_SeqId, MsgStatus, It1} -> Next = {ack, It1}, - {value, MsgStatus, true, Next, IndexState} - end; -next(done, IndexState) -> {empty, IndexState}; -next({delta, #delta{start_seq_id = SeqId, - end_seq_id = SeqId}, State}, IndexState) -> - next(istate(delta, State), IndexState); -next({delta, #delta{start_seq_id = SeqId, - end_seq_id = SeqIdEnd} = Delta, State}, IndexState) -> - SeqIdB = rabbit_classic_queue_index_v2:next_segment_boundary(SeqId), - %% It may make sense to limit this based on rate. But this - %% is not called outside of CMQs so I will leave it alone - %% for the time being. - SeqId1 = lists:min([SeqIdB, - %% We must limit the number of messages read at once - %% otherwise the queue will attempt to read up to segment_entry_count() - %% messages from the index each time. The value - %% chosen here is arbitrary. - SeqId + 2048, - SeqIdEnd]), - {List, IndexState1} = rabbit_classic_queue_index_v2:read(SeqId, SeqId1, IndexState), - next({delta, Delta#delta{start_seq_id = SeqId1}, List, State}, IndexState1); -next({delta, Delta, [], State}, IndexState) -> - next({delta, Delta, State}, IndexState); -next({delta, Delta, [{_, SeqId, _, _, _} = M | Rest], State}, IndexState) -> - case is_msg_in_pending_acks(SeqId, State) of - false -> Next = {delta, Delta, Rest, State}, - {value, beta_msg_status(M), false, Next, IndexState}; - true -> next({delta, Delta, Rest, State}, IndexState) - end; -next({Key, Q, State}, IndexState) -> - case ?QUEUE:out(Q) of - {empty, _Q} -> next(istate(Key, State), IndexState); - {{value, MsgStatus}, QN} -> Next = {Key, QN, State}, - {value, MsgStatus, false, Next, IndexState} - end. - -inext(It, {Its, IndexState}) -> - case next(It, IndexState) of - {empty, IndexState1} -> - {Its, IndexState1}; - {value, MsgStatus1, Unacked, It1, IndexState1} -> - {[{MsgStatus1, Unacked, It1} | Its], IndexState1} - end. 
- -ifold(_Fun, Acc, [], State0) -> - {Acc, State0}; -ifold(Fun, Acc, Its0, State0) -> - [{MsgStatus, Unacked, It} | Rest] = - lists:sort(fun ({#msg_status{seq_id = SeqId1}, _, _}, - {#msg_status{seq_id = SeqId2}, _, _}) -> - SeqId1 =< SeqId2 - end, Its0), - {Msg, State1} = read_msg(MsgStatus, State0), - case Fun(Msg, MsgStatus#msg_status.msg_props, Unacked, Acc) of - {stop, Acc1} -> - {Acc1, State1}; - {cont, Acc1} -> - IndexState0 = State1#vqstate.index_state, - {Its1, IndexState1} = inext(It, {Rest, IndexState0}), - State2 = State1#vqstate{index_state = IndexState1}, - ifold(Fun, Acc1, Its1, State2) - end. +q_tail_limit(?BLANK_Q_TAIL_PATTERN(_)) -> undefined; +q_tail_limit(#q_tail{ start_seq_id = StartSeqId }) -> StartSeqId. %%---------------------------------------------------------------------------- %% Phase changes %%---------------------------------------------------------------------------- -fetch_from_q3(State = #vqstate { delta = #delta { count = DeltaCount }, - q3 = Q3 }) -> - case ?QUEUE:out(Q3) of - {empty, _Q3} when DeltaCount =:= 0 -> +fetch_from_q_head(State = #vqstate { q_head = QHead, + q_tail = #q_tail { count = QTailCount }}) -> + case ?QUEUE:out(QHead) of + {empty, _QHead} when QTailCount =:= 0 -> {empty, State}; - {empty, _Q3} -> - fetch_from_q3(maybe_deltas_to_betas(State)); - {{value, MsgStatus}, Q3a} -> - State1 = State #vqstate { q3 = Q3a }, + {empty, _QHead} -> + fetch_from_q_head(read_from_q_tail(State)); + {{value, MsgStatus}, QHead1} -> + State1 = State #vqstate { q_head = QHead1 }, {loaded, {MsgStatus, State1}} end. @@ -2258,47 +1836,44 @@ fetch_from_q3(State = #vqstate { delta = #delta { count = DeltaCount }, -define(SHARED_READ_MANY_SIZE_THRESHOLD, 12000). -define(SHARED_READ_MANY_COUNT_THRESHOLD, 10). 
-maybe_deltas_to_betas(State = #vqstate { rates = #rates{ out = OutRate }}) -> +read_from_q_tail(State = #vqstate { rates = #rates{ out = OutRate }}) -> AfterFun = process_delivers_and_acks_fun(deliver_and_ack), %% We allow from 1 to 2048 messages in memory depending on the consume rate. MemoryLimit = min(1 + floor(2 * OutRate), 2048), - maybe_deltas_to_betas(AfterFun, State, MemoryLimit, messages). + read_from_q_tail(AfterFun, State, MemoryLimit, messages). -maybe_deltas_to_betas(_DelsAndAcksFun, - State = #vqstate {delta = ?BLANK_DELTA_PATTERN(X) }, +read_from_q_tail(_DelsAndAcksFun, + State = #vqstate {q_tail = ?BLANK_Q_TAIL_PATTERN(X) }, _MemoryLimit, _WhatToRead) -> State; -maybe_deltas_to_betas(DelsAndAcksFun, +read_from_q_tail(DelsAndAcksFun, State = #vqstate { - delta = Delta, - q3 = Q3, + q_head = QHead0, + q_tail = QTail, index_state = IndexState, store_state = StoreState, msg_store_clients = {MCStateP, MCStateT}, ram_msg_count = RamMsgCount, ram_bytes = RamBytes, disk_read_count = DiskReadCount, - delta_transient_bytes = DeltaTransientBytes, transient_threshold = TransientThreshold }, MemoryLimit, WhatToRead) -> - #delta { start_seq_id = DeltaSeqId, - count = DeltaCount, - transient = Transient, - end_seq_id = DeltaSeqIdEnd } = Delta, + #q_tail { start_seq_id = QTailSeqId, + count = QTailCount, + end_seq_id = QTailSeqIdEnd } = QTail, %% For v2 we want to limit the number of messages read at once to lower %% the memory footprint. We use the consume rate to determine how many %% messages we read. - DeltaSeqLimit = DeltaSeqId + MemoryLimit, - DeltaSeqId1 = - lists:min([rabbit_classic_queue_index_v2:next_segment_boundary(DeltaSeqId), - DeltaSeqLimit, DeltaSeqIdEnd]), - {List0, IndexState1} = rabbit_classic_queue_index_v2:read(DeltaSeqId, DeltaSeqId1, IndexState), + %% @todo Simply ask for N messages instead of low/high bounds. 
+ QTailSeqLimit = QTailSeqId + MemoryLimit, + QTailSeqId1 = + lists:min([rabbit_classic_queue_index_v2:next_segment_boundary(QTailSeqId), + QTailSeqLimit, QTailSeqIdEnd]), + {List0, IndexState1} = rabbit_classic_queue_index_v2:read(QTailSeqId, QTailSeqId1, IndexState), {List, StoreState3, MCStateP3, MCStateT3} = case WhatToRead of messages -> %% We try to read messages from disk all at once instead of - %% 1 by 1 at fetch time. When v1 is used and messages are - %% embedded, then the message content is already read from - %% disk at this point. For v2 embedded we must do a separate + %% 1 by 1 at fetch time. For v2 embedded we must do a separate %% call to obtain the contents and then merge the contents %% back into the #msg_status records. %% @@ -2362,41 +1937,37 @@ maybe_deltas_to_betas(DelsAndAcksFun, metadata_only -> {List0, StoreState, MCStateP, MCStateT} end, - {Q3a, RamCountsInc, RamBytesInc, State1, TransientCount, TransientBytes} = - betas_from_index_entries(List, TransientThreshold, - DelsAndAcksFun, - State #vqstate { index_state = IndexState1, - store_state = StoreState3, - msg_store_clients = {MCStateP3, MCStateT3}}), + {QHead1, RamCountsInc, RamBytesInc, State1} = + become_q_head(List, TransientThreshold, + DelsAndAcksFun, + State #vqstate { index_state = IndexState1, + store_state = StoreState3, + msg_store_clients = {MCStateP3, MCStateT3}}), State2 = State1 #vqstate { ram_msg_count = RamMsgCount + RamCountsInc, ram_bytes = RamBytes + RamBytesInc, disk_read_count = DiskReadCount + RamCountsInc }, - case ?QUEUE:len(Q3a) of + case ?QUEUE:len(QHead1) of 0 -> %% we ignored every message in the segment due to it being %% transient and below the threshold - maybe_deltas_to_betas( + read_from_q_tail( DelsAndAcksFun, State2 #vqstate { - delta = d(Delta #delta { start_seq_id = DeltaSeqId1 })}, + q_tail = qt(QTail #q_tail { start_seq_id = QTailSeqId1 })}, MemoryLimit, WhatToRead); - Q3aLen -> - Q3b = ?QUEUE:join(Q3, Q3a), - case DeltaCount - Q3aLen of + 
QHead1Len -> + QHead = ?QUEUE:join(QHead0, QHead1), + case QTailCount - QHead1Len of 0 -> - %% delta is now empty - State2 #vqstate { delta = ?BLANK_DELTA, - q3 = Q3b, - delta_transient_bytes = 0}; + %% q_tail is now empty + State2 #vqstate { q_tail = ?BLANK_Q_TAIL, + q_head = QHead }; N when N > 0 -> - Delta1 = d(#delta { start_seq_id = DeltaSeqId1, - count = N, - %% @todo Probably something wrong, seen it become negative... - transient = Transient - TransientCount, - end_seq_id = DeltaSeqIdEnd }), - State2 #vqstate { delta = Delta1, - q3 = Q3b, - delta_transient_bytes = DeltaTransientBytes - TransientBytes } + QTail1 = qt(#q_tail { start_seq_id = QTailSeqId1, + count = N, + end_seq_id = QTailSeqIdEnd }), + State2 #vqstate { q_head = QHead, + q_tail = QTail1 } end end. @@ -2419,12 +1990,32 @@ merge_sh_read_msgs([M = {MsgId, _, _, _, _}|MTail], Reads) -> merge_sh_read_msgs(MTail, _Reads) -> MTail. -%% Flushes queue index batch caches and updates queue index state. -ui(#vqstate{index_state = IndexState, - target_ram_count = TargetRamCount} = State) -> - IndexState1 = rabbit_classic_queue_index_v2:flush_pre_publish_cache( - TargetRamCount, IndexState), - State#vqstate{index_state = IndexState1}. 
+become_q_head(List, TransientThreshold, DelsAndAcksFun, State = #vqstate{ next_deliver_seq_id = NextDeliverSeqId0 }) -> + {Filtered, NextDeliverSeqId, Acks, RamReadyCount, RamBytes} = + lists:foldr( + fun ({_MsgOrId, SeqId, _MsgLocation, _MsgProps, IsPersistent} = M, + {Filtered1, NextDeliverSeqId1, Acks1, RRC, RB} = Acc) -> + case SeqId < TransientThreshold andalso not IsPersistent of + true -> {Filtered1, + next_deliver_seq_id(SeqId, NextDeliverSeqId1), + [SeqId | Acks1], RRC, RB}; + false -> MsgStatus = m(msg_status(M)), + HaveMsg = msg_in_ram(MsgStatus), + Size = msg_size(MsgStatus), + case is_msg_in_pending_acks(SeqId, State) of + false -> {?QUEUE:in_r(MsgStatus, Filtered1), + NextDeliverSeqId1, Acks1, + RRC + one_if(HaveMsg), + RB + one_if(HaveMsg) * Size}; + true -> Acc %% [0] + end + end + end, {?QUEUE:new(), NextDeliverSeqId0, [], 0, 0}, List), + {Filtered, RamReadyCount, RamBytes, DelsAndAcksFun(NextDeliverSeqId, Acks, State)}. +%% [0] We don't increase RamBytes here, even though it pertains to +%% unacked messages too, since if HaveMsg then the message must have +%% been stored in the QI, thus the message must have been in +%% qi_pending_ack, thus it must already have been in RAM. maybe_client_terminate(MSCStateP) -> %% Queue might have been asked to stop by the supervisor, it needs a clean @@ -2442,16 +2033,16 @@ format_state(#vqstate{} = S) -> format_state(false, #vqstate{} = S) -> S; -format_state(true, #vqstate{q3 = Q3, +format_state(true, #vqstate{q_head = QHead, ram_pending_ack = RamPendingAck, disk_pending_ack = DiskPendingAck, index_state = IndexState, store_state = StoreState} = S) -> - S#vqstate{q3 = format_q3(Q3), + S#vqstate{q_head = format_q_head(QHead), ram_pending_ack = maps:keys(RamPendingAck), disk_pending_ack = maps:keys(DiskPendingAck), index_state = rabbit_classic_queue_index_v2:format_state(IndexState), store_state = rabbit_classic_queue_store_v2:format_state(StoreState)}. 
-format_q3(Q3) -> - [SeqId || #msg_status{seq_id = SeqId} <- ?QUEUE:to_list(Q3)]. +format_q_head(QHead) -> + [SeqId || #msg_status{seq_id = SeqId} <- ?QUEUE:to_list(QHead)]. diff --git a/deps/rabbit/src/rabbit_vhost.erl b/deps/rabbit/src/rabbit_vhost.erl index 7b08e3fec706..2c14a35ad712 100644 --- a/deps/rabbit/src/rabbit_vhost.erl +++ b/deps/rabbit/src/rabbit_vhost.erl @@ -110,7 +110,7 @@ ensure_config_file(VHost) -> %% The config file does not exist. %% Check if there are queues in this vhost. false -> - QueueDirs = rabbit_queue_index:all_queue_directory_names(VHost), + QueueDirs = rabbit_classic_queue_index_v2:all_queue_directory_names(VHost), SegmentEntryCount = case QueueDirs of %% There are no queues. Write the configured value for %% the segment entry count, or the new RabbitMQ default diff --git a/deps/rabbit/test/backing_queue_SUITE.erl b/deps/rabbit/test/backing_queue_SUITE.erl index 01ff9f5aa259..71f88ad7f896 100644 --- a/deps/rabbit/test/backing_queue_SUITE.erl +++ b/deps/rabbit/test/backing_queue_SUITE.erl @@ -21,7 +21,7 @@ -define(VHOST, <<"/">>). -define(VARIABLE_QUEUE_TESTCASES, [ - variable_queue_partial_segments_delta_thing, + variable_queue_partial_segments_q_tail_thing, variable_queue_all_the_bits_not_covered_elsewhere_A, variable_queue_all_the_bits_not_covered_elsewhere_B, variable_queue_drop, @@ -33,14 +33,13 @@ variable_queue_ack_limiting, variable_queue_purge, variable_queue_requeue, - variable_queue_requeue_ram_beta, - variable_queue_fold + variable_queue_requeue_ram_beta ]). -define(BACKING_QUEUE_TESTCASES, [ bq_queue_index, bq_queue_index_props, - {variable_queue_default, [parallel], ?VARIABLE_QUEUE_TESTCASES}, + {variable_queue, [parallel], ?VARIABLE_QUEUE_TESTCASES}, bq_variable_queue_delete_msg_store_files_callback, bq_queue_recover ]). 
@@ -128,8 +127,6 @@ init_per_group1(backing_queue_embed_limit_1024, Config) -> ok = rabbit_ct_broker_helpers:rpc(Config, 0, application, set_env, [rabbit, queue_index_embed_msgs_below, 1024]), Config; -init_per_group1(variable_queue_default, Config) -> - rabbit_ct_helpers:set_config(Config, {variable_queue_type, default}); %% @todo These groups are no longer used? init_per_group1(from_cluster_node1, Config) -> rabbit_ct_helpers:set_config(Config, {test_direction, {0, 1}}); @@ -162,15 +159,9 @@ orelse Group =:= backing_queue_embed_limit_1024 -> end_per_group1(_, Config) -> Config. -init_per_testcase(Testcase, Config) when Testcase == variable_queue_requeue; - Testcase == variable_queue_fold -> - rabbit_ct_helpers:testcase_started(Config, Testcase); init_per_testcase(Testcase, Config) -> rabbit_ct_helpers:testcase_started(Config, Testcase). -end_per_testcase(Testcase, Config) when Testcase == variable_queue_requeue; - Testcase == variable_queue_fold -> - rabbit_ct_helpers:testcase_finished(Config, Testcase); end_per_testcase(Testcase, Config) -> rabbit_ct_helpers:testcase_finished(Config, Testcase). @@ -806,7 +797,6 @@ index_mod() -> rabbit_classic_queue_index_v2. 
bq_queue_index1(_Config) -> - init_queue_index(), IndexMod = index_mod(), SegmentSize = IndexMod:next_segment_boundary(0), TwoSegs = SegmentSize + SegmentSize, @@ -852,7 +842,7 @@ bq_queue_index1(_Config) -> Qi13 end, {_DeletedSegments, Qi16} = IndexMod:ack(SeqIdsB, Qi15), - Qi17 = IndexMod:flush(Qi16), + {_Confirms, Qi17} = IndexMod:sync(Qi16), %% Everything will have gone now because #pubs == #acks {NextSeqIdB, NextSeqIdB, Qi18} = IndexMod:bounds(Qi17, NextSeqIdB), %% should get length back as 0 because all persistent @@ -873,7 +863,7 @@ bq_queue_index1(_Config) -> _ -> Qi1 end, {_DeletedSegments, Qi3} = IndexMod:ack(SeqIdsC, Qi2), - Qi4 = IndexMod:flush(Qi3), + {_Confirms, Qi4} = IndexMod:sync(Qi3), {Qi5, _SeqIdsMsgIdsC1} = queue_index_publish([SegmentSize], false, Qi4), Qi5 @@ -891,7 +881,8 @@ bq_queue_index1(_Config) -> {Qi3, _SeqIdsMsgIdsC3} = queue_index_publish([SegmentSize], false, Qi2), {_DeletedSegments, Qi4} = IndexMod:ack(SeqIdsC, Qi3), - IndexMod:flush(Qi4) + {_Confirms, Qi5} = IndexMod:sync(Qi4), + Qi5 end), %% c) just fill up several segments of all pubs, then +acks @@ -904,7 +895,8 @@ bq_queue_index1(_Config) -> _ -> Qi1 end, {_DeletedSegments, Qi3} = IndexMod:ack(SeqIdsD, Qi2), - IndexMod:flush(Qi3) + {_Confirms, Qi4} = IndexMod:sync(Qi3), + Qi4 end), %% d) get messages in all states to a segment, then flush, then do @@ -918,7 +910,7 @@ bq_queue_index1(_Config) -> _ -> Qi1 end, {_DeletedSegments3, Qi3} = IndexMod:ack([0], Qi2), - Qi4 = IndexMod:flush(Qi3), + {_Confirms, Qi4} = IndexMod:sync(Qi3), {Qi5, [Eight,Six|_]} = queue_index_publish([3,6,8], false, Qi4), Qi6 = case IndexMod of rabbit_queue_index -> IndexMod:deliver([2,3,5,6], Qi5); @@ -984,7 +976,7 @@ bq_queue_index_props1(_Config) -> MsgId = rabbit_guid:gen(), Props = #message_properties{expiry=12345, size = 10}, Qi1 = IndexMod:publish( - MsgId, 0, memory, Props, true, infinity, Qi0), + MsgId, 0, memory, Props, true, true, Qi0), {[{MsgId, 0, _, Props, _}], Qi2} = IndexMod:read(0, 1, Qi1), 
Qi2 @@ -1115,7 +1107,6 @@ bq_queue_recover(Config) -> ?MODULE, bq_queue_recover1, [Config]). bq_queue_recover1(Config) -> - init_queue_index(), IndexMod = index_mod(), Count = 2 * IndexMod:next_segment_boundary(0), QName0 = queue_name(Config, <<"bq_queue_recover-q">>), @@ -1171,16 +1162,14 @@ get_queue_sup_pid([{_, SupPid, _, _} | Rest], QueuePid) -> get_queue_sup_pid([], _QueuePid) -> undefined. -variable_queue_partial_segments_delta_thing(Config) -> +variable_queue_partial_segments_q_tail_thing(Config) -> passed = rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, variable_queue_partial_segments_delta_thing1, [Config]). + ?MODULE, variable_queue_partial_segments_q_tail_thing1, [Config]). -variable_queue_partial_segments_delta_thing1(Config) -> - with_fresh_variable_queue( - fun variable_queue_partial_segments_delta_thing2/2, - ?config(variable_queue_type, Config)). +variable_queue_partial_segments_q_tail_thing1(Config) -> + with_fresh_variable_queue(fun variable_queue_partial_segments_q_tail_thing2/2). -variable_queue_partial_segments_delta_thing2(VQ0, _QName) -> +variable_queue_partial_segments_q_tail_thing2(VQ0, _QName) -> IndexMod = index_mod(), SegmentSize = IndexMod:next_segment_boundary(0), HalfSegment = SegmentSize div 2, @@ -1191,25 +1180,25 @@ variable_queue_partial_segments_delta_thing2(VQ0, _QName) -> VQ2, %% We only have one message in memory because the amount in memory %% depends on the consume rate, which is nil in this test. - [{delta, {delta, 1, OneAndAHalfSegment - 1, 0, OneAndAHalfSegment}}, - {q3, 1}, + [{q_head, 1}, + {q_tail, {q_tail, 1, OneAndAHalfSegment - 1, OneAndAHalfSegment}}, {len, OneAndAHalfSegment}]), VQ5 = check_variable_queue_status( variable_queue_publish(true, 1, VQ3), - %% one alpha, but it's in the same segment as the deltas + %% one alpha, but it's in the same segment as the q_tail %% @todo That's wrong now! 
v1/v2 - [{delta, {delta, 1, OneAndAHalfSegment, 0, OneAndAHalfSegment + 1}}, - {q3, 1}, + [{q_head, 1}, + {q_tail, {q_tail, 1, OneAndAHalfSegment, OneAndAHalfSegment + 1}}, {len, OneAndAHalfSegment + 1}]), {VQ6, AckTags} = variable_queue_fetch(SegmentSize, true, false, SegmentSize + HalfSegment + 1, VQ5), VQ7 = check_variable_queue_status( VQ6, - %% We only read from delta up to the end of the segment, so + %% We only read from q_tail up to the end of the segment, so %% after fetching exactly one segment, we should have no %% messages in memory. - [{delta, {delta, SegmentSize, HalfSegment + 1, 0, OneAndAHalfSegment + 1}}, - {q3, 0}, + [{q_head, 0}, + {q_tail, {q_tail, SegmentSize, HalfSegment + 1, OneAndAHalfSegment + 1}}, {len, HalfSegment + 1}]), {VQ8, AckTags1} = variable_queue_fetch(HalfSegment + 1, true, false, HalfSegment + 1, VQ7), @@ -1223,9 +1212,7 @@ variable_queue_all_the_bits_not_covered_elsewhere_A(Config) -> ?MODULE, variable_queue_all_the_bits_not_covered_elsewhere_A1, [Config]). variable_queue_all_the_bits_not_covered_elsewhere_A1(Config) -> - with_fresh_variable_queue( - fun variable_queue_all_the_bits_not_covered_elsewhere_A2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_all_the_bits_not_covered_elsewhere_A2/2). variable_queue_all_the_bits_not_covered_elsewhere_A2(VQ0, QName) -> IndexMod = index_mod(), @@ -1250,9 +1237,7 @@ variable_queue_all_the_bits_not_covered_elsewhere_B(Config) -> ?MODULE, variable_queue_all_the_bits_not_covered_elsewhere_B1, [Config]). variable_queue_all_the_bits_not_covered_elsewhere_B1(Config) -> - with_fresh_variable_queue( - fun variable_queue_all_the_bits_not_covered_elsewhere_B2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_all_the_bits_not_covered_elsewhere_B2/2). 
variable_queue_all_the_bits_not_covered_elsewhere_B2(VQ1, QName) -> VQ2 = variable_queue_publish(false, 4, VQ1), @@ -1270,9 +1255,7 @@ variable_queue_drop(Config) -> ?MODULE, variable_queue_drop1, [Config]). variable_queue_drop1(Config) -> - with_fresh_variable_queue( - fun variable_queue_drop2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_drop2/2). variable_queue_drop2(VQ0, _QName) -> %% start by sending a messages @@ -1295,9 +1278,7 @@ variable_queue_fold_msg_on_disk(Config) -> ?MODULE, variable_queue_fold_msg_on_disk1, [Config]). variable_queue_fold_msg_on_disk1(Config) -> - with_fresh_variable_queue( - fun variable_queue_fold_msg_on_disk2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_fold_msg_on_disk2/2). variable_queue_fold_msg_on_disk2(VQ0, _QName) -> VQ1 = variable_queue_publish(true, 1, VQ0), @@ -1311,9 +1292,7 @@ variable_queue_dropfetchwhile(Config) -> ?MODULE, variable_queue_dropfetchwhile1, [Config]). variable_queue_dropfetchwhile1(Config) -> - with_fresh_variable_queue( - fun variable_queue_dropfetchwhile2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_dropfetchwhile2/2). variable_queue_dropfetchwhile2(VQ0, _QName) -> Count = 10, @@ -1359,9 +1338,7 @@ variable_queue_dropwhile_restart(Config) -> ?MODULE, variable_queue_dropwhile_restart1, [Config]). variable_queue_dropwhile_restart1(Config) -> - with_fresh_variable_queue( - fun variable_queue_dropwhile_restart2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_dropwhile_restart2/2). variable_queue_dropwhile_restart2(VQ0, QName) -> Count = 10000, @@ -1398,9 +1375,7 @@ variable_queue_dropwhile_sync_restart(Config) -> ?MODULE, variable_queue_dropwhile_sync_restart1, [Config]). 
variable_queue_dropwhile_sync_restart1(Config) -> - with_fresh_variable_queue( - fun variable_queue_dropwhile_sync_restart2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_dropwhile_sync_restart2/2). variable_queue_dropwhile_sync_restart2(VQ0, QName) -> Count = 10000, @@ -1440,9 +1415,7 @@ variable_queue_restart_large_seq_id(Config) -> ?MODULE, variable_queue_restart_large_seq_id1, [Config]). variable_queue_restart_large_seq_id1(Config) -> - with_fresh_variable_queue( - fun variable_queue_restart_large_seq_id2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_restart_large_seq_id2/2). variable_queue_restart_large_seq_id2(VQ0, QName) -> Count = 1, @@ -1479,9 +1452,7 @@ variable_queue_ack_limiting(Config) -> ?MODULE, variable_queue_ack_limiting1, [Config]). variable_queue_ack_limiting1(Config) -> - with_fresh_variable_queue( - fun variable_queue_ack_limiting2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_ack_limiting2/2). variable_queue_ack_limiting2(VQ0, _Config) -> %% start by sending in a bunch of messages @@ -1509,9 +1480,7 @@ variable_queue_purge(Config) -> ?MODULE, variable_queue_purge1, [Config]). variable_queue_purge1(Config) -> - with_fresh_variable_queue( - fun variable_queue_purge2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_purge2/2). variable_queue_purge2(VQ0, _Config) -> LenDepth = fun (VQ) -> @@ -1533,9 +1502,7 @@ variable_queue_requeue(Config) -> ?MODULE, variable_queue_requeue1, [Config]). variable_queue_requeue1(Config) -> - with_fresh_variable_queue( - fun variable_queue_requeue2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_requeue2/2). 
variable_queue_requeue2(VQ0, _Config) -> {_PendingMsgs, RequeuedMsgs, FreshMsgs, VQ1} = @@ -1555,15 +1522,13 @@ variable_queue_requeue2(VQ0, _Config) -> {empty, VQ3} = rabbit_variable_queue:fetch(true, VQ2), VQ3. -%% requeue from ram_pending_ack into q3, move to delta and then empty queue +%% requeue from ram_pending_ack into q_head, move to q_tail and then empty queue variable_queue_requeue_ram_beta(Config) -> passed = rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, variable_queue_requeue_ram_beta1, [Config]). variable_queue_requeue_ram_beta1(Config) -> - with_fresh_variable_queue( - fun variable_queue_requeue_ram_beta2/2, - ?config(variable_queue_type, Config)). + with_fresh_variable_queue(fun variable_queue_requeue_ram_beta2/2). variable_queue_requeue_ram_beta2(VQ0, _Config) -> IndexMod = index_mod(), @@ -1578,79 +1543,6 @@ variable_queue_requeue_ram_beta2(VQ0, _Config) -> {_, VQ8} = rabbit_variable_queue:ack(AcksAll, VQ7), VQ8. -variable_queue_fold(Config) -> - passed = rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, variable_queue_fold1, [Config]). - -variable_queue_fold1(Config) -> - with_fresh_variable_queue( - fun variable_queue_fold2/2, - ?config(variable_queue_type, Config)). - -variable_queue_fold2(VQ0, _Config) -> - {PendingMsgs, RequeuedMsgs, FreshMsgs, VQ1} = - variable_queue_with_holes(VQ0), - Count = rabbit_variable_queue:depth(VQ1), - Msgs = lists:sort(PendingMsgs ++ RequeuedMsgs ++ FreshMsgs), - lists:foldl(fun (Cut, VQ2) -> - test_variable_queue_fold(Cut, Msgs, PendingMsgs, VQ2) - end, VQ1, [0, 1, 2, Count div 2, - Count - 1, Count, Count + 1, Count * 2]). 
- -test_variable_queue_fold(Cut, Msgs, PendingMsgs, VQ0) -> - {Acc, VQ1} = rabbit_variable_queue:fold( - fun (M, _, Pending, A) -> - MInt = msg2int(M), - Pending = lists:member(MInt, PendingMsgs), %% assert - case MInt =< Cut of - true -> {cont, [MInt | A]}; - false -> {stop, A} - end - end, [], VQ0), - Expected = lists:takewhile(fun (I) -> I =< Cut end, Msgs), - Expected = lists:reverse(Acc), %% assertion - VQ1. - -%% same as test_variable_queue_requeue_ram_beta but randomly changing -%% the queue mode after every step. -variable_queue_mode_change(Config) -> - passed = rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, variable_queue_mode_change1, [Config]). - -variable_queue_mode_change1(Config) -> - with_fresh_variable_queue( - fun variable_queue_mode_change2/2, - ?config(variable_queue_type, Config)). - -variable_queue_mode_change2(VQ0, _Config) -> - IndexMod = index_mod(), - Count = IndexMod:next_segment_boundary(0)*2 + 2, - VQ1 = variable_queue_publish(false, Count, VQ0), - VQ2 = maybe_switch_queue_mode(VQ1), - {VQ3, AcksR} = variable_queue_fetch(Count, false, false, Count, VQ2), - VQ4 = maybe_switch_queue_mode(VQ3), - {Back, Front} = lists:split(Count div 2, AcksR), - {_, VQ5} = rabbit_variable_queue:requeue(erlang:tl(Back), VQ4), - VQ6 = maybe_switch_queue_mode(VQ5), - VQ8 = maybe_switch_queue_mode(VQ6), - {_, VQ9} = rabbit_variable_queue:requeue([erlang:hd(Back)], VQ8), - VQ10 = maybe_switch_queue_mode(VQ9), - VQ11 = requeue_one_by_one(Front, VQ10), - VQ12 = maybe_switch_queue_mode(VQ11), - {VQ13, AcksAll} = variable_queue_fetch(Count, false, true, Count, VQ12), - VQ14 = maybe_switch_queue_mode(VQ13), - {_, VQ15} = rabbit_variable_queue:ack(AcksAll, VQ14), - VQ16 = maybe_switch_queue_mode(VQ15), - VQ16. - -maybe_switch_queue_mode(VQ) -> - Mode = random_queue_mode(), - set_queue_mode(Mode, VQ). - -random_queue_mode() -> - Modes = [lazy, default], - lists:nth(rand:uniform(length(Modes)), Modes). 
- pub_res({_, VQS}) -> VQS; pub_res(VQS) -> @@ -1683,9 +1575,7 @@ init_test_queue(QName) -> QName, [], false, fun (MsgId) -> rabbit_msg_store:contains(MsgId, PersistentClient) - end, - fun nop/1, fun nop/1, - main), + end), ok = rabbit_msg_store:client_delete_and_terminate(PersistentClient), Res. @@ -1717,13 +1607,6 @@ with_empty_test_queue(Fun) -> IndexMod = index_mod(), IndexMod:delete_and_terminate(Fun(Qi, QName)). -init_queue_index() -> - %% We must set the segment entry count in the process dictionary - %% for tests that call the v1 queue index directly to have a correct - %% value. - put(segment_entry_count, 2048), - ok. - restart_app() -> rabbit:stop(), rabbit:start(). @@ -1743,7 +1626,7 @@ queue_index_publish(SeqIds, Persistent, Qi) -> QiM = IndexMod:publish( MsgId, SeqId, rabbit_msg_store, #message_properties{size = 10}, - Persistent, infinity, QiN), + Persistent, true, QiN), ok = rabbit_msg_store:write(SeqId, MsgId, MsgId, MSCState), {QiM, [{SeqId, MsgId} | SeqIdsMsgIdsAcc]} end, {Qi, []}, SeqIds), @@ -1752,9 +1635,6 @@ queue_index_publish(SeqIds, Persistent, Qi) -> ok = rabbit_msg_store:client_delete_and_terminate(MSCState), {A, B}. -nop(_) -> ok. -nop(_, _) -> ok. - msg_store_client_init(MsgStore, Ref) -> rabbit_vhost_msg_store:client_init(?VHOST, MsgStore, Ref, undefined). @@ -1764,7 +1644,7 @@ variable_queue_init(Q, Recover) -> true -> non_clean_shutdown; false -> new; Terms -> Terms - end, fun nop/2, fun nop/1, fun nop/1). + end, fun(_, _) -> ok end). variable_queue_read_terms(QName) -> #resource { kind = queue, @@ -1810,7 +1690,7 @@ wait_for_confirms(Unconfirmed) -> end end. 
-with_fresh_variable_queue(Fun, Mode) -> +with_fresh_variable_queue(Fun) -> Ref = make_ref(), Me = self(), %% Run in a separate process since rabbit_msg_store will send @@ -1820,15 +1700,12 @@ with_fresh_variable_queue(Fun, Mode) -> ok = unin_empty_test_queue(QName), VQ = variable_queue_init(test_amqqueue(QName, true), false), S0 = variable_queue_status(VQ), - assert_props(S0, [{q1, 0}, {q2, 0}, - {delta, - {delta, undefined, 0, 0, undefined}}, - {q3, 0}, {q4, 0}, + assert_props(S0, [{q_head, 0}, + {q_tail, {q_tail, undefined, 0, undefined}}, {len, 0}]), - VQ1 = set_queue_mode(Mode, VQ), try _ = rabbit_variable_queue:delete_and_terminate( - shutdown, Fun(VQ1, QName)), + shutdown, Fun(VQ, QName)), Me ! Ref catch Type:Error:Stacktrace -> @@ -1841,9 +1718,6 @@ with_fresh_variable_queue(Fun, Mode) -> end, passed. -set_queue_mode(Mode, VQ) -> - rabbit_variable_queue:set_queue_mode(Mode, VQ). - variable_queue_publish(IsPersistent, Count, VQ) -> variable_queue_publish(IsPersistent, Count, fun (_N, P) -> P end, VQ). @@ -1930,8 +1804,8 @@ requeue_one_by_one(Acks, VQ) -> VQM end, VQ, Acks). -%% Create a vq with messages in q1, delta, and q3, and holes (in the -%% form of pending acks) in the latter two. +%% Historical test case that exercised the many different +%% internal queues. Kept for completeness. 
variable_queue_with_holes(VQ0) -> Interval = 2048, %% should match vq:IO_BATCH_SIZE IndexMod = index_mod(), @@ -1950,7 +1824,7 @@ variable_queue_with_holes(VQ0) -> {_MsgIds, VQ4} = rabbit_variable_queue:requeue( Acks -- (Subset1 ++ Subset2 ++ Subset3), VQ3), VQ5 = requeue_one_by_one(Subset1, VQ4), - %% by now we have some messages (and holes) in delta + %% by now we have some messages (and holes) in q_tail VQ6 = requeue_one_by_one(Subset2, VQ5), %% add the q1 tail VQ8 = variable_queue_publish( @@ -1968,11 +1842,11 @@ variable_queue_with_holes(VQ0) -> vq_with_holes_assertions(VQ) -> [false = case V of - {delta, _, 0, _, _} -> true; - 0 -> true; - _ -> false + {q_tail, _, 0, _} -> true; + 0 -> true; + _ -> false end || {K, V} <- variable_queue_status(VQ), - lists:member(K, [delta, q3])]. + lists:member(K, [q_head, q_tail])]. check_variable_queue_status(VQ0, Props) -> VQ1 = variable_queue_wait_for_shuffling_end(VQ0), diff --git a/deps/rabbitmq_management/priv/www/js/tmpl/classic-queue-stats.ejs b/deps/rabbitmq_management/priv/www/js/tmpl/classic-queue-stats.ejs index d779d6cca7ff..2c5b0e6b943c 100644 --- a/deps/rabbitmq_management/priv/www/js/tmpl/classic-queue-stats.ejs +++ b/deps/rabbitmq_management/priv/www/js/tmpl/classic-queue-stats.ejs @@ -34,7 +34,6 @@ Unacked In memory Persistent - Transient, Paged Out @@ -56,9 +55,6 @@ <%= fmt_num_thousands(queue.messages_persistent) %> - - <%= fmt_num_thousands(queue.messages_paged_out) %> - @@ -80,9 +76,6 @@ <%= fmt_bytes(queue.message_bytes_persistent) %> - - <%= fmt_bytes(queue.message_bytes_paged_out) %> - diff --git a/deps/rabbitmq_prometheus/metrics.md b/deps/rabbitmq_prometheus/metrics.md index 7f61b0d3af94..e54ae4151a9f 100644 --- a/deps/rabbitmq_prometheus/metrics.md +++ b/deps/rabbitmq_prometheus/metrics.md @@ -188,8 +188,6 @@ These metrics are specific to the stream protocol. 
| rabbitmq_queue_disk_writes_total | Total number of times queue wrote messages to disk | | rabbitmq_queue_messages | Sum of ready and unacknowledged messages - total queue depth | | rabbitmq_queue_messages_bytes | Size in bytes of ready and unacknowledged messages | -| rabbitmq_queue_messages_paged_out | Messages paged out to disk | -| rabbitmq_queue_messages_paged_out_bytes | Size in bytes of messages paged out to disk | | rabbitmq_queue_messages_persistent | Persistent messages | | rabbitmq_queue_messages_persistent_bytes | Size in bytes of persistent messages | | rabbitmq_queue_messages_published_total | Total number of messages published to queues | diff --git a/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl b/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl index 7f6ed70d56dc..759b6b90ed26 100644 --- a/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl +++ b/deps/rabbitmq_prometheus/src/collectors/prometheus_rabbitmq_core_metrics_collector.erl @@ -144,8 +144,6 @@ {2, undefined, queue_messages_bytes, gauge, "Size in bytes of ready and unacknowledged messages", message_bytes}, {2, undefined, queue_messages_ready_bytes, gauge, "Size in bytes of ready messages", message_bytes_ready}, {2, undefined, queue_messages_unacked_bytes, gauge, "Size in bytes of all unacknowledged messages", message_bytes_unacknowledged}, - {2, undefined, queue_messages_paged_out, gauge, "Messages paged out to disk", messages_paged_out}, - {2, undefined, queue_messages_paged_out_bytes, gauge, "Size in bytes of messages paged out to disk", message_bytes_paged_out}, {2, undefined, queue_head_message_timestamp, gauge, "Timestamp of the first message in the queue, if any", head_message_timestamp}, {2, undefined, queue_disk_reads_total, counter, "Total number of times queue read messages from disk", disk_reads}, {2, undefined, queue_disk_writes_total, counter, "Total number of times 
queue wrote messages to disk", disk_writes}, @@ -673,7 +671,7 @@ get_data(queue_consumer_count = MF, false, VHostsFilter) -> end, empty(MF), Table), [{Table, [{consumers, A1}]}]; get_data(queue_metrics = Table, false, VHostsFilter) -> - {Table, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17} = + {Table, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15} = ets:foldl(fun ({#resource{kind = queue, virtual_host = VHost}, _, _}, Acc) when is_map(VHostsFilter), map_get(VHost, VHostsFilter) == false -> Acc; @@ -685,8 +683,7 @@ get_data(queue_metrics = Table, false, VHostsFilter) -> {messages_unacknowledged_ram, A7}, {messages_persistent, A8}, {messages_bytes_persistent, A9}, {message_bytes, A10}, {message_bytes_ready, A11}, {message_bytes_unacknowledged, A12}, - {messages_paged_out, A13}, {message_bytes_paged_out, A14}, - {disk_reads, A15}, {disk_writes, A16}, {segments, A17}]}]; + {disk_reads, A13}, {disk_writes, A14}, {segments, A15}]}]; get_data(Table, false, VHostsFilter) when Table == channel_exchange_metrics; Table == queue_coarse_metrics; Table == queue_delivery_metrics; @@ -861,7 +858,7 @@ get_data(Table, _, _) -> sum_queue_metrics(Props, {T, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, - A12, A13, A14, A15, A16, A17}) -> + A12, A13, A14, A15}) -> {T, sum(proplists:get_value(consumers, Props), A1), sum(proplists:get_value(consumer_utilisation, Props), A2), @@ -875,11 +872,9 @@ sum_queue_metrics(Props, {T, A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, sum(proplists:get_value(message_bytes, Props), A10), sum(proplists:get_value(message_bytes_ready, Props), A11), sum(proplists:get_value(message_bytes_unacknowledged, Props), A12), - sum(proplists:get_value(messages_paged_out, Props), A13), - sum(proplists:get_value(message_bytes_paged_out, Props), A14), - sum(proplists:get_value(disk_reads, Props), A15), - sum(proplists:get_value(disk_writes, Props), A16), - sum(proplists:get_value(segments, Props), A17) + 
sum(proplists:get_value(disk_reads, Props), A13), + sum(proplists:get_value(disk_writes, Props), A14), + sum(proplists:get_value(segments, Props), A15) }. empty(T) when T == channel_queue_exchange_metrics; T == queue_exchange_metrics; T == channel_process_metrics; T == queue_consumer_count -> @@ -891,7 +886,7 @@ empty(T) when T == channel_exchange_metrics; T == exchange_metrics; T == queue_c empty(T) when T == channel_queue_metrics; T == queue_delivery_metrics; T == channel_metrics -> {T, 0, 0, 0, 0, 0, 0, 0}; empty(queue_metrics = T) -> - {T, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}. + {T, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}. sum(undefined, B) -> B;