Skip to content

Commit 4cbc293

Browse files
yizhang82inikep
authored andcommitted
Fix contention in MDL_key::ACL_CACHE
Summary: In 5.6 acl cache is protected by mutex which causes quite a bit of contention (especially if you have lots of acls), so we changed that to a mysql_rwlock_t in 5.6. In 8.0 this is replaced with a singleton MDL lock with namespace = MDL_key::ACL_CACHE with SHARED and EXCLUSIVE lock. However MDL lock has higher overhead than regular mysql_rwlock_t due to the way it maintains the tickets for granting/waiting and another internal mysql_prlock_rwlock (MDL_lock::m_rwlock) to update/traverse them, and therefore is more prone to contentions in high connection scenarios. In this particular case, the deadlock detection for ACL_CACHE leads to bad contention problems in the connection path, because taking rdlock on MDL_lock::m_rwlock while doing deadlock traversal takes longer with more granted and waiting tickets, but the releasing of these granted tickets are themselves blocked on taking wrlock on m_rwlock, meanwhile more connections are trying to come in and grant more tickets and taking rdlock as well in deadlock detection as well, so this becomes a serious lock contention that you can't get out of if you keep new connection come in at a fast pace. A easy way to trigger this is to run FLUSH PRIVILEGES on a server that has lots of incoming connections (with low connection timeout) with lots of connection from acl_check_host / has_global_grant. For ACL_CACHE, the deadlock detection in most cases doesn't provide a ton of value in production, because under ACL_cache lock we only operate on mysql.user/db/etc special system tables, so you'd need to take exclusive lock on them and then go through acl cache locking in order to trigger the deadlock detection, which seems quite unlikely in practice. We didn't see any issues in the past for 5.6 either, which was just a mutex and later we changed it to mysql_rwlock_t and had 0 deadlock detection. Therefore, I'm adding a new switch enable_acl_cache_deadlock_detection to turn it off for ACL_CACHE only. It defaults to on to maintain compat and we can turn it off in production. There are probably other options to fix this issue short term (such as replacing the ACL_cache lock with mysql_rwlock_t lock, moving allow_all_hosts check out of the lock, etc) with different risk trade offs, and longer term fixes that require bigger surgery to MDL locking system which probably should best left to upstream. This seems to be the lowest risk option at the moment. `#ifndef EXTRA_CODE_FOR_UNIT_TESTING` is needed to avoid compilation failure in locking unit tests (which doesn't seem to compile in the sys vars). Reviewed By: yoshinorim Differential Revision: D27740302
1 parent 97dc272 commit 4cbc293

File tree

8 files changed

+153
-1
lines changed

8 files changed

+153
-1
lines changed

mysql-test/r/mysqld--help-notwin.result

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,9 @@ The following options may be given as the first argument:
420420
before storage engine initialization, where each plugin
421421
is identified as name=library, where name is the plugin
422422
name and library is the plugin library in plugin_dir.
423+
--enable-acl-cache-deadlock-detection
424+
Enable deadlock detection on ACL_CACHE MDL lock
425+
(Defaults to on; use --skip-enable-acl-cache-deadlock-detection to disable.)
423426
--enable-acl-db-cache
424427
Enable ACL db_cache lookup on (ip, user, db). Please
425428
issue FLUSH PRIVILEGES for the changes to take effect.
@@ -2861,6 +2864,7 @@ disallow-raft FALSE
28612864
disconnect-on-expired-password TRUE
28622865
disconnect-slave-event-count 0
28632866
div-precision-increment 4
2867+
enable-acl-cache-deadlock-detection TRUE
28642868
enable-acl-db-cache TRUE
28652869
enable-acl-fast-lookup FALSE
28662870
enable-binlog-hlc FALSE
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
SET @start_global_value = @@GLOBAL.enable_acl_cache_deadlock_detection;
2+
SELECT @start_global_value;
3+
@start_global_value
4+
1
5+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
6+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
7+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
8+
@@GLOBAL.enable_acl_cache_deadlock_detection
9+
1
10+
FLUSH PRIVILEGES;
11+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
12+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
13+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
14+
@@GLOBAL.enable_acl_cache_deadlock_detection
15+
1
16+
FLUSH PRIVILEGES;
17+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
18+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
19+
@@GLOBAL.enable_acl_cache_deadlock_detection
20+
1
21+
FLUSH PRIVILEGES;
22+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
23+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
24+
@@GLOBAL.enable_acl_cache_deadlock_detection
25+
0
26+
FLUSH PRIVILEGES;
27+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = something;
28+
ERROR 42000: Variable 'enable_acl_cache_deadlock_detection' can't be set to the value of 'something'
29+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
30+
@@GLOBAL.enable_acl_cache_deadlock_detection
31+
0
32+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = somethingelse;
33+
ERROR 42000: Variable 'enable_acl_cache_deadlock_detection' can't be set to the value of 'somethingelse'
34+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
35+
@@GLOBAL.enable_acl_cache_deadlock_detection
36+
0
37+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = @start_global_value;
38+
SELECT @@GLOBAL.enable_acl_cache_deadlock_detection;
39+
@@GLOBAL.enable_acl_cache_deadlock_detection
40+
1
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
################ mysql-test\t\enable_acl_cache_deadlock_detection.test ########
2+
# #
3+
# Variable Name: enable_acl_cache_deadlock_detection #
4+
# Scope: Global #
5+
# #
6+
# Creation Date: 2021-04-13 #
7+
# #
8+
# #
9+
# Description:Test Cases of Dynamic System Variable #
10+
# enable_acl_cache_deadlock_detection that checks the behavior of #
11+
# this variable in the following ways #
12+
# * Default Value #
13+
# * Valid Value #
14+
# * Invalid value #
15+
# #
16+
###############################################################################
17+
SET @start_global_value = @@GLOBAL.enable_acl_cache_deadlock_detection;
18+
SELECT @start_global_value;
19+
20+
########################################################################
21+
# Display the DEFAULT value of enable_acl_cache_deadlock_detection #
22+
########################################################################
23+
24+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
25+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
26+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
27+
28+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
29+
# is working properly
30+
FLUSH PRIVILEGES;
31+
32+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
33+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
34+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
35+
36+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
37+
# is working properly
38+
FLUSH PRIVILEGES;
39+
40+
###############################################################################
41+
# change the value of enable_acl_cache_deadlock_detection to a valid value #
42+
###############################################################################
43+
44+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
45+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
46+
47+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
48+
# is working properly
49+
FLUSH PRIVILEGES;
50+
51+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
52+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
53+
54+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
55+
# is working properly
56+
FLUSH PRIVILEGES;
57+
58+
###############################################################################
59+
# change the value of enable_acl_cache_deadlock_detection to an invalid value #
60+
###############################################################################
61+
62+
--Error ER_WRONG_VALUE_FOR_VAR
63+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = something;
64+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
65+
66+
--Error ER_WRONG_VALUE_FOR_VAR
67+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = somethingelse;
68+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
69+
70+
###############################
71+
# Restore initial value #
72+
###############################
73+
74+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = @start_global_value;
75+
SELECT @@GLOBAL.enable_acl_cache_deadlock_detection;

mysql-test/t/all_persisted_variables.test

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,10 @@ let $total_excluded_vars=`SELECT COUNT(*) FROM performance_schema.global_variabl
5151
'binlog_file_basedir',
5252
'binlog_index_basedir',
5353
'default_collation_for_utf8mb4_init',
54+
'disable_raft_log_repointing',
55+
'enable_acl_cache_deadlock_detection',
5456
'enable_acl_db_cache',
5557
'enable_acl_fast_lookup',
56-
'disable_raft_log_repointing',
5758
'group_replication_plugin_hooks',
5859
'gtid_committed',
5960
'gtid_purged_for_tailing',

sql/mdl.cc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4020,6 +4020,30 @@ bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket,
40204020
MDL_context *src_ctx = waiting_ticket->get_ctx();
40214021
bool result = true;
40224022

4023+
#ifndef EXTRA_CODE_FOR_UNIT_TESTING
4024+
if (!enable_acl_cache_deadlock_detection &&
4025+
key.mdl_namespace() == MDL_key::ACL_CACHE) {
4026+
/*
4027+
Deadlock detection for ACL_CACHE may lead to bad contention problems
4028+
in the connection path. In particular taking rdlock on m_rwlock
4029+
while doing deadlock traversal takes longer with more granted and
4030+
waiting tickets, but the releasing of these granted tickets are
4031+
themselves blocked on taking wrlock on m_rwlock, meanwhile more
4032+
connections are trying to come in and grant more tickets and
4033+
taking wrlock as well, so this becomes a serious lock contention
4034+
that you can't get out of if you keep new connection come in at
4035+
a fast pace. A easy way to trigger this is to run FLUSH
4036+
PRIVILEGES on a server that has lots of incoming connections with
4037+
lots of connection from acl_check_host / has_global_grant.
4038+
4039+
For ACL_CACHE, the deadlock detection in most cases doesn't provide
4040+
a ton of value in production so this allows turning it off for
4041+
ACL_CACHE only.
4042+
*/
4043+
return false;
4044+
}
4045+
#endif
4046+
40234047
mysql_prlock_rdlock(&m_rwlock);
40244048

40254049
/*

sql/mysqld.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,6 +1233,7 @@ double wait_for_hlc_sleep_scaling_factor = 0.75;
12331233
char *default_collation_for_utf8mb4_init = nullptr;
12341234
bool enable_blind_replace = false;
12351235
bool enable_acl_fast_lookup = false;
1236+
bool enable_acl_cache_deadlock_detection = false;
12361237
bool enable_acl_db_cache = true;
12371238
bool enable_super_log_bin_read_only = false;
12381239
ulonglong max_tmp_disk_usage{0};

sql/mysqld.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,7 @@ extern ulong wait_for_hlc_sleep_threshold_ms;
299299
extern double wait_for_hlc_sleep_scaling_factor;
300300
extern char *default_collation_for_utf8mb4_init;
301301
extern bool enable_acl_fast_lookup;
302+
extern bool enable_acl_cache_deadlock_detection;
302303
extern bool enable_acl_db_cache;
303304
extern bool enable_super_log_bin_read_only;
304305

sql/sys_vars.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9103,6 +9103,12 @@ static Sys_var_bool Sys_enable_acl_fast_lookup(
91039103
DEFAULT(false), NO_MUTEX_GUARD, NOT_IN_BINLOG,
91049104
ON_CHECK(check_enable_acl_fast_lookup));
91059105

9106+
static Sys_var_bool Sys_enable_acl_cache_deadlock_detection(
9107+
"enable_acl_cache_deadlock_detection",
9108+
"Enable deadlock detection on ACL_CACHE MDL lock",
9109+
NON_PERSIST GLOBAL_VAR(enable_acl_cache_deadlock_detection),
9110+
CMD_LINE(OPT_ARG), DEFAULT(true));
9111+
91069112
static Sys_var_bool Sys_enable_acl_db_cache(
91079113
"enable_acl_db_cache",
91089114
"Enable ACL db_cache lookup on (ip, user, db). Please issue "

0 commit comments

Comments
 (0)