Skip to content

Commit b9e369a

Browse files
yizhang82inikep
authored andcommitted
Fix contention in MDL_key::ACL_CACHE
Summary: In 5.6 acl cache is protected by mutex which causes quite a bit of contention (especially if you have lots of acls), so we changed that to a mysql_rwlock_t in 5.6. In 8.0 this is replaced with a singleton MDL lock with namespace = MDL_key::ACL_CACHE with SHARED and EXCLUSIVE lock. However MDL lock has higher overhead than regular mysql_rwlock_t due to the way it maintains the tickets for granting/waiting and another internal mysql_prlock_rwlock (MDL_lock::m_rwlock) to update/traverse them, and therefore is more prone to contentions in high connection scenarios. In this particular case, the deadlock detection for ACL_CACHE leads to bad contention problems in the connection path, because taking rdlock on MDL_lock::m_rwlock while doing deadlock traversal takes longer with more granted and waiting tickets, but the releasing of these granted tickets are themselves blocked on taking wrlock on m_rwlock, meanwhile more connections are trying to come in and grant more tickets and taking rdlock as well in deadlock detection as well, so this becomes a serious lock contention that you can't get out of if you keep new connection come in at a fast pace. A easy way to trigger this is to run FLUSH PRIVILEGES on a server that has lots of incoming connections (with low connection timeout) with lots of connection from acl_check_host / has_global_grant. For ACL_CACHE, the deadlock detection in most cases doesn't provide a ton of value in production, because under ACL_cache lock we only operate on mysql.user/db/etc special system tables, so you'd need to take exclusive lock on them and then go through acl cache locking in order to trigger the deadlock detection, which seems quite unlikely in practice. We didn't see any issues in the past for 5.6 either, which was just a mutex and later we changed it to mysql_rwlock_t and had 0 deadlock detection. Therefore, I'm adding a new switch enable_acl_cache_deadlock_detection to turn it off for ACL_CACHE only. It defaults to on to maintain compat and we can turn it off in production. There are probably other options to fix this issue short term (such as replacing the ACL_cache lock with mysql_rwlock_t lock, moving allow_all_hosts check out of the lock, etc) with different risk trade offs, and longer term fixes that require bigger surgery to MDL locking system which probably should best left to upstream. This seems to be the lowest risk option at the moment. `#ifndef EXTRA_CODE_FOR_UNIT_TESTING` is needed to avoid compilation failure in locking unit tests (which doesn't seem to compile in the sys vars). Reviewed By: yoshinorim Differential Revision: D27740302
1 parent 324d214 commit b9e369a

File tree

8 files changed

+153
-1
lines changed

8 files changed

+153
-1
lines changed

mysql-test/r/mysqld--help-notwin.result

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,9 @@ The following options may be given as the first argument:
421421
before storage engine initialization, where each plugin
422422
is identified as name=library, where name is the plugin
423423
name and library is the plugin library in plugin_dir.
424+
--enable-acl-cache-deadlock-detection
425+
Enable deadlock detection on ACL_CACHE MDL lock
426+
(Defaults to on; use --skip-enable-acl-cache-deadlock-detection to disable.)
424427
--enable-acl-db-cache
425428
Enable ACL db_cache lookup on (ip, user, db). Please
426429
issue FLUSH PRIVILEGES for the changes to take effect.
@@ -2874,6 +2877,7 @@ disallow-raft FALSE
28742877
disconnect-on-expired-password TRUE
28752878
disconnect-slave-event-count 0
28762879
div-precision-increment 4
2880+
enable-acl-cache-deadlock-detection TRUE
28772881
enable-acl-db-cache TRUE
28782882
enable-acl-fast-lookup FALSE
28792883
enable-binlog-hlc FALSE
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
SET @start_global_value = @@GLOBAL.enable_acl_cache_deadlock_detection;
2+
SELECT @start_global_value;
3+
@start_global_value
4+
1
5+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
6+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
7+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
8+
@@GLOBAL.enable_acl_cache_deadlock_detection
9+
1
10+
FLUSH PRIVILEGES;
11+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
12+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
13+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
14+
@@GLOBAL.enable_acl_cache_deadlock_detection
15+
1
16+
FLUSH PRIVILEGES;
17+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
18+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
19+
@@GLOBAL.enable_acl_cache_deadlock_detection
20+
1
21+
FLUSH PRIVILEGES;
22+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
23+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
24+
@@GLOBAL.enable_acl_cache_deadlock_detection
25+
0
26+
FLUSH PRIVILEGES;
27+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = something;
28+
ERROR 42000: Variable 'enable_acl_cache_deadlock_detection' can't be set to the value of 'something'
29+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
30+
@@GLOBAL.enable_acl_cache_deadlock_detection
31+
0
32+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = somethingelse;
33+
ERROR 42000: Variable 'enable_acl_cache_deadlock_detection' can't be set to the value of 'somethingelse'
34+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
35+
@@GLOBAL.enable_acl_cache_deadlock_detection
36+
0
37+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = @start_global_value;
38+
SELECT @@GLOBAL.enable_acl_cache_deadlock_detection;
39+
@@GLOBAL.enable_acl_cache_deadlock_detection
40+
1
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
################ mysql-test\t\enable_acl_cache_deadlock_detection.test ########
2+
# #
3+
# Variable Name: enable_acl_cache_deadlock_detection #
4+
# Scope: Global #
5+
# #
6+
# Creation Date: 2021-04-13 #
7+
# #
8+
# #
9+
# Description:Test Cases of Dynamic System Variable #
10+
# enable_acl_cache_deadlock_detection that checks the behavior of #
11+
# this variable in the following ways #
12+
# * Default Value #
13+
# * Valid Value #
14+
# * Invalid value #
15+
# #
16+
###############################################################################
17+
SET @start_global_value = @@GLOBAL.enable_acl_cache_deadlock_detection;
18+
SELECT @start_global_value;
19+
20+
########################################################################
21+
# Display the DEFAULT value of enable_acl_cache_deadlock_detection #
22+
########################################################################
23+
24+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
25+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
26+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
27+
28+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
29+
# is working properly
30+
FLUSH PRIVILEGES;
31+
32+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
33+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
34+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
35+
36+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
37+
# is working properly
38+
FLUSH PRIVILEGES;
39+
40+
###############################################################################
41+
# change the value of enable_acl_cache_deadlock_detection to a valid value #
42+
###############################################################################
43+
44+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
45+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
46+
47+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
48+
# is working properly
49+
FLUSH PRIVILEGES;
50+
51+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
52+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
53+
54+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
55+
# is working properly
56+
FLUSH PRIVILEGES;
57+
58+
###############################################################################
59+
# change the value of enable_acl_cache_deadlock_detection to an invalid value #
60+
###############################################################################
61+
62+
--Error ER_WRONG_VALUE_FOR_VAR
63+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = something;
64+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
65+
66+
--Error ER_WRONG_VALUE_FOR_VAR
67+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = somethingelse;
68+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
69+
70+
###############################
71+
# Restore initial value #
72+
###############################
73+
74+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = @start_global_value;
75+
SELECT @@GLOBAL.enable_acl_cache_deadlock_detection;

mysql-test/t/all_persisted_variables.test

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,10 @@ let $total_excluded_vars=`SELECT COUNT(*) FROM performance_schema.global_variabl
5151
'binlog_file_basedir',
5252
'binlog_index_basedir',
5353
'default_collation_for_utf8mb4_init',
54+
'disable_raft_log_repointing',
55+
'enable_acl_cache_deadlock_detection',
5456
'enable_acl_db_cache',
5557
'enable_acl_fast_lookup',
56-
'disable_raft_log_repointing',
5758
'group_replication_plugin_hooks',
5859
'gtid_committed',
5960
'gtid_purged_for_tailing',

sql/mdl.cc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4028,6 +4028,30 @@ bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket,
40284028
MDL_context *src_ctx = waiting_ticket->get_ctx();
40294029
bool result = true;
40304030

4031+
#ifndef EXTRA_CODE_FOR_UNIT_TESTING
4032+
if (!enable_acl_cache_deadlock_detection &&
4033+
key.mdl_namespace() == MDL_key::ACL_CACHE) {
4034+
/*
4035+
Deadlock detection for ACL_CACHE may lead to bad contention problems
4036+
in the connection path. In particular taking rdlock on m_rwlock
4037+
while doing deadlock traversal takes longer with more granted and
4038+
waiting tickets, but the releasing of these granted tickets are
4039+
themselves blocked on taking wrlock on m_rwlock, meanwhile more
4040+
connections are trying to come in and grant more tickets and
4041+
taking wrlock as well, so this becomes a serious lock contention
4042+
that you can't get out of if you keep new connection come in at
4043+
a fast pace. A easy way to trigger this is to run FLUSH
4044+
PRIVILEGES on a server that has lots of incoming connections with
4045+
lots of connection from acl_check_host / has_global_grant.
4046+
4047+
For ACL_CACHE, the deadlock detection in most cases doesn't provide
4048+
a ton of value in production so this allows turning it off for
4049+
ACL_CACHE only.
4050+
*/
4051+
return false;
4052+
}
4053+
#endif
4054+
40314055
mysql_prlock_rdlock(&m_rwlock);
40324056

40334057
/*

sql/mysqld.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1239,6 +1239,7 @@ double wait_for_hlc_sleep_scaling_factor = 0.75;
12391239
char *default_collation_for_utf8mb4_init = nullptr;
12401240
bool enable_blind_replace = false;
12411241
bool enable_acl_fast_lookup = false;
1242+
bool enable_acl_cache_deadlock_detection = false;
12421243
bool enable_acl_db_cache = true;
12431244
bool enable_super_log_bin_read_only = false;
12441245
ulonglong max_tmp_disk_usage{0};

sql/mysqld.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,7 @@ extern ulong wait_for_hlc_sleep_threshold_ms;
299299
extern double wait_for_hlc_sleep_scaling_factor;
300300
extern char *default_collation_for_utf8mb4_init;
301301
extern bool enable_acl_fast_lookup;
302+
extern bool enable_acl_cache_deadlock_detection;
302303
extern bool enable_acl_db_cache;
303304
extern bool enable_super_log_bin_read_only;
304305

sql/sys_vars.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9130,6 +9130,12 @@ static Sys_var_bool Sys_enable_acl_fast_lookup(
91309130
DEFAULT(false), NO_MUTEX_GUARD, NOT_IN_BINLOG,
91319131
ON_CHECK(check_enable_acl_fast_lookup));
91329132

9133+
static Sys_var_bool Sys_enable_acl_cache_deadlock_detection(
9134+
"enable_acl_cache_deadlock_detection",
9135+
"Enable deadlock detection on ACL_CACHE MDL lock",
9136+
NON_PERSIST GLOBAL_VAR(enable_acl_cache_deadlock_detection),
9137+
CMD_LINE(OPT_ARG), DEFAULT(true));
9138+
91339139
static Sys_var_bool Sys_enable_acl_db_cache(
91349140
"enable_acl_db_cache",
91359141
"Enable ACL db_cache lookup on (ip, user, db). Please issue "

0 commit comments

Comments
 (0)