Skip to content

Commit 13d456a

Browse files
yizhang82inikep
authored andcommitted
Fix contention in MDL_key::ACL_CACHE
Summary: In 5.6 acl cache is protected by mutex which causes quite a bit of contention (especially if you have lots of acls), so we changed that to a mysql_rwlock_t in 5.6. In 8.0 this is replaced with a singleton MDL lock with namespace = MDL_key::ACL_CACHE with SHARED and EXCLUSIVE lock. However MDL lock has higher overhead than regular mysql_rwlock_t due to the way it maintains the tickets for granting/waiting and another internal mysql_prlock_rwlock (MDL_lock::m_rwlock) to update/traverse them, and therefore is more prone to contentions in high connection scenarios. In this particular case, the deadlock detection for ACL_CACHE leads to bad contention problems in the connection path, because taking rdlock on MDL_lock::m_rwlock while doing deadlock traversal takes longer with more granted and waiting tickets, but the releasing of these granted tickets are themselves blocked on taking wrlock on m_rwlock, meanwhile more connections are trying to come in and grant more tickets and taking rdlock as well in deadlock detection as well, so this becomes a serious lock contention that you can't get out of if you keep new connection come in at a fast pace. A easy way to trigger this is to run FLUSH PRIVILEGES on a server that has lots of incoming connections (with low connection timeout) with lots of connection from acl_check_host / has_global_grant. For ACL_CACHE, the deadlock detection in most cases doesn't provide a ton of value in production, because under ACL_cache lock we only operate on mysql.user/db/etc special system tables, so you'd need to take exclusive lock on them and then go through acl cache locking in order to trigger the deadlock detection, which seems quite unlikely in practice. We didn't see any issues in the past for 5.6 either, which was just a mutex and later we changed it to mysql_rwlock_t and had 0 deadlock detection. Therefore, I'm adding a new switch enable_acl_cache_deadlock_detection to turn it off for ACL_CACHE only. It defaults to on to maintain compat and we can turn it off in production. There are probably other options to fix this issue short term (such as replacing the ACL_cache lock with mysql_rwlock_t lock, moving allow_all_hosts check out of the lock, etc) with different risk trade offs, and longer term fixes that require bigger surgery to MDL locking system which probably should best left to upstream. This seems to be the lowest risk option at the moment. `#ifndef EXTRA_CODE_FOR_UNIT_TESTING` is needed to avoid compilation failure in locking unit tests (which doesn't seem to compile in the sys vars). Reviewed By: yoshinorim Differential Revision: D27740302
1 parent 86ff51d commit 13d456a

File tree

8 files changed

+153
-1
lines changed

8 files changed

+153
-1
lines changed

mysql-test/r/mysqld--help-notwin.result

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,9 @@ The following options may be given as the first argument:
408408
before storage engine initialization, where each plugin
409409
is identified as name=library, where name is the plugin
410410
name and library is the plugin library in plugin_dir.
411+
--enable-acl-cache-deadlock-detection
412+
Enable deadlock detection on ACL_CACHE MDL lock
413+
(Defaults to on; use --skip-enable-acl-cache-deadlock-detection to disable.)
411414
--enable-acl-db-cache
412415
Enable ACL db_cache lookup on (ip, user, db). Please
413416
issue FLUSH PRIVILEGES for the changes to take effect.
@@ -2839,6 +2842,7 @@ disabled-storage-engines
28392842
disallow-raft FALSE
28402843
disconnect-on-expired-password TRUE
28412844
div-precision-increment 4
2845+
enable-acl-cache-deadlock-detection TRUE
28422846
enable-acl-db-cache TRUE
28432847
enable-acl-fast-lookup FALSE
28442848
enable-binlog-hlc FALSE
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
SET @start_global_value = @@GLOBAL.enable_acl_cache_deadlock_detection;
2+
SELECT @start_global_value;
3+
@start_global_value
4+
1
5+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
6+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
7+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
8+
@@GLOBAL.enable_acl_cache_deadlock_detection
9+
1
10+
FLUSH PRIVILEGES;
11+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
12+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
13+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
14+
@@GLOBAL.enable_acl_cache_deadlock_detection
15+
1
16+
FLUSH PRIVILEGES;
17+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
18+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
19+
@@GLOBAL.enable_acl_cache_deadlock_detection
20+
1
21+
FLUSH PRIVILEGES;
22+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
23+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
24+
@@GLOBAL.enable_acl_cache_deadlock_detection
25+
0
26+
FLUSH PRIVILEGES;
27+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = something;
28+
ERROR 42000: Variable 'enable_acl_cache_deadlock_detection' can't be set to the value of 'something'
29+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
30+
@@GLOBAL.enable_acl_cache_deadlock_detection
31+
0
32+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = somethingelse;
33+
ERROR 42000: Variable 'enable_acl_cache_deadlock_detection' can't be set to the value of 'somethingelse'
34+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
35+
@@GLOBAL.enable_acl_cache_deadlock_detection
36+
0
37+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = @start_global_value;
38+
SELECT @@GLOBAL.enable_acl_cache_deadlock_detection;
39+
@@GLOBAL.enable_acl_cache_deadlock_detection
40+
1
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
################ mysql-test\t\enable_acl_cache_deadlock_detection.test ########
2+
# #
3+
# Variable Name: enable_acl_cache_deadlock_detection #
4+
# Scope: Global #
5+
# #
6+
# Creation Date: 2021-04-13 #
7+
# #
8+
# #
9+
# Description:Test Cases of Dynamic System Variable #
10+
# enable_acl_cache_deadlock_detection that checks the behavior of #
11+
# this variable in the following ways #
12+
# * Default Value #
13+
# * Valid Value #
14+
# * Invalid value #
15+
# #
16+
###############################################################################
17+
SET @start_global_value = @@GLOBAL.enable_acl_cache_deadlock_detection;
18+
SELECT @start_global_value;
19+
20+
########################################################################
21+
# Display the DEFAULT value of enable_acl_cache_deadlock_detection #
22+
########################################################################
23+
24+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
25+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
26+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
27+
28+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
29+
# is working properly
30+
FLUSH PRIVILEGES;
31+
32+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
33+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = DEFAULT;
34+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
35+
36+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
37+
# is working properly
38+
FLUSH PRIVILEGES;
39+
40+
###############################################################################
41+
# change the value of enable_acl_cache_deadlock_detection to a valid value #
42+
###############################################################################
43+
44+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = on;
45+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
46+
47+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
48+
# is working properly
49+
FLUSH PRIVILEGES;
50+
51+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = off;
52+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
53+
54+
# FLUSH PRIVILEGES not required but just a quick check to see acl cache
55+
# is working properly
56+
FLUSH PRIVILEGES;
57+
58+
###############################################################################
59+
# change the value of enable_acl_cache_deadlock_detection to an invalid value #
60+
###############################################################################
61+
62+
--Error ER_WRONG_VALUE_FOR_VAR
63+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = something;
64+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
65+
66+
--Error ER_WRONG_VALUE_FOR_VAR
67+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = somethingelse;
68+
select @@GLOBAL.enable_acl_cache_deadlock_detection;
69+
70+
###############################
71+
# Restore initial value #
72+
###############################
73+
74+
SET @@GLOBAL.enable_acl_cache_deadlock_detection = @start_global_value;
75+
SELECT @@GLOBAL.enable_acl_cache_deadlock_detection;

mysql-test/t/all_persisted_variables.test

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,9 +61,10 @@ let $total_excluded_vars=`SELECT COUNT(*) FROM performance_schema.global_variabl
6161
'binlog_file_basedir',
6262
'binlog_index_basedir',
6363
'default_collation_for_utf8mb4_init',
64+
'disable_raft_log_repointing',
65+
'enable_acl_cache_deadlock_detection',
6466
'enable_acl_db_cache',
6567
'enable_acl_fast_lookup',
66-
'disable_raft_log_repointing',
6768
'group_replication_plugin_hooks',
6869
'gtid_committed',
6970
'gtid_purged_for_tailing',

sql/mdl.cc

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4030,6 +4030,30 @@ bool MDL_lock::visit_subgraph(MDL_ticket *waiting_ticket,
40304030
MDL_context *src_ctx = waiting_ticket->get_ctx();
40314031
bool result = true;
40324032

4033+
#ifndef EXTRA_CODE_FOR_UNIT_TESTING
4034+
if (!enable_acl_cache_deadlock_detection &&
4035+
key.mdl_namespace() == MDL_key::ACL_CACHE) {
4036+
/*
4037+
Deadlock detection for ACL_CACHE may lead to bad contention problems
4038+
in the connection path. In particular taking rdlock on m_rwlock
4039+
while doing deadlock traversal takes longer with more granted and
4040+
waiting tickets, but the releasing of these granted tickets are
4041+
themselves blocked on taking wrlock on m_rwlock, meanwhile more
4042+
connections are trying to come in and grant more tickets and
4043+
taking wrlock as well, so this becomes a serious lock contention
4044+
that you can't get out of if you keep new connection come in at
4045+
a fast pace. A easy way to trigger this is to run FLUSH
4046+
PRIVILEGES on a server that has lots of incoming connections with
4047+
lots of connection from acl_check_host / has_global_grant.
4048+
4049+
For ACL_CACHE, the deadlock detection in most cases doesn't provide
4050+
a ton of value in production so this allows turning it off for
4051+
ACL_CACHE only.
4052+
*/
4053+
return false;
4054+
}
4055+
#endif
4056+
40334057
mysql_prlock_rdlock(&m_rwlock);
40344058

40354059
/*

sql/mysqld.cc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,6 +1255,7 @@ double wait_for_hlc_sleep_scaling_factor = 0.75;
12551255
char *default_collation_for_utf8mb4_init = nullptr;
12561256
bool enable_blind_replace = false;
12571257
bool enable_acl_fast_lookup = false;
1258+
bool enable_acl_cache_deadlock_detection = false;
12581259
bool enable_acl_db_cache = true;
12591260
bool enable_super_log_bin_read_only = false;
12601261
ulonglong max_tmp_disk_usage{0};

sql/mysqld.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ extern ulong wait_for_hlc_sleep_threshold_ms;
286286
extern double wait_for_hlc_sleep_scaling_factor;
287287
extern char *default_collation_for_utf8mb4_init;
288288
extern bool enable_acl_fast_lookup;
289+
extern bool enable_acl_cache_deadlock_detection;
289290
extern bool enable_acl_db_cache;
290291
extern bool enable_super_log_bin_read_only;
291292

sql/sys_vars.cc

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8962,6 +8962,12 @@ static Sys_var_bool Sys_enable_acl_fast_lookup(
89628962
DEFAULT(false), NO_MUTEX_GUARD, NOT_IN_BINLOG,
89638963
ON_CHECK(check_enable_acl_fast_lookup));
89648964

8965+
static Sys_var_bool Sys_enable_acl_cache_deadlock_detection(
8966+
"enable_acl_cache_deadlock_detection",
8967+
"Enable deadlock detection on ACL_CACHE MDL lock",
8968+
NON_PERSIST GLOBAL_VAR(enable_acl_cache_deadlock_detection),
8969+
CMD_LINE(OPT_ARG), DEFAULT(true));
8970+
89658971
static Sys_var_bool Sys_enable_acl_db_cache(
89668972
"enable_acl_db_cache",
89678973
"Enable ACL db_cache lookup on (ip, user, db). Please issue "

0 commit comments

Comments
 (0)