Commit da391fe

scsi: target: Move delayed/ordered tracking to per CPU
JIRA: https://issues.redhat.com/browse/RHEL-111938

The atomic use from the delayed/ordered tracking is causing perf issues
when using higher perf backend devices and multiple queues. This moves
the values to a per CPU counter. Combined with the per CPU stats patch,
this improves IOPS by up to 33% for 8K IOs when using 4 or more queues
from the initiator.

Signed-off-by: Mike Christie <michael.christie@oracle.com>
Link: https://lore.kernel.org/r/20250424032741.16216-3-michael.christie@oracle.com
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
(cherry picked from commit 268975a)
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
1 parent 6fc4a2a commit da391fe
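For readers unfamiliar with the pattern being adopted: the patch drops the shared atomic_t counters (non_ordered, delayed_cmd_count) and instead drives the delayed/ordered tracking with a percpu_ref, so SIMPLE/UNTAGGED commands only touch a per-CPU counter on the fast path. The following is a minimal, illustrative sketch of that percpu_ref lifecycle, not the patch itself; the example_dev structure and example_* helper names are invented here, while the percpu_ref calls are the real kernel API the patch uses (PERCPU_REF_ALLOW_REINIT is assumed so the ref can be resurrected after a kill, matching the patch).

/*
 * Illustrative sketch only -- example_* names are invented; the
 * percpu_ref calls are the kernel API used by the patch below.
 */
#include <linux/kernel.h>
#include <linux/gfp.h>
#include <linux/percpu-refcount.h>
#include <linux/workqueue.h>

struct example_dev {
        struct percpu_ref  inflight;    /* plays the role of dev->non_ordered */
        struct work_struct drain_work;  /* plays the role of dev->delayed_cmd_work */
};

/* Runs once the last reference is dropped after percpu_ref_kill(). */
static void example_release(struct percpu_ref *ref)
{
        struct example_dev *dev = container_of(ref, struct example_dev, inflight);

        schedule_work(&dev->drain_work);        /* safe to run the ordered work now */
}

static int example_setup(struct example_dev *dev)
{
        /* ALLOW_REINIT permits percpu_ref_resurrect() after a kill. */
        return percpu_ref_init(&dev->inflight, example_release,
                               PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
}

/* Fast path: per-CPU increment, no cross-CPU atomic contention. */
static bool example_start_simple(struct example_dev *dev)
{
        return percpu_ref_tryget_live(&dev->inflight);
}

static void example_end_simple(struct example_dev *dev)
{
        percpu_ref_put(&dev->inflight);
}

/* Slow path: an ordered request closes the fast path; the release
 * callback fires once all in-flight simple requests have finished. */
static void example_start_ordered(struct example_dev *dev)
{
        if (!percpu_ref_is_dying(&dev->inflight))
                percpu_ref_kill(&dev->inflight);
}

static void example_end_ordered(struct example_dev *dev)
{
        percpu_ref_resurrect(&dev->inflight);   /* reopen the fast path */
}

static void example_teardown(struct example_dev *dev)
{
        percpu_ref_exit(&dev->inflight);
}

In the patch itself the corresponding pieces are target_non_ordered_release() (the release callback), percpu_ref_tryget_live()/percpu_ref_put() in target_handle_task_attr() and transport_complete_task_attr() for SIMPLE commands, percpu_ref_kill() when an ORDERED command is queued, and percpu_ref_resurrect() in transport_complete_ordered_sync() once the ordered command completes.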

File tree (3 files changed: +83 -60 lines changed)

    drivers/target/target_core_device.c
    drivers/target/target_core_transport.c
    include/target/target_core_base.h


drivers/target/target_core_device.c

Lines changed: 20 additions & 0 deletions
@@ -700,6 +700,18 @@ static void scsi_dump_inquiry(struct se_device *dev)
         pr_debug("  Type:   %s ", scsi_device_type(device_type));
 }
 
+static void target_non_ordered_release(struct percpu_ref *ref)
+{
+        struct se_device *dev = container_of(ref, struct se_device,
+                                             non_ordered);
+        unsigned long flags;
+
+        spin_lock_irqsave(&dev->delayed_cmd_lock, flags);
+        if (!list_empty(&dev->delayed_cmd_list))
+                schedule_work(&dev->delayed_cmd_work);
+        spin_unlock_irqrestore(&dev->delayed_cmd_lock, flags);
+}
+
 struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 {
         struct se_device *dev;
@@ -730,6 +742,9 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
                 INIT_WORK(&q->sq.work, target_queued_submit_work);
         }
 
+        if (percpu_ref_init(&dev->non_ordered, target_non_ordered_release,
+                            PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
+                goto free_queues;
 
         dev->se_hba = hba;
         dev->transport = hba->backend->ops;
@@ -816,6 +831,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 
         return dev;
 
+free_queues:
+        kfree(dev->queues);
 free_stats:
         free_percpu(dev->stats);
 free_device:
@@ -1010,6 +1027,9 @@ void target_free_device(struct se_device *dev)
 
         WARN_ON(!list_empty(&dev->dev_sep_list));
 
+        percpu_ref_exit(&dev->non_ordered);
+        cancel_work_sync(&dev->delayed_cmd_work);
+
         if (target_dev_configured(dev)) {
                 dev->transport->destroy_device(dev);
 
drivers/target/target_core_transport.c

Lines changed: 61 additions & 58 deletions
@@ -2213,6 +2213,7 @@ static int target_write_prot_action(struct se_cmd *cmd)
 static bool target_handle_task_attr(struct se_cmd *cmd)
 {
         struct se_device *dev = cmd->se_dev;
+        unsigned long flags;
 
         if (dev->transport_flags & TRANSPORT_FLAG_PASSTHROUGH)
                 return false;
@@ -2225,43 +2226,40 @@ static bool target_handle_task_attr(struct se_cmd *cmd)
          */
         switch (cmd->sam_task_attr) {
         case TCM_HEAD_TAG:
-                atomic_inc_mb(&dev->non_ordered);
                 pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x\n",
                          cmd->t_task_cdb[0]);
                 return false;
         case TCM_ORDERED_TAG:
-                atomic_inc_mb(&dev->delayed_cmd_count);
-
                 pr_debug("Added ORDERED for CDB: 0x%02x to ordered list\n",
                          cmd->t_task_cdb[0]);
                 break;
         default:
                 /*
                  * For SIMPLE and UNTAGGED Task Attribute commands
                  */
-                atomic_inc_mb(&dev->non_ordered);
-
-                if (atomic_read(&dev->delayed_cmd_count) == 0)
+retry:
+                if (percpu_ref_tryget_live(&dev->non_ordered))
                         return false;
+
                 break;
         }
 
-        if (cmd->sam_task_attr != TCM_ORDERED_TAG) {
-                atomic_inc_mb(&dev->delayed_cmd_count);
-                /*
-                 * We will account for this when we dequeue from the delayed
-                 * list.
-                 */
-                atomic_dec_mb(&dev->non_ordered);
+        spin_lock_irqsave(&dev->delayed_cmd_lock, flags);
+        if (cmd->sam_task_attr == TCM_SIMPLE_TAG &&
+            !percpu_ref_is_dying(&dev->non_ordered)) {
+                spin_unlock_irqrestore(&dev->delayed_cmd_lock, flags);
+                /* We raced with the last ordered completion so retry. */
+                goto retry;
+        } else if (!percpu_ref_is_dying(&dev->non_ordered)) {
+                percpu_ref_kill(&dev->non_ordered);
         }
 
-        spin_lock_irq(&cmd->t_state_lock);
+        spin_lock(&cmd->t_state_lock);
         cmd->transport_state &= ~CMD_T_SENT;
-        spin_unlock_irq(&cmd->t_state_lock);
+        spin_unlock(&cmd->t_state_lock);
 
-        spin_lock(&dev->delayed_cmd_lock);
         list_add_tail(&cmd->se_delayed_node, &dev->delayed_cmd_list);
-        spin_unlock(&dev->delayed_cmd_lock);
+        spin_unlock_irqrestore(&dev->delayed_cmd_lock, flags);
 
         pr_debug("Added CDB: 0x%02x Task Attr: 0x%02x to delayed CMD listn",
                  cmd->t_task_cdb[0], cmd->sam_task_attr);
@@ -2313,41 +2311,52 @@ void target_do_delayed_work(struct work_struct *work)
         while (!dev->ordered_sync_in_progress) {
                 struct se_cmd *cmd;
 
-                if (list_empty(&dev->delayed_cmd_list))
+                /*
+                 * We can be woken up early/late due to races or the
+                 * extra wake up we do when adding commands to the list.
+                 * We check for both cases here.
+                 */
+                if (list_empty(&dev->delayed_cmd_list) ||
+                    !percpu_ref_is_zero(&dev->non_ordered))
                         break;
 
                 cmd = list_entry(dev->delayed_cmd_list.next,
                                  struct se_cmd, se_delayed_node);
+                cmd->se_cmd_flags |= SCF_TASK_ORDERED_SYNC;
+                cmd->transport_state |= CMD_T_SENT;
 
-                if (cmd->sam_task_attr == TCM_ORDERED_TAG) {
-                        /*
-                         * Check if we started with:
-                         * [ordered] [simple] [ordered]
-                         * and we are now at the last ordered so we have to wait
-                         * for the simple cmd.
-                         */
-                        if (atomic_read(&dev->non_ordered) > 0)
-                                break;
-
-                        dev->ordered_sync_in_progress = true;
-                }
+                dev->ordered_sync_in_progress = true;
 
                 list_del(&cmd->se_delayed_node);
-                atomic_dec_mb(&dev->delayed_cmd_count);
                 spin_unlock(&dev->delayed_cmd_lock);
 
-                if (cmd->sam_task_attr != TCM_ORDERED_TAG)
-                        atomic_inc_mb(&dev->non_ordered);
-
-                cmd->transport_state |= CMD_T_SENT;
-
                 __target_execute_cmd(cmd, true);
-
                 spin_lock(&dev->delayed_cmd_lock);
         }
         spin_unlock(&dev->delayed_cmd_lock);
 }
 
+static void transport_complete_ordered_sync(struct se_cmd *cmd)
+{
+        struct se_device *dev = cmd->se_dev;
+        unsigned long flags;
+
+        spin_lock_irqsave(&dev->delayed_cmd_lock, flags);
+        dev->dev_cur_ordered_id++;
+
+        pr_debug("Incremented dev_cur_ordered_id: %u for type %d\n",
+                 dev->dev_cur_ordered_id, cmd->sam_task_attr);
+
+        dev->ordered_sync_in_progress = false;
+
+        if (list_empty(&dev->delayed_cmd_list))
+                percpu_ref_resurrect(&dev->non_ordered);
+        else
+                schedule_work(&dev->delayed_cmd_work);
+
+        spin_unlock_irqrestore(&dev->delayed_cmd_lock, flags);
+}
+
 /*
  * Called from I/O completion to determine which dormant/delayed
  * and ordered cmds need to have their tasks added to the execution queue.
@@ -2360,30 +2369,24 @@ static void transport_complete_task_attr(struct se_cmd *cmd)
                 return;
 
         if (!(cmd->se_cmd_flags & SCF_TASK_ATTR_SET))
-                goto restart;
-
-        if (cmd->sam_task_attr == TCM_SIMPLE_TAG) {
-                atomic_dec_mb(&dev->non_ordered);
-                dev->dev_cur_ordered_id++;
-        } else if (cmd->sam_task_attr == TCM_HEAD_TAG) {
-                atomic_dec_mb(&dev->non_ordered);
-                dev->dev_cur_ordered_id++;
-                pr_debug("Incremented dev_cur_ordered_id: %u for HEAD_OF_QUEUE\n",
-                         dev->dev_cur_ordered_id);
-        } else if (cmd->sam_task_attr == TCM_ORDERED_TAG) {
-                spin_lock(&dev->delayed_cmd_lock);
-                dev->ordered_sync_in_progress = false;
-                spin_unlock(&dev->delayed_cmd_lock);
+                return;
 
-                dev->dev_cur_ordered_id++;
-                pr_debug("Incremented dev_cur_ordered_id: %u for ORDERED\n",
-                         dev->dev_cur_ordered_id);
-        }
         cmd->se_cmd_flags &= ~SCF_TASK_ATTR_SET;
 
-restart:
-        if (atomic_read(&dev->delayed_cmd_count) > 0)
-                schedule_work(&dev->delayed_cmd_work);
+        if (cmd->se_cmd_flags & SCF_TASK_ORDERED_SYNC) {
+                transport_complete_ordered_sync(cmd);
+                return;
+        }
+
+        switch (cmd->sam_task_attr) {
+        case TCM_SIMPLE_TAG:
+                percpu_ref_put(&dev->non_ordered);
+                break;
+        case TCM_ORDERED_TAG:
+                /* All ordered should have been executed as sync */
+                WARN_ON(1);
+                break;
+        }
 }
 
 static void transport_complete_qf(struct se_cmd *cmd)

include/target/target_core_base.h

Lines changed: 2 additions & 2 deletions
@@ -157,6 +157,7 @@ enum se_cmd_flags_table {
         SCF_USE_CPUID                   = (1 << 16),
         SCF_TASK_ATTR_SET               = (1 << 17),
         SCF_TREAT_READ_AS_NORMAL        = (1 << 18),
+        SCF_TASK_ORDERED_SYNC           = (1 << 19),
 };
 
 /*
@@ -833,9 +834,8 @@ struct se_device {
         atomic_long_t           aborts_no_task;
         struct se_dev_io_stats __percpu *stats;
         /* Active commands on this virtual SE device */
-        atomic_t                non_ordered;
+        struct percpu_ref       non_ordered;
         bool                    ordered_sync_in_progress;
-        atomic_t                delayed_cmd_count;
         atomic_t                dev_qf_count;
         u32                     export_count;
         spinlock_t              delayed_cmd_lock;