Skip to content

Commit e579d6c

Browse files
author
Mete Durlu
committed
s390/pci: Allow re-add of a reserved but not yet removed device
JIRA: https://issues.redhat.com/browse/RHEL-94944 commit 4b1815a Author: Niklas Schnelle <schnelle@linux.ibm.com> Date: Thu May 22 14:13:14 2025 +0200 s390/pci: Allow re-add of a reserved but not yet removed device The architecture assumes that PCI functions can be removed synchronously as PCI events are processed. This however clashes with the reference counting of struct pci_dev which allows device drivers to hold on to a struct pci_dev reference even as the underlying device is removed. To bridge this gap commit 2a671f7 ("s390/pci: fix use after free of zpci_dev") keeps the struct zpci_dev in ZPCI_FN_STATE_RESERVED state until common code releases the struct pci_dev. Only when all references are dropped, the struct zpci_dev can be removed and freed. Later commit a46044a ("s390/pci: fix zpci_zdev_put() on reserve") moved the deletion of the struct zpci_dev from the zpci_list in zpci_release_device() to the point where the device is reserved. This was done to prevent handling events for a device that is already being removed, e.g. when the platform generates both PCI event codes 0x304 and 0x308. In retrospect, deletion from the zpci_list in the release function without holding the zpci_list_lock was also racy. A side effect of this handling is that if the underlying device re-appears while the struct zpci_dev is in the ZPCI_FN_STATE_RESERVED state, the new and old instances of the struct zpci_dev and/or struct pci_dev may clash. For example when trying to create the IOMMU sysfs files for the new instance. In this case, re-adding the new instance is aborted. The old instance is removed, and the device will remain absent until the platform issues another event. Fix this by allowing the struct zpci_dev to be brought back up right until it is finally removed. To this end also keep the struct zpci_dev in the zpci_list until it is finally released when all references have been dropped. Deletion from the zpci_list from within the release function is made safe by using kref_put_lock() with the zpci_list_lock. This ensures that the releasing code holds the last reference. Cc: stable@vger.kernel.org Fixes: a46044a ("s390/pci: fix zpci_zdev_put() on reserve") Reviewed-by: Gerd Bayer <gbayer@linux.ibm.com> Tested-by: Gerd Bayer <gbayer@linux.ibm.com> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com> Signed-off-by: Heiko Carstens <hca@linux.ibm.com> Signed-off-by: Mete Durlu <mdurlu@redhat.com>
1 parent ceff3dd commit e579d6c

File tree

3 files changed

+45
-16
lines changed

3 files changed

+45
-16
lines changed

arch/s390/pci/pci.c

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,13 @@ EXPORT_SYMBOL_GPL(zpci_aipb);
6969
struct airq_iv *zpci_aif_sbv;
7070
EXPORT_SYMBOL_GPL(zpci_aif_sbv);
7171

72+
void zpci_zdev_put(struct zpci_dev *zdev)
73+
{
74+
if (!zdev)
75+
return;
76+
kref_put_lock(&zdev->kref, zpci_release_device, &zpci_list_lock);
77+
}
78+
7279
struct zpci_dev *get_zdev_by_fid(u32 fid)
7380
{
7481
struct zpci_dev *tmp, *zdev = NULL;
@@ -919,21 +926,20 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
919926
* @zdev: the zpci_dev that was reserved
920927
*
921928
* Handle the case that a given zPCI function was reserved by another system.
922-
* After a call to this function the zpci_dev can not be found via
923-
* get_zdev_by_fid() anymore but may still be accessible via existing
924-
* references though it will not be functional anymore.
925929
*/
926930
void zpci_device_reserved(struct zpci_dev *zdev)
927931
{
928-
/*
929-
* Remove device from zpci_list as it is going away. This also
930-
* makes sure we ignore subsequent zPCI events for this device.
931-
*/
932-
spin_lock(&zpci_list_lock);
933-
list_del(&zdev->entry);
934-
spin_unlock(&zpci_list_lock);
932+
lockdep_assert_held(&zdev->state_lock);
933+
/* We may declare the device reserved multiple times */
934+
if (zdev->state == ZPCI_FN_STATE_RESERVED)
935+
return;
935936
zdev->state = ZPCI_FN_STATE_RESERVED;
936937
zpci_dbg(3, "rsv fid:%x\n", zdev->fid);
938+
/*
939+
* The underlying device is gone. Allow the zdev to be freed
940+
* as soon as all other references are gone by accounting for
941+
* the removal as a dropped reference.
942+
*/
937943
zpci_zdev_put(zdev);
938944
}
939945

@@ -942,6 +948,12 @@ void zpci_release_device(struct kref *kref)
942948
struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
943949

944950
WARN_ON(zdev->state != ZPCI_FN_STATE_RESERVED);
951+
/*
952+
* We already hold zpci_list_lock thanks to kref_put_lock().
953+
* This makes sure no new reference can be taken from the list.
954+
*/
955+
list_del(&zdev->entry);
956+
spin_unlock(&zpci_list_lock);
945957

946958
if (zdev->has_hp_slot)
947959
zpci_exit_slot(zdev);

arch/s390/pci/pci_bus.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,8 @@ int zpci_bus_scan_device(struct zpci_dev *zdev);
1717
void zpci_bus_remove_device(struct zpci_dev *zdev, bool set_error);
1818

1919
void zpci_release_device(struct kref *kref);
20-
static inline void zpci_zdev_put(struct zpci_dev *zdev)
21-
{
22-
if (zdev)
23-
kref_put(&zdev->kref, zpci_release_device);
24-
}
20+
21+
void zpci_zdev_put(struct zpci_dev *zdev);
2522

2623
static inline void zpci_zdev_get(struct zpci_dev *zdev)
2724
{

arch/s390/pci/pci_event.c

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,22 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
335335
zdev->state = ZPCI_FN_STATE_STANDBY;
336336
}
337337

338+
static void zpci_event_reappear(struct zpci_dev *zdev)
339+
{
340+
lockdep_assert_held(&zdev->state_lock);
341+
/*
342+
* The zdev is in the reserved state. This means that it was presumed to
343+
* go away but there are still undropped references. Now, the platform
344+
* announced its availability again. Bring back the lingering zdev
345+
* to standby. This is safe because we hold a temporary reference
346+
* now so that it won't go away. Account for the re-appearance of the
347+
* underlying device by incrementing the reference count.
348+
*/
349+
zdev->state = ZPCI_FN_STATE_STANDBY;
350+
zpci_zdev_get(zdev);
351+
zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
352+
}
353+
338354
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
339355
{
340356
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
@@ -358,8 +374,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
358374
break;
359375
}
360376
} else {
377+
if (zdev->state == ZPCI_FN_STATE_RESERVED)
378+
zpci_event_reappear(zdev);
361379
/* the configuration request may be stale */
362-
if (zdev->state != ZPCI_FN_STATE_STANDBY)
380+
else if (zdev->state != ZPCI_FN_STATE_STANDBY)
363381
break;
364382
zdev->state = ZPCI_FN_STATE_CONFIGURED;
365383
}
@@ -375,6 +393,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
375393
break;
376394
}
377395
} else {
396+
if (zdev->state == ZPCI_FN_STATE_RESERVED)
397+
zpci_event_reappear(zdev);
378398
zpci_update_fh(zdev, ccdf->fh);
379399
}
380400
break;

0 commit comments

Comments
 (0)