Skip to content

Commit f4903a9

Browse files
committed
Merge: Updates for powerpc EEH error inject
MR: https://gitlab.com/redhat/centos-stream/src/kernel/centos-stream-9/-/merge_requests/5756 Description: Updates for powerpc EEH error inject JIRA: https://issues.redhat.com/browse/RHEL-61572 Build Info: https://brewweb.engineering.redhat.com/brew/taskinfo?taskID=65758000 Tested: Verified Brew build test kernel RPMs and confirmed issue is resolved Signed-off-by: Mamatha Inamdar <minamdar@redhat.com> Approved-by: Steve Best <sbest@redhat.com> Approved-by: Tony Camuso <tcamuso@redhat.com> Approved-by: CKI KWF Bot <cki-ci-bot+kwf-gitlab-com@redhat.com> Merged-by: Rado Vrbovsky <rvrbovsk@redhat.com>
2 parents f76935d + 9642abf commit f4903a9

File tree

3 files changed

+138
-99
lines changed

3 files changed

+138
-99
lines changed

arch/powerpc/include/asm/eeh.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -308,6 +308,7 @@ int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed);
308308
int eeh_pe_configure(struct eeh_pe *pe);
309309
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
310310
unsigned long addr, unsigned long mask);
311+
int eeh_pe_inject_mmio_error(struct pci_dev *pdev);
311312

312313
/**
313314
* EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.

arch/powerpc/kernel/eeh.c

Lines changed: 99 additions & 98 deletions
Original file line numberDiff line numberDiff line change
@@ -1533,10 +1533,6 @@ int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
15331533
if (!eeh_ops || !eeh_ops->err_inject)
15341534
return -ENOENT;
15351535

1536-
/* Check on PCI error type */
1537-
if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
1538-
return -EINVAL;
1539-
15401536
/* Check on PCI error function */
15411537
if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
15421538
return -EINVAL;
@@ -1574,6 +1570,104 @@ static int proc_eeh_show(struct seq_file *m, void *v)
15741570
}
15751571
#endif /* CONFIG_PROC_FS */
15761572

1573+
static int eeh_break_device(struct pci_dev *pdev)
1574+
{
1575+
struct resource *bar = NULL;
1576+
void __iomem *mapped;
1577+
u16 old, bit;
1578+
int i, pos;
1579+
1580+
/* Do we have an MMIO BAR to disable? */
1581+
for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
1582+
struct resource *r = &pdev->resource[i];
1583+
1584+
if (!r->flags || !r->start)
1585+
continue;
1586+
if (r->flags & IORESOURCE_IO)
1587+
continue;
1588+
if (r->flags & IORESOURCE_UNSET)
1589+
continue;
1590+
1591+
bar = r;
1592+
break;
1593+
}
1594+
1595+
if (!bar) {
1596+
pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
1597+
return -ENXIO;
1598+
}
1599+
1600+
pci_err(pdev, "Going to break: %pR\n", bar);
1601+
1602+
if (pdev->is_virtfn) {
1603+
#ifndef CONFIG_PCI_IOV
1604+
return -ENXIO;
1605+
#else
1606+
/*
1607+
* VFs don't have a per-function COMMAND register, so the best
1608+
* we can do is clear the Memory Space Enable bit in the PF's
1609+
* SRIOV control reg.
1610+
*
1611+
* Unfortunately, this requires that we have a PF (i.e doesn't
1612+
* work for a passed-through VF) and it has the potential side
1613+
* effect of also causing an EEH on every other VF under the
1614+
* PF. Oh well.
1615+
*/
1616+
pdev = pdev->physfn;
1617+
if (!pdev)
1618+
return -ENXIO; /* passed through VFs have no PF */
1619+
1620+
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
1621+
pos += PCI_SRIOV_CTRL;
1622+
bit = PCI_SRIOV_CTRL_MSE;
1623+
#endif /* !CONFIG_PCI_IOV */
1624+
} else {
1625+
bit = PCI_COMMAND_MEMORY;
1626+
pos = PCI_COMMAND;
1627+
}
1628+
1629+
/*
1630+
* Process here is:
1631+
*
1632+
* 1. Disable Memory space.
1633+
*
1634+
* 2. Perform an MMIO to the device. This should result in an error
1635+
* (CA / UR) being raised by the device which results in an EEH
1636+
* PE freeze. Using the in_8() accessor skips the eeh detection hook
1637+
* so the freeze hook so the EEH Detection machinery won't be
1638+
* triggered here. This is to match the usual behaviour of EEH
1639+
* where the HW will asynchronously freeze a PE and it's up to
1640+
* the kernel to notice and deal with it.
1641+
*
1642+
* 3. Turn Memory space back on. This is more important for VFs
1643+
* since recovery will probably fail if we don't. For normal
1644+
* the COMMAND register is reset as a part of re-initialising
1645+
* the device.
1646+
*
1647+
* Breaking stuff is the point so who cares if it's racy ;)
1648+
*/
1649+
pci_read_config_word(pdev, pos, &old);
1650+
1651+
mapped = ioremap(bar->start, PAGE_SIZE);
1652+
if (!mapped) {
1653+
pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
1654+
return -ENXIO;
1655+
}
1656+
1657+
pci_write_config_word(pdev, pos, old & ~bit);
1658+
in_8(mapped);
1659+
pci_write_config_word(pdev, pos, old);
1660+
1661+
iounmap(mapped);
1662+
1663+
return 0;
1664+
}
1665+
1666+
int eeh_pe_inject_mmio_error(struct pci_dev *pdev)
1667+
{
1668+
return eeh_break_device(pdev);
1669+
}
1670+
15771671
#ifdef CONFIG_DEBUG_FS
15781672

15791673

@@ -1727,99 +1821,6 @@ static const struct file_operations eeh_dev_check_fops = {
17271821
.read = eeh_debugfs_dev_usage,
17281822
};
17291823

1730-
static int eeh_debugfs_break_device(struct pci_dev *pdev)
1731-
{
1732-
struct resource *bar = NULL;
1733-
void __iomem *mapped;
1734-
u16 old, bit;
1735-
int i, pos;
1736-
1737-
/* Do we have an MMIO BAR to disable? */
1738-
for (i = 0; i <= PCI_STD_RESOURCE_END; i++) {
1739-
struct resource *r = &pdev->resource[i];
1740-
1741-
if (!r->flags || !r->start)
1742-
continue;
1743-
if (r->flags & IORESOURCE_IO)
1744-
continue;
1745-
if (r->flags & IORESOURCE_UNSET)
1746-
continue;
1747-
1748-
bar = r;
1749-
break;
1750-
}
1751-
1752-
if (!bar) {
1753-
pci_err(pdev, "Unable to find Memory BAR to cause EEH with\n");
1754-
return -ENXIO;
1755-
}
1756-
1757-
pci_err(pdev, "Going to break: %pR\n", bar);
1758-
1759-
if (pdev->is_virtfn) {
1760-
#ifndef CONFIG_PCI_IOV
1761-
return -ENXIO;
1762-
#else
1763-
/*
1764-
* VFs don't have a per-function COMMAND register, so the best
1765-
* we can do is clear the Memory Space Enable bit in the PF's
1766-
* SRIOV control reg.
1767-
*
1768-
* Unfortunately, this requires that we have a PF (i.e doesn't
1769-
* work for a passed-through VF) and it has the potential side
1770-
* effect of also causing an EEH on every other VF under the
1771-
* PF. Oh well.
1772-
*/
1773-
pdev = pdev->physfn;
1774-
if (!pdev)
1775-
return -ENXIO; /* passed through VFs have no PF */
1776-
1777-
pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV);
1778-
pos += PCI_SRIOV_CTRL;
1779-
bit = PCI_SRIOV_CTRL_MSE;
1780-
#endif /* !CONFIG_PCI_IOV */
1781-
} else {
1782-
bit = PCI_COMMAND_MEMORY;
1783-
pos = PCI_COMMAND;
1784-
}
1785-
1786-
/*
1787-
* Process here is:
1788-
*
1789-
* 1. Disable Memory space.
1790-
*
1791-
* 2. Perform an MMIO to the device. This should result in an error
1792-
* (CA / UR) being raised by the device which results in an EEH
1793-
* PE freeze. Using the in_8() accessor skips the eeh detection hook
1794-
* so the freeze hook so the EEH Detection machinery won't be
1795-
* triggered here. This is to match the usual behaviour of EEH
1796-
* where the HW will asyncronously freeze a PE and it's up to
1797-
* the kernel to notice and deal with it.
1798-
*
1799-
* 3. Turn Memory space back on. This is more important for VFs
1800-
* since recovery will probably fail if we don't. For normal
1801-
* the COMMAND register is reset as a part of re-initialising
1802-
* the device.
1803-
*
1804-
* Breaking stuff is the point so who cares if it's racy ;)
1805-
*/
1806-
pci_read_config_word(pdev, pos, &old);
1807-
1808-
mapped = ioremap(bar->start, PAGE_SIZE);
1809-
if (!mapped) {
1810-
pci_err(pdev, "Unable to map MMIO BAR %pR\n", bar);
1811-
return -ENXIO;
1812-
}
1813-
1814-
pci_write_config_word(pdev, pos, old & ~bit);
1815-
in_8(mapped);
1816-
pci_write_config_word(pdev, pos, old);
1817-
1818-
iounmap(mapped);
1819-
1820-
return 0;
1821-
}
1822-
18231824
static ssize_t eeh_dev_break_write(struct file *filp,
18241825
const char __user *user_buf,
18251826
size_t count, loff_t *ppos)
@@ -1831,7 +1832,7 @@ static ssize_t eeh_dev_break_write(struct file *filp,
18311832
if (IS_ERR(pdev))
18321833
return PTR_ERR(pdev);
18331834

1834-
ret = eeh_debugfs_break_device(pdev);
1835+
ret = eeh_break_device(pdev);
18351836
pci_dev_put(pdev);
18361837

18371838
if (ret < 0)

arch/powerpc/platforms/pseries/eeh_pseries.c

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,43 @@ static int pseries_notify_resume(struct eeh_dev *edev)
782782
}
783783
#endif
784784

785+
/**
786+
* pseries_eeh_err_inject - Inject specified error to the indicated PE
787+
* @pe: the indicated PE
788+
* @type: error type
789+
* @func: specific error type
790+
* @addr: address
791+
* @mask: address mask
792+
* The routine is called to inject specified error, which is
793+
* determined by @type and @func, to the indicated PE
794+
*/
795+
static int pseries_eeh_err_inject(struct eeh_pe *pe, int type, int func,
796+
unsigned long addr, unsigned long mask)
797+
{
798+
struct eeh_dev *pdev;
799+
800+
/* Check on PCI error type */
801+
if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
802+
return -EINVAL;
803+
804+
switch (func) {
805+
case EEH_ERR_FUNC_LD_MEM_ADDR:
806+
case EEH_ERR_FUNC_LD_MEM_DATA:
807+
case EEH_ERR_FUNC_ST_MEM_ADDR:
808+
case EEH_ERR_FUNC_ST_MEM_DATA:
809+
/* injects a MMIO error for all pdev's belonging to PE */
810+
pci_lock_rescan_remove();
811+
list_for_each_entry(pdev, &pe->edevs, entry)
812+
eeh_pe_inject_mmio_error(pdev->pdev);
813+
pci_unlock_rescan_remove();
814+
break;
815+
default:
816+
return -ERANGE;
817+
}
818+
819+
return 0;
820+
}
821+
785822
static struct eeh_ops pseries_eeh_ops = {
786823
.name = "pseries",
787824
.probe = pseries_eeh_probe,
@@ -790,7 +827,7 @@ static struct eeh_ops pseries_eeh_ops = {
790827
.reset = pseries_eeh_reset,
791828
.get_log = pseries_eeh_get_log,
792829
.configure_bridge = pseries_eeh_configure_bridge,
793-
.err_inject = NULL,
830+
.err_inject = pseries_eeh_err_inject,
794831
.read_config = pseries_eeh_read_config,
795832
.write_config = pseries_eeh_write_config,
796833
.next_error = NULL,

0 commit comments

Comments
 (0)