Skip to content

Commit 1e56086

Browse files
jhovoldbjorn-helgaas
authored andcommitted
PCI/ASPM: Fix deadlock when enabling ASPM
A last minute revert in 6.7-final introduced a potential deadlock when enabling ASPM during probe of Qualcomm PCIe controllers as reported by lockdep: ============================================ WARNING: possible recursive locking detected 6.7.0 #40 Not tainted -------------------------------------------- kworker/u16:5/90 is trying to acquire lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pcie_aspm_pm_state_change+0x58/0xdc but task is already holding lock: ffffacfa78ced000 (pci_bus_sem){++++}-{3:3}, at: pci_walk_bus+0x34/0xbc other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(pci_bus_sem); lock(pci_bus_sem); *** DEADLOCK *** Call trace: print_deadlock_bug+0x25c/0x348 __lock_acquire+0x10a4/0x2064 lock_acquire+0x1e8/0x318 down_read+0x60/0x184 pcie_aspm_pm_state_change+0x58/0xdc pci_set_full_power_state+0xa8/0x114 pci_set_power_state+0xc4/0x120 qcom_pcie_enable_aspm+0x1c/0x3c [pcie_qcom] pci_walk_bus+0x64/0xbc qcom_pcie_host_post_init_2_7_0+0x28/0x34 [pcie_qcom] The deadlock can easily be reproduced on machines like the Lenovo ThinkPad X13s by adding a delay to increase the race window during asynchronous probe where another thread can take a write lock. Add a new pci_set_power_state_locked() and associated helper functions that can be called with the PCI bus semaphore held to avoid taking the read lock twice. Link: https://lore.kernel.org/r/[email protected] Link: https://lore.kernel.org/r/[email protected] Fixes: f93e71a ("Revert "PCI/ASPM: Remove pcie_aspm_pm_state_change()"") Signed-off-by: Johan Hovold <[email protected]> Signed-off-by: Bjorn Helgaas <[email protected]> Cc: <[email protected]> # 6.7
1 parent 6613476 commit 1e56086

File tree

6 files changed

+101
-50
lines changed

6 files changed

+101
-50
lines changed

drivers/pci/bus.c

Lines changed: 32 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -386,29 +386,17 @@ void pci_bus_add_devices(const struct pci_bus *bus)
386386
}
387387
EXPORT_SYMBOL(pci_bus_add_devices);
388388

389-
/** pci_walk_bus - walk devices on/under bus, calling callback.
390-
* @top bus whose devices should be walked
391-
* @cb callback to be called for each device found
392-
* @userdata arbitrary pointer to be passed to callback.
393-
*
394-
* Walk the given bus, including any bridged devices
395-
* on buses under this bus. Call the provided callback
396-
* on each device found.
397-
*
398-
* We check the return of @cb each time. If it returns anything
399-
* other than 0, we break out.
400-
*
401-
*/
402-
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
403-
void *userdata)
389+
static void __pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
390+
void *userdata, bool locked)
404391
{
405392
struct pci_dev *dev;
406393
struct pci_bus *bus;
407394
struct list_head *next;
408395
int retval;
409396

410397
bus = top;
411-
down_read(&pci_bus_sem);
398+
if (!locked)
399+
down_read(&pci_bus_sem);
412400
next = top->devices.next;
413401
for (;;) {
414402
if (next == &bus->devices) {
@@ -431,10 +419,37 @@ void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
431419
if (retval)
432420
break;
433421
}
434-
up_read(&pci_bus_sem);
422+
if (!locked)
423+
up_read(&pci_bus_sem);
424+
}
425+
426+
/**
427+
* pci_walk_bus - walk devices on/under bus, calling callback.
428+
* @top: bus whose devices should be walked
429+
* @cb: callback to be called for each device found
430+
* @userdata: arbitrary pointer to be passed to callback
431+
*
432+
* Walk the given bus, including any bridged devices
433+
* on buses under this bus. Call the provided callback
434+
* on each device found.
435+
*
436+
* We check the return of @cb each time. If it returns anything
437+
* other than 0, we break out.
438+
*/
439+
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
440+
{
441+
__pci_walk_bus(top, cb, userdata, false);
435442
}
436443
EXPORT_SYMBOL_GPL(pci_walk_bus);
437444

445+
void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *), void *userdata)
446+
{
447+
lockdep_assert_held(&pci_bus_sem);
448+
449+
__pci_walk_bus(top, cb, userdata, true);
450+
}
451+
EXPORT_SYMBOL_GPL(pci_walk_bus_locked);
452+
438453
struct pci_bus *pci_bus_get(struct pci_bus *bus)
439454
{
440455
if (bus)

drivers/pci/controller/dwc/pcie-qcom.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -972,7 +972,7 @@ static int qcom_pcie_enable_aspm(struct pci_dev *pdev, void *userdata)
972972
* Downstream devices need to be in D0 state before enabling PCI PM
973973
* substates.
974974
*/
975-
pci_set_power_state(pdev, PCI_D0);
975+
pci_set_power_state_locked(pdev, PCI_D0);
976976
pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
977977

978978
return 0;

drivers/pci/pci.c

Lines changed: 52 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1354,6 +1354,7 @@ int pci_power_up(struct pci_dev *dev)
13541354
/**
13551355
* pci_set_full_power_state - Put a PCI device into D0 and update its state
13561356
* @dev: PCI device to power up
1357+
* @locked: whether pci_bus_sem is held
13571358
*
13581359
* Call pci_power_up() to put @dev into D0, read from its PCI_PM_CTRL register
13591360
* to confirm the state change, restore its BARs if they might be lost and
@@ -1363,7 +1364,7 @@ int pci_power_up(struct pci_dev *dev)
13631364
* to D0, it is more efficient to use pci_power_up() directly instead of this
13641365
* function.
13651366
*/
1366-
static int pci_set_full_power_state(struct pci_dev *dev)
1367+
static int pci_set_full_power_state(struct pci_dev *dev, bool locked)
13671368
{
13681369
u16 pmcsr;
13691370
int ret;
@@ -1399,7 +1400,7 @@ static int pci_set_full_power_state(struct pci_dev *dev)
13991400
}
14001401

14011402
if (dev->bus->self)
1402-
pcie_aspm_pm_state_change(dev->bus->self);
1403+
pcie_aspm_pm_state_change(dev->bus->self, locked);
14031404

14041405
return 0;
14051406
}
@@ -1428,10 +1429,22 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
14281429
pci_walk_bus(bus, __pci_dev_set_current_state, &state);
14291430
}
14301431

1432+
static void __pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state, bool locked)
1433+
{
1434+
if (!bus)
1435+
return;
1436+
1437+
if (locked)
1438+
pci_walk_bus_locked(bus, __pci_dev_set_current_state, &state);
1439+
else
1440+
pci_walk_bus(bus, __pci_dev_set_current_state, &state);
1441+
}
1442+
14311443
/**
14321444
* pci_set_low_power_state - Put a PCI device into a low-power state.
14331445
* @dev: PCI device to handle.
14341446
* @state: PCI power state (D1, D2, D3hot) to put the device into.
1447+
* @locked: whether pci_bus_sem is held
14351448
*
14361449
* Use the device's PCI_PM_CTRL register to put it into a low-power state.
14371450
*
@@ -1442,7 +1455,7 @@ void pci_bus_set_current_state(struct pci_bus *bus, pci_power_t state)
14421455
* 0 if device already is in the requested state.
14431456
* 0 if device's power state has been successfully changed.
14441457
*/
1445-
static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
1458+
static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
14461459
{
14471460
u16 pmcsr;
14481461

@@ -1496,29 +1509,12 @@ static int pci_set_low_power_state(struct pci_dev *dev, pci_power_t state)
14961509
pci_power_name(state));
14971510

14981511
if (dev->bus->self)
1499-
pcie_aspm_pm_state_change(dev->bus->self);
1512+
pcie_aspm_pm_state_change(dev->bus->self, locked);
15001513

15011514
return 0;
15021515
}
15031516

1504-
/**
1505-
* pci_set_power_state - Set the power state of a PCI device
1506-
* @dev: PCI device to handle.
1507-
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
1508-
*
1509-
* Transition a device to a new power state, using the platform firmware and/or
1510-
* the device's PCI PM registers.
1511-
*
1512-
* RETURN VALUE:
1513-
* -EINVAL if the requested state is invalid.
1514-
* -EIO if device does not support PCI PM or its PM capabilities register has a
1515-
* wrong version, or device doesn't support the requested state.
1516-
* 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
1517-
* 0 if device already is in the requested state.
1518-
* 0 if the transition is to D3 but D3 is not supported.
1519-
* 0 if device's power state has been successfully changed.
1520-
*/
1521-
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
1517+
static int __pci_set_power_state(struct pci_dev *dev, pci_power_t state, bool locked)
15221518
{
15231519
int error;
15241520

@@ -1542,7 +1538,7 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
15421538
return 0;
15431539

15441540
if (state == PCI_D0)
1545-
return pci_set_full_power_state(dev);
1541+
return pci_set_full_power_state(dev, locked);
15461542

15471543
/*
15481544
* This device is quirked not to be put into D3, so don't put it in
@@ -1556,25 +1552,55 @@ int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
15561552
* To put the device in D3cold, put it into D3hot in the native
15571553
* way, then put it into D3cold using platform ops.
15581554
*/
1559-
error = pci_set_low_power_state(dev, PCI_D3hot);
1555+
error = pci_set_low_power_state(dev, PCI_D3hot, locked);
15601556

15611557
if (pci_platform_power_transition(dev, PCI_D3cold))
15621558
return error;
15631559

15641560
/* Powering off a bridge may power off the whole hierarchy */
15651561
if (dev->current_state == PCI_D3cold)
1566-
pci_bus_set_current_state(dev->subordinate, PCI_D3cold);
1562+
__pci_bus_set_current_state(dev->subordinate, PCI_D3cold, locked);
15671563
} else {
1568-
error = pci_set_low_power_state(dev, state);
1564+
error = pci_set_low_power_state(dev, state, locked);
15691565

15701566
if (pci_platform_power_transition(dev, state))
15711567
return error;
15721568
}
15731569

15741570
return 0;
15751571
}
1572+
1573+
/**
1574+
* pci_set_power_state - Set the power state of a PCI device
1575+
* @dev: PCI device to handle.
1576+
* @state: PCI power state (D0, D1, D2, D3hot) to put the device into.
1577+
*
1578+
* Transition a device to a new power state, using the platform firmware and/or
1579+
* the device's PCI PM registers.
1580+
*
1581+
* RETURN VALUE:
1582+
* -EINVAL if the requested state is invalid.
1583+
* -EIO if device does not support PCI PM or its PM capabilities register has a
1584+
* wrong version, or device doesn't support the requested state.
1585+
* 0 if the transition is to D1 or D2 but D1 and D2 are not supported.
1586+
* 0 if device already is in the requested state.
1587+
* 0 if the transition is to D3 but D3 is not supported.
1588+
* 0 if device's power state has been successfully changed.
1589+
*/
1590+
int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
1591+
{
1592+
return __pci_set_power_state(dev, state, false);
1593+
}
15761594
EXPORT_SYMBOL(pci_set_power_state);
15771595

1596+
int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
1597+
{
1598+
lockdep_assert_held(&pci_bus_sem);
1599+
1600+
return __pci_set_power_state(dev, state, true);
1601+
}
1602+
EXPORT_SYMBOL(pci_set_power_state_locked);
1603+
15781604
#define PCI_EXP_SAVE_REGS 7
15791605

15801606
static struct pci_cap_saved_state *_pci_find_saved_cap(struct pci_dev *pci_dev,

drivers/pci/pci.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -571,12 +571,12 @@ int pcie_retrain_link(struct pci_dev *pdev, bool use_lt);
571571
#ifdef CONFIG_PCIEASPM
572572
void pcie_aspm_init_link_state(struct pci_dev *pdev);
573573
void pcie_aspm_exit_link_state(struct pci_dev *pdev);
574-
void pcie_aspm_pm_state_change(struct pci_dev *pdev);
574+
void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked);
575575
void pcie_aspm_powersave_config_link(struct pci_dev *pdev);
576576
#else
577577
static inline void pcie_aspm_init_link_state(struct pci_dev *pdev) { }
578578
static inline void pcie_aspm_exit_link_state(struct pci_dev *pdev) { }
579-
static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev) { }
579+
static inline void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked) { }
580580
static inline void pcie_aspm_powersave_config_link(struct pci_dev *pdev) { }
581581
#endif
582582

drivers/pci/pcie/aspm.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1003,8 +1003,11 @@ void pcie_aspm_exit_link_state(struct pci_dev *pdev)
10031003
up_read(&pci_bus_sem);
10041004
}
10051005

1006-
/* @pdev: the root port or switch downstream port */
1007-
void pcie_aspm_pm_state_change(struct pci_dev *pdev)
1006+
/*
1007+
* @pdev: the root port or switch downstream port
1008+
* @locked: whether pci_bus_sem is held
1009+
*/
1010+
void pcie_aspm_pm_state_change(struct pci_dev *pdev, bool locked)
10081011
{
10091012
struct pcie_link_state *link = pdev->link_state;
10101013

@@ -1014,12 +1017,14 @@ void pcie_aspm_pm_state_change(struct pci_dev *pdev)
10141017
* Devices changed PM state, we should recheck if latency
10151018
* meets all functions' requirement
10161019
*/
1017-
down_read(&pci_bus_sem);
1020+
if (!locked)
1021+
down_read(&pci_bus_sem);
10181022
mutex_lock(&aspm_lock);
10191023
pcie_update_aspm_capable(link->root);
10201024
pcie_config_aspm_path(link);
10211025
mutex_unlock(&aspm_lock);
1022-
up_read(&pci_bus_sem);
1026+
if (!locked)
1027+
up_read(&pci_bus_sem);
10231028
}
10241029

10251030
void pcie_aspm_powersave_config_link(struct pci_dev *pdev)

include/linux/pci.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1422,6 +1422,7 @@ int pci_load_and_free_saved_state(struct pci_dev *dev,
14221422
struct pci_saved_state **state);
14231423
int pci_platform_power_transition(struct pci_dev *dev, pci_power_t state);
14241424
int pci_set_power_state(struct pci_dev *dev, pci_power_t state);
1425+
int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state);
14251426
pci_power_t pci_choose_state(struct pci_dev *dev, pm_message_t state);
14261427
bool pci_pme_capable(struct pci_dev *dev, pci_power_t state);
14271428
void pci_pme_active(struct pci_dev *dev, bool enable);
@@ -1625,6 +1626,8 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
16251626

16261627
void pci_walk_bus(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
16271628
void *userdata);
1629+
void pci_walk_bus_locked(struct pci_bus *top, int (*cb)(struct pci_dev *, void *),
1630+
void *userdata);
16281631
int pci_cfg_space_size(struct pci_dev *dev);
16291632
unsigned char pci_bus_max_busnr(struct pci_bus *bus);
16301633
void pci_setup_bridge(struct pci_bus *bus);
@@ -2025,6 +2028,8 @@ static inline int pci_save_state(struct pci_dev *dev) { return 0; }
20252028
static inline void pci_restore_state(struct pci_dev *dev) { }
20262029
static inline int pci_set_power_state(struct pci_dev *dev, pci_power_t state)
20272030
{ return 0; }
2031+
static inline int pci_set_power_state_locked(struct pci_dev *dev, pci_power_t state)
2032+
{ return 0; }
20282033
static inline int pci_wake_from_d3(struct pci_dev *dev, bool enable)
20292034
{ return 0; }
20302035
static inline pci_power_t pci_choose_state(struct pci_dev *dev,

0 commit comments

Comments
 (0)