Skip to content

Commit c5afac5

Browse files
kvaneeshSasha Levin
authored and
Sasha Levin
committed
powerpc/nvdimm: Pick nearby online node if the device node is not online
[ Upstream commit da1115f ] Currently, nvdimm subsystem expects the device numa node for SCM device to be an online node. It also doesn't try to bring the device numa node online. Hence if we use a non-online numa node as device node we hit crashes like below. This is because we try to access uninitialized NODE_DATA in different code paths. cpu 0x0: Vector: 300 (Data Access) at [c0000000fac53170] pc: c0000000004bbc50: ___slab_alloc+0x120/0xca0 lr: c0000000004bc834: __slab_alloc+0x64/0xc0 sp: c0000000fac53400 msr: 8000000002009033 dar: 73e8 dsisr: 80000 current = 0xc0000000fabb6d80 paca = 0xc000000003870000 irqmask: 0x03 irq_happened: 0x01 pid = 7, comm = kworker/u16:0 Linux version 5.2.0-06234-g76bd729b2644 (kvaneesh@ltc-boston123) (gcc version 7.4.0 (Ubuntu 7.4.0-1ubuntu1~18.04.1)) #135 SMP Thu Jul 11 05:36:30 CDT 2019 enter ? for help [link register ] c0000000004bc834 __slab_alloc+0x64/0xc0 [c0000000fac53400] c0000000fac53480 (unreliable) [c0000000fac53500] c0000000004bc818 __slab_alloc+0x48/0xc0 [c0000000fac53560] c0000000004c30a0 __kmalloc_node_track_caller+0x3c0/0x6b0 [c0000000fac535d0] c000000000cfafe4 devm_kmalloc+0x74/0xc0 [c0000000fac53600] c000000000d69434 nd_region_activate+0x144/0x560 [c0000000fac536d0] c000000000d6b19c nd_region_probe+0x17c/0x370 [c0000000fac537b0] c000000000d6349c nvdimm_bus_probe+0x10c/0x230 [c0000000fac53840] c000000000cf3cc4 really_probe+0x254/0x4e0 [c0000000fac538d0] c000000000cf429c driver_probe_device+0x16c/0x1e0 [c0000000fac53950] c000000000cf0b44 bus_for_each_drv+0x94/0x130 [c0000000fac539b0] c000000000cf392c __device_attach+0xdc/0x200 [c0000000fac53a50] c000000000cf231c bus_probe_device+0x4c/0xf0 [c0000000fac53a90] c000000000ced268 device_add+0x528/0x810 [c0000000fac53b60] c000000000d62a58 nd_async_device_register+0x28/0xa0 [c0000000fac53bd0] c0000000001ccb8c async_run_entry_fn+0xcc/0x1f0 [c0000000fac53c50] c0000000001bcd9c process_one_work+0x46c/0x860 [c0000000fac53d20] c0000000001bd4f4 worker_thread+0x364/0x5f0 
[c0000000fac53db0] c0000000001c7260 kthread+0x1b0/0x1c0 [c0000000fac53e20] c00000000000b954 ret_from_kernel_thread+0x5c/0x68 The patch tries to fix this by picking the nearest online node as the SCM node. This does have a problem of us losing the information that the SCM node is equidistant from two other online nodes. If applications need to understand these fine-grained details we should express them like x86 does via /sys/devices/system/node/nodeX/accessY/initiators/ With the patch we get # numactl -H available: 2 nodes (0-1) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 1 size: 130865 MB node 1 free: 129130 MB node distances: node 0 1 0: 10 20 1: 20 10 # cat /sys/bus/nd/devices/region0/numa_node 0 # dmesg | grep papr_scm [ 91.332305] papr_scm ibm,persistent-memory:ibm,pmemory@44104001: Region registered with target node 2 and online node 0 Signed-off-by: Aneesh Kumar K.V <[email protected]> Signed-off-by: Michael Ellerman <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Sasha Levin <[email protected]>
1 parent ec1da61 commit c5afac5

File tree

1 file changed

+27
-2
lines changed

1 file changed

+27
-2
lines changed

arch/powerpc/platforms/pseries/papr_scm.c

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,12 +199,32 @@ static const struct attribute_group *papr_scm_dimm_groups[] = {
199199
NULL,
200200
};
201201

202+
static inline int papr_scm_node(int node)
203+
{
204+
int min_dist = INT_MAX, dist;
205+
int nid, min_node;
206+
207+
if ((node == NUMA_NO_NODE) || node_online(node))
208+
return node;
209+
210+
min_node = first_online_node;
211+
for_each_online_node(nid) {
212+
dist = node_distance(node, nid);
213+
if (dist < min_dist) {
214+
min_dist = dist;
215+
min_node = nid;
216+
}
217+
}
218+
return min_node;
219+
}
220+
202221
static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
203222
{
204223
struct device *dev = &p->pdev->dev;
205224
struct nd_mapping_desc mapping;
206225
struct nd_region_desc ndr_desc;
207226
unsigned long dimm_flags;
227+
int target_nid, online_nid;
208228

209229
p->bus_desc.ndctl = papr_scm_ndctl;
210230
p->bus_desc.module = THIS_MODULE;
@@ -243,8 +263,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
243263

244264
memset(&ndr_desc, 0, sizeof(ndr_desc));
245265
ndr_desc.attr_groups = region_attr_groups;
246-
ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
247-
ndr_desc.target_node = ndr_desc.numa_node;
266+
target_nid = dev_to_node(&p->pdev->dev);
267+
online_nid = papr_scm_node(target_nid);
268+
ndr_desc.numa_node = online_nid;
269+
ndr_desc.target_node = target_nid;
248270
ndr_desc.res = &p->res;
249271
ndr_desc.of_node = p->dn;
250272
ndr_desc.provider_data = p;
@@ -259,6 +281,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
259281
ndr_desc.res, p->dn);
260282
goto err;
261283
}
284+
if (target_nid != online_nid)
285+
dev_info(dev, "Region registered with target node %d and online node %d",
286+
target_nid, online_nid);
262287

263288
return 0;
264289

0 commit comments

Comments
 (0)