Skip to content

Commit 55917db

Browse files
Alexander Aringgregkh
Alexander Aring
authored and committed
fs: dlm: don't call kernel_getpeername() in error_report()
[ Upstream commit 4c3d905 ] In some cases kernel_getpeername() will take the socket lock, which is already held when the socket layer calls the error_report() callback. Since commit 9dfc685 ("inet: remove races in inet{6}_getname()") this problem has become more likely because the socket lock is now always taken. You will see something like: bob9-u5 login: [ 562.316860] BUG: spinlock recursion on CPU#7, swapper/7/0 [ 562.318562] lock: 0xffff8f2284720088, .magic: dead4ead, .owner: swapper/7/0, .owner_cpu: 7 [ 562.319522] CPU: 7 PID: 0 Comm: swapper/7 Not tainted 5.15.0+ #135 [ 562.320346] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.13.0-2.module+el8.3.0+7353+9de0a3cc 04/01/2014 [ 562.321277] Call Trace: [ 562.321529] <IRQ> [ 562.321734] dump_stack_lvl+0x33/0x42 [ 562.322282] do_raw_spin_lock+0x8b/0xc0 [ 562.322674] lock_sock_nested+0x1e/0x50 [ 562.323057] inet_getname+0x39/0x110 [ 562.323425] ? sock_def_readable+0x80/0x80 [ 562.323838] lowcomms_error_report+0x63/0x260 [dlm] [ 562.324338] ? wait_for_completion_interruptible_timeout+0xd2/0x120 [ 562.324949] ? lock_timer_base+0x67/0x80 [ 562.325330] ? do_raw_spin_unlock+0x49/0xc0 [ 562.325735] ? _raw_spin_unlock_irqrestore+0x1e/0x40 [ 562.326218] ? del_timer+0x54/0x80 [ 562.326549] sk_error_report+0x12/0x70 [ 562.326919] tcp_validate_incoming+0x3c8/0x530 [ 562.327347] ? kvm_clock_read+0x14/0x30 [ 562.327718] ? ktime_get+0x3b/0xa0 [ 562.328055] tcp_rcv_established+0x121/0x660 [ 562.328466] tcp_v4_do_rcv+0x132/0x260 [ 562.328835] tcp_v4_rcv+0xcea/0xe20 [ 562.329173] ip_protocol_deliver_rcu+0x35/0x1f0 [ 562.329615] ip_local_deliver_finish+0x54/0x60 [ 562.330050] ip_local_deliver+0xf7/0x110 [ 562.330431] ? inet_rtm_getroute+0x211/0x840 [ 562.330848] ? ip_protocol_deliver_rcu+0x1f0/0x1f0 [ 562.331310] ip_rcv+0xe1/0xf0 [ 562.331603] ? ip_local_deliver+0x110/0x110 [ 562.332011] __netif_receive_skb_core+0x46a/0x1040 [ 562.332476] ? 
inet_gro_receive+0x263/0x2e0 [ 562.332885] __netif_receive_skb_list_core+0x13b/0x2c0 [ 562.333383] netif_receive_skb_list_internal+0x1c8/0x2f0 [ 562.333896] ? update_load_avg+0x7e/0x5e0 [ 562.334285] gro_normal_list.part.149+0x19/0x40 [ 562.334722] napi_complete_done+0x67/0x160 [ 562.335134] virtnet_poll+0x2ad/0x408 [virtio_net] [ 562.335644] __napi_poll+0x28/0x140 [ 562.336012] net_rx_action+0x23d/0x300 [ 562.336414] __do_softirq+0xf2/0x2ea [ 562.336803] irq_exit_rcu+0xc1/0xf0 [ 562.337173] common_interrupt+0xb9/0xd0 Calling kernel_getpeername() in the context of error_report() is, and always was, forbidden. To get rid of the problem we read the peer's destination address directly from the socket structure. While at it, we also fix the message to print the destination port of the inet socket. Fixes: 1a31833 ("DLM: Replace nodeid_to_addr with kernel_getpeername") Reported-by: Bob Peterson <[email protected]> Signed-off-by: Alexander Aring <[email protected]> Signed-off-by: David Teigland <[email protected]> Signed-off-by: Sasha Levin <[email protected]>
1 parent 98923eb commit 55917db

File tree

1 file changed

+20
-22
lines changed

1 file changed

+20
-22
lines changed

fs/dlm/lowcomms.c

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -471,42 +471,40 @@ int dlm_lowcomms_connect_node(int nodeid)
471471
static void lowcomms_error_report(struct sock *sk)
472472
{
473473
struct connection *con;
474-
struct sockaddr_storage saddr;
475474
void (*orig_report)(struct sock *) = NULL;
475+
struct inet_sock *inet;
476476

477477
read_lock_bh(&sk->sk_callback_lock);
478478
con = sock2con(sk);
479479
if (con == NULL)
480480
goto out;
481481

482482
orig_report = listen_sock.sk_error_report;
483-
if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) {
484-
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
485-
"sending to node %d, port %d, "
486-
"sk_err=%d/%d\n", dlm_our_nodeid(),
487-
con->nodeid, dlm_config.ci_tcp_port,
488-
sk->sk_err, sk->sk_err_soft);
489-
} else if (saddr.ss_family == AF_INET) {
490-
struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
491483

484+
inet = inet_sk(sk);
485+
switch (sk->sk_family) {
486+
case AF_INET:
492487
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
493-
"sending to node %d at %pI4, port %d, "
488+
"sending to node %d at %pI4, dport %d, "
494489
"sk_err=%d/%d\n", dlm_our_nodeid(),
495-
con->nodeid, &sin4->sin_addr.s_addr,
496-
dlm_config.ci_tcp_port, sk->sk_err,
490+
con->nodeid, &inet->inet_daddr,
491+
ntohs(inet->inet_dport), sk->sk_err,
497492
sk->sk_err_soft);
498-
} else {
499-
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr;
500-
493+
break;
494+
case AF_INET6:
501495
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
502-
"sending to node %d at %u.%u.%u.%u, "
503-
"port %d, sk_err=%d/%d\n", dlm_our_nodeid(),
504-
con->nodeid, sin6->sin6_addr.s6_addr32[0],
505-
sin6->sin6_addr.s6_addr32[1],
506-
sin6->sin6_addr.s6_addr32[2],
507-
sin6->sin6_addr.s6_addr32[3],
508-
dlm_config.ci_tcp_port, sk->sk_err,
496+
"sending to node %d at %pI6c, "
497+
"dport %d, sk_err=%d/%d\n", dlm_our_nodeid(),
498+
con->nodeid, &sk->sk_v6_daddr,
499+
ntohs(inet->inet_dport), sk->sk_err,
509500
sk->sk_err_soft);
501+
break;
502+
default:
503+
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
504+
"invalid socket family %d set, "
505+
"sk_err=%d/%d\n", dlm_our_nodeid(),
506+
sk->sk_family, sk->sk_err, sk->sk_err_soft);
507+
goto out;
510508
}
511509
out:
512510
read_unlock_bh(&sk->sk_callback_lock);

0 commit comments

Comments
 (0)