Commit 7a73c54

David Jeffery authored and gregkh committed
blk-mq: avoid double ->queue_rq() because of early timeout
[ Upstream commit 82c2294 ]

David Jeffery found a double ->queue_rq() issue. So far it can be
triggered in VM use cases because of long vmexit latency, preempt
latency of the vCPU pthread, or a long page fault in the vCPU pthread:
a block IO request may time out before being queued to hardware, but
after blk_mq_start_request() has been called during ->queue_rq(). The
timeout handler then handles the request by requeueing it, so
->queue_rq() runs twice on the same request and the kernel panics.

So far it is the driver's responsibility to cover the race between
timeout and completion, so in theory this is supposed to be solved in
the driver, given the driver has enough knowledge. But it is really a
common problem: lots of drivers could have a similar issue, it would be
hard to fix all affected drivers, and it isn't easy for a driver to
handle the race in the first place. So David suggests this patch, which
solves the issue by draining in-progress ->queue_rq() calls.

Cc: Stefan Hajnoczi <[email protected]>
Cc: Keith Busch <[email protected]>
Cc: [email protected]
Cc: Bart Van Assche <[email protected]>
Signed-off-by: David Jeffery <[email protected]>
Signed-off-by: Ming Lei <[email protected]>
Reviewed-by: Bart Van Assche <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Jens Axboe <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
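For illustration, a rough timeline of the race described above,
reconstructed from the commit message (a sketch; the interleaving shown
is one possible schedule, not the only one):

/*
 * One possible schedule for the double ->queue_rq() race:
 *
 *   submission context                  timeout work
 *   ------------------                  ------------
 *   ->queue_rq(hctx, bd)
 *     blk_mq_start_request(rq)
 *       (deadline armed)
 *   ... vCPU stalls (vmexit, preemption,
 *       or page fault) past rq's deadline ...
 *                                       timer fires, rq looks expired
 *                                       ->timeout() requeues rq
 *                                       rq is dispatched again:
 *                                       second ->queue_rq() on same rq
 *   ... stalled ->queue_rq() resumes and
 *       keeps using rq -> kernel panic
 */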
1 parent 6595b6b

File tree

1 file changed: +44 −12

block/blk-mq.c

Lines changed: 44 additions & 12 deletions
@@ -1442,7 +1442,13 @@ static void blk_mq_rq_timed_out(struct request *req)
 	blk_add_timer(req);
 }
 
-static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
+struct blk_expired_data {
+	bool has_timedout_rq;
+	unsigned long next;
+	unsigned long timeout_start;
+};
+
+static bool blk_mq_req_expired(struct request *rq, struct blk_expired_data *expired)
 {
 	unsigned long deadline;
 
@@ -1452,13 +1458,13 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
 		return false;
 
 	deadline = READ_ONCE(rq->deadline);
-	if (time_after_eq(jiffies, deadline))
+	if (time_after_eq(expired->timeout_start, deadline))
 		return true;
 
-	if (*next == 0)
-		*next = deadline;
-	else if (time_after(*next, deadline))
-		*next = deadline;
+	if (expired->next == 0)
+		expired->next = deadline;
+	else if (time_after(expired->next, deadline))
+		expired->next = deadline;
 	return false;
 }
 
@@ -1472,7 +1478,7 @@ void blk_mq_put_rq_ref(struct request *rq)
 
 static bool blk_mq_check_expired(struct request *rq, void *priv)
 {
-	unsigned long *next = priv;
+	struct blk_expired_data *expired = priv;
 
 	/*
 	 * blk_mq_queue_tag_busy_iter() has locked the request, so it cannot
@@ -1481,7 +1487,18 @@ static bool blk_mq_check_expired(struct request *rq, void *priv)
 	 * it was completed and reallocated as a new request after returning
 	 * from blk_mq_check_expired().
 	 */
-	if (blk_mq_req_expired(rq, next))
+	if (blk_mq_req_expired(rq, expired)) {
+		expired->has_timedout_rq = true;
+		return false;
+	}
+	return true;
+}
+
+static bool blk_mq_handle_expired(struct request *rq, void *priv)
+{
+	struct blk_expired_data *expired = priv;
+
+	if (blk_mq_req_expired(rq, expired))
 		blk_mq_rq_timed_out(rq);
 	return true;
 }
@@ -1490,7 +1507,9 @@ static void blk_mq_timeout_work(struct work_struct *work)
 {
 	struct request_queue *q =
 		container_of(work, struct request_queue, timeout_work);
-	unsigned long next = 0;
+	struct blk_expired_data expired = {
+		.timeout_start = jiffies,
+	};
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
 
@@ -1510,10 +1529,23 @@ static void blk_mq_timeout_work(struct work_struct *work)
 	if (!percpu_ref_tryget(&q->q_usage_counter))
 		return;
 
-	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &next);
+	/* check if there is any timed-out request */
+	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &expired);
+	if (expired.has_timedout_rq) {
+		/*
+		 * Before walking tags, we must ensure any submit started
+		 * before the current time has finished. Since the submit
+		 * uses srcu or rcu, wait for a synchronization point to
+		 * ensure all running submits have finished
+		 */
+		blk_mq_wait_quiesce_done(q);
+
+		expired.next = 0;
+		blk_mq_queue_tag_busy_iter(q, blk_mq_handle_expired, &expired);
+	}
 
-	if (next != 0) {
-		mod_timer(&q->timeout, next);
+	if (expired.next != 0) {
+		mod_timer(&q->timeout, expired.next);
 	} else {
 		/*
 		 * Request timeouts are handled as a forward rolling timer. If
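Distilled from the last hunk above, the patched blk_mq_timeout_work()
now detects and handles expiry in two passes; a simplified excerpt (the
surrounding q_usage_counter check and hctx walk are omitted):

	/* Pass 1: only record whether any request has expired. */
	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &expired);

	if (expired.has_timedout_rq) {
		/*
		 * Drain in-progress ->queue_rq() calls: submission runs
		 * under srcu/rcu, so waiting for a grace period ensures
		 * every ->queue_rq() that raced with pass 1 has returned.
		 */
		blk_mq_wait_quiesce_done(q);

		/* Pass 2: time out the expired requests for real. */
		expired.next = 0;
		blk_mq_queue_tag_busy_iter(q, blk_mq_handle_expired, &expired);
	}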
