Skip to content

Commit 6b596e6

Browse files
author
Peter Zijlstra
committed
sched: Provide rt_mutex specific scheduler helpers
With PREEMPT_RT there is a rt_mutex recursion problem where
sched_submit_work() can use an rtlock (aka spinlock_t). More
specifically what happens is:

  mutex_lock() /* really rt_mutex */
    ...
      __rt_mutex_slowlock_locked()
        task_blocks_on_rt_mutex()
          // enqueue current task as waiter
          // do PI chain walk
        rt_mutex_slowlock_block()
          schedule()
            sched_submit_work()
              ...
              spin_lock() /* really rtlock */
                ...
                  __rt_mutex_slowlock_locked()
                    task_blocks_on_rt_mutex()
                      // enqueue current task as waiter *AGAIN*
                      // *CONFUSION*

Fix this by making rt_mutex do the sched_submit_work() early, before
it enqueues itself as a waiter -- before it even knows *if* it will
wait.

[[ basically Thomas' patch but with different naming and a few asserts
   added ]]

Originally-by: Thomas Gleixner <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Signed-off-by: Sebastian Andrzej Siewior <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent de1474b commit 6b596e6

File tree

3 files changed

+39
-4
lines changed

3 files changed

+39
-4
lines changed

include/linux/sched.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,9 @@ struct task_struct {
911911
* ->sched_remote_wakeup gets used, so it can be in this word.
912912
*/
913913
unsigned sched_remote_wakeup:1;
914+
#ifdef CONFIG_RT_MUTEXES
915+
unsigned sched_rt_mutex:1;
916+
#endif
914917

915918
/* Bit to tell LSMs we're in execve(): */
916919
unsigned in_execve:1;

include/linux/sched/rt.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk)
3030
}
3131

3232
#ifdef CONFIG_RT_MUTEXES
33+
extern void rt_mutex_pre_schedule(void);
34+
extern void rt_mutex_schedule(void);
35+
extern void rt_mutex_post_schedule(void);
36+
3337
/*
3438
* Must hold either p->pi_lock or task_rq(p)->lock.
3539
*/

kernel/sched/core.c

Lines changed: 32 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6723,9 +6723,6 @@ static inline void sched_submit_work(struct task_struct *tsk)
67236723
static DEFINE_WAIT_OVERRIDE_MAP(sched_map, LD_WAIT_CONFIG);
67246724
unsigned int task_flags;
67256725

6726-
if (task_is_running(tsk))
6727-
return;
6728-
67296726
/*
67306727
* Establish LD_WAIT_CONFIG context to ensure none of the code called
67316728
* will use a blocking primitive -- which would lead to recursion.
@@ -6783,7 +6780,12 @@ asmlinkage __visible void __sched schedule(void)
67836780
{
67846781
struct task_struct *tsk = current;
67856782

6786-
sched_submit_work(tsk);
6783+
#ifdef CONFIG_RT_MUTEXES
6784+
lockdep_assert(!tsk->sched_rt_mutex);
6785+
#endif
6786+
6787+
if (!task_is_running(tsk))
6788+
sched_submit_work(tsk);
67876789
__schedule_loop(SM_NONE);
67886790
sched_update_worker(tsk);
67896791
}
@@ -7044,6 +7046,32 @@ static void __setscheduler_prio(struct task_struct *p, int prio)
70447046

70457047
#ifdef CONFIG_RT_MUTEXES
70467048

7049+
/*
7050+
* Would be more useful with typeof()/auto_type but they don't mix with
7051+
* bit-fields. Since it's a local thing, use int. Keep the generic sounding
7052+
* name such that if someone were to implement this function we get to compare
7053+
* notes.
*
* fetch_and_set(x, v): store @v into @x and evaluate to the value @x held
* before the store. This is a plain read followed by a plain write, not an
* atomic exchange, and @x is evaluated twice, so only pass lvalues without
* side effects. The old value is carried in an int.
7054+
*/
7055+
#define fetch_and_set(x, v) ({ int _x = (x); (x) = (v); _x; })
7056+
7057+
/*
 * rt_mutex_pre_schedule - first of the three rt_mutex scheduling steps.
 *
 * Marks current as being inside the rt_mutex scheduling sequence by setting
 * current->sched_rt_mutex (asserting it was clear before), then runs
 * sched_submit_work() for current. Per the commit description this is meant
 * to be done early, before the task enqueues itself as a waiter, so that any
 * rtlock taken by sched_submit_work() cannot recurse into the waiter setup.
 *
 * NOTE(review): the flag is updated inside the lockdep_assert() argument, so
 * it only changes when lockdep_assert() actually evaluates its argument --
 * confirm this is the intended behaviour for non-lockdep configurations.
 */
void rt_mutex_pre_schedule(void)
7058+
{
7059+
lockdep_assert(!fetch_and_set(current->sched_rt_mutex, 1));
7060+
sched_submit_work(current);
7061+
}
7062+
7063+
/*
 * rt_mutex_schedule - second of the three rt_mutex scheduling steps.
 *
 * Asserts that current->sched_rt_mutex is set and then runs
 * __schedule_loop(SM_NONE). It deliberately calls neither
 * sched_submit_work() nor sched_update_worker(); those are done by
 * rt_mutex_pre_schedule() and rt_mutex_post_schedule() respectively.
 */
void rt_mutex_schedule(void)
7064+
{
7065+
lockdep_assert(current->sched_rt_mutex);
7066+
__schedule_loop(SM_NONE);
7067+
}
7068+
7069+
/*
 * rt_mutex_post_schedule - last of the three rt_mutex scheduling steps.
 *
 * Runs sched_update_worker() for current and then clears
 * current->sched_rt_mutex, asserting that it was set on entry.
 *
 * NOTE(review): the flag is cleared inside the lockdep_assert() argument, so
 * it only changes when lockdep_assert() actually evaluates its argument --
 * confirm this matches how rt_mutex_pre_schedule() sets it.
 */
void rt_mutex_post_schedule(void)
7070+
{
7071+
sched_update_worker(current);
7072+
lockdep_assert(fetch_and_set(current->sched_rt_mutex, 0));
7073+
}
7074+
70477075
static inline int __rt_effective_prio(struct task_struct *pi_task, int prio)
70487076
{
70497077
if (pi_task)

0 commit comments

Comments
 (0)