  *
  * Writer lock-stealing by Alex Shi <[email protected]>
  * and Michel Lespinasse <[email protected]>
+ *
+ * Optimistic spinning by Tim Chen <[email protected]>
+ * and Davidlohr Bueso <[email protected]>. Based on mutexes.
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/sched/rt.h>
+
+#include "mcs_spinlock.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -76,6 +82,11 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
+#ifdef CONFIG_SMP
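+	/* not write-owned yet; the MCS queue of optimistic spinners starts empty */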
+	sem->owner = NULL;
+	sem->osq = NULL;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
@@ -190,7 +200,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
 }
 
 /*
- * wait for the read lock to be granted
+ * Wait for the read lock to be granted
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
@@ -237,64 +247,236 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
 	return sem;
 }
 
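+/*
+ * Try to acquire the write lock from the slowpath.
+ * Must be called with sem->wait_lock held.
+ */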
+static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
+{
+	if (!(count & RWSEM_ACTIVE_MASK)) {
+		/* try acquiring the write lock */
+		if (sem->count == RWSEM_WAITING_BIAS &&
+		    cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+			    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
+			if (!list_is_singular(&sem->wait_list))
+				rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
+			return true;
+		}
+	}
+	return false;
+}
+
+#ifdef CONFIG_SMP
 /*
- * wait until we successfully acquire the write lock
+ * Try to acquire the write lock before the writer has been put on the wait queue.
+ */
+static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
+{
+	long old, count = ACCESS_ONCE(sem->count);
+
+	while (true) {
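+		/* only attempt the cmpxchg while there are no active lockers */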
+		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
+			return false;
+
+		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+		if (old == count)
+			return true;
+
+		count = old;
+	}
+}
+
+static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool on_cpu = true;
+
+	if (need_resched())
+		return false;
+
+	rcu_read_lock();
+	owner = ACCESS_ONCE(sem->owner);
+	if (owner)
+		on_cpu = owner->on_cpu;
+	rcu_read_unlock();
+
+	/*
+	 * If sem->owner is not set, the owner may have just acquired
+	 * the rwsem without recording itself yet, or the rwsem may
+	 * already have been released; either way it is safe to spin.
+	 */
+	return on_cpu;
+}
+
+static inline bool owner_running(struct rw_semaphore *sem,
+				 struct task_struct *owner)
+{
+	if (sem->owner != owner)
+		return false;
+
+	/*
+	 * Ensure we emit the owner->on_cpu dereference _after_ checking
+	 * that sem->owner still matches owner. If that fails, owner might
+	 * point to free()d memory; if it still matches, the rcu_read_lock()
+	 * ensures the memory stays valid.
+	 */
+	barrier();
+
+	return owner->on_cpu;
+}
+
+static noinline
+bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
+{
+	rcu_read_lock();
+	while (owner_running(sem, owner)) {
+		if (need_resched())
+			break;
+
+		arch_mutex_cpu_relax();
+	}
+	rcu_read_unlock();
+
+	/*
+	 * We break out of the loop above on need_resched() or when the
+	 * owner changed, which is a sign of heavy contention. Return
+	 * success only when sem->owner is NULL.
+	 */
+	return sem->owner == NULL;
+}
+
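+/*
+ * Spin for the rwsem while the owner runs on another CPU, in the hope
+ * that writer hold times are short and the lock can be taken without
+ * sleeping or touching the wait queue.
+ */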
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	struct task_struct *owner;
+	bool taken = false;
+
+	preempt_disable();
+
+	/* sem->wait_lock should not be held when doing optimistic spinning */
+	if (!rwsem_can_spin_on_owner(sem))
+		goto done;
+
+	if (!osq_lock(&sem->osq))
+		goto done;
+
+	while (true) {
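+		/*
+		 * Spin while the owner is running; if we stop spinning
+		 * with the lock still owned, fall back to the wait queue.
+		 */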
+		owner = ACCESS_ONCE(sem->owner);
+		if (owner && !rwsem_spin_on_owner(sem, owner))
+			break;
+
+		/* try to acquire the write lock without queuing */
+		if (rwsem_try_write_lock_unqueued(sem)) {
+			taken = true;
+			break;
+		}
+
+		/*
+		 * When there's no owner, we might have preempted between the
+		 * owner acquiring the lock and setting the owner field. If
+		 * we're an RT task, spinning would live-lock, since we would
+		 * never let the owner complete.
+		 */
+		if (!owner && (need_resched() || rt_task(current)))
+			break;
+
+		/*
+		 * The cpu_relax() call is a compiler barrier which forces
+		 * everything in this loop to be re-loaded. We don't need
+		 * memory barriers as we'll eventually observe the right
+		 * values at the cost of a few extra spins.
+		 */
+		arch_mutex_cpu_relax();
+	}
+	osq_unlock(&sem->osq);
+done:
+	preempt_enable();
+	return taken;
+}
+
+#else
+static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
+{
+	return false;
+}
+#endif
+
+/*
+ * Wait until we successfully acquire the write lock
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 {
-	long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
+	long count;
+	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
-	struct task_struct *tsk = current;
 
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
+	/* undo write bias from down_write operation, stop active locking */
+	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+
+	/* do optimistic spinning and steal lock if possible */
+	if (rwsem_optimistic_spin(sem))
+		return sem;
+
+	/*
+	 * Optimistic spinning failed, proceed to the slowpath
+	 * and block until we can acquire the sem.
+	 */
+	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_WRITE;
 
 	raw_spin_lock_irq(&sem->wait_lock);
+
+	/* account for any queued threads ahead of us before changing the list */
 	if (list_empty(&sem->wait_list))
-		adjustment += RWSEM_WAITING_BIAS;
+		waiting = false;
+
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we're now waiting on the lock, but no longer actively locking */
-	count = rwsem_atomic_update(adjustment, sem);
+	if (waiting) {
+		count = ACCESS_ONCE(sem->count);
 
-	/* If there were already threads queued before us and there are no
-	 * active writers, the lock must be read owned; so we try to wake
-	 * any read locks that were queued ahead of us. */
-	if (count > RWSEM_WAITING_BIAS &&
-	    adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+		/*
+		 * If there were already threads queued before us and there are no
+		 * active writers, the lock must be read owned; so we try to wake
+		 * any read locks that were queued ahead of us.
+		 */
+		if (count > RWSEM_WAITING_BIAS)
+			sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+
+	} else
+		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	set_current_state(TASK_UNINTERRUPTIBLE);
 	while (true) {
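+		/* sem->wait_lock is held here, as rwsem_try_write_lock() requires */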
-		if (!(count & RWSEM_ACTIVE_MASK)) {
-			/* Try acquiring the write lock. */
-			count = RWSEM_ACTIVE_WRITE_BIAS;
-			if (!list_is_singular(&sem->wait_list))
-				count += RWSEM_WAITING_BIAS;
-
-			if (sem->count == RWSEM_WAITING_BIAS &&
-			    cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
-							RWSEM_WAITING_BIAS)
-				break;
-		}
-
+		if (rwsem_try_write_lock(count, sem))
+			break;
 		raw_spin_unlock_irq(&sem->wait_lock);
 
 		/* Block until there are no active lockers. */
 		do {
 			schedule();
-			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			set_current_state(TASK_UNINTERRUPTIBLE);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
+	__set_current_state(TASK_RUNNING);
 
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
-	tsk->state = TASK_RUNNING;
 
 	return sem;
 }