Skip to content

Commit 013f247

Browse files
mripardpopcornmix
authored andcommitted
drm/vc4: hvs: Defer dlist slots deallocation
During normal operations, the cursor position update is done through an asynchronous plane update, which on the vc4 driver basically just modifies the right dlist word to move the plane to the new coordinates. However, when we have the overscan margins setup, we fall back to a regular commit when we are next to the edges. And since that commit happens to be on a cursor plane, it's considered a legacy cursor update by KMS. The main difference it makes is that it won't wait for its completion (ie, next vblank) before returning. This means if we have multiple commits happening in rapid succession, we can have several of them happening before the next vblank. In parallel, our dlist allocation is tied to a CRTC state, and each time we do a commit we end up with a new CRTC state, with the previous one being freed. This means that we free our previous dlist entry (but don't clear it though) every time a new one is being committed. Now, if we were to have two commits happening before the next vblank, we could end up freeing reusing the same dlist entries before the next vblank. Indeed, we would start from an initial state taking, for example, the dlist entries 10 to 20, then start a commit taking the entries 20 to 30 and setting the dlist pointer to 20, and freeing the dlist entries 10 to 20. However, since we haven't reach vblank yet, the HVS is still using the entries 10 to 20. If we were to make a new commit now, chances are the allocator are going to give the 10 to 20 entries back, and we would change their content to match the new state. If vblank hasn't happened yet, we just corrupted the active dlist entries. A first attempt to solve this was made by creating an intermediate dlist buffer to store the current (ie, as of the last commit) dlist content, that we would update each time the HVS is done with a frame. However, if the interrupt handler missed the vblank window, we would end up copying our intermediate dlist to the hardware one during the composition, essentially creating the same issue. Since making sure that our interrupt handler runs within a fixed, constrained, time window would require to make Linux a real-time kernel, this seems a bit out of scope. Instead, we can work around our original issue by keeping the dlist slots allocation longer. That way, we won't reuse a dlist slot while it's still in flight. In order to achieve this, instead of freeing the dlist slot when its associated CRTC state is destroyed, we'll queue it in a list. A naive implementation would free the buffers in that queue when we get our end of frame interrupt. However, there's still a race since, just like in the shadow dlist case, we don't control when the handler for that interrupt is going to run. Thus, we can end up with a commit adding an old dlist allocation to our queue during the window between our actual interrupt and when our handler will run. And since that buffer is still being used for the composition of the current frame, we can't free it right away, exposing us to the original bug. Fortunately for us, the hardware provides a frame counter that is increased each time the first line of a frame is being generated. Associating the frame counter the image is supposed to go away to the allocation, and then only deallocate buffers that have a counter below or equal to the one we see when the deallocation code should prevent the above race from occuring. Signed-off-by: Maxime Ripard <[email protected]>
1 parent 40e2914 commit 013f247

File tree

3 files changed

+186
-23
lines changed

3 files changed

+186
-23
lines changed

drivers/gpu/drm/vc4/vc4_crtc.c

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,14 +1096,8 @@ void vc4_crtc_destroy_state(struct drm_crtc *crtc,
10961096
struct vc4_dev *vc4 = to_vc4_dev(crtc->dev);
10971097
struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(state);
10981098

1099-
if (drm_mm_node_allocated(&vc4_state->mm)) {
1100-
unsigned long flags;
1101-
1102-
spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
1103-
drm_mm_remove_node(&vc4_state->mm);
1104-
spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
1105-
1106-
}
1099+
vc4_hvs_mark_dlist_entry_stale(vc4->hvs, vc4_state->mm);
1100+
vc4_state->mm = NULL;
11071101

11081102
drm_atomic_helper_crtc_destroy_state(crtc, state);
11091103
}

drivers/gpu/drm/vc4/vc4_drv.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -346,6 +346,9 @@ struct vc4_hvs {
346346
struct drm_mm lbm_mm;
347347
spinlock_t mm_lock;
348348

349+
struct list_head stale_dlist_entries;
350+
struct work_struct free_dlist_work;
351+
349352
struct drm_mm_node mitchell_netravali_filter;
350353

351354
struct debugfs_regset32 regset;
@@ -648,10 +651,16 @@ struct drm_connector *vc4_get_crtc_connector(struct drm_crtc *crtc,
648651
struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc,
649652
struct drm_crtc_state *state);
650653

654+
struct vc4_hvs_dlist_allocation {
655+
struct list_head node;
656+
struct drm_mm_node mm_node;
657+
unsigned int channel;
658+
u8 target_frame_count;
659+
};
660+
651661
struct vc4_crtc_state {
652662
struct drm_crtc_state base;
653-
/* Dlist area for this CRTC configuration. */
654-
struct drm_mm_node mm;
663+
struct vc4_hvs_dlist_allocation *mm;
655664
bool txp_armed;
656665
unsigned int assigned_channel;
657666

@@ -1077,6 +1086,8 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4, struct platform_device *pde
10771086
void vc4_hvs_stop_channel(struct vc4_hvs *hvs, unsigned int output);
10781087
int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output);
10791088
u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo);
1089+
void vc4_hvs_mark_dlist_entry_stale(struct vc4_hvs *hvs,
1090+
struct vc4_hvs_dlist_allocation *alloc);
10801091
int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state);
10811092
void vc4_hvs_atomic_begin(struct drm_crtc *crtc, struct drm_atomic_state *state);
10821093
void vc4_hvs_atomic_enable(struct drm_crtc *crtc, struct drm_atomic_state *state);

drivers/gpu/drm/vc4/vc4_hvs.c

Lines changed: 171 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,152 @@ static void vc5_hvs_update_gamma_lut(struct vc4_hvs *hvs,
412412
vc5_hvs_lut_load(hvs, vc4_crtc);
413413
}
414414

415+
static void vc4_hvs_irq_enable_eof(const struct vc4_hvs *hvs,
416+
unsigned int channel)
417+
{
418+
struct vc4_dev *vc4 = hvs->vc4;
419+
u32 irq_mask = vc4->is_vc5 ?
420+
SCALER5_DISPCTRL_DSPEIEOF(channel) :
421+
SCALER_DISPCTRL_DSPEIEOF(channel);
422+
423+
HVS_WRITE(SCALER_DISPCTRL,
424+
HVS_READ(SCALER_DISPCTRL) | irq_mask);
425+
}
426+
427+
static void vc4_hvs_irq_clear_eof(const struct vc4_hvs *hvs,
428+
unsigned int channel)
429+
{
430+
struct vc4_dev *vc4 = hvs->vc4;
431+
u32 irq_mask = vc4->is_vc5 ?
432+
SCALER5_DISPCTRL_DSPEIEOF(channel) :
433+
SCALER_DISPCTRL_DSPEIEOF(channel);
434+
435+
HVS_WRITE(SCALER_DISPCTRL,
436+
HVS_READ(SCALER_DISPCTRL) & ~irq_mask);
437+
}
438+
439+
static struct vc4_hvs_dlist_allocation *
440+
vc4_hvs_alloc_dlist_entry(struct vc4_hvs *hvs,
441+
unsigned int channel,
442+
size_t dlist_count)
443+
{
444+
struct vc4_hvs_dlist_allocation *alloc;
445+
unsigned long flags;
446+
int ret;
447+
448+
if (channel == VC4_HVS_CHANNEL_DISABLED)
449+
return NULL;
450+
451+
alloc = kzalloc(sizeof(*alloc), GFP_KERNEL);
452+
if (!alloc)
453+
return ERR_PTR(-ENOMEM);
454+
455+
spin_lock_irqsave(&hvs->mm_lock, flags);
456+
ret = drm_mm_insert_node(&hvs->dlist_mm, &alloc->mm_node,
457+
dlist_count);
458+
spin_unlock_irqrestore(&hvs->mm_lock, flags);
459+
if (ret)
460+
return ERR_PTR(ret);
461+
462+
alloc->channel = channel;
463+
464+
return alloc;
465+
}
466+
467+
void vc4_hvs_mark_dlist_entry_stale(struct vc4_hvs *hvs,
468+
struct vc4_hvs_dlist_allocation *alloc)
469+
{
470+
unsigned long flags;
471+
u8 frcnt;
472+
473+
if (!alloc)
474+
return;
475+
476+
if (!drm_mm_node_allocated(&alloc->mm_node))
477+
return;
478+
479+
frcnt = vc4_hvs_get_fifo_frame_count(hvs, alloc->channel);
480+
alloc->target_frame_count = (frcnt + 1) & ((1 << 6) - 1);
481+
482+
spin_lock_irqsave(&hvs->mm_lock, flags);
483+
484+
list_add_tail(&alloc->node, &hvs->stale_dlist_entries);
485+
486+
HVS_WRITE(SCALER_DISPSTAT, SCALER_DISPSTAT_EOF(alloc->channel));
487+
vc4_hvs_irq_enable_eof(hvs, alloc->channel);
488+
489+
spin_unlock_irqrestore(&hvs->mm_lock, flags);
490+
}
491+
492+
static void vc4_hvs_schedule_dlist_sweep(struct vc4_hvs *hvs,
493+
unsigned int channel)
494+
{
495+
unsigned long flags;
496+
497+
spin_lock_irqsave(&hvs->mm_lock, flags);
498+
499+
if (!list_empty(&hvs->stale_dlist_entries))
500+
queue_work(system_unbound_wq, &hvs->free_dlist_work);
501+
502+
vc4_hvs_irq_clear_eof(hvs, channel);
503+
504+
spin_unlock_irqrestore(&hvs->mm_lock, flags);
505+
}
506+
507+
/*
508+
* Frame counts are essentially sequence numbers over 6 bits, and we
509+
* thus can use sequence number arithmetic and follow the RFC1982 to
510+
* implement proper comparison between them.
511+
*/
512+
static bool vc4_hvs_frcnt_lte(u8 cnt1, u8 cnt2)
513+
{
514+
return (s8)((cnt1 << 2) - (cnt2 << 2)) <= 0;
515+
}
516+
517+
/*
518+
* Some atomic commits (legacy cursor updates, mostly) will not wait for
519+
* the next vblank and will just return once the commit has been pushed
520+
* to the hardware.
521+
*
522+
* On the hardware side, our HVS stores the planes parameters in its
523+
* context RAM, and will use part of the RAM to store data during the
524+
* frame rendering.
525+
*
526+
* This interacts badly if we get multiple commits before the next
527+
* vblank since we could end up overwriting the DLIST entries used by
528+
* previous commits if our dlist allocation reuses that entry. In such a
529+
* case, we would overwrite the data currently being used by the
530+
* hardware, resulting in a corrupted frame.
531+
*
532+
* In order to work around this, we'll queue the dlist entries in a list
533+
* once the associated CRTC state is destroyed. The HVS only allows us
534+
* to know which entry is being active, but not which one are no longer
535+
* being used, so in order to avoid freeing entries that are still used
536+
* by the hardware we add a guesstimate of the frame count where our
537+
* entry will no longer be used, and thus will only free those entries
538+
* when we will have reached that frame count.
539+
*/
540+
static void vc4_hvs_dlist_free_work(struct work_struct *work)
541+
{
542+
struct vc4_hvs *hvs = container_of(work, struct vc4_hvs, free_dlist_work);
543+
struct vc4_hvs_dlist_allocation *cur, *next;
544+
unsigned long flags;
545+
546+
spin_lock_irqsave(&hvs->mm_lock, flags);
547+
list_for_each_entry_safe(cur, next, &hvs->stale_dlist_entries, node) {
548+
u8 frcnt;
549+
550+
frcnt = vc4_hvs_get_fifo_frame_count(hvs, cur->channel);
551+
if (!vc4_hvs_frcnt_lte(cur->target_frame_count, frcnt))
552+
continue;
553+
554+
list_del(&cur->node);
555+
drm_mm_remove_node(&cur->mm_node);
556+
kfree(cur);
557+
}
558+
spin_unlock_irqrestore(&hvs->mm_lock, flags);
559+
}
560+
415561
u8 vc4_hvs_get_fifo_frame_count(struct vc4_hvs *hvs, unsigned int fifo)
416562
{
417563
struct drm_device *drm = &hvs->vc4->base;
@@ -643,13 +789,12 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
643789
{
644790
struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state, crtc);
645791
struct vc4_crtc_state *vc4_state = to_vc4_crtc_state(crtc_state);
792+
struct vc4_hvs_dlist_allocation *alloc;
646793
struct drm_device *dev = crtc->dev;
647794
struct vc4_dev *vc4 = to_vc4_dev(dev);
648795
struct drm_plane *plane;
649-
unsigned long flags;
650796
const struct drm_plane_state *plane_state;
651797
u32 dlist_count = 0;
652-
int ret;
653798

654799
/* The pixelvalve can only feed one encoder (and encoders are
655800
* 1:1 with connectors.)
@@ -662,12 +807,11 @@ int vc4_hvs_atomic_check(struct drm_crtc *crtc, struct drm_atomic_state *state)
662807

663808
dlist_count++; /* Account for SCALER_CTL0_END. */
664809

665-
spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
666-
ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm,
667-
dlist_count);
668-
spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
669-
if (ret)
670-
return ret;
810+
alloc = vc4_hvs_alloc_dlist_entry(vc4->hvs, vc4_state->assigned_channel, dlist_count);
811+
if (IS_ERR(alloc))
812+
return PTR_ERR(alloc);
813+
814+
vc4_state->mm = alloc;
671815

672816
return vc4_hvs_gamma_check(crtc, state);
673817
}
@@ -683,8 +827,9 @@ static void vc4_hvs_install_dlist(struct drm_crtc *crtc)
683827
if (!drm_dev_enter(dev, &idx))
684828
return;
685829

830+
WARN_ON(!vc4_state->mm);
686831
HVS_WRITE(SCALER_DISPLISTX(vc4_state->assigned_channel),
687-
vc4_state->mm.start);
832+
vc4_state->mm->mm_node.start);
688833

689834
drm_dev_exit(idx);
690835
}
@@ -711,8 +856,10 @@ static void vc4_hvs_update_dlist(struct drm_crtc *crtc)
711856
spin_unlock_irqrestore(&dev->event_lock, flags);
712857
}
713858

859+
WARN_ON(!vc4_state->mm);
860+
714861
spin_lock_irqsave(&vc4_crtc->irq_lock, flags);
715-
vc4_crtc->current_dlist = vc4_state->mm.start;
862+
vc4_crtc->current_dlist = vc4_state->mm->mm_node.start;
716863
spin_unlock_irqrestore(&vc4_crtc->irq_lock, flags);
717864
}
718865

@@ -769,8 +916,7 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
769916
struct vc4_plane_state *vc4_plane_state;
770917
bool debug_dump_regs = false;
771918
bool enable_bg_fill = false;
772-
u32 __iomem *dlist_start = vc4->hvs->dlist + vc4_state->mm.start;
773-
u32 __iomem *dlist_next = dlist_start;
919+
u32 __iomem *dlist_start, *dlist_next;
774920
unsigned int zpos = 0;
775921
bool found = false;
776922
int idx;
@@ -788,6 +934,9 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
788934
vc4_hvs_dump_state(hvs);
789935
}
790936

937+
dlist_start = vc4->hvs->dlist + vc4_state->mm->mm_node.start;
938+
dlist_next = dlist_start;
939+
791940
/* Copy all the active planes' dlist contents to the hardware dlist. */
792941
do {
793942
found = false;
@@ -821,7 +970,8 @@ void vc4_hvs_atomic_flush(struct drm_crtc *crtc,
821970
writel(SCALER_CTL0_END, dlist_next);
822971
dlist_next++;
823972

824-
WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm.size);
973+
WARN_ON(!vc4_state->mm);
974+
WARN_ON_ONCE(dlist_next - dlist_start != vc4_state->mm->mm_node.size);
825975

826976
if (enable_bg_fill)
827977
/* This sets a black background color fill, as is the case
@@ -960,6 +1110,11 @@ static irqreturn_t vc4_hvs_irq_handler(int irq, void *data)
9601110

9611111
irqret = IRQ_HANDLED;
9621112
}
1113+
1114+
if (status & SCALER_DISPSTAT_EOF(channel)) {
1115+
vc4_hvs_schedule_dlist_sweep(hvs, channel);
1116+
irqret = IRQ_HANDLED;
1117+
}
9631118
}
9641119

9651120
/* Clear every per-channel interrupt flag. */
@@ -1024,6 +1179,9 @@ struct vc4_hvs *__vc4_hvs_alloc(struct vc4_dev *vc4, struct platform_device *pde
10241179

10251180
spin_lock_init(&hvs->mm_lock);
10261181

1182+
INIT_LIST_HEAD(&hvs->stale_dlist_entries);
1183+
INIT_WORK(&hvs->free_dlist_work, vc4_hvs_dlist_free_work);
1184+
10271185
/* Set up the HVS display list memory manager. We never
10281186
* overwrite the setup from the bootloader (just 128b out of
10291187
* our 16K), since we don't want to scramble the screen when

0 commit comments

Comments
 (0)