Skip to content

Commit 63f6e48

Browse files
committed
drm/xe: Add SVM garbage collector
Add basic SVM garbage collector which destroy a SVM range upon a MMU UNMAP event. The garbage collector runs on worker or in GPU fault handler and is required as locks in the path of reclaim are required and cannot be taken the notifier. v2: - Flush garbage collector in xe_svm_close v3: - Better commit message (Thomas) - Kernel doc (Thomas) - Use list_first_entry_or_null for garbage collector loop (Thomas) - Don't add to garbage collector if VM is closed (Thomas) v4: - Use %pe to print error (Thomas) v5: - s/visable/visible (Thomas) Signed-off-by: Matthew Brost <[email protected]> Reviewed-by: Thomas Hellström <[email protected]> Reviewed-by: Himal Prasad Ghimiray <[email protected]> Link: https://patchwork.freedesktop.org/patch/msgid/[email protected]
1 parent 7d1d48f commit 63f6e48

File tree

4 files changed

+116
-2
lines changed

4 files changed

+116
-2
lines changed

drivers/gpu/drm/xe/xe_svm.c

Lines changed: 89 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
3838
if (!range)
3939
return ERR_PTR(-ENOMEM);
4040

41+
INIT_LIST_HEAD(&range->garbage_collector_link);
4142
xe_vm_get(gpusvm_to_vm(gpusvm));
4243

4344
return &range->base;
@@ -54,6 +55,24 @@ static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
5455
return container_of(r, struct xe_svm_range, base);
5556
}
5657

58+
static void
59+
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
60+
const struct mmu_notifier_range *mmu_range)
61+
{
62+
struct xe_device *xe = vm->xe;
63+
64+
drm_gpusvm_range_set_unmapped(&range->base, mmu_range);
65+
66+
spin_lock(&vm->svm.garbage_collector.lock);
67+
if (list_empty(&range->garbage_collector_link))
68+
list_add_tail(&range->garbage_collector_link,
69+
&vm->svm.garbage_collector.range_list);
70+
spin_unlock(&vm->svm.garbage_collector.lock);
71+
72+
queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
73+
&vm->svm.garbage_collector.work);
74+
}
75+
5776
static u8
5877
xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
5978
const struct mmu_notifier_range *mmu_range,
@@ -98,7 +117,9 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
98117
xe_svm_assert_in_notifier(vm);
99118

100119
drm_gpusvm_range_unmap_pages(&vm->svm.gpusvm, r, &ctx);
101-
/* TODO: Add range to garbage collector if VM is not closed */
120+
if (!xe_vm_is_closed(vm) && mmu_range->event == MMU_NOTIFY_UNMAP)
121+
xe_svm_garbage_collector_add_range(vm, to_xe_range(r),
122+
mmu_range);
102123
}
103124

104125
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
@@ -198,6 +219,63 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
198219
xe_svm_range_notifier_event_end(vm, r, mmu_range);
199220
}
200221

222+
static int __xe_svm_garbage_collector(struct xe_vm *vm,
223+
struct xe_svm_range *range)
224+
{
225+
/* TODO: Do unbind */
226+
227+
drm_gpusvm_range_remove(&vm->svm.gpusvm, &range->base);
228+
229+
return 0;
230+
}
231+
232+
static int xe_svm_garbage_collector(struct xe_vm *vm)
233+
{
234+
struct xe_svm_range *range;
235+
int err;
236+
237+
lockdep_assert_held_write(&vm->lock);
238+
239+
if (xe_vm_is_closed_or_banned(vm))
240+
return -ENOENT;
241+
242+
spin_lock(&vm->svm.garbage_collector.lock);
243+
for (;;) {
244+
range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
245+
typeof(*range),
246+
garbage_collector_link);
247+
if (!range)
248+
break;
249+
250+
list_del(&range->garbage_collector_link);
251+
spin_unlock(&vm->svm.garbage_collector.lock);
252+
253+
err = __xe_svm_garbage_collector(vm, range);
254+
if (err) {
255+
drm_warn(&vm->xe->drm,
256+
"Garbage collection failed: %pe\n",
257+
ERR_PTR(err));
258+
xe_vm_kill(vm, true);
259+
return err;
260+
}
261+
262+
spin_lock(&vm->svm.garbage_collector.lock);
263+
}
264+
spin_unlock(&vm->svm.garbage_collector.lock);
265+
266+
return 0;
267+
}
268+
269+
static void xe_svm_garbage_collector_work_func(struct work_struct *w)
270+
{
271+
struct xe_vm *vm = container_of(w, struct xe_vm,
272+
svm.garbage_collector.work);
273+
274+
down_write(&vm->lock);
275+
xe_svm_garbage_collector(vm);
276+
up_write(&vm->lock);
277+
}
278+
201279
static const struct drm_gpusvm_ops gpusvm_ops = {
202280
.range_alloc = xe_svm_range_alloc,
203281
.range_free = xe_svm_range_free,
@@ -222,6 +300,11 @@ int xe_svm_init(struct xe_vm *vm)
222300
{
223301
int err;
224302

303+
spin_lock_init(&vm->svm.garbage_collector.lock);
304+
INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
305+
INIT_WORK(&vm->svm.garbage_collector.work,
306+
xe_svm_garbage_collector_work_func);
307+
225308
err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
226309
current->mm, NULL, 0, vm->size,
227310
SZ_512M, &gpusvm_ops, fault_chunk_sizes,
@@ -243,6 +326,7 @@ int xe_svm_init(struct xe_vm *vm)
243326
void xe_svm_close(struct xe_vm *vm)
244327
{
245328
xe_assert(vm->xe, xe_vm_is_closed(vm));
329+
flush_work(&vm->svm.garbage_collector.work);
246330
}
247331

248332
/**
@@ -292,7 +376,10 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
292376
xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
293377

294378
retry:
295-
/* TODO: Run garbage collector */
379+
/* Always process UNMAPs first so the view of SVM ranges is current */
380+
err = xe_svm_garbage_collector(vm);
381+
if (err)
382+
return err;
296383

297384
r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
298385
xe_vma_start(vma), xe_vma_end(vma),

drivers/gpu/drm/xe/xe_svm.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,11 @@ struct xe_vma;
1919
struct xe_svm_range {
2020
/** @base: base drm_gpusvm_range */
2121
struct drm_gpusvm_range base;
22+
/**
23+
* @garbage_collector_link: Link into VM's garbage collect SVM range
24+
* list. Protected by VM's garbage collect lock.
25+
*/
26+
struct list_head garbage_collector_link;
2227
/**
2328
* @tile_present: Tile mask of binding is present for this range.
2429
* Protected by GPU SVM notifier lock.

drivers/gpu/drm/xe/xe_vm.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3220,6 +3220,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
32203220
}
32213221
}
32223222

3223+
/* Ensure all UNMAPs visible */
3224+
if (xe_vm_in_fault_mode(vm))
3225+
flush_work(&vm->svm.garbage_collector.work);
3226+
32233227
err = down_write_killable(&vm->lock);
32243228
if (err)
32253229
goto put_exec_queue;

drivers/gpu/drm/xe/xe_vm_types.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,24 @@ struct xe_vm {
150150
struct {
151151
/** @svm.gpusvm: base GPUSVM used to track fault allocations */
152152
struct drm_gpusvm gpusvm;
153+
/**
154+
* @svm.garbage_collector: Garbage collector which is used to unmap
155+
* SVM range's GPU bindings and destroy the ranges.
156+
*/
157+
struct {
158+
/** @svm.garbage_collector.lock: Protects the range list */
159+
spinlock_t lock;
160+
/**
161+
* @svm.garbage_collector.range_list: List of SVM ranges
162+
* in the garbage collector.
163+
*/
164+
struct list_head range_list;
165+
/**
166+
* @svm.garbage_collector.work: Worker which the
167+
* garbage collector runs on.
168+
*/
169+
struct work_struct work;
170+
} garbage_collector;
153171
} svm;
154172

155173
struct xe_device *xe;

0 commit comments

Comments
 (0)