From 4264bba50d050577580cc6309524e3d92959fff2 Mon Sep 17 00:00:00 2001
From: Boris Brezillon <boris.brezillon@free-electrons.com>
Date: Thu, 19 Oct 2017 14:57:48 +0200
Subject: [PATCH 174/454] drm/vc4: Add the DRM_IOCTL_VC4_GEM_MADVISE ioctl

This ioctl will allow us to purge inactive userspace buffers when the
system is running out of contiguous memory.

For now, the purge logic is rather dumb: it does not try to release
only the number of BOs needed to satisfy the failing CMA allocation
request, but instead purges all objects in the purgeable pool as soon
as we experience a CMA allocation failure.

Note that the in-kernel BO cache is always purged before the purgeable
pool, because objects in the kernel cache are known to be unused, while
objects marked as purgeable by a userspace application/library might
have to have their contents restored when they are marked unpurgeable
again, which can be expensive.
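
To illustrate the intended flow from userspace (a hypothetical sketch,
not part of this patch: the helper name and the fd/handle plumbing are
assumptions), a client marks idle buffers purgeable and claims them
back before re-use:

  #include <errno.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <drm/vc4_drm.h>

  /* Mark an idle BO purgeable, or claim it back before re-use.
   * Returns 1 if the backing storage was retained, 0 if the kernel
   * purged it in the meantime, or a negative errno on failure.
   */
  static int vc4_bo_set_madv(int fd, __u32 handle, __u32 madv)
  {
  	struct drm_vc4_gem_madvise args;

  	memset(&args, 0, sizeof(args));
  	args.handle = handle;
  	args.madv = madv;	/* VC4_MADV_WILLNEED or VC4_MADV_DONTNEED */

  	if (ioctl(fd, DRM_IOCTL_VC4_GEM_MADVISE, &args))
  		return -errno;

  	return args.retained;
  }

When a VC4_MADV_WILLNEED call reports retained == 0, the previous
contents are gone and the caller must re-initialize the buffer before
using it again (mmap of a purgeable or purged BO is rejected).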

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Signed-off-by: Eric Anholt <eric@anholt.net>
Reviewed-by: Eric Anholt <eric@anholt.net>
Link: https://patchwork.freedesktop.org/patch/msgid/20171019125748.3152-1-boris.brezillon@free-electrons.com
(cherry picked from commit b9f19259b84dc648f207a46f3581d15eeaedf4b6)
---
 drivers/gpu/drm/vc4/vc4_bo.c    | 287 +++++++++++++++++++++++++++++++-
 drivers/gpu/drm/vc4/vc4_drv.c   |  10 +-
 drivers/gpu/drm/vc4/vc4_drv.h   |  30 ++++
 drivers/gpu/drm/vc4/vc4_gem.c   | 156 ++++++++++++++++-
 drivers/gpu/drm/vc4/vc4_plane.c |  20 +++
 include/uapi/drm/vc4_drm.h      |  19 +++
 6 files changed, 507 insertions(+), 15 deletions(-)

--- a/drivers/gpu/drm/vc4/vc4_bo.c
+++ b/drivers/gpu/drm/vc4/vc4_bo.c
@@ -53,6 +53,17 @@ static void vc4_bo_stats_dump(struct vc4
 			 vc4->bo_labels[i].size_allocated / 1024,
 			 vc4->bo_labels[i].num_allocated);
 	}
+
+	mutex_lock(&vc4->purgeable.lock);
+	if (vc4->purgeable.num)
+		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
+			 vc4->purgeable.size / 1024, vc4->purgeable.num);
+
+	if (vc4->purgeable.purged_num)
+		DRM_INFO("%30s: %6zdkb BOs (%d)\n", "total purged BO",
+			 vc4->purgeable.purged_size / 1024,
+			 vc4->purgeable.purged_num);
+	mutex_unlock(&vc4->purgeable.lock);
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -75,6 +86,17 @@ int vc4_bo_stats_debugfs(struct seq_file
 	}
 	mutex_unlock(&vc4->bo_lock);
 
+	mutex_lock(&vc4->purgeable.lock);
+	if (vc4->purgeable.num)
+		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "userspace BO cache",
+			   vc4->purgeable.size / 1024, vc4->purgeable.num);
+
+	if (vc4->purgeable.purged_num)
+		seq_printf(m, "%30s: %6zdkb BOs (%d)\n", "total purged BO",
+			   vc4->purgeable.purged_size / 1024,
+			   vc4->purgeable.purged_num);
+	mutex_unlock(&vc4->purgeable.lock);
+
 	return 0;
 }
 #endif
@@ -248,6 +270,109 @@ static void vc4_bo_cache_purge(struct dr
 	mutex_unlock(&vc4->bo_lock);
 }
 
+void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+	mutex_lock(&vc4->purgeable.lock);
+	list_add_tail(&bo->size_head, &vc4->purgeable.list);
+	vc4->purgeable.num++;
+	vc4->purgeable.size += bo->base.base.size;
+	mutex_unlock(&vc4->purgeable.lock);
+}
+
+static void vc4_bo_remove_from_purgeable_pool_locked(struct vc4_bo *bo)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+	/* list_del_init() is used here because the caller might release
+	 * the purgeable lock in order to acquire the madv one and update the
+	 * madv status.
+	 * During this short period of time a user might decide to mark
+	 * the BO as unpurgeable, and if bo->madv is set to
+	 * VC4_MADV_DONTNEED it will try to remove the BO from the
+	 * purgeable list which will fail if the ->next/prev fields
+	 * are set to LIST_POISON1/LIST_POISON2 (which is what
+	 * list_del() does).
+	 * Re-initializing the list element guarantees that list_del()
+	 * will work correctly even if it's a NOP.
+	 */
+	list_del_init(&bo->size_head);
+	vc4->purgeable.num--;
+	vc4->purgeable.size -= bo->base.base.size;
+}
+
+void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+	mutex_lock(&vc4->purgeable.lock);
+	vc4_bo_remove_from_purgeable_pool_locked(bo);
+	mutex_unlock(&vc4->purgeable.lock);
+}
+
+static void vc4_bo_purge(struct drm_gem_object *obj)
+{
+	struct vc4_bo *bo = to_vc4_bo(obj);
+	struct drm_device *dev = obj->dev;
+
+	WARN_ON(!mutex_is_locked(&bo->madv_lock));
+	WARN_ON(bo->madv != VC4_MADV_DONTNEED);
+
+	drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping);
+
+	dma_free_wc(dev->dev, obj->size, bo->base.vaddr, bo->base.paddr);
+	bo->base.vaddr = NULL;
+	bo->madv = __VC4_MADV_PURGED;
+}
+
+static void vc4_bo_userspace_cache_purge(struct drm_device *dev)
+{
+	struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+	mutex_lock(&vc4->purgeable.lock);
+	while (!list_empty(&vc4->purgeable.list)) {
+		struct vc4_bo *bo = list_first_entry(&vc4->purgeable.list,
+						     struct vc4_bo, size_head);
+		struct drm_gem_object *obj = &bo->base.base;
+		size_t purged_size = 0;
+
+		vc4_bo_remove_from_purgeable_pool_locked(bo);
+
+		/* Release the purgeable lock while we're purging the BO so
+		 * that other people can continue inserting things in the
+		 * purgeable pool without having to wait for all BOs to be
+		 * purged.
+		 */
+		mutex_unlock(&vc4->purgeable.lock);
+		mutex_lock(&bo->madv_lock);
+
+		/* Since we released the purgeable pool lock before acquiring
+		 * the BO madv one, the user may have marked the BO as WILLNEED
+		 * and re-used it in the meantime.
+		 * Before purging the BO we need to make sure
+		 * - it is still marked as DONTNEED
+		 * - it has not been re-inserted in the purgeable list
+		 * - it is not used by HW blocks
+		 * If one of these conditions is not met, just skip the entry.
+		 */
+		if (bo->madv == VC4_MADV_DONTNEED &&
+		    list_empty(&bo->size_head) &&
+		    !refcount_read(&bo->usecnt)) {
+			purged_size = bo->base.base.size;
+			vc4_bo_purge(obj);
+		}
+		mutex_unlock(&bo->madv_lock);
+		mutex_lock(&vc4->purgeable.lock);
+
+		if (purged_size) {
+			vc4->purgeable.purged_size += purged_size;
+			vc4->purgeable.purged_num++;
+		}
+	}
+	mutex_unlock(&vc4->purgeable.lock);
+}
+
 static struct vc4_bo *vc4_bo_get_from_cache(struct drm_device *dev,
 					    uint32_t size,
 					    enum vc4_kernel_bo_type type)
@@ -294,6 +419,9 @@ struct drm_gem_object *vc4_create_object
 	if (!bo)
 		return NULL;
 
+	bo->madv = VC4_MADV_WILLNEED;
+	refcount_set(&bo->usecnt, 0);
+	mutex_init(&bo->madv_lock);
 	mutex_lock(&vc4->bo_lock);
 	bo->label = VC4_BO_TYPE_KERNEL;
 	vc4->bo_labels[VC4_BO_TYPE_KERNEL].num_allocated++;
@@ -331,16 +459,38 @@ struct vc4_bo *vc4_bo_create(struct drm_
 		 * CMA allocations we've got laying around and try again.
 		 */
 		vc4_bo_cache_purge(dev);
+		cma_obj = drm_gem_cma_create(dev, size);
+	}
 
+	if (IS_ERR(cma_obj)) {
+		/*
+		 * Still not enough CMA memory, purge the userspace BO
+		 * cache and retry.
+		 * This is sub-optimal since we purge the whole userspace
+	 * BO cache, which forces users that want to re-use a BO to
+		 * restore its initial content.
+		 * Ideally, we should purge entries one by one and retry
+		 * after each to see if CMA allocation succeeds. Or even
+		 * better, try to find an entry with at least the same
+		 * size.
+		 */
+		vc4_bo_userspace_cache_purge(dev);
 		cma_obj = drm_gem_cma_create(dev, size);
-		if (IS_ERR(cma_obj)) {
-			DRM_ERROR("Failed to allocate from CMA:\n");
-			vc4_bo_stats_dump(vc4);
-			return ERR_PTR(-ENOMEM);
-		}
+	}
+
+	if (IS_ERR(cma_obj)) {
+		DRM_ERROR("Failed to allocate from CMA:\n");
+		vc4_bo_stats_dump(vc4);
+		return ERR_PTR(-ENOMEM);
 	}
 	bo = to_vc4_bo(&cma_obj->base);
 
+	/* By default, BOs do not support the MADV ioctl. This will be enabled
+	 * only on BOs that are exposed to userspace (V3D, V3D_SHADER and DUMB
+	 * BOs).
+	 */
+	bo->madv = __VC4_MADV_NOTSUPP;
+
 	mutex_lock(&vc4->bo_lock);
 	vc4_bo_set_label(&cma_obj->base, type);
 	mutex_unlock(&vc4->bo_lock);
@@ -366,6 +516,8 @@ int vc4_dumb_create(struct drm_file *fil
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	bo->madv = VC4_MADV_WILLNEED;
+
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
 	drm_gem_object_put_unlocked(&bo->base.base);
 
@@ -404,6 +556,12 @@ void vc4_free_object(struct drm_gem_obje
 	struct vc4_bo *bo = to_vc4_bo(gem_bo);
 	struct list_head *cache_list;
 
+	/* Remove the BO from the purgeable list. */
+	mutex_lock(&bo->madv_lock);
+	if (bo->madv == VC4_MADV_DONTNEED && !refcount_read(&bo->usecnt))
+		vc4_bo_remove_from_purgeable_pool(bo);
+	mutex_unlock(&bo->madv_lock);
+
 	mutex_lock(&vc4->bo_lock);
 	/* If the object references someone else's memory, we can't cache it.
 	 */
@@ -419,7 +577,8 @@ void vc4_free_object(struct drm_gem_obje
 	}
 
 	/* If this object was partially constructed but CMA allocation
-	 * had failed, just free it.
+	 * had failed, just free it. Can also happen when the BO has been
+	 * purged.
 	 */
 	if (!bo->base.vaddr) {
 		vc4_bo_destroy(bo);
@@ -439,6 +598,10 @@ void vc4_free_object(struct drm_gem_obje
 		bo->validated_shader = NULL;
 	}
 
+	/* Reset madv and usecnt before adding the BO to the cache. */
+	bo->madv = __VC4_MADV_NOTSUPP;
+	refcount_set(&bo->usecnt, 0);
+
 	bo->t_format = false;
 	bo->free_time = jiffies;
 	list_add(&bo->size_head, cache_list);
@@ -463,6 +626,56 @@ static void vc4_bo_cache_time_work(struc
 	mutex_unlock(&vc4->bo_lock);
 }
 
+int vc4_bo_inc_usecnt(struct vc4_bo *bo)
+{
+	int ret;
+
+	/* Fast path: if the BO is already retained by someone, no need to
+	 * check the madv status.
+	 */
+	if (refcount_inc_not_zero(&bo->usecnt))
+		return 0;
+
+	mutex_lock(&bo->madv_lock);
+	switch (bo->madv) {
+	case VC4_MADV_WILLNEED:
+		refcount_inc(&bo->usecnt);
+		ret = 0;
+		break;
+	case VC4_MADV_DONTNEED:
+		/* We shouldn't use a purgeable BO unless someone else has
+		 * already retained its content by incrementing usecnt.
+		 * Luckily the BO hasn't been purged yet, but something wrong
+		 * is happening here. Just return an error instead of
+		 * authorizing this use case.
+		 */
+	case __VC4_MADV_PURGED:
+		/* We can't use a purged BO. */
+	default:
+		/* Invalid madv value. */
+		ret = -EINVAL;
+		break;
+	}
+	mutex_unlock(&bo->madv_lock);
+
+	return ret;
+}
+
+void vc4_bo_dec_usecnt(struct vc4_bo *bo)
+{
+	/* Fast path: if the BO is still retained by someone, no need to test
+	 * the madv value.
+	 */
+	if (refcount_dec_not_one(&bo->usecnt))
+		return;
+
+	mutex_lock(&bo->madv_lock);
+	if (refcount_dec_and_test(&bo->usecnt) &&
+	    bo->madv == VC4_MADV_DONTNEED)
+		vc4_bo_add_to_purgeable_pool(bo);
+	mutex_unlock(&bo->madv_lock);
+}
+
 static void vc4_bo_cache_time_timer(unsigned long data)
 {
 	struct drm_device *dev = (struct drm_device *)data;
@@ -482,18 +695,52 @@ struct dma_buf *
 vc4_prime_export(struct drm_device *dev, struct drm_gem_object *obj, int flags)
 {
 	struct vc4_bo *bo = to_vc4_bo(obj);
+	struct dma_buf *dmabuf;
+	int ret;
 
 	if (bo->validated_shader) {
 		DRM_DEBUG("Attempting to export shader BO\n");
 		return ERR_PTR(-EINVAL);
 	}
 
-	return drm_gem_prime_export(dev, obj, flags);
+	/* Note: as soon as the BO is exported it becomes unpurgeable, because
+	 * no one ever decrements the usecnt even if the reference held by the
+	 * exported BO is released. This shouldn't be a problem since we don't
+	 * expect exported BOs to be marked as purgeable.
+	 */
+	ret = vc4_bo_inc_usecnt(bo);
+	if (ret) {
+		DRM_ERROR("Failed to increment BO usecnt\n");
+		return ERR_PTR(ret);
+	}
+
+	dmabuf = drm_gem_prime_export(dev, obj, flags);
+	if (IS_ERR(dmabuf))
+		vc4_bo_dec_usecnt(bo);
+
+	return dmabuf;
+}
+
+int vc4_fault(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	struct drm_gem_object *obj = vma->vm_private_data;
+	struct vc4_bo *bo = to_vc4_bo(obj);
+
+	/* The only reason we would end up here is when user-space accesses
+	 * a BO's memory after it has been purged.
+	 */
+	mutex_lock(&bo->madv_lock);
+	WARN_ON(bo->madv != __VC4_MADV_PURGED);
+	mutex_unlock(&bo->madv_lock);
+
+	return VM_FAULT_SIGBUS;
 }
 
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_gem_object *gem_obj;
+	unsigned long vm_pgoff;
 	struct vc4_bo *bo;
 	int ret;
 
@@ -509,16 +756,36 @@ int vc4_mmap(struct file *filp, struct v
 		return -EINVAL;
 	}
 
+	if (bo->madv != VC4_MADV_WILLNEED) {
+		DRM_DEBUG("mmapping of %s BO not allowed\n",
+			  bo->madv == VC4_MADV_DONTNEED ?
+			  "purgeable" : "purged");
+		return -EINVAL;
+	}
+
 	/*
 	 * Clear the VM_PFNMAP flag that was set by drm_gem_mmap(), and set the
 	 * vm_pgoff (used as a fake buffer offset by DRM) to 0 as we want to map
 	 * the whole buffer.
 	 */
 	vma->vm_flags &= ~VM_PFNMAP;
-	vma->vm_pgoff = 0;
 
+	/* This ->vm_pgoff dance is needed to make all parties happy:
+	 * - dma_mmap_wc() uses ->vm_pgoff as an offset within the allocated
+	 *   mem-region, hence the need to set it to zero (the value set by
+	 *   the DRM core is a virtual offset encoding the GEM object-id)
+	 * - the mmap() core logic needs ->vm_pgoff to be restored to its
+	 *   initial value before returning from this function because it
+	 *   encodes the offset of this GEM in the dev->anon_inode pseudo-file
+	 *   and this information will be used when we invalidate userspace
+	 *   mappings with drm_vma_node_unmap() (called from vc4_bo_purge()).
+	 */
+	vm_pgoff = vma->vm_pgoff;
+	vma->vm_pgoff = 0;
 	ret = dma_mmap_wc(bo->base.base.dev->dev, vma, bo->base.vaddr,
 			  bo->base.paddr, vma->vm_end - vma->vm_start);
+	vma->vm_pgoff = vm_pgoff;
+
 	if (ret)
 		drm_gem_vm_close(vma);
 
@@ -582,6 +849,8 @@ int vc4_create_bo_ioctl(struct drm_devic
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	bo->madv = VC4_MADV_WILLNEED;
+
 	ret = drm_gem_handle_create(file_priv, &bo->base.base, &args->handle);
 	drm_gem_object_put_unlocked(&bo->base.base);
 
@@ -635,6 +904,8 @@ vc4_create_shader_bo_ioctl(struct drm_de
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
+	bo->madv = VC4_MADV_WILLNEED;
+
 	if (copy_from_user(bo->base.vaddr,
 			     (void __user *)(uintptr_t)args->data,
 			     args->size)) {
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -100,6 +100,7 @@ static int vc4_get_param_ioctl(struct dr
 	case DRM_VC4_PARAM_SUPPORTS_ETC1:
 	case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
 	case DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER:
+	case DRM_VC4_PARAM_SUPPORTS_MADVISE:
 		args->value = true;
 		break;
 	default:
@@ -117,6 +118,12 @@ static void vc4_lastclose(struct drm_dev
 	drm_fbdev_cma_restore_mode(vc4->fbdev);
 }
 
+static const struct vm_operations_struct vc4_vm_ops = {
+	.fault = vc4_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
 static const struct file_operations vc4_drm_fops = {
 	.owner = THIS_MODULE,
 	.open = drm_open,
@@ -142,6 +149,7 @@ static const struct drm_ioctl_desc vc4_d
 	DRM_IOCTL_DEF_DRV(VC4_SET_TILING, vc4_set_tiling_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_GET_TILING, vc4_get_tiling_ioctl, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(VC4_LABEL_BO, vc4_label_bo_ioctl, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(VC4_GEM_MADVISE, vc4_gem_madvise_ioctl, DRM_RENDER_ALLOW),
 };
 
 static struct drm_driver vc4_drm_driver = {
@@ -166,7 +174,7 @@ static struct drm_driver vc4_drm_driver
 
 	.gem_create_object = vc4_create_object,
 	.gem_free_object_unlocked = vc4_free_object,
-	.gem_vm_ops = &drm_gem_cma_vm_ops,
+	.gem_vm_ops = &vc4_vm_ops,
 
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
 	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -77,6 +77,19 @@ struct vc4_dev {
 	/* Protects bo_cache and bo_labels. */
 	struct mutex bo_lock;
 
+	/* Purgeable BO pool. All BOs in this pool can have their memory
+	 * reclaimed if the driver is unable to allocate new BOs. We also
+	 * keep stats related to the purge mechanism here.
+	 */
+	struct {
+		struct list_head list;
+		unsigned int num;
+		size_t size;
+		unsigned int purged_num;
+		size_t purged_size;
+		struct mutex lock;
+	} purgeable;
+
 	uint64_t dma_fence_context;
 
 	/* Sequence number for the last job queued in bin_job_list.
@@ -195,6 +208,16 @@ struct vc4_bo {
 	 * for user-allocated labels.
 	 */
 	int label;
+
+	/* Count the number of active users. This is needed to determine
+	 * whether we can move the BO to the purgeable list or not (when the BO
+	 * is used by the GPU or the display engine we can't purge it).
+	 */
+	refcount_t usecnt;
+
+	/* Store purgeable/purged state here */
+	u32 madv;
+	struct mutex madv_lock;
 };
 
 static inline struct vc4_bo *
@@ -506,6 +529,7 @@ int vc4_get_hang_state_ioctl(struct drm_
 			     struct drm_file *file_priv);
 int vc4_label_bo_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv);
+int vc4_fault(struct vm_fault *vmf);
 int vc4_mmap(struct file *filp, struct vm_area_struct *vma);
 struct reservation_object *vc4_prime_res_obj(struct drm_gem_object *obj);
 int vc4_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
@@ -516,6 +540,10 @@ void *vc4_prime_vmap(struct drm_gem_obje
 int vc4_bo_cache_init(struct drm_device *dev);
 void vc4_bo_cache_destroy(struct drm_device *dev);
 int vc4_bo_stats_debugfs(struct seq_file *m, void *arg);
+int vc4_bo_inc_usecnt(struct vc4_bo *bo);
+void vc4_bo_dec_usecnt(struct vc4_bo *bo);
+void vc4_bo_add_to_purgeable_pool(struct vc4_bo *bo);
+void vc4_bo_remove_from_purgeable_pool(struct vc4_bo *bo);
 
 /* vc4_crtc.c */
 extern struct platform_driver vc4_crtc_driver;
@@ -564,6 +592,8 @@ void vc4_job_handle_completed(struct vc4
 int vc4_queue_seqno_cb(struct drm_device *dev,
 		       struct vc4_seqno_cb *cb, uint64_t seqno,
 		       void (*func)(struct vc4_seqno_cb *cb));
+int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv);
 
 /* vc4_hdmi.c */
 extern struct platform_driver vc4_hdmi_driver;
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -188,11 +188,22 @@ vc4_save_hang_state(struct drm_device *d
 			continue;
 
 		for (j = 0; j < exec[i]->bo_count; j++) {
+			bo = to_vc4_bo(&exec[i]->bo[j]->base);
+
+			/* Retain BOs just in case they were marked purgeable.
+			 * This prevents the BO from being purged before
+			 * someone had a chance to dump the hang state.
+			 */
+			WARN_ON(!refcount_read(&bo->usecnt));
+			refcount_inc(&bo->usecnt);
 			drm_gem_object_get(&exec[i]->bo[j]->base);
 			kernel_state->bo[k++] = &exec[i]->bo[j]->base;
 		}
 
 		list_for_each_entry(bo, &exec[i]->unref_list, unref_head) {
+			/* No need to retain BOs coming from the ->unref_list
+			 * because they are naturally unpurgeable.
+			 */
 			drm_gem_object_get(&bo->base.base);
 			kernel_state->bo[k++] = &bo->base.base;
 		}
@@ -233,6 +244,26 @@ vc4_save_hang_state(struct drm_device *d
 	state->fdbgs = V3D_READ(V3D_FDBGS);
 	state->errstat = V3D_READ(V3D_ERRSTAT);
 
+	/* We need to turn purgeable BOs into unpurgeable ones so that
+	 * userspace has a chance to dump the hang state before the kernel
+	 * decides to purge those BOs.
+	 * Note that BO consistency at dump time cannot be guaranteed. For
+	 * example, if the owner of these BOs decides to re-use them or mark
+	 * them purgeable again, there's nothing we can do to prevent it.
+	 */
+	for (i = 0; i < kernel_state->user_state.bo_count; i++) {
+		struct vc4_bo *bo = to_vc4_bo(kernel_state->bo[i]);
+
+		if (bo->madv == __VC4_MADV_NOTSUPP)
+			continue;
+
+		mutex_lock(&bo->madv_lock);
+		if (!WARN_ON(bo->madv == __VC4_MADV_PURGED))
+			bo->madv = VC4_MADV_WILLNEED;
+		refcount_dec(&bo->usecnt);
+		mutex_unlock(&bo->madv_lock);
+	}
+
 	spin_lock_irqsave(&vc4->job_lock, irqflags);
 	if (vc4->hang_state) {
 		spin_unlock_irqrestore(&vc4->job_lock, irqflags);
@@ -639,9 +670,6 @@ vc4_queue_submit(struct drm_device *dev,
  * The command validator needs to reference BOs by their index within
  * the submitted job's BO list.  This does the validation of the job's
  * BO list and reference counting for the lifetime of the job.
- *
- * Note that this function doesn't need to unreference the BOs on
- * failure, because that will happen at vc4_complete_exec() time.
  */
 static int
 vc4_cl_lookup_bos(struct drm_device *dev,
@@ -693,16 +721,47 @@ vc4_cl_lookup_bos(struct drm_device *dev
 			DRM_DEBUG("Failed to look up GEM BO %d: %d\n",
 				  i, handles[i]);
 			ret = -EINVAL;
-			spin_unlock(&file_priv->table_lock);
-			goto fail;
+			break;
 		}
+
 		drm_gem_object_get(bo);
 		exec->bo[i] = (struct drm_gem_cma_object *)bo;
 	}
 	spin_unlock(&file_priv->table_lock);
 
+	if (ret)
+		goto fail_put_bo;
+
+	for (i = 0; i < exec->bo_count; i++) {
+		ret = vc4_bo_inc_usecnt(to_vc4_bo(&exec->bo[i]->base));
+		if (ret)
+			goto fail_dec_usecnt;
+	}
+
+	kvfree(handles);
+	return 0;
+
+fail_dec_usecnt:
+	/* Decrease usecnt on acquired objects.
+	 * We cannot rely on vc4_complete_exec() to release resources here,
+	 * because vc4_complete_exec() has no information about which BO has
+	 * had its ->usecnt incremented.
+	 * To make things easier we just free everything explicitly and set
+	 * exec->bo to NULL so that vc4_complete_exec() skips the 'BO release'
+	 * step.
+	 */
+	for (i-- ; i >= 0; i--)
+		vc4_bo_dec_usecnt(to_vc4_bo(&exec->bo[i]->base));
+
+fail_put_bo:
+	/* Release any reference to acquired objects. */
+	for (i = 0; i < exec->bo_count && exec->bo[i]; i++)
+		drm_gem_object_put_unlocked(&exec->bo[i]->base);
+
 fail:
 	kvfree(handles);
+	kvfree(exec->bo);
+	exec->bo = NULL;
 	return ret;
 }
 
@@ -835,8 +894,12 @@ vc4_complete_exec(struct drm_device *dev
 	}
 
 	if (exec->bo) {
-		for (i = 0; i < exec->bo_count; i++)
+		for (i = 0; i < exec->bo_count; i++) {
+			struct vc4_bo *bo = to_vc4_bo(&exec->bo[i]->base);
+
+			vc4_bo_dec_usecnt(bo);
 			drm_gem_object_put_unlocked(&exec->bo[i]->base);
+		}
 		kvfree(exec->bo);
 	}
 
@@ -1100,6 +1163,9 @@ vc4_gem_init(struct drm_device *dev)
 	INIT_WORK(&vc4->job_done_work, vc4_job_done_work);
 
 	mutex_init(&vc4->power_lock);
+
+	INIT_LIST_HEAD(&vc4->purgeable.list);
+	mutex_init(&vc4->purgeable.lock);
 }
 
 void
@@ -1123,3 +1189,81 @@ vc4_gem_destroy(struct drm_device *dev)
 	if (vc4->hang_state)
 		vc4_free_hang_state(dev, vc4->hang_state);
 }
+
+int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
+			  struct drm_file *file_priv)
+{
+	struct drm_vc4_gem_madvise *args = data;
+	struct drm_gem_object *gem_obj;
+	struct vc4_bo *bo;
+	int ret;
+
+	switch (args->madv) {
+	case VC4_MADV_DONTNEED:
+	case VC4_MADV_WILLNEED:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (args->pad != 0)
+		return -EINVAL;
+
+	gem_obj = drm_gem_object_lookup(file_priv, args->handle);
+	if (!gem_obj) {
+		DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
+		return -ENOENT;
+	}
+
+	bo = to_vc4_bo(gem_obj);
+
+	/* Only BOs exposed to userspace can be purged. */
+	if (bo->madv == __VC4_MADV_NOTSUPP) {
+		DRM_DEBUG("madvise not supported on this BO\n");
+		ret = -EINVAL;
+		goto out_put_gem;
+	}
+
+	/* Not sure it's safe to purge imported BOs. Let's just assume it's
+	 * not until proven otherwise.
+	 */
+	if (gem_obj->import_attach) {
+		DRM_DEBUG("madvise not supported on imported BOs\n");
+		ret = -EINVAL;
+		goto out_put_gem;
+	}
+
+	mutex_lock(&bo->madv_lock);
+
+	if (args->madv == VC4_MADV_DONTNEED && bo->madv == VC4_MADV_WILLNEED &&
+	    !refcount_read(&bo->usecnt)) {
+		/* If the BO is about to be marked as purgeable, is not used
+		 * and is not already purgeable or purged, add it to the
+		 * purgeable list.
+		 */
+		vc4_bo_add_to_purgeable_pool(bo);
+	} else if (args->madv == VC4_MADV_WILLNEED &&
+		   bo->madv == VC4_MADV_DONTNEED &&
+		   !refcount_read(&bo->usecnt)) {
+		/* The BO has not been purged yet, just remove it from
+		 * the purgeable list.
+		 */
+		vc4_bo_remove_from_purgeable_pool(bo);
+	}
+
+	/* Save the purged state. */
+	args->retained = bo->madv != __VC4_MADV_PURGED;
+
+	/* Update internal madv state only if the bo was not purged. */
+	if (bo->madv != __VC4_MADV_PURGED)
+		bo->madv = args->madv;
+
+	mutex_unlock(&bo->madv_lock);
+
+	ret = 0;
+
+out_put_gem:
+	drm_gem_object_put_unlocked(gem_obj);
+
+	return ret;
+}
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -23,6 +23,7 @@
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_plane_helper.h>
 
+#include "uapi/drm/vc4_drm.h"
 #include "vc4_drv.h"
 #include "vc4_regs.h"
 
@@ -779,21 +780,40 @@ static int vc4_prepare_fb(struct drm_pla
 {
 	struct vc4_bo *bo;
 	struct dma_fence *fence;
+	int ret;
 
 	if ((plane->state->fb == state->fb) || !state->fb)
 		return 0;
 
 	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
+
+	ret = vc4_bo_inc_usecnt(bo);
+	if (ret)
+		return ret;
+
 	fence = reservation_object_get_excl_rcu(bo->resv);
 	drm_atomic_set_fence_for_plane(state, fence);
 
 	return 0;
 }
 
+static void vc4_cleanup_fb(struct drm_plane *plane,
+			   struct drm_plane_state *state)
+{
+	struct vc4_bo *bo;
+
+	if (plane->state->fb == state->fb || !state->fb)
+		return;
+
+	bo = to_vc4_bo(&drm_fb_cma_get_gem_obj(state->fb, 0)->base);
+	vc4_bo_dec_usecnt(bo);
+}
+
 static const struct drm_plane_helper_funcs vc4_plane_helper_funcs = {
 	.atomic_check = vc4_plane_atomic_check,
 	.atomic_update = vc4_plane_atomic_update,
 	.prepare_fb = vc4_prepare_fb,
+	.cleanup_fb = vc4_cleanup_fb,
 };
 
 static void vc4_plane_destroy(struct drm_plane *plane)
--- a/include/uapi/drm/vc4_drm.h
+++ b/include/uapi/drm/vc4_drm.h
@@ -41,6 +41,7 @@ extern "C" {
 #define DRM_VC4_SET_TILING                        0x08
 #define DRM_VC4_GET_TILING                        0x09
 #define DRM_VC4_LABEL_BO                          0x0a
+#define DRM_VC4_GEM_MADVISE                       0x0b
 
 #define DRM_IOCTL_VC4_SUBMIT_CL           DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
 #define DRM_IOCTL_VC4_WAIT_SEQNO          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -53,6 +54,7 @@ extern "C" {
 #define DRM_IOCTL_VC4_SET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_SET_TILING, struct drm_vc4_set_tiling)
 #define DRM_IOCTL_VC4_GET_TILING          DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GET_TILING, struct drm_vc4_get_tiling)
 #define DRM_IOCTL_VC4_LABEL_BO            DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_LABEL_BO, struct drm_vc4_label_bo)
+#define DRM_IOCTL_VC4_GEM_MADVISE         DRM_IOWR(DRM_COMMAND_BASE + DRM_VC4_GEM_MADVISE, struct drm_vc4_gem_madvise)
 
 struct drm_vc4_submit_rcl_surface {
 	__u32 hindex; /* Handle index, or ~0 if not present. */
@@ -305,6 +307,7 @@ struct drm_vc4_get_hang_state {
 #define DRM_VC4_PARAM_SUPPORTS_ETC1		4
 #define DRM_VC4_PARAM_SUPPORTS_THREADED_FS	5
 #define DRM_VC4_PARAM_SUPPORTS_FIXED_RCL_ORDER	6
+#define DRM_VC4_PARAM_SUPPORTS_MADVISE		7
 
 struct drm_vc4_get_param {
 	__u32 param;
@@ -333,6 +336,22 @@ struct drm_vc4_label_bo {
 	__u64 name;
 };
 
+/*
+ * States prefixed with '__' are internal states and cannot be passed to the
+ * DRM_IOCTL_VC4_GEM_MADVISE ioctl.
+ */
+#define VC4_MADV_WILLNEED			0
+#define VC4_MADV_DONTNEED			1
+#define __VC4_MADV_PURGED			2
+#define __VC4_MADV_NOTSUPP			3
+
+struct drm_vc4_gem_madvise {
+	__u32 handle;
+	__u32 madv;
+	__u32 retained;
+	__u32 pad;
+};
+
 #if defined(__cplusplus)
 }
 #endif
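
As a closing note, userspace is expected to gate the new ioctl on the
DRM_VC4_PARAM_SUPPORTS_MADVISE getparam added above. A minimal,
hypothetical detection sketch (error handling simplified) might look
like:

  #include <stdbool.h>
  #include <string.h>
  #include <sys/ioctl.h>
  #include <drm/vc4_drm.h>

  static bool vc4_supports_madvise(int fd)
  {
  	struct drm_vc4_get_param param;

  	memset(&param, 0, sizeof(param));
  	param.param = DRM_VC4_PARAM_SUPPORTS_MADVISE;

  	/* Kernels without this patch reject the unknown param, so a
  	 * failing ioctl simply means "madvise not supported".
  	 */
  	if (ioctl(fd, DRM_IOCTL_VC4_GET_PARAM, &param))
  		return false;

  	return param.value != 0;
  }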