
Commit 5de1950

lorenzo-stoakes authored and akpm00 committed
mm: resolve faulty mmap_region() error path behaviour
The mmap_region() function is somewhat terrifying, with spaghetti-like control flow and numerous means by which issues can arise and incomplete state, memory leaks and other unpleasantness can occur.

A large amount of the complexity arises from trying to handle errors late in the process of mapping a VMA, which forms the basis of recently observed issues with resource leaks and observable inconsistent state.

Taking advantage of previous patches in this series, we move a number of checks earlier in the code, simplifying things by moving the core of the logic into a static internal function, __mmap_region(). Doing this allows us to perform a number of checks up front before we do any real work, to unwind the writable unmap check unconditionally as required, and to perform a CONFIG_DEBUG_VM_MAPLE_TREE validation unconditionally also.

We move a number of things here:

1. We preallocate memory for the iterator before we call the file-backed memory hook, allowing us to exit early and avoid having to perform complicated and error-prone close/free logic. We carefully free iterator state on both success and error paths.

2. The enclosing mmap_region() function handles the mapping_map_writable() logic early. Previously the logic had the mapping_map_writable() at the point of mapping a newly allocated file-backed VMA, and a matching mapping_unmap_writable() on both success and error paths. We now do this unconditionally if this is a file-backed, shared, writable mapping. If a driver changes the flags to eliminate VM_MAYWRITE, doing so does not invalidate the seal check we just performed, and we in any case always decrement the counter in the wrapper. We perform a debug assert to ensure a driver does not attempt to do the opposite.

3. We also move arch_validate_flags() up into the mmap_region() function. This is only relevant on arm64 and sparc64, and the check is only meaningful for SPARC with ADI enabled. We explicitly add a warning for this arch if a driver invalidates this check, though the code ought eventually to be fixed to eliminate the need for this.

With all of these measures in place, we no longer need to explicitly close the VMA on error paths, as we place all checks which might fail prior to a call to any driver mmap hook. This eliminates an entire class of errors, makes the code easier to reason about and more robust.

Link: https://lkml.kernel.org/r/6e0becb36d2f5472053ac5d544c0edfe9b899e25.1730224667.git.lorenzo.stoakes@oracle.com
Fixes: deb0f65 ("mm/mmap: undo ->mmap() when arch_validate_flags() fails")
Signed-off-by: Lorenzo Stoakes <[email protected]>
Reported-by: Jann Horn <[email protected]>
Reviewed-by: Liam R. Howlett <[email protected]>
Reviewed-by: Vlastimil Babka <[email protected]>
Tested-by: Mark Brown <[email protected]>
Cc: Andreas Larsson <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: David S. Miller <[email protected]>
Cc: Helge Deller <[email protected]>
Cc: James E.J. Bottomley <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Xu <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
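In outline, the reworked entry point reduces to the following wrapper pattern. This is a condensed sketch taken from the patch in the diff below (trimmed for readability, with added comments); the comments are editorial, not part of the patch:

unsigned long mmap_region(struct file *file, unsigned long addr,
		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
		struct list_head *uf)
{
	unsigned long ret;
	bool writable_file_mapping = false;

	/* All checks that can fail now happen before any real work. */
	if (map_deny_write_exec(vm_flags, vm_flags))
		return -EACCES;
	if (!arch_validate_flags(vm_flags))
		return -EINVAL;

	/* Take the writable-mapping count (and memfd seal check) up front. */
	if (file && is_shared_maywrite(vm_flags)) {
		int error = mapping_map_writable(file->f_mapping);

		if (error)
			return error;
		writable_file_mapping = true;
	}

	ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);

	/* Unwind unconditionally, on success and error paths alike. */
	if (writable_file_mapping)
		mapping_unmap_writable(file->f_mapping);

	validate_mm(current->mm);
	return ret;
}

Because every fallible check precedes the call into __mmap_region() (and hence any driver mmap hook), the inner function's error paths no longer need to close the VMA at all.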
1 parent 5baf8b0 commit 5de1950


mm/mmap.c

Lines changed: 65 additions & 54 deletions
@@ -1358,20 +1358,18 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len,
 	return do_vmi_munmap(&vmi, mm, start, len, uf, false);
 }
 
-unsigned long mmap_region(struct file *file, unsigned long addr,
+static unsigned long __mmap_region(struct file *file, unsigned long addr,
 		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
 		struct list_head *uf)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = NULL;
 	pgoff_t pglen = PHYS_PFN(len);
-	struct vm_area_struct *merge;
 	unsigned long charged = 0;
 	struct vma_munmap_struct vms;
 	struct ma_state mas_detach;
 	struct maple_tree mt_detach;
 	unsigned long end = addr + len;
-	bool writable_file_mapping = false;
 	int error;
 	VMA_ITERATOR(vmi, mm, addr);
 	VMG_STATE(vmg, mm, &vmi, addr, end, vm_flags, pgoff);
@@ -1445,35 +1443,35 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	vm_flags_init(vma, vm_flags);
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 
+	if (vma_iter_prealloc(&vmi, vma)) {
+		error = -ENOMEM;
+		goto free_vma;
+	}
+
 	if (file) {
 		vma->vm_file = get_file(file);
 		error = mmap_file(file, vma);
 		if (error)
-			goto unmap_and_free_vma;
-
-		if (vma_is_shared_maywrite(vma)) {
-			error = mapping_map_writable(file->f_mapping);
-			if (error)
-				goto close_and_free_vma;
-
-			writable_file_mapping = true;
-		}
+			goto unmap_and_free_file_vma;
 
+		/* Drivers cannot alter the address of the VMA. */
+		WARN_ON_ONCE(addr != vma->vm_start);
 		/*
-		 * Expansion is handled above, merging is handled below.
-		 * Drivers should not alter the address of the VMA.
+		 * Drivers should not permit writability when previously it was
+		 * disallowed.
 		 */
-		if (WARN_ON((addr != vma->vm_start))) {
-			error = -EINVAL;
-			goto close_and_free_vma;
-		}
+		VM_WARN_ON_ONCE(vm_flags != vma->vm_flags &&
+				!(vm_flags & VM_MAYWRITE) &&
+				(vma->vm_flags & VM_MAYWRITE));
 
 		vma_iter_config(&vmi, addr, end);
 		/*
 		 * If vm_flags changed after mmap_file(), we should try merge
 		 * vma again as we may succeed this time.
 		 */
 		if (unlikely(vm_flags != vma->vm_flags && vmg.prev)) {
+			struct vm_area_struct *merge;
+
 			vmg.flags = vma->vm_flags;
 			/* If this fails, state is reset ready for a reattempt. */
 			merge = vma_merge_new_range(&vmg);
@@ -1491,7 +1489,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 			vma = merge;
 			/* Update vm_flags to pick up the change. */
 			vm_flags = vma->vm_flags;
-			goto unmap_writable;
+			goto file_expanded;
 		}
 		vma_iter_config(&vmi, addr, end);
 	}
@@ -1500,26 +1498,15 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	} else if (vm_flags & VM_SHARED) {
 		error = shmem_zero_setup(vma);
 		if (error)
-			goto free_vma;
+			goto free_iter_vma;
 	} else {
 		vma_set_anonymous(vma);
 	}
 
-	if (map_deny_write_exec(vma->vm_flags, vma->vm_flags)) {
-		error = -EACCES;
-		goto close_and_free_vma;
-	}
-
-	/* Allow architectures to sanity-check the vm_flags */
-	if (!arch_validate_flags(vma->vm_flags)) {
-		error = -EINVAL;
-		goto close_and_free_vma;
-	}
-
-	if (vma_iter_prealloc(&vmi, vma)) {
-		error = -ENOMEM;
-		goto close_and_free_vma;
-	}
+#ifdef CONFIG_SPARC64
+	/* TODO: Fix SPARC ADI! */
+	WARN_ON_ONCE(!arch_validate_flags(vm_flags));
+#endif
 
 	/* Lock the VMA since it is modified after insertion into VMA tree */
 	vma_start_write(vma);
@@ -1533,10 +1520,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	 */
 	khugepaged_enter_vma(vma, vma->vm_flags);
 
-	/* Once vma denies write, undo our temporary denial count */
-unmap_writable:
-	if (writable_file_mapping)
-		mapping_unmap_writable(file->f_mapping);
+file_expanded:
 	file = vma->vm_file;
 	ksm_add_vma(vma);
 expanded:
@@ -1569,23 +1553,17 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 
 	vma_set_page_prot(vma);
 
-	validate_mm(mm);
 	return addr;
 
-close_and_free_vma:
-	vma_close(vma);
-
-	if (file || vma->vm_file) {
-unmap_and_free_vma:
-		fput(vma->vm_file);
-		vma->vm_file = NULL;
+unmap_and_free_file_vma:
+	fput(vma->vm_file);
+	vma->vm_file = NULL;
 
-		vma_iter_set(&vmi, vma->vm_end);
-		/* Undo any partial mapping done by a device driver. */
-		unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
-	}
-	if (writable_file_mapping)
-		mapping_unmap_writable(file->f_mapping);
+	vma_iter_set(&vmi, vma->vm_end);
+	/* Undo any partial mapping done by a device driver. */
+	unmap_region(&vmi.mas, vma, vmg.prev, vmg.next);
+free_iter_vma:
+	vma_iter_free(&vmi);
 free_vma:
 	vm_area_free(vma);
 unacct_error:
@@ -1595,10 +1573,43 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 abort_munmap:
 	vms_abort_munmap_vmas(&vms, &mas_detach);
 gather_failed:
-	validate_mm(mm);
 	return error;
 }
 
+unsigned long mmap_region(struct file *file, unsigned long addr,
+		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
+		struct list_head *uf)
+{
+	unsigned long ret;
+	bool writable_file_mapping = false;
+
+	/* Check to see if MDWE is applicable. */
+	if (map_deny_write_exec(vm_flags, vm_flags))
+		return -EACCES;
+
+	/* Allow architectures to sanity-check the vm_flags. */
+	if (!arch_validate_flags(vm_flags))
+		return -EINVAL;
+
+	/* Map writable and ensure this isn't a sealed memfd. */
+	if (file && is_shared_maywrite(vm_flags)) {
+		int error = mapping_map_writable(file->f_mapping);
+
+		if (error)
+			return error;
+		writable_file_mapping = true;
+	}
+
+	ret = __mmap_region(file, addr, len, vm_flags, pgoff, uf);
+
+	/* Clear our write mapping regardless of error. */
+	if (writable_file_mapping)
+		mapping_unmap_writable(file->f_mapping);
+
+	validate_mm(current->mm);
+	return ret;
+}
+
 static int __vm_munmap(unsigned long start, size_t len, bool unlock)
 {
 	int ret;
