Skip to content

Commit 50256c8

Browse files
committed
x86/kaslr: Expose and use the end of the physical memory address space
jira SECO-170 bugfix cuda hangs commit-author Thomas Gleixner <[email protected]> commit ea72ce5 upstream-diff Missing CONFIG_KMSAN commits in arch/x86/include/asm/pgtable_64_types.h iounmap() on x86 occasionally fails to unmap because the provided valid ioremap address is not below high_memory. It turned out that this happens due to KASLR. KASLR uses the full address space between PAGE_OFFSET and vaddr_end to randomize the starting points of the direct map, vmalloc and vmemmap regions. It thereby limits the size of the direct map by using the installed memory size plus an extra configurable margin for hot-plug memory. This limitation is done to gain more randomization space because otherwise only the holes between the direct map, vmalloc, vmemmap and vaddr_end would be usable for randomizing. The limited direct map size is not exposed to the rest of the kernel, so the memory hot-plug and resource management related code paths still operate under the assumption that the available address space can be determined with MAX_PHYSMEM_BITS. request_free_mem_region() allocates from (1 << MAX_PHYSMEM_BITS) - 1 downwards. That means the first allocation happens past the end of the direct map and if unlucky this address is in the vmalloc space, which causes high_memory to become greater than VMALLOC_START and consequently causes iounmap() to fail for valid ioremap addresses. MAX_PHYSMEM_BITS cannot be changed for that because the randomization does not align with address bit boundaries and there are other places which actually require to know the maximum number of address bits. All remaining usage sites of MAX_PHYSMEM_BITS have been analyzed and found to be correct. Cure this by exposing the end of the direct map via PHYSMEM_END and use that for the memory hot-plug and resource management related places instead of relying on MAX_PHYSMEM_BITS. 
In the KASLR case PHYSMEM_END maps to a variable which is initialized by the KASLR initialization and otherwise it is based on MAX_PHYSMEM_BITS as before. To prevent future hiccups add a check into add_pages() to catch callers trying to add memory above PHYSMEM_END. Fixes: 0483e1f ("x86/mm: Implement ASLR for kernel memory regions") Reported-by: Max Ramanouski <[email protected]> Reported-by: Alistair Popple <[email protected]> Signed-off-by: Thomas Gleixner <[email protected]> Tested-By: Max Ramanouski <[email protected]> Tested-by: Alistair Popple <[email protected]> Reviewed-by: Dan Williams <[email protected]> Reviewed-by: Alistair Popple <[email protected]> Reviewed-by: Kees Cook <[email protected]> Cc: [email protected] Link: https://lore.kernel.org/all/87ed6soy3z.ffs@tglx (cherry picked from commit ea72ce5) Signed-off-by: Jonathan Maple <[email protected]> Signed-off-by: Jonathan Maple <[email protected]>
1 parent ff9cc6c commit 50256c8

File tree

8 files changed

+43
-12
lines changed

8 files changed

+43
-12
lines changed

arch/x86/include/asm/page_64.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ extern unsigned long phys_base;
1717
extern unsigned long page_offset_base;
1818
extern unsigned long vmalloc_base;
1919
extern unsigned long vmemmap_base;
20+
extern unsigned long physmem_end;
2021

2122
static __always_inline unsigned long __phys_addr_nodebug(unsigned long x)
2223
{

arch/x86/include/asm/pgtable_64_types.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,10 @@ extern unsigned int ptrs_per_p4d;
140140
# define VMEMMAP_START __VMEMMAP_BASE_L4
141141
#endif /* CONFIG_DYNAMIC_MEMORY_LAYOUT */
142142

143+
#ifdef CONFIG_RANDOMIZE_MEMORY
144+
# define PHYSMEM_END physmem_end
145+
#endif
146+
143147
/*
144148
* End of the region for which vmalloc page tables are pre-allocated.
145149
* For non-KMSAN builds, this is the same as VMALLOC_END.

arch/x86/mm/init_64.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -950,8 +950,12 @@ static void update_end_of_memory_vars(u64 start, u64 size)
950950
int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
951951
struct mhp_params *params)
952952
{
953+
unsigned long end = ((start_pfn + nr_pages) << PAGE_SHIFT) - 1;
953954
int ret;
954955

956+
if (WARN_ON_ONCE(end > PHYSMEM_END))
957+
return -ERANGE;
958+
955959
ret = __add_pages(nid, start_pfn, nr_pages, params);
956960
WARN_ON_ONCE(ret);
957961

arch/x86/mm/kaslr.c

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,13 +47,24 @@ static const unsigned long vaddr_end = CPU_ENTRY_AREA_BASE;
4747
*/
4848
static __initdata struct kaslr_memory_region {
4949
unsigned long *base;
50+
unsigned long *end;
5051
unsigned long size_tb;
5152
} kaslr_regions[] = {
52-
{ &page_offset_base, 0 },
53-
{ &vmalloc_base, 0 },
54-
{ &vmemmap_base, 0 },
53+
{
54+
.base = &page_offset_base,
55+
.end = &physmem_end,
56+
},
57+
{
58+
.base = &vmalloc_base,
59+
},
60+
{
61+
.base = &vmemmap_base,
62+
},
5563
};
5664

65+
/* The end of the possible address space for physical memory */
66+
unsigned long physmem_end __ro_after_init;
67+
5768
/* Get size in bytes used by the memory region */
5869
static inline unsigned long get_padding(struct kaslr_memory_region *region)
5970
{
@@ -82,6 +93,8 @@ void __init kernel_randomize_memory(void)
8293
BUILD_BUG_ON(vaddr_end != CPU_ENTRY_AREA_BASE);
8394
BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
8495

96+
/* Preset the end of the possible address space for physical memory */
97+
physmem_end = ((1ULL << MAX_PHYSMEM_BITS) - 1);
8598
if (!kaslr_memory_enabled())
8699
return;
87100

@@ -128,11 +141,18 @@ void __init kernel_randomize_memory(void)
128141
vaddr += entropy;
129142
*kaslr_regions[i].base = vaddr;
130143

144+
/* Calculate the end of the region */
145+
vaddr += get_padding(&kaslr_regions[i]);
131146
/*
132-
* Jump the region and add a minimum padding based on
133-
* randomization alignment.
147+
* KASLR trims the maximum possible size of the
148+
* direct-map. Update the physmem_end boundary.
149+
* No rounding required as the region starts
150+
* PUD aligned and size is in units of TB.
134151
*/
135-
vaddr += get_padding(&kaslr_regions[i]);
152+
if (kaslr_regions[i].end)
153+
*kaslr_regions[i].end = __pa_nodebug(vaddr - 1);
154+
155+
/* Add a minimum padding based on randomization alignment. */
136156
vaddr = round_up(vaddr + 1, PUD_SIZE);
137157
remain_entropy -= entropy;
138158
}

include/linux/mm.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,10 @@ extern const int mmap_rnd_compat_bits_max;
9595
extern int mmap_rnd_compat_bits __read_mostly;
9696
#endif
9797

98+
#ifndef PHYSMEM_END
99+
# define PHYSMEM_END ((1ULL << MAX_PHYSMEM_BITS) - 1)
100+
#endif
101+
98102
#include <asm/page.h>
99103
#include <asm/processor.h>
100104

kernel/resource.c

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1806,8 +1806,7 @@ static resource_size_t gfr_start(struct resource *base, resource_size_t size,
18061806
if (flags & GFR_DESCENDING) {
18071807
resource_size_t end;
18081808

1809-
end = min_t(resource_size_t, base->end,
1810-
(1ULL << MAX_PHYSMEM_BITS) - 1);
1809+
end = min_t(resource_size_t, base->end, PHYSMEM_END);
18111810
return end - size + 1;
18121811
}
18131812

@@ -1824,8 +1823,7 @@ static bool gfr_continue(struct resource *base, resource_size_t addr,
18241823
* @size did not wrap 0.
18251824
*/
18261825
return addr > addr - size &&
1827-
addr <= min_t(resource_size_t, base->end,
1828-
(1ULL << MAX_PHYSMEM_BITS) - 1);
1826+
addr <= min_t(resource_size_t, base->end, PHYSMEM_END);
18291827
}
18301828

18311829
static resource_size_t gfr_next(resource_size_t addr, resource_size_t size,

mm/memory_hotplug.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1706,7 +1706,7 @@ struct range __weak arch_get_mappable_range(void)
17061706

17071707
struct range mhp_get_pluggable_range(bool need_mapping)
17081708
{
1709-
const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1;
1709+
const u64 max_phys = PHYSMEM_END;
17101710
struct range mhp_range;
17111711

17121712
if (need_mapping) {

mm/sparse.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,7 @@ static inline int sparse_early_nid(struct mem_section *section)
129129
static void __meminit mminit_validate_memmodel_limits(unsigned long *start_pfn,
130130
unsigned long *end_pfn)
131131
{
132-
unsigned long max_sparsemem_pfn = 1UL << (MAX_PHYSMEM_BITS-PAGE_SHIFT);
132+
unsigned long max_sparsemem_pfn = (PHYSMEM_END + 1) >> PAGE_SHIFT;
133133

134134
/*
135135
* Sanity checks - do not allow an architecture to pass

0 commit comments

Comments
 (0)