Skip to content

Commit e8d1955

Browse files
tang-chen authored and torvalds committed
acpi, memory-hotplug: parse SRAT before memblock is ready
On Linux, pages used by the kernel cannot be migrated. As a result, if a memory range is used by the kernel, it cannot be hot-removed. So if we want to hot-remove memory, we should prevent the kernel from using it. The way currently used to prevent this is to specify a memory range with the movablemem_map boot option and set it as ZONE_MOVABLE. But while the system is booting, memblock allocates memory and reserves it for the kernel. memblock is already working before we parse SRAT and learn the node memory ranges, so it may allocate memory in ranges that are to be set as ZONE_MOVABLE. This memory can then be used by the kernel and never be freed. So, let's parse SRAT before memblock is first called. And that is early enough. The first call of memblock_find_in_range_node() is in: setup_arch() |-->setup_real_mode() So this patch adds a function early_parse_srat() to parse SRAT, and calls it before setup_real_mode() is called. NOTE: 1) early_parse_srat() is called before numa_init(), and has already initialized numa_meminfo. So DO NOT clear numa_nodes_parsed in numa_init() and DO NOT zero numa_meminfo in numa_init(), otherwise we will lose the memory NUMA info. 2) I don't know why the count of memory affinities parsed from SRAT is used as a return value in the original acpi_numa_init(). So I add a static variable srat_mem_cnt to remember this count and use it as the return value of the new acpi_numa_init().
[[email protected]: parse SRAT before memblock is ready fix] Signed-off-by: Tang Chen <[email protected]> Reviewed-by: Wen Congyang <[email protected]> Cc: KOSAKI Motohiro <[email protected]> Cc: Jiang Liu <[email protected]> Cc: Jianguo Wu <[email protected]> Cc: Kamezawa Hiroyuki <[email protected]> Cc: Lai Jiangshan <[email protected]> Cc: Wu Jianguo <[email protected]> Cc: Yasuaki Ishimatsu <[email protected]> Cc: Ingo Molnar <[email protected]> Cc: Thomas Gleixner <[email protected]> Cc: "H. Peter Anvin" <[email protected]> Cc: Len Brown <[email protected]> Cc: "Brown, Len" <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent fb06bc8 commit e8d1955

File tree

4 files changed

+34
-16
lines changed

4 files changed

+34
-16
lines changed

arch/x86/kernel/setup.c

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1056,6 +1056,15 @@ void __init setup_arch(char **cmdline_p)
10561056
setup_bios_corruption_check();
10571057
#endif
10581058

1059+
/*
1060+
* In the memory hotplug case, the kernel needs info from SRAT to
1061+
* determine which memory is hotpluggable before allocating memory
1062+
* using memblock.
1063+
*/
1064+
acpi_boot_table_init();
1065+
early_acpi_boot_init();
1066+
early_parse_srat();
1067+
10591068
#ifdef CONFIG_X86_32
10601069
printk(KERN_DEBUG "initial memory mapped: [mem 0x00000000-%#010lx]\n",
10611070
(max_pfn_mapped<<PAGE_SHIFT) - 1);
@@ -1101,10 +1110,6 @@ void __init setup_arch(char **cmdline_p)
11011110
/*
11021111
* Parse the ACPI tables for possible boot-time SMP configuration.
11031112
*/
1104-
acpi_boot_table_init();
1105-
1106-
early_acpi_boot_init();
1107-
11081113
initmem_init();
11091114
memblock_find_dma_reserve();
11101115

arch/x86/mm/numa.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,10 +560,12 @@ static int __init numa_init(int (*init_func)(void))
560560
for (i = 0; i < MAX_LOCAL_APIC; i++)
561561
set_apicid_to_node(i, NUMA_NO_NODE);
562562

563-
nodes_clear(numa_nodes_parsed);
563+
/*
564+
* Do not clear numa_nodes_parsed or zero numa_meminfo here, because
565+
* SRAT was parsed earlier in early_parse_srat().
566+
*/
564567
nodes_clear(node_possible_map);
565568
nodes_clear(node_online_map);
566-
memset(&numa_meminfo, 0, sizeof(numa_meminfo));
567569
WARN_ON(memblock_set_node(0, ULLONG_MAX, MAX_NUMNODES));
568570
numa_reset_distance();
569571

drivers/acpi/numa.c

Lines changed: 13 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -282,10 +282,10 @@ acpi_table_parse_srat(enum acpi_srat_type id,
282282
handler, max_entries);
283283
}
284284

285-
int __init acpi_numa_init(void)
286-
{
287-
int cnt = 0;
285+
static int srat_mem_cnt;
288286

287+
void __init early_parse_srat(void)
288+
{
289289
/*
290290
* Should not limit number with cpu num that is from NR_CPUS or nr_cpus=
291291
* SRAT cpu entries could have different order with that in MADT.
@@ -295,21 +295,24 @@ int __init acpi_numa_init(void)
295295
/* SRAT: Static Resource Affinity Table */
296296
if (!acpi_table_parse(ACPI_SIG_SRAT, acpi_parse_srat)) {
297297
acpi_table_parse_srat(ACPI_SRAT_TYPE_X2APIC_CPU_AFFINITY,
298-
acpi_parse_x2apic_affinity, 0);
298+
acpi_parse_x2apic_affinity, 0);
299299
acpi_table_parse_srat(ACPI_SRAT_TYPE_CPU_AFFINITY,
300-
acpi_parse_processor_affinity, 0);
301-
cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
302-
acpi_parse_memory_affinity,
303-
NR_NODE_MEMBLKS);
300+
acpi_parse_processor_affinity, 0);
301+
srat_mem_cnt = acpi_table_parse_srat(ACPI_SRAT_TYPE_MEMORY_AFFINITY,
302+
acpi_parse_memory_affinity,
303+
NR_NODE_MEMBLKS);
304304
}
305+
}
305306

307+
int __init acpi_numa_init(void)
308+
{
306309
/* SLIT: System Locality Information Table */
307310
acpi_table_parse(ACPI_SIG_SLIT, acpi_parse_slit);
308311

309312
acpi_numa_arch_fixup();
310313

311-
if (cnt < 0)
312-
return cnt;
314+
if (srat_mem_cnt < 0)
315+
return srat_mem_cnt;
313316
else if (!parsed_numa_memblks)
314317
return -ENOENT;
315318
return 0;

include/linux/acpi.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -485,6 +485,14 @@ static inline bool acpi_driver_match_device(struct device *dev,
485485

486486
#endif /* !CONFIG_ACPI */
487487

488+
#ifdef CONFIG_ACPI_NUMA
489+
void __init early_parse_srat(void);
490+
#else
491+
static inline void early_parse_srat(void)
492+
{
493+
}
494+
#endif
495+
488496
#ifdef CONFIG_ACPI
489497
void acpi_os_set_prepare_sleep(int (*func)(u8 sleep_state,
490498
u32 pm1a_ctrl, u32 pm1b_ctrl));

0 commit comments

Comments
 (0)