
Commit 7cf91a9

JoonsooKim authored and torvalds committed
mm/compaction: speed up pageblock_pfn_to_page() when zone is contiguous
There is a report of a performance drop during hugepage allocation in which half of the CPU time is spent in pageblock_pfn_to_page() during compaction [1]. In that workload, compaction is triggered to make hugepages, but most pageblocks are unavailable for compaction due to their pageblock type and skip bit, so compaction usually fails. The most costly operation in this case is finding a valid pageblock while scanning the whole zone range.

To check whether a pageblock is valid to compact, a valid pfn within the pageblock is required, and we can obtain it by calling pageblock_pfn_to_page(). This function checks whether the pageblock lies within a single zone and returns a valid pfn if possible. The problem is that we need to repeat this check every time before scanning a pageblock, even when we re-visit it, and this turns out to be very expensive in this workload.

Although there is no way to skip this pageblock check on systems where holes exist at arbitrary positions, we can cache the zone's continuity and just do pfn_to_page() on systems where no hole exists. This optimization considerably speeds up the above workload.

Before vs After
Max: 1096 MB/s vs 1325 MB/s
Min:  635 MB/s vs 1015 MB/s
Avg:  899 MB/s vs 1194 MB/s

Avg is improved by roughly 30% [2].

[1]: http://www.spinics.net/lists/linux-mm/msg97378.html
[2]: https://lkml.org/lkml/2015/12/9/23

[[email protected]: don't forget to restore zone->contiguous on error path, per Vlastimil]
Signed-off-by: Joonsoo Kim <[email protected]>
Reported-by: Aaron Lu <[email protected]>
Acked-by: Vlastimil Babka <[email protected]>
Tested-by: Aaron Lu <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: David Rientjes <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
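At a glance, the change amounts to the following fast path, condensed here from the include/linux/mmzone.h and mm/internal.h hunks below (a sketch for orientation only; zone->contiguous is maintained by set_zone_contiguous()/clear_zone_contiguous() in mm/page_alloc.c and mm/memory_hotplug.c):

    /* New per-zone flag (include/linux/mmzone.h): true once the whole zone
     * has been verified to contain no holes. */
    bool contiguous;

    /* Fast path (mm/internal.h): when the zone is known to be contiguous,
     * the expensive per-pageblock validity check can be skipped entirely. */
    static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
                                    unsigned long end_pfn, struct zone *zone)
    {
            if (zone->contiguous)
                    return pfn_to_page(start_pfn);  /* no hole is possible */

            return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
    }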
1 parent e1409c3 commit 7cf91a9

7 files changed (+105 lines, -52 lines)

include/linux/gfp.h

Lines changed: 0 additions & 6 deletions
@@ -519,13 +519,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(struct zone *zone);
 void drain_local_pages(struct zone *zone);
 
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 void page_alloc_init_late(void);
-#else
-static inline void page_alloc_init_late(void)
-{
-}
-#endif
 
 /*
  * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what

include/linux/memory_hotplug.h

Lines changed: 3 additions & 0 deletions
@@ -198,6 +198,9 @@ void put_online_mems(void);
 void mem_hotplug_begin(void);
 void mem_hotplug_done(void);
 
+extern void set_zone_contiguous(struct zone *zone);
+extern void clear_zone_contiguous(struct zone *zone);
+
 #else /* ! CONFIG_MEMORY_HOTPLUG */
 /*
  * Stub functions for when hotplug is off

include/linux/mmzone.h

Lines changed: 2 additions & 0 deletions
@@ -522,6 +522,8 @@ struct zone {
         bool                    compact_blockskip_flush;
 #endif
 
+        bool                    contiguous;
+
         ZONE_PADDING(_pad3_)
         /* Zone statistics */
         atomic_long_t           vm_stat[NR_VM_ZONE_STAT_ITEMS];

mm/compaction.c

Lines changed: 0 additions & 43 deletions
@@ -71,49 +71,6 @@ static inline bool migrate_async_suitable(int migratetype)
         return is_migrate_cma(migratetype) || migratetype == MIGRATE_MOVABLE;
 }
 
-/*
- * Check that the whole (or subset of) a pageblock given by the interval of
- * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
- * with the migration of free compaction scanner. The scanners then need to
- * use only pfn_valid_within() check for arches that allow holes within
- * pageblocks.
- *
- * Return struct page pointer of start_pfn, or NULL if checks were not passed.
- *
- * It's possible on some configurations to have a setup like node0 node1 node0
- * i.e. it's possible that all pages within a zones range of pages do not
- * belong to a single zone. We assume that a border between node0 and node1
- * can occur within a single pageblock, but not a node0 node1 node0
- * interleaving within a single pageblock. It is therefore sufficient to check
- * the first and last page of a pageblock and avoid checking each individual
- * page in a pageblock.
- */
-static struct page *pageblock_pfn_to_page(unsigned long start_pfn,
-                                unsigned long end_pfn, struct zone *zone)
-{
-        struct page *start_page;
-        struct page *end_page;
-
-        /* end_pfn is one past the range we are checking */
-        end_pfn--;
-
-        if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
-                return NULL;
-
-        start_page = pfn_to_page(start_pfn);
-
-        if (page_zone(start_page) != zone)
-                return NULL;
-
-        end_page = pfn_to_page(end_pfn);
-
-        /* This gives a shorter code than deriving page_zone(end_page) */
-        if (page_zone_id(start_page) != page_zone_id(end_page))
-                return NULL;
-
-        return start_page;
-}
-
 #ifdef CONFIG_COMPACTION
 
 /* Do not skip compaction more than 64 times */

mm/internal.h

Lines changed: 12 additions & 0 deletions
@@ -132,6 +132,18 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
         return page_idx ^ (1 << order);
 }
 
+extern struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
+                                unsigned long end_pfn, struct zone *zone);
+
+static inline struct page *pageblock_pfn_to_page(unsigned long start_pfn,
+                                unsigned long end_pfn, struct zone *zone)
+{
+        if (zone->contiguous)
+                return pfn_to_page(start_pfn);
+
+        return __pageblock_pfn_to_page(start_pfn, end_pfn, zone);
+}
+
 extern int __isolate_free_page(struct page *page, unsigned int order);
 extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
                                         unsigned int order);

mm/memory_hotplug.c

Lines changed: 11 additions & 2 deletions
@@ -512,6 +512,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
         int start_sec, end_sec;
         struct vmem_altmap *altmap;
 
+        clear_zone_contiguous(zone);
+
         /* during initialize mem_map, align hot-added range to section */
         start_sec = pfn_to_section_nr(phys_start_pfn);
         end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
@@ -524,7 +526,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
                 if (altmap->base_pfn != phys_start_pfn
                                 || vmem_altmap_offset(altmap) > nr_pages) {
                         pr_warn_once("memory add fail, invalid altmap\n");
-                        return -EINVAL;
+                        err = -EINVAL;
+                        goto out;
                 }
                 altmap->alloc = 0;
         }
@@ -542,7 +545,8 @@ int __ref __add_pages(int nid, struct zone *zone, unsigned long phys_start_pfn,
                 err = 0;
         }
         vmemmap_populate_print_last();
-
+out:
+        set_zone_contiguous(zone);
         return err;
 }
 EXPORT_SYMBOL_GPL(__add_pages);
@@ -814,6 +818,8 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
                 }
         }
 
+        clear_zone_contiguous(zone);
+
         /*
          * We can only remove entire sections
          */
@@ -829,6 +835,9 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
                 if (ret)
                         break;
         }
+
+        set_zone_contiguous(zone);
+
         return ret;
 }
 EXPORT_SYMBOL_GPL(__remove_pages);

mm/page_alloc.c

Lines changed: 77 additions & 1 deletion
@@ -1128,6 +1128,75 @@ void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
         return __free_pages_boot_core(page, pfn, order);
 }
 
+/*
+ * Check that the whole (or subset of) a pageblock given by the interval of
+ * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
+ * with the migration of free compaction scanner. The scanners then need to
+ * use only pfn_valid_within() check for arches that allow holes within
+ * pageblocks.
+ *
+ * Return struct page pointer of start_pfn, or NULL if checks were not passed.
+ *
+ * It's possible on some configurations to have a setup like node0 node1 node0
+ * i.e. it's possible that all pages within a zones range of pages do not
+ * belong to a single zone. We assume that a border between node0 and node1
+ * can occur within a single pageblock, but not a node0 node1 node0
+ * interleaving within a single pageblock. It is therefore sufficient to check
+ * the first and last page of a pageblock and avoid checking each individual
+ * page in a pageblock.
+ */
+struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
+                                unsigned long end_pfn, struct zone *zone)
+{
+        struct page *start_page;
+        struct page *end_page;
+
+        /* end_pfn is one past the range we are checking */
+        end_pfn--;
+
+        if (!pfn_valid(start_pfn) || !pfn_valid(end_pfn))
+                return NULL;
+
+        start_page = pfn_to_page(start_pfn);
+
+        if (page_zone(start_page) != zone)
+                return NULL;
+
+        end_page = pfn_to_page(end_pfn);
+
+        /* This gives a shorter code than deriving page_zone(end_page) */
+        if (page_zone_id(start_page) != page_zone_id(end_page))
+                return NULL;
+
+        return start_page;
+}
+
+void set_zone_contiguous(struct zone *zone)
+{
+        unsigned long block_start_pfn = zone->zone_start_pfn;
+        unsigned long block_end_pfn;
+
+        block_end_pfn = ALIGN(block_start_pfn + 1, pageblock_nr_pages);
+        for (; block_start_pfn < zone_end_pfn(zone);
+                        block_start_pfn = block_end_pfn,
+                         block_end_pfn += pageblock_nr_pages) {
+
+                block_end_pfn = min(block_end_pfn, zone_end_pfn(zone));
+
+                if (!__pageblock_pfn_to_page(block_start_pfn,
+                                        block_end_pfn, zone))
+                        return;
+        }
+
+        /* We confirm that there is no hole */
+        zone->contiguous = true;
+}
+
+void clear_zone_contiguous(struct zone *zone)
+{
+        zone->contiguous = false;
+}
+
 #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
 static void __init deferred_free_range(struct page *page,
                                         unsigned long pfn, int nr_pages)
@@ -1278,9 +1347,13 @@ static int __init deferred_init_memmap(void *data)
         pgdat_init_report_one_done();
         return 0;
 }
+#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 void __init page_alloc_init_late(void)
 {
+        struct zone *zone;
+
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
         int nid;
 
         /* There will be num_node_state(N_MEMORY) threads */
@@ -1294,8 +1367,11 @@ void __init page_alloc_init_late(void)
 
         /* Reinit limits that are based on free pages after the kernel is up */
         files_maxfiles_init();
+#endif
+
+        for_each_populated_zone(zone)
+                set_zone_contiguous(zone);
 }
-#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
 
 #ifdef CONFIG_CMA
 /* Free whole pageblock and set its migration type to MIGRATE_CMA. */
