
Commit 5c359cc

oupton authored and Marc Zyngier committed
KVM: arm64: Tear down unlinked stage-2 subtree after break-before-make
The break-before-make sequence is a bit annoying as it opens a window wherein memory is unmapped from the guest. KVM should replace the PTE as quickly as possible and avoid unnecessary work in between.

Presently, the stage-2 map walker tears down a removed table before installing a block mapping when coalescing a table into a block. As the removed table is no longer visible to hardware walkers after the DSB+TLBI, it is possible to move the remaining cleanup to happen after installing the new PTE.

Reshuffle the stage-2 map walker to install the new block entry in the pre-order callback. Unwire all of the teardown logic and replace it with a call to kvm_pgtable_stage2_free_removed() after fixing the PTE. The post-order visitor is now completely unnecessary, so drop it. Finally, touch up the comments to better represent the now simplified map walker.

Note that the call to tear down the unlinked stage-2 is indirected as a subsequent change will use an RCU callback to trigger tear down. RCU is not available to pKVM, so there is a need to use different implementations on pKVM and non-pKVM VMs.

Signed-off-by: Oliver Upton <[email protected]>
Reviewed-by: Ben Gardon <[email protected]>
Signed-off-by: Marc Zyngier <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
1 parent 6b91b8f commit 5c359cc
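To make the new ordering easier to follow, here is a minimal sketch of the reworked pre-order callback, mirroring the pgtable.c hunk below (error handling and surrounding context omitted; this is not a drop-in implementation):

	/* Break: detach the old table entry so hardware walkers stop using it. */
	kvm_clear_pte(ctx->ptep);
	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);

	/* Make: install the replacement block PTE as quickly as possible. */
	ret = stage2_map_walker_try_leaf(ctx, data);

	/* Cleanup happens only after the new PTE is in place. */
	mm_ops->put_page(ctx->ptep);
	mm_ops->free_removed_table(childp, ctx->level);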

4 files changed, +39 -63 lines changed


arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 3 additions & 0 deletions
@@ -92,6 +92,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
  *				allocation is physically contiguous.
  * @free_pages_exact:		Free an exact number of memory pages previously
  *				allocated by zalloc_pages_exact.
+ * @free_removed_table:		Free a removed paging structure by unlinking and
+ *				dropping references.
  * @get_page:			Increment the refcount on a page.
  * @put_page:			Decrement the refcount on a page. When the
  *				refcount reaches 0 the page is automatically
@@ -110,6 +112,7 @@ struct kvm_pgtable_mm_ops {
 	void*		(*zalloc_page)(void *arg);
 	void*		(*zalloc_pages_exact)(size_t size);
 	void		(*free_pages_exact)(void *addr, size_t size);
+	void		(*free_removed_table)(void *addr, u32 level);
 	void		(*get_page)(void *addr);
 	void		(*put_page)(void *addr);
 	int		(*page_count)(void *addr);

arch/arm64/kvm/hyp/nvhe/mem_protect.c

Lines changed: 6 additions & 0 deletions
@@ -79,6 +79,11 @@ static void host_s2_put_page(void *addr)
 	hyp_put_page(&host_s2_pool, addr);
 }
 
+static void host_s2_free_removed_table(void *addr, u32 level)
+{
+	kvm_pgtable_stage2_free_removed(&host_kvm.mm_ops, addr, level);
+}
+
 static int prepare_s2_pool(void *pgt_pool_base)
 {
 	unsigned long nr_pages, pfn;
@@ -93,6 +98,7 @@ static int prepare_s2_pool(void *pgt_pool_base)
 	host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) {
 		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
 		.zalloc_page = host_s2_zalloc_page,
+		.free_removed_table = host_s2_free_removed_table,
 		.phys_to_virt = hyp_phys_to_virt,
 		.virt_to_phys = hyp_virt_to_phys,
 		.page_count = hyp_page_count,

arch/arm64/kvm/hyp/pgtable.c

Lines changed: 22 additions & 63 deletions
@@ -750,13 +750,13 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
 				     struct stage2_map_data *data)
 {
-	if (data->anchor)
-		return 0;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+	kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops);
+	int ret;
 
 	if (!stage2_leaf_mapping_allowed(ctx, data))
 		return 0;
 
-	data->childp = kvm_pte_follow(ctx->old, ctx->mm_ops);
 	kvm_clear_pte(ctx->ptep);
 
 	/*
@@ -765,8 +765,13 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
 	 * individually.
 	 */
 	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
-	data->anchor = ctx->ptep;
-	return 0;
+
+	ret = stage2_map_walker_try_leaf(ctx, data);
+
+	mm_ops->put_page(ctx->ptep);
+	mm_ops->free_removed_table(childp, ctx->level);
+
+	return ret;
 }
 
 static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
@@ -776,13 +781,6 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	kvm_pte_t *childp;
 	int ret;
 
-	if (data->anchor) {
-		if (stage2_pte_is_counted(ctx->old))
-			mm_ops->put_page(ctx->ptep);
-
-		return 0;
-	}
-
 	ret = stage2_map_walker_try_leaf(ctx, data);
 	if (ret != -E2BIG)
 		return ret;
@@ -811,49 +809,14 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	return 0;
 }
 
-static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx,
-				      struct stage2_map_data *data)
-{
-	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
-	kvm_pte_t *childp;
-	int ret = 0;
-
-	if (!data->anchor)
-		return 0;
-
-	if (data->anchor == ctx->ptep) {
-		childp = data->childp;
-		data->anchor = NULL;
-		data->childp = NULL;
-		ret = stage2_map_walk_leaf(ctx, data);
-	} else {
-		childp = kvm_pte_follow(ctx->old, mm_ops);
-	}
-
-	mm_ops->put_page(childp);
-	mm_ops->put_page(ctx->ptep);
-
-	return ret;
-}
-
 /*
- * This is a little fiddly, as we use all three of the walk flags. The idea
- * is that the TABLE_PRE callback runs for table entries on the way down,
- * looking for table entries which we could conceivably replace with a
- * block entry for this mapping. If it finds one, then it sets the 'anchor'
- * field in 'struct stage2_map_data' to point at the table entry, before
- * clearing the entry to zero and descending into the now detached table.
- *
- * The behaviour of the LEAF callback then depends on whether or not the
- * anchor has been set. If not, then we're not using a block mapping higher
- * up the table and we perform the mapping at the existing leaves instead.
- * If, on the other hand, the anchor _is_ set, then we drop references to
- * all valid leaves so that the pages beneath the anchor can be freed.
+ * The TABLE_PRE callback runs for table entries on the way down, looking
+ * for table entries which we could conceivably replace with a block entry
+ * for this mapping. If it finds one it replaces the entry and calls
+ * kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table.
  *
- * Finally, the TABLE_POST callback does nothing if the anchor has not
- * been set, but otherwise frees the page-table pages while walking back up
- * the page-table, installing the block entry when it revisits the anchor
- * pointer and clearing the anchor to NULL.
+ * Otherwise, the LEAF callback performs the mapping at the existing leaves
+ * instead.
  */
 static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			     enum kvm_pgtable_walk_flags visit)
@@ -865,11 +828,9 @@ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
 		return stage2_map_walk_table_pre(ctx, data);
 	case KVM_PGTABLE_WALK_LEAF:
 		return stage2_map_walk_leaf(ctx, data);
-	case KVM_PGTABLE_WALK_TABLE_POST:
-		return stage2_map_walk_table_post(ctx, data);
+	default:
+		return -EINVAL;
 	}
-
-	return -EINVAL;
 }
 
 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
@@ -886,8 +847,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_map_walker,
 		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
-				  KVM_PGTABLE_WALK_LEAF |
-				  KVM_PGTABLE_WALK_TABLE_POST,
+				  KVM_PGTABLE_WALK_LEAF,
 		.arg		= &map_data,
 	};
 
@@ -917,8 +877,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_map_walker,
 		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
-				  KVM_PGTABLE_WALK_LEAF |
-				  KVM_PGTABLE_WALK_TABLE_POST,
+				  KVM_PGTABLE_WALK_LEAF,
 		.arg		= &map_data,
 	};
 
@@ -1207,7 +1166,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 
 void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
 {
-	kvm_pte_t *ptep = (kvm_pte_t *)pgtable;
+	kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
 	struct kvm_pgtable_walker walker = {
 		.cb	= stage2_free_walker,
 		.flags	= KVM_PGTABLE_WALK_LEAF |
@@ -1225,5 +1184,5 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
 		.end	= kvm_granule_size(level),
 	};
 
-	WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level));
+	WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
 }

arch/arm64/kvm/mmu.c

Lines changed: 8 additions & 0 deletions
@@ -128,6 +128,13 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
 	free_pages_exact(virt, size);
 }
 
+static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
+
+static void stage2_free_removed_table(void *addr, u32 level)
+{
+	kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level);
+}
+
 static void kvm_host_get_page(void *addr)
 {
 	get_page(virt_to_page(addr));
@@ -662,6 +669,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
 	.zalloc_page		= stage2_memcache_zalloc_page,
 	.zalloc_pages_exact	= kvm_s2_zalloc_pages_exact,
 	.free_pages_exact	= kvm_s2_free_pages_exact,
+	.free_removed_table	= stage2_free_removed_table,
 	.get_page		= kvm_host_get_page,
 	.put_page		= kvm_s2_put_page,
 	.page_count		= kvm_host_page_count,
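The commit message notes that a subsequent change will trigger this teardown from an RCU callback on non-pKVM VMs. As a purely illustrative, hypothetical sketch of that direction (the wrapper struct and function names below are assumptions, not the actual follow-up patch):

	/* Hypothetical sketch only: the real follow-up may track the removed table differently. */
	struct stage2_removed_table {
		struct rcu_head rcu;
		void *addr;
		u32 level;
	};

	static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
	{
		struct stage2_removed_table *t = container_of(head, struct stage2_removed_table, rcu);

		kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, t->addr, t->level);
		kfree(t);
	}

	static void stage2_free_removed_table_deferred(void *addr, u32 level)
	{
		struct stage2_removed_table *t = kzalloc(sizeof(*t), GFP_ATOMIC);

		if (!t) {
			/* Fall back to immediate teardown if the wrapper cannot be allocated. */
			kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level);
			return;
		}

		t->addr = addr;
		t->level = level;
		call_rcu(&t->rcu, stage2_free_removed_table_rcu_cb);
	}

pKVM would keep the synchronous host_s2_free_removed_table() shown earlier, since the commit message notes RCU is not available to pKVM.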
