@@ -1501,6 +1501,77 @@ int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	return 0;
 }
 
+int madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
+		pmd_t *pmd, unsigned long addr, unsigned long next)
+
+{
+	spinlock_t *ptl;
+	pmd_t orig_pmd;
+	struct page *page;
+	struct mm_struct *mm = tlb->mm;
+	int ret = 0;
+
+	if (!pmd_trans_huge_lock(pmd, vma, &ptl))
+		goto out;
+
+	orig_pmd = *pmd;
+	if (is_huge_zero_pmd(orig_pmd)) {
+		ret = 1;
+		goto out;
+	}
+
+	page = pmd_page(orig_pmd);
+	/*
+	 * If other processes are mapping this page, we can't discard
+	 * the page unless they all do MADV_FREE, so skip the page.
+	 */
+	if (page_mapcount(page) != 1)
+		goto out;
+
+	if (!trylock_page(page))
+		goto out;
+
+	/*
+	 * If the user wants to discard only part of the THP, split it so
+	 * MADV_FREE will deactivate only those sub-pages.
+	 */
+	if (next - addr != HPAGE_PMD_SIZE) {
+		get_page(page);
+		spin_unlock(ptl);
+		if (split_huge_page(page)) {
+			put_page(page);
+			unlock_page(page);
+			goto out_unlocked;
+		}
+		put_page(page);
+		unlock_page(page);
+		ret = 1;
+		goto out_unlocked;
+	}
+
+	if (PageDirty(page))
+		ClearPageDirty(page);
+	unlock_page(page);
+
+	if (PageActive(page))
+		deactivate_page(page);
+
+	if (pmd_young(orig_pmd) || pmd_dirty(orig_pmd)) {
+		orig_pmd = pmdp_huge_get_and_clear_full(tlb->mm, addr, pmd,
+			tlb->fullmm);
+		orig_pmd = pmd_mkold(orig_pmd);
+		orig_pmd = pmd_mkclean(orig_pmd);
+
+		set_pmd_at(mm, addr, pmd, orig_pmd);
+		tlb_remove_pmd_tlb_entry(tlb, pmd, addr);
+	}
+	ret = 1;
+out:
+	spin_unlock(ptl);
+out_unlocked:
+	return ret;
+}
+
 int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 pmd_t *pmd, unsigned long addr)
 {
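
The mm/madvise.c side of the series is not part of this hunk. For orientation only, a nonzero return from the new madvise_free_huge_pmd() tells its caller that the huge pmd was dealt with at pmd level and the pte-level walk for that range can be skipped. The sketch below is an assumption about what that caller looks like; the function name madvise_free_pte_range and the use of the mm_walk private pointer are illustrative and not taken from this diff.

	/* Hypothetical caller sketch -- not part of this patch. */
	static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
					  unsigned long end, struct mm_walk *walk)
	{
		struct mmu_gather *tlb = walk->private;	/* assumed to carry the gather */
		struct vm_area_struct *vma = walk->vma;

		if (pmd_trans_huge(*pmd)) {
			/*
			 * Nonzero means the helper already cleared young/dirty on
			 * the pmd, deactivated the page, or hit the huge zero
			 * page; nothing left to do for this range.
			 */
			if (madvise_free_huge_pmd(tlb, vma, pmd, addr, end))
				return 0;
		}

		/* ... otherwise fall through to the ordinary pte-level loop ... */
		return 0;
	}
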
@@ -2710,7 +2781,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	struct page *page;
 	pgtable_t pgtable;
 	pmd_t _pmd;
-	bool young, write;
+	bool young, write, dirty;
 	int i;
 
 	VM_BUG_ON(haddr & ~HPAGE_PMD_MASK);
@@ -2734,6 +2805,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 	atomic_add(HPAGE_PMD_NR - 1, &page->_count);
 	write = pmd_write(*pmd);
 	young = pmd_young(*pmd);
+	dirty = pmd_dirty(*pmd);
 
 	pgtable = pgtable_trans_huge_withdraw(mm, pmd);
 	pmd_populate(mm, &_pmd, pgtable);
@@ -2751,12 +2823,14 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = swp_entry_to_pte(swp_entry);
 		} else {
 			entry = mk_pte(page + i, vma->vm_page_prot);
-			entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+			entry = maybe_mkwrite(entry, vma);
 			if (!write)
 				entry = pte_wrprotect(entry);
 			if (!young)
 				entry = pte_mkold(entry);
 		}
+		if (dirty)
+			SetPageDirty(page + i);
 		pte = pte_offset_map(&_pmd, haddr);
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, haddr, pte, entry);
@@ -2962,6 +3036,8 @@ static void freeze_page_vma(struct vm_area_struct *vma, struct page *page,
 			continue;
 		flush_cache_page(vma, address, page_to_pfn(page));
 		entry = ptep_clear_flush(vma, address, pte + i);
+		if (pte_dirty(entry))
+			SetPageDirty(page);
 		swp_entry = make_migration_entry(page, pte_write(entry));
 		swp_pte = swp_entry_to_pte(swp_entry);
 		if (pte_soft_dirty(entry))
@@ -3028,7 +3104,8 @@ static void unfreeze_page_vma(struct vm_area_struct *vma, struct page *page,
 		page_add_anon_rmap(page, vma, address, false);
 
 		entry = pte_mkold(mk_pte(page, vma->vm_page_prot));
-		entry = pte_mkdirty(entry);
+		if (PageDirty(page))
+			entry = pte_mkdirty(entry);
 		if (is_write_migration_entry(swp_entry))
 			entry = maybe_mkwrite(entry, vma);
 
@@ -3089,8 +3166,8 @@ static int __split_huge_page_tail(struct page *head, int tail,
 			 (1L << PG_uptodate) |
 			 (1L << PG_active) |
 			 (1L << PG_locked) |
-			 (1L << PG_unevictable)));
-	page_tail->flags |= (1L << PG_dirty);
+			 (1L << PG_unevictable) |
+			 (1L << PG_dirty)));
 
 	/*
 	 * After clearing PageTail the gup refcount can be released.