Skip to content

Commit bfdc502

Browse files
riteshharjani authored and tytso committed
ext4: fix ext4_mb_mark_bb() with flex_bg with fast_commit
In case of the flex_bg feature (which is enabled by default), extents for any given inode might span across blocks from two different block groups. ext4_mb_mark_bb() only reads the buffer_head of the block bitmap once for the starting block group, but it fails to read it again when the extent length boundary overflows into another block group. Then, in the loop below, it accesses memory beyond the block group bitmap buffer_head, which results in a data abort. for (i = 0; i < clen; i++) if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state) already++; This patch adds a block group boundary check to ext4_mb_mark_bb() and updates the buffer_head (bitmap_bh) for every different block group. Without this patch, I was easily able to hit a data access abort on the Power platform. <...> [ 74.327662] EXT4-fs error (device loop3): ext4_mb_generate_buddy:1141: group 11, block bitmap and bg descriptor inconsistent: 21248 vs 23294 free clusters [ 74.533214] EXT4-fs (loop3): shut down requested (2) [ 74.536705] Aborting journal on device loop3-8. [ 74.702705] BUG: Unable to handle kernel data access on read at 0xc00000005e980000 [ 74.703727] Faulting instruction address: 0xc0000000007bffb8 cpu 0xd: Vector: 300 (Data Access) at [c000000015db7060] pc: c0000000007bffb8: ext4_mb_mark_bb+0x198/0x5a0 lr: c0000000007bfeec: ext4_mb_mark_bb+0xcc/0x5a0 sp: c000000015db7300 msr: 800000000280b033 dar: c00000005e980000 dsisr: 40000000 current = 0xc000000027af6880 paca = 0xc00000003ffd5200 irqmask: 0x03 irq_happened: 0x01 pid = 5167, comm = mount <...> enter ? 
for help [c000000015db7380] c000000000782708 ext4_ext_clear_bb+0x378/0x410 [c000000015db7400] c000000000813f14 ext4_fc_replay+0x1794/0x2000 [c000000015db7580] c000000000833f7c do_one_pass+0xe9c/0x12a0 [c000000015db7710] c000000000834504 jbd2_journal_recover+0x184/0x2d0 [c000000015db77c0] c000000000841398 jbd2_journal_load+0x188/0x4a0 [c000000015db7880] c000000000804de8 ext4_fill_super+0x2638/0x3e10 [c000000015db7a40] c0000000005f8404 get_tree_bdev+0x2b4/0x350 [c000000015db7ae0] c0000000007ef058 ext4_get_tree+0x28/0x40 [c000000015db7b00] c0000000005f6344 vfs_get_tree+0x44/0x100 [c000000015db7b70] c00000000063c408 path_mount+0xdd8/0xe70 [c000000015db7c40] c00000000063c8f0 sys_mount+0x450/0x550 [c000000015db7d50] c000000000035770 system_call_exception+0x4a0/0x4e0 [c000000015db7e10] c00000000000c74c system_call_common+0xec/0x250 Signed-off-by: Ritesh Harjani <[email protected]> Reviewed-by: Jan Kara <[email protected]> Link: https://lore.kernel.org/r/2609bc8f66fc15870616ee416a18a3d392a209c4.1644992609.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o <[email protected]>
1 parent a5c0e2f commit bfdc502

File tree

1 file changed

+76
-55
lines changed

1 file changed

+76
-55
lines changed

fs/ext4/mballoc.c

Lines changed: 76 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -3901,72 +3901,93 @@ void ext4_mb_mark_bb(struct super_block *sb, ext4_fsblk_t block,
39013901
ext4_grpblk_t blkoff;
39023902
int i, err;
39033903
int already;
3904-
unsigned int clen, clen_changed;
3904+
unsigned int clen, clen_changed, thisgrp_len;
39053905

3906-
clen = EXT4_NUM_B2C(sbi, len);
3907-
3908-
ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
3909-
bitmap_bh = ext4_read_block_bitmap(sb, group);
3910-
if (IS_ERR(bitmap_bh)) {
3911-
err = PTR_ERR(bitmap_bh);
3912-
bitmap_bh = NULL;
3913-
goto out_err;
3914-
}
3915-
3916-
err = -EIO;
3917-
gdp = ext4_get_group_desc(sb, group, &gdp_bh);
3918-
if (!gdp)
3919-
goto out_err;
3906+
while (len > 0) {
3907+
ext4_get_group_no_and_offset(sb, block, &group, &blkoff);
39203908

3921-
ext4_lock_group(sb, group);
3922-
already = 0;
3923-
for (i = 0; i < clen; i++)
3924-
if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) == !state)
3925-
already++;
3926-
3927-
clen_changed = clen - already;
3928-
if (state)
3929-
ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
3930-
else
3931-
mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
3932-
if (ext4_has_group_desc_csum(sb) &&
3933-
(gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3934-
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3935-
ext4_free_group_clusters_set(sb, gdp,
3936-
ext4_free_clusters_after_init(sb,
3937-
group, gdp));
3938-
}
3939-
if (state)
3940-
clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
3941-
else
3942-
clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
3909+
/*
3910+
* Check to see if we are freeing blocks across a group
3911+
* boundary.
3912+
* In case of flex_bg, this can happen that (block, len) may
3913+
* span across more than one group. In that case we need to
3914+
* get the corresponding group metadata to work with.
3915+
* For this we have goto again loop.
3916+
*/
3917+
thisgrp_len = min_t(unsigned int, (unsigned int)len,
3918+
EXT4_BLOCKS_PER_GROUP(sb) - EXT4_C2B(sbi, blkoff));
3919+
clen = EXT4_NUM_B2C(sbi, thisgrp_len);
39433920

3944-
ext4_free_group_clusters_set(sb, gdp, clen);
3945-
ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
3946-
ext4_group_desc_csum_set(sb, group, gdp);
3921+
bitmap_bh = ext4_read_block_bitmap(sb, group);
3922+
if (IS_ERR(bitmap_bh)) {
3923+
err = PTR_ERR(bitmap_bh);
3924+
bitmap_bh = NULL;
3925+
break;
3926+
}
39473927

3948-
ext4_unlock_group(sb, group);
3928+
err = -EIO;
3929+
gdp = ext4_get_group_desc(sb, group, &gdp_bh);
3930+
if (!gdp)
3931+
break;
39493932

3950-
if (sbi->s_log_groups_per_flex) {
3951-
ext4_group_t flex_group = ext4_flex_group(sbi, group);
3952-
struct flex_groups *fg = sbi_array_rcu_deref(sbi,
3953-
s_flex_groups, flex_group);
3933+
ext4_lock_group(sb, group);
3934+
already = 0;
3935+
for (i = 0; i < clen; i++)
3936+
if (!mb_test_bit(blkoff + i, bitmap_bh->b_data) ==
3937+
!state)
3938+
already++;
39543939

3940+
clen_changed = clen - already;
39553941
if (state)
3956-
atomic64_sub(clen_changed, &fg->free_clusters);
3942+
ext4_set_bits(bitmap_bh->b_data, blkoff, clen);
39573943
else
3958-
atomic64_add(clen_changed, &fg->free_clusters);
3944+
mb_test_and_clear_bits(bitmap_bh->b_data, blkoff, clen);
3945+
if (ext4_has_group_desc_csum(sb) &&
3946+
(gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) {
3947+
gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
3948+
ext4_free_group_clusters_set(sb, gdp,
3949+
ext4_free_clusters_after_init(sb, group, gdp));
3950+
}
3951+
if (state)
3952+
clen = ext4_free_group_clusters(sb, gdp) - clen_changed;
3953+
else
3954+
clen = ext4_free_group_clusters(sb, gdp) + clen_changed;
3955+
3956+
ext4_free_group_clusters_set(sb, gdp, clen);
3957+
ext4_block_bitmap_csum_set(sb, group, gdp, bitmap_bh);
3958+
ext4_group_desc_csum_set(sb, group, gdp);
3959+
3960+
ext4_unlock_group(sb, group);
3961+
3962+
if (sbi->s_log_groups_per_flex) {
3963+
ext4_group_t flex_group = ext4_flex_group(sbi, group);
3964+
struct flex_groups *fg = sbi_array_rcu_deref(sbi,
3965+
s_flex_groups, flex_group);
3966+
3967+
if (state)
3968+
atomic64_sub(clen_changed, &fg->free_clusters);
3969+
else
3970+
atomic64_add(clen_changed, &fg->free_clusters);
3971+
3972+
}
3973+
3974+
err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
3975+
if (err)
3976+
break;
3977+
sync_dirty_buffer(bitmap_bh);
3978+
err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
3979+
sync_dirty_buffer(gdp_bh);
3980+
if (err)
3981+
break;
3982+
3983+
block += thisgrp_len;
3984+
len -= thisgrp_len;
3985+
brelse(bitmap_bh);
3986+
BUG_ON(len < 0);
39593987
}
39603988

3961-
err = ext4_handle_dirty_metadata(NULL, NULL, bitmap_bh);
39623989
if (err)
3963-
goto out_err;
3964-
sync_dirty_buffer(bitmap_bh);
3965-
err = ext4_handle_dirty_metadata(NULL, NULL, gdp_bh);
3966-
sync_dirty_buffer(gdp_bh);
3967-
3968-
out_err:
3969-
brelse(bitmap_bh);
3990+
brelse(bitmap_bh);
39703991
}
39713992

39723993
/*

0 commit comments

Comments
 (0)