Code Diff
diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index c1d2b3fd9c65d1..423ec40e2e4e2d 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -407,6 +407,12 @@ Contact: "Hridya Valsaraju" <hridya@google.com>
Description: Average number of valid blocks.
Available when CONFIG_F2FS_STAT_FS=y.
+What: /sys/fs/f2fs/<disk>/defrag_blocks
+Date: February 2026
+Contact: "Jinbao Liu" <liujinbao1@xiaomi.com>
+Description: Number of blocks moved by defragment.
+ Available when CONFIG_F2FS_STAT_FS=y.
+
What: /sys/fs/f2fs/<disk>/mounted_time_sec
Date: February 2020
Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0143365c07dc17..c00a6b6ebcbd8a 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -232,15 +232,6 @@ static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
static struct kmem_cache *ino_entry_slab;
struct kmem_cache *f2fs_inode_entry_slab;
-void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io,
- unsigned char reason)
-{
- f2fs_build_fault_attr(sbi, 0, 0, FAULT_ALL);
- if (!end_io)
- f2fs_flush_merged_writes(sbi);
- f2fs_handle_critical_error(sbi, reason);
-}
-
/*
* We guarantee no failure on the returned page.
*/
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 50fac72734ac29..881e76158b967a 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -1491,10 +1491,10 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio)
f2fs_compress_free_page(page);
- dec_page_count(sbi, type);
-
- if (atomic_dec_return(&cic->pending_pages))
+ if (atomic_dec_return(&cic->pending_pages)) {
+ dec_page_count(sbi, type);
return;
+ }
for (i = 0; i < cic->nr_rpages; i++) {
WARN_ON(!cic->rpages[i]);
@@ -1504,6 +1504,14 @@ void f2fs_compress_write_end_io(struct bio *bio, struct folio *folio)
page_array_free(sbi, cic->rpages, cic->nr_rpages);
kmem_cache_free(cic_entry_slab, cic);
+
+ /*
+ * Make sure dec_page_count() is the last access to sbi.
+ * Once it drops the F2FS_WB_CP_DATA counter to zero, the
+ * unmount thread can proceed to destroy sbi and
+ * sbi->page_array_slab.
+ */
+ dec_page_count(sbi, type);
}
static int f2fs_write_raw_pages(struct compress_ctx *cc,
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cf05014fa5e37b..8d4f1e75dee3eb 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -173,7 +173,8 @@ static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
while (nr_pages--)
dec_page_count(F2FS_F_SB(folio), __read_io_type(folio));
- if (F2FS_F_SB(folio)->node_inode && is_node_folio(folio) &&
+ if (bio->bi_status == BLK_STS_OK &&
+ F2FS_F_SB(folio)->node_inode && is_node_folio(folio) &&
f2fs_sanity_check_node_footer(F2FS_F_SB(folio),
folio, folio->index, NODE_TYPE_REGULAR, true))
bio->bi_status = BLK_STS_IOERR;
@@ -386,6 +387,8 @@ static void f2fs_write_end_io(struct bio *bio)
folio->index, NODE_TYPE_REGULAR, true);
f2fs_bug_on(sbi, folio->index != nid_of_node(folio));
}
+ if (f2fs_in_warm_node_list(folio))
+ f2fs_del_fsync_node_entry(sbi, folio);
dec_page_count(sbi, type);
@@ -397,8 +400,6 @@ static void f2fs_write_end_io(struct bio *bio)
wq_has_sleeper(&sbi->cp_wait))
wake_up(&sbi->cp_wait);
- if (f2fs_in_warm_node_list(sbi, folio))
- f2fs_del_fsync_node_entry(sbi, folio);
folio_clear_f2fs_gcing(folio);
folio_end_writeback(folio);
}
@@ -1578,7 +1579,8 @@ static bool f2fs_map_blocks_cached(struct inode *inode,
f2fs_wait_on_block_writeback_range(inode,
map->m_pblk, map->m_len);
- if (f2fs_allow_multi_device_dio(sbi, flag)) {
+ map->m_multidev_dio = f2fs_allow_multi_device_dio(sbi, flag);
+ if (map->m_multidev_dio) {
int bidx = f2fs_target_device_index(sbi, map->m_pblk);
struct f2fs_dev_info *dev = &sbi->devs[bidx];
@@ -1638,8 +1640,26 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
lfs_dio_write = (flag == F2FS_GET_BLOCK_DIO && f2fs_lfs_mode(sbi) &&
map->m_may_create);
- if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
- goto out;
+ if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag)) {
+ struct extent_info ei;
+
+ /*
+ * 1. If map->m_multidev_dio is true, map->m_pblk cannot be
+ * waitted by f2fs_wait_on_block_writeback_range() and are not
+ * mergeable.
+ * 2. If pgofs hits the read extent cache, it means the mapping
+ * is already cached in the extent cache, but it is not
+ * mergeable, and there is no need to query the mapping again
+ * via f2fs_get_dnode_of_data().
+ */
+ pgofs = (pgoff_t)map->m_lblk + map->m_len;
+ if (map->m_len == maxblocks ||
+ map->m_multidev_dio ||
+ f2fs_lookup_read_extent_cache(inode, pgofs, &ei))
+ goto out;
+ ofs = map->m_len;
+ goto map_more;
+ }
map->m_bdev = inode->i_sb->s_bdev;
map->m_multidev_dio =
@@ -1650,7 +1670,8 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
/* it only supports block size == page size */
pgofs = (pgoff_t)map->m_lblk;
- end = pgofs + maxblocks;
+map_more:
+ end = (pgoff_t)map->m_lblk + maxblocks;
if (flag == F2FS_GET_BLOCK_PRECACHE)
mode = LOOKUP_NODE_RA;
@@ -2490,6 +2511,8 @@ static int f2fs_read_data_large_folio(struct inode *inode,
if (!folio)
goto out;
+ f2fs_update_read_folio_count(F2FS_I_SB(inode), folio);
+
folio_in_bio = false;
index = folio->index;
offset = 0;
@@ -2664,6 +2687,8 @@ static int f2fs_mpage_readpages(struct inode *inode, struct fsverity_info *vi,
prefetchw(&folio->flags);
}
+ f2fs_update_read_folio_count(F2FS_I_SB(inode), folio);
+
#ifdef CONFIG_F2FS_FS_COMPRESSION
index = folio->index;
@@ -2790,7 +2815,6 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
struct inode *inode = fio_inode(fio);
struct folio *mfolio;
struct page *page;
- gfp_t gfp_flags = GFP_NOFS;
if (!f2fs_encrypted_file(inode))
return 0;
@@ -2800,19 +2824,10 @@ int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
if (fscrypt_inode_uses_inline_crypto(inode))
return 0;
-retry_encrypt:
fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page_folio(page),
- PAGE_SIZE, 0, gfp_flags);
- if (IS_ERR(fio->encrypted_page)) {
- /* flush pending IOs and wait for a while in the ENOMEM case */
- if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
- f2fs_flush_merged_writes(fio->sbi);
- memalloc_retry_wait(GFP_NOFS);
- gfp_flags |= __GFP_NOFAIL;
- goto retry_encrypt;
- }
+ PAGE_SIZE, 0, GFP_NOFS);
+ if (IS_ERR(fio->encrypted_page))
return PTR_ERR(fio->encrypted_page);
- }
mfolio = filemap_lock_folio(META_MAPPING(fio->sbi), fio->old_blkaddr);
if (!IS_ERR(mfolio)) {
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 8e1040e375a7b0..af88db8fdb7112 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -659,6 +659,7 @@ static int stat_show(struct seq_file *s, void *v)
si->bg_node_blks);
seq_printf(s, "BG skip : IO: %u, Other: %u\n",
si->io_skip_bggc, si->other_skip_bggc);
+ seq_printf(s, "defrag blocks : %u\n", si->defrag_blks);
seq_puts(s, "\nExtent Cache (Read):\n");
seq_printf(s, " - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
si->hit_largest, si->hit_cached[EX_READ],
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index d73aeef333a247..d2e006420f040c 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -119,9 +119,10 @@ static bool __may_extent_tree(struct inode *inode, enum extent_type type)
if (!__init_may_extent_tree(inode, type))
return false;
+ if (is_inode_flag_set(inode, FI_NO_EXTENT))
+ return false;
+
if (type == EX_READ) {
- if (is_inode_flag_set(inode, FI_NO_EXTENT))
- return false;
if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
!f2fs_sb_has_readonly(F2FS_I_SB(inode)))
return false;
@@ -644,6 +645,8 @@ static unsigned int __destroy_extent_node(struct inode *inode,
while (atomic_read(&et->node_cnt)) {
write_lock(&et->lock);
+ if (!is_inode_flag_set(inode, FI_NO_EXTENT))
+ set_inode_flag(inode, FI_NO_EXTENT);
node_cnt += __free_extent_tree(sbi, et, nr_shrink);
write_unlock(&et->lock);
}
@@ -688,12 +691,12 @@ static void __update_extent_tree_range(struct inode *inode,
write_lock(&et->lock);
- if (type == EX_READ) {
- if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
- write_unlock(&et->lock);
- return;
- }
+ if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
+ write_unlock(&et->lock);
+ return;
+ }
+ if (type == EX_READ) {
prev = et->largest;
dei.len = 0;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7d0a467982d61c..91f506e7c9cfb5 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -10,6 +10,7 @@
#include <linux/uio.h>
#include <linux/types.h>
+#include <linux/mmzone.h>
#include <linux/page-flags.h>
#include <linux/slab.h>
#include <linux/crc32.h>
@@ -2032,6 +2033,8 @@ struct f2fs_sb_info {
unsigned long long iostat_count[NR_IO_TYPE];
unsigned long long iostat_bytes[NR_IO_TYPE];
unsigned long long prev_iostat_bytes[NR_IO_TYPE];
+ unsigned long long iostat_read_folio_count[NR_PAGE_ORDERS];
+ unsigned long long prev_iostat_read_folio_count[NR_PAGE_ORDERS];
bool iostat_enable;
unsigned long iostat_next_period;
unsigned int iostat_period_ms;
@@ -2040,6 +2043,9 @@ struct f2fs_sb_info {
spinlock_t iostat_lat_lock;
struct iostat_lat_info *iostat_io_lat;
#endif
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lock_class_key cp_global_sem_key;
+#endif
};
/* Definitions to access f2fs_sb_info */
@@ -3900,7 +3906,6 @@ int f2fs_do_quota_sync(struct super_block *sb, int type);
loff_t max_file_blocks(struct inode *inode);
void f2fs_quota_off_umount(struct super_block *sb);
void f2fs_save_errors(struct f2fs_sb_info *sbi, unsigned char flag);
-void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason);
void f2fs_handle_error(struct f2fs_sb_info *sbi, unsigned char error);
int f2fs_commit_super(struct f2fs_sb_info *sbi, bool recover);
int f2fs_sync_fs(struct super_block *sb, int sync);
@@ -3919,11 +3924,11 @@ enum node_type;
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type);
-bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct folio *folio);
+bool f2fs_in_warm_node_list(struct folio *folio);
void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi);
void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct folio *folio);
void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi);
-int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
+bool f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid);
bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino);
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
@@ -3945,6 +3950,8 @@ int f2fs_sanity_check_node_footer(struct f2fs_sb_info *sbi,
enum node_type ntype, bool in_irq);
struct folio *f2fs_get_inode_folio(struct f2fs_sb_info *sbi, pgoff_t ino);
struct folio *f2fs_get_xnode_folio(struct f2fs_sb_info *sbi, pgoff_t xnid);
+int f2fs_write_single_node_folio(struct folio *node_folio, int sync_mode,
+ bool mark_dirty, enum iostat_type io_type);
int f2fs_move_node_folio(struct folio *node_folio, int gc_type);
void f2fs_flush_inline_data(struct f2fs_sb_info *sbi);
int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
@@ -3987,7 +3994,7 @@ bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr);
int f2fs_start_discard_thread(struct f2fs_sb_info *sbi);
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi);
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi);
-bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi);
+bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi, bool need_check);
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
struct cp_control *cpc);
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi);
@@ -4286,6 +4293,7 @@ struct f2fs_stat_info {
int gc_secs[2][2];
int tot_blks, data_blks, node_blks;
int bg_data_blks, bg_node_blks;
+ unsigned int defrag_blks;
int blkoff[NR_CURSEG_TYPE];
int curseg[NR_CURSEG_TYPE];
int cursec[NR_CURSEG_TYPE];
@@ -4420,6 +4428,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
si->bg_node_blks += ((gc_type) == BG_GC) ? (blks) : 0; \
} while (0)
+#define stat_inc_defrag_blk_count(sbi, blks) \
+ (F2FS_STAT(sbi)->defrag_blks += (blks))
+
int f2fs_build_stats(struct f2fs_sb_info *sbi);
void f2fs_destroy_stats(struct f2fs_sb_info *sbi);
void __init f2fs_create_root_stats(void);
@@ -4461,6 +4472,7 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi);
#define stat_inc_tot_blk_count(si, blks) do { } while (0)
#define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0)
#define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0)
+#define stat_inc_defrag_blk_count(sbi, blks) do { } while (0)
static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; }
static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { }
@@ -5063,8 +5075,25 @@ static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi,
return;
if (ofs == sbi->page_eio_ofs[type]) {
- if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO)
- set_ckpt_flags(sbi, CP_ERROR_FLAG);
+ if (sbi->page_eio_cnt[type]++ == MAX_RETRY_PAGE_EIO) {
+ enum stop_cp_reason stop_reason;
+
+ switch (type) {
+ case META:
+ stop_reason = STOP_CP_REASON_READ_META;
+ break;
+ case NODE:
+ stop_reason = STOP_CP_REASON_READ_NODE;
+ break;
+ case DATA:
+ stop_reason = STOP_CP_REASON_READ_DATA;
+ break;
+ default:
+ f2fs_bug_on(sbi, 1);
+ return;
+ }
+ f2fs_stop_checkpoint(sbi, false, stop_reason);
+ }
} else {
sbi->page_eio_ofs[type] = ofs;
sbi->page_eio_cnt[type] = 0;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 42f5832242b3a1..fb12c5c9affda0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -81,8 +81,17 @@ static vm_fault_t f2fs_vm_page_mkwrite(struct vm_fault *vmf)
int err = 0;
vm_fault_t ret;
- if (unlikely(IS_IMMUTABLE(inode)))
+ /*
+ * We only support large folio on the read case.
+ * Don't make any dirty pages.
+ */
+ if (unlikely(IS_IMMUTABLE(inode)) ||
+ mapping_large_folio_support(inode->i_mapping)) {
+ f2fs_err(sbi, "Not expected: immutable: %d large_folio: %d",
+ IS_IMMUTABLE(inode),
+ mapping_large_folio_support(inode->i_mapping));
return VM_FAULT_SIGBUS;
+ }
if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) {
err = -EIO;
@@ -3042,8 +3051,10 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
clear_inode_flag(inode, FI_OPU_WRITE);
unlock_out:
inode_unlock(inode);
- if (!err)
+ if (!err) {
range->len = (u64)total << PAGE_SHIFT;
... [truncated]