Skip to content

Commit 2dfb1e4

Browse files
adam900710 authored and kdave committed
btrfs: preallocate anon block device at first phase of snapshot creation
[BUG]
When the anonymous block device pool is exhausted, subvolume/snapshot creation fails with EMFILE (Too many open files). This has been reported by a user. The allocation happens in the second phase during transaction commit, where the only way out is to abort the transaction:

  BTRFS: Transaction aborted (error -24)
  WARNING: CPU: 17 PID: 17041 at fs/btrfs/transaction.c:1576 create_pending_snapshot+0xbc4/0xd10 [btrfs]
  RIP: 0010:create_pending_snapshot+0xbc4/0xd10 [btrfs]
  Call Trace:
   create_pending_snapshots+0x82/0xa0 [btrfs]
   btrfs_commit_transaction+0x275/0x8c0 [btrfs]
   btrfs_mksubvol+0x4b9/0x500 [btrfs]
   btrfs_ioctl_snap_create_transid+0x174/0x180 [btrfs]
   btrfs_ioctl_snap_create_v2+0x11c/0x180 [btrfs]
   btrfs_ioctl+0x11a4/0x2da0 [btrfs]
   do_vfs_ioctl+0xa9/0x640
   ksys_ioctl+0x67/0x90
   __x64_sys_ioctl+0x1a/0x20
   do_syscall_64+0x5a/0x110
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  ---[ end trace 33f2f83f3d5250e9 ]---
  BTRFS: error (device sda1) in create_pending_snapshot:1576: errno=-24 unknown
  BTRFS info (device sda1): forced readonly
  BTRFS warning (device sda1): Skipping commit of aborted transaction.
  BTRFS: error (device sda1) in cleanup_transaction:1831: errno=-24 unknown

[CAUSE]
When the global anonymous block device pool is exhausted, the following call chain will fail, and lead to transaction abort:

  btrfs_ioctl_snap_create_v2()
  |- btrfs_ioctl_snap_create_transid()
     |- btrfs_mksubvol()
        |- btrfs_commit_transaction()
           |- create_pending_snapshot()
              |- btrfs_get_fs_root()
                 |- btrfs_init_fs_root()
                    |- get_anon_bdev()

[FIX]
Although we can't enlarge the anonymous block device pool, at least we can preallocate anon_dev for subvolume/snapshot in the first phase, outside of transaction context and exactly at the moment the user calls the creation ioctl.
Reported-by: Greed Rong <[email protected]>
Link: https://lore.kernel.org/linux-btrfs/CA+UqX+NTrZ6boGnWHhSeZmEY5J76CTqmYjO2S+=tHJX7nb9DPw@mail.gmail.com/
CC: [email protected] # 4.4+
Signed-off-by: Qu Wenruo <[email protected]>
Reviewed-by: David Sterba <[email protected]>
Signed-off-by: David Sterba <[email protected]>
1 parent 082b6c9 commit 2dfb1e4

File tree

5 files changed

+89
-9
lines changed

5 files changed

+89
-9
lines changed

fs/btrfs/disk-io.c

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1391,7 +1391,12 @@ struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
13911391
goto out;
13921392
}
13931393

1394-
static int btrfs_init_fs_root(struct btrfs_root *root)
1394+
/*
1395+
* Initialize subvolume root in-memory structure
1396+
*
1397+
* @anon_dev: anonymous device to attach to the root, if zero, allocate new
1398+
*/
1399+
static int btrfs_init_fs_root(struct btrfs_root *root, dev_t anon_dev)
13951400
{
13961401
int ret;
13971402
unsigned int nofs_flag;
@@ -1430,9 +1435,13 @@ static int btrfs_init_fs_root(struct btrfs_root *root)
14301435
*/
14311436
if (is_fstree(root->root_key.objectid) &&
14321437
btrfs_root_refs(&root->root_item) > 0) {
1433-
ret = get_anon_bdev(&root->anon_dev);
1434-
if (ret)
1435-
goto fail;
1438+
if (!anon_dev) {
1439+
ret = get_anon_bdev(&root->anon_dev);
1440+
if (ret)
1441+
goto fail;
1442+
} else {
1443+
root->anon_dev = anon_dev;
1444+
}
14361445
}
14371446

14381447
mutex_lock(&root->objectid_mutex);
@@ -1537,8 +1546,27 @@ void btrfs_free_fs_info(struct btrfs_fs_info *fs_info)
15371546
}
15381547

15391548

1540-
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
1541-
u64 objectid, bool check_ref)
1549+
/*
1550+
* Get an in-memory reference of a root structure.
1551+
*
1552+
* For essential trees like root/extent tree, we grab it from fs_info directly.
1553+
* For subvolume trees, we check the cached filesystem roots first. If not
1554+
* found, then read it from disk and add it to cached fs roots.
1555+
*
1556+
* Caller should release the root by calling btrfs_put_root() after the usage.
1557+
*
1558+
* NOTE: Reloc and log trees can't be read by this function as they share the
1559+
* same root objectid.
1560+
*
1561+
* @objectid: root id
1562+
* @anon_dev: preallocated anonymous block device number for new roots,
1563+
* pass 0 for new allocation.
1564+
* @check_ref: whether to check root item references. If true, return -ENOENT
1565+
* for orphan roots
1566+
*/
1567+
static struct btrfs_root *btrfs_get_root_ref(struct btrfs_fs_info *fs_info,
1568+
u64 objectid, dev_t anon_dev,
1569+
bool check_ref)
15421570
{
15431571
struct btrfs_root *root;
15441572
struct btrfs_path *path;
@@ -1567,6 +1595,8 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
15671595
again:
15681596
root = btrfs_lookup_fs_root(fs_info, objectid);
15691597
if (root) {
1598+
/* Shouldn't get preallocated anon_dev for cached roots */
1599+
ASSERT(!anon_dev);
15701600
if (check_ref && btrfs_root_refs(&root->root_item) == 0) {
15711601
btrfs_put_root(root);
15721602
return ERR_PTR(-ENOENT);
@@ -1586,7 +1616,7 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
15861616
goto fail;
15871617
}
15881618

1589-
ret = btrfs_init_fs_root(root);
1619+
ret = btrfs_init_fs_root(root, anon_dev);
15901620
if (ret)
15911621
goto fail;
15921622

@@ -1619,6 +1649,33 @@ struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
16191649
return ERR_PTR(ret);
16201650
}
16211651

1652+
/*
1653+
* Get in-memory reference of a root structure
1654+
*
1655+
* @objectid: tree objectid
1656+
* @check_ref: if set, verify that the tree exists and the item has at least
1657+
* one reference
1658+
*/
1659+
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
1660+
u64 objectid, bool check_ref)
1661+
{
1662+
return btrfs_get_root_ref(fs_info, objectid, 0, check_ref);
1663+
}
1664+
1665+
/*
1666+
* Get in-memory reference of a root structure, created as new, optionally pass
1667+
* the anonymous block device id
1668+
*
1669+
* @objectid: tree objectid
1670+
* @anon_dev: if zero, allocate a new anonymous block device; otherwise use
1671+
* the passed-in preallocated value
1672+
*/
1673+
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
1674+
u64 objectid, dev_t anon_dev)
1675+
{
1676+
return btrfs_get_root_ref(fs_info, objectid, anon_dev, true);
1677+
}
1678+
16221679
static int btrfs_congested_fn(void *congested_data, int bdi_bits)
16231680
{
16241681
struct btrfs_fs_info *info = (struct btrfs_fs_info *)congested_data;

fs/btrfs/disk-io.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
6767

6868
struct btrfs_root *btrfs_get_fs_root(struct btrfs_fs_info *fs_info,
6969
u64 objectid, bool check_ref);
70+
struct btrfs_root *btrfs_get_new_fs_root(struct btrfs_fs_info *fs_info,
71+
u64 objectid, dev_t anon_dev);
7072

7173
void btrfs_free_fs_info(struct btrfs_fs_info *fs_info);
7274
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info);

fs/btrfs/ioctl.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,7 @@ static noinline int create_subvol(struct inode *dir,
566566
struct inode *inode;
567567
int ret;
568568
int err;
569+
dev_t anon_dev = 0;
569570
u64 objectid;
570571
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
571572
u64 index = 0;
@@ -578,6 +579,10 @@ static noinline int create_subvol(struct inode *dir,
578579
if (ret)
579580
goto fail_free;
580581

582+
ret = get_anon_bdev(&anon_dev);
583+
if (ret < 0)
584+
goto fail_free;
585+
581586
/*
582587
* Don't create subvolume whose level is not zero. Or qgroup will be
583588
* screwed up since it assumes subvolume qgroup's level to be 0.
@@ -660,12 +665,15 @@ static noinline int create_subvol(struct inode *dir,
660665
goto fail;
661666

662667
key.offset = (u64)-1;
663-
new_root = btrfs_get_fs_root(fs_info, objectid, true);
668+
new_root = btrfs_get_new_fs_root(fs_info, objectid, anon_dev);
664669
if (IS_ERR(new_root)) {
670+
free_anon_bdev(anon_dev);
665671
ret = PTR_ERR(new_root);
666672
btrfs_abort_transaction(trans, ret);
667673
goto fail;
668674
}
675+
/* Freeing will be done in btrfs_put_root() of new_root */
676+
anon_dev = 0;
669677

670678
btrfs_record_root_in_trans(trans, new_root);
671679

@@ -735,6 +743,8 @@ static noinline int create_subvol(struct inode *dir,
735743
return ret;
736744

737745
fail_free:
746+
if (anon_dev)
747+
free_anon_bdev(anon_dev);
738748
kfree(root_item);
739749
return ret;
740750
}
@@ -762,6 +772,9 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
762772
if (!pending_snapshot)
763773
return -ENOMEM;
764774

775+
ret = get_anon_bdev(&pending_snapshot->anon_dev);
776+
if (ret < 0)
777+
goto free_pending;
765778
pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
766779
GFP_KERNEL);
767780
pending_snapshot->path = btrfs_alloc_path();
@@ -823,10 +836,16 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
823836

824837
d_instantiate(dentry, inode);
825838
ret = 0;
839+
pending_snapshot->anon_dev = 0;
826840
fail:
841+
/* Prevent double freeing of anon_dev */
842+
if (ret && pending_snapshot->snap)
843+
pending_snapshot->snap->anon_dev = 0;
827844
btrfs_put_root(pending_snapshot->snap);
828845
btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
829846
free_pending:
847+
if (pending_snapshot->anon_dev)
848+
free_anon_bdev(pending_snapshot->anon_dev);
830849
kfree(pending_snapshot->root_item);
831850
btrfs_free_path(pending_snapshot->path);
832851
kfree(pending_snapshot);

fs/btrfs/transaction.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1630,7 +1630,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
16301630
}
16311631

16321632
key.offset = (u64)-1;
1633-
pending->snap = btrfs_get_fs_root(fs_info, objectid, true);
1633+
pending->snap = btrfs_get_new_fs_root(fs_info, objectid, pending->anon_dev);
16341634
if (IS_ERR(pending->snap)) {
16351635
ret = PTR_ERR(pending->snap);
16361636
btrfs_abort_transaction(trans, ret);

fs/btrfs/transaction.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,8 @@ struct btrfs_pending_snapshot {
151151
struct btrfs_block_rsv block_rsv;
152152
/* extra metadata reservation for relocation */
153153
int error;
154+
/* Preallocated anonymous block device number */
155+
dev_t anon_dev;
154156
bool readonly;
155157
struct list_head list;
156158
};

0 commit comments

Comments
 (0)